/* Lowering pass for OMP directives.  Converts OMP directives into explicit
   calls to the runtime library (libgomp), data marshalling to implement data
   sharing and copying clauses, offloading to accelerators, and more.

   Contributed by Diego Novillo <dnovillo@redhat.com>

   Copyright (C) 2005-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-iterator.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "flags.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "except.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "common/common-target.h"
#include "omp-low.h"
#include "gimple-low.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "tree-nested.h"
#include "tree-eh.h"
#include "cilk.h"
#include "context.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa.h"
#include "params.h"
/* Lowering of OMP parallel and workshare constructs proceeds in two
   phases.  The first phase scans the function looking for OMP statements
   and then for variables that must be replaced to satisfy data sharing
   clauses.  The second phase expands code for the constructs, as well as
   re-gimplifying things when variables have been replaced with complex
   expressions.

   Final code generation is done by pass_expand_omp.  The flowgraph is
   scanned for regions which are then moved to a new
   function, to be invoked by the thread library, or offloaded.  */
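
/* As a minimal illustration of what that means (a sketch, not compiler
   output; the child function name and the exact libgomp entry point
   are illustrative):

	#pragma omp parallel shared (n)
	  body;

   is outlined roughly as

	foo.omp_fn.0 (struct .omp_data_s *.omp_data_i)
	{
	  body;   n is accessed as .omp_data_i->n
	}

   and the directive itself becomes a runtime call such as
   GOMP_parallel (foo.omp_fn.0, &.omp_data_o, 0, 0).  See
   pass_expand_omp for the real code generation.  */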
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Context structure.  Used to store information about each parallel
   directive in the code.  */

struct omp_context
{
  /* This field must be at the beginning, as we do "inheritance": Some
     callback functions for tree-inline.c (e.g., omp_copy_decl)
     receive a copy_body_data pointer that is up-casted to an
     omp_context pointer.  */
  copy_body_data cb;

  /* The tree of contexts corresponding to the encountered constructs.  */
  struct omp_context *outer;
  gimple *stmt;

  /* Map variables to fields in a structure that allows communication
     between sending and receiving threads.  */
  splay_tree field_map;
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

  /* These are used just by task contexts, if task firstprivate fn is
     needed.  srecord_type is used to communicate from the thread
     that encountered the task construct to task firstprivate fn,
     record_type is allocated by GOMP_task, initialized by task firstprivate
     fn and passed to the task body fn.  */
  splay_tree sfield_map;
  tree srecord_type;

  /* A chain of variables to add to the top-level block surrounding the
     construct.  In the case of a parallel, this is in the child function.  */
  tree block_vars;

  /* Label to which GOMP_cancel{,lation_point} and explicit and implicit
     barriers should jump to during omplower pass.  */
  tree cancel_label;

  /* What to do with variables with implicitly determined sharing
     attributes.  */
  enum omp_clause_default_kind default_kind;

  /* Nesting depth of this context.  Used to beautify error messages re
     invalid gotos.  The outermost ctx is depth 1, with depth 0 being
     reserved for the main body of the function.  */
  int depth;

  /* True if this parallel directive is nested within another.  */
  bool is_nested;

  /* True if this construct can be cancelled.  */
  bool cancellable;
};

/* A structure holding the elements of:
   for (V = N1; V cond N2; V += STEP) [...] */

struct omp_for_data_loop
{
  tree v, n1, n2, step;
  enum tree_code cond_code;
};

/* A structure describing the main elements of a parallel loop.  */

struct omp_for_data
{
  struct omp_for_data_loop loop;
  tree chunk_size;
  gomp_for *for_stmt;
  tree pre, iter_type;
  int collapse;
  int ordered;
  bool have_nowait, have_ordered, simd_schedule;
  unsigned char sched_modifiers;
  enum omp_clause_schedule_kind sched_kind;
  struct omp_for_data_loop *loops;
};

/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions.  */

  tree routine;  /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  unsigned ifns;   /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};

/* Flags for an OpenACC loop.  */

enum oacc_loop_flags {
  OLF_SEQ	  = 1u << 0,  /* Explicitly sequential.  */
  OLF_AUTO	  = 1u << 1,  /* Compiler chooses axes.  */
  OLF_INDEPENDENT = 1u << 2,  /* Iterations are known independent.  */
  OLF_GANG_STATIC = 1u << 3,  /* Gang partitioning is static (has op).  */

  /* Explicitly specified loop axes.  */
  OLF_DIM_BASE = 4,
  OLF_DIM_GANG   = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
  OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
  OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),

  OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
};
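
/* By way of example (an assumption about typical usage, not a quote
   from the code below): a loop written "#pragma acc loop gang vector"
   would carry a flags value of OLF_DIM_GANG | OLF_DIM_VECTOR, i.e.
   (1u << (OLF_DIM_BASE + GOMP_DIM_GANG))
   | (1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR)), while "#pragma acc loop
   seq" would carry just OLF_SEQ.  */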
static splay_tree all_contexts;
static int taskreg_nesting_level;
static int target_nesting_level;
static struct omp_region *root_omp_region;
static bitmap task_shared_vars;
static vec<omp_context *> taskreg_contexts;
static bool omp_any_child_fn_dumped;

static void scan_omp (gimple_seq *, omp_context *);
static tree scan_omp_1_op (tree *, int *, void *);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static int omp_max_simt_vf (void);

#define WALK_SUBSTMTS  \
    case GIMPLE_BIND: \
    case GIMPLE_TRY: \
    case GIMPLE_CATCH: \
    case GIMPLE_EH_FILTER: \
    case GIMPLE_TRANSACTION: \
      /* The sub-statements for these should be walked.  */ \
      *handled_ops_p = false; \
      break;

/* Return true if CTX corresponds to an oacc parallel region.  */

static bool
is_oacc_parallel (omp_context *ctx)
{
  enum gimple_code outer_type = gimple_code (ctx->stmt);
  return ((outer_type == GIMPLE_OMP_TARGET)
	  && (gimple_omp_target_kind (ctx->stmt)
	      == GF_OMP_TARGET_KIND_OACC_PARALLEL));
}

/* Return true if CTX corresponds to an oacc kernels region.  */

static bool
is_oacc_kernels (omp_context *ctx)
{
  enum gimple_code outer_type = gimple_code (ctx->stmt);
  return ((outer_type == GIMPLE_OMP_TARGET)
	  && (gimple_omp_target_kind (ctx->stmt)
	      == GF_OMP_TARGET_KIND_OACC_KERNELS));
}

/* If DECL is the artificial dummy VAR_DECL created for non-static
   data member privatization, return the underlying "this" parameter,
   otherwise return NULL.  */

tree
omp_member_access_dummy_var (tree decl)
{
  if (!VAR_P (decl)
      || !DECL_ARTIFICIAL (decl)
      || !DECL_IGNORED_P (decl)
      || !DECL_HAS_VALUE_EXPR_P (decl)
      || !lang_hooks.decls.omp_disregard_value_expr (decl, false))
    return NULL_TREE;

  tree v = DECL_VALUE_EXPR (decl);
  if (TREE_CODE (v) != COMPONENT_REF)
    return NULL_TREE;

  while (1)
    switch (TREE_CODE (v))
      {
      case COMPONENT_REF:
      case MEM_REF:
      case INDIRECT_REF:
      CASE_CONVERT:
      case POINTER_PLUS_EXPR:
	v = TREE_OPERAND (v, 0);
	continue;
      case PARM_DECL:
	if (DECL_CONTEXT (v) == current_function_decl
	    && DECL_ARTIFICIAL (v)
	    && TREE_CODE (TREE_TYPE (v)) == POINTER_TYPE)
	  return v;
	return NULL_TREE;
      default:
	return NULL_TREE;
      }
}

/* Helper for unshare_and_remap, called through walk_tree.  */

static tree
unshare_and_remap_1 (tree *tp, int *walk_subtrees, void *data)
{
  tree *pair = (tree *) data;
  if (*tp == pair[0])
    {
      *tp = unshare_expr (pair[1]);
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Return unshare_expr (X) with all occurrences of FROM
   replaced with TO.  */

static tree
unshare_and_remap (tree x, tree from, tree to)
{
  tree pair[2] = { from, to };
  x = unshare_expr (x);
  walk_tree (&x, unshare_and_remap_1, pair, NULL);
  return x;
}
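
/* A hypothetical use (the decl names here are illustrative only):
   given X of the form A->b + A->c, FROM = A and TO = B,

	unshare_and_remap (x, a, b)

   yields a freshly unshared B->b + B->c, leaving the original
   expression untouched.  build_outer_var_ref below uses exactly this
   to rewrite a DECL_VALUE_EXPR in terms of the outer context's copy
   of the underlying "this" parameter.  */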
/* Holds offload tables with decls.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;

/* Convenience function for calling scan_omp_1_op on tree operands.  */

static inline tree
scan_omp_op (tree *tp, omp_context *ctx)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.info = ctx;
  wi.want_locations = true;

  return walk_tree (tp, scan_omp_1_op, &wi, NULL);
}

static void lower_omp (gimple_seq *, omp_context *);
static tree lookup_decl_in_outer_ctx (tree, omp_context *);
static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);

/* Find an OMP clause of type KIND within CLAUSES.  */

tree
find_omp_clause (tree clauses, enum omp_clause_code kind)
{
  for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
    if (OMP_CLAUSE_CODE (clauses) == kind)
      return clauses;

  return NULL_TREE;
}
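
/* For example, to test whether a parallel directive carries an if
   clause one would write (a typical-use sketch, mirroring calls made
   later in this file):

	tree c = find_omp_clause (gimple_omp_parallel_clauses (stmt),
				  OMP_CLAUSE_IF);
	if (c)
	  cond = OMP_CLAUSE_IF_EXPR (c);

   A NULL_TREE result simply means the clause list does not contain
   the requested kind.  */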
/* Return true if CTX is for an omp parallel.  */

static inline bool
is_parallel_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL;
}


/* Return true if CTX is for an omp task.  */

static inline bool
is_task_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_TASK;
}


/* Return true if CTX is for an omp taskloop.  */

static inline bool
is_taskloop_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
	 && gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP;
}


/* Return true if CTX is for an omp parallel or omp task.  */

static inline bool
is_taskreg_ctx (omp_context *ctx)
{
  return is_parallel_ctx (ctx) || is_task_ctx (ctx);
}


/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
   GT_EXPR.  */

static void
adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
{
  switch (*cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
    case NE_EXPR:
      break;
    case LE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
      else
	*n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
      else
	*n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }
}
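
/* E.g. an integer loop condition "i <= N" is normalized to
   "i < N + 1" and "i >= N" to "i > N - 1"; for pointer types the
   +1/-1 is applied with fold_build_pointer_plus_hwi_loc instead.
   NE_EXPR passes through unchanged; it only appears for Cilk-style
   loops, which extract_omp_for_data asserts below.  */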
/* Return the looping step from INCR, extracted from the step of a gimple omp
   for statement.  */

static tree
get_omp_for_step_from_incr (location_t loc, tree incr)
{
  tree step;
  switch (TREE_CODE (incr))
    {
    case PLUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      break;
    case POINTER_PLUS_EXPR:
      step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
      break;
    case MINUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
      break;
    default:
      gcc_unreachable ();
    }
  return step;
}

/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  */

static void
extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
		      struct omp_for_data_loop *loops)
{
  tree t, var, *collapse_iter, *collapse_count;
  tree count = NULL_TREE, iter_type = long_integer_type_node;
  struct omp_for_data_loop *loop;
  int i;
  struct omp_for_data_loop dummy_loop;
  location_t loc = gimple_location (for_stmt);
  bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
  bool distribute = gimple_omp_for_kind (for_stmt)
		    == GF_OMP_FOR_KIND_DISTRIBUTE;
  bool taskloop = gimple_omp_for_kind (for_stmt)
		  == GF_OMP_FOR_KIND_TASKLOOP;
  tree iterv, countv;

  fd->for_stmt = for_stmt;
  fd->pre = NULL;
  if (gimple_omp_for_collapse (for_stmt) > 1)
    fd->loops = loops;
  else
    fd->loops = &fd->loop;

  fd->have_nowait = distribute || simd;
  fd->have_ordered = false;
  fd->collapse = 1;
  fd->ordered = 0;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->sched_modifiers = 0;
  fd->chunk_size = NULL_TREE;
  fd->simd_schedule = false;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
  collapse_iter = NULL;
  collapse_count = NULL;

  for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
	fd->have_nowait = true;
	break;
      case OMP_CLAUSE_ORDERED:
	fd->have_ordered = true;
	if (OMP_CLAUSE_ORDERED_EXPR (t))
	  fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
	break;
      case OMP_CLAUSE_SCHEDULE:
	gcc_assert (!distribute && !taskloop);
	fd->sched_kind
	  = (enum omp_clause_schedule_kind)
	    (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
	fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
			       & ~OMP_CLAUSE_SCHEDULE_MASK);
	fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
	fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
	break;
      case OMP_CLAUSE_DIST_SCHEDULE:
	gcc_assert (distribute);
	fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
	break;
      case OMP_CLAUSE_COLLAPSE:
	fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
	if (fd->collapse > 1)
	  {
	    collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
	    collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
	  }
	break;
      default:
	break;
      }
  if (fd->ordered && fd->collapse == 1 && loops != NULL)
    {
      fd->loops = loops;
      iterv = NULL_TREE;
      countv = NULL_TREE;
      collapse_iter = &iterv;
      collapse_count = &countv;
    }

  /* FIXME: for now map schedule(auto) to schedule(static).
     There should be analysis to determine whether all iterations
     are approximately the same amount of work (then schedule(static)
     is best) or if it varies (then schedule(dynamic,N) is better).  */
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
    {
      fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
      gcc_assert (fd->chunk_size == NULL);
    }
  gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
  if (taskloop)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
	 static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered)
	fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
			 ? integer_zero_node : integer_one_node;
    }

  int cnt = fd->ordered ? fd->ordered : fd->collapse;
  for (i = 0; i < cnt; i++)
    {
      if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
	loop = &fd->loop;
      else if (loops != NULL)
	loop = loops + i;
      else
	loop = &dummy_loop;

      loop->v = gimple_omp_for_index (for_stmt, i);
      gcc_assert (SSA_VAR_P (loop->v));
      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
      var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
      loop->n1 = gimple_omp_for_initial (for_stmt, i);

      loop->cond_code = gimple_omp_for_cond (for_stmt, i);
      loop->n2 = gimple_omp_for_final (for_stmt, i);
      gcc_assert (loop->cond_code != NE_EXPR
		  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD
		  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR);
      adjust_for_condition (loc, &loop->cond_code, &loop->n2);

      t = gimple_omp_for_incr (for_stmt, i);
      gcc_assert (TREE_OPERAND (t, 0) == var);
      loop->step = get_omp_for_step_from_incr (loc, t);

      if (simd
	  || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	      && !fd->have_ordered))
	{
	  if (fd->collapse == 1)
	    iter_type = TREE_TYPE (loop->v);
	  else if (i == 0
		   || TYPE_PRECISION (iter_type)
		      < TYPE_PRECISION (TREE_TYPE (loop->v)))
	    iter_type
	      = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
	}
      else if (iter_type != long_long_unsigned_type_node)
	{
	  if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
	    iter_type = long_long_unsigned_type_node;
	  else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
		   && TYPE_PRECISION (TREE_TYPE (loop->v))
		      >= TYPE_PRECISION (iter_type))
	    {
	      tree n;

	      if (loop->cond_code == LT_EXPR)
		n = fold_build2_loc (loc,
				     PLUS_EXPR, TREE_TYPE (loop->v),
				     loop->n2, loop->step);
	      else
		n = loop->n1;
	      if (TREE_CODE (n) != INTEGER_CST
		  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
		iter_type = long_long_unsigned_type_node;
	    }
	  else if (TYPE_PRECISION (TREE_TYPE (loop->v))
		   > TYPE_PRECISION (iter_type))
	    {
	      tree n1, n2;

	      if (loop->cond_code == LT_EXPR)
		{
		  n1 = loop->n1;
		  n2 = fold_build2_loc (loc,
					PLUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		}
	      else
		{
		  n1 = fold_build2_loc (loc,
					MINUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		  n2 = loop->n1;
		}
	      if (TREE_CODE (n1) != INTEGER_CST
		  || TREE_CODE (n2) != INTEGER_CST
		  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
		  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
		iter_type = long_long_unsigned_type_node;
	    }
	}

      if (i >= fd->collapse)
	continue;

      if (collapse_count && *collapse_count == NULL)
	{
	  t = fold_binary (loop->cond_code, boolean_type_node,
			   fold_convert (TREE_TYPE (loop->v), loop->n1),
			   fold_convert (TREE_TYPE (loop->v), loop->n2));
	  if (t && integer_zerop (t))
	    count = build_zero_cst (long_long_unsigned_type_node);
	  else if ((i == 0 || count != NULL_TREE)
		   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		   && TREE_CONSTANT (loop->n1)
		   && TREE_CONSTANT (loop->n2)
		   && TREE_CODE (loop->step) == INTEGER_CST)
	    {
	      tree itype = TREE_TYPE (loop->v);

	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
	      t = fold_build2_loc (loc,
				   PLUS_EXPR, itype,
				   fold_convert_loc (loc, itype, loop->step), t);
	      t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n2));
	      t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n1));
	      if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
		t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
				     fold_build1_loc (loc, NEGATE_EXPR, itype, t),
				     fold_build1_loc (loc, NEGATE_EXPR, itype,
						      fold_convert_loc (loc, itype,
									loop->step)));
	      else
		t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
				     fold_convert_loc (loc, itype, loop->step));
	      t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
	      if (count != NULL_TREE)
		count = fold_build2_loc (loc,
					 MULT_EXPR, long_long_unsigned_type_node,
					 count, t);
	      else
		count = t;
	      if (TREE_CODE (count) != INTEGER_CST)
		count = NULL_TREE;
	    }
	  else if (count && !integer_zerop (count))
	    count = NULL_TREE;
	}
    }

  if (count
      && !simd
      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered))
    {
      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
	iter_type = long_long_unsigned_type_node;
      else
	iter_type = long_integer_type_node;
    }
  else if (collapse_iter && *collapse_iter != NULL)
    iter_type = TREE_TYPE (*collapse_iter);
  fd->iter_type = iter_type;
  if (collapse_iter && *collapse_iter == NULL)
    *collapse_iter = create_tmp_var (iter_type, ".iter");
  if (collapse_count && *collapse_count == NULL)
    {
      if (count)
	*collapse_count = fold_convert_loc (loc, iter_type, count);
      else
	*collapse_count = create_tmp_var (iter_type, ".count");
    }

  if (fd->collapse > 1 || (fd->ordered && loops))
    {
      fd->loop.v = *collapse_iter;
      fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
      fd->loop.n2 = *collapse_count;
      fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
      fd->loop.cond_code = LT_EXPR;
    }
  else if (loops)
    loops[0] = fd->loop;
}
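
/* As a concrete instance of the constant trip-count computation above
   (assuming a simple signed loop): for

	for (i = 0; i < 10; i += 3)

   cond_code is LT_EXPR, so t = (3 + -1 + 10 - 0) / 3 = 4, which is
   indeed the number of iterations (i = 0, 3, 6, 9).  The -1 (or +1
   for GT_EXPR) correction makes the truncating division round the
   trip count up rather than down.  */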
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  extract_omp_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}


static int omp_max_vf (void);

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
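
/* The two folds above round CHUNK_SIZE up to a multiple of the
   vectorization factor.  E.g. with vf = 8 and a requested chunk of
   13, (13 + 7) & -8 = 20 & ~7 = 16, so each thread still receives
   whole simd lanes.  (This assumes, as BIT_AND_EXPR with -vf does,
   that vf is a power of two.)  */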
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      extract_omp_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = find_omp_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
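
/* So for a combined "parallel for schedule (dynamic, 4)" running
   i = 0 .. n-1, the vector would hold, in order, n1 = 0, n2 = n,
   step = 1 and the (possibly simd-rounded) chunk size 4; these become
   the extra parameters of the combined library entry point (e.g. the
   GOMP_parallel_loop_dynamic family; illustrative, since the actual
   call emission happens in the expansion code, not here).  */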
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
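
/* In other words (a summary example, not additional logic):
   "#pragma omp parallel for schedule (dynamic)" qualifies for a
   single combined library call, while "schedule (static)" or any
   "ordered" clause leaves the parallel and the workshare expanded
   separately.  */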
/* Return true if EXPR is variable sized.  */

static inline bool
is_variable_sized (const_tree expr)
{
  return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
}

/* Return true if DECL is a reference type.  */

static inline bool
is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}

/* Return the type of a decl.  If the decl is reference type,
   return its base type.  */
static inline tree
get_base_type (tree decl)
{
  tree type = TREE_TYPE (decl);
  if (is_reference (decl))
    type = TREE_TYPE (type);
  return type;
}

/* Lookup variables.  The "maybe" form
   allows for the variable form to not have been entered, otherwise we
   assert that the variable must have been entered.  */

static inline tree
lookup_decl (tree var, omp_context *ctx)
{
  tree *n = ctx->cb.decl_map->get (var);
  return *n;
}

static inline tree
maybe_lookup_decl (const_tree var, omp_context *ctx)
{
  tree *n = ctx->cb.decl_map->get (const_cast<tree> (var));
  return n ? *n : NULL_TREE;
}

static inline tree
lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
  return (tree) n->value;
}

static inline tree
lookup_sfield (splay_tree_key key, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->sfield_map
			 ? ctx->sfield_map : ctx->field_map, key);
  return (tree) n->value;
}

static inline tree
lookup_sfield (tree var, omp_context *ctx)
{
  return lookup_sfield ((splay_tree_key) var, ctx);
}

static inline tree
maybe_lookup_field (splay_tree_key key, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, key);
  return n ? (tree) n->value : NULL_TREE;
}

static inline tree
maybe_lookup_field (tree var, omp_context *ctx)
{
  return maybe_lookup_field ((splay_tree_key) var, ctx);
}

/* Return true if DECL should be copied by pointer.  SHARED_CTX is
   the parallel context if DECL is to be shared.  */

static bool
use_pointer_for_field (tree decl, omp_context *shared_ctx)
{
  if (AGGREGATE_TYPE_P (TREE_TYPE (decl))
      || TYPE_ATOMIC (TREE_TYPE (decl)))
    return true;

  /* We can only use copy-in/copy-out semantics for shared variables
     when we know the value is not accessible from an outer scope.  */
  if (shared_ctx)
    {
      gcc_assert (!is_gimple_omp_oacc (shared_ctx->stmt));

      /* ??? Trivially accessible from anywhere.  But why would we even
	 be passing an address in this case?  Should we simply assert
	 this to be false, or should we have a cleanup pass that removes
	 these from the list of mappings?  */
      if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
	return true;

      /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
	 without analyzing the expression whether or not its location
	 is accessible to anyone else.  In the case of nested parallel
	 regions it certainly may be.  */
      if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
	return true;

      /* Do not use copy-in/copy-out for variables that have their
	 address taken.  */
      if (TREE_ADDRESSABLE (decl))
	return true;

      /* lower_send_shared_vars only uses copy-in, but not copy-out
	 for these.  */
      if (TREE_READONLY (decl)
	  || ((TREE_CODE (decl) == RESULT_DECL
	       || TREE_CODE (decl) == PARM_DECL)
	      && DECL_BY_REFERENCE (decl)))
	return false;

      /* Disallow copy-in/out in nested parallel if
	 decl is shared in outer parallel, otherwise
	 each thread could store the shared variable
	 in its own copy-in location, making the
	 variable no longer really shared.  */
      if (shared_ctx->is_nested)
	{
	  omp_context *up;

	  for (up = shared_ctx->outer; up; up = up->outer)
	    if (is_taskreg_ctx (up) && maybe_lookup_decl (decl, up))
	      break;

	  if (up)
	    {
	      tree c;

	      for (c = gimple_omp_taskreg_clauses (up->stmt);
		   c; c = OMP_CLAUSE_CHAIN (c))
		if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
		    && OMP_CLAUSE_DECL (c) == decl)
		  break;

	      if (c)
		goto maybe_mark_addressable_and_ret;
	    }
	}

      /* For tasks avoid using copy-in/out.  As tasks can be
	 deferred or executed in a different thread, when GOMP_task
	 returns, the task hasn't necessarily terminated.  */
      if (is_task_ctx (shared_ctx))
	{
	  tree outer;
	maybe_mark_addressable_and_ret:
	  outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx);
	  if (is_gimple_reg (outer) && !omp_member_access_dummy_var (outer))
	    {
	      /* Taking address of OUTER in lower_send_shared_vars
		 might need regimplification of everything that uses the
		 variable.  */
	      if (!task_shared_vars)
		task_shared_vars = BITMAP_ALLOC (NULL);
	      bitmap_set_bit (task_shared_vars, DECL_UID (outer));
	      TREE_ADDRESSABLE (outer) = 1;
	    }
	  return true;
	}
    }

  return false;
}
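
/* Descriptive example: for "int x; #pragma omp task shared (x)" the
   function returns true; x is recorded in task_shared_vars and marked
   addressable, and the child receives &x through .omp_data_s, because
   the producing thread may return from GOMP_task before the deferred
   task ever runs, so a copied-out value could never be written back
   safely.  For a plain "#pragma omp parallel shared (x)" with no
   address-taking, it returns false and x remains eligible for
   copy-in/copy-out.  */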
/* Construct a new automatic decl similar to VAR.  */

static tree
omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
{
  tree copy = copy_var_decl (var, name, type);

  DECL_CONTEXT (copy) = current_function_decl;
  DECL_CHAIN (copy) = ctx->block_vars;
  /* If VAR is listed in task_shared_vars, it means it wasn't
     originally addressable and is just addressable because task needs
     to take its address.  But we don't need to take address of
     privatizations from that var.  */
  if (TREE_ADDRESSABLE (var)
      && task_shared_vars
      && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
    TREE_ADDRESSABLE (copy) = 0;
  ctx->block_vars = copy;

  return copy;
}

static tree
omp_copy_decl_1 (tree var, omp_context *ctx)
{
  return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
}

/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
   as appropriate.  */
static tree
omp_build_component_ref (tree obj, tree field)
{
  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
  if (TREE_THIS_VOLATILE (field))
    TREE_THIS_VOLATILE (ret) |= 1;
  if (TREE_READONLY (field))
    TREE_READONLY (ret) |= 1;
  return ret;
}

/* Build tree nodes to access the field for VAR on the receiver side.  */

static tree
build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
{
  tree x, field = lookup_field (var, ctx);

  /* If the receiver record type was remapped in the child function,
     remap the field into the new record type.  */
  x = maybe_lookup_field (field, ctx);
  if (x != NULL)
    field = x;

  x = build_simple_mem_ref (ctx->receiver_decl);
  TREE_THIS_NOTRAP (x) = 1;
  x = omp_build_component_ref (x, field);
  if (by_ref)
    {
      x = build_simple_mem_ref (x);
      TREE_THIS_NOTRAP (x) = 1;
    }

  return x;
}

/* Build tree nodes to access VAR in the scope outer to CTX.  In the case
   of a parallel, this is a component reference; for workshare constructs
   this is some variable.  */

static tree
build_outer_var_ref (tree var, omp_context *ctx,
		     enum omp_clause_code code = OMP_CLAUSE_ERROR)
{
  tree x;

  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
    x = var;
  else if (is_variable_sized (var))
    {
      x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
      x = build_outer_var_ref (x, ctx, code);
      x = build_simple_mem_ref (x);
    }
  else if (is_taskreg_ctx (ctx))
    {
      bool by_ref = use_pointer_for_field (var, NULL);
      x = build_receiver_ref (var, by_ref, ctx);
    }
  else if ((gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
	    && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
	   || (code == OMP_CLAUSE_PRIVATE
	       && (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
		   || gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS
		   || gimple_code (ctx->stmt) == GIMPLE_OMP_SINGLE)))
    {
      /* #pragma omp simd isn't a worksharing construct, and can reference
	 even private vars in its linear etc. clauses.
	 Similarly for OMP_CLAUSE_PRIVATE with outer ref, that can refer
	 to private vars in all worksharing constructs.  */
      x = NULL_TREE;
      if (ctx->outer && is_taskreg_ctx (ctx))
	x = lookup_decl (var, ctx->outer);
      else if (ctx->outer)
	x = maybe_lookup_decl_in_outer_ctx (var, ctx);
      if (x == NULL_TREE)
	x = var;
    }
  else if (code == OMP_CLAUSE_LASTPRIVATE && is_taskloop_ctx (ctx))
    {
      gcc_assert (ctx->outer);
      splay_tree_node n
	= splay_tree_lookup (ctx->outer->field_map,
			     (splay_tree_key) &DECL_UID (var));
      if (n == NULL)
	{
	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
	    x = var;
	  else
	    x = lookup_decl (var, ctx->outer);
	}
      else
	{
	  tree field = (tree) n->value;
	  /* If the receiver record type was remapped in the child function,
	     remap the field into the new record type.  */
	  x = maybe_lookup_field (field, ctx->outer);
	  if (x != NULL)
	    field = x;

	  x = build_simple_mem_ref (ctx->outer->receiver_decl);
	  x = omp_build_component_ref (x, field);
	  if (use_pointer_for_field (var, ctx->outer))
	    x = build_simple_mem_ref (x);
	}
    }
  else if (ctx->outer)
    {
      omp_context *outer = ctx->outer;
      if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
	{
	  outer = outer->outer;
	  gcc_assert (outer
		      && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY);
	}
      x = lookup_decl (var, outer);
    }
  else if (is_reference (var))
    /* This can happen with orphaned constructs.  If var is reference, it is
       possible it is shared and as such valid.  */
    x = var;
  else if (omp_member_access_dummy_var (var))
    x = var;
  else
    gcc_unreachable ();

  if (x == var)
    {
      tree t = omp_member_access_dummy_var (var);
      if (t)
	{
	  x = DECL_VALUE_EXPR (var);
	  tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
	  if (o != t)
	    x = unshare_and_remap (x, t, o);
	  else
	    x = unshare_expr (x);
	}
    }

  if (is_reference (var))
    x = build_simple_mem_ref (x);

  return x;
}

/* Build tree nodes to access the field for VAR on the sender side.  */

static tree
build_sender_ref (splay_tree_key key, omp_context *ctx)
{
  tree field = lookup_sfield (key, ctx);
  return omp_build_component_ref (ctx->sender_decl, field);
}

static tree
build_sender_ref (tree var, omp_context *ctx)
{
  return build_sender_ref ((splay_tree_key) var, ctx);
}

/* Add a new field for VAR inside the structure CTX->SENDER_DECL.  If
   BASE_POINTERS_RESTRICT, declare the field with restrict.  */

static void
install_var_field (tree var, bool by_ref, int mask, omp_context *ctx,
		   bool base_pointers_restrict = false)
{
  tree field, type, sfield = NULL_TREE;
  splay_tree_key key = (splay_tree_key) var;

  if ((mask & 8) != 0)
    {
      key = (splay_tree_key) &DECL_UID (var);
      gcc_checking_assert (key != (splay_tree_key) var);
    }
  gcc_assert ((mask & 1) == 0
	      || !splay_tree_lookup (ctx->field_map, key));
  gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
	      || !splay_tree_lookup (ctx->sfield_map, key));
  gcc_assert ((mask & 3) == 3
	      || !is_gimple_omp_oacc (ctx->stmt));

  type = TREE_TYPE (var);
  /* Prevent redeclaring the var in the split-off function with a restrict
     pointer type.  Note that we only clear type itself, restrict qualifiers in
     the pointed-to type will be ignored by points-to analysis.  */
  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  if (mask & 4)
    {
      gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
      type = build_pointer_type (build_pointer_type (type));
    }
  else if (by_ref)
    {
      type = build_pointer_type (type);
      if (base_pointers_restrict)
	type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
    }
  else if ((mask & 3) == 1 && is_reference (var))
    type = TREE_TYPE (type);

  field = build_decl (DECL_SOURCE_LOCATION (var),
		      FIELD_DECL, DECL_NAME (var), type);

  /* Remember what variable this field was created for.  This does have a
     side effect of making dwarf2out ignore this member, so for helpful
     debugging we clear it later in delete_omp_context.  */
  DECL_ABSTRACT_ORIGIN (field) = var;
  if (type == TREE_TYPE (var))
    {
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    SET_DECL_ALIGN (field, TYPE_ALIGN (type));

  if ((mask & 3) == 3)
    {
      insert_field_into_struct (ctx->record_type, field);
      if (ctx->srecord_type)
	{
	  sfield = build_decl (DECL_SOURCE_LOCATION (var),
			       FIELD_DECL, DECL_NAME (var), type);
	  DECL_ABSTRACT_ORIGIN (sfield) = var;
	  SET_DECL_ALIGN (sfield, DECL_ALIGN (field));
	  DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field);
	  TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field);
	  insert_field_into_struct (ctx->srecord_type, sfield);
	}
    }
  else
    {
      if (ctx->srecord_type == NULL_TREE)
	{
	  tree t;

	  ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE);
	  ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
	  for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
	    {
	      sfield = build_decl (DECL_SOURCE_LOCATION (t),
				   FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
	      DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
	      insert_field_into_struct (ctx->srecord_type, sfield);
	      splay_tree_insert (ctx->sfield_map,
				 (splay_tree_key) DECL_ABSTRACT_ORIGIN (t),
				 (splay_tree_value) sfield);
	    }
	}
      sfield = field;
      insert_field_into_struct ((mask & 1) ? ctx->record_type
				: ctx->srecord_type, field);
    }

  if (mask & 1)
    splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
  if ((mask & 2) && ctx->sfield_map)
    splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
}

static tree
install_var_local (tree var, omp_context *ctx)
{
  tree new_var = omp_copy_decl_1 (var, ctx);
  insert_decl_map (&ctx->cb, var, new_var);
  return new_var;
}

/* Adjust the replacement for DECL in CTX for the new context.  This means
   copying the DECL_VALUE_EXPR, and fixing up the type.  */

static void
fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
{
  tree new_decl, size;

  new_decl = lookup_decl (decl, ctx);

  TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);

  if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
      && DECL_HAS_VALUE_EXPR_P (decl))
    {
      tree ve = DECL_VALUE_EXPR (decl);
      walk_tree (&ve, copy_tree_body_r, &ctx->cb, NULL);
      SET_DECL_VALUE_EXPR (new_decl, ve);
      DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
    }

  if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
    {
      size = remap_decl (DECL_SIZE (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE (TREE_TYPE (new_decl));
      DECL_SIZE (new_decl) = size;

      size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
      DECL_SIZE_UNIT (new_decl) = size;
    }
}

/* The callback for remap_decl.  Search all containing contexts for a
   mapping of the variable; this avoids having to duplicate the splay
   tree ahead of time.  We know a mapping doesn't already exist in the
   given context.  Create new mappings to implement default semantics.  */

static tree
omp_copy_decl (tree var, copy_body_data *cb)
{
  omp_context *ctx = (omp_context *) cb;
  tree new_var;

  if (TREE_CODE (var) == LABEL_DECL)
    {
      new_var = create_artificial_label (DECL_SOURCE_LOCATION (var));
      DECL_CONTEXT (new_var) = current_function_decl;
      insert_decl_map (&ctx->cb, var, new_var);
      return new_var;
    }

  while (!is_taskreg_ctx (ctx))
    {
      ctx = ctx->outer;
      if (ctx == NULL)
	return var;
      new_var = maybe_lookup_decl (var, ctx);
      if (new_var)
	return new_var;
    }

  if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
    return var;

  return error_mark_node;
}


/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}
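
/* For a parallel region enclosing a for region, the dump would look
   something like this (block numbers are of course example values):

	bb 2: GIMPLE_OMP_PARALLEL
	    bb 3: GIMPLE_OMP_FOR
	    bb 5: GIMPLE_OMP_CONTINUE
	    bb 6: GIMPLE_OMP_RETURN
	bb 7: GIMPLE_OMP_RETURN  */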
DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}


/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
free_omp_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}


/* Create a new context, with OUTER_CTX being the surrounding context.  */

static omp_context *
new_omp_context (gimple *stmt, omp_context *outer_ctx)
{
  omp_context *ctx = XCNEW (omp_context);

  splay_tree_insert (all_contexts, (splay_tree_key) stmt,
		     (splay_tree_value) ctx);
  ctx->stmt = stmt;

  if (outer_ctx)
    {
      ctx->outer = outer_ctx;
      ctx->cb = outer_ctx->cb;
      ctx->cb.block = NULL;
      ctx->depth = outer_ctx->depth + 1;
    }
  else
    {
      ctx->cb.src_fn = current_function_decl;
      ctx->cb.dst_fn = current_function_decl;
      ctx->cb.src_node = cgraph_node::get (current_function_decl);
      gcc_checking_assert (ctx->cb.src_node);
      ctx->cb.dst_node = ctx->cb.src_node;
      ctx->cb.src_cfun = cfun;
      ctx->cb.copy_decl = omp_copy_decl;
      ctx->cb.eh_lp_nr = 0;
      ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
      ctx->depth = 1;
    }

  ctx->cb.decl_map = new hash_map<tree, tree>;

  return ctx;
}

static gimple_seq maybe_catch_exception (gimple_seq);

/* Finalize task copyfn.  */

static void
finalize_task_copyfn (gomp_task *task_stmt)
{
  struct function *child_cfun;
  tree child_fn;
  gimple_seq seq = NULL, new_seq;
  gbind *bind;

  child_fn = gimple_omp_task_copy_fn (task_stmt);
  if (child_fn == NULL_TREE)
    return;

  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
  DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;

  push_cfun (child_cfun);
  bind = gimplify_body (child_fn, false);
  gimple_seq_add_stmt (&seq, bind);
  new_seq = maybe_catch_exception (seq);
  if (new_seq != seq)
    {
      bind = gimple_build_bind (NULL, new_seq, NULL);
      seq = NULL;
      gimple_seq_add_stmt (&seq, bind);
    }
  gimple_set_body (child_fn, seq);
  pop_cfun ();

  /* Inform the callgraph about the new function.  */
  cgraph_node *node = cgraph_node::get_create (child_fn);
  node->parallelized_function = 1;
  cgraph_node::add_new_function (child_fn, false);
}

/* Destroy an omp_context data structure.  Called through the splay tree
   value delete callback.  */

static void
delete_omp_context (splay_tree_value value)
{
  omp_context *ctx = (omp_context *) value;

  delete ctx->cb.decl_map;

  if (ctx->field_map)
    splay_tree_delete (ctx->field_map);
  if (ctx->sfield_map)
    splay_tree_delete (ctx->sfield_map);

  /* We hijacked DECL_ABSTRACT_ORIGIN earlier.  We need to clear it before
     it produces corrupt debug information.  */
  if (ctx->record_type)
    {
      tree t;
      for (t = TYPE_FIELDS (ctx->record_type); t ; t = DECL_CHAIN (t))
	DECL_ABSTRACT_ORIGIN (t) = NULL;
    }
  if (ctx->srecord_type)
    {
      tree t;
      for (t = TYPE_FIELDS (ctx->srecord_type); t ; t = DECL_CHAIN (t))
	DECL_ABSTRACT_ORIGIN (t) = NULL;
    }

  if (is_task_ctx (ctx))
    finalize_task_copyfn (as_a <gomp_task *> (ctx->stmt));

  XDELETE (ctx);
}

/* Fix up RECEIVER_DECL with a type that has been remapped to the child
   context.  */

static void
fixup_child_record_type (omp_context *ctx)
{
  tree f, type = ctx->record_type;

  if (!ctx->receiver_decl)
    return;
  /* ??? It isn't sufficient to just call remap_type here, because
     variably_modified_type_p doesn't work the way we expect for
     record types.  Testing each field for whether it needs remapping
     and creating a new record by hand works, however.  */
  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
      break;
  if (f)
    {
      tree name, new_fields = NULL;

      type = lang_hooks.types.make_type (RECORD_TYPE);
      name = DECL_NAME (TYPE_NAME (ctx->record_type));
      name = build_decl (DECL_SOURCE_LOCATION (ctx->receiver_decl),
			 TYPE_DECL, name, type);
      TYPE_NAME (type) = name;

      for (f = TYPE_FIELDS (ctx->record_type); f ; f = DECL_CHAIN (f))
	{
	  tree new_f = copy_node (f);
	  DECL_CONTEXT (new_f) = type;
	  TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
	  DECL_CHAIN (new_f) = new_fields;
	  walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &ctx->cb, NULL);
	  walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r,
		     &ctx->cb, NULL);
	  walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
		     &ctx->cb, NULL);
	  new_fields = new_f;

	  /* Arrange to be able to look up the receiver field
	     given the sender field.  */
	  splay_tree_insert (ctx->field_map, (splay_tree_key) f,
			     (splay_tree_value) new_f);
	}
      TYPE_FIELDS (type) = nreverse (new_fields);
      layout_type (type);
    }

  /* In a target region we never modify any of the pointers in *.omp_data_i,
     so attempt to help the optimizers.  */
  if (is_gimple_omp_offloaded (ctx->stmt))
    type = build_qualified_type (type, TYPE_QUAL_CONST);

  TREE_TYPE (ctx->receiver_decl)
    = build_qualified_type (build_reference_type (type), TYPE_QUAL_RESTRICT);
}

/* Instantiate decls as necessary in CTX to satisfy the data sharing
   specified by CLAUSES.  If BASE_POINTERS_RESTRICT, install var field with
   restrict.  */

static void
scan_sharing_clauses (tree clauses, omp_context *ctx,
		      bool base_pointers_restrict = false)
{
  tree c, decl;
  bool scan_array_reductions = false;

  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    {
      bool by_ref;

      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_PRIVATE:
	  decl = OMP_CLAUSE_DECL (c);
	  if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
	    goto do_private;
	  else if (!is_variable_sized (decl))
	    install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_SHARED:
	  decl = OMP_CLAUSE_DECL (c);
	  /* Ignore shared directives in teams construct.  */
	  if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
	    {
	      /* Global variables don't need to be copied,
		 the receiver side will use them directly.  */
	      tree odecl = maybe_lookup_decl_in_outer_ctx (decl, ctx);
	      if (is_global_var (odecl))
		break;
	      insert_decl_map (&ctx->cb, decl, odecl);
	      break;
	    }
	  gcc_assert (is_taskreg_ctx (ctx));
	  gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
		      || !is_variable_sized (decl));
	  /* Global variables don't need to be copied,
	     the receiver side will use them directly.  */
	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
	    break;
	  if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
	    {
	      use_pointer_for_field (decl, ctx);
	      break;
	    }
	  by_ref = use_pointer_for_field (decl, NULL);
	  if ((! TREE_READONLY (decl) && !OMP_CLAUSE_SHARED_READONLY (c))
	      || TREE_ADDRESSABLE (decl)
	      || by_ref
	      || is_reference (decl))
	    {
	      by_ref = use_pointer_for_field (decl, ctx);
	      install_var_field (decl, by_ref, 3, ctx);
	      install_var_local (decl, ctx);
	      break;
	    }
	  /* We don't need to copy const scalar vars back.  */
	  OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
	  goto do_private;

	case OMP_CLAUSE_REDUCTION:
	  decl = OMP_CLAUSE_DECL (c);
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
	      && TREE_CODE (decl) == MEM_REF)
	    {
	      tree t = TREE_OPERAND (decl, 0);
	      if (TREE_CODE (t) == POINTER_PLUS_EXPR)
		t = TREE_OPERAND (t, 0);
	      if (TREE_CODE (t) == INDIRECT_REF
		  || TREE_CODE (t) == ADDR_EXPR)
		t = TREE_OPERAND (t, 0);
	      install_var_local (t, ctx);
	      if (is_taskreg_ctx (ctx)
		  && !is_global_var (maybe_lookup_decl_in_outer_ctx (t, ctx))
		  && !is_variable_sized (t))
		{
		  by_ref = use_pointer_for_field (t, ctx);
		  install_var_field (t, by_ref, 3, ctx);
		}
	      break;
	    }
	  goto do_private;

	case OMP_CLAUSE_LASTPRIVATE:
	  /* Let the corresponding firstprivate clause create
	     the variable.  */
	  if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
	    break;
	  /* FALLTHRU */

	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_LINEAR:
	  decl = OMP_CLAUSE_DECL (c);
	do_private:
	  if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
	       || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
	      && is_gimple_omp_offloaded (ctx->stmt))
	    {
	      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
		install_var_field (decl, !is_reference (decl), 3, ctx);
	      else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
		install_var_field (decl, true, 3, ctx);
	      else
		install_var_field (decl, false, 3, ctx);
	    }
	  if (is_variable_sized (decl))
	    {
	      if (is_task_ctx (ctx))
		install_var_field (decl, false, 1, ctx);
	      break;
	    }
	  else if (is_taskreg_ctx (ctx))
	    {
	      bool global
		= is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx));
	      by_ref = use_pointer_for_field (decl, NULL);

	      if (is_task_ctx (ctx)
		  && (global || by_ref || is_reference (decl)))
		{
		  install_var_field (decl, false, 1, ctx);
		  if (!global)
		    install_var_field (decl, by_ref, 2, ctx);
		}
	      else if (!global)
		install_var_field (decl, by_ref, 3, ctx);
	    }
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_USE_DEVICE_PTR:
	  decl = OMP_CLAUSE_DECL (c);
	  if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
	    install_var_field (decl, true, 3, ctx);
	  else
	    install_var_field (decl, false, 3, ctx);
	  if (DECL_SIZE (decl)
	      && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
	    {
	      tree decl2 = DECL_VALUE_EXPR (decl);
	      gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
	      decl2 = TREE_OPERAND (decl2, 0);
	      gcc_assert (DECL_P (decl2));
	      install_var_local (decl2, ctx);
	    }
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_IS_DEVICE_PTR:
	  decl = OMP_CLAUSE_DECL (c);
	  goto do_private;

	case OMP_CLAUSE__LOOPTEMP_:
	  gcc_assert (is_taskreg_ctx (ctx));
	  decl = OMP_CLAUSE_DECL (c);
	  install_var_field (decl, false, 3, ctx);
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_COPYPRIVATE:
	case OMP_CLAUSE_COPYIN:
	  decl = OMP_CLAUSE_DECL (c);
	  by_ref = use_pointer_for_field (decl, NULL);
	  install_var_field (decl, by_ref, 3, ctx);
	  break;

	case OMP_CLAUSE_DEFAULT:
	  ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
	  break;

	case OMP_CLAUSE_FINAL:
	case OMP_CLAUSE_IF:
	case OMP_CLAUSE_NUM_THREADS:
	case OMP_CLAUSE_NUM_TEAMS:
	case OMP_CLAUSE_THREAD_LIMIT:
	case OMP_CLAUSE_DEVICE:
	case OMP_CLAUSE_SCHEDULE:
	case OMP_CLAUSE_DIST_SCHEDULE:
	case OMP_CLAUSE_DEPEND:
	case OMP_CLAUSE_PRIORITY:
	case OMP_CLAUSE_GRAINSIZE:
	case OMP_CLAUSE_NUM_TASKS:
	case OMP_CLAUSE__CILK_FOR_COUNT_:
	case OMP_CLAUSE_NUM_GANGS:
2053 case OMP_CLAUSE_NUM_WORKERS:
2054 case OMP_CLAUSE_VECTOR_LENGTH:
2055 if (ctx->outer)
2056 scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
2057 break;
2059 case OMP_CLAUSE_TO:
2060 case OMP_CLAUSE_FROM:
2061 case OMP_CLAUSE_MAP:
2062 if (ctx->outer)
2063 scan_omp_op (&OMP_CLAUSE_SIZE (c), ctx->outer);
2064 decl = OMP_CLAUSE_DECL (c);
2065 /* Global variables with "omp declare target" attribute
2066 don't need to be copied, the receiver side will use them
2067 directly. However, global variables with "omp declare target link"
2068 attribute need to be copied. */
2069 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2070 && DECL_P (decl)
2071 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2072 && (OMP_CLAUSE_MAP_KIND (c)
2073 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2074 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2075 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2076 && varpool_node::get_create (decl)->offloadable
2077 && !lookup_attribute ("omp declare target link",
2078 DECL_ATTRIBUTES (decl)))
2079 break;
2080 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2081 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER)
2083 /* Ignore GOMP_MAP_POINTER kind for arrays in regions that are
2084 not offloaded; there is nothing to map for those. */
2085 if (!is_gimple_omp_offloaded (ctx->stmt)
2086 && !POINTER_TYPE_P (TREE_TYPE (decl))
2087 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
2088 break;
2090 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2091 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
2092 || (OMP_CLAUSE_MAP_KIND (c)
2093 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
2095 if (TREE_CODE (decl) == COMPONENT_REF
2096 || (TREE_CODE (decl) == INDIRECT_REF
2097 && TREE_CODE (TREE_OPERAND (decl, 0)) == COMPONENT_REF
2098 && (TREE_CODE (TREE_TYPE (TREE_OPERAND (decl, 0)))
2099 == REFERENCE_TYPE)))
2100 break;
2101 if (DECL_SIZE (decl)
2102 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2104 tree decl2 = DECL_VALUE_EXPR (decl);
2105 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2106 decl2 = TREE_OPERAND (decl2, 0);
2107 gcc_assert (DECL_P (decl2));
2108 install_var_local (decl2, ctx);
2110 install_var_local (decl, ctx);
2111 break;
2113 if (DECL_P (decl))
2115 if (DECL_SIZE (decl)
2116 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2118 tree decl2 = DECL_VALUE_EXPR (decl);
2119 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2120 decl2 = TREE_OPERAND (decl2, 0);
2121 gcc_assert (DECL_P (decl2));
2122 install_var_field (decl2, true, 3, ctx);
2123 install_var_local (decl2, ctx);
2124 install_var_local (decl, ctx);
2126 else
2128 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2129 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2130 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
2131 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2132 install_var_field (decl, true, 7, ctx);
2133 else
2134 install_var_field (decl, true, 3, ctx,
2135 base_pointers_restrict);
2136 if (is_gimple_omp_offloaded (ctx->stmt)
2137 && !OMP_CLAUSE_MAP_IN_REDUCTION (c))
2138 install_var_local (decl, ctx);
2141 else
2143 tree base = get_base_address (decl);
2144 tree nc = OMP_CLAUSE_CHAIN (c);
2145 if (DECL_P (base)
2146 && nc != NULL_TREE
2147 && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
2148 && OMP_CLAUSE_DECL (nc) == base
2149 && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_POINTER
2150 && integer_zerop (OMP_CLAUSE_SIZE (nc)))
2152 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) = 1;
2153 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (nc) = 1;
2155 else
2157 if (ctx->outer)
2159 scan_omp_op (&OMP_CLAUSE_DECL (c), ctx->outer);
2160 decl = OMP_CLAUSE_DECL (c);
2162 gcc_assert (!splay_tree_lookup (ctx->field_map,
2163 (splay_tree_key) decl));
2164 tree field
2165 = build_decl (OMP_CLAUSE_LOCATION (c),
2166 FIELD_DECL, NULL_TREE, ptr_type_node);
2167 SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type_node));
2168 insert_field_into_struct (ctx->record_type, field);
2169 splay_tree_insert (ctx->field_map, (splay_tree_key) decl,
2170 (splay_tree_value) field);
2173 break;
2175 case OMP_CLAUSE__GRIDDIM_:
2176 if (ctx->outer)
2178 scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer);
2179 scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer);
2181 break;
2183 case OMP_CLAUSE_NOWAIT:
2184 case OMP_CLAUSE_ORDERED:
2185 case OMP_CLAUSE_COLLAPSE:
2186 case OMP_CLAUSE_UNTIED:
2187 case OMP_CLAUSE_MERGEABLE:
2188 case OMP_CLAUSE_PROC_BIND:
2189 case OMP_CLAUSE_SAFELEN:
2190 case OMP_CLAUSE_SIMDLEN:
2191 case OMP_CLAUSE_THREADS:
2192 case OMP_CLAUSE_SIMD:
2193 case OMP_CLAUSE_NOGROUP:
2194 case OMP_CLAUSE_DEFAULTMAP:
2195 case OMP_CLAUSE_ASYNC:
2196 case OMP_CLAUSE_WAIT:
2197 case OMP_CLAUSE_GANG:
2198 case OMP_CLAUSE_WORKER:
2199 case OMP_CLAUSE_VECTOR:
2200 case OMP_CLAUSE_INDEPENDENT:
2201 case OMP_CLAUSE_AUTO:
2202 case OMP_CLAUSE_SEQ:
2203 case OMP_CLAUSE__SIMT_:
2204 break;
2206 case OMP_CLAUSE_ALIGNED:
2207 decl = OMP_CLAUSE_DECL (c);
2208 if (is_global_var (decl)
2209 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2210 install_var_local (decl, ctx);
2211 break;
2213 case OMP_CLAUSE_TILE:
2214 case OMP_CLAUSE__CACHE_:
2215 default:
2216 gcc_unreachable ();
2220 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2222 switch (OMP_CLAUSE_CODE (c))
2224 case OMP_CLAUSE_LASTPRIVATE:
2225 /* Let the corresponding firstprivate clause create
2226 the variable. */
2227 if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2228 scan_array_reductions = true;
2229 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2230 break;
2231 /* FALLTHRU */
2233 case OMP_CLAUSE_FIRSTPRIVATE:
2234 case OMP_CLAUSE_PRIVATE:
2235 case OMP_CLAUSE_LINEAR:
2236 case OMP_CLAUSE_IS_DEVICE_PTR:
2237 decl = OMP_CLAUSE_DECL (c);
2238 if (is_variable_sized (decl))
2240 if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
2241 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
2242 && is_gimple_omp_offloaded (ctx->stmt))
2244 tree decl2 = DECL_VALUE_EXPR (decl);
2245 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2246 decl2 = TREE_OPERAND (decl2, 0);
2247 gcc_assert (DECL_P (decl2));
2248 install_var_local (decl2, ctx);
2249 fixup_remapped_decl (decl2, ctx, false);
2251 install_var_local (decl, ctx);
2253 fixup_remapped_decl (decl, ctx,
2254 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
2255 && OMP_CLAUSE_PRIVATE_DEBUG (c));
2256 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2257 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2258 scan_array_reductions = true;
2259 break;
2261 case OMP_CLAUSE_REDUCTION:
2262 decl = OMP_CLAUSE_DECL (c);
2263 if (TREE_CODE (decl) != MEM_REF)
2265 if (is_variable_sized (decl))
2266 install_var_local (decl, ctx);
2267 fixup_remapped_decl (decl, ctx, false);
2269 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2270 scan_array_reductions = true;
2271 break;
2273 case OMP_CLAUSE_SHARED:
2274 /* Ignore shared directives in teams construct. */
2275 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
2276 break;
2277 decl = OMP_CLAUSE_DECL (c);
2278 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2279 break;
2280 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2282 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
2283 ctx->outer)))
2284 break;
2285 bool by_ref = use_pointer_for_field (decl, ctx);
2286 install_var_field (decl, by_ref, 11, ctx);
2287 break;
2289 fixup_remapped_decl (decl, ctx, false);
2290 break;
2292 case OMP_CLAUSE_MAP:
2293 if (!is_gimple_omp_offloaded (ctx->stmt))
2294 break;
2295 decl = OMP_CLAUSE_DECL (c);
2296 if (DECL_P (decl)
2297 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2298 && (OMP_CLAUSE_MAP_KIND (c)
2299 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2300 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2301 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2302 && varpool_node::get_create (decl)->offloadable)
2303 break;
2304 if (DECL_P (decl))
2306 if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2307 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
2308 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
2309 && !COMPLETE_TYPE_P (TREE_TYPE (decl)))
2311 tree new_decl = lookup_decl (decl, ctx);
2312 TREE_TYPE (new_decl)
2313 = remap_type (TREE_TYPE (decl), &ctx->cb);
2315 else if (DECL_SIZE (decl)
2316 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2318 tree decl2 = DECL_VALUE_EXPR (decl);
2319 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2320 decl2 = TREE_OPERAND (decl2, 0);
2321 gcc_assert (DECL_P (decl2));
2322 fixup_remapped_decl (decl2, ctx, false);
2323 fixup_remapped_decl (decl, ctx, true);
2325 else
2326 fixup_remapped_decl (decl, ctx, false);
2328 break;
2330 case OMP_CLAUSE_COPYPRIVATE:
2331 case OMP_CLAUSE_COPYIN:
2332 case OMP_CLAUSE_DEFAULT:
2333 case OMP_CLAUSE_IF:
2334 case OMP_CLAUSE_NUM_THREADS:
2335 case OMP_CLAUSE_NUM_TEAMS:
2336 case OMP_CLAUSE_THREAD_LIMIT:
2337 case OMP_CLAUSE_DEVICE:
2338 case OMP_CLAUSE_SCHEDULE:
2339 case OMP_CLAUSE_DIST_SCHEDULE:
2340 case OMP_CLAUSE_NOWAIT:
2341 case OMP_CLAUSE_ORDERED:
2342 case OMP_CLAUSE_COLLAPSE:
2343 case OMP_CLAUSE_UNTIED:
2344 case OMP_CLAUSE_FINAL:
2345 case OMP_CLAUSE_MERGEABLE:
2346 case OMP_CLAUSE_PROC_BIND:
2347 case OMP_CLAUSE_SAFELEN:
2348 case OMP_CLAUSE_SIMDLEN:
2349 case OMP_CLAUSE_ALIGNED:
2350 case OMP_CLAUSE_DEPEND:
2351 case OMP_CLAUSE__LOOPTEMP_:
2352 case OMP_CLAUSE_TO:
2353 case OMP_CLAUSE_FROM:
2354 case OMP_CLAUSE_PRIORITY:
2355 case OMP_CLAUSE_GRAINSIZE:
2356 case OMP_CLAUSE_NUM_TASKS:
2357 case OMP_CLAUSE_THREADS:
2358 case OMP_CLAUSE_SIMD:
2359 case OMP_CLAUSE_NOGROUP:
2360 case OMP_CLAUSE_DEFAULTMAP:
2361 case OMP_CLAUSE_USE_DEVICE_PTR:
2362 case OMP_CLAUSE__CILK_FOR_COUNT_:
2363 case OMP_CLAUSE_ASYNC:
2364 case OMP_CLAUSE_WAIT:
2365 case OMP_CLAUSE_NUM_GANGS:
2366 case OMP_CLAUSE_NUM_WORKERS:
2367 case OMP_CLAUSE_VECTOR_LENGTH:
2368 case OMP_CLAUSE_GANG:
2369 case OMP_CLAUSE_WORKER:
2370 case OMP_CLAUSE_VECTOR:
2371 case OMP_CLAUSE_INDEPENDENT:
2372 case OMP_CLAUSE_AUTO:
2373 case OMP_CLAUSE_SEQ:
2374 case OMP_CLAUSE__GRIDDIM_:
2375 case OMP_CLAUSE__SIMT_:
2376 break;
2378 case OMP_CLAUSE_TILE:
2379 case OMP_CLAUSE__CACHE_:
2380 default:
2381 gcc_unreachable ();
2385 gcc_checking_assert (!scan_array_reductions
2386 || !is_gimple_omp_oacc (ctx->stmt));
2387 if (scan_array_reductions)
2389 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2390 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
2391 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2393 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
2394 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
2396 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
2397 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2398 scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
2399 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2400 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2401 scan_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
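/* Illustrative example of the two passes above (hypothetical variable
   names): given

     int s, f, r;
     #pragma omp parallel shared(s) firstprivate(f) reduction(+:r)
     { ... }

   the first pass installs fields in .omp_data_s for `s' (by reference
   when use_pointer_for_field so decides, e.g. if `s' is addressable),
   for `f' and for `r', plus local copies in the child context via
   install_var_local; the second pass then fixes up the remapped decls
   and notes whether reduction placeholders or GIMPLE sequences on
   linear/lastprivate clauses still need scanning.  */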
2405 /* Create a new name for the omp child function. Returns an identifier. If
2406 IS_CILK_FOR is true, then the suffix for the child function is
2407 "_cilk_for_fn". */
2409 static tree
2410 create_omp_child_function_name (bool task_copy, bool is_cilk_for)
2412 if (is_cilk_for)
2413 return clone_function_name (current_function_decl, "_cilk_for_fn");
2414 return clone_function_name (current_function_decl,
2415 task_copy ? "_omp_cpyfn" : "_omp_fn");
2418 /* Returns the type of the induction variable for the child function for
2419 _Cilk_for and the types for the __high and __low variables based on TYPE. */
2421 static tree
2422 cilk_for_check_loop_diff_type (tree type)
2424 if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
2426 if (TYPE_UNSIGNED (type))
2427 return uint32_type_node;
2428 else
2429 return integer_type_node;
2431 else
2433 if (TYPE_UNSIGNED (type))
2434 return uint64_type_node;
2435 else
2436 return long_long_integer_type_node;
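/* A minimal worked sketch of the selection rule above (assuming the
   usual widths, where uint32_type_node is 32 bits wide):

     TYPE_PRECISION <= 32, unsigned  ->  uint32_type_node
     TYPE_PRECISION <= 32, signed    ->  integer_type_node
     TYPE_PRECISION  > 32, unsigned  ->  uint64_type_node
     TYPE_PRECISION  > 32, signed    ->  long_long_integer_type_node

   so e.g. a `short' loop variable is widened to `int', while an
   `unsigned long long' one maps to uint64_type_node.  */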
2440 /* Return true if CTX may belong to offloaded code: either if current function
2441 is offloaded, or any enclosing context corresponds to a target region. */
2443 static bool
2444 omp_maybe_offloaded_ctx (omp_context *ctx)
2446 if (cgraph_node::get (current_function_decl)->offloadable)
2447 return true;
2448 for (; ctx; ctx = ctx->outer)
2449 if (is_gimple_omp_offloaded (ctx->stmt))
2450 return true;
2451 return false;
2454 /* Build a decl for the omp child function. It will not contain a body
2455 yet, just the bare decl. */
2457 static void
2458 create_omp_child_function (omp_context *ctx, bool task_copy)
2460 tree decl, type, name, t;
2462 tree cilk_for_count
2463 = (flag_cilkplus && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2464 ? find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
2465 OMP_CLAUSE__CILK_FOR_COUNT_) : NULL_TREE;
2466 tree cilk_var_type = NULL_TREE;
2468 name = create_omp_child_function_name (task_copy,
2469 cilk_for_count != NULL_TREE);
2470 if (task_copy)
2471 type = build_function_type_list (void_type_node, ptr_type_node,
2472 ptr_type_node, NULL_TREE);
2473 else if (cilk_for_count)
2475 type = TREE_TYPE (OMP_CLAUSE_OPERAND (cilk_for_count, 0));
2476 cilk_var_type = cilk_for_check_loop_diff_type (type);
2477 type = build_function_type_list (void_type_node, ptr_type_node,
2478 cilk_var_type, cilk_var_type, NULL_TREE);
2480 else
2481 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2483 decl = build_decl (gimple_location (ctx->stmt), FUNCTION_DECL, name, type);
2485 gcc_checking_assert (!is_gimple_omp_oacc (ctx->stmt)
2486 || !task_copy);
2487 if (!task_copy)
2488 ctx->cb.dst_fn = decl;
2489 else
2490 gimple_omp_task_set_copy_fn (ctx->stmt, decl);
2492 TREE_STATIC (decl) = 1;
2493 TREE_USED (decl) = 1;
2494 DECL_ARTIFICIAL (decl) = 1;
2495 DECL_IGNORED_P (decl) = 0;
2496 TREE_PUBLIC (decl) = 0;
2497 DECL_UNINLINABLE (decl) = 1;
2498 DECL_EXTERNAL (decl) = 0;
2499 DECL_CONTEXT (decl) = NULL_TREE;
2500 DECL_INITIAL (decl) = make_node (BLOCK);
2501 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2502 if (omp_maybe_offloaded_ctx (ctx))
2504 cgraph_node::get_create (decl)->offloadable = 1;
2505 if (ENABLE_OFFLOADING)
2506 g->have_offload = true;
2509 if (cgraph_node::get_create (decl)->offloadable
2510 && !lookup_attribute ("omp declare target",
2511 DECL_ATTRIBUTES (current_function_decl)))
2513 const char *target_attr = (is_gimple_omp_offloaded (ctx->stmt)
2514 ? "omp target entrypoint"
2515 : "omp declare target");
2516 DECL_ATTRIBUTES (decl)
2517 = tree_cons (get_identifier (target_attr),
2518 NULL_TREE, DECL_ATTRIBUTES (decl));
2521 t = build_decl (DECL_SOURCE_LOCATION (decl),
2522 RESULT_DECL, NULL_TREE, void_type_node);
2523 DECL_ARTIFICIAL (t) = 1;
2524 DECL_IGNORED_P (t) = 1;
2525 DECL_CONTEXT (t) = decl;
2526 DECL_RESULT (decl) = t;
2528 /* _Cilk_for's child function requires two extra parameters called
2529 __low and __high that are set by the Cilk runtime when it calls this
2530 function. */
2531 if (cilk_for_count)
2533 t = build_decl (DECL_SOURCE_LOCATION (decl),
2534 PARM_DECL, get_identifier ("__high"), cilk_var_type);
2535 DECL_ARTIFICIAL (t) = 1;
2536 DECL_NAMELESS (t) = 1;
2537 DECL_ARG_TYPE (t) = ptr_type_node;
2538 DECL_CONTEXT (t) = current_function_decl;
2539 TREE_USED (t) = 1;
2540 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2541 DECL_ARGUMENTS (decl) = t;
2543 t = build_decl (DECL_SOURCE_LOCATION (decl),
2544 PARM_DECL, get_identifier ("__low"), cilk_var_type);
2545 DECL_ARTIFICIAL (t) = 1;
2546 DECL_NAMELESS (t) = 1;
2547 DECL_ARG_TYPE (t) = ptr_type_node;
2548 DECL_CONTEXT (t) = current_function_decl;
2549 TREE_USED (t) = 1;
2550 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2551 DECL_ARGUMENTS (decl) = t;
2554 tree data_name = get_identifier (".omp_data_i");
2555 t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
2556 ptr_type_node);
2557 DECL_ARTIFICIAL (t) = 1;
2558 DECL_NAMELESS (t) = 1;
2559 DECL_ARG_TYPE (t) = ptr_type_node;
2560 DECL_CONTEXT (t) = current_function_decl;
2561 TREE_USED (t) = 1;
2562 TREE_READONLY (t) = 1;
2563 if (cilk_for_count)
2564 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2565 DECL_ARGUMENTS (decl) = t;
2566 if (!task_copy)
2567 ctx->receiver_decl = t;
2568 else
2570 t = build_decl (DECL_SOURCE_LOCATION (decl),
2571 PARM_DECL, get_identifier (".omp_data_o"),
2572 ptr_type_node);
2573 DECL_ARTIFICIAL (t) = 1;
2574 DECL_NAMELESS (t) = 1;
2575 DECL_ARG_TYPE (t) = ptr_type_node;
2576 DECL_CONTEXT (t) = current_function_decl;
2577 TREE_USED (t) = 1;
2578 TREE_ADDRESSABLE (t) = 1;
2579 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2580 DECL_ARGUMENTS (decl) = t;
2583 /* Allocate memory for the function structure. The call to
2584 allocate_struct_function clobbers CFUN, so we need to restore
2585 it afterward. */
2586 push_struct_function (decl);
2587 cfun->function_end_locus = gimple_location (ctx->stmt);
2588 init_tree_ssa (cfun);
2589 pop_cfun ();
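/* Hedged sketch of the decls this builds (bodies are filled in later
   during OMP expansion; the numeric suffixes are illustrative):

     void foo._omp_fn.0 (struct .omp_data_s * .omp_data_i);
     void foo._omp_cpyfn.1 (void * .omp_data_o, void * .omp_data_i);
     void foo._cilk_for_fn.2 (void * .omp_data_i, T __low, T __high);

   where T is the type chosen by cilk_for_check_loop_diff_type.  Note
   that DECL_ARGUMENTS is chained back to front above, so .omp_data_i
   ends up as the first parameter.  */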
2592 /* Callback for walk_gimple_seq. Check if a combined parallel
2593 contains a gimple_omp_for_combined_into_p OMP_FOR. */
2595 static tree
2596 find_combined_for (gimple_stmt_iterator *gsi_p,
2597 bool *handled_ops_p,
2598 struct walk_stmt_info *wi)
2600 gimple *stmt = gsi_stmt (*gsi_p);
2602 *handled_ops_p = true;
2603 switch (gimple_code (stmt))
2605 WALK_SUBSTMTS;
2607 case GIMPLE_OMP_FOR:
2608 if (gimple_omp_for_combined_into_p (stmt)
2609 && gimple_omp_for_kind (stmt)
2610 == *(const enum gf_mask *) (wi->info))
2612 wi->info = stmt;
2613 return integer_zero_node;
2615 break;
2616 default:
2617 break;
2619 return NULL;
2622 /* Add _LOOPTEMP_ clauses to an OpenMP parallel or task. */
2624 static void
2625 add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt,
2626 omp_context *outer_ctx)
2628 struct walk_stmt_info wi;
2630 memset (&wi, 0, sizeof (wi));
2631 wi.val_only = true;
2632 wi.info = (void *) &msk;
2633 walk_gimple_seq (gimple_omp_body (stmt), find_combined_for, NULL, &wi);
2634 if (wi.info != (void *) &msk)
2636 gomp_for *for_stmt = as_a <gomp_for *> ((gimple *) wi.info);
2637 struct omp_for_data fd;
2638 extract_omp_for_data (for_stmt, &fd, NULL);
2639 /* We need two temporaries with fd.loop.v type (istart/iend)
2640 and then (fd.collapse - 1) temporaries with the same
2641 type for count2 ... countN-1 vars if not constant. */
2642 size_t count = 2, i;
2643 tree type = fd.iter_type;
2644 if (fd.collapse > 1
2645 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
2647 count += fd.collapse - 1;
2648 /* If there are lastprivate clauses on the inner
2649 GIMPLE_OMP_FOR, add one more temporary for the total number
2650 of iterations (product of count1 ... countN-1). */
2651 if (find_omp_clause (gimple_omp_for_clauses (for_stmt),
2652 OMP_CLAUSE_LASTPRIVATE))
2653 count++;
2654 else if (msk == GF_OMP_FOR_KIND_FOR
2655 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2656 OMP_CLAUSE_LASTPRIVATE))
2657 count++;
2659 for (i = 0; i < count; i++)
2661 tree temp = create_tmp_var (type);
2662 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
2663 insert_decl_map (&outer_ctx->cb, temp, temp);
2664 OMP_CLAUSE_DECL (c) = temp;
2665 OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
2666 gimple_omp_taskreg_set_clauses (stmt, c);
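/* Worked example (illustrative): for a combined construct such as

     #pragma omp parallel for collapse(2) lastprivate(x)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         ...

   with non-constant bounds, count = 2 (istart/iend)
   + (collapse - 1) = 1 extra count temporary
   + 1 for the lastprivate total-iteration count, i.e. four
   _LOOPTEMP_ clauses are prepended to the parallel's clause list.  */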
2671 /* Scan an OpenMP parallel directive. */
2673 static void
2674 scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2676 omp_context *ctx;
2677 tree name;
2678 gomp_parallel *stmt = as_a <gomp_parallel *> (gsi_stmt (*gsi));
2680 /* Ignore parallel directives with empty bodies, unless there
2681 are copyin clauses. */
2682 if (optimize > 0
2683 && empty_body_p (gimple_omp_body (stmt))
2684 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2685 OMP_CLAUSE_COPYIN) == NULL)
2687 gsi_replace (gsi, gimple_build_nop (), false);
2688 return;
2691 if (gimple_omp_parallel_combined_p (stmt))
2692 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_FOR, stmt, outer_ctx);
2694 ctx = new_omp_context (stmt, outer_ctx);
2695 taskreg_contexts.safe_push (ctx);
2696 if (taskreg_nesting_level > 1)
2697 ctx->is_nested = true;
2698 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2699 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2700 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2701 name = create_tmp_var_name (".omp_data_s");
2702 name = build_decl (gimple_location (stmt),
2703 TYPE_DECL, name, ctx->record_type);
2704 DECL_ARTIFICIAL (name) = 1;
2705 DECL_NAMELESS (name) = 1;
2706 TYPE_NAME (ctx->record_type) = name;
2707 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2708 if (!gimple_omp_parallel_grid_phony (stmt))
2710 create_omp_child_function (ctx, false);
2711 gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
2714 scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
2715 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2717 if (TYPE_FIELDS (ctx->record_type) == NULL)
2718 ctx->record_type = ctx->receiver_decl = NULL;
2721 /* Scan an OpenMP task directive. */
2723 static void
2724 scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2726 omp_context *ctx;
2727 tree name, t;
2728 gomp_task *stmt = as_a <gomp_task *> (gsi_stmt (*gsi));
2730 /* Ignore task directives with empty bodies. */
2731 if (optimize > 0
2732 && empty_body_p (gimple_omp_body (stmt)))
2734 gsi_replace (gsi, gimple_build_nop (), false);
2735 return;
2738 if (gimple_omp_task_taskloop_p (stmt))
2739 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_TASKLOOP, stmt, outer_ctx);
2741 ctx = new_omp_context (stmt, outer_ctx);
2742 taskreg_contexts.safe_push (ctx);
2743 if (taskreg_nesting_level > 1)
2744 ctx->is_nested = true;
2745 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2746 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2747 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2748 name = create_tmp_var_name (".omp_data_s");
2749 name = build_decl (gimple_location (stmt),
2750 TYPE_DECL, name, ctx->record_type);
2751 DECL_ARTIFICIAL (name) = 1;
2752 DECL_NAMELESS (name) = 1;
2753 TYPE_NAME (ctx->record_type) = name;
2754 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2755 create_omp_child_function (ctx, false);
2756 gimple_omp_task_set_child_fn (stmt, ctx->cb.dst_fn);
2758 scan_sharing_clauses (gimple_omp_task_clauses (stmt), ctx);
2760 if (ctx->srecord_type)
2762 name = create_tmp_var_name (".omp_data_a");
2763 name = build_decl (gimple_location (stmt),
2764 TYPE_DECL, name, ctx->srecord_type);
2765 DECL_ARTIFICIAL (name) = 1;
2766 DECL_NAMELESS (name) = 1;
2767 TYPE_NAME (ctx->srecord_type) = name;
2768 TYPE_ARTIFICIAL (ctx->srecord_type) = 1;
2769 create_omp_child_function (ctx, true);
2772 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2774 if (TYPE_FIELDS (ctx->record_type) == NULL)
2776 ctx->record_type = ctx->receiver_decl = NULL;
2777 t = build_int_cst (long_integer_type_node, 0);
2778 gimple_omp_task_set_arg_size (stmt, t);
2779 t = build_int_cst (long_integer_type_node, 1);
2780 gimple_omp_task_set_arg_align (stmt, t);
2785 /* If any decls have been made addressable during scan_omp,
2786 adjust their fields if needed, and lay out the record types
2787 of parallel/task constructs. */
2789 static void
2790 finish_taskreg_scan (omp_context *ctx)
2792 if (ctx->record_type == NULL_TREE)
2793 return;
2795 /* If any task_shared_vars were needed, verify for all
2796 OMP_CLAUSE_SHARED clauses on GIMPLE_OMP_{PARALLEL,TASK}
2797 statements that use_pointer_for_field hasn't changed
2798 because of that. If it did, update the field types now. */
2799 if (task_shared_vars)
2801 tree c;
2803 for (c = gimple_omp_taskreg_clauses (ctx->stmt);
2804 c; c = OMP_CLAUSE_CHAIN (c))
2805 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
2806 && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2808 tree decl = OMP_CLAUSE_DECL (c);
2810 /* Global variables don't need to be copied,
2811 the receiver side will use them directly. */
2812 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2813 continue;
2814 if (!bitmap_bit_p (task_shared_vars, DECL_UID (decl))
2815 || !use_pointer_for_field (decl, ctx))
2816 continue;
2817 tree field = lookup_field (decl, ctx);
2818 if (TREE_CODE (TREE_TYPE (field)) == POINTER_TYPE
2819 && TREE_TYPE (TREE_TYPE (field)) == TREE_TYPE (decl))
2820 continue;
2821 TREE_TYPE (field) = build_pointer_type (TREE_TYPE (decl));
2822 TREE_THIS_VOLATILE (field) = 0;
2823 DECL_USER_ALIGN (field) = 0;
2824 SET_DECL_ALIGN (field, TYPE_ALIGN (TREE_TYPE (field)));
2825 if (TYPE_ALIGN (ctx->record_type) < DECL_ALIGN (field))
2826 SET_TYPE_ALIGN (ctx->record_type, DECL_ALIGN (field));
2827 if (ctx->srecord_type)
2829 tree sfield = lookup_sfield (decl, ctx);
2830 TREE_TYPE (sfield) = TREE_TYPE (field);
2831 TREE_THIS_VOLATILE (sfield) = 0;
2832 DECL_USER_ALIGN (sfield) = 0;
2833 SET_DECL_ALIGN (sfield, DECL_ALIGN (field));
2834 if (TYPE_ALIGN (ctx->srecord_type) < DECL_ALIGN (sfield))
2835 SET_TYPE_ALIGN (ctx->srecord_type, DECL_ALIGN (sfield));
2840 if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2842 layout_type (ctx->record_type);
2843 fixup_child_record_type (ctx);
2845 else
2847 location_t loc = gimple_location (ctx->stmt);
2848 tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
2849 /* Move VLA fields to the end. */
2850 p = &TYPE_FIELDS (ctx->record_type);
2851 while (*p)
2852 if (!TYPE_SIZE_UNIT (TREE_TYPE (*p))
2853 || ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p))))
2855 *q = *p;
2856 *p = TREE_CHAIN (*p);
2857 TREE_CHAIN (*q) = NULL_TREE;
2858 q = &TREE_CHAIN (*q);
2860 else
2861 p = &DECL_CHAIN (*p);
2862 *p = vla_fields;
2863 if (gimple_omp_task_taskloop_p (ctx->stmt))
2865 /* Move the fields corresponding to the first and second _looptemp_
2866 clauses first. These are filled by GOMP_taskloop
2867 and thus need to be in specific positions. */
2868 tree c1 = gimple_omp_task_clauses (ctx->stmt);
2869 c1 = find_omp_clause (c1, OMP_CLAUSE__LOOPTEMP_);
2870 tree c2 = find_omp_clause (OMP_CLAUSE_CHAIN (c1),
2871 OMP_CLAUSE__LOOPTEMP_);
2872 tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx);
2873 tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx);
2874 p = &TYPE_FIELDS (ctx->record_type);
2875 while (*p)
2876 if (*p == f1 || *p == f2)
2877 *p = DECL_CHAIN (*p);
2878 else
2879 p = &DECL_CHAIN (*p);
2880 DECL_CHAIN (f1) = f2;
2881 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->record_type);
2882 TYPE_FIELDS (ctx->record_type) = f1;
2883 if (ctx->srecord_type)
2885 f1 = lookup_sfield (OMP_CLAUSE_DECL (c1), ctx);
2886 f2 = lookup_sfield (OMP_CLAUSE_DECL (c2), ctx);
2887 p = &TYPE_FIELDS (ctx->srecord_type);
2888 while (*p)
2889 if (*p == f1 || *p == f2)
2890 *p = DECL_CHAIN (*p);
2891 else
2892 p = &DECL_CHAIN (*p);
2893 DECL_CHAIN (f1) = f2;
2894 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->srecord_type);
2895 TYPE_FIELDS (ctx->srecord_type) = f1;
2898 layout_type (ctx->record_type);
2899 fixup_child_record_type (ctx);
2900 if (ctx->srecord_type)
2901 layout_type (ctx->srecord_type);
2902 tree t = fold_convert_loc (loc, long_integer_type_node,
2903 TYPE_SIZE_UNIT (ctx->record_type));
2904 gimple_omp_task_set_arg_size (ctx->stmt, t);
2905 t = build_int_cst (long_integer_type_node,
2906 TYPE_ALIGN_UNIT (ctx->record_type));
2907 gimple_omp_task_set_arg_align (ctx->stmt, t);
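/* Illustrative layout effect (hypothetical field names): a task
   record scanned as { a, vla1, b, vla2 } is reordered above to
   { a, b, vla1, vla2 }, moving the variable-length fields to the
   end; for a taskloop the two _looptemp_ fields lt0/lt1 are then
   pulled to the front, giving { lt0, lt1, a, b, vla1, vla2 },
   since GOMP_taskloop stores into them at known positions.  */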
2911 /* Find the enclosing offload context. */
2913 static omp_context *
2914 enclosing_target_ctx (omp_context *ctx)
2916 for (; ctx; ctx = ctx->outer)
2917 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET)
2918 break;
2920 return ctx;
2923 /* Return true if ctx is part of an oacc kernels region. */
2925 static bool
2926 ctx_in_oacc_kernels_region (omp_context *ctx)
2928 for (;ctx != NULL; ctx = ctx->outer)
2930 gimple *stmt = ctx->stmt;
2931 if (gimple_code (stmt) == GIMPLE_OMP_TARGET
2932 && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
2933 return true;
2936 return false;
2939 /* Check the parallelism clauses inside a kernels region.
2940 Until kernels handling moves to use the same loop indirection
2941 scheme as parallel, we need to do this checking early. */
2943 static unsigned
2944 check_oacc_kernel_gwv (gomp_for *stmt, omp_context *ctx)
2946 bool checking = true;
2947 unsigned outer_mask = 0;
2948 unsigned this_mask = 0;
2949 bool has_seq = false, has_auto = false;
2951 if (ctx->outer)
2952 outer_mask = check_oacc_kernel_gwv (NULL, ctx->outer);
2953 if (!stmt)
2955 checking = false;
2956 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR)
2957 return outer_mask;
2958 stmt = as_a <gomp_for *> (ctx->stmt);
2961 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
2963 switch (OMP_CLAUSE_CODE (c))
2965 case OMP_CLAUSE_GANG:
2966 this_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2967 break;
2968 case OMP_CLAUSE_WORKER:
2969 this_mask |= GOMP_DIM_MASK (GOMP_DIM_WORKER);
2970 break;
2971 case OMP_CLAUSE_VECTOR:
2972 this_mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
2973 break;
2974 case OMP_CLAUSE_SEQ:
2975 has_seq = true;
2976 break;
2977 case OMP_CLAUSE_AUTO:
2978 has_auto = true;
2979 break;
2980 default:
2981 break;
2985 if (checking)
2987 if (has_seq && (this_mask || has_auto))
2988 error_at (gimple_location (stmt), "%<seq%> overrides other"
2989 " OpenACC loop specifiers");
2990 else if (has_auto && this_mask)
2991 error_at (gimple_location (stmt), "%<auto%> conflicts with other"
2992 " OpenACC loop specifiers");
2994 if (this_mask & outer_mask)
2995 error_at (gimple_location (stmt), "inner loop uses same"
2996 " OpenACC parallelism as containing loop");
2999 return outer_mask | this_mask;
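/* Examples of what the checking above rejects (illustrative):

     #pragma acc loop seq gang      // "seq" overrides other specifiers
     #pragma acc loop auto worker   // "auto" conflicts with other specifiers

     #pragma acc loop worker
     for (i = 0; i < n; i++)
       {
         #pragma acc loop worker    // same parallelism as containing loop
         for (j = 0; j < m; j++)
           ...
       }
  */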
3002 /* Scan a GIMPLE_OMP_FOR. */
3004 static void
3005 scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
3007 omp_context *ctx;
3008 size_t i;
3009 tree clauses = gimple_omp_for_clauses (stmt);
3011 ctx = new_omp_context (stmt, outer_ctx);
3013 if (is_gimple_omp_oacc (stmt))
3015 omp_context *tgt = enclosing_target_ctx (outer_ctx);
3017 if (!tgt || is_oacc_parallel (tgt))
3018 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3020 char const *check = NULL;
3022 switch (OMP_CLAUSE_CODE (c))
3024 case OMP_CLAUSE_GANG:
3025 check = "gang";
3026 break;
3028 case OMP_CLAUSE_WORKER:
3029 check = "worker";
3030 break;
3032 case OMP_CLAUSE_VECTOR:
3033 check = "vector";
3034 break;
3036 default:
3037 break;
3040 if (check && OMP_CLAUSE_OPERAND (c, 0))
3041 error_at (gimple_location (stmt),
3042 "argument not permitted on %qs clause in"
3043 " OpenACC %<parallel%>", check);
3046 if (tgt && is_oacc_kernels (tgt))
3048 /* Strip out reductions, as they are not handled yet. */
3049 tree *prev_ptr = &clauses;
3051 while (tree probe = *prev_ptr)
3053 tree *next_ptr = &OMP_CLAUSE_CHAIN (probe);
3055 if (OMP_CLAUSE_CODE (probe) == OMP_CLAUSE_REDUCTION)
3056 *prev_ptr = *next_ptr;
3057 else
3058 prev_ptr = next_ptr;
3061 gimple_omp_for_set_clauses (stmt, clauses);
3062 check_oacc_kernel_gwv (stmt, ctx);
3066 scan_sharing_clauses (clauses, ctx);
3068 scan_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
3069 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
3071 scan_omp_op (gimple_omp_for_index_ptr (stmt, i), ctx);
3072 scan_omp_op (gimple_omp_for_initial_ptr (stmt, i), ctx);
3073 scan_omp_op (gimple_omp_for_final_ptr (stmt, i), ctx);
3074 scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx);
3076 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3079 /* Duplicate #pragma omp simd, producing one copy for SIMT and another for SIMD. */
3081 static void
3082 scan_omp_simd (gimple_stmt_iterator *gsi, gomp_for *stmt,
3083 omp_context *outer_ctx)
3085 gbind *bind = gimple_build_bind (NULL, NULL, NULL);
3086 gsi_replace (gsi, bind, false);
3087 gimple_seq seq = NULL;
3088 gimple *g = gimple_build_call_internal (IFN_GOMP_USE_SIMT, 0);
3089 tree cond = create_tmp_var_raw (integer_type_node);
3090 DECL_CONTEXT (cond) = current_function_decl;
3091 DECL_SEEN_IN_BIND_EXPR_P (cond) = 1;
3092 gimple_bind_set_vars (bind, cond);
3093 gimple_call_set_lhs (g, cond);
3094 gimple_seq_add_stmt (&seq, g);
3095 tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
3096 tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
3097 tree lab3 = create_artificial_label (UNKNOWN_LOCATION);
3098 g = gimple_build_cond (NE_EXPR, cond, integer_zero_node, lab1, lab2);
3099 gimple_seq_add_stmt (&seq, g);
3100 g = gimple_build_label (lab1);
3101 gimple_seq_add_stmt (&seq, g);
3102 gimple_seq new_seq = copy_gimple_seq_and_replace_locals (stmt);
3103 gomp_for *new_stmt = as_a <gomp_for *> (new_seq);
3104 tree clause = build_omp_clause (gimple_location (stmt), OMP_CLAUSE__SIMT_);
3105 OMP_CLAUSE_CHAIN (clause) = gimple_omp_for_clauses (new_stmt);
3106 gimple_omp_for_set_clauses (new_stmt, clause);
3107 gimple_seq_add_stmt (&seq, new_stmt);
3108 g = gimple_build_goto (lab3);
3109 gimple_seq_add_stmt (&seq, g);
3110 g = gimple_build_label (lab2);
3111 gimple_seq_add_stmt (&seq, g);
3112 gimple_seq_add_stmt (&seq, stmt);
3113 g = gimple_build_label (lab3);
3114 gimple_seq_add_stmt (&seq, g);
3115 gimple_bind_set_body (bind, seq);
3116 update_stmt (bind);
3117 scan_omp_for (new_stmt, outer_ctx);
3118 scan_omp_for (stmt, outer_ctx);
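/* The bind built above amounts to the following pseudo-GIMPLE shape
   (a sketch, not literal dump output):

     cond = .GOMP_USE_SIMT ();
     if (cond != 0) goto lab1; else goto lab2;
     lab1:
       #pragma omp simd _simt_ ...   // copy carrying the _SIMT_ clause
       goto lab3;
     lab2:
       #pragma omp simd ...          // the original loop
     lab3:

   Both copies are then scanned as ordinary GIMPLE_OMP_FOR loops.  */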
3121 /* Scan an OpenMP sections directive. */
3123 static void
3124 scan_omp_sections (gomp_sections *stmt, omp_context *outer_ctx)
3126 omp_context *ctx;
3128 ctx = new_omp_context (stmt, outer_ctx);
3129 scan_sharing_clauses (gimple_omp_sections_clauses (stmt), ctx);
3130 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3133 /* Scan an OpenMP single directive. */
3135 static void
3136 scan_omp_single (gomp_single *stmt, omp_context *outer_ctx)
3138 omp_context *ctx;
3139 tree name;
3141 ctx = new_omp_context (stmt, outer_ctx);
3142 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3143 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3144 name = create_tmp_var_name (".omp_copy_s");
3145 name = build_decl (gimple_location (stmt),
3146 TYPE_DECL, name, ctx->record_type);
3147 TYPE_NAME (ctx->record_type) = name;
3149 scan_sharing_clauses (gimple_omp_single_clauses (stmt), ctx);
3150 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3152 if (TYPE_FIELDS (ctx->record_type) == NULL)
3153 ctx->record_type = NULL;
3154 else
3155 layout_type (ctx->record_type);
3158 /* Return true if the CLAUSES of an omp target guarantee that the base pointers
3159 used in the corresponding offloaded function are restrict. */
3161 static bool
3162 omp_target_base_pointers_restrict_p (tree clauses)
3164 /* The analysis relies on the GOMP_MAP_FORCE_* mapping kinds, which are only
3165 used by OpenACC. */
3166 if (flag_openacc == 0)
3167 return false;
3169 /* I. Basic example:
3171 void foo (void)
3173 unsigned int a[2], b[2];
3175 #pragma acc kernels \
3176 copyout (a) \
3177 copyout (b)
3179 a[0] = 0;
3180 b[0] = 1;
3184 After gimplification, we have:
3186 #pragma omp target oacc_kernels \
3187 map(force_from:a [len: 8]) \
3188 map(force_from:b [len: 8])
3190 a[0] = 0;
3191 b[0] = 1;
3194 Because both mappings have the force prefix, we know that they will be
3195 allocated when calling the corresponding offloaded function, which means we
3196 can mark the base pointers for a and b in the offloaded function as
3197 restrict. */
3199 tree c;
3200 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3202 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
3203 return false;
3205 switch (OMP_CLAUSE_MAP_KIND (c))
3207 case GOMP_MAP_FORCE_ALLOC:
3208 case GOMP_MAP_FORCE_TO:
3209 case GOMP_MAP_FORCE_FROM:
3210 case GOMP_MAP_FORCE_TOFROM:
3211 break;
3212 default:
3213 return false;
3217 return true;
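/* Counterexample (illustrative): any mapping kind outside the
   GOMP_MAP_FORCE_* set, e.g. one arising from a present_or_copy
   style clause, makes the loop above return false: the data may
   already be present (and aliased) on the device rather than
   freshly allocated, so restrict cannot be assumed.  */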
3220 /* Scan a GIMPLE_OMP_TARGET. */
3222 static void
3223 scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
3225 omp_context *ctx;
3226 tree name;
3227 bool offloaded = is_gimple_omp_offloaded (stmt);
3228 tree clauses = gimple_omp_target_clauses (stmt);
3230 ctx = new_omp_context (stmt, outer_ctx);
3231 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3232 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
3233 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3234 name = create_tmp_var_name (".omp_data_t");
3235 name = build_decl (gimple_location (stmt),
3236 TYPE_DECL, name, ctx->record_type);
3237 DECL_ARTIFICIAL (name) = 1;
3238 DECL_NAMELESS (name) = 1;
3239 TYPE_NAME (ctx->record_type) = name;
3240 TYPE_ARTIFICIAL (ctx->record_type) = 1;
3242 bool base_pointers_restrict = false;
3243 if (offloaded)
3245 create_omp_child_function (ctx, false);
3246 gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
3248 base_pointers_restrict = omp_target_base_pointers_restrict_p (clauses);
3249 if (base_pointers_restrict
3250 && dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file,
3252 "Base pointers in offloaded function are restrict\n");
3255 scan_sharing_clauses (clauses, ctx, base_pointers_restrict);
3256 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3258 if (TYPE_FIELDS (ctx->record_type) == NULL)
3259 ctx->record_type = ctx->receiver_decl = NULL;
3260 else
3262 TYPE_FIELDS (ctx->record_type)
3263 = nreverse (TYPE_FIELDS (ctx->record_type));
3264 if (flag_checking)
3266 unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
3267 for (tree field = TYPE_FIELDS (ctx->record_type);
3268 field;
3269 field = DECL_CHAIN (field))
3270 gcc_assert (DECL_ALIGN (field) == align);
3272 layout_type (ctx->record_type);
3273 if (offloaded)
3274 fixup_child_record_type (ctx);
3278 /* Scan an OpenMP teams directive. */
3280 static void
3281 scan_omp_teams (gomp_teams *stmt, omp_context *outer_ctx)
3283 omp_context *ctx = new_omp_context (stmt, outer_ctx);
3284 scan_sharing_clauses (gimple_omp_teams_clauses (stmt), ctx);
3285 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3288 /* Check nesting restrictions. */
3289 static bool
3290 check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
3292 tree c;
3294 if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY)
3295 /* GRID_BODY is an artificial construct, nesting rules will be checked in
3296 the original copy of its contents. */
3297 return true;
3299 /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin)
3300 inside an OpenACC CTX. */
3301 if (!(is_gimple_omp (stmt)
3302 && is_gimple_omp_oacc (stmt))
3303 /* Except for atomic codes that we share with OpenMP. */
3304 && !(gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
3305 || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
3307 if (get_oacc_fn_attrib (cfun->decl) != NULL)
3309 error_at (gimple_location (stmt),
3310 "non-OpenACC construct inside of OpenACC routine");
3311 return false;
3313 else
3314 for (omp_context *octx = ctx; octx != NULL; octx = octx->outer)
3315 if (is_gimple_omp (octx->stmt)
3316 && is_gimple_omp_oacc (octx->stmt))
3318 error_at (gimple_location (stmt),
3319 "non-OpenACC construct inside of OpenACC region");
3320 return false;
3324 if (ctx != NULL)
3326 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3327 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3329 c = NULL_TREE;
3330 if (gimple_code (stmt) == GIMPLE_OMP_ORDERED)
3332 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3333 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3335 if (find_omp_clause (c, OMP_CLAUSE_THREADS)
3336 && (ctx->outer == NULL
3337 || !gimple_omp_for_combined_into_p (ctx->stmt)
3338 || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR
3339 || (gimple_omp_for_kind (ctx->outer->stmt)
3340 != GF_OMP_FOR_KIND_FOR)
3341 || !gimple_omp_for_combined_p (ctx->outer->stmt)))
3343 error_at (gimple_location (stmt),
3344 "%<ordered simd threads%> must be closely "
3345 "nested inside of %<for simd%> region");
3346 return false;
3348 return true;
3351 error_at (gimple_location (stmt),
3352 "OpenMP constructs other than %<#pragma omp ordered simd%>"
3353 " may not be nested inside %<simd%> region");
3354 return false;
3356 else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
3358 if ((gimple_code (stmt) != GIMPLE_OMP_FOR
3359 || ((gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_DISTRIBUTE)
3360 && (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP)))
3361 && gimple_code (stmt) != GIMPLE_OMP_PARALLEL)
3363 error_at (gimple_location (stmt),
3364 "only %<distribute%> or %<parallel%> regions are "
3365 "allowed to be strictly nested inside %<teams%> "
3366 "region");
3367 return false;
3371 switch (gimple_code (stmt))
3373 case GIMPLE_OMP_FOR:
3374 if (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)
3375 return true;
3376 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3378 if (ctx != NULL && gimple_code (ctx->stmt) != GIMPLE_OMP_TEAMS)
3380 error_at (gimple_location (stmt),
3381 "%<distribute%> region must be strictly nested "
3382 "inside %<teams%> construct");
3383 return false;
3385 return true;
3387 /* We split taskloop into a task and a taskloop nested within it. */
3388 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3389 return true;
3390 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
3392 bool ok = false;
3394 if (ctx)
3395 switch (gimple_code (ctx->stmt))
3397 case GIMPLE_OMP_FOR:
3398 ok = (gimple_omp_for_kind (ctx->stmt)
3399 == GF_OMP_FOR_KIND_OACC_LOOP);
3400 break;
3402 case GIMPLE_OMP_TARGET:
3403 switch (gimple_omp_target_kind (ctx->stmt))
3405 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3406 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3407 ok = true;
3408 break;
3410 default:
3411 break;
3414 default:
3415 break;
3417 else if (get_oacc_fn_attrib (current_function_decl))
3418 ok = true;
3419 if (!ok)
3421 error_at (gimple_location (stmt),
3422 "OpenACC loop directive must be associated with"
3423 " an OpenACC compute region");
3424 return false;
3427 /* FALLTHRU */
3428 case GIMPLE_CALL:
3429 if (is_gimple_call (stmt)
3430 && (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3431 == BUILT_IN_GOMP_CANCEL
3432 || DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3433 == BUILT_IN_GOMP_CANCELLATION_POINT))
3435 const char *bad = NULL;
3436 const char *kind = NULL;
3437 const char *construct
3438 = (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3439 == BUILT_IN_GOMP_CANCEL)
3440 ? "#pragma omp cancel"
3441 : "#pragma omp cancellation point";
3442 if (ctx == NULL)
3444 error_at (gimple_location (stmt), "orphaned %qs construct",
3445 construct);
3446 return false;
3448 switch (tree_fits_shwi_p (gimple_call_arg (stmt, 0))
3449 ? tree_to_shwi (gimple_call_arg (stmt, 0))
3450 : 0)
3452 case 1:
3453 if (gimple_code (ctx->stmt) != GIMPLE_OMP_PARALLEL)
3454 bad = "#pragma omp parallel";
3455 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3456 == BUILT_IN_GOMP_CANCEL
3457 && !integer_zerop (gimple_call_arg (stmt, 1)))
3458 ctx->cancellable = true;
3459 kind = "parallel";
3460 break;
3461 case 2:
3462 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3463 || gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR)
3464 bad = "#pragma omp for";
3465 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3466 == BUILT_IN_GOMP_CANCEL
3467 && !integer_zerop (gimple_call_arg (stmt, 1)))
3469 ctx->cancellable = true;
3470 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3471 OMP_CLAUSE_NOWAIT))
3472 warning_at (gimple_location (stmt), 0,
3473 "%<#pragma omp cancel for%> inside "
3474 "%<nowait%> for construct");
3475 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3476 OMP_CLAUSE_ORDERED))
3477 warning_at (gimple_location (stmt), 0,
3478 "%<#pragma omp cancel for%> inside "
3479 "%<ordered%> for construct");
3481 kind = "for";
3482 break;
3483 case 4:
3484 if (gimple_code (ctx->stmt) != GIMPLE_OMP_SECTIONS
3485 && gimple_code (ctx->stmt) != GIMPLE_OMP_SECTION)
3486 bad = "#pragma omp sections";
3487 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3488 == BUILT_IN_GOMP_CANCEL
3489 && !integer_zerop (gimple_call_arg (stmt, 1)))
3491 if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS)
3493 ctx->cancellable = true;
3494 if (find_omp_clause (gimple_omp_sections_clauses
3495 (ctx->stmt),
3496 OMP_CLAUSE_NOWAIT))
3497 warning_at (gimple_location (stmt), 0,
3498 "%<#pragma omp cancel sections%> inside "
3499 "%<nowait%> sections construct");
3501 else
3503 gcc_assert (ctx->outer
3504 && gimple_code (ctx->outer->stmt)
3505 == GIMPLE_OMP_SECTIONS);
3506 ctx->outer->cancellable = true;
3507 if (find_omp_clause (gimple_omp_sections_clauses
3508 (ctx->outer->stmt),
3509 OMP_CLAUSE_NOWAIT))
3510 warning_at (gimple_location (stmt), 0,
3511 "%<#pragma omp cancel sections%> inside "
3512 "%<nowait%> sections construct");
3515 kind = "sections";
3516 break;
3517 case 8:
3518 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TASK)
3519 bad = "#pragma omp task";
3520 else
3522 for (omp_context *octx = ctx->outer;
3523 octx; octx = octx->outer)
3525 switch (gimple_code (octx->stmt))
3527 case GIMPLE_OMP_TASKGROUP:
3528 break;
3529 case GIMPLE_OMP_TARGET:
3530 if (gimple_omp_target_kind (octx->stmt)
3531 != GF_OMP_TARGET_KIND_REGION)
3532 continue;
3533 /* FALLTHRU */
3534 case GIMPLE_OMP_PARALLEL:
3535 case GIMPLE_OMP_TEAMS:
3536 error_at (gimple_location (stmt),
3537 "%<%s taskgroup%> construct not closely "
3538 "nested inside of %<taskgroup%> region",
3539 construct);
3540 return false;
3541 default:
3542 continue;
3544 break;
3546 ctx->cancellable = true;
3548 kind = "taskgroup";
3549 break;
3550 default:
3551 error_at (gimple_location (stmt), "invalid arguments");
3552 return false;
3554 if (bad)
3556 error_at (gimple_location (stmt),
3557 "%<%s %s%> construct not closely nested inside of %qs",
3558 construct, kind, bad);
3559 return false;
3562 /* FALLTHRU */
3563 case GIMPLE_OMP_SECTIONS:
3564 case GIMPLE_OMP_SINGLE:
3565 for (; ctx != NULL; ctx = ctx->outer)
3566 switch (gimple_code (ctx->stmt))
3568 case GIMPLE_OMP_FOR:
3569 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3570 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3571 break;
3572 /* FALLTHRU */
3573 case GIMPLE_OMP_SECTIONS:
3574 case GIMPLE_OMP_SINGLE:
3575 case GIMPLE_OMP_ORDERED:
3576 case GIMPLE_OMP_MASTER:
3577 case GIMPLE_OMP_TASK:
3578 case GIMPLE_OMP_CRITICAL:
3579 if (is_gimple_call (stmt))
3581 if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3582 != BUILT_IN_GOMP_BARRIER)
3583 return true;
3584 error_at (gimple_location (stmt),
3585 "barrier region may not be closely nested inside "
3586 "of work-sharing, %<critical%>, %<ordered%>, "
3587 "%<master%>, explicit %<task%> or %<taskloop%> "
3588 "region");
3589 return false;
3591 error_at (gimple_location (stmt),
3592 "work-sharing region may not be closely nested inside "
3593 "of work-sharing, %<critical%>, %<ordered%>, "
3594 "%<master%>, explicit %<task%> or %<taskloop%> region");
3595 return false;
3596 case GIMPLE_OMP_PARALLEL:
3597 case GIMPLE_OMP_TEAMS:
3598 return true;
3599 case GIMPLE_OMP_TARGET:
3600 if (gimple_omp_target_kind (ctx->stmt)
3601 == GF_OMP_TARGET_KIND_REGION)
3602 return true;
3603 break;
3604 default:
3605 break;
3607 break;
3608 case GIMPLE_OMP_MASTER:
3609 for (; ctx != NULL; ctx = ctx->outer)
3610 switch (gimple_code (ctx->stmt))
3612 case GIMPLE_OMP_FOR:
3613 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3614 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3615 break;
3616 /* FALLTHRU */
3617 case GIMPLE_OMP_SECTIONS:
3618 case GIMPLE_OMP_SINGLE:
3619 case GIMPLE_OMP_TASK:
3620 error_at (gimple_location (stmt),
3621 "%<master%> region may not be closely nested inside "
3622 "of work-sharing, explicit %<task%> or %<taskloop%> "
3623 "region");
3624 return false;
3625 case GIMPLE_OMP_PARALLEL:
3626 case GIMPLE_OMP_TEAMS:
3627 return true;
3628 case GIMPLE_OMP_TARGET:
3629 if (gimple_omp_target_kind (ctx->stmt)
3630 == GF_OMP_TARGET_KIND_REGION)
3631 return true;
3632 break;
3633 default:
3634 break;
3636 break;
3637 case GIMPLE_OMP_TASK:
3638 for (c = gimple_omp_task_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3639 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3640 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3641 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3643 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3644 error_at (OMP_CLAUSE_LOCATION (c),
3645 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3646 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3647 return false;
3649 break;
3650 case GIMPLE_OMP_ORDERED:
3651 for (c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3652 c; c = OMP_CLAUSE_CHAIN (c))
3654 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND)
3656 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_THREADS
3657 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SIMD);
3658 continue;
3660 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3661 if (kind == OMP_CLAUSE_DEPEND_SOURCE
3662 || kind == OMP_CLAUSE_DEPEND_SINK)
3664 tree oclause;
3665 /* Look for containing ordered(N) loop. */
3666 if (ctx == NULL
3667 || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3668 || (oclause
3669 = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3670 OMP_CLAUSE_ORDERED)) == NULL_TREE)
3672 error_at (OMP_CLAUSE_LOCATION (c),
3673 "%<ordered%> construct with %<depend%> clause "
3674 "must be closely nested inside an %<ordered%> "
3675 "loop");
3676 return false;
3678 else if (OMP_CLAUSE_ORDERED_EXPR (oclause) == NULL_TREE)
3680 error_at (OMP_CLAUSE_LOCATION (c),
3681 "%<ordered%> construct with %<depend%> clause "
3682 "must be closely nested inside a loop with "
3683 "%<ordered%> clause with a parameter");
3684 return false;
3687 else
3689 error_at (OMP_CLAUSE_LOCATION (c),
3690 "invalid depend kind in omp %<ordered%> %<depend%>");
3691 return false;
3694 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3695 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3697 /* An ordered simd must be closely nested inside a simd region,
3698 and a simd region must not contain constructs other than
3699 ordered simd; therefore an ordered simd is either orphaned,
3700 or ctx->stmt must be a simd. The latter case was already
3701 handled earlier. */
3702 if (ctx != NULL)
3704 error_at (gimple_location (stmt),
3705 "%<ordered%> %<simd%> must be closely nested inside "
3706 "%<simd%> region");
3707 return false;
3710 for (; ctx != NULL; ctx = ctx->outer)
3711 switch (gimple_code (ctx->stmt))
3713 case GIMPLE_OMP_CRITICAL:
3714 case GIMPLE_OMP_TASK:
3715 case GIMPLE_OMP_ORDERED:
3716 ordered_in_taskloop:
3717 error_at (gimple_location (stmt),
3718 "%<ordered%> region may not be closely nested inside "
3719 "of %<critical%>, %<ordered%>, explicit %<task%> or "
3720 "%<taskloop%> region");
3721 return false;
3722 case GIMPLE_OMP_FOR:
3723 if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3724 goto ordered_in_taskloop;
3725 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3726 OMP_CLAUSE_ORDERED) == NULL)
3728 error_at (gimple_location (stmt),
3729 "%<ordered%> region must be closely nested inside "
3730 "a loop region with an %<ordered%> clause");
3731 return false;
3733 return true;
3734 case GIMPLE_OMP_TARGET:
3735 if (gimple_omp_target_kind (ctx->stmt)
3736 != GF_OMP_TARGET_KIND_REGION)
3737 break;
3738 /* FALLTHRU */
3739 case GIMPLE_OMP_PARALLEL:
3740 case GIMPLE_OMP_TEAMS:
3741 error_at (gimple_location (stmt),
3742 "%<ordered%> region must be closely nested inside "
3743 "a loop region with an %<ordered%> clause");
3744 return false;
3745 default:
3746 break;
3748 break;
3749 case GIMPLE_OMP_CRITICAL:
3751 tree this_stmt_name
3752 = gimple_omp_critical_name (as_a <gomp_critical *> (stmt));
3753 for (; ctx != NULL; ctx = ctx->outer)
3754 if (gomp_critical *other_crit
3755 = dyn_cast <gomp_critical *> (ctx->stmt))
3756 if (this_stmt_name == gimple_omp_critical_name (other_crit))
3758 error_at (gimple_location (stmt),
3759 "%<critical%> region may not be nested inside "
3760 "a %<critical%> region with the same name");
3761 return false;
3764 break;
3765 case GIMPLE_OMP_TEAMS:
3766 if (ctx == NULL
3767 || gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET
3768 || gimple_omp_target_kind (ctx->stmt) != GF_OMP_TARGET_KIND_REGION)
3770 error_at (gimple_location (stmt),
3771 "%<teams%> construct not closely nested inside of "
3772 "%<target%> construct");
3773 return false;
3775 break;
3776 case GIMPLE_OMP_TARGET:
3777 for (c = gimple_omp_target_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3778 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3779 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3780 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3782 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3783 error_at (OMP_CLAUSE_LOCATION (c),
3784 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3785 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3786 return false;
3788 if (is_gimple_omp_offloaded (stmt)
3789 && get_oacc_fn_attrib (cfun->decl) != NULL)
3791 error_at (gimple_location (stmt),
3792 "OpenACC region inside of OpenACC routine, nested "
3793 "parallelism not supported yet");
3794 return false;
3796 for (; ctx != NULL; ctx = ctx->outer)
3798 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET)
3800 if (is_gimple_omp (stmt)
3801 && is_gimple_omp_oacc (stmt)
3802 && is_gimple_omp (ctx->stmt))
3804 error_at (gimple_location (stmt),
3805 "OpenACC construct inside of non-OpenACC region");
3806 return false;
3808 continue;
3811 const char *stmt_name, *ctx_stmt_name;
3812 switch (gimple_omp_target_kind (stmt))
3814 case GF_OMP_TARGET_KIND_REGION: stmt_name = "target"; break;
3815 case GF_OMP_TARGET_KIND_DATA: stmt_name = "target data"; break;
3816 case GF_OMP_TARGET_KIND_UPDATE: stmt_name = "target update"; break;
3817 case GF_OMP_TARGET_KIND_ENTER_DATA:
3818 stmt_name = "target enter data"; break;
3819 case GF_OMP_TARGET_KIND_EXIT_DATA:
3820 stmt_name = "target exit data"; break;
3821 case GF_OMP_TARGET_KIND_OACC_PARALLEL: stmt_name = "parallel"; break;
3822 case GF_OMP_TARGET_KIND_OACC_KERNELS: stmt_name = "kernels"; break;
3823 case GF_OMP_TARGET_KIND_OACC_DATA: stmt_name = "data"; break;
3824 case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
3825 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
3826 stmt_name = "enter/exit data"; break;
3827 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
3828 break;
3829 default: gcc_unreachable ();
3831 switch (gimple_omp_target_kind (ctx->stmt))
3833 case GF_OMP_TARGET_KIND_REGION: ctx_stmt_name = "target"; break;
3834 case GF_OMP_TARGET_KIND_DATA: ctx_stmt_name = "target data"; break;
3835 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3836 ctx_stmt_name = "parallel"; break;
3837 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3838 ctx_stmt_name = "kernels"; break;
3839 case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
3840 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
3841 ctx_stmt_name = "host_data"; break;
3842 default: gcc_unreachable ();
3845 /* OpenACC/OpenMP mismatch? */
3846 if (is_gimple_omp_oacc (stmt)
3847 != is_gimple_omp_oacc (ctx->stmt))
3849 error_at (gimple_location (stmt),
3850 "%s %qs construct inside of %s %qs region",
3851 (is_gimple_omp_oacc (stmt)
3852 ? "OpenACC" : "OpenMP"), stmt_name,
3853 (is_gimple_omp_oacc (ctx->stmt)
3854 ? "OpenACC" : "OpenMP"), ctx_stmt_name);
3855 return false;
3857 if (is_gimple_omp_offloaded (ctx->stmt))
3859 /* No GIMPLE_OMP_TARGET inside offloaded OpenACC CTX. */
3860 if (is_gimple_omp_oacc (ctx->stmt))
3862 error_at (gimple_location (stmt),
3863 "%qs construct inside of %qs region",
3864 stmt_name, ctx_stmt_name);
3865 return false;
3867 else
3869 warning_at (gimple_location (stmt), 0,
3870 "%qs construct inside of %qs region",
3871 stmt_name, ctx_stmt_name);
3875 break;
3876 default:
3877 break;
3879 return true;
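/* Illustrative sketch (user-level view, not part of this pass): the
   nesting checks above reject source such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       {
         #pragma omp ordered         -> error: ordered inside taskloop
         ;
       }

   or

     #pragma omp critical (lock)
     {
       #pragma omp critical (lock)   -> error: same-name nesting
       ;
     }

   while a teams construct is accepted only when it is closely nested
   inside a target region.  */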
3883 /* Helper function for scan_omp.
3885 Callback for walk_tree or operators in walk_gimple_stmt used to
3886 scan for OMP directives in TP. */
3888 static tree
3889 scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
3891 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
3892 omp_context *ctx = (omp_context *) wi->info;
3893 tree t = *tp;
3895 switch (TREE_CODE (t))
3897 case VAR_DECL:
3898 case PARM_DECL:
3899 case LABEL_DECL:
3900 case RESULT_DECL:
3901 if (ctx)
3903 tree repl = remap_decl (t, &ctx->cb);
3904 gcc_checking_assert (TREE_CODE (repl) != ERROR_MARK);
3905 *tp = repl;
3907 break;
3909 default:
3910 if (ctx && TYPE_P (t))
3911 *tp = remap_type (t, &ctx->cb);
3912 else if (!DECL_P (t))
3914 *walk_subtrees = 1;
3915 if (ctx)
3917 tree tem = remap_type (TREE_TYPE (t), &ctx->cb);
3918 if (tem != TREE_TYPE (t))
3920 if (TREE_CODE (t) == INTEGER_CST)
3921 *tp = wide_int_to_tree (tem, t);
3922 else
3923 TREE_TYPE (t) = tem;
3927 break;
3930 return NULL_TREE;
3933 /* Return true if FNDECL is a setjmp or a longjmp. */
3935 static bool
3936 setjmp_or_longjmp_p (const_tree fndecl)
3938 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3939 && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SETJMP
3940 || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LONGJMP))
3941 return true;
3943 tree declname = DECL_NAME (fndecl);
3944 if (!declname)
3945 return false;
3946 const char *name = IDENTIFIER_POINTER (declname);
3947 return !strcmp (name, "setjmp") || !strcmp (name, "longjmp");
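/* A minimal example of what this guards against (illustrative only):
   scan_omp_1_stmt below uses this predicate to diagnose nonlocal jumps
   in SIMD regions, e.g.

     jmp_buf buf;
     #pragma omp simd
     for (int i = 0; i < n; i++)
       if (setjmp (buf))     -> error: setjmp/longjmp inside simd construct
         ...

   Both the builtin forms and plain functions named "setjmp"/"longjmp"
   are caught.  */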
3951 /* Helper function for scan_omp.
3953 Callback for walk_gimple_stmt used to scan for OMP directives in
3954 the current statement in GSI. */
3956 static tree
3957 scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
3958 struct walk_stmt_info *wi)
3960 gimple *stmt = gsi_stmt (*gsi);
3961 omp_context *ctx = (omp_context *) wi->info;
3963 if (gimple_has_location (stmt))
3964 input_location = gimple_location (stmt);
3966 /* Check the nesting restrictions. */
3967 bool remove = false;
3968 if (is_gimple_omp (stmt))
3969 remove = !check_omp_nesting_restrictions (stmt, ctx);
3970 else if (is_gimple_call (stmt))
3972 tree fndecl = gimple_call_fndecl (stmt);
3973 if (fndecl)
3975 if (setjmp_or_longjmp_p (fndecl)
3976 && ctx
3977 && gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3978 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3980 remove = true;
3981 error_at (gimple_location (stmt),
3982 "setjmp/longjmp inside simd construct");
3984 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3985 switch (DECL_FUNCTION_CODE (fndecl))
3987 case BUILT_IN_GOMP_BARRIER:
3988 case BUILT_IN_GOMP_CANCEL:
3989 case BUILT_IN_GOMP_CANCELLATION_POINT:
3990 case BUILT_IN_GOMP_TASKYIELD:
3991 case BUILT_IN_GOMP_TASKWAIT:
3992 case BUILT_IN_GOMP_TASKGROUP_START:
3993 case BUILT_IN_GOMP_TASKGROUP_END:
3994 remove = !check_omp_nesting_restrictions (stmt, ctx);
3995 break;
3996 default:
3997 break;
4001 if (remove)
4003 stmt = gimple_build_nop ();
4004 gsi_replace (gsi, stmt, false);
4007 *handled_ops_p = true;
4009 switch (gimple_code (stmt))
4011 case GIMPLE_OMP_PARALLEL:
4012 taskreg_nesting_level++;
4013 scan_omp_parallel (gsi, ctx);
4014 taskreg_nesting_level--;
4015 break;
4017 case GIMPLE_OMP_TASK:
4018 taskreg_nesting_level++;
4019 scan_omp_task (gsi, ctx);
4020 taskreg_nesting_level--;
4021 break;
4023 case GIMPLE_OMP_FOR:
4024 if (((gimple_omp_for_kind (as_a <gomp_for *> (stmt))
4025 & GF_OMP_FOR_KIND_MASK) == GF_OMP_FOR_KIND_SIMD)
4026 && omp_maybe_offloaded_ctx (ctx)
4027 && omp_max_simt_vf ())
4028 scan_omp_simd (gsi, as_a <gomp_for *> (stmt), ctx);
4029 else
4030 scan_omp_for (as_a <gomp_for *> (stmt), ctx);
4031 break;
4033 case GIMPLE_OMP_SECTIONS:
4034 scan_omp_sections (as_a <gomp_sections *> (stmt), ctx);
4035 break;
4037 case GIMPLE_OMP_SINGLE:
4038 scan_omp_single (as_a <gomp_single *> (stmt), ctx);
4039 break;
4041 case GIMPLE_OMP_SECTION:
4042 case GIMPLE_OMP_MASTER:
4043 case GIMPLE_OMP_TASKGROUP:
4044 case GIMPLE_OMP_ORDERED:
4045 case GIMPLE_OMP_CRITICAL:
4046 case GIMPLE_OMP_GRID_BODY:
4047 ctx = new_omp_context (stmt, ctx);
4048 scan_omp (gimple_omp_body_ptr (stmt), ctx);
4049 break;
4051 case GIMPLE_OMP_TARGET:
4052 scan_omp_target (as_a <gomp_target *> (stmt), ctx);
4053 break;
4055 case GIMPLE_OMP_TEAMS:
4056 scan_omp_teams (as_a <gomp_teams *> (stmt), ctx);
4057 break;
4059 case GIMPLE_BIND:
4061 tree var;
4063 *handled_ops_p = false;
4064 if (ctx)
4065 for (var = gimple_bind_vars (as_a <gbind *> (stmt));
4066 var ;
4067 var = DECL_CHAIN (var))
4068 insert_decl_map (&ctx->cb, var, var);
4070 break;
4071 default:
4072 *handled_ops_p = false;
4073 break;
4076 return NULL_TREE;
4080 /* Scan all the statements starting at the current statement. CTX
4081 contains context information about the OMP directives and
4082 clauses found during the scan. */
4084 static void
4085 scan_omp (gimple_seq *body_p, omp_context *ctx)
4087 location_t saved_location;
4088 struct walk_stmt_info wi;
4090 memset (&wi, 0, sizeof (wi));
4091 wi.info = ctx;
4092 wi.want_locations = true;
4094 saved_location = input_location;
4095 walk_gimple_seq_mod (body_p, scan_omp_1_stmt, scan_omp_1_op, &wi);
4096 input_location = saved_location;
4099 /* Re-gimplification and code generation routines. */
4101 /* Build a call to GOMP_barrier. */
4103 static gimple *
4104 build_omp_barrier (tree lhs)
4106 tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
4107 : BUILT_IN_GOMP_BARRIER);
4108 gcall *g = gimple_build_call (fndecl, 0);
4109 if (lhs)
4110 gimple_call_set_lhs (g, lhs);
4111 return g;
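/* For illustration: with a NULL LHS this builds a plain

     GOMP_barrier ();

   call, while a cancellable region passes an LHS and gets

     <lhs> = GOMP_barrier_cancel ();

   whose result tells the generated code whether the enclosing region
   has been cancelled.  */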
4114 /* If a context was created for STMT when it was scanned, return it. */
4116 static omp_context *
4117 maybe_lookup_ctx (gimple *stmt)
4119 splay_tree_node n;
4120 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
4121 return n ? (omp_context *) n->value : NULL;
4125 /* Find the mapping for DECL in CTX or the immediately enclosing
4126 context that has a mapping for DECL.
4128 If CTX is a nested parallel directive, we may have to use the decl
4129 mappings created in CTX's parent context. Suppose that we have the
4130 following parallel nesting (variable UIDs shown for clarity):
4132 iD.1562 = 0;
4133 #omp parallel shared(iD.1562) -> outer parallel
4134 iD.1562 = iD.1562 + 1;
4136 #omp parallel shared (iD.1562) -> inner parallel
4137 iD.1562 = iD.1562 - 1;
4139 Each parallel structure will create a distinct .omp_data_s structure
4140 for copying iD.1562 in/out of the directive:
4142 outer parallel .omp_data_s.1.i -> iD.1562
4143 inner parallel .omp_data_s.2.i -> iD.1562
4145 A shared variable mapping will produce a copy-out operation before
4146 the parallel directive and a copy-in operation after it. So, in
4147 this case we would have:
4149 iD.1562 = 0;
4150 .omp_data_o.1.i = iD.1562;
4151 #omp parallel shared(iD.1562) -> outer parallel
4152 .omp_data_i.1 = &.omp_data_o.1
4153 .omp_data_i.1->i = .omp_data_i.1->i + 1;
4155 .omp_data_o.2.i = iD.1562; -> **
4156 #omp parallel shared(iD.1562) -> inner parallel
4157 .omp_data_i.2 = &.omp_data_o.2
4158 .omp_data_i.2->i = .omp_data_i.2->i - 1;
4161 ** This is a problem. The symbol iD.1562 cannot be referenced
4162 inside the body of the outer parallel region. But since we are
4163 emitting this copy operation while expanding the inner parallel
4164 directive, we need to access the CTX structure of the outer
4165 parallel directive to get the correct mapping:
4167 .omp_data_o.2.i = .omp_data_i.1->i
4169 Since there may be other workshare or parallel directives enclosing
4170 the parallel directive, it may be necessary to walk up the context
4171 parent chain. This is not a problem in general because nested
4172 parallelism happens only rarely. */
4174 static tree
4175 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4177 tree t;
4178 omp_context *up;
4180 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4181 t = maybe_lookup_decl (decl, up);
4183 gcc_assert (!ctx->is_nested || t || is_global_var (decl));
4185 return t ? t : decl;
4189 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
4190 in outer contexts. */
4192 static tree
4193 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4195 tree t = NULL;
4196 omp_context *up;
4198 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4199 t = maybe_lookup_decl (decl, up);
4201 return t ? t : decl;
4205 /* Construct the initialization value for reduction operation OP. */
4207 tree
4208 omp_reduction_init_op (location_t loc, enum tree_code op, tree type)
4210 switch (op)
4212 case PLUS_EXPR:
4213 case MINUS_EXPR:
4214 case BIT_IOR_EXPR:
4215 case BIT_XOR_EXPR:
4216 case TRUTH_OR_EXPR:
4217 case TRUTH_ORIF_EXPR:
4218 case TRUTH_XOR_EXPR:
4219 case NE_EXPR:
4220 return build_zero_cst (type);
4222 case MULT_EXPR:
4223 case TRUTH_AND_EXPR:
4224 case TRUTH_ANDIF_EXPR:
4225 case EQ_EXPR:
4226 return fold_convert_loc (loc, type, integer_one_node);
4228 case BIT_AND_EXPR:
4229 return fold_convert_loc (loc, type, integer_minus_one_node);
4231 case MAX_EXPR:
4232 if (SCALAR_FLOAT_TYPE_P (type))
4234 REAL_VALUE_TYPE max, min;
4235 if (HONOR_INFINITIES (type))
4237 real_inf (&max);
4238 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
4240 else
4241 real_maxval (&min, 1, TYPE_MODE (type));
4242 return build_real (type, min);
4244 else if (POINTER_TYPE_P (type))
4246 wide_int min
4247 = wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4248 return wide_int_to_tree (type, min);
4250 else
4252 gcc_assert (INTEGRAL_TYPE_P (type));
4253 return TYPE_MIN_VALUE (type);
4256 case MIN_EXPR:
4257 if (SCALAR_FLOAT_TYPE_P (type))
4259 REAL_VALUE_TYPE max;
4260 if (HONOR_INFINITIES (type))
4261 real_inf (&max);
4262 else
4263 real_maxval (&max, 0, TYPE_MODE (type));
4264 return build_real (type, max);
4266 else if (POINTER_TYPE_P (type))
4268 wide_int max
4269 = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4270 return wide_int_to_tree (type, max);
4272 else
4274 gcc_assert (INTEGRAL_TYPE_P (type));
4275 return TYPE_MAX_VALUE (type);
4278 default:
4279 gcc_unreachable ();
4283 /* Construct the initialization value for reduction CLAUSE. */
4285 tree
4286 omp_reduction_init (tree clause, tree type)
4288 return omp_reduction_init_op (OMP_CLAUSE_LOCATION (clause),
4289 OMP_CLAUSE_REDUCTION_CODE (clause), type);
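/* A few of the initializer values produced above for a C int reduction
   variable (illustrative):

     reduction(+:x)    ->  x = 0
     reduction(*:x)    ->  x = 1
     reduction(&:x)    ->  x = ~0
     reduction(|:x)    ->  x = 0
     reduction(max:x)  ->  x = INT_MIN
     reduction(min:x)  ->  x = INT_MAX  */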
4292 /* Return alignment to be assumed for var in CLAUSE, which should be
4293 OMP_CLAUSE_ALIGNED. */
4295 static tree
4296 omp_clause_aligned_alignment (tree clause)
4298 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (clause))
4299 return OMP_CLAUSE_ALIGNED_ALIGNMENT (clause);
4301 /* Otherwise return the implementation-defined alignment. */
4302 unsigned int al = 1;
4303 machine_mode mode, vmode;
4304 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4305 if (vs)
4306 vs = 1 << floor_log2 (vs);
4307 static enum mode_class classes[]
4308 = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
4309 for (int i = 0; i < 4; i += 2)
4310 for (mode = GET_CLASS_NARROWEST_MODE (classes[i]);
4311 mode != VOIDmode;
4312 mode = GET_MODE_WIDER_MODE (mode))
4314 vmode = targetm.vectorize.preferred_simd_mode (mode);
4315 if (GET_MODE_CLASS (vmode) != classes[i + 1])
4316 continue;
4317 while (vs
4318 && GET_MODE_SIZE (vmode) < vs
4319 && GET_MODE_2XWIDER_MODE (vmode) != VOIDmode)
4320 vmode = GET_MODE_2XWIDER_MODE (vmode);
4322 tree type = lang_hooks.types.type_for_mode (mode, 1);
4323 if (type == NULL_TREE || TYPE_MODE (type) != mode)
4324 continue;
4325 type = build_vector_type (type, GET_MODE_SIZE (vmode)
4326 / GET_MODE_SIZE (mode));
4327 if (TYPE_MODE (type) != vmode)
4328 continue;
4329 if (TYPE_ALIGN_UNIT (type) > al)
4330 al = TYPE_ALIGN_UNIT (type);
4332 return build_int_cst (integer_type_node, al);
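/* Example (the target figures are assumptions): given

     #pragma omp simd aligned(p : 64)

   the explicit 64 is returned directly.  For a bare aligned(p) the
   loop above asks the target for its widest preferred vector modes, so
   on a host with 256-bit vectors one would expect the returned
   implementation-defined alignment to be 32 bytes.  */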
4336 /* Return maximum SIMT width if offloading may target SIMT hardware. */
4338 static int
4339 omp_max_simt_vf (void)
4341 if (!optimize)
4342 return 0;
4343 if (ENABLE_OFFLOADING)
4344 for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; )
4346 if (!strncmp (c, "nvptx", strlen ("nvptx")))
4347 return 32;
4348 else if ((c = strchr (c, ',')))
4349 c++;
4351 return 0;
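/* Sketch of the effect (environment value illustrative): when the
   compiler is configured for offloading and the driver passes, say,

     OFFLOAD_TARGET_NAMES=nvptx-none

   this returns 32, the PTX warp size used as the SIMT vectorization
   factor; otherwise it returns 0 and SIMT privatization is not
   used.  */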
4354 /* Return maximum possible vectorization factor for the target. */
4356 static int
4357 omp_max_vf (void)
4359 if (!optimize
4360 || optimize_debug
4361 || !flag_tree_loop_optimize
4362 || (!flag_tree_loop_vectorize
4363 && (global_options_set.x_flag_tree_loop_vectorize
4364 || global_options_set.x_flag_tree_vectorize)))
4365 return 1;
4367 int vf = 1;
4368 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4369 if (vs)
4370 vf = 1 << floor_log2 (vs);
4371 else
4373 machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
4374 if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
4375 vf = GET_MODE_NUNITS (vqimode);
4377 return vf;
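/* Illustrative numbers (target-dependent assumptions): on a target
   whose autovectorize_vector_sizes hook reports 32- and 16-byte
   vectors, the bitmask 32 | 16 yields vf = 1 << floor_log2 (48) = 32
   QImode elements; at -O0, or when loop vectorization is disabled, the
   function returns 1 so that no SIMD arrays are created.  */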
4380 /* Helper function of lower_rec_input_clauses, used for #pragma omp simd
4381 privatization. */
4383 static bool
4384 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf,
4385 tree &idx, tree &lane, tree &ivar, tree &lvar)
4387 if (max_vf == 0)
4389 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
4390 OMP_CLAUSE__SIMT_))
4391 max_vf = omp_max_simt_vf ();
4392 else
4393 max_vf = omp_max_vf ();
4394 if (max_vf > 1)
4396 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
4397 OMP_CLAUSE_SAFELEN);
4398 if (c
4399 && (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) != INTEGER_CST
4400 || tree_int_cst_sgn (OMP_CLAUSE_SAFELEN_EXPR (c)) != 1))
4401 max_vf = 1;
4402 else if (c && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
4403 max_vf) == -1)
4404 max_vf = tree_to_shwi (OMP_CLAUSE_SAFELEN_EXPR (c));
4406 if (max_vf > 1)
4408 idx = create_tmp_var (unsigned_type_node);
4409 lane = create_tmp_var (unsigned_type_node);
4412 if (max_vf == 1)
4413 return false;
4415 tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
4416 tree avar = create_tmp_var_raw (atype);
4417 if (TREE_ADDRESSABLE (new_var))
4418 TREE_ADDRESSABLE (avar) = 1;
4419 DECL_ATTRIBUTES (avar)
4420 = tree_cons (get_identifier ("omp simd array"), NULL,
4421 DECL_ATTRIBUTES (avar));
4422 gimple_add_tmp_var (avar);
4423 ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, idx,
4424 NULL_TREE, NULL_TREE);
4425 lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, lane,
4426 NULL_TREE, NULL_TREE);
4427 if (DECL_P (new_var))
4429 SET_DECL_VALUE_EXPR (new_var, lvar);
4430 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4432 return true;
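/* Conceptually (a sketch; names illustrative), a privatized scalar x
   in a simd loop receives a per-lane backing array

     int x_simd_array[max_vf];   -> carries the "omp simd array" attribute

   IVAR becomes x_simd_array[idx] for use in the vectorized body, LVAR
   becomes x_simd_array[lane], and x's DECL_VALUE_EXPR is pointed at
   LVAR, so every SIMD lane works on its own copy.  */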
4435 /* Helper function of lower_rec_input_clauses. For a reference
4436 in a simd reduction, create the underlying variable it will reference. */
4438 static void
4439 handle_simd_reference (location_t loc, tree new_vard, gimple_seq *ilist)
4441 tree z = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_vard)));
4442 if (TREE_CONSTANT (z))
4444 z = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_vard)),
4445 get_name (new_vard));
4446 gimple_add_tmp_var (z);
4447 TREE_ADDRESSABLE (z) = 1;
4448 z = build_fold_addr_expr_loc (loc, z);
4449 gimplify_assign (new_vard, z, ilist);
4453 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
4454 from the receiver (aka child) side and initializers for REFERENCE_TYPE
4455 private variables. Initialization statements go in ILIST, while calls
4456 to destructors go in DLIST. */
4458 static void
4459 lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
4460 omp_context *ctx, struct omp_for_data *fd)
4462 tree c, dtor, copyin_seq, x, ptr;
4463 bool copyin_by_ref = false;
4464 bool lastprivate_firstprivate = false;
4465 bool reduction_omp_orig_ref = false;
4466 int pass;
4467 bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
4468 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
4469 bool maybe_simt = is_simd && find_omp_clause (clauses, OMP_CLAUSE__SIMT_);
4470 int max_vf = 0;
4471 tree lane = NULL_TREE, idx = NULL_TREE;
4472 tree simt_lane = NULL_TREE;
4473 tree ivar = NULL_TREE, lvar = NULL_TREE;
4474 gimple_seq llist[3] = { };
4476 copyin_seq = NULL;
4478 /* Set max_vf=1 (which will later enforce safelen=1) in simd loops
4479 with data sharing clauses referencing variable sized vars. That
4480 is unnecessarily hard to support and very unlikely to result in
4481 vectorized code anyway. */
4482 if (is_simd)
4483 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4484 switch (OMP_CLAUSE_CODE (c))
4486 case OMP_CLAUSE_LINEAR:
4487 if (OMP_CLAUSE_LINEAR_ARRAY (c))
4488 max_vf = 1;
4489 /* FALLTHRU */
4490 case OMP_CLAUSE_PRIVATE:
4491 case OMP_CLAUSE_FIRSTPRIVATE:
4492 case OMP_CLAUSE_LASTPRIVATE:
4493 if (is_variable_sized (OMP_CLAUSE_DECL (c)))
4494 max_vf = 1;
4495 break;
4496 case OMP_CLAUSE_REDUCTION:
4497 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF
4498 || is_variable_sized (OMP_CLAUSE_DECL (c)))
4499 max_vf = 1;
4500 break;
4501 default:
4502 continue;
4505 /* Do all the fixed sized types in the first pass, and the variable sized
4506 types in the second pass. This makes sure that the scalar arguments to
4507 the variable sized types are processed before we use them in the
4508 variable sized operations. */
4509 for (pass = 0; pass < 2; ++pass)
4511 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4513 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
4514 tree var, new_var;
4515 bool by_ref;
4516 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
4518 switch (c_kind)
4520 case OMP_CLAUSE_PRIVATE:
4521 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
4522 continue;
4523 break;
4524 case OMP_CLAUSE_SHARED:
4525 /* Ignore shared directives in teams construct. */
4526 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
4527 continue;
4528 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
4530 gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
4531 || is_global_var (OMP_CLAUSE_DECL (c)));
4532 continue;
4534 case OMP_CLAUSE_FIRSTPRIVATE:
4535 case OMP_CLAUSE_COPYIN:
4536 break;
4537 case OMP_CLAUSE_LINEAR:
4538 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c)
4539 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
4540 lastprivate_firstprivate = true;
4541 break;
4542 case OMP_CLAUSE_REDUCTION:
4543 if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4544 reduction_omp_orig_ref = true;
4545 break;
4546 case OMP_CLAUSE__LOOPTEMP_:
4547 /* Handle _looptemp_ clauses only on parallel/task. */
4548 if (fd)
4549 continue;
4550 break;
4551 case OMP_CLAUSE_LASTPRIVATE:
4552 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
4554 lastprivate_firstprivate = true;
4555 if (pass != 0 || is_taskloop_ctx (ctx))
4556 continue;
4558 /* Even without corresponding firstprivate, if
4559 decl is Fortran allocatable, it needs outer var
4560 reference. */
4561 else if (pass == 0
4562 && lang_hooks.decls.omp_private_outer_ref
4563 (OMP_CLAUSE_DECL (c)))
4564 lastprivate_firstprivate = true;
4565 break;
4566 case OMP_CLAUSE_ALIGNED:
4567 if (pass == 0)
4568 continue;
4569 var = OMP_CLAUSE_DECL (c);
4570 if (TREE_CODE (TREE_TYPE (var)) == POINTER_TYPE
4571 && !is_global_var (var))
4573 new_var = maybe_lookup_decl (var, ctx);
4574 if (new_var == NULL_TREE)
4575 new_var = maybe_lookup_decl_in_outer_ctx (var, ctx);
4576 x = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4577 tree alarg = omp_clause_aligned_alignment (c);
4578 alarg = fold_convert_loc (clause_loc, size_type_node, alarg);
4579 x = build_call_expr_loc (clause_loc, x, 2, new_var, alarg);
4580 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4581 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
4582 gimplify_and_add (x, ilist);
4584 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
4585 && is_global_var (var))
4587 tree ptype = build_pointer_type (TREE_TYPE (var)), t, t2;
4588 new_var = lookup_decl (var, ctx);
4589 t = maybe_lookup_decl_in_outer_ctx (var, ctx);
4590 t = build_fold_addr_expr_loc (clause_loc, t);
4591 t2 = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4592 tree alarg = omp_clause_aligned_alignment (c);
4593 alarg = fold_convert_loc (clause_loc, size_type_node, alarg);
4594 t = build_call_expr_loc (clause_loc, t2, 2, t, alarg);
4595 t = fold_convert_loc (clause_loc, ptype, t);
4596 x = create_tmp_var (ptype);
4597 t = build2 (MODIFY_EXPR, ptype, x, t);
4598 gimplify_and_add (t, ilist);
4599 t = build_simple_mem_ref_loc (clause_loc, x);
4600 SET_DECL_VALUE_EXPR (new_var, t);
4601 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4603 continue;
4604 default:
4605 continue;
4608 new_var = var = OMP_CLAUSE_DECL (c);
4609 if (c_kind == OMP_CLAUSE_REDUCTION && TREE_CODE (var) == MEM_REF)
4611 var = TREE_OPERAND (var, 0);
4612 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
4613 var = TREE_OPERAND (var, 0);
4614 if (TREE_CODE (var) == INDIRECT_REF
4615 || TREE_CODE (var) == ADDR_EXPR)
4616 var = TREE_OPERAND (var, 0);
4617 if (is_variable_sized (var))
4619 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
4620 var = DECL_VALUE_EXPR (var);
4621 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
4622 var = TREE_OPERAND (var, 0);
4623 gcc_assert (DECL_P (var));
4625 new_var = var;
4627 if (c_kind != OMP_CLAUSE_COPYIN)
4628 new_var = lookup_decl (var, ctx);
4630 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
4632 if (pass != 0)
4633 continue;
4635 /* C/C++ array section reductions. */
4636 else if (c_kind == OMP_CLAUSE_REDUCTION
4637 && var != OMP_CLAUSE_DECL (c))
4639 if (pass == 0)
4640 continue;
4642 tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1);
4643 tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
4644 if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR)
4646 tree b = TREE_OPERAND (orig_var, 1);
4647 b = maybe_lookup_decl (b, ctx);
4648 if (b == NULL)
4650 b = TREE_OPERAND (orig_var, 1);
4651 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
4653 if (integer_zerop (bias))
4654 bias = b;
4655 else
4657 bias = fold_convert_loc (clause_loc,
4658 TREE_TYPE (b), bias);
4659 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
4660 TREE_TYPE (b), b, bias);
4662 orig_var = TREE_OPERAND (orig_var, 0);
4664 if (TREE_CODE (orig_var) == INDIRECT_REF
4665 || TREE_CODE (orig_var) == ADDR_EXPR)
4666 orig_var = TREE_OPERAND (orig_var, 0);
4667 tree d = OMP_CLAUSE_DECL (c);
4668 tree type = TREE_TYPE (d);
4669 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
4670 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
4671 const char *name = get_name (orig_var);
4672 if (TREE_CONSTANT (v))
4674 x = create_tmp_var_raw (type, name);
4675 gimple_add_tmp_var (x);
4676 TREE_ADDRESSABLE (x) = 1;
4677 x = build_fold_addr_expr_loc (clause_loc, x);
4679 else
4681 tree atmp
4682 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4683 tree t = maybe_lookup_decl (v, ctx);
4684 if (t)
4685 v = t;
4686 else
4687 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
4688 gimplify_expr (&v, ilist, NULL, is_gimple_val, fb_rvalue);
4689 t = fold_build2_loc (clause_loc, PLUS_EXPR,
4690 TREE_TYPE (v), v,
4691 build_int_cst (TREE_TYPE (v), 1));
4692 t = fold_build2_loc (clause_loc, MULT_EXPR,
4693 TREE_TYPE (v), t,
4694 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4695 tree al = size_int (TYPE_ALIGN (TREE_TYPE (type)));
4696 x = build_call_expr_loc (clause_loc, atmp, 2, t, al);
4699 tree ptype = build_pointer_type (TREE_TYPE (type));
4700 x = fold_convert_loc (clause_loc, ptype, x);
4701 tree y = create_tmp_var (ptype, name);
4702 gimplify_assign (y, x, ilist);
4703 x = y;
4704 tree yb = y;
4706 if (!integer_zerop (bias))
4708 bias = fold_convert_loc (clause_loc, pointer_sized_int_node,
4709 bias);
4710 yb = fold_convert_loc (clause_loc, pointer_sized_int_node, yb);
4712 yb = fold_build2_loc (clause_loc, MINUS_EXPR,
4713 pointer_sized_int_node, yb, bias);
4714 x = fold_convert_loc (clause_loc, TREE_TYPE (x), yb);
4715 yb = create_tmp_var (ptype, name);
4716 gimplify_assign (yb, x, ilist);
4717 x = yb;
4720 d = TREE_OPERAND (d, 0);
4721 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
4722 d = TREE_OPERAND (d, 0);
4723 if (TREE_CODE (d) == ADDR_EXPR)
4725 if (orig_var != var)
4727 gcc_assert (is_variable_sized (orig_var));
4728 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4730 gimplify_assign (new_var, x, ilist);
4731 tree new_orig_var = lookup_decl (orig_var, ctx);
4732 tree t = build_fold_indirect_ref (new_var);
4733 DECL_IGNORED_P (new_var) = 0;
4734 TREE_THIS_NOTRAP (t) = 1;
4735 SET_DECL_VALUE_EXPR (new_orig_var, t);
4736 DECL_HAS_VALUE_EXPR_P (new_orig_var) = 1;
4738 else
4740 x = build2 (MEM_REF, TREE_TYPE (new_var), x,
4741 build_int_cst (ptype, 0));
4742 SET_DECL_VALUE_EXPR (new_var, x);
4743 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4746 else
4748 gcc_assert (orig_var == var);
4749 if (TREE_CODE (d) == INDIRECT_REF)
4751 x = create_tmp_var (ptype, name);
4752 TREE_ADDRESSABLE (x) = 1;
4753 gimplify_assign (x, yb, ilist);
4754 x = build_fold_addr_expr_loc (clause_loc, x);
4756 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4757 gimplify_assign (new_var, x, ilist);
4759 tree y1 = create_tmp_var (ptype, NULL);
4760 gimplify_assign (y1, y, ilist);
4761 tree i2 = NULL_TREE, y2 = NULL_TREE;
4762 tree body2 = NULL_TREE, end2 = NULL_TREE;
4763 tree y3 = NULL_TREE, y4 = NULL_TREE;
4764 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd)
4766 y2 = create_tmp_var (ptype, NULL);
4767 gimplify_assign (y2, y, ilist);
4768 tree ref = build_outer_var_ref (var, ctx);
4769 /* For references, build_outer_var_ref already performs this. */
4770 if (TREE_CODE (d) == INDIRECT_REF)
4771 gcc_assert (is_reference (var));
4772 else if (TREE_CODE (d) == ADDR_EXPR)
4773 ref = build_fold_addr_expr (ref);
4774 else if (is_reference (var))
4775 ref = build_fold_addr_expr (ref);
4776 ref = fold_convert_loc (clause_loc, ptype, ref);
4777 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
4778 && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4780 y3 = create_tmp_var (ptype, NULL);
4781 gimplify_assign (y3, unshare_expr (ref), ilist);
4783 if (is_simd)
4785 y4 = create_tmp_var (ptype, NULL);
4786 gimplify_assign (y4, ref, dlist);
4789 tree i = create_tmp_var (TREE_TYPE (v), NULL);
4790 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), ilist);
4791 tree body = create_artificial_label (UNKNOWN_LOCATION);
4792 tree end = create_artificial_label (UNKNOWN_LOCATION);
4793 gimple_seq_add_stmt (ilist, gimple_build_label (body));
4794 if (y2)
4796 i2 = create_tmp_var (TREE_TYPE (v), NULL);
4797 gimplify_assign (i2, build_int_cst (TREE_TYPE (v), 0), dlist);
4798 body2 = create_artificial_label (UNKNOWN_LOCATION);
4799 end2 = create_artificial_label (UNKNOWN_LOCATION);
4800 gimple_seq_add_stmt (dlist, gimple_build_label (body2));
4802 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4804 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
4805 tree decl_placeholder
4806 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
4807 SET_DECL_VALUE_EXPR (decl_placeholder,
4808 build_simple_mem_ref (y1));
4809 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
4810 SET_DECL_VALUE_EXPR (placeholder,
4811 y3 ? build_simple_mem_ref (y3)
4812 : error_mark_node);
4813 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
4814 x = lang_hooks.decls.omp_clause_default_ctor
4815 (c, build_simple_mem_ref (y1),
4816 y3 ? build_simple_mem_ref (y3) : NULL_TREE);
4817 if (x)
4818 gimplify_and_add (x, ilist);
4819 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
4821 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
4822 lower_omp (&tseq, ctx);
4823 gimple_seq_add_seq (ilist, tseq);
4825 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
4826 if (is_simd)
4828 SET_DECL_VALUE_EXPR (decl_placeholder,
4829 build_simple_mem_ref (y2));
4830 SET_DECL_VALUE_EXPR (placeholder,
4831 build_simple_mem_ref (y4));
4832 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
4833 lower_omp (&tseq, ctx);
4834 gimple_seq_add_seq (dlist, tseq);
4835 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
4837 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
4838 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 0;
4839 x = lang_hooks.decls.omp_clause_dtor
4840 (c, build_simple_mem_ref (y2));
4841 if (x)
4843 gimple_seq tseq = NULL;
4844 dtor = x;
4845 gimplify_stmt (&dtor, &tseq);
4846 gimple_seq_add_seq (dlist, tseq);
4849 else
4851 x = omp_reduction_init (c, TREE_TYPE (type));
4852 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
4854 /* reduction(-:var) sums up the partial results, so it
4855 acts identically to reduction(+:var). */
4856 if (code == MINUS_EXPR)
4857 code = PLUS_EXPR;
4859 gimplify_assign (build_simple_mem_ref (y1), x, ilist);
4860 if (is_simd)
4862 x = build2 (code, TREE_TYPE (type),
4863 build_simple_mem_ref (y4),
4864 build_simple_mem_ref (y2));
4865 gimplify_assign (build_simple_mem_ref (y4), x, dlist);
4868 gimple *g
4869 = gimple_build_assign (y1, POINTER_PLUS_EXPR, y1,
4870 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4871 gimple_seq_add_stmt (ilist, g);
4872 if (y3)
4874 g = gimple_build_assign (y3, POINTER_PLUS_EXPR, y3,
4875 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4876 gimple_seq_add_stmt (ilist, g);
4878 g = gimple_build_assign (i, PLUS_EXPR, i,
4879 build_int_cst (TREE_TYPE (i), 1));
4880 gimple_seq_add_stmt (ilist, g);
4881 g = gimple_build_cond (LE_EXPR, i, v, body, end);
4882 gimple_seq_add_stmt (ilist, g);
4883 gimple_seq_add_stmt (ilist, gimple_build_label (end));
4884 if (y2)
4886 g = gimple_build_assign (y2, POINTER_PLUS_EXPR, y2,
4887 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4888 gimple_seq_add_stmt (dlist, g);
4889 if (y4)
4891 g = gimple_build_assign
4892 (y4, POINTER_PLUS_EXPR, y4,
4893 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4894 gimple_seq_add_stmt (dlist, g);
4896 g = gimple_build_assign (i2, PLUS_EXPR, i2,
4897 build_int_cst (TREE_TYPE (i2), 1));
4898 gimple_seq_add_stmt (dlist, g);
4899 g = gimple_build_cond (LE_EXPR, i2, v, body2, end2);
4900 gimple_seq_add_stmt (dlist, g);
4901 gimple_seq_add_stmt (dlist, gimple_build_label (end2));
4903 continue;
4905 else if (is_variable_sized (var))
4907 /* For variable sized types, we need to allocate the
4908 actual storage here. Call alloca and store the
4909 result in the pointer decl that we created elsewhere. */
4910 if (pass == 0)
4911 continue;
4913 if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx))
4915 gcall *stmt;
4916 tree tmp, atmp;
4918 ptr = DECL_VALUE_EXPR (new_var);
4919 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
4920 ptr = TREE_OPERAND (ptr, 0);
4921 gcc_assert (DECL_P (ptr));
4922 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
4924 /* void *tmp = __builtin_alloca */
4925 atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4926 stmt = gimple_build_call (atmp, 2, x,
4927 size_int (DECL_ALIGN (var)));
4928 tmp = create_tmp_var_raw (ptr_type_node);
4929 gimple_add_tmp_var (tmp);
4930 gimple_call_set_lhs (stmt, tmp);
4932 gimple_seq_add_stmt (ilist, stmt);
4934 x = fold_convert_loc (clause_loc, TREE_TYPE (ptr), tmp);
4935 gimplify_assign (ptr, x, ilist);
4938 else if (is_reference (var))
4940 /* For references that are being privatized for Fortran,
4941 allocate new backing storage for the new pointer
4942 variable. This allows us to avoid changing all the
4943 code that expects a pointer to something that expects
4944 a direct variable. */
4945 if (pass == 0)
4946 continue;
4948 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
4949 if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx))
4951 x = build_receiver_ref (var, false, ctx);
4952 x = build_fold_addr_expr_loc (clause_loc, x);
4954 else if (TREE_CONSTANT (x))
4956 /* For a reduction in a SIMD loop, defer adding the
4957 initialization of the reference, because if we decide
4958 to use a SIMD array for it, the initialization could cause
4959 an expansion ICE. */
4960 if (c_kind == OMP_CLAUSE_REDUCTION && is_simd)
4961 x = NULL_TREE;
4962 else
4964 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
4965 get_name (var));
4966 gimple_add_tmp_var (x);
4967 TREE_ADDRESSABLE (x) = 1;
4968 x = build_fold_addr_expr_loc (clause_loc, x);
4971 else
4973 tree atmp
4974 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4975 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
4976 tree al = size_int (TYPE_ALIGN (rtype));
4977 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
4980 if (x)
4982 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4983 gimplify_assign (new_var, x, ilist);
4986 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
4988 else if (c_kind == OMP_CLAUSE_REDUCTION
4989 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4991 if (pass == 0)
4992 continue;
4994 else if (pass != 0)
4995 continue;
4997 switch (OMP_CLAUSE_CODE (c))
4999 case OMP_CLAUSE_SHARED:
5000 /* Ignore shared directives in teams construct. */
5001 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
5002 continue;
5003 /* Shared global vars are just accessed directly. */
5004 if (is_global_var (new_var))
5005 break;
5006 /* For taskloop firstprivate/lastprivate, represented
5007 as firstprivate and shared clause on the task, new_var
5008 is the firstprivate var. */
5009 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
5010 break;
5011 /* Set up the DECL_VALUE_EXPR for shared variables now. This
5012 needs to be delayed until after fixup_child_record_type so
5013 that we get the correct type during the dereference. */
5014 by_ref = use_pointer_for_field (var, ctx);
5015 x = build_receiver_ref (var, by_ref, ctx);
5016 SET_DECL_VALUE_EXPR (new_var, x);
5017 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
5019 /* ??? If VAR is not passed by reference, and the variable
5020 hasn't been initialized yet, then we'll get a warning for
5021 the store into the omp_data_s structure. Ideally, we'd be
5022 able to notice this and not store anything at all, but
5023 we're generating code too early. Suppress the warning. */
5024 if (!by_ref)
5025 TREE_NO_WARNING (var) = 1;
5026 break;
5028 case OMP_CLAUSE_LASTPRIVATE:
5029 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
5030 break;
5031 /* FALLTHRU */
5033 case OMP_CLAUSE_PRIVATE:
5034 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE)
5035 x = build_outer_var_ref (var, ctx);
5036 else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
5038 if (is_task_ctx (ctx))
5039 x = build_receiver_ref (var, false, ctx);
5040 else
5041 x = build_outer_var_ref (var, ctx, OMP_CLAUSE_PRIVATE);
5043 else
5044 x = NULL;
5045 do_private:
5046 tree nx;
5047 nx = lang_hooks.decls.omp_clause_default_ctor
5048 (c, unshare_expr (new_var), x);
5049 if (is_simd)
5051 tree y = lang_hooks.decls.omp_clause_dtor (c, new_var);
5052 if ((TREE_ADDRESSABLE (new_var) || nx || y
5053 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
5054 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5055 idx, lane, ivar, lvar))
5057 if (nx)
5058 x = lang_hooks.decls.omp_clause_default_ctor
5059 (c, unshare_expr (ivar), x);
5060 if (nx && x)
5061 gimplify_and_add (x, &llist[0]);
5062 if (y)
5064 y = lang_hooks.decls.omp_clause_dtor (c, ivar);
5065 if (y)
5067 gimple_seq tseq = NULL;
5069 dtor = y;
5070 gimplify_stmt (&dtor, &tseq);
5071 gimple_seq_add_seq (&llist[1], tseq);
5074 break;
5077 if (nx)
5078 gimplify_and_add (nx, ilist);
5079 /* FALLTHRU */
5081 do_dtor:
5082 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
5083 if (x)
5085 gimple_seq tseq = NULL;
5087 dtor = x;
5088 gimplify_stmt (&dtor, &tseq);
5089 gimple_seq_add_seq (dlist, tseq);
5091 break;
5093 case OMP_CLAUSE_LINEAR:
5094 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5095 goto do_firstprivate;
5096 if (OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
5097 x = NULL;
5098 else
5099 x = build_outer_var_ref (var, ctx);
5100 goto do_private;
5102 case OMP_CLAUSE_FIRSTPRIVATE:
5103 if (is_task_ctx (ctx))
5105 if (is_reference (var) || is_variable_sized (var))
5106 goto do_dtor;
5107 else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
5108 ctx))
5109 || use_pointer_for_field (var, NULL))
5111 x = build_receiver_ref (var, false, ctx);
5112 SET_DECL_VALUE_EXPR (new_var, x);
5113 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
5114 goto do_dtor;
5117 do_firstprivate:
5118 x = build_outer_var_ref (var, ctx);
5119 if (is_simd)
5121 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5122 && gimple_omp_for_combined_into_p (ctx->stmt))
5124 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5125 tree stept = TREE_TYPE (t);
5126 tree ct = find_omp_clause (clauses,
5127 OMP_CLAUSE__LOOPTEMP_);
5128 gcc_assert (ct);
5129 tree l = OMP_CLAUSE_DECL (ct);
5130 tree n1 = fd->loop.n1;
5131 tree step = fd->loop.step;
5132 tree itype = TREE_TYPE (l);
5133 if (POINTER_TYPE_P (itype))
5134 itype = signed_type_for (itype);
5135 l = fold_build2 (MINUS_EXPR, itype, l, n1);
5136 if (TYPE_UNSIGNED (itype)
5137 && fd->loop.cond_code == GT_EXPR)
5138 l = fold_build2 (TRUNC_DIV_EXPR, itype,
5139 fold_build1 (NEGATE_EXPR, itype, l),
5140 fold_build1 (NEGATE_EXPR,
5141 itype, step));
5142 else
5143 l = fold_build2 (TRUNC_DIV_EXPR, itype, l, step);
5144 t = fold_build2 (MULT_EXPR, stept,
5145 fold_convert (stept, l), t);
5147 if (OMP_CLAUSE_LINEAR_ARRAY (c))
5149 x = lang_hooks.decls.omp_clause_linear_ctor
5150 (c, new_var, x, t);
5151 gimplify_and_add (x, ilist);
5152 goto do_dtor;
5155 if (POINTER_TYPE_P (TREE_TYPE (x)))
5156 x = fold_build2 (POINTER_PLUS_EXPR,
5157 TREE_TYPE (x), x, t);
5158 else
5159 x = fold_build2 (PLUS_EXPR, TREE_TYPE (x), x, t);
5162 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR
5163 || TREE_ADDRESSABLE (new_var))
5164 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5165 idx, lane, ivar, lvar))
5167 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
5169 tree iv = create_tmp_var (TREE_TYPE (new_var));
5170 x = lang_hooks.decls.omp_clause_copy_ctor (c, iv, x);
5171 gimplify_and_add (x, ilist);
5172 gimple_stmt_iterator gsi
5173 = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5174 gassign *g
5175 = gimple_build_assign (unshare_expr (lvar), iv);
5176 gsi_insert_before_without_update (&gsi, g,
5177 GSI_SAME_STMT);
5178 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5179 enum tree_code code = PLUS_EXPR;
5180 if (POINTER_TYPE_P (TREE_TYPE (new_var)))
5181 code = POINTER_PLUS_EXPR;
5182 g = gimple_build_assign (iv, code, iv, t);
5183 gsi_insert_before_without_update (&gsi, g,
5184 GSI_SAME_STMT);
5185 break;
5187 x = lang_hooks.decls.omp_clause_copy_ctor
5188 (c, unshare_expr (ivar), x);
5189 gimplify_and_add (x, &llist[0]);
5190 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5191 if (x)
5193 gimple_seq tseq = NULL;
5195 dtor = x;
5196 gimplify_stmt (&dtor, &tseq);
5197 gimple_seq_add_seq (&llist[1], tseq);
5199 break;
5202 x = lang_hooks.decls.omp_clause_copy_ctor
5203 (c, unshare_expr (new_var), x);
5204 gimplify_and_add (x, ilist);
5205 goto do_dtor;
5207 case OMP_CLAUSE__LOOPTEMP_:
5208 gcc_assert (is_taskreg_ctx (ctx));
5209 x = build_outer_var_ref (var, ctx);
5210 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
5211 gimplify_and_add (x, ilist);
5212 break;
5214 case OMP_CLAUSE_COPYIN:
5215 by_ref = use_pointer_for_field (var, NULL);
5216 x = build_receiver_ref (var, by_ref, ctx);
5217 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
5218 append_to_statement_list (x, &copyin_seq);
5219 copyin_by_ref |= by_ref;
5220 break;
5222 case OMP_CLAUSE_REDUCTION:
5223 /* OpenACC reductions are initialized using the
5224 GOACC_REDUCTION internal function. */
5225 if (is_gimple_omp_oacc (ctx->stmt))
5226 break;
5227 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5229 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5230 gimple *tseq;
5231 x = build_outer_var_ref (var, ctx);
5233 if (is_reference (var)
5234 && !useless_type_conversion_p (TREE_TYPE (placeholder),
5235 TREE_TYPE (x)))
5236 x = build_fold_addr_expr_loc (clause_loc, x);
5237 SET_DECL_VALUE_EXPR (placeholder, x);
5238 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5239 tree new_vard = new_var;
5240 if (is_reference (var))
5242 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5243 new_vard = TREE_OPERAND (new_var, 0);
5244 gcc_assert (DECL_P (new_vard));
5246 if (is_simd
5247 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5248 idx, lane, ivar, lvar))
5250 if (new_vard == new_var)
5252 gcc_assert (DECL_VALUE_EXPR (new_var) == lvar);
5253 SET_DECL_VALUE_EXPR (new_var, ivar);
5255 else
5257 SET_DECL_VALUE_EXPR (new_vard,
5258 build_fold_addr_expr (ivar));
5259 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5261 x = lang_hooks.decls.omp_clause_default_ctor
5262 (c, unshare_expr (ivar),
5263 build_outer_var_ref (var, ctx));
5264 if (x)
5265 gimplify_and_add (x, &llist[0]);
5266 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5268 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5269 lower_omp (&tseq, ctx);
5270 gimple_seq_add_seq (&llist[0], tseq);
5272 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5273 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5274 lower_omp (&tseq, ctx);
5275 gimple_seq_add_seq (&llist[1], tseq);
5276 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5277 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5278 if (new_vard == new_var)
5279 SET_DECL_VALUE_EXPR (new_var, lvar);
5280 else
5281 SET_DECL_VALUE_EXPR (new_vard,
5282 build_fold_addr_expr (lvar));
5283 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5284 if (x)
5286 tseq = NULL;
5287 dtor = x;
5288 gimplify_stmt (&dtor, &tseq);
5289 gimple_seq_add_seq (&llist[1], tseq);
5291 break;
5293 /* If this is a reference to constant size reduction var
5294 with placeholder, we haven't emitted the initializer
5295 for it because it is undesirable if SIMD arrays are used.
5296 But if they aren't used, we need to emit the deferred
5297 initialization now. */
5298 else if (is_reference (var) && is_simd)
5299 handle_simd_reference (clause_loc, new_vard, ilist);
5300 x = lang_hooks.decls.omp_clause_default_ctor
5301 (c, unshare_expr (new_var),
5302 build_outer_var_ref (var, ctx));
5303 if (x)
5304 gimplify_and_add (x, ilist);
5305 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5307 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5308 lower_omp (&tseq, ctx);
5309 gimple_seq_add_seq (ilist, tseq);
5311 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5312 if (is_simd)
5314 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5315 lower_omp (&tseq, ctx);
5316 gimple_seq_add_seq (dlist, tseq);
5317 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5319 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5320 goto do_dtor;
5322 else
5324 x = omp_reduction_init (c, TREE_TYPE (new_var));
5325 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
5326 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
5328 /* reduction(-:var) sums up the partial results, so it
5329 acts identically to reduction(+:var). */
5330 if (code == MINUS_EXPR)
5331 code = PLUS_EXPR;
5333 tree new_vard = new_var;
5334 if (is_simd && is_reference (var))
5336 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5337 new_vard = TREE_OPERAND (new_var, 0);
5338 gcc_assert (DECL_P (new_vard));
5340 if (is_simd
5341 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5342 idx, lane, ivar, lvar))
5344 tree ref = build_outer_var_ref (var, ctx);
5346 gimplify_assign (unshare_expr (ivar), x, &llist[0]);
5348 if (maybe_simt)
5350 if (!simt_lane)
5351 simt_lane = create_tmp_var (unsigned_type_node);
5352 x = build_call_expr_internal_loc
5353 (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_BFLY,
5354 TREE_TYPE (ivar), 2, ivar, simt_lane);
5355 x = build2 (code, TREE_TYPE (ivar), ivar, x);
5356 gimplify_assign (ivar, x, &llist[2]);
5358 x = build2 (code, TREE_TYPE (ref), ref, ivar);
5359 ref = build_outer_var_ref (var, ctx);
5360 gimplify_assign (ref, x, &llist[1]);
5362 if (new_vard != new_var)
5364 SET_DECL_VALUE_EXPR (new_vard,
5365 build_fold_addr_expr (lvar));
5366 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5369 else
5371 if (is_reference (var) && is_simd)
5372 handle_simd_reference (clause_loc, new_vard, ilist);
5373 gimplify_assign (new_var, x, ilist);
5374 if (is_simd)
5376 tree ref = build_outer_var_ref (var, ctx);
5378 x = build2 (code, TREE_TYPE (ref), ref, new_var);
5379 ref = build_outer_var_ref (var, ctx);
5380 gimplify_assign (ref, x, dlist);
5384 break;
5386 default:
5387 gcc_unreachable ();
5392 if (lane)
5394 tree uid = create_tmp_var (ptr_type_node, "simduid");
5395 /* Don't want uninit warnings on simduid; it is always uninitialized,
5396 since we use it only for its DECL_UID, not for its value. */
5397 TREE_NO_WARNING (uid) = 1;
5398 gimple *g
5399 = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
5400 gimple_call_set_lhs (g, lane);
5401 gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5402 gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
5403 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
5404 OMP_CLAUSE__SIMDUID__DECL (c) = uid;
5405 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5406 gimple_omp_for_set_clauses (ctx->stmt, c);
5407 g = gimple_build_assign (lane, INTEGER_CST,
5408 build_int_cst (unsigned_type_node, 0));
5409 gimple_seq_add_stmt (ilist, g);
5410 /* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */
5411 if (llist[2])
5413 tree simt_vf = create_tmp_var (unsigned_type_node);
5414 g = gimple_build_call_internal (IFN_GOMP_SIMT_VF, 0);
5415 gimple_call_set_lhs (g, simt_vf);
5416 gimple_seq_add_stmt (dlist, g);
5418 tree t = build_int_cst (unsigned_type_node, 1);
5419 g = gimple_build_assign (simt_lane, INTEGER_CST, t);
5420 gimple_seq_add_stmt (dlist, g);
5422 t = build_int_cst (unsigned_type_node, 0);
5423 g = gimple_build_assign (idx, INTEGER_CST, t);
5424 gimple_seq_add_stmt (dlist, g);
5426 tree body = create_artificial_label (UNKNOWN_LOCATION);
5427 tree header = create_artificial_label (UNKNOWN_LOCATION);
5428 tree end = create_artificial_label (UNKNOWN_LOCATION);
5429 gimple_seq_add_stmt (dlist, gimple_build_goto (header));
5430 gimple_seq_add_stmt (dlist, gimple_build_label (body));
5432 gimple_seq_add_seq (dlist, llist[2]);
5434 g = gimple_build_assign (simt_lane, LSHIFT_EXPR, simt_lane, integer_one_node);
5435 gimple_seq_add_stmt (dlist, g);
5437 gimple_seq_add_stmt (dlist, gimple_build_label (header));
5438 g = gimple_build_cond (LT_EXPR, simt_lane, simt_vf, body, end);
5439 gimple_seq_add_stmt (dlist, g);
5441 gimple_seq_add_stmt (dlist, gimple_build_label (end));
5443 for (int i = 0; i < 2; i++)
5444 if (llist[i])
5446 tree vf = create_tmp_var (unsigned_type_node);
5447 g = gimple_build_call_internal (IFN_GOMP_SIMD_VF, 1, uid);
5448 gimple_call_set_lhs (g, vf);
5449 gimple_seq *seq = i == 0 ? ilist : dlist;
5450 gimple_seq_add_stmt (seq, g);
5451 tree t = build_int_cst (unsigned_type_node, 0);
5452 g = gimple_build_assign (idx, INTEGER_CST, t);
5453 gimple_seq_add_stmt (seq, g);
5454 tree body = create_artificial_label (UNKNOWN_LOCATION);
5455 tree header = create_artificial_label (UNKNOWN_LOCATION);
5456 tree end = create_artificial_label (UNKNOWN_LOCATION);
5457 gimple_seq_add_stmt (seq, gimple_build_goto (header));
5458 gimple_seq_add_stmt (seq, gimple_build_label (body));
5459 gimple_seq_add_seq (seq, llist[i]);
5460 t = build_int_cst (unsigned_type_node, 1);
5461 g = gimple_build_assign (idx, PLUS_EXPR, idx, t);
5462 gimple_seq_add_stmt (seq, g);
5463 gimple_seq_add_stmt (seq, gimple_build_label (header));
5464 g = gimple_build_cond (LT_EXPR, idx, vf, body, end);
5465 gimple_seq_add_stmt (seq, g);
5466 gimple_seq_add_stmt (seq, gimple_build_label (end));
5470 /* The copyin sequence is not to be executed by the main thread, since
5471 that would result in self-copies. Perhaps not visible to scalars,
5472 but it certainly is to C++ operator=. */
5473 if (copyin_seq)
5475 x = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM), 0);
5477 x = build2 (NE_EXPR, boolean_type_node, x,
5478 build_int_cst (TREE_TYPE (x), 0));
5479 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
5480 gimplify_and_add (x, ilist);
5483 /* If any copyin variable is passed by reference, we must ensure the
5484 master thread doesn't modify it before it is copied over in all
5485 threads. Similarly for variables in both firstprivate and
5486 lastprivate clauses we need to ensure the lastprivate copying
5487 happens after firstprivate copying in all threads. And similarly
5488 for UDRs if initializer expression refers to omp_orig. */
5489 if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
5491 /* Don't add any barrier for #pragma omp simd or
5492 #pragma omp distribute. */
5493 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
5494 || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)
5495 gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE));
5498 /* If max_vf is non-zero, then we can use only a vectorization factor
5499 up to the max_vf we chose. So stick it into the safelen clause. */
5500 if (max_vf)
5502 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
5503 OMP_CLAUSE_SAFELEN);
5504 if (c == NULL_TREE
5505 || (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) == INTEGER_CST
5506 && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
5507 max_vf) == 1))
5509 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
5510 OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
5511 max_vf);
5512 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5513 gimple_omp_for_set_clauses (ctx->stmt, c);
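/* To illustrate the safelen adjustment just above (example only):
   with

     #pragma omp simd safelen(16) private(x)

   and a privatization max_vf of 8, the existing safelen expression
   (16) is greater than max_vf, so a new safelen(8) clause is
   prepended, tightening the bound the vectorizer is allowed to
   assume.  */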
5519 /* Generate code to implement the LASTPRIVATE clauses. This is used for
5520 both parallel and workshare constructs. PREDICATE may be NULL if it's
5521 always true. */
5523 static void
5524 lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
5525 omp_context *ctx)
5527 tree x, c, label = NULL, orig_clauses = clauses;
5528 bool par_clauses = false;
5529 tree simduid = NULL, lastlane = NULL, simtcond = NULL, simtlast = NULL;
5531 /* Early exit if there are no lastprivate or linear clauses. */
5532 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
5533 if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LASTPRIVATE
5534 || (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LINEAR
5535 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (clauses)))
5536 break;
5537 if (clauses == NULL)
5539 /* If this was a workshare clause, see if it had been combined
5540 with its parallel. In that case, look for the clauses on the
5541 parallel statement itself. */
5542 if (is_parallel_ctx (ctx))
5543 return;
5545 ctx = ctx->outer;
5546 if (ctx == NULL || !is_parallel_ctx (ctx))
5547 return;
5549 clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5550 OMP_CLAUSE_LASTPRIVATE);
5551 if (clauses == NULL)
5552 return;
5553 par_clauses = true;
5556 bool maybe_simt = false;
5557 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5558 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5560 maybe_simt = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMT_);
5561 simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
5562 if (simduid)
5563 simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5566 if (predicate)
5568 gcond *stmt;
5569 tree label_true, arm1, arm2;
5570 enum tree_code pred_code = TREE_CODE (predicate);
5572 label = create_artificial_label (UNKNOWN_LOCATION);
5573 label_true = create_artificial_label (UNKNOWN_LOCATION);
5574 if (TREE_CODE_CLASS (pred_code) == tcc_comparison)
5576 arm1 = TREE_OPERAND (predicate, 0);
5577 arm2 = TREE_OPERAND (predicate, 1);
5578 gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue);
5579 gimplify_expr (&arm2, stmt_list, NULL, is_gimple_val, fb_rvalue);
5581 else
5583 arm1 = predicate;
5584 gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue);
5585 arm2 = boolean_false_node;
5586 pred_code = NE_EXPR;
5588 if (maybe_simt)
5590 c = build2 (pred_code, boolean_type_node, arm1, arm2);
5591 c = fold_convert (integer_type_node, c);
5592 simtcond = create_tmp_var (integer_type_node);
5593 gimplify_assign (simtcond, c, stmt_list);
5594 gcall *g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY,
5595 1, simtcond);
5596 c = create_tmp_var (integer_type_node);
5597 gimple_call_set_lhs (g, c);
5598 gimple_seq_add_stmt (stmt_list, g);
5599 stmt = gimple_build_cond (NE_EXPR, c, integer_zero_node,
5600 label_true, label);
5602 else
5603 stmt = gimple_build_cond (pred_code, arm1, arm2, label_true, label);
5604 gimple_seq_add_stmt (stmt_list, stmt);
5605 gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true));
5608 for (c = clauses; c ;)
5610 tree var, new_var;
5611 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5613 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5614 || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5615 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
5617 var = OMP_CLAUSE_DECL (c);
5618 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5619 && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
5620 && is_taskloop_ctx (ctx))
5622 gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
5623 new_var = lookup_decl (var, ctx->outer);
5625 else
5627 new_var = lookup_decl (var, ctx);
5628 /* Avoid uninitialized warnings for lastprivate and
5629 for linear iterators. */
5630 if (predicate
5631 && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5632 || OMP_CLAUSE_LINEAR_NO_COPYIN (c)))
5633 TREE_NO_WARNING (new_var) = 1;
5636 if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
5638 tree val = DECL_VALUE_EXPR (new_var);
5639 if (TREE_CODE (val) == ARRAY_REF
5640 && VAR_P (TREE_OPERAND (val, 0))
5641 && lookup_attribute ("omp simd array",
5642 DECL_ATTRIBUTES (TREE_OPERAND (val,
5643 0))))
5645 if (lastlane == NULL)
5647 lastlane = create_tmp_var (unsigned_type_node);
5648 gcall *g
5649 = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
5650 2, simduid,
5651 TREE_OPERAND (val, 1));
5652 gimple_call_set_lhs (g, lastlane);
5653 gimple_seq_add_stmt (stmt_list, g);
5655 new_var = build4 (ARRAY_REF, TREE_TYPE (val),
5656 TREE_OPERAND (val, 0), lastlane,
5657 NULL_TREE, NULL_TREE);
5658 if (maybe_simt)
5660 gcall *g;
5661 if (simtlast == NULL)
5663 simtlast = create_tmp_var (unsigned_type_node);
5664 g = gimple_build_call_internal
5665 (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
5666 gimple_call_set_lhs (g, simtlast);
5667 gimple_seq_add_stmt (stmt_list, g);
5669 x = build_call_expr_internal_loc
5670 (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
5671 TREE_TYPE (new_var), 2, new_var, simtlast);
5672 new_var = unshare_expr (new_var);
5673 gimplify_assign (new_var, x, stmt_list);
5674 new_var = unshare_expr (new_var);
5679 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5680 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
5682 lower_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
5683 gimple_seq_add_seq (stmt_list,
5684 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
5685 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c) = NULL;
5687 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5688 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
5690 lower_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
5691 gimple_seq_add_seq (stmt_list,
5692 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c));
5693 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
5696 x = NULL_TREE;
5697 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5698 && OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV (c))
5700 gcc_checking_assert (is_taskloop_ctx (ctx));
5701 tree ovar = maybe_lookup_decl_in_outer_ctx (var,
5702 ctx->outer->outer);
5703 if (is_global_var (ovar))
5704 x = ovar;
5706 if (!x)
5707 x = build_outer_var_ref (var, ctx, OMP_CLAUSE_LASTPRIVATE);
5708 if (is_reference (var))
5709 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5710 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
5711 gimplify_and_add (x, stmt_list);
5713 c = OMP_CLAUSE_CHAIN (c);
5714 if (c == NULL && !par_clauses)
5716 /* If this was a workshare clause, see if it had been combined
5717 with its parallel. In that case, continue looking for the
5718 clauses also on the parallel statement itself. */
5719 if (is_parallel_ctx (ctx))
5720 break;
5722 ctx = ctx->outer;
5723 if (ctx == NULL || !is_parallel_ctx (ctx))
5724 break;
5726 c = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5727 OMP_CLAUSE_LASTPRIVATE);
5728 par_clauses = true;
5732 if (label)
5733 gimple_seq_add_stmt (stmt_list, gimple_build_label (label));
5736 /* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
5737 (which might be a placeholder). INNER is true if this is an inner
5738 axis of a multi-axis loop. FORK and JOIN are (optional) fork and
5739 join markers. Generate the before-loop forking sequence in
5740 FORK_SEQ and the after-loop joining sequence in JOIN_SEQ. The
5741 general form of these sequences is
5743 GOACC_REDUCTION_SETUP
5744 GOACC_FORK
5745 GOACC_REDUCTION_INIT
5747 GOACC_REDUCTION_FINI
5748 GOACC_JOIN
5749 GOACC_REDUCTION_TEARDOWN. */
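/* As an illustrative sketch (not taken from any dump or testcase), a
   loop such as

       #pragma acc loop gang reduction (+:sum)

   yields, for the gang axis, roughly

       sum = GOACC_REDUCTION_SETUP (sum, ...);
       GOACC_FORK
       sum = GOACC_REDUCTION_INIT (sum, ...);
       ... loop body ...
       sum = GOACC_REDUCTION_FINI (sum, ...);
       GOACC_JOIN
       sum = GOACC_REDUCTION_TEARDOWN (sum, ...);

   the target compiler later expands these calls according to the
   partitioning actually chosen.  */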
5751 static void
5752 lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
5753 gcall *fork, gcall *join, gimple_seq *fork_seq,
5754 gimple_seq *join_seq, omp_context *ctx)
5756 gimple_seq before_fork = NULL;
5757 gimple_seq after_fork = NULL;
5758 gimple_seq before_join = NULL;
5759 gimple_seq after_join = NULL;
5760 tree init_code = NULL_TREE, fini_code = NULL_TREE,
5761 setup_code = NULL_TREE, teardown_code = NULL_TREE;
5762 unsigned offset = 0;
5764 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5765 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5767 tree orig = OMP_CLAUSE_DECL (c);
5768 tree var = maybe_lookup_decl (orig, ctx);
5769 tree ref_to_res = NULL_TREE;
5770 tree incoming, outgoing, v1, v2, v3;
5771 bool is_private = false;
5773 enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
5774 if (rcode == MINUS_EXPR)
5775 rcode = PLUS_EXPR;
5776 else if (rcode == TRUTH_ANDIF_EXPR)
5777 rcode = BIT_AND_EXPR;
5778 else if (rcode == TRUTH_ORIF_EXPR)
5779 rcode = BIT_IOR_EXPR;
5780 tree op = build_int_cst (unsigned_type_node, rcode);
5782 if (!var)
5783 var = orig;
5785 incoming = outgoing = var;
5787 if (!inner)
5789 /* See if an outer construct also reduces this variable. */
5790 omp_context *outer = ctx;
5792 while (omp_context *probe = outer->outer)
5794 enum gimple_code type = gimple_code (probe->stmt);
5795 tree cls;
5797 switch (type)
5799 case GIMPLE_OMP_FOR:
5800 cls = gimple_omp_for_clauses (probe->stmt);
5801 break;
5803 case GIMPLE_OMP_TARGET:
5804 if (gimple_omp_target_kind (probe->stmt)
5805 != GF_OMP_TARGET_KIND_OACC_PARALLEL)
5806 goto do_lookup;
5808 cls = gimple_omp_target_clauses (probe->stmt);
5809 break;
5811 default:
5812 goto do_lookup;
5815 outer = probe;
5816 for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
5817 if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
5818 && orig == OMP_CLAUSE_DECL (cls))
5820 incoming = outgoing = lookup_decl (orig, probe);
5821 goto has_outer_reduction;
5823 else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
5824 || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
5825 && orig == OMP_CLAUSE_DECL (cls))
5827 is_private = true;
5828 goto do_lookup;
5832 do_lookup:
5833 /* This is the outermost construct with this reduction;
5834 see if there's a mapping for it. */
5835 if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
5836 && maybe_lookup_field (orig, outer) && !is_private)
5838 ref_to_res = build_receiver_ref (orig, false, outer);
5839 if (is_reference (orig))
5840 ref_to_res = build_simple_mem_ref (ref_to_res);
5842 tree type = TREE_TYPE (var);
5843 if (POINTER_TYPE_P (type))
5844 type = TREE_TYPE (type);
5846 outgoing = var;
5847 incoming = omp_reduction_init_op (loc, rcode, type);
5849 else
5851 /* Try to look at enclosing contexts for the reduction var;
5852 use the original if no mapping is found. */
5853 tree t = NULL_TREE;
5854 omp_context *c = ctx->outer;
5855 while (c && !t)
5857 t = maybe_lookup_decl (orig, c);
5858 c = c->outer;
5860 incoming = outgoing = (t ? t : orig);
5863 has_outer_reduction:;
5866 if (!ref_to_res)
5867 ref_to_res = integer_zero_node;
5869 if (is_reference (orig))
5871 tree type = TREE_TYPE (var);
5872 const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
5874 if (!inner)
5876 tree x = create_tmp_var (TREE_TYPE (type), id);
5877 gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
5880 v1 = create_tmp_var (type, id);
5881 v2 = create_tmp_var (type, id);
5882 v3 = create_tmp_var (type, id);
5884 gimplify_assign (v1, var, fork_seq);
5885 gimplify_assign (v2, var, fork_seq);
5886 gimplify_assign (v3, var, fork_seq);
5888 var = build_simple_mem_ref (var);
5889 v1 = build_simple_mem_ref (v1);
5890 v2 = build_simple_mem_ref (v2);
5891 v3 = build_simple_mem_ref (v3);
5892 outgoing = build_simple_mem_ref (outgoing);
5894 if (!TREE_CONSTANT (incoming))
5895 incoming = build_simple_mem_ref (incoming);
5897 else
5898 v1 = v2 = v3 = var;
5900 /* Determine position in reduction buffer, which may be used
5901 by the target. */
5902 enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
5903 unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
5904 offset = (offset + align - 1) & ~(align - 1);
5905 tree off = build_int_cst (sizetype, offset);
5906 offset += GET_MODE_SIZE (mode);
5908 if (!init_code)
5910 init_code = build_int_cst (integer_type_node,
5911 IFN_GOACC_REDUCTION_INIT);
5912 fini_code = build_int_cst (integer_type_node,
5913 IFN_GOACC_REDUCTION_FINI);
5914 setup_code = build_int_cst (integer_type_node,
5915 IFN_GOACC_REDUCTION_SETUP);
5916 teardown_code = build_int_cst (integer_type_node,
5917 IFN_GOACC_REDUCTION_TEARDOWN);
5920 tree setup_call
5921 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5922 TREE_TYPE (var), 6, setup_code,
5923 unshare_expr (ref_to_res),
5924 incoming, level, op, off);
5925 tree init_call
5926 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5927 TREE_TYPE (var), 6, init_code,
5928 unshare_expr (ref_to_res),
5929 v1, level, op, off);
5930 tree fini_call
5931 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5932 TREE_TYPE (var), 6, fini_code,
5933 unshare_expr (ref_to_res),
5934 v2, level, op, off);
5935 tree teardown_call
5936 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5937 TREE_TYPE (var), 6, teardown_code,
5938 ref_to_res, v3, level, op, off);
5940 gimplify_assign (v1, setup_call, &before_fork);
5941 gimplify_assign (v2, init_call, &after_fork);
5942 gimplify_assign (v3, fini_call, &before_join);
5943 gimplify_assign (outgoing, teardown_call, &after_join);
5946 /* Now stitch things together. */
5947 gimple_seq_add_seq (fork_seq, before_fork);
5948 if (fork)
5949 gimple_seq_add_stmt (fork_seq, fork);
5950 gimple_seq_add_seq (fork_seq, after_fork);
5952 gimple_seq_add_seq (join_seq, before_join);
5953 if (join)
5954 gimple_seq_add_stmt (join_seq, join);
5955 gimple_seq_add_seq (join_seq, after_join);
5958 /* Generate code to implement the REDUCTION clauses. */
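/* A schematic example (not lifted from the testsuite): for a single

       #pragma omp parallel for reduction (+:sum)

   clause, the merge below is emitted as one atomic update, roughly

       #pragma omp atomic
       sum_outer = sum_outer + sum_private;

   whereas several reduction clauses, array sections or user-defined
   reductions are merged inside a GOMP_atomic_start ()/GOMP_atomic_end ()
   pair instead.  */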
5960 static void
5961 lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
5963 gimple_seq sub_seq = NULL;
5964 gimple *stmt;
5965 tree x, c;
5966 int count = 0;
5968 /* OpenACC loop reductions are handled elsewhere. */
5969 if (is_gimple_omp_oacc (ctx->stmt))
5970 return;
5972 /* SIMD reductions are handled in lower_rec_input_clauses. */
5973 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5974 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5975 return;
5977 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
5978 update in that case, otherwise use a lock. */
5979 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
5980 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5982 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
5983 || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
5985 /* Never use OMP_ATOMIC for array reductions or UDRs. */
5986 count = -1;
5987 break;
5989 count++;
5992 if (count == 0)
5993 return;
5995 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
5997 tree var, ref, new_var, orig_var;
5998 enum tree_code code;
5999 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6001 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
6002 continue;
6004 orig_var = var = OMP_CLAUSE_DECL (c);
6005 if (TREE_CODE (var) == MEM_REF)
6007 var = TREE_OPERAND (var, 0);
6008 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
6009 var = TREE_OPERAND (var, 0);
6010 if (TREE_CODE (var) == INDIRECT_REF
6011 || TREE_CODE (var) == ADDR_EXPR)
6012 var = TREE_OPERAND (var, 0);
6013 orig_var = var;
6014 if (is_variable_sized (var))
6016 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
6017 var = DECL_VALUE_EXPR (var);
6018 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
6019 var = TREE_OPERAND (var, 0);
6020 gcc_assert (DECL_P (var));
6023 new_var = lookup_decl (var, ctx);
6024 if (var == OMP_CLAUSE_DECL (c) && is_reference (var))
6025 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
6026 ref = build_outer_var_ref (var, ctx);
6027 code = OMP_CLAUSE_REDUCTION_CODE (c);
6029 /* reduction(-:var) sums up the partial results, so it acts
6030 identically to reduction(+:var). */
6031 if (code == MINUS_EXPR)
6032 code = PLUS_EXPR;
6034 if (count == 1)
6036 tree addr = build_fold_addr_expr_loc (clause_loc, ref);
6038 addr = save_expr (addr);
6039 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
6040 x = fold_build2_loc (clause_loc, code, TREE_TYPE (ref), ref, new_var);
6041 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
6042 gimplify_and_add (x, stmt_seqp);
6043 return;
6045 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
6047 tree d = OMP_CLAUSE_DECL (c);
6048 tree type = TREE_TYPE (d);
6049 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
6050 tree i = create_tmp_var (TREE_TYPE (v), NULL);
6051 tree ptype = build_pointer_type (TREE_TYPE (type));
6052 tree bias = TREE_OPERAND (d, 1);
6053 d = TREE_OPERAND (d, 0);
6054 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
6056 tree b = TREE_OPERAND (d, 1);
6057 b = maybe_lookup_decl (b, ctx);
6058 if (b == NULL)
6060 b = TREE_OPERAND (d, 1);
6061 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
6063 if (integer_zerop (bias))
6064 bias = b;
6065 else
6067 bias = fold_convert_loc (clause_loc, TREE_TYPE (b), bias);
6068 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
6069 TREE_TYPE (b), b, bias);
6071 d = TREE_OPERAND (d, 0);
6073 /* For ref, build_outer_var_ref already performs this, so
6074 only new_var needs a dereference. */
6075 if (TREE_CODE (d) == INDIRECT_REF)
6077 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
6078 gcc_assert (is_reference (var) && var == orig_var);
6080 else if (TREE_CODE (d) == ADDR_EXPR)
6082 if (orig_var == var)
6084 new_var = build_fold_addr_expr (new_var);
6085 ref = build_fold_addr_expr (ref);
6088 else
6090 gcc_assert (orig_var == var);
6091 if (is_reference (var))
6092 ref = build_fold_addr_expr (ref);
6094 if (DECL_P (v))
6096 tree t = maybe_lookup_decl (v, ctx);
6097 if (t)
6098 v = t;
6099 else
6100 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
6101 gimplify_expr (&v, stmt_seqp, NULL, is_gimple_val, fb_rvalue);
6103 if (!integer_zerop (bias))
6105 bias = fold_convert_loc (clause_loc, sizetype, bias);
6106 new_var = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
6107 TREE_TYPE (new_var), new_var,
6108 unshare_expr (bias));
6109 ref = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
6110 TREE_TYPE (ref), ref, bias);
6112 new_var = fold_convert_loc (clause_loc, ptype, new_var);
6113 ref = fold_convert_loc (clause_loc, ptype, ref);
6114 tree m = create_tmp_var (ptype, NULL);
6115 gimplify_assign (m, new_var, stmt_seqp);
6116 new_var = m;
6117 m = create_tmp_var (ptype, NULL);
6118 gimplify_assign (m, ref, stmt_seqp);
6119 ref = m;
6120 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), stmt_seqp);
6121 tree body = create_artificial_label (UNKNOWN_LOCATION);
6122 tree end = create_artificial_label (UNKNOWN_LOCATION);
6123 gimple_seq_add_stmt (&sub_seq, gimple_build_label (body));
6124 tree priv = build_simple_mem_ref_loc (clause_loc, new_var);
6125 tree out = build_simple_mem_ref_loc (clause_loc, ref);
6126 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
6128 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
6129 tree decl_placeholder
6130 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
6131 SET_DECL_VALUE_EXPR (placeholder, out);
6132 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
6133 SET_DECL_VALUE_EXPR (decl_placeholder, priv);
6134 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
6135 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
6136 gimple_seq_add_seq (&sub_seq,
6137 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
6138 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
6139 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
6140 OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL;
6142 else
6144 x = build2 (code, TREE_TYPE (out), out, priv);
6145 out = unshare_expr (out);
6146 gimplify_assign (out, x, &sub_seq);
6148 gimple *g = gimple_build_assign (new_var, POINTER_PLUS_EXPR, new_var,
6149 TYPE_SIZE_UNIT (TREE_TYPE (type)));
6150 gimple_seq_add_stmt (&sub_seq, g);
6151 g = gimple_build_assign (ref, POINTER_PLUS_EXPR, ref,
6152 TYPE_SIZE_UNIT (TREE_TYPE (type)));
6153 gimple_seq_add_stmt (&sub_seq, g);
6154 g = gimple_build_assign (i, PLUS_EXPR, i,
6155 build_int_cst (TREE_TYPE (i), 1));
6156 gimple_seq_add_stmt (&sub_seq, g);
6157 g = gimple_build_cond (LE_EXPR, i, v, body, end);
6158 gimple_seq_add_stmt (&sub_seq, g);
6159 gimple_seq_add_stmt (&sub_seq, gimple_build_label (end));
6161 else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
6163 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
6165 if (is_reference (var)
6166 && !useless_type_conversion_p (TREE_TYPE (placeholder),
6167 TREE_TYPE (ref)))
6168 ref = build_fold_addr_expr_loc (clause_loc, ref);
6169 SET_DECL_VALUE_EXPR (placeholder, ref);
6170 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
6171 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
6172 gimple_seq_add_seq (&sub_seq, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
6173 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
6174 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
6176 else
6178 x = build2 (code, TREE_TYPE (ref), ref, new_var);
6179 ref = build_outer_var_ref (var, ctx);
6180 gimplify_assign (ref, x, &sub_seq);
6184 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
6186 gimple_seq_add_stmt (stmt_seqp, stmt);
6188 gimple_seq_add_seq (stmt_seqp, sub_seq);
6190 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END),
6192 gimple_seq_add_stmt (stmt_seqp, stmt);
6196 /* Generate code to implement the COPYPRIVATE clauses. */
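/* Sketch (with illustrative field names, not the exact generated ones):
   for

       #pragma omp single copyprivate (x)

   the thread that executed the single region publishes its value in
   SLIST, and the other threads copy it back out in RLIST, roughly

       .omp_copy_o.x = &x;     <- SLIST, executing thread
       x = *.omp_copy_i.x;     <- RLIST, remaining threads  */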
6198 static void
6199 lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist,
6200 omp_context *ctx)
6202 tree c;
6204 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
6206 tree var, new_var, ref, x;
6207 bool by_ref;
6208 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6210 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
6211 continue;
6213 var = OMP_CLAUSE_DECL (c);
6214 by_ref = use_pointer_for_field (var, NULL);
6216 ref = build_sender_ref (var, ctx);
6217 x = new_var = lookup_decl_in_outer_ctx (var, ctx);
6218 if (by_ref)
6220 x = build_fold_addr_expr_loc (clause_loc, new_var);
6221 x = fold_convert_loc (clause_loc, TREE_TYPE (ref), x);
6223 gimplify_assign (ref, x, slist);
6225 ref = build_receiver_ref (var, false, ctx);
6226 if (by_ref)
6228 ref = fold_convert_loc (clause_loc,
6229 build_pointer_type (TREE_TYPE (new_var)),
6230 ref);
6231 ref = build_fold_indirect_ref_loc (clause_loc, ref);
6233 if (is_reference (var))
6235 ref = fold_convert_loc (clause_loc, TREE_TYPE (new_var), ref);
6236 ref = build_simple_mem_ref_loc (clause_loc, ref);
6237 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
6239 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, ref);
6240 gimplify_and_add (x, rlist);
6245 /* Generate code to implement the clauses FIRSTPRIVATE, COPYIN, LASTPRIVATE,
6246 and REDUCTION from the sender (aka parent) side. */
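/* A hedged sketch of the sender side: for

       #pragma omp task firstprivate (a)

   this emits, before the runtime call, something like

       .omp_data_o.a = a;      <- ILIST

   and for a lastprivate passed back by reference, the reverse copy

       a = .omp_data_o.a;      <- OLIST

   after the region (exact field names are illustrative).  */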
6248 static void
6249 lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
6250 omp_context *ctx)
6252 tree c, t;
6253 int ignored_looptemp = 0;
6254 bool is_taskloop = false;
6256 /* For taskloop, ignore the first two _looptemp_ clauses; those are initialized
6257 by GOMP_taskloop. */
6258 if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt))
6260 ignored_looptemp = 2;
6261 is_taskloop = true;
6264 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
6266 tree val, ref, x, var;
6267 bool by_ref, do_in = false, do_out = false;
6268 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6270 switch (OMP_CLAUSE_CODE (c))
6272 case OMP_CLAUSE_PRIVATE:
6273 if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
6274 break;
6275 continue;
6276 case OMP_CLAUSE_FIRSTPRIVATE:
6277 case OMP_CLAUSE_COPYIN:
6278 case OMP_CLAUSE_LASTPRIVATE:
6279 case OMP_CLAUSE_REDUCTION:
6280 break;
6281 case OMP_CLAUSE_SHARED:
6282 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6283 break;
6284 continue;
6285 case OMP_CLAUSE__LOOPTEMP_:
6286 if (ignored_looptemp)
6288 ignored_looptemp--;
6289 continue;
6291 break;
6292 default:
6293 continue;
6296 val = OMP_CLAUSE_DECL (c);
6297 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
6298 && TREE_CODE (val) == MEM_REF)
6300 val = TREE_OPERAND (val, 0);
6301 if (TREE_CODE (val) == POINTER_PLUS_EXPR)
6302 val = TREE_OPERAND (val, 0);
6303 if (TREE_CODE (val) == INDIRECT_REF
6304 || TREE_CODE (val) == ADDR_EXPR)
6305 val = TREE_OPERAND (val, 0);
6306 if (is_variable_sized (val))
6307 continue;
6310 /* For OMP_CLAUSE_SHARED_FIRSTPRIVATE, look beyond the
6311 outer taskloop region. */
6312 omp_context *ctx_for_o = ctx;
6313 if (is_taskloop
6314 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
6315 && OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6316 ctx_for_o = ctx->outer;
6318 var = lookup_decl_in_outer_ctx (val, ctx_for_o);
6320 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
6321 && is_global_var (var))
6322 continue;
6324 t = omp_member_access_dummy_var (var);
6325 if (t)
6327 var = DECL_VALUE_EXPR (var);
6328 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx_for_o);
6329 if (o != t)
6330 var = unshare_and_remap (var, t, o);
6331 else
6332 var = unshare_expr (var);
6335 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
6337 /* Handle taskloop firstprivate/lastprivate, where the
6338 lastprivate on GIMPLE_OMP_TASK is represented as
6339 OMP_CLAUSE_SHARED_FIRSTPRIVATE. */
6340 tree f = lookup_sfield ((splay_tree_key) &DECL_UID (val), ctx);
6341 x = omp_build_component_ref (ctx->sender_decl, f);
6342 if (use_pointer_for_field (val, ctx))
6343 var = build_fold_addr_expr (var);
6344 gimplify_assign (x, var, ilist);
6345 DECL_ABSTRACT_ORIGIN (f) = NULL;
6346 continue;
6349 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
6350 || val == OMP_CLAUSE_DECL (c))
6351 && is_variable_sized (val))
6352 continue;
6353 by_ref = use_pointer_for_field (val, NULL);
6355 switch (OMP_CLAUSE_CODE (c))
6357 case OMP_CLAUSE_FIRSTPRIVATE:
6358 if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)
6359 && !by_ref
6360 && is_task_ctx (ctx))
6361 TREE_NO_WARNING (var) = 1;
6362 do_in = true;
6363 break;
6365 case OMP_CLAUSE_PRIVATE:
6366 case OMP_CLAUSE_COPYIN:
6367 case OMP_CLAUSE__LOOPTEMP_:
6368 do_in = true;
6369 break;
6371 case OMP_CLAUSE_LASTPRIVATE:
6372 if (by_ref || is_reference (val))
6374 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
6375 continue;
6376 do_in = true;
6378 else
6380 do_out = true;
6381 if (lang_hooks.decls.omp_private_outer_ref (val))
6382 do_in = true;
6384 break;
6386 case OMP_CLAUSE_REDUCTION:
6387 do_in = true;
6388 if (val == OMP_CLAUSE_DECL (c))
6389 do_out = !(by_ref || is_reference (val));
6390 else
6391 by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE;
6392 break;
6394 default:
6395 gcc_unreachable ();
6398 if (do_in)
6400 ref = build_sender_ref (val, ctx);
6401 x = by_ref ? build_fold_addr_expr_loc (clause_loc, var) : var;
6402 gimplify_assign (ref, x, ilist);
6403 if (is_task_ctx (ctx))
6404 DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
6407 if (do_out)
6409 ref = build_sender_ref (val, ctx);
6410 gimplify_assign (var, ref, olist);
6415 /* Generate code to implement SHARED from the sender (aka parent)
6416 side. This is trickier, since GIMPLE_OMP_PARALLEL_CLAUSES doesn't
6417 list things that got automatically shared. */
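/* Illustrative sketch: a variable V implicitly shared by

       #pragma omp parallel

   receives a field in the record type, and we emit roughly

       .omp_data_o.v = v;      <- ILIST (or &v when passed by pointer)
       v = .omp_data_o.v;      <- OLIST, skipped for read-only V  */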
6419 static void
6420 lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx)
6422 tree var, ovar, nvar, t, f, x, record_type;
6424 if (ctx->record_type == NULL)
6425 return;
6427 record_type = ctx->srecord_type ? ctx->srecord_type : ctx->record_type;
6428 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
6430 ovar = DECL_ABSTRACT_ORIGIN (f);
6431 if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
6432 continue;
6434 nvar = maybe_lookup_decl (ovar, ctx);
6435 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
6436 continue;
6438 /* If CTX is a nested parallel directive, find the immediately
6439 enclosing parallel or workshare construct that contains a
6440 mapping for OVAR. */
6441 var = lookup_decl_in_outer_ctx (ovar, ctx);
6443 t = omp_member_access_dummy_var (var);
6444 if (t)
6446 var = DECL_VALUE_EXPR (var);
6447 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
6448 if (o != t)
6449 var = unshare_and_remap (var, t, o);
6450 else
6451 var = unshare_expr (var);
6454 if (use_pointer_for_field (ovar, ctx))
6456 x = build_sender_ref (ovar, ctx);
6457 var = build_fold_addr_expr (var);
6458 gimplify_assign (x, var, ilist);
6460 else
6462 x = build_sender_ref (ovar, ctx);
6463 gimplify_assign (x, var, ilist);
6465 if (!TREE_READONLY (var)
6466 /* We don't need to receive a new reference to a result
6467 or parm decl. In fact we may not store to it as we will
6468 invalidate any pending RSO and generate wrong gimple
6469 during inlining. */
6470 && !((TREE_CODE (var) == RESULT_DECL
6471 || TREE_CODE (var) == PARM_DECL)
6472 && DECL_BY_REFERENCE (var)))
6474 x = build_sender_ref (ovar, ctx);
6475 gimplify_assign (var, x, olist);
6481 /* Emit an OpenACC head marker call, encapsulating the partitioning and
6482 other information that must be processed by the target compiler.
6483 Return the maximum number of dimensions the associated loop might
6484 be partitioned over. */
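/* Schematically, the marker built below has the shape

       ddvar = IFN_UNIQUE (OACC_HEAD_MARK, ddvar, levels, tag [, static]);

   so e.g. "#pragma acc loop gang vector" encodes two levels and a tag
   with the OLF_DIM_GANG and OLF_DIM_VECTOR bits set (an illustrative
   rendering, not dump-file syntax).  */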
6486 static unsigned
6487 lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
6488 gimple_seq *seq, omp_context *ctx)
6490 unsigned levels = 0;
6491 unsigned tag = 0;
6492 tree gang_static = NULL_TREE;
6493 auto_vec<tree, 5> args;
6495 args.quick_push (build_int_cst
6496 (integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
6497 args.quick_push (ddvar);
6498 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6500 switch (OMP_CLAUSE_CODE (c))
6502 case OMP_CLAUSE_GANG:
6503 tag |= OLF_DIM_GANG;
6504 gang_static = OMP_CLAUSE_GANG_STATIC_EXPR (c);
6505 /* static:* is represented by -1, and we can ignore it, as
6506 scheduling is always static. */
6507 if (gang_static && integer_minus_onep (gang_static))
6508 gang_static = NULL_TREE;
6509 levels++;
6510 break;
6512 case OMP_CLAUSE_WORKER:
6513 tag |= OLF_DIM_WORKER;
6514 levels++;
6515 break;
6517 case OMP_CLAUSE_VECTOR:
6518 tag |= OLF_DIM_VECTOR;
6519 levels++;
6520 break;
6522 case OMP_CLAUSE_SEQ:
6523 tag |= OLF_SEQ;
6524 break;
6526 case OMP_CLAUSE_AUTO:
6527 tag |= OLF_AUTO;
6528 break;
6530 case OMP_CLAUSE_INDEPENDENT:
6531 tag |= OLF_INDEPENDENT;
6532 break;
6534 default:
6535 continue;
6539 if (gang_static)
6541 if (DECL_P (gang_static))
6542 gang_static = build_outer_var_ref (gang_static, ctx);
6543 tag |= OLF_GANG_STATIC;
6546 /* In a parallel region, loops are implicitly INDEPENDENT. */
6547 omp_context *tgt = enclosing_target_ctx (ctx);
6548 if (!tgt || is_oacc_parallel (tgt))
6549 tag |= OLF_INDEPENDENT;
6551 /* A loop lacking SEQ, GANG, WORKER and/or VECTOR is implicitly AUTO. */
6552 if (!(tag & (((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE)
6553 | OLF_SEQ)))
6554 tag |= OLF_AUTO;
6556 /* Ensure at least one level. */
6557 if (!levels)
6558 levels++;
6560 args.quick_push (build_int_cst (integer_type_node, levels));
6561 args.quick_push (build_int_cst (integer_type_node, tag));
6562 if (gang_static)
6563 args.quick_push (gang_static);
6565 gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
6566 gimple_set_location (call, loc);
6567 gimple_set_lhs (call, ddvar);
6568 gimple_seq_add_stmt (seq, call);
6570 return levels;
6573 /* Emit an OpenACC loop head or tail marker to SEQ. LEVEL is the
6574 partitioning level of the enclosed region. */
6576 static void
6577 lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
6578 tree tofollow, gimple_seq *seq)
6580 int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
6581 : IFN_UNIQUE_OACC_TAIL_MARK);
6582 tree marker = build_int_cst (integer_type_node, marker_kind);
6583 int nargs = 2 + (tofollow != NULL_TREE);
6584 gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
6585 marker, ddvar, tofollow);
6586 gimple_set_location (call, loc);
6587 gimple_set_lhs (call, ddvar);
6588 gimple_seq_add_stmt (seq, call);
6591 /* Generate the before and after OpenACC loop sequences. CLAUSES are
6592 the loop clauses, from which we extract reductions. Initialize
6593 HEAD and TAIL. */
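/* For a two-level loop (say gang then vector) the emitted sequences
   nest, roughly (illustration only):

       HEAD: gang fork ... vector fork ... marker
         ... loop body ...
       TAIL: marker ... vector join ... gang join

   with each level's reduction setup/init and fini/teardown placed
   around its fork and join by lower_oacc_reductions.  */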
6595 static void
6596 lower_oacc_head_tail (location_t loc, tree clauses,
6597 gimple_seq *head, gimple_seq *tail, omp_context *ctx)
6599 bool inner = false;
6600 tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
6601 gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
6603 unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
6604 tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
6605 tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
6607 gcc_assert (count);
6608 for (unsigned done = 1; count; count--, done++)
6610 gimple_seq fork_seq = NULL;
6611 gimple_seq join_seq = NULL;
6613 tree place = build_int_cst (integer_type_node, -1);
6614 gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
6615 fork_kind, ddvar, place);
6616 gimple_set_location (fork, loc);
6617 gimple_set_lhs (fork, ddvar);
6619 gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
6620 join_kind, ddvar, place);
6621 gimple_set_location (join, loc);
6622 gimple_set_lhs (join, ddvar);
6624 /* Mark the beginning of this level sequence. */
6625 if (inner)
6626 lower_oacc_loop_marker (loc, ddvar, true,
6627 build_int_cst (integer_type_node, count),
6628 &fork_seq);
6629 lower_oacc_loop_marker (loc, ddvar, false,
6630 build_int_cst (integer_type_node, done),
6631 &join_seq);
6633 lower_oacc_reductions (loc, clauses, place, inner,
6634 fork, join, &fork_seq, &join_seq, ctx);
6636 /* Append this level to head. */
6637 gimple_seq_add_seq (head, fork_seq);
6638 /* Prepend it to tail. */
6639 gimple_seq_add_seq (&join_seq, *tail);
6640 *tail = join_seq;
6642 inner = true;
6645 /* Mark the end of the sequence. */
6646 lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
6647 lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
6650 /* A convenience function to build an empty GIMPLE_COND with just the
6651 condition. */
6653 static gcond *
6654 gimple_build_cond_empty (tree cond)
6656 enum tree_code pred_code;
6657 tree lhs, rhs;
6659 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
6660 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
6663 /* Return true if a parallel REGION is within a declare target function or
6664 within a target region and is not a part of a gridified target. */
6666 static bool
6667 parallel_needs_hsa_kernel_p (struct omp_region *region)
6669 bool indirect = false;
6670 for (region = region->outer; region; region = region->outer)
6672 if (region->type == GIMPLE_OMP_PARALLEL)
6673 indirect = true;
6674 else if (region->type == GIMPLE_OMP_TARGET)
6676 gomp_target *tgt_stmt
6677 = as_a <gomp_target *> (last_stmt (region->entry));
6679 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
6680 OMP_CLAUSE__GRIDDIM_))
6681 return indirect;
6682 else
6683 return true;
6687 if (lookup_attribute ("omp declare target",
6688 DECL_ATTRIBUTES (current_function_decl)))
6689 return true;
6691 return false;
6694 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
6695 bool = false);
6697 /* Build the function calls to GOMP_parallel_start etc to actually
6698 generate the parallel operation. REGION is the parallel region
6699 being expanded. BB is the block into which to insert the code. WS_ARGS
6700 will be set if this is a call to a combined parallel+workshare
6701 construct; it contains the list of additional arguments needed by
6702 the workshare construct. */
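/* In the plain (non-combined) case the call built here has the shape

       GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   while a combined parallel loop uses one of the
   GOMP_parallel_loop_* entry points with the workshare arguments
   spliced in between (schematic; the exact argument lists are those
   defined by libgomp).  */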
6704 static void
6705 expand_parallel_call (struct omp_region *region, basic_block bb,
6706 gomp_parallel *entry_stmt,
6707 vec<tree, va_gc> *ws_args)
6709 tree t, t1, t2, val, cond, c, clauses, flags;
6710 gimple_stmt_iterator gsi;
6711 gimple *stmt;
6712 enum built_in_function start_ix;
6713 int start_ix2;
6714 location_t clause_loc;
6715 vec<tree, va_gc> *args;
6717 clauses = gimple_omp_parallel_clauses (entry_stmt);
6719 /* Determine what flavor of GOMP_parallel we will be
6720 emitting. */
6721 start_ix = BUILT_IN_GOMP_PARALLEL;
6722 if (is_combined_parallel (region))
6724 switch (region->inner->type)
6726 case GIMPLE_OMP_FOR:
6727 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
6728 switch (region->inner->sched_kind)
6730 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6731 start_ix2 = 3;
6732 break;
6733 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6734 case OMP_CLAUSE_SCHEDULE_GUIDED:
6735 if (region->inner->sched_modifiers
6736 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
6738 start_ix2 = 3 + region->inner->sched_kind;
6739 break;
6741 /* FALLTHRU */
6742 default:
6743 start_ix2 = region->inner->sched_kind;
6744 break;
6746 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
6747 start_ix = (enum built_in_function) start_ix2;
6748 break;
6749 case GIMPLE_OMP_SECTIONS:
6750 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
6751 break;
6752 default:
6753 gcc_unreachable ();
6757 /* By default, the value of NUM_THREADS is zero (selected at run time)
6758 and there is no conditional. */
6759 cond = NULL_TREE;
6760 val = build_int_cst (unsigned_type_node, 0);
6761 flags = build_int_cst (unsigned_type_node, 0);
6763 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
6764 if (c)
6765 cond = OMP_CLAUSE_IF_EXPR (c);
6767 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
6768 if (c)
6770 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
6771 clause_loc = OMP_CLAUSE_LOCATION (c);
6773 else
6774 clause_loc = gimple_location (entry_stmt);
6776 c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND);
6777 if (c)
6778 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
6780 /* Ensure 'val' is of the correct type. */
6781 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
6783 /* If we found the clause 'if (cond)', build either
6784 (cond != 0) or (cond ? val : 1u). */
6785 if (cond)
6787 cond = gimple_boolify (cond);
6789 if (integer_zerop (val))
6790 val = fold_build2_loc (clause_loc,
6791 EQ_EXPR, unsigned_type_node, cond,
6792 build_int_cst (TREE_TYPE (cond), 0));
6793 else
6795 basic_block cond_bb, then_bb, else_bb;
6796 edge e, e_then, e_else;
6797 tree tmp_then, tmp_else, tmp_join, tmp_var;
6799 tmp_var = create_tmp_var (TREE_TYPE (val));
6800 if (gimple_in_ssa_p (cfun))
6802 tmp_then = make_ssa_name (tmp_var);
6803 tmp_else = make_ssa_name (tmp_var);
6804 tmp_join = make_ssa_name (tmp_var);
6806 else
6808 tmp_then = tmp_var;
6809 tmp_else = tmp_var;
6810 tmp_join = tmp_var;
6813 e = split_block_after_labels (bb);
6814 cond_bb = e->src;
6815 bb = e->dest;
6816 remove_edge (e);
6818 then_bb = create_empty_bb (cond_bb);
6819 else_bb = create_empty_bb (then_bb);
6820 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
6821 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
6823 stmt = gimple_build_cond_empty (cond);
6824 gsi = gsi_start_bb (cond_bb);
6825 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
6827 gsi = gsi_start_bb (then_bb);
6828 expand_omp_build_assign (&gsi, tmp_then, val, true);
6830 gsi = gsi_start_bb (else_bb);
6831 expand_omp_build_assign (&gsi, tmp_else,
6832 build_int_cst (unsigned_type_node, 1),
6833 true);
6835 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
6836 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
6837 add_bb_to_loop (then_bb, cond_bb->loop_father);
6838 add_bb_to_loop (else_bb, cond_bb->loop_father);
6839 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
6840 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
6842 if (gimple_in_ssa_p (cfun))
6844 gphi *phi = create_phi_node (tmp_join, bb);
6845 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
6846 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
6849 val = tmp_join;
6852 gsi = gsi_start_bb (bb);
6853 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
6854 false, GSI_CONTINUE_LINKING);
6857 gsi = gsi_last_bb (bb);
6858 t = gimple_omp_parallel_data_arg (entry_stmt);
6859 if (t == NULL)
6860 t1 = null_pointer_node;
6861 else
6862 t1 = build_fold_addr_expr (t);
6863 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
6864 t2 = build_fold_addr_expr (child_fndecl);
6866 vec_alloc (args, 4 + vec_safe_length (ws_args));
6867 args->quick_push (t2);
6868 args->quick_push (t1);
6869 args->quick_push (val);
6870 if (ws_args)
6871 args->splice (*ws_args);
6872 args->quick_push (flags);
6874 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
6875 builtin_decl_explicit (start_ix), args);
6877 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6878 false, GSI_CONTINUE_LINKING);
6880 if (hsa_gen_requested_p ()
6881 && parallel_needs_hsa_kernel_p (region))
6883 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
6884 hsa_register_kernel (child_cnode);
6888 /* Insert a call to the function named FUNC_NAME, using the information
6889 from ENTRY_STMT, into the basic_block BB. */
6891 static void
6892 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
6893 vec <tree, va_gc> *ws_args)
6895 tree t, t1, t2;
6896 gimple_stmt_iterator gsi;
6897 vec <tree, va_gc> *args;
6899 gcc_assert (vec_safe_length (ws_args) == 2);
6900 tree func_name = (*ws_args)[0];
6901 tree grain = (*ws_args)[1];
6903 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
6904 tree count = find_omp_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
6905 gcc_assert (count != NULL_TREE);
6906 count = OMP_CLAUSE_OPERAND (count, 0);
6908 gsi = gsi_last_bb (bb);
6909 t = gimple_omp_parallel_data_arg (entry_stmt);
6910 if (t == NULL)
6911 t1 = null_pointer_node;
6912 else
6913 t1 = build_fold_addr_expr (t);
6914 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
6916 vec_alloc (args, 4);
6917 args->quick_push (t2);
6918 args->quick_push (t1);
6919 args->quick_push (count);
6920 args->quick_push (grain);
6921 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
6923 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
6924 GSI_CONTINUE_LINKING);
6927 /* Build the function call to GOMP_task to actually
6928 generate the task operation. BB is the block into which to insert the code. */
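/* Schematically, this builds either

       GOMP_task (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
                  if_cond, flags, depend, priority)

   or, for taskloop, the eleven-argument GOMP_taskloop{,_ull} variant
   that additionally carries start, end and step (illustrative shape
   only).  */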
6930 static void
6931 expand_task_call (struct omp_region *region, basic_block bb,
6932 gomp_task *entry_stmt)
6934 tree t1, t2, t3;
6935 gimple_stmt_iterator gsi;
6936 location_t loc = gimple_location (entry_stmt);
6938 tree clauses = gimple_omp_task_clauses (entry_stmt);
6940 tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF);
6941 tree untied = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
6942 tree mergeable = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE);
6943 tree depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
6944 tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
6945 tree priority = find_omp_clause (clauses, OMP_CLAUSE_PRIORITY);
6947 unsigned int iflags
6948 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
6949 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
6950 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
6952 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
6953 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
6954 tree num_tasks = NULL_TREE;
6955 bool ull = false;
6956 if (taskloop_p)
6958 gimple *g = last_stmt (region->outer->entry);
6959 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
6960 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
6961 struct omp_for_data fd;
6962 extract_omp_for_data (as_a <gomp_for *> (g), &fd, NULL);
6963 startvar = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6964 endvar = find_omp_clause (OMP_CLAUSE_CHAIN (startvar),
6965 OMP_CLAUSE__LOOPTEMP_);
6966 startvar = OMP_CLAUSE_DECL (startvar);
6967 endvar = OMP_CLAUSE_DECL (endvar);
6968 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
6969 if (fd.loop.cond_code == LT_EXPR)
6970 iflags |= GOMP_TASK_FLAG_UP;
6971 tree tclauses = gimple_omp_for_clauses (g);
6972 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
6973 if (num_tasks)
6974 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
6975 else
6977 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
6978 if (num_tasks)
6980 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
6981 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
6983 else
6984 num_tasks = integer_zero_node;
6986 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
6987 if (ifc == NULL_TREE)
6988 iflags |= GOMP_TASK_FLAG_IF;
6989 if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
6990 iflags |= GOMP_TASK_FLAG_NOGROUP;
6991 ull = fd.iter_type == long_long_unsigned_type_node;
6993 else if (priority)
6994 iflags |= GOMP_TASK_FLAG_PRIORITY;
6996 tree flags = build_int_cst (unsigned_type_node, iflags);
6998 tree cond = boolean_true_node;
6999 if (ifc)
7001 if (taskloop_p)
7003 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
7004 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
7005 build_int_cst (unsigned_type_node,
7006 GOMP_TASK_FLAG_IF),
7007 build_int_cst (unsigned_type_node, 0));
7008 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
7009 flags, t);
7011 else
7012 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
7015 if (finalc)
7017 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
7018 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
7019 build_int_cst (unsigned_type_node,
7020 GOMP_TASK_FLAG_FINAL),
7021 build_int_cst (unsigned_type_node, 0));
7022 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
7024 if (depend)
7025 depend = OMP_CLAUSE_DECL (depend);
7026 else
7027 depend = build_int_cst (ptr_type_node, 0);
7028 if (priority)
7029 priority = fold_convert (integer_type_node,
7030 OMP_CLAUSE_PRIORITY_EXPR (priority));
7031 else
7032 priority = integer_zero_node;
7034 gsi = gsi_last_bb (bb);
7035 tree t = gimple_omp_task_data_arg (entry_stmt);
7036 if (t == NULL)
7037 t2 = null_pointer_node;
7038 else
7039 t2 = build_fold_addr_expr_loc (loc, t);
7040 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
7041 t = gimple_omp_task_copy_fn (entry_stmt);
7042 if (t == NULL)
7043 t3 = null_pointer_node;
7044 else
7045 t3 = build_fold_addr_expr_loc (loc, t);
7047 if (taskloop_p)
7048 t = build_call_expr (ull
7049 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
7050 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
7051 11, t1, t2, t3,
7052 gimple_omp_task_arg_size (entry_stmt),
7053 gimple_omp_task_arg_align (entry_stmt), flags,
7054 num_tasks, priority, startvar, endvar, step);
7055 else
7056 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
7057 9, t1, t2, t3,
7058 gimple_omp_task_arg_size (entry_stmt),
7059 gimple_omp_task_arg_align (entry_stmt), cond, flags,
7060 depend, priority);
7062 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7063 false, GSI_CONTINUE_LINKING);
7067 /* If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW
7068 catch handler and return it. This prevents programs from violating the
7069 structured block semantics with throws. */
7071 static gimple_seq
7072 maybe_catch_exception (gimple_seq body)
7074 gimple *g;
7075 tree decl;
7077 if (!flag_exceptions)
7078 return body;
7080 if (lang_hooks.eh_protect_cleanup_actions != NULL)
7081 decl = lang_hooks.eh_protect_cleanup_actions ();
7082 else
7083 decl = builtin_decl_explicit (BUILT_IN_TRAP);
7085 g = gimple_build_eh_must_not_throw (decl);
7086 g = gimple_build_try (body, gimple_seq_alloc_with_stmt (g),
7087 GIMPLE_TRY_CATCH);
7089 return gimple_seq_alloc_with_stmt (g);
7092 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
7094 static tree
7095 vec2chain (vec<tree, va_gc> *v)
7097 tree chain = NULL_TREE, t;
7098 unsigned ix;
7100 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
7102 DECL_CHAIN (t) = chain;
7103 chain = t;
7106 return chain;
7110 /* Remove barriers in REGION->EXIT's block. Note that this is only
7111 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
7112 is an implicit barrier, any barrier that a workshare inside the
7113 GIMPLE_OMP_PARALLEL left at the end of the region can now be
7114 removed. */
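/* For instance (sketch):

       #pragma omp parallel
       {
         #pragma omp for    <- barrier at the end of the workshare
         ...
       }                    <- implicit barrier of the parallel

   the workshare's trailing barrier is redundant and may be dropped,
   unless queued tasks could still need it (checked below).  */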
7116 static void
7117 remove_exit_barrier (struct omp_region *region)
7119 gimple_stmt_iterator gsi;
7120 basic_block exit_bb;
7121 edge_iterator ei;
7122 edge e;
7123 gimple *stmt;
7124 int any_addressable_vars = -1;
7126 exit_bb = region->exit;
7128 /* If the parallel region doesn't return, we don't have REGION->EXIT
7129 block at all. */
7130 if (! exit_bb)
7131 return;
7133 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
7134 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
7135 statements that can appear in between are extremely limited -- no
7136 memory operations at all. Here, we allow nothing at all except a
7137 label to precede this GIMPLE_OMP_RETURN. */
7138 gsi = gsi_last_bb (exit_bb);
7139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7140 gsi_prev (&gsi);
7141 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
7142 return;
7144 FOR_EACH_EDGE (e, ei, exit_bb->preds)
7146 gsi = gsi_last_bb (e->src);
7147 if (gsi_end_p (gsi))
7148 continue;
7149 stmt = gsi_stmt (gsi);
7150 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
7151 && !gimple_omp_return_nowait_p (stmt))
7153 /* OpenMP 3.0 tasks unfortunately prevent this optimization
7154 in many cases. If there could be tasks queued, the barrier
7155 might be needed to let the tasks run before some local
7156 variable of the parallel that the task uses as shared
7157 runs out of scope. The task can be spawned either
7158 from within the current function (this would be easy to check)
7159 or from some function it calls and gets passed an address
7160 of such a variable. */
7161 if (any_addressable_vars < 0)
7163 gomp_parallel *parallel_stmt
7164 = as_a <gomp_parallel *> (last_stmt (region->entry));
7165 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
7166 tree local_decls, block, decl;
7167 unsigned ix;
7169 any_addressable_vars = 0;
7170 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
7171 if (TREE_ADDRESSABLE (decl))
7173 any_addressable_vars = 1;
7174 break;
7176 for (block = gimple_block (stmt);
7177 !any_addressable_vars
7178 && block
7179 && TREE_CODE (block) == BLOCK;
7180 block = BLOCK_SUPERCONTEXT (block))
7182 for (local_decls = BLOCK_VARS (block);
7183 local_decls;
7184 local_decls = DECL_CHAIN (local_decls))
7185 if (TREE_ADDRESSABLE (local_decls))
7187 any_addressable_vars = 1;
7188 break;
7190 if (block == gimple_block (parallel_stmt))
7191 break;
7194 if (!any_addressable_vars)
7195 gimple_omp_return_set_nowait (stmt);
7200 static void
7201 remove_exit_barriers (struct omp_region *region)
7203 if (region->type == GIMPLE_OMP_PARALLEL)
7204 remove_exit_barrier (region);
7206 if (region->inner)
7208 region = region->inner;
7209 remove_exit_barriers (region);
7210 while (region->next)
7212 region = region->next;
7213 remove_exit_barriers (region);
7218 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
7219 calls. These can't be declared as const functions, but
7220 within one parallel body they are constant, so they can be
7221 transformed there into __builtin_omp_get_{thread_num,num_threads} (),
7222 which are declared const. Similarly for a task body, except
7223 that in an untied task omp_get_thread_num () can change at any task
7224 scheduling point. */
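/* The substitution performed is, in effect (a sketch, not a dump),

       n = omp_get_num_threads ();            <- external, not const
   =>
       n = __builtin_omp_get_num_threads ();  <- declared const

   which lets repeated calls within one parallel body be CSEd.  */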
7226 static void
7227 optimize_omp_library_calls (gimple *entry_stmt)
7229 basic_block bb;
7230 gimple_stmt_iterator gsi;
7231 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
7232 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
7233 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
7234 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
7235 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
7236 && find_omp_clause (gimple_omp_task_clauses (entry_stmt),
7237 OMP_CLAUSE_UNTIED) != NULL);
7239 FOR_EACH_BB_FN (bb, cfun)
7240 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7242 gimple *call = gsi_stmt (gsi);
7243 tree decl;
7245 if (is_gimple_call (call)
7246 && (decl = gimple_call_fndecl (call))
7247 && DECL_EXTERNAL (decl)
7248 && TREE_PUBLIC (decl)
7249 && DECL_INITIAL (decl) == NULL)
7251 tree built_in;
7253 if (DECL_NAME (decl) == thr_num_id)
7255 /* In #pragma omp task untied, omp_get_thread_num () can change
7256 during the execution of the task region. */
7257 if (untied_task)
7258 continue;
7259 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
7261 else if (DECL_NAME (decl) == num_thr_id)
7262 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
7263 else
7264 continue;
7266 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
7267 || gimple_call_num_args (call) != 0)
7268 continue;
7270 if (flag_exceptions && !TREE_NOTHROW (decl))
7271 continue;
7273 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
7274 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
7275 TREE_TYPE (TREE_TYPE (built_in))))
7276 continue;
7278 gimple_call_set_fndecl (call, built_in);
7283 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
7284 regimplified. */
7286 static tree
7287 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
7289 tree t = *tp;
7291 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
7292 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
7293 return t;
7295 if (TREE_CODE (t) == ADDR_EXPR)
7296 recompute_tree_invariant_for_addr_expr (t);
7298 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7299 return NULL_TREE;
7302 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
7304 static void
7305 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
7306 bool after)
7308 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
7309 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
7310 !after, after ? GSI_CONTINUE_LINKING
7311 : GSI_SAME_STMT);
7312 gimple *stmt = gimple_build_assign (to, from);
7313 if (after)
7314 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
7315 else
7316 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
7317 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
7318 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
7320 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
7321 gimple_regimplify_operands (stmt, &gsi);
7325 /* Expand the OpenMP parallel or task directive starting at REGION. */
7327 static void
7328 expand_omp_taskreg (struct omp_region *region)
7330 basic_block entry_bb, exit_bb, new_bb;
7331 struct function *child_cfun;
7332 tree child_fn, block, t;
7333 gimple_stmt_iterator gsi;
7334 gimple *entry_stmt, *stmt;
7335 edge e;
7336 vec<tree, va_gc> *ws_args;
7338 entry_stmt = last_stmt (region->entry);
7339 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
7340 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7342 entry_bb = region->entry;
7343 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
7344 exit_bb = region->cont;
7345 else
7346 exit_bb = region->exit;
7348 bool is_cilk_for
7349 = (flag_cilkplus
7350 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
7351 && find_omp_clause (gimple_omp_parallel_clauses (entry_stmt),
7352 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
7354 if (is_cilk_for)
7355 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
7356 and the inner statement contains the name of the built-in function
7357 and grain. */
7358 ws_args = region->inner->ws_args;
7359 else if (is_combined_parallel (region))
7360 ws_args = region->ws_args;
7361 else
7362 ws_args = NULL;
7364 if (child_cfun->cfg)
7366 /* Due to inlining, it may happen that we have already outlined
7367 the region, in which case all we need to do is make the
7368 sub-graph unreachable and emit the parallel call. */
7369 edge entry_succ_e, exit_succ_e;
7371 entry_succ_e = single_succ_edge (entry_bb);
7373 gsi = gsi_last_bb (entry_bb);
7374 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
7375 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
7376 gsi_remove (&gsi, true);
7378 new_bb = entry_bb;
7379 if (exit_bb)
7381 exit_succ_e = single_succ_edge (exit_bb);
7382 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
7384 remove_edge_and_dominated_blocks (entry_succ_e);
7386 else
7388 unsigned srcidx, dstidx, num;
7390 /* If the parallel region needs data sent from the parent
7391 function, then the very first statement (except possible
7392 tree profile counter updates) of the parallel body
7393 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7394 &.OMP_DATA_O is passed as an argument to the child function,
7395 we need to replace it with the argument as seen by the child
7396 function.
7398 In most cases, this will end up being the identity assignment
7399 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
7400 a function call that has been inlined, the original PARM_DECL
7401 .OMP_DATA_I may have been converted into a different local
7402 variable, in which case we need to keep the assignment. */
7403 if (gimple_omp_taskreg_data_arg (entry_stmt))
7405 basic_block entry_succ_bb
7406 = single_succ_p (entry_bb) ? single_succ (entry_bb)
7407 : FALLTHRU_EDGE (entry_bb)->dest;
7408 tree arg;
7409 gimple *parcopy_stmt = NULL;
7411 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7413 gimple *stmt;
7415 gcc_assert (!gsi_end_p (gsi));
7416 stmt = gsi_stmt (gsi);
7417 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7418 continue;
7420 if (gimple_num_ops (stmt) == 2)
7422 tree arg = gimple_assign_rhs1 (stmt);
7424 /* We ignore the subcode because we're
7425 effectively doing a STRIP_NOPS. */
7427 if (TREE_CODE (arg) == ADDR_EXPR
7428 && TREE_OPERAND (arg, 0)
7429 == gimple_omp_taskreg_data_arg (entry_stmt))
7431 parcopy_stmt = stmt;
7432 break;
7437 gcc_assert (parcopy_stmt != NULL);
7438 arg = DECL_ARGUMENTS (child_fn);
7440 if (!gimple_in_ssa_p (cfun))
7442 if (gimple_assign_lhs (parcopy_stmt) == arg)
7443 gsi_remove (&gsi, true);
7444 else
7446 /* ?? Is setting the subcode really necessary ?? */
7447 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
7448 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7451 else
7453 tree lhs = gimple_assign_lhs (parcopy_stmt);
7454 gcc_assert (SSA_NAME_VAR (lhs) == arg);
7455 /* We'd like to set the rhs to the default def in the child_fn,
7456 but it's too early to create ssa names in the child_fn.
7457 Instead, we set the rhs to the parm. In
7458 move_sese_region_to_fn, we introduce a default def for the
7459 parm, map the parm to its default def, and once we encounter
7460 this stmt, replace the parm with the default def. */
7461 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7462 update_stmt (parcopy_stmt);
7466 /* Declare local variables needed in CHILD_CFUN. */
7467 block = DECL_INITIAL (child_fn);
7468 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7469 /* The gimplifier could record temporaries in parallel/task block
7470 rather than in the containing function's local_decls chain,
7471 which would mean cgraph missed finalizing them. Do it now. */
7472 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7473 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7474 varpool_node::finalize_decl (t);
7475 DECL_SAVED_TREE (child_fn) = NULL;
7476 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7477 gimple_set_body (child_fn, NULL);
7478 TREE_USED (block) = 1;
7480 /* Reset DECL_CONTEXT on function arguments. */
7481 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7482 DECL_CONTEXT (t) = child_fn;
7484 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
7485 so that it can be moved to the child function. */
7486 gsi = gsi_last_bb (entry_bb);
7487 stmt = gsi_stmt (gsi);
7488 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
7489 || gimple_code (stmt) == GIMPLE_OMP_TASK));
7490 e = split_block (entry_bb, stmt);
7491 gsi_remove (&gsi, true);
7492 entry_bb = e->dest;
7493 edge e2 = NULL;
7494 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7495 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7496 else
7498 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
7499 gcc_assert (e2->dest == region->exit);
7500 remove_edge (BRANCH_EDGE (entry_bb));
7501 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
7502 gsi = gsi_last_bb (region->exit);
7503 gcc_assert (!gsi_end_p (gsi)
7504 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7505 gsi_remove (&gsi, true);
7508 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
7509 if (exit_bb)
7511 gsi = gsi_last_bb (exit_bb);
7512 gcc_assert (!gsi_end_p (gsi)
7513 && (gimple_code (gsi_stmt (gsi))
7514 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
7515 stmt = gimple_build_return (NULL);
7516 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7517 gsi_remove (&gsi, true);
7520 /* Move the parallel region into CHILD_CFUN. */
7522 if (gimple_in_ssa_p (cfun))
7524 init_tree_ssa (child_cfun);
7525 init_ssa_operands (child_cfun);
7526 child_cfun->gimple_df->in_ssa_p = true;
7527 block = NULL_TREE;
7529 else
7530 block = gimple_block (entry_stmt);
7532 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7533 if (exit_bb)
7534 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7535 if (e2)
7537 basic_block dest_bb = e2->dest;
7538 if (!exit_bb)
7539 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
7540 remove_edge (e2);
7541 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
7543 /* When the OMP expansion process cannot guarantee an up-to-date
7544 loop tree, arrange for the child function to fix up loops. */
7545 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7546 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7548 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7549 num = vec_safe_length (child_cfun->local_decls);
7550 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7552 t = (*child_cfun->local_decls)[srcidx];
7553 if (DECL_CONTEXT (t) == cfun->decl)
7554 continue;
7555 if (srcidx != dstidx)
7556 (*child_cfun->local_decls)[dstidx] = t;
7557 dstidx++;
7559 if (dstidx != num)
7560 vec_safe_truncate (child_cfun->local_decls, dstidx);
7562 /* Inform the callgraph about the new function. */
7563 child_cfun->curr_properties = cfun->curr_properties;
7564 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7565 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7566 cgraph_node *node = cgraph_node::get_create (child_fn);
7567 node->parallelized_function = 1;
7568 cgraph_node::add_new_function (child_fn, true);
7570 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7571 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7573 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7574 fixed in a following pass. */
7575 push_cfun (child_cfun);
7576 if (need_asm)
7577 assign_assembler_name_if_neeeded (child_fn);
7579 if (optimize)
7580 optimize_omp_library_calls (entry_stmt);
7581 cgraph_edge::rebuild_edges ();
7583 /* Some EH regions might become dead, see PR34608. If
7584 pass_cleanup_cfg isn't the first pass to happen with the
7585 new child, these dead EH edges might cause problems.
7586 Clean them up now. */
7587 if (flag_exceptions)
7589 basic_block bb;
7590 bool changed = false;
7592 FOR_EACH_BB_FN (bb, cfun)
7593 changed |= gimple_purge_dead_eh_edges (bb);
7594 if (changed)
7595 cleanup_tree_cfg ();
7597 if (gimple_in_ssa_p (cfun))
7598 update_ssa (TODO_update_ssa);
7599 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7600 verify_loop_structure ();
7601 pop_cfun ();
7603 if (dump_file && !gimple_in_ssa_p (cfun))
7605 omp_any_child_fn_dumped = true;
7606 dump_function_header (dump_file, child_fn, dump_flags);
7607 dump_function_to_file (child_fn, dump_file, dump_flags);
7611 /* Emit a library call to launch the child threads. */
7612 if (is_cilk_for)
7613 expand_cilk_for_call (new_bb,
7614 as_a <gomp_parallel *> (entry_stmt), ws_args);
7615 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7616 expand_parallel_call (region, new_bb,
7617 as_a <gomp_parallel *> (entry_stmt), ws_args);
7618 else
7619 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
7620 if (gimple_in_ssa_p (cfun))
7621 update_ssa (TODO_update_ssa_only_virtuals);
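/* For illustration (a sketch, not part of the compiler): for a parallel
   region the launch emitted by expand_parallel_call boils down to handing
   the outlined child function and its marshalled data block to libgomp's
   GOMP_parallel entry point.  CHILD_FN and struct data below are
   stand-ins for the generated names:

     extern void GOMP_parallel (void (*fn) (void *), void *data,
				unsigned num_threads, unsigned flags);

     struct data { int *a; int n; };

     static void child_fn (void *p)
     {
       struct data *d = (struct data *) p;
       for (int i = 0; i < d->n; i++)	// body of the parallel region
	 d->a[i] += 1;
     }

     ...
     GOMP_parallel (child_fn, &d, 0, 0);	// 0 threads = default team

   Task constructs instead go through GOMP_task via expand_task_call.  */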
7624 /* Information about members of an OpenACC collapsed loop nest. */
7626 struct oacc_collapse
7627 {
7628 tree base; /* Base value. */
7629 tree iters; /* Number of steps. */
7630 tree step; /* Step size. */
7631 };
7633 /* Helper for expand_oacc_for. Determine collapsed loop information.
7634 Fill in COUNTS array. Emit any initialization code before GSI.
7635 Return the calculated outer loop bound of BOUND_TYPE. */
7637 static tree
7638 expand_oacc_collapse_init (const struct omp_for_data *fd,
7639 gimple_stmt_iterator *gsi,
7640 oacc_collapse *counts, tree bound_type)
7642 tree total = build_int_cst (bound_type, 1);
7643 int ix;
7645 gcc_assert (integer_onep (fd->loop.step));
7646 gcc_assert (integer_zerop (fd->loop.n1));
7648 for (ix = 0; ix != fd->collapse; ix++)
7650 const omp_for_data_loop *loop = &fd->loops[ix];
7652 tree iter_type = TREE_TYPE (loop->v);
7653 tree diff_type = iter_type;
7654 tree plus_type = iter_type;
7656 gcc_assert (loop->cond_code == fd->loop.cond_code);
7658 if (POINTER_TYPE_P (iter_type))
7659 plus_type = sizetype;
7660 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7661 diff_type = signed_type_for (diff_type);
7663 tree b = loop->n1;
7664 tree e = loop->n2;
7665 tree s = loop->step;
7666 bool up = loop->cond_code == LT_EXPR;
7667 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7668 bool negating;
7669 tree expr;
7671 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
7672 true, GSI_SAME_STMT);
7673 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
7674 true, GSI_SAME_STMT);
7676 /* Convert the step, avoiding possible unsigned->signed overflow. */
7677 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7678 if (negating)
7679 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7680 s = fold_convert (diff_type, s);
7681 if (negating)
7682 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7683 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
7684 true, GSI_SAME_STMT);
7686 /* Determine the range, avoiding possible unsigned->signed overflow. */
7687 negating = !up && TYPE_UNSIGNED (iter_type);
7688 expr = fold_build2 (MINUS_EXPR, plus_type,
7689 fold_convert (plus_type, negating ? b : e),
7690 fold_convert (plus_type, negating ? e : b));
7691 expr = fold_convert (diff_type, expr);
7692 if (negating)
7693 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7694 tree range = force_gimple_operand_gsi
7695 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
7697 /* Determine number of iterations. */
7698 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7699 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7700 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7702 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7703 true, GSI_SAME_STMT);
7705 counts[ix].base = b;
7706 counts[ix].iters = iters;
7707 counts[ix].step = s;
7709 total = fold_build2 (MULT_EXPR, bound_type, total,
7710 fold_convert (bound_type, iters));
7713 return total;
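/* Worked example of the count computation above (a sketch, not compiler
   code): for an up-counting loop with B = 0, E = 10, S = 3 and cond <,
   DIR is +1, so

     range = E - B = 10
     iters = (range - DIR + S) / S = (10 - 1 + 3) / 3 = 4

   matching the four iterations 0, 3, 6, 9.  In plain C:

     static long
     oacc_loop_iters (long b, long e, long s)
     {
       long range = e - b;	    // up loop assumed, s > 0
       return (range - 1 + s) / s;  // DIR = +1 for cond <
     }
*/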
7716 /* Emit initializers for collapsed loop members. IVAR is the outer
7717 loop iteration variable, from which collapsed loop iteration values
7718 are calculated. The COUNTS array has been initialized by
7719 expand_oacc_collapse_init. */
7721 static void
7722 expand_oacc_collapse_vars (const struct omp_for_data *fd,
7723 gimple_stmt_iterator *gsi,
7724 const oacc_collapse *counts, tree ivar)
7726 tree ivar_type = TREE_TYPE (ivar);
7728 /* The most rapidly changing iteration variable is the innermost
7729 one. */
7730 for (int ix = fd->collapse; ix--;)
7732 const omp_for_data_loop *loop = &fd->loops[ix];
7733 const oacc_collapse *collapse = &counts[ix];
7734 tree iter_type = TREE_TYPE (loop->v);
7735 tree diff_type = TREE_TYPE (collapse->step);
7736 tree plus_type = iter_type;
7737 enum tree_code plus_code = PLUS_EXPR;
7738 tree expr;
7740 if (POINTER_TYPE_P (iter_type))
7742 plus_code = POINTER_PLUS_EXPR;
7743 plus_type = sizetype;
7746 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
7747 fold_convert (ivar_type, collapse->iters));
7748 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
7749 collapse->step);
7750 expr = fold_build2 (plus_code, iter_type, collapse->base,
7751 fold_convert (plus_type, expr));
7752 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
7753 true, GSI_SAME_STMT);
7754 gassign *ass = gimple_build_assign (loop->v, expr);
7755 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
7757 if (ix)
7759 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
7760 fold_convert (ivar_type, collapse->iters));
7761 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7762 true, GSI_SAME_STMT);
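/* Sketch of the recovery above for collapse(2) with hypothetical
   iteration counts ITERS1 = 3 and ITERS2 = 4: the linear index IVAR
   runs over [0, 12) and the loop variables are recovered innermost
   first,

     v2 = base2 + (ivar % 4) * step2;
     ivar = ivar / 4;
     v1 = base1 + ivar * step1;

   so e.g. ivar = 7 yields v2 = base2 + 3 * step2 and
   v1 = base1 + 1 * step1.  */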
7768 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
7769 of the combined collapse > 1 loop constructs, generate code like:
7770 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
7771 if (cond3 is <)
7772 adj = STEP3 - 1;
7773 else
7774 adj = STEP3 + 1;
7775 count3 = (adj + N32 - N31) / STEP3;
7776 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
7777 if (cond2 is <)
7778 adj = STEP2 - 1;
7779 else
7780 adj = STEP2 + 1;
7781 count2 = (adj + N22 - N21) / STEP2;
7782 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
7783 if (cond1 is <)
7784 adj = STEP1 - 1;
7785 else
7786 adj = STEP1 + 1;
7787 count1 = (adj + N12 - N11) / STEP1;
7788 count = count1 * count2 * count3;
7789 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
7790 count = 0;
7791 and set ZERO_ITER_BB to that bb. If this isn't the outermost
7792 of the combined loop constructs, just initialize COUNTS array
7793 from the _looptemp_ clauses. */
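/* Worked example of the count computation (a sketch): for a
   down-counting loop with N1 = 10, N2 = 0, STEP = -3 and cond >,
   adj = STEP + 1 = -2, so

     count = (adj + N2 - N1) / STEP = (-2 + 0 - 10) / -3 = 4

   matching the four iterations 10, 7, 4, 1.  */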
7795 /* NOTE: It *could* be better to moosh all of the BBs together,
7796 creating one larger BB with all the computation and the unexpected
7797 jump at the end. I.e.
7799 bool zero3, zero2, zero1, zero;
7801 zero3 = N32 c3 N31;
7802 count3 = (N32 - N31) /[cl] STEP3;
7803 zero2 = N22 c2 N21;
7804 count2 = (N22 - N21) /[cl] STEP2;
7805 zero1 = N12 c1 N11;
7806 count1 = (N12 - N11) /[cl] STEP1;
7807 zero = zero3 || zero2 || zero1;
7808 count = count1 * count2 * count3;
7809 if (__builtin_expect(zero, false)) goto zero_iter_bb;
7811 After all, we expect zero to be false, and thus we expect to have to
7812 evaluate all of the comparison expressions, so short-circuiting
7813 oughtn't be a win. Since the condition isn't protecting a
7814 denominator, we're not concerned about divide-by-zero, so we can
7815 fully evaluate count even if a numerator turned out to be wrong.
7817 It seems like putting this all together would create much better
7818 scheduling opportunities, and less pressure on the chip's branch
7819 predictor. */
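/* In C the mooshed variant would read roughly as follows (a sketch of
   the alternative described above, not what is currently emitted); note
   the non-short-circuit | so every comparison is evaluated:

     bool zero = (n32 cond3 n31) | (n22 cond2 n21) | (n12 cond1 n11);
     count3 = ...; count2 = ...; count1 = ...;
     count = count1 * count2 * count3;
     if (__builtin_expect (zero, false))
       goto zero_iter_bb;
*/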
7821 static void
7822 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7823 basic_block &entry_bb, tree *counts,
7824 basic_block &zero_iter1_bb, int &first_zero_iter1,
7825 basic_block &zero_iter2_bb, int &first_zero_iter2,
7826 basic_block &l2_dom_bb)
7828 tree t, type = TREE_TYPE (fd->loop.v);
7829 edge e, ne;
7830 int i;
7832 /* Collapsed loops need work for expansion into SSA form. */
7833 gcc_assert (!gimple_in_ssa_p (cfun));
7835 if (gimple_omp_for_combined_into_p (fd->for_stmt)
7836 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7838 gcc_assert (fd->ordered == 0);
7839 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
7840 isn't supposed to be handled, as the inner loop doesn't
7841 use it. */
7842 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
7843 OMP_CLAUSE__LOOPTEMP_);
7844 gcc_assert (innerc);
7845 for (i = 0; i < fd->collapse; i++)
7847 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
7848 OMP_CLAUSE__LOOPTEMP_);
7849 gcc_assert (innerc);
7850 if (i)
7851 counts[i] = OMP_CLAUSE_DECL (innerc);
7852 else
7853 counts[0] = NULL_TREE;
7855 return;
7858 for (i = fd->collapse; i < fd->ordered; i++)
7860 tree itype = TREE_TYPE (fd->loops[i].v);
7861 counts[i] = NULL_TREE;
7862 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7863 fold_convert (itype, fd->loops[i].n1),
7864 fold_convert (itype, fd->loops[i].n2));
7865 if (t && integer_zerop (t))
7867 for (i = fd->collapse; i < fd->ordered; i++)
7868 counts[i] = build_int_cst (type, 0);
7869 break;
7872 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
7874 tree itype = TREE_TYPE (fd->loops[i].v);
7876 if (i >= fd->collapse && counts[i])
7877 continue;
7878 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
7879 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7880 fold_convert (itype, fd->loops[i].n1),
7881 fold_convert (itype, fd->loops[i].n2)))
7882 == NULL_TREE || !integer_onep (t)))
7884 gcond *cond_stmt;
7885 tree n1, n2;
7886 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
7887 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
7888 true, GSI_SAME_STMT);
7889 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
7890 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
7891 true, GSI_SAME_STMT);
7892 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
7893 NULL_TREE, NULL_TREE);
7894 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
7895 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
7896 expand_omp_regimplify_p, NULL, NULL)
7897 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
7898 expand_omp_regimplify_p, NULL, NULL))
7900 *gsi = gsi_for_stmt (cond_stmt);
7901 gimple_regimplify_operands (cond_stmt, gsi);
7903 e = split_block (entry_bb, cond_stmt);
7904 basic_block &zero_iter_bb
7905 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
7906 int &first_zero_iter
7907 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
7908 if (zero_iter_bb == NULL)
7910 gassign *assign_stmt;
7911 first_zero_iter = i;
7912 zero_iter_bb = create_empty_bb (entry_bb);
7913 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
7914 *gsi = gsi_after_labels (zero_iter_bb);
7915 if (i < fd->collapse)
7916 assign_stmt = gimple_build_assign (fd->loop.n2,
7917 build_zero_cst (type));
7918 else
7920 counts[i] = create_tmp_reg (type, ".count");
7921 assign_stmt
7922 = gimple_build_assign (counts[i], build_zero_cst (type));
7924 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
7925 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
7926 entry_bb);
7928 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
7929 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
7930 e->flags = EDGE_TRUE_VALUE;
7931 e->probability = REG_BR_PROB_BASE - ne->probability;
7932 if (l2_dom_bb == NULL)
7933 l2_dom_bb = entry_bb;
7934 entry_bb = e->dest;
7935 *gsi = gsi_last_bb (entry_bb);
7938 if (POINTER_TYPE_P (itype))
7939 itype = signed_type_for (itype);
7940 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
7941 ? -1 : 1));
7942 t = fold_build2 (PLUS_EXPR, itype,
7943 fold_convert (itype, fd->loops[i].step), t);
7944 t = fold_build2 (PLUS_EXPR, itype, t,
7945 fold_convert (itype, fd->loops[i].n2));
7946 t = fold_build2 (MINUS_EXPR, itype, t,
7947 fold_convert (itype, fd->loops[i].n1));
7948 /* ??? We could probably use CEIL_DIV_EXPR instead of
7949 TRUNC_DIV_EXPR and adjust by hand, unless we can't
7950 generate the same code in the end because generically we
7951 don't know that the values involved must be negative
7952 for GT. */
7953 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
7954 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7955 fold_build1 (NEGATE_EXPR, itype, t),
7956 fold_build1 (NEGATE_EXPR, itype,
7957 fold_convert (itype,
7958 fd->loops[i].step)));
7959 else
7960 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
7961 fold_convert (itype, fd->loops[i].step));
7962 t = fold_convert (type, t);
7963 if (TREE_CODE (t) == INTEGER_CST)
7964 counts[i] = t;
7965 else
7967 if (i < fd->collapse || i != first_zero_iter2)
7968 counts[i] = create_tmp_reg (type, ".count");
7969 expand_omp_build_assign (gsi, counts[i], t);
7971 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
7973 if (i == 0)
7974 t = counts[0];
7975 else
7976 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
7977 expand_omp_build_assign (gsi, fd->loop.n2, t);
7983 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
7984 T = V;
7985 V3 = N31 + (T % count3) * STEP3;
7986 T = T / count3;
7987 V2 = N21 + (T % count2) * STEP2;
7988 T = T / count2;
7989 V1 = N11 + T * STEP1;
7990 if this loop doesn't have an inner loop construct combined with it.
7991 If it does have an inner loop construct combined with it and the
7992 iteration count isn't known constant, store values from counts array
7993 into its _looptemp_ temporaries instead. */
7995 static void
7996 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7997 tree *counts, gimple *inner_stmt, tree startvar)
7999 int i;
8000 if (gimple_omp_for_combined_p (fd->for_stmt))
8002 /* If fd->loop.n2 is constant, then no propagation of the counts
8003 is needed, they are constant. */
8004 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
8005 return;
8007 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
8008 ? gimple_omp_taskreg_clauses (inner_stmt)
8009 : gimple_omp_for_clauses (inner_stmt);
8010 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
8011 isn't supposed to be handled, as the inner loop doesn't
8012 use it. */
8013 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
8014 gcc_assert (innerc);
8015 for (i = 0; i < fd->collapse; i++)
8017 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8018 OMP_CLAUSE__LOOPTEMP_);
8019 gcc_assert (innerc);
8020 if (i)
8022 tree tem = OMP_CLAUSE_DECL (innerc);
8023 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
8024 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
8025 false, GSI_CONTINUE_LINKING);
8026 gassign *stmt = gimple_build_assign (tem, t);
8027 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
8030 return;
8033 tree type = TREE_TYPE (fd->loop.v);
8034 tree tem = create_tmp_reg (type, ".tem");
8035 gassign *stmt = gimple_build_assign (tem, startvar);
8036 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
8038 for (i = fd->collapse - 1; i >= 0; i--)
8040 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
8041 itype = vtype;
8042 if (POINTER_TYPE_P (vtype))
8043 itype = signed_type_for (vtype);
8044 if (i != 0)
8045 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
8046 else
8047 t = tem;
8048 t = fold_convert (itype, t);
8049 t = fold_build2 (MULT_EXPR, itype, t,
8050 fold_convert (itype, fd->loops[i].step));
8051 if (POINTER_TYPE_P (vtype))
8052 t = fold_build_pointer_plus (fd->loops[i].n1, t);
8053 else
8054 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
8055 t = force_gimple_operand_gsi (gsi, t,
8056 DECL_P (fd->loops[i].v)
8057 && TREE_ADDRESSABLE (fd->loops[i].v),
8058 NULL_TREE, false,
8059 GSI_CONTINUE_LINKING);
8060 stmt = gimple_build_assign (fd->loops[i].v, t);
8061 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
8062 if (i != 0)
8064 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
8065 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
8066 false, GSI_CONTINUE_LINKING);
8067 stmt = gimple_build_assign (tem, t);
8068 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
8074 /* Helper function for expand_omp_for_*. Generate code like:
8075 L10:
8076 V3 += STEP3;
8077 if (V3 cond3 N32) goto BODY_BB; else goto L11;
8078 L11:
8079 V3 = N31;
8080 V2 += STEP2;
8081 if (V2 cond2 N22) goto BODY_BB; else goto L12;
8082 L12:
8083 V2 = N21;
8084 V1 += STEP1;
8085 goto BODY_BB; */
8087 static basic_block
8088 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
8089 basic_block body_bb)
8091 basic_block last_bb, bb, collapse_bb = NULL;
8092 int i;
8093 gimple_stmt_iterator gsi;
8094 edge e;
8095 tree t;
8096 gimple *stmt;
8098 last_bb = cont_bb;
8099 for (i = fd->collapse - 1; i >= 0; i--)
8101 tree vtype = TREE_TYPE (fd->loops[i].v);
8103 bb = create_empty_bb (last_bb);
8104 add_bb_to_loop (bb, last_bb->loop_father);
8105 gsi = gsi_start_bb (bb);
8107 if (i < fd->collapse - 1)
8109 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
8110 e->probability = REG_BR_PROB_BASE / 8;
8112 t = fd->loops[i + 1].n1;
8113 t = force_gimple_operand_gsi (&gsi, t,
8114 DECL_P (fd->loops[i + 1].v)
8115 && TREE_ADDRESSABLE (fd->loops[i
8116 + 1].v),
8117 NULL_TREE, false,
8118 GSI_CONTINUE_LINKING);
8119 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
8120 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8122 else
8123 collapse_bb = bb;
8125 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
8127 if (POINTER_TYPE_P (vtype))
8128 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
8129 else
8130 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
8131 t = force_gimple_operand_gsi (&gsi, t,
8132 DECL_P (fd->loops[i].v)
8133 && TREE_ADDRESSABLE (fd->loops[i].v),
8134 NULL_TREE, false, GSI_CONTINUE_LINKING);
8135 stmt = gimple_build_assign (fd->loops[i].v, t);
8136 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8138 if (i > 0)
8140 t = fd->loops[i].n2;
8141 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8142 false, GSI_CONTINUE_LINKING);
8143 tree v = fd->loops[i].v;
8144 if (DECL_P (v) && TREE_ADDRESSABLE (v))
8145 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
8146 false, GSI_CONTINUE_LINKING);
8147 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
8148 stmt = gimple_build_cond_empty (t);
8149 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8150 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
8151 e->probability = REG_BR_PROB_BASE * 7 / 8;
8153 else
8154 make_edge (bb, body_bb, EDGE_FALLTHRU);
8155 last_bb = bb;
8158 return collapse_bb;
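/* The blocks built above implement an odometer-style increment.  As a
   sketch in plain C for collapse(2), with the conditions spelled as <
   (illustration only):

     v2 += step2;	// L10
     if (v2 < n22)
       goto body;
     v2 = n21;		// L11
     v1 += step1;
     goto body;
*/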
8162 /* Expand #pragma omp ordered depend(source). */
8164 static void
8165 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
8166 tree *counts, location_t loc)
8168 enum built_in_function source_ix
8169 = fd->iter_type == long_integer_type_node
8170 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
8171 gimple *g
8172 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
8173 build_fold_addr_expr (counts[fd->ordered]));
8174 gimple_set_location (g, loc);
8175 gsi_insert_before (gsi, g, GSI_SAME_STMT);
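/* For example, with ordered(2) a #pragma omp ordered depend(source)
   lowers to a single runtime call

     GOMP_doacross_post (counts);

   where counts is the address of the .orditera array built below in
   expand_omp_ordered_source_sink, holding the current iteration
   vector.  (GOMP_doacross_ull_post is the unsigned long long
   variant.)  */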
8178 /* Expand a single depend clause from #pragma omp ordered depend(sink:...). */
8180 static void
8181 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
8182 tree *counts, tree c, location_t loc)
8184 auto_vec<tree, 10> args;
8185 enum built_in_function sink_ix
8186 = fd->iter_type == long_integer_type_node
8187 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
8188 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
8189 int i;
8190 gimple_stmt_iterator gsi2 = *gsi;
8191 bool warned_step = false;
8193 for (i = 0; i < fd->ordered; i++)
8195 tree step = NULL_TREE;
8196 off = TREE_PURPOSE (deps);
8197 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
8199 step = TREE_OPERAND (off, 1);
8200 off = TREE_OPERAND (off, 0);
8202 if (!integer_zerop (off))
8204 gcc_assert (fd->loops[i].cond_code == LT_EXPR
8205 || fd->loops[i].cond_code == GT_EXPR);
8206 bool forward = fd->loops[i].cond_code == LT_EXPR;
8207 if (step)
8209 /* Non-simple Fortran DO loops. If step is variable,
8210 we don't even know the direction at compile time, so
8211 we can't warn. */
8212 if (TREE_CODE (step) != INTEGER_CST)
8213 break;
8214 forward = tree_int_cst_sgn (step) != -1;
8216 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8217 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
8218 "lexically later iteration");
8219 break;
8221 deps = TREE_CHAIN (deps);
8223 /* If all offsets corresponding to the collapsed loops are zero,
8224 this depend clause can be ignored. FIXME: but there is still a
8225 flush needed. We need to emit one __sync_synchronize () for it
8226 though (perhaps conditionally)? Solve this together with the
8227 conservative dependence folding optimization.
8228 if (i >= fd->collapse)
8229 return; */
8231 deps = OMP_CLAUSE_DECL (c);
8232 gsi_prev (&gsi2);
8233 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
8234 edge e2 = split_block_after_labels (e1->dest);
8236 gsi2 = gsi_after_labels (e1->dest);
8237 *gsi = gsi_last_bb (e1->src);
8238 for (i = 0; i < fd->ordered; i++)
8240 tree itype = TREE_TYPE (fd->loops[i].v);
8241 tree step = NULL_TREE;
8242 tree orig_off = NULL_TREE;
8243 if (POINTER_TYPE_P (itype))
8244 itype = sizetype;
8245 if (i)
8246 deps = TREE_CHAIN (deps);
8247 off = TREE_PURPOSE (deps);
8248 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
8250 step = TREE_OPERAND (off, 1);
8251 off = TREE_OPERAND (off, 0);
8252 gcc_assert (fd->loops[i].cond_code == LT_EXPR
8253 && integer_onep (fd->loops[i].step)
8254 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
8256 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
8257 if (step)
8259 off = fold_convert_loc (loc, itype, off);
8260 orig_off = off;
8261 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
8264 if (integer_zerop (off))
8265 t = boolean_true_node;
8266 else
8268 tree a;
8269 tree co = fold_convert_loc (loc, itype, off);
8270 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
8272 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8273 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
8274 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
8275 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
8276 co);
8278 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8279 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8280 fd->loops[i].v, co);
8281 else
8282 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
8283 fd->loops[i].v, co);
8284 if (step)
8286 tree t1, t2;
8287 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8288 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
8289 fd->loops[i].n1);
8290 else
8291 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
8292 fd->loops[i].n2);
8293 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8294 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
8295 fd->loops[i].n2);
8296 else
8297 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
8298 fd->loops[i].n1);
8299 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
8300 step, build_int_cst (TREE_TYPE (step), 0));
8301 if (TREE_CODE (step) != INTEGER_CST)
8303 t1 = unshare_expr (t1);
8304 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
8305 false, GSI_CONTINUE_LINKING);
8306 t2 = unshare_expr (t2);
8307 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
8308 false, GSI_CONTINUE_LINKING);
8310 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
8311 t, t2, t1);
8313 else if (fd->loops[i].cond_code == LT_EXPR)
8315 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8316 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
8317 fd->loops[i].n1);
8318 else
8319 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
8320 fd->loops[i].n2);
8322 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8323 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
8324 fd->loops[i].n2);
8325 else
8326 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
8327 fd->loops[i].n1);
8329 if (cond)
8330 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
8331 else
8332 cond = t;
8334 off = fold_convert_loc (loc, itype, off);
8336 if (step
8337 || (fd->loops[i].cond_code == LT_EXPR
8338 ? !integer_onep (fd->loops[i].step)
8339 : !integer_minus_onep (fd->loops[i].step)))
8341 if (step == NULL_TREE
8342 && TYPE_UNSIGNED (itype)
8343 && fd->loops[i].cond_code == GT_EXPR)
8344 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
8345 fold_build1_loc (loc, NEGATE_EXPR, itype,
8346 s));
8347 else
8348 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
8349 orig_off ? orig_off : off, s);
8350 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
8351 build_int_cst (itype, 0));
8352 if (integer_zerop (t) && !warned_step)
8354 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
8355 "in the iteration space");
8356 warned_step = true;
8358 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
8359 cond, t);
8362 if (i <= fd->collapse - 1 && fd->collapse > 1)
8363 t = fd->loop.v;
8364 else if (counts[i])
8365 t = counts[i];
8366 else
8368 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8369 fd->loops[i].v, fd->loops[i].n1);
8370 t = fold_convert_loc (loc, fd->iter_type, t);
8372 if (step)
8373 /* We have already divided OFF by STEP earlier. */;
8374 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
8375 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
8376 fold_build1_loc (loc, NEGATE_EXPR, itype,
8377 s));
8378 else
8379 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
8380 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8381 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
8382 off = fold_convert_loc (loc, fd->iter_type, off);
8383 if (i <= fd->collapse - 1 && fd->collapse > 1)
8385 if (i)
8386 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
8387 off);
8388 if (i < fd->collapse - 1)
8390 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
8391 counts[i]);
8392 continue;
8395 off = unshare_expr (off);
8396 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
8397 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
8398 true, GSI_SAME_STMT);
8399 args.safe_push (t);
8401 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
8402 gimple_set_location (g, loc);
8403 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
8405 cond = unshare_expr (cond);
8406 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
8407 GSI_CONTINUE_LINKING);
8408 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
8409 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
8410 e3->probability = REG_BR_PROB_BASE / 8;
8411 e1->probability = REG_BR_PROB_BASE - e3->probability;
8412 e1->flags = EDGE_TRUE_VALUE;
8413 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
8415 *gsi = gsi_after_labels (e2->dest);
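/* Source-level picture of what the expansion above handles (a sketch):

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   ... consume data produced at iteration (i - 1, j) ...
	   #pragma omp ordered depend(source)
	 }

   The depend(sink: i - 1, j) clause becomes a GOMP_doacross_wait
   (or GOMP_doacross_ull_wait) call with the folded iteration counters
   as arguments, guarded by the condition computed above so that the
   wait is skipped whenever i - 1 falls outside the iteration
   space.  */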
8418 /* Expand all #pragma omp ordered depend(source) and
8419 #pragma omp ordered depend(sink:...) constructs in the current
8420 #pragma omp for ordered(n) region. */
8422 static void
8423 expand_omp_ordered_source_sink (struct omp_region *region,
8424 struct omp_for_data *fd, tree *counts,
8425 basic_block cont_bb)
8427 struct omp_region *inner;
8428 int i;
8429 for (i = fd->collapse - 1; i < fd->ordered; i++)
8430 if (i == fd->collapse - 1 && fd->collapse > 1)
8431 counts[i] = NULL_TREE;
8432 else if (i >= fd->collapse && !cont_bb)
8433 counts[i] = build_zero_cst (fd->iter_type);
8434 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
8435 && integer_onep (fd->loops[i].step))
8436 counts[i] = NULL_TREE;
8437 else
8438 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
8439 tree atype
8440 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
8441 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
8442 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
8444 for (inner = region->inner; inner; inner = inner->next)
8445 if (inner->type == GIMPLE_OMP_ORDERED)
8447 gomp_ordered *ord_stmt = inner->ord_stmt;
8448 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
8449 location_t loc = gimple_location (ord_stmt);
8450 tree c;
8451 for (c = gimple_omp_ordered_clauses (ord_stmt);
8452 c; c = OMP_CLAUSE_CHAIN (c))
8453 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
8454 break;
8455 if (c)
8456 expand_omp_ordered_source (&gsi, fd, counts, loc);
8457 for (c = gimple_omp_ordered_clauses (ord_stmt);
8458 c; c = OMP_CLAUSE_CHAIN (c))
8459 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
8460 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
8461 gsi_remove (&gsi, true);
8465 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
8466 collapsed. */
8468 static basic_block
8469 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
8470 basic_block cont_bb, basic_block body_bb,
8471 bool ordered_lastprivate)
8473 if (fd->ordered == fd->collapse)
8474 return cont_bb;
8476 if (!cont_bb)
8478 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8479 for (int i = fd->collapse; i < fd->ordered; i++)
8481 tree type = TREE_TYPE (fd->loops[i].v);
8482 tree n1 = fold_convert (type, fd->loops[i].n1);
8483 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
8484 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8485 size_int (i - fd->collapse + 1),
8486 NULL_TREE, NULL_TREE);
8487 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8489 return NULL;
8492 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
8494 tree t, type = TREE_TYPE (fd->loops[i].v);
8495 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8496 expand_omp_build_assign (&gsi, fd->loops[i].v,
8497 fold_convert (type, fd->loops[i].n1));
8498 if (counts[i])
8499 expand_omp_build_assign (&gsi, counts[i],
8500 build_zero_cst (fd->iter_type));
8501 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8502 size_int (i - fd->collapse + 1),
8503 NULL_TREE, NULL_TREE);
8504 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8505 if (!gsi_end_p (gsi))
8506 gsi_prev (&gsi);
8507 else
8508 gsi = gsi_last_bb (body_bb);
8509 edge e1 = split_block (body_bb, gsi_stmt (gsi));
8510 basic_block new_body = e1->dest;
8511 if (body_bb == cont_bb)
8512 cont_bb = new_body;
8513 edge e2 = NULL;
8514 basic_block new_header;
8515 if (EDGE_COUNT (cont_bb->preds) > 0)
8517 gsi = gsi_last_bb (cont_bb);
8518 if (POINTER_TYPE_P (type))
8519 t = fold_build_pointer_plus (fd->loops[i].v,
8520 fold_convert (sizetype,
8521 fd->loops[i].step));
8522 else
8523 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
8524 fold_convert (type, fd->loops[i].step));
8525 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
8526 if (counts[i])
8528 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
8529 build_int_cst (fd->iter_type, 1));
8530 expand_omp_build_assign (&gsi, counts[i], t);
8531 t = counts[i];
8533 else
8535 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8536 fd->loops[i].v, fd->loops[i].n1);
8537 t = fold_convert (fd->iter_type, t);
8538 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8539 true, GSI_SAME_STMT);
8541 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8542 size_int (i - fd->collapse + 1),
8543 NULL_TREE, NULL_TREE);
8544 expand_omp_build_assign (&gsi, aref, t);
8545 gsi_prev (&gsi);
8546 e2 = split_block (cont_bb, gsi_stmt (gsi));
8547 new_header = e2->dest;
8549 else
8550 new_header = cont_bb;
8551 gsi = gsi_after_labels (new_header);
8552 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
8553 true, GSI_SAME_STMT);
8554 tree n2
8555 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
8556 true, NULL_TREE, true, GSI_SAME_STMT);
8557 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
8558 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
8559 edge e3 = split_block (new_header, gsi_stmt (gsi));
8560 cont_bb = e3->dest;
8561 remove_edge (e1);
8562 make_edge (body_bb, new_header, EDGE_FALLTHRU);
8563 e3->flags = EDGE_FALSE_VALUE;
8564 e3->probability = REG_BR_PROB_BASE / 8;
8565 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
8566 e1->probability = REG_BR_PROB_BASE - e3->probability;
8568 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
8569 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
8571 if (e2)
8573 struct loop *loop = alloc_loop ();
8574 loop->header = new_header;
8575 loop->latch = e2->src;
8576 add_loop (loop, body_bb->loop_father);
8580 /* If there are any lastprivate clauses and it is possible some loops
8581 might have zero iterations, ensure all the decls are initialized,
8582 otherwise we could crash evaluating C++ class iterators with lastprivate
8583 clauses. */
8584 bool need_inits = false;
8585 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
8586 if (need_inits)
8588 tree type = TREE_TYPE (fd->loops[i].v);
8589 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8590 expand_omp_build_assign (&gsi, fd->loops[i].v,
8591 fold_convert (type, fd->loops[i].n1));
8593 else
8595 tree type = TREE_TYPE (fd->loops[i].v);
8596 tree this_cond = fold_build2 (fd->loops[i].cond_code,
8597 boolean_type_node,
8598 fold_convert (type, fd->loops[i].n1),
8599 fold_convert (type, fd->loops[i].n2));
8600 if (!integer_onep (this_cond))
8601 need_inits = true;
8604 return cont_bb;
8608 /* A subroutine of expand_omp_for. Generate code for a parallel
8609 loop with any schedule. Given parameters:
8611 for (V = N1; V cond N2; V += STEP) BODY;
8613 where COND is "<" or ">", we generate pseudocode
8615 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
8616 if (more) goto L0; else goto L3;
8617 L0:
8618 V = istart0;
8619 iend = iend0;
8620 L1:
8621 BODY;
8622 V += STEP;
8623 if (V cond iend) goto L1; else goto L2;
8624 L2:
8625 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
8626 L3:
8628 If this is a combined omp parallel loop, instead of the call to
8629 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
8630 If this is gimple_omp_for_combined_p loop, then instead of assigning
8631 V and iend in L0 we assign the first two _looptemp_ clause decls of the
8632 inner GIMPLE_OMP_FOR and V += STEP; and
8633 if (V cond iend) goto L1; else goto L2; are removed.
8635 For collapsed loops, given parameters:
8636 collapse(3)
8637 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
8638 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
8639 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
8640 BODY;
8642 we generate pseudocode
8644 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
8645 if (cond3 is <)
8646 adj = STEP3 - 1;
8647 else
8648 adj = STEP3 + 1;
8649 count3 = (adj + N32 - N31) / STEP3;
8650 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
8651 if (cond2 is <)
8652 adj = STEP2 - 1;
8653 else
8654 adj = STEP2 + 1;
8655 count2 = (adj + N22 - N21) / STEP2;
8656 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
8657 if (cond1 is <)
8658 adj = STEP1 - 1;
8659 else
8660 adj = STEP1 + 1;
8661 count1 = (adj + N12 - N11) / STEP1;
8662 count = count1 * count2 * count3;
8663 goto Z1;
8664 Z0:
8665 count = 0;
8666 Z1:
8667 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
8668 if (more) goto L0; else goto L3;
8669 L0:
8670 V = istart0;
8671 T = V;
8672 V3 = N31 + (T % count3) * STEP3;
8673 T = T / count3;
8674 V2 = N21 + (T % count2) * STEP2;
8675 T = T / count2;
8676 V1 = N11 + T * STEP1;
8677 iend = iend0;
8678 L1:
8679 BODY;
8680 V += 1;
8681 if (V < iend) goto L10; else goto L2;
8682 L10:
8683 V3 += STEP3;
8684 if (V3 cond3 N32) goto L1; else goto L11;
8685 L11:
8686 V3 = N31;
8687 V2 += STEP2;
8688 if (V2 cond2 N22) goto L1; else goto L12;
8689 L12:
8690 V2 = N21;
8691 V1 += STEP1;
8692 goto L1;
8693 L2:
8694 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
8695 L3:
8697 */
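/* At runtime, the control flow sketched above corresponds to this C
   driver (a sketch using libgomp's dynamic-schedule entry points,
   assuming cond is < and long iterators; body () stands for the loop
   body):

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (n1, n2, step, chunk,
				  &istart0, &iend0))
       do
	 {
	   for (long v = istart0; v < iend0; v += step)
	     body (v);
	 }
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();
*/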
8699 static void
8700 expand_omp_for_generic (struct omp_region *region,
8701 struct omp_for_data *fd,
8702 enum built_in_function start_fn,
8703 enum built_in_function next_fn,
8704 gimple *inner_stmt)
8706 tree type, istart0, iend0, iend;
8707 tree t, vmain, vback, bias = NULL_TREE;
8708 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
8709 basic_block l2_bb = NULL, l3_bb = NULL;
8710 gimple_stmt_iterator gsi;
8711 gassign *assign_stmt;
8712 bool in_combined_parallel = is_combined_parallel (region);
8713 bool broken_loop = region->cont == NULL;
8714 edge e, ne;
8715 tree *counts = NULL;
8716 int i;
8717 bool ordered_lastprivate = false;
8719 gcc_assert (!broken_loop || !in_combined_parallel);
8720 gcc_assert (fd->iter_type == long_integer_type_node
8721 || !in_combined_parallel);
8723 entry_bb = region->entry;
8724 cont_bb = region->cont;
8725 collapse_bb = NULL;
8726 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
8727 gcc_assert (broken_loop
8728 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
8729 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
8730 l1_bb = single_succ (l0_bb);
8731 if (!broken_loop)
8733 l2_bb = create_empty_bb (cont_bb);
8734 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
8735 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
8736 == l1_bb));
8737 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
8739 else
8740 l2_bb = NULL;
8741 l3_bb = BRANCH_EDGE (entry_bb)->dest;
8742 exit_bb = region->exit;
8744 gsi = gsi_last_bb (entry_bb);
8746 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
8747 if (fd->ordered
8748 && find_omp_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
8749 OMP_CLAUSE_LASTPRIVATE))
8750 ordered_lastprivate = true;
8751 if (fd->collapse > 1 || fd->ordered)
8753 int first_zero_iter1 = -1, first_zero_iter2 = -1;
8754 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
8756 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
8757 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
8758 zero_iter1_bb, first_zero_iter1,
8759 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
8761 if (zero_iter1_bb)
8763 /* Some counts[i] vars might be uninitialized if
8764 some loop has zero iterations. But the body shouldn't
8765 be executed in that case, so just avoid uninit warnings. */
8766 for (i = first_zero_iter1;
8767 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
8768 if (SSA_VAR_P (counts[i]))
8769 TREE_NO_WARNING (counts[i]) = 1;
8770 gsi_prev (&gsi);
8771 e = split_block (entry_bb, gsi_stmt (gsi));
8772 entry_bb = e->dest;
8773 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
8774 gsi = gsi_last_bb (entry_bb);
8775 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8776 get_immediate_dominator (CDI_DOMINATORS,
8777 zero_iter1_bb));
8779 if (zero_iter2_bb)
8781 /* Some counts[i] vars might be uninitialized if
8782 some loop has zero iterations. But the body shouldn't
8783 be executed in that case, so just avoid uninit warnings. */
8784 for (i = first_zero_iter2; i < fd->ordered; i++)
8785 if (SSA_VAR_P (counts[i]))
8786 TREE_NO_WARNING (counts[i]) = 1;
8787 if (zero_iter1_bb)
8788 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8789 else
8791 gsi_prev (&gsi);
8792 e = split_block (entry_bb, gsi_stmt (gsi));
8793 entry_bb = e->dest;
8794 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8795 gsi = gsi_last_bb (entry_bb);
8796 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8797 get_immediate_dominator
8798 (CDI_DOMINATORS, zero_iter2_bb));
8801 if (fd->collapse == 1)
8803 counts[0] = fd->loop.n2;
8804 fd->loop = fd->loops[0];
8808 type = TREE_TYPE (fd->loop.v);
8809 istart0 = create_tmp_var (fd->iter_type, ".istart0");
8810 iend0 = create_tmp_var (fd->iter_type, ".iend0");
8811 TREE_ADDRESSABLE (istart0) = 1;
8812 TREE_ADDRESSABLE (iend0) = 1;
8814 /* See if we need to bias by LLONG_MIN. */
8815 if (fd->iter_type == long_long_unsigned_type_node
8816 && TREE_CODE (type) == INTEGER_TYPE
8817 && !TYPE_UNSIGNED (type)
8818 && fd->ordered == 0)
8820 tree n1, n2;
8822 if (fd->loop.cond_code == LT_EXPR)
8824 n1 = fd->loop.n1;
8825 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
8827 else
8829 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
8830 n2 = fd->loop.n1;
8832 if (TREE_CODE (n1) != INTEGER_CST
8833 || TREE_CODE (n2) != INTEGER_CST
8834 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
8835 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
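/* Why the bias works: adding the bias 0x8000000000000000 (LLONG_MIN
   reinterpreted as unsigned) maps the signed iteration range
   monotonically into the unsigned long long space.  E.g. for a loop
   from -2 to 3, the biased bounds are 0x7ffffffffffffffe and
   0x8000000000000003, so the unsigned comparisons done by the
   GOMP_loop_ull_* functions still order the iterations correctly.
   The bias is subtracted again below when istart0/iend0 are converted
   back to the user-visible type.  */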
8838 gimple_stmt_iterator gsif = gsi;
8839 gsi_prev (&gsif);
8841 tree arr = NULL_TREE;
8842 if (in_combined_parallel)
8844 gcc_assert (fd->ordered == 0);
8845 /* In a combined parallel loop, emit a call to
8846 GOMP_loop_foo_next. */
8847 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
8848 build_fold_addr_expr (istart0),
8849 build_fold_addr_expr (iend0));
8851 else
8853 tree t0, t1, t2, t3, t4;
8854 /* If this is not a combined parallel loop, emit a call to
8855 GOMP_loop_foo_start in ENTRY_BB. */
8856 t4 = build_fold_addr_expr (iend0);
8857 t3 = build_fold_addr_expr (istart0);
8858 if (fd->ordered)
8860 t0 = build_int_cst (unsigned_type_node,
8861 fd->ordered - fd->collapse + 1);
8862 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
8863 fd->ordered
8864 - fd->collapse + 1),
8865 ".omp_counts");
8866 DECL_NAMELESS (arr) = 1;
8867 TREE_ADDRESSABLE (arr) = 1;
8868 TREE_STATIC (arr) = 1;
8869 vec<constructor_elt, va_gc> *v;
8870 vec_alloc (v, fd->ordered - fd->collapse + 1);
8871 int idx;
8873 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
8875 tree c;
8876 if (idx == 0 && fd->collapse > 1)
8877 c = fd->loop.n2;
8878 else
8879 c = counts[idx + fd->collapse - 1];
8880 tree purpose = size_int (idx);
8881 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
8882 if (TREE_CODE (c) != INTEGER_CST)
8883 TREE_STATIC (arr) = 0;
8886 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
8887 if (!TREE_STATIC (arr))
8888 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
8889 void_type_node, arr),
8890 true, NULL_TREE, true, GSI_SAME_STMT);
8891 t1 = build_fold_addr_expr (arr);
8892 t2 = NULL_TREE;
8894 else
8896 t2 = fold_convert (fd->iter_type, fd->loop.step);
8897 t1 = fd->loop.n2;
8898 t0 = fd->loop.n1;
8899 if (gimple_omp_for_combined_into_p (fd->for_stmt))
8901 tree innerc
8902 = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
8903 OMP_CLAUSE__LOOPTEMP_);
8904 gcc_assert (innerc);
8905 t0 = OMP_CLAUSE_DECL (innerc);
8906 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8907 OMP_CLAUSE__LOOPTEMP_);
8908 gcc_assert (innerc);
8909 t1 = OMP_CLAUSE_DECL (innerc);
8911 if (POINTER_TYPE_P (TREE_TYPE (t0))
8912 && TYPE_PRECISION (TREE_TYPE (t0))
8913 != TYPE_PRECISION (fd->iter_type))
8915 /* Avoid casting pointers to an integer of a different size. */
8916 tree itype = signed_type_for (type);
8917 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
8918 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
8920 else
8922 t1 = fold_convert (fd->iter_type, t1);
8923 t0 = fold_convert (fd->iter_type, t0);
8925 if (bias)
8927 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
8928 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
8931 if (fd->iter_type == long_integer_type_node || fd->ordered)
8933 if (fd->chunk_size)
8935 t = fold_convert (fd->iter_type, fd->chunk_size);
8936 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8937 if (fd->ordered)
8938 t = build_call_expr (builtin_decl_explicit (start_fn),
8939 5, t0, t1, t, t3, t4);
8940 else
8941 t = build_call_expr (builtin_decl_explicit (start_fn),
8942 6, t0, t1, t2, t, t3, t4);
8944 else if (fd->ordered)
8945 t = build_call_expr (builtin_decl_explicit (start_fn),
8946 4, t0, t1, t3, t4);
8947 else
8948 t = build_call_expr (builtin_decl_explicit (start_fn),
8949 5, t0, t1, t2, t3, t4);
8951 else
8953 tree t5;
8954 tree c_bool_type;
8955 tree bfn_decl;
8957 /* The GOMP_loop_ull_*start functions have an additional boolean
8958 argument, true for < loops and false for > loops.
8959 In Fortran, the C bool type can be different from
8960 boolean_type_node. */
8961 bfn_decl = builtin_decl_explicit (start_fn);
8962 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
8963 t5 = build_int_cst (c_bool_type,
8964 fd->loop.cond_code == LT_EXPR ? 1 : 0);
8965 if (fd->chunk_size)
8967 tree bfn_decl = builtin_decl_explicit (start_fn);
8968 t = fold_convert (fd->iter_type, fd->chunk_size);
8969 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8970 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
8972 else
8973 t = build_call_expr (builtin_decl_explicit (start_fn),
8974 6, t5, t0, t1, t2, t3, t4);
8977 if (TREE_TYPE (t) != boolean_type_node)
8978 t = fold_build2 (NE_EXPR, boolean_type_node,
8979 t, build_int_cst (TREE_TYPE (t), 0));
8980 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8981 true, GSI_SAME_STMT);
8982 if (arr && !TREE_STATIC (arr))
8984 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
8985 TREE_THIS_VOLATILE (clobber) = 1;
8986 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
8987 GSI_SAME_STMT);
8989 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
8991 /* Remove the GIMPLE_OMP_FOR statement. */
8992 gsi_remove (&gsi, true);
8994 if (gsi_end_p (gsif))
8995 gsif = gsi_after_labels (gsi_bb (gsif));
8996 else
8997 gsi_next (&gsif);
8998 /* Iteration setup for sequential loop goes in L0_BB. */
8999 tree startvar = fd->loop.v;
9000 tree endvar = NULL_TREE;
9002 if (gimple_omp_for_combined_p (fd->for_stmt))
9004 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
9005 && gimple_omp_for_kind (inner_stmt)
9006 == GF_OMP_FOR_KIND_SIMD);
9007 tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt),
9008 OMP_CLAUSE__LOOPTEMP_);
9009 gcc_assert (innerc);
9010 startvar = OMP_CLAUSE_DECL (innerc);
9011 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9012 OMP_CLAUSE__LOOPTEMP_);
9013 gcc_assert (innerc);
9014 endvar = OMP_CLAUSE_DECL (innerc);
9017 gsi = gsi_start_bb (l0_bb);
9018 t = istart0;
9019 if (fd->ordered && fd->collapse == 1)
9020 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
9021 fold_convert (fd->iter_type, fd->loop.step));
9022 else if (bias)
9023 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
9024 if (fd->ordered && fd->collapse == 1)
9026 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
9027 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
9028 fd->loop.n1, fold_convert (sizetype, t));
9029 else
9031 t = fold_convert (TREE_TYPE (startvar), t);
9032 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
9033 fd->loop.n1, t);
9036 else
9038 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
9039 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
9040 t = fold_convert (TREE_TYPE (startvar), t);
9042 t = force_gimple_operand_gsi (&gsi, t,
9043 DECL_P (startvar)
9044 && TREE_ADDRESSABLE (startvar),
9045 NULL_TREE, false, GSI_CONTINUE_LINKING);
9046 assign_stmt = gimple_build_assign (startvar, t);
9047 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9049 t = iend0;
9050 if (fd->ordered && fd->collapse == 1)
9051 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
9052 fold_convert (fd->iter_type, fd->loop.step));
9053 else if (bias)
9054 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
9055 if (fd->ordered && fd->collapse == 1)
9057 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
9058 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
9059 fd->loop.n1, fold_convert (sizetype, t));
9060 else
9062 t = fold_convert (TREE_TYPE (startvar), t);
9063 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
9064 fd->loop.n1, t);
9067 else
9069 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
9070 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
9071 t = fold_convert (TREE_TYPE (startvar), t);
9073 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9074 false, GSI_CONTINUE_LINKING);
9075 if (endvar)
9077 assign_stmt = gimple_build_assign (endvar, iend);
9078 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9079 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
9080 assign_stmt = gimple_build_assign (fd->loop.v, iend);
9081 else
9082 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
9083 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9085 /* Handle linear clause adjustments. */
9086 tree itercnt = NULL_TREE;
9087 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9088 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9089 c; c = OMP_CLAUSE_CHAIN (c))
9090 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9091 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9093 tree d = OMP_CLAUSE_DECL (c);
9094 bool is_ref = is_reference (d);
9095 tree t = d, a, dest;
9096 if (is_ref)
9097 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9098 tree type = TREE_TYPE (t);
9099 if (POINTER_TYPE_P (type))
9100 type = sizetype;
9101 dest = unshare_expr (t);
9102 tree v = create_tmp_var (TREE_TYPE (t), NULL);
9103 expand_omp_build_assign (&gsif, v, t);
9104 if (itercnt == NULL_TREE)
9106 itercnt = startvar;
9107 tree n1 = fd->loop.n1;
9108 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
9110 itercnt
9111 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
9112 itercnt);
9113 n1 = fold_convert (TREE_TYPE (itercnt), n1);
9115 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
9116 itercnt, n1);
9117 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
9118 itercnt, fd->loop.step);
9119 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
9120 NULL_TREE, false,
9121 GSI_CONTINUE_LINKING);
9123 a = fold_build2 (MULT_EXPR, type,
9124 fold_convert (type, itercnt),
9125 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
9126 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
9127 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
9128 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9129 false, GSI_CONTINUE_LINKING);
9130 assign_stmt = gimple_build_assign (dest, t);
9131 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9133 if (fd->collapse > 1)
9134 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
9136 if (fd->ordered)
9138 /* Until now, the counts array contained the number of iterations
9139 (or a variable containing it) for the ith loop. From now on, we
9140 need those counts only for collapsed loops, and only from the 2nd
9141 through the last collapsed one. Move them one element earlier;
9142 we'll use counts[fd->collapse - 1] for the first source/sink
9143 iteration counter and so on, and counts[fd->ordered]
9144 as the array holding the current counter values for
9145 depend(source). */
9146 if (fd->collapse > 1)
9147 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
9148 if (broken_loop)
9150 int i;
9151 for (i = fd->collapse; i < fd->ordered; i++)
9153 tree type = TREE_TYPE (fd->loops[i].v);
9154 tree this_cond
9155 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
9156 fold_convert (type, fd->loops[i].n1),
9157 fold_convert (type, fd->loops[i].n2));
9158 if (!integer_onep (this_cond))
9159 break;
9161 if (i < fd->ordered)
9163 cont_bb
9164 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
9165 add_bb_to_loop (cont_bb, l1_bb->loop_father);
9166 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
9167 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
9168 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9169 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
9170 make_edge (cont_bb, l1_bb, 0);
9171 l2_bb = create_empty_bb (cont_bb);
9172 broken_loop = false;
9175 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
9176 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
9177 ordered_lastprivate);
9178 if (counts[fd->collapse - 1])
9180 gcc_assert (fd->collapse == 1);
9181 gsi = gsi_last_bb (l0_bb);
9182 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
9183 istart0, true);
9184 gsi = gsi_last_bb (cont_bb);
9185 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
9186 build_int_cst (fd->iter_type, 1));
9187 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
9188 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
9189 size_zero_node, NULL_TREE, NULL_TREE);
9190 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
9191 t = counts[fd->collapse - 1];
9193 else if (fd->collapse > 1)
9194 t = fd->loop.v;
9195 else
9197 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
9198 fd->loops[0].v, fd->loops[0].n1);
9199 t = fold_convert (fd->iter_type, t);
9201 gsi = gsi_last_bb (l0_bb);
9202 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
9203 size_zero_node, NULL_TREE, NULL_TREE);
9204 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9205 false, GSI_CONTINUE_LINKING);
9206 expand_omp_build_assign (&gsi, aref, t, true);
9209 if (!broken_loop)
9211 /* Code to control the increment and predicate for the sequential
9212 loop goes in the CONT_BB. */
9213 gsi = gsi_last_bb (cont_bb);
9214 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
9215 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
9216 vmain = gimple_omp_continue_control_use (cont_stmt);
9217 vback = gimple_omp_continue_control_def (cont_stmt);
9219 if (!gimple_omp_for_combined_p (fd->for_stmt))
9221 if (POINTER_TYPE_P (type))
9222 t = fold_build_pointer_plus (vmain, fd->loop.step);
9223 else
9224 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
9225 t = force_gimple_operand_gsi (&gsi, t,
9226 DECL_P (vback)
9227 && TREE_ADDRESSABLE (vback),
9228 NULL_TREE, true, GSI_SAME_STMT);
9229 assign_stmt = gimple_build_assign (vback, t);
9230 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9232 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
9234 if (fd->collapse > 1)
9235 t = fd->loop.v;
9236 else
9238 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
9239 fd->loops[0].v, fd->loops[0].n1);
9240 t = fold_convert (fd->iter_type, t);
9242 tree aref = build4 (ARRAY_REF, fd->iter_type,
9243 counts[fd->ordered], size_zero_node,
9244 NULL_TREE, NULL_TREE);
9245 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9246 true, GSI_SAME_STMT);
9247 expand_omp_build_assign (&gsi, aref, t);
9250 t = build2 (fd->loop.cond_code, boolean_type_node,
9251 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
9252 iend);
9253 gcond *cond_stmt = gimple_build_cond_empty (t);
9254 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9257 /* Remove GIMPLE_OMP_CONTINUE. */
9258 gsi_remove (&gsi, true);
9260 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
9261 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
9263 /* Emit code to get the next parallel iteration in L2_BB. */
9264 gsi = gsi_start_bb (l2_bb);
9266 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
9267 build_fold_addr_expr (istart0),
9268 build_fold_addr_expr (iend0));
9269 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9270 false, GSI_CONTINUE_LINKING);
9271 if (TREE_TYPE (t) != boolean_type_node)
9272 t = fold_build2 (NE_EXPR, boolean_type_node,
9273 t, build_int_cst (TREE_TYPE (t), 0));
9274 gcond *cond_stmt = gimple_build_cond_empty (t);
9275 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
9278 /* Add the loop cleanup function. */
9279 gsi = gsi_last_bb (exit_bb);
9280 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
9281 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
9282 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
9283 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
9284 else
9285 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
9286 gcall *call_stmt = gimple_build_call (t, 0);
9287 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
9288 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
9289 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
9290 if (fd->ordered)
9292 tree arr = counts[fd->ordered];
9293 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
9294 TREE_THIS_VOLATILE (clobber) = 1;
9295 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
9296 GSI_SAME_STMT);
9298 gsi_remove (&gsi, true);
9300 /* Connect the new blocks. */
9301 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
9302 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
9304 if (!broken_loop)
9306 gimple_seq phis;
9308 e = find_edge (cont_bb, l3_bb);
9309 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
9311 phis = phi_nodes (l3_bb);
9312 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
9314 gimple *phi = gsi_stmt (gsi);
9315 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
9316 PHI_ARG_DEF_FROM_EDGE (phi, e));
9318 remove_edge (e);
9320 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
9321 e = find_edge (cont_bb, l1_bb);
9322 if (e == NULL)
9324 e = BRANCH_EDGE (cont_bb);
9325 gcc_assert (single_succ (e->dest) == l1_bb);
9327 if (gimple_omp_for_combined_p (fd->for_stmt))
9329 remove_edge (e);
9330 e = NULL;
9332 else if (fd->collapse > 1)
9334 remove_edge (e);
9335 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9337 else
9338 e->flags = EDGE_TRUE_VALUE;
9339 if (e)
9341 e->probability = REG_BR_PROB_BASE * 7 / 8;
9342 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
9344 else
9346 e = find_edge (cont_bb, l2_bb);
9347 e->flags = EDGE_FALLTHRU;
9349 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
9351 if (gimple_in_ssa_p (cfun))
9353 /* Add phis to the outer loop that connect to the phis in the inner,
9354 original loop, and move the loop entry value of the inner phi to
9355 the loop entry value of the outer phi. */
9356 gphi_iterator psi;
9357 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
9359 source_location locus;
9360 gphi *nphi;
9361 gphi *exit_phi = psi.phi ();
9363 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
9364 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
9366 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
9367 edge latch_to_l1 = find_edge (latch, l1_bb);
9368 gphi *inner_phi
9369 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
9371 tree t = gimple_phi_result (exit_phi);
9372 tree new_res = copy_ssa_name (t, NULL);
9373 nphi = create_phi_node (new_res, l0_bb);
9375 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
9376 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
9377 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
9378 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
9379 add_phi_arg (nphi, t, entry_to_l0, locus);
9381 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
9382 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
9384 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
9388 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
9389 recompute_dominator (CDI_DOMINATORS, l2_bb));
9390 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
9391 recompute_dominator (CDI_DOMINATORS, l3_bb));
9392 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
9393 recompute_dominator (CDI_DOMINATORS, l0_bb));
9394 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
9395 recompute_dominator (CDI_DOMINATORS, l1_bb));
9397 /* We enter expand_omp_for_generic with a loop. This original loop may
9398 have its own loop struct, or it may be part of an outer loop struct
9399 (which may be the fake loop). */
9400 struct loop *outer_loop = entry_bb->loop_father;
9401 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
9403 add_bb_to_loop (l2_bb, outer_loop);
9405 /* We've added a new loop around the original loop. Allocate the
9406 corresponding loop struct. */
9407 struct loop *new_loop = alloc_loop ();
9408 new_loop->header = l0_bb;
9409 new_loop->latch = l2_bb;
9410 add_loop (new_loop, outer_loop);
9412 /* Allocate a loop structure for the original loop unless we already
9413 had one. */
9414 if (!orig_loop_has_loop_struct
9415 && !gimple_omp_for_combined_p (fd->for_stmt))
9417 struct loop *orig_loop = alloc_loop ();
9418 orig_loop->header = l1_bb;
9419 /* The loop may have multiple latches. */
9420 add_loop (orig_loop, new_loop);
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
 */
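/* Illustrative sketch, not part of GCC: a plain C model of the
   partitioning computed by the pseudocode above.  The first n % nthreads
   threads absorb one leftover iteration each, so any two threads' shares
   differ by at most one.  E.g. n = 10, nthreads = 4 yields the ranges
   [0,3) [3,6) [6,8) [8,10).  The function name and signature are
   hypothetical.  */

static void
example_static_nochunk_range (long n, long nthreads, long threadid,
			      long *s0, long *e0)
{
  long q = n / nthreads;	/* Base share of iterations.  */
  long tt = n % nthreads;	/* Leftover iterations.  */
  if (threadid < tt)
    {
      /* Threads 0 .. tt-1 each take one extra iteration.  */
      tt = 0;
      q = q + 1;
    }
  *s0 = q * threadid + tt;	/* First iteration owned by this thread.  */
  *e0 = *s0 + q;		/* One past the last owned iteration.  */
}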
9463 static void
9464 expand_omp_for_static_nochunk (struct omp_region *region,
9465 struct omp_for_data *fd,
9466 gimple *inner_stmt)
9468 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
9469 tree type, itype, vmain, vback;
9470 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
9471 basic_block body_bb, cont_bb, collapse_bb = NULL;
9472 basic_block fin_bb;
9473 gimple_stmt_iterator gsi;
9474 edge ep;
9475 bool broken_loop = region->cont == NULL;
9476 tree *counts = NULL;
9477 tree n1, n2, step;
9479 itype = type = TREE_TYPE (fd->loop.v);
9480 if (POINTER_TYPE_P (type))
9481 itype = signed_type_for (type);
9483 entry_bb = region->entry;
9484 cont_bb = region->cont;
9485 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
9486 fin_bb = BRANCH_EDGE (entry_bb)->dest;
9487 gcc_assert (broken_loop
9488 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
9489 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
9490 body_bb = single_succ (seq_start_bb);
9491 if (!broken_loop)
9493 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9494 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9495 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9497 exit_bb = region->exit;
9499 /* Iteration space partitioning goes in ENTRY_BB. */
9500 gsi = gsi_last_bb (entry_bb);
9501 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9503 if (fd->collapse > 1)
9505 int first_zero_iter = -1, dummy = -1;
9506 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9508 counts = XALLOCAVEC (tree, fd->collapse);
9509 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9510 fin_bb, first_zero_iter,
9511 dummy_bb, dummy, l2_dom_bb);
9512 t = NULL_TREE;
9514 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9515 t = integer_one_node;
9516 else
9517 t = fold_binary (fd->loop.cond_code, boolean_type_node,
9518 fold_convert (type, fd->loop.n1),
9519 fold_convert (type, fd->loop.n2));
9520 if (fd->collapse == 1
9521 && TYPE_UNSIGNED (type)
9522 && (t == NULL_TREE || !integer_onep (t)))
9524 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
9525 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
9526 true, GSI_SAME_STMT);
9527 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
9528 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
9529 true, GSI_SAME_STMT);
9530 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9531 NULL_TREE, NULL_TREE);
9532 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9533 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
9534 expand_omp_regimplify_p, NULL, NULL)
9535 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
9536 expand_omp_regimplify_p, NULL, NULL))
9538 gsi = gsi_for_stmt (cond_stmt);
9539 gimple_regimplify_operands (cond_stmt, &gsi);
9541 ep = split_block (entry_bb, cond_stmt);
9542 ep->flags = EDGE_TRUE_VALUE;
9543 entry_bb = ep->dest;
9544 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
9545 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
9546 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
9547 if (gimple_in_ssa_p (cfun))
9549 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
9550 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
9551 !gsi_end_p (gpi); gsi_next (&gpi))
9553 gphi *phi = gpi.phi ();
9554 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
9555 ep, UNKNOWN_LOCATION);
9558 gsi = gsi_last_bb (entry_bb);
9561 switch (gimple_omp_for_kind (fd->for_stmt))
9563 case GF_OMP_FOR_KIND_FOR:
9564 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
9565 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
9566 break;
9567 case GF_OMP_FOR_KIND_DISTRIBUTE:
9568 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
9569 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
9570 break;
9571 default:
9572 gcc_unreachable ();
9574 nthreads = build_call_expr (nthreads, 0);
9575 nthreads = fold_convert (itype, nthreads);
9576 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
9577 true, GSI_SAME_STMT);
9578 threadid = build_call_expr (threadid, 0);
9579 threadid = fold_convert (itype, threadid);
9580 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9581 true, GSI_SAME_STMT);
9583 n1 = fd->loop.n1;
9584 n2 = fd->loop.n2;
9585 step = fd->loop.step;
9586 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9588 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
9589 OMP_CLAUSE__LOOPTEMP_);
9590 gcc_assert (innerc);
9591 n1 = OMP_CLAUSE_DECL (innerc);
9592 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9593 OMP_CLAUSE__LOOPTEMP_);
9594 gcc_assert (innerc);
9595 n2 = OMP_CLAUSE_DECL (innerc);
9597 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9598 true, NULL_TREE, true, GSI_SAME_STMT);
9599 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
9600 true, NULL_TREE, true, GSI_SAME_STMT);
9601 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9602 true, NULL_TREE, true, GSI_SAME_STMT);
9604 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
9605 t = fold_build2 (PLUS_EXPR, itype, step, t);
9606 t = fold_build2 (PLUS_EXPR, itype, t, n2);
9607 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
9608 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
9609 t = fold_build2 (TRUNC_DIV_EXPR, itype,
9610 fold_build1 (NEGATE_EXPR, itype, t),
9611 fold_build1 (NEGATE_EXPR, itype, step));
9612 else
9613 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
9614 t = fold_convert (itype, t);
9615 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9617 q = create_tmp_reg (itype, "q");
9618 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
9619 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9620 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
9622 tt = create_tmp_reg (itype, "tt");
9623 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
9624 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9625 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
9627 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
9628 gcond *cond_stmt = gimple_build_cond_empty (t);
9629 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9631 second_bb = split_block (entry_bb, cond_stmt)->dest;
9632 gsi = gsi_last_bb (second_bb);
9633 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9635 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
9636 GSI_SAME_STMT);
9637 gassign *assign_stmt
9638 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
9639 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9641 third_bb = split_block (second_bb, assign_stmt)->dest;
9642 gsi = gsi_last_bb (third_bb);
9643 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9645 t = build2 (MULT_EXPR, itype, q, threadid);
9646 t = build2 (PLUS_EXPR, itype, t, tt);
9647 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9649 t = fold_build2 (PLUS_EXPR, itype, s0, q);
9650 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9652 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
9653 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9655 /* Remove the GIMPLE_OMP_FOR statement. */
9656 gsi_remove (&gsi, true);
9658 /* Setup code for sequential iteration goes in SEQ_START_BB. */
9659 gsi = gsi_start_bb (seq_start_bb);
9661 tree startvar = fd->loop.v;
9662 tree endvar = NULL_TREE;
9664 if (gimple_omp_for_combined_p (fd->for_stmt))
9666 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
9667 ? gimple_omp_parallel_clauses (inner_stmt)
9668 : gimple_omp_for_clauses (inner_stmt);
9669 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
9670 gcc_assert (innerc);
9671 startvar = OMP_CLAUSE_DECL (innerc);
9672 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9673 OMP_CLAUSE__LOOPTEMP_);
9674 gcc_assert (innerc);
9675 endvar = OMP_CLAUSE_DECL (innerc);
9676 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
9677 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
9679 int i;
9680 for (i = 1; i < fd->collapse; i++)
9682 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9683 OMP_CLAUSE__LOOPTEMP_);
9684 gcc_assert (innerc);
9686 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9687 OMP_CLAUSE__LOOPTEMP_);
9688 if (innerc)
9690 /* If needed (distribute parallel for with lastprivate),
9691 propagate down the total number of iterations. */
9692 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
9693 fd->loop.n2);
9694 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
9695 GSI_CONTINUE_LINKING);
9696 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
9697 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9701 t = fold_convert (itype, s0);
9702 t = fold_build2 (MULT_EXPR, itype, t, step);
9703 if (POINTER_TYPE_P (type))
9704 t = fold_build_pointer_plus (n1, t);
9705 else
9706 t = fold_build2 (PLUS_EXPR, type, t, n1);
9707 t = fold_convert (TREE_TYPE (startvar), t);
9708 t = force_gimple_operand_gsi (&gsi, t,
9709 DECL_P (startvar)
9710 && TREE_ADDRESSABLE (startvar),
9711 NULL_TREE, false, GSI_CONTINUE_LINKING);
9712 assign_stmt = gimple_build_assign (startvar, t);
9713 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9715 t = fold_convert (itype, e0);
9716 t = fold_build2 (MULT_EXPR, itype, t, step);
9717 if (POINTER_TYPE_P (type))
9718 t = fold_build_pointer_plus (n1, t);
9719 else
9720 t = fold_build2 (PLUS_EXPR, type, t, n1);
9721 t = fold_convert (TREE_TYPE (startvar), t);
9722 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9723 false, GSI_CONTINUE_LINKING);
9724 if (endvar)
9726 assign_stmt = gimple_build_assign (endvar, e);
9727 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9728 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
9729 assign_stmt = gimple_build_assign (fd->loop.v, e);
9730 else
9731 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
9732 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9734 /* Handle linear clause adjustments. */
9735 tree itercnt = NULL_TREE;
9736 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9737 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9738 c; c = OMP_CLAUSE_CHAIN (c))
9739 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9740 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9742 tree d = OMP_CLAUSE_DECL (c);
9743 bool is_ref = is_reference (d);
9744 tree t = d, a, dest;
9745 if (is_ref)
9746 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9747 if (itercnt == NULL_TREE)
9749 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9751 itercnt = fold_build2 (MINUS_EXPR, itype,
9752 fold_convert (itype, n1),
9753 fold_convert (itype, fd->loop.n1));
9754 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
9755 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
9756 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
9757 NULL_TREE, false,
9758 GSI_CONTINUE_LINKING);
9760 else
9761 itercnt = s0;
9763 tree type = TREE_TYPE (t);
9764 if (POINTER_TYPE_P (type))
9765 type = sizetype;
9766 a = fold_build2 (MULT_EXPR, type,
9767 fold_convert (type, itercnt),
9768 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
9769 dest = unshare_expr (t);
9770 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
9771 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
9772 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9773 false, GSI_CONTINUE_LINKING);
9774 assign_stmt = gimple_build_assign (dest, t);
9775 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9777 if (fd->collapse > 1)
9778 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
9780 if (!broken_loop)
9782 /* The code controlling the sequential loop replaces the
9783 GIMPLE_OMP_CONTINUE. */
9784 gsi = gsi_last_bb (cont_bb);
9785 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
9786 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
9787 vmain = gimple_omp_continue_control_use (cont_stmt);
9788 vback = gimple_omp_continue_control_def (cont_stmt);
9790 if (!gimple_omp_for_combined_p (fd->for_stmt))
9792 if (POINTER_TYPE_P (type))
9793 t = fold_build_pointer_plus (vmain, step);
9794 else
9795 t = fold_build2 (PLUS_EXPR, type, vmain, step);
9796 t = force_gimple_operand_gsi (&gsi, t,
9797 DECL_P (vback)
9798 && TREE_ADDRESSABLE (vback),
9799 NULL_TREE, true, GSI_SAME_STMT);
9800 assign_stmt = gimple_build_assign (vback, t);
9801 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9803 t = build2 (fd->loop.cond_code, boolean_type_node,
9804 DECL_P (vback) && TREE_ADDRESSABLE (vback)
9805 ? t : vback, e);
9806 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9809 /* Remove the GIMPLE_OMP_CONTINUE statement. */
9810 gsi_remove (&gsi, true);
9812 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
9813 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
9816 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
9817 gsi = gsi_last_bb (exit_bb);
9818 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
9820 t = gimple_omp_return_lhs (gsi_stmt (gsi));
9821 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
9823 gsi_remove (&gsi, true);
9825 /* Connect all the blocks. */
9826 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
9827 ep->probability = REG_BR_PROB_BASE / 4 * 3;
9828 ep = find_edge (entry_bb, second_bb);
9829 ep->flags = EDGE_TRUE_VALUE;
9830 ep->probability = REG_BR_PROB_BASE / 4;
9831 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
9832 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
9834 if (!broken_loop)
9836 ep = find_edge (cont_bb, body_bb);
9837 if (ep == NULL)
9839 ep = BRANCH_EDGE (cont_bb);
9840 gcc_assert (single_succ (ep->dest) == body_bb);
9842 if (gimple_omp_for_combined_p (fd->for_stmt))
9844 remove_edge (ep);
9845 ep = NULL;
9847 else if (fd->collapse > 1)
9849 remove_edge (ep);
9850 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9852 else
9853 ep->flags = EDGE_TRUE_VALUE;
9854 find_edge (cont_bb, fin_bb)->flags
9855 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
9858 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
9859 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
9860 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
9862 set_immediate_dominator (CDI_DOMINATORS, body_bb,
9863 recompute_dominator (CDI_DOMINATORS, body_bb));
9864 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
9865 recompute_dominator (CDI_DOMINATORS, fin_bb));
9867 struct loop *loop = body_bb->loop_father;
9868 if (loop != entry_bb->loop_father)
9870 gcc_assert (broken_loop || loop->header == body_bb);
9871 gcc_assert (broken_loop
9872 || loop->latch == region->cont
9873 || single_pred (loop->latch) == region->cont);
9874 return;
9877 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
9879 loop = alloc_loop ();
9880 loop->header = body_bb;
9881 if (collapse_bb == NULL)
9882 loop->latch = cont_bb;
9883 add_loop (loop, body_bb->loop_father);
9887 /* Return phi in E->DEST with ARG on edge E. */
9889 static gphi *
9890 find_phi_with_arg_on_edge (tree arg, edge e)
9892 basic_block bb = e->dest;
9894 for (gphi_iterator gpi = gsi_start_phis (bb);
9895 !gsi_end_p (gpi);
9896 gsi_next (&gpi))
9898 gphi *phi = gpi.phi ();
9899 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
9900 return phi;
9903 return NULL;
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min(s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
 */
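/* Illustrative sketch, not part of GCC: a plain C model of the chunk
   assignment in the pseudocode above.  On trip number TRIP, thread
   THREADID owns the half-open range [s0, e0); chunks are handed out
   round-robin across trips.  E.g. with n = 10, chunk = 2 and
   nthreads = 3, thread 1 gets [2,4) on trip 0 and [8,10) on trip 1.
   The names are hypothetical.  */

static void
example_static_chunk_range (long n, long chunk, long nthreads,
			    long threadid, long trip, long *s0, long *e0)
{
  *s0 = (trip * nthreads + threadid) * chunk;
  *e0 = *s0 + chunk;
  if (*e0 > n)
    *e0 = n;			/* Clamp the final, possibly partial chunk.  */
  /* The generated code keeps looping (bumping trip) while s0 < n.  */
}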
9944 static void
9945 expand_omp_for_static_chunk (struct omp_region *region,
9946 struct omp_for_data *fd, gimple *inner_stmt)
9948 tree n, s0, e0, e, t;
9949 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
9950 tree type, itype, vmain, vback, vextra;
9951 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
9952 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
9953 gimple_stmt_iterator gsi;
9954 edge se;
9955 bool broken_loop = region->cont == NULL;
9956 tree *counts = NULL;
9957 tree n1, n2, step;
9959 itype = type = TREE_TYPE (fd->loop.v);
9960 if (POINTER_TYPE_P (type))
9961 itype = signed_type_for (type);
9963 entry_bb = region->entry;
9964 se = split_block (entry_bb, last_stmt (entry_bb));
9965 entry_bb = se->src;
9966 iter_part_bb = se->dest;
9967 cont_bb = region->cont;
9968 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
9969 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
9970 gcc_assert (broken_loop
9971 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
9972 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
9973 body_bb = single_succ (seq_start_bb);
9974 if (!broken_loop)
9976 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9977 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9978 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9979 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
9981 exit_bb = region->exit;
9983 /* Trip and adjustment setup goes in ENTRY_BB. */
9984 gsi = gsi_last_bb (entry_bb);
9985 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9987 if (fd->collapse > 1)
9989 int first_zero_iter = -1, dummy = -1;
9990 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9992 counts = XALLOCAVEC (tree, fd->collapse);
9993 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9994 fin_bb, first_zero_iter,
9995 dummy_bb, dummy, l2_dom_bb);
9996 t = NULL_TREE;
9998 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9999 t = integer_one_node;
10000 else
10001 t = fold_binary (fd->loop.cond_code, boolean_type_node,
10002 fold_convert (type, fd->loop.n1),
10003 fold_convert (type, fd->loop.n2));
10004 if (fd->collapse == 1
10005 && TYPE_UNSIGNED (type)
10006 && (t == NULL_TREE || !integer_onep (t)))
10008 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
10009 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
10010 true, GSI_SAME_STMT);
10011 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
10012 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
10013 true, GSI_SAME_STMT);
10014 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
10015 NULL_TREE, NULL_TREE);
10016 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
10017 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
10018 expand_omp_regimplify_p, NULL, NULL)
10019 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
10020 expand_omp_regimplify_p, NULL, NULL))
10022 gsi = gsi_for_stmt (cond_stmt);
10023 gimple_regimplify_operands (cond_stmt, &gsi);
10025 se = split_block (entry_bb, cond_stmt);
10026 se->flags = EDGE_TRUE_VALUE;
10027 entry_bb = se->dest;
10028 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
10029 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
10030 se->probability = REG_BR_PROB_BASE / 2000 - 1;
10031 if (gimple_in_ssa_p (cfun))
10033 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
10034 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
10035 !gsi_end_p (gpi); gsi_next (&gpi))
10037 gphi *phi = gpi.phi ();
10038 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
10039 se, UNKNOWN_LOCATION);
10042 gsi = gsi_last_bb (entry_bb);
10045 switch (gimple_omp_for_kind (fd->for_stmt))
10047 case GF_OMP_FOR_KIND_FOR:
10048 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
10049 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
10050 break;
10051 case GF_OMP_FOR_KIND_DISTRIBUTE:
10052 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
10053 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
10054 break;
10055 default:
10056 gcc_unreachable ();
10058 nthreads = build_call_expr (nthreads, 0);
10059 nthreads = fold_convert (itype, nthreads);
10060 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
10061 true, GSI_SAME_STMT);
10062 threadid = build_call_expr (threadid, 0);
10063 threadid = fold_convert (itype, threadid);
10064 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
10065 true, GSI_SAME_STMT);
10067 n1 = fd->loop.n1;
10068 n2 = fd->loop.n2;
10069 step = fd->loop.step;
10070 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10072 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10073 OMP_CLAUSE__LOOPTEMP_);
10074 gcc_assert (innerc);
10075 n1 = OMP_CLAUSE_DECL (innerc);
10076 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10077 OMP_CLAUSE__LOOPTEMP_);
10078 gcc_assert (innerc);
10079 n2 = OMP_CLAUSE_DECL (innerc);
10081 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
10082 true, NULL_TREE, true, GSI_SAME_STMT);
10083 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
10084 true, NULL_TREE, true, GSI_SAME_STMT);
10085 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
10086 true, NULL_TREE, true, GSI_SAME_STMT);
10087 tree chunk_size = fold_convert (itype, fd->chunk_size);
10088 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
10089 chunk_size
10090 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
10091 GSI_SAME_STMT);
10093 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
10094 t = fold_build2 (PLUS_EXPR, itype, step, t);
10095 t = fold_build2 (PLUS_EXPR, itype, t, n2);
10096 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
10097 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
10098 t = fold_build2 (TRUNC_DIV_EXPR, itype,
10099 fold_build1 (NEGATE_EXPR, itype, t),
10100 fold_build1 (NEGATE_EXPR, itype, step));
10101 else
10102 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
10103 t = fold_convert (itype, t);
10104 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10105 true, GSI_SAME_STMT);
10107 trip_var = create_tmp_reg (itype, ".trip");
10108 if (gimple_in_ssa_p (cfun))
10110 trip_init = make_ssa_name (trip_var);
10111 trip_main = make_ssa_name (trip_var);
10112 trip_back = make_ssa_name (trip_var);
10114 else
10116 trip_init = trip_var;
10117 trip_main = trip_var;
10118 trip_back = trip_var;
10121 gassign *assign_stmt
10122 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
10123 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
10125 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
10126 t = fold_build2 (MULT_EXPR, itype, t, step);
10127 if (POINTER_TYPE_P (type))
10128 t = fold_build_pointer_plus (n1, t);
10129 else
10130 t = fold_build2 (PLUS_EXPR, type, t, n1);
10131 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10132 true, GSI_SAME_STMT);
10134 /* Remove the GIMPLE_OMP_FOR. */
10135 gsi_remove (&gsi, true);
10137 gimple_stmt_iterator gsif = gsi;
10139 /* Iteration space partitioning goes in ITER_PART_BB. */
10140 gsi = gsi_last_bb (iter_part_bb);
10142 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
10143 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
10144 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
10145 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10146 false, GSI_CONTINUE_LINKING);
10148 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
10149 t = fold_build2 (MIN_EXPR, itype, t, n);
10150 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10151 false, GSI_CONTINUE_LINKING);
10153 t = build2 (LT_EXPR, boolean_type_node, s0, n);
10154 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
10156 /* Setup code for sequential iteration goes in SEQ_START_BB. */
10157 gsi = gsi_start_bb (seq_start_bb);
10159 tree startvar = fd->loop.v;
10160 tree endvar = NULL_TREE;
10162 if (gimple_omp_for_combined_p (fd->for_stmt))
10164 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
10165 ? gimple_omp_parallel_clauses (inner_stmt)
10166 : gimple_omp_for_clauses (inner_stmt);
10167 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
10168 gcc_assert (innerc);
10169 startvar = OMP_CLAUSE_DECL (innerc);
10170 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10171 OMP_CLAUSE__LOOPTEMP_);
10172 gcc_assert (innerc);
10173 endvar = OMP_CLAUSE_DECL (innerc);
10174 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
10175 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
10177 int i;
10178 for (i = 1; i < fd->collapse; i++)
10180 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10181 OMP_CLAUSE__LOOPTEMP_);
10182 gcc_assert (innerc);
10184 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10185 OMP_CLAUSE__LOOPTEMP_);
10186 if (innerc)
10188 /* If needed (distribute parallel for with lastprivate),
10189 propagate down the total number of iterations. */
10190 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
10191 fd->loop.n2);
10192 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
10193 GSI_CONTINUE_LINKING);
10194 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
10195 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10200 t = fold_convert (itype, s0);
10201 t = fold_build2 (MULT_EXPR, itype, t, step);
10202 if (POINTER_TYPE_P (type))
10203 t = fold_build_pointer_plus (n1, t);
10204 else
10205 t = fold_build2 (PLUS_EXPR, type, t, n1);
10206 t = fold_convert (TREE_TYPE (startvar), t);
10207 t = force_gimple_operand_gsi (&gsi, t,
10208 DECL_P (startvar)
10209 && TREE_ADDRESSABLE (startvar),
10210 NULL_TREE, false, GSI_CONTINUE_LINKING);
10211 assign_stmt = gimple_build_assign (startvar, t);
10212 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10214 t = fold_convert (itype, e0);
10215 t = fold_build2 (MULT_EXPR, itype, t, step);
10216 if (POINTER_TYPE_P (type))
10217 t = fold_build_pointer_plus (n1, t);
10218 else
10219 t = fold_build2 (PLUS_EXPR, type, t, n1);
10220 t = fold_convert (TREE_TYPE (startvar), t);
10221 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10222 false, GSI_CONTINUE_LINKING);
10223 if (endvar)
10225 assign_stmt = gimple_build_assign (endvar, e);
10226 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10227 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
10228 assign_stmt = gimple_build_assign (fd->loop.v, e);
10229 else
10230 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
10231 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10233 /* Handle linear clause adjustments. */
10234 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
10235 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
10236 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
10237 c; c = OMP_CLAUSE_CHAIN (c))
10238 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
10239 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
10241 tree d = OMP_CLAUSE_DECL (c);
10242 bool is_ref = is_reference (d);
10243 tree t = d, a, dest;
10244 if (is_ref)
10245 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
10246 tree type = TREE_TYPE (t);
10247 if (POINTER_TYPE_P (type))
10248 type = sizetype;
10249 dest = unshare_expr (t);
10250 tree v = create_tmp_var (TREE_TYPE (t), NULL);
10251 expand_omp_build_assign (&gsif, v, t);
10252 if (itercnt == NULL_TREE)
10254 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10256 itercntbias
10257 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
10258 fold_convert (itype, fd->loop.n1));
10259 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
10260 itercntbias, step);
10261 itercntbias
10262 = force_gimple_operand_gsi (&gsif, itercntbias, true,
10263 NULL_TREE, true,
10264 GSI_SAME_STMT);
10265 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
10266 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
10267 NULL_TREE, false,
10268 GSI_CONTINUE_LINKING);
10270 else
10271 itercnt = s0;
10273 a = fold_build2 (MULT_EXPR, type,
10274 fold_convert (type, itercnt),
10275 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
10276 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
10277 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
10278 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10279 false, GSI_CONTINUE_LINKING);
10280 assign_stmt = gimple_build_assign (dest, t);
10281 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10283 if (fd->collapse > 1)
10284 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
10286 if (!broken_loop)
10288 /* The code controlling the sequential loop goes in CONT_BB,
10289 replacing the GIMPLE_OMP_CONTINUE. */
10290 gsi = gsi_last_bb (cont_bb);
10291 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
10292 vmain = gimple_omp_continue_control_use (cont_stmt);
10293 vback = gimple_omp_continue_control_def (cont_stmt);
10295 if (!gimple_omp_for_combined_p (fd->for_stmt))
10297 if (POINTER_TYPE_P (type))
10298 t = fold_build_pointer_plus (vmain, step);
10299 else
10300 t = fold_build2 (PLUS_EXPR, type, vmain, step);
10301 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
10302 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10303 true, GSI_SAME_STMT);
10304 assign_stmt = gimple_build_assign (vback, t);
10305 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
10307 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
10308 t = build2 (EQ_EXPR, boolean_type_node,
10309 build_int_cst (itype, 0),
10310 build_int_cst (itype, 1));
10311 else
10312 t = build2 (fd->loop.cond_code, boolean_type_node,
10313 DECL_P (vback) && TREE_ADDRESSABLE (vback)
10314 ? t : vback, e);
10315 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
10318 /* Remove GIMPLE_OMP_CONTINUE. */
10319 gsi_remove (&gsi, true);
10321 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
10322 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
10324 /* Trip update code goes into TRIP_UPDATE_BB. */
10325 gsi = gsi_start_bb (trip_update_bb);
10327 t = build_int_cst (itype, 1);
10328 t = build2 (PLUS_EXPR, itype, trip_main, t);
10329 assign_stmt = gimple_build_assign (trip_back, t);
10330 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10333 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
10334 gsi = gsi_last_bb (exit_bb);
10335 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
10337 t = gimple_omp_return_lhs (gsi_stmt (gsi));
10338 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
10340 gsi_remove (&gsi, true);
10342 /* Connect the new blocks. */
10343 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
10344 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
10346 if (!broken_loop)
10348 se = find_edge (cont_bb, body_bb);
10349 if (se == NULL)
10351 se = BRANCH_EDGE (cont_bb);
10352 gcc_assert (single_succ (se->dest) == body_bb);
10354 if (gimple_omp_for_combined_p (fd->for_stmt))
10356 remove_edge (se);
10357 se = NULL;
10359 else if (fd->collapse > 1)
10361 remove_edge (se);
10362 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
10364 else
10365 se->flags = EDGE_TRUE_VALUE;
10366 find_edge (cont_bb, trip_update_bb)->flags
10367 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
10369 redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
10372 if (gimple_in_ssa_p (cfun))
10374 gphi_iterator psi;
10375 gphi *phi;
10376 edge re, ene;
10377 edge_var_map *vm;
10378 size_t i;
10380 gcc_assert (fd->collapse == 1 && !broken_loop);
10382 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
10383 remove arguments of the phi nodes in fin_bb. We need to create
10384 appropriate phi nodes in iter_part_bb instead. */
10385 se = find_edge (iter_part_bb, fin_bb);
10386 re = single_succ_edge (trip_update_bb);
10387 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
10388 ene = single_succ_edge (entry_bb);
10390 psi = gsi_start_phis (fin_bb);
10391 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
10392 gsi_next (&psi), ++i)
10394 gphi *nphi;
10395 source_location locus;
10397 phi = psi.phi ();
10398 t = gimple_phi_result (phi);
10399 gcc_assert (t == redirect_edge_var_map_result (vm));
10401 if (!single_pred_p (fin_bb))
10402 t = copy_ssa_name (t, phi);
10404 nphi = create_phi_node (t, iter_part_bb);
10406 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
10407 locus = gimple_phi_arg_location_from_edge (phi, se);
/* A special case -- fd->loop.v is not yet computed in
   iter_part_bb, so we need to use vextra instead.  */
10411 if (t == fd->loop.v)
10412 t = vextra;
10413 add_phi_arg (nphi, t, ene, locus);
10414 locus = redirect_edge_var_map_location (vm);
10415 tree back_arg = redirect_edge_var_map_def (vm);
10416 add_phi_arg (nphi, back_arg, re, locus);
10417 edge ce = find_edge (cont_bb, body_bb);
10418 if (ce == NULL)
10420 ce = BRANCH_EDGE (cont_bb);
10421 gcc_assert (single_succ (ce->dest) == body_bb);
10422 ce = single_succ_edge (ce->dest);
10424 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
10425 gcc_assert (inner_loop_phi != NULL);
10426 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
10427 find_edge (seq_start_bb, body_bb), locus);
10429 if (!single_pred_p (fin_bb))
10430 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
10432 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
10433 redirect_edge_var_map_clear (re);
10434 if (single_pred_p (fin_bb))
10435 while (1)
10437 psi = gsi_start_phis (fin_bb);
10438 if (gsi_end_p (psi))
10439 break;
10440 remove_phi_node (&psi, false);
10443 /* Make phi node for trip. */
10444 phi = create_phi_node (trip_main, iter_part_bb);
10445 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
10446 UNKNOWN_LOCATION);
10447 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
10448 UNKNOWN_LOCATION);
10451 if (!broken_loop)
10452 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
10453 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
10454 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
10455 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
10456 recompute_dominator (CDI_DOMINATORS, fin_bb));
10457 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
10458 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
10459 set_immediate_dominator (CDI_DOMINATORS, body_bb,
10460 recompute_dominator (CDI_DOMINATORS, body_bb));
10462 if (!broken_loop)
10464 struct loop *loop = body_bb->loop_father;
10465 struct loop *trip_loop = alloc_loop ();
10466 trip_loop->header = iter_part_bb;
10467 trip_loop->latch = trip_update_bb;
10468 add_loop (trip_loop, iter_part_bb->loop_father);
10470 if (loop != entry_bb->loop_father)
10472 gcc_assert (loop->header == body_bb);
10473 gcc_assert (loop->latch == region->cont
10474 || single_pred (loop->latch) == region->cont);
10475 trip_loop->inner = loop;
10476 return;
10479 if (!gimple_omp_for_combined_p (fd->for_stmt))
10481 loop = alloc_loop ();
10482 loop->header = body_bb;
10483 if (collapse_bb == NULL)
10484 loop->latch = cont_bb;
10485 add_loop (loop, trip_loop);
/* A subroutine of expand_omp_for.  Generate code for a _Cilk_for loop.
   Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">" or "!=", we generate pseudocode

	for (ind_var = low; ind_var < high; ind_var++)
	  {
	    V = n1 + (ind_var * STEP)

	    <BODY>
	  }

   In the above pseudocode, low and high are function parameters of the
   child function.  In the function below, we insert a temporary variable
   initialized by calls to two OMP functions that cannot appear in the
   body of _Cilk_for itself (since OMP_FOR cannot be mixed with
   _Cilk_for); those calls are later replaced with low and high by the
   function that handles taskreg.  */
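/* Illustrative sketch, not part of GCC: the shape of the child function
   body this lowering produces once the taskreg handler has substituted
   the runtime-supplied __low and __high parameters.  All names here are
   hypothetical.  */

static void
example_cilk_for_child (long __low, long __high, long n1, long step)
{
  long ind_var, v;
  for (ind_var = __low; ind_var < __high; ind_var++)
    {
      v = n1 + ind_var * step;	/* Recover the user's loop variable V.  */
      /* ... <BODY> uses v here ...  */
    }
}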
10511 static void
10512 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
10514 bool broken_loop = region->cont == NULL;
10515 basic_block entry_bb = region->entry;
10516 basic_block cont_bb = region->cont;
10518 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10519 gcc_assert (broken_loop
10520 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10521 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10522 basic_block l1_bb, l2_bb;
10524 if (!broken_loop)
10526 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10527 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10528 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10529 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10531 else
10533 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10534 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10535 l2_bb = single_succ (l1_bb);
10537 basic_block exit_bb = region->exit;
10538 basic_block l2_dom_bb = NULL;
10540 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
/* The statements below, up to the "tree high_val = ..." declaration,
   are pseudo statements used to pass information to expand_omp_taskreg.
   low_val and high_val will be replaced by the __low and __high
   parameters from the child function.

   The call_exprs part is a place-holder; it mainly serves to tell the
   top-level part distinctly that this is where low and high should be
   put (the reasoning is given in the header comment).  */
10552 tree child_fndecl
10553 = gimple_omp_parallel_child_fn (
10554 as_a <gomp_parallel *> (last_stmt (region->outer->entry)));
10555 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
10556 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
10558 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
10559 high_val = t;
10560 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
10561 low_val = t;
10563 gcc_assert (low_val && high_val);
10565 tree type = TREE_TYPE (low_val);
10566 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
10567 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10569 /* Not needed in SSA form right now. */
10570 gcc_assert (!gimple_in_ssa_p (cfun));
10571 if (l2_dom_bb == NULL)
10572 l2_dom_bb = l1_bb;
10574 tree n1 = low_val;
10575 tree n2 = high_val;
10577 gimple *stmt = gimple_build_assign (ind_var, n1);
10579 /* Replace the GIMPLE_OMP_FOR statement. */
10580 gsi_replace (&gsi, stmt, true);
10582 if (!broken_loop)
10584 /* Code to control the increment goes in the CONT_BB. */
10585 gsi = gsi_last_bb (cont_bb);
10586 stmt = gsi_stmt (gsi);
10587 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10588 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
10589 build_one_cst (type));
10591 /* Replace GIMPLE_OMP_CONTINUE. */
10592 gsi_replace (&gsi, stmt, true);
10595 /* Emit the condition in L1_BB. */
10596 gsi = gsi_after_labels (l1_bb);
10597 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
10598 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
10599 fd->loop.step);
10600 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
10601 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10602 fd->loop.n1, fold_convert (sizetype, t));
10603 else
10604 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10605 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
10606 t = fold_convert (TREE_TYPE (fd->loop.v), t);
10607 expand_omp_build_assign (&gsi, fd->loop.v, t);
10609 /* The condition is always '<' since the runtime will fill in the low
10610 and high values. */
10611 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
10612 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10614 /* Remove GIMPLE_OMP_RETURN. */
10615 gsi = gsi_last_bb (exit_bb);
10616 gsi_remove (&gsi, true);
10618 /* Connect the new blocks. */
10619 remove_edge (FALLTHRU_EDGE (entry_bb));
10621 edge e, ne;
10622 if (!broken_loop)
10624 remove_edge (BRANCH_EDGE (entry_bb));
10625 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10627 e = BRANCH_EDGE (l1_bb);
10628 ne = FALLTHRU_EDGE (l1_bb);
10629 e->flags = EDGE_TRUE_VALUE;
10631 else
10633 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10635 ne = single_succ_edge (l1_bb);
10636 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10639 ne->flags = EDGE_FALSE_VALUE;
10640 e->probability = REG_BR_PROB_BASE * 7 / 8;
10641 ne->probability = REG_BR_PROB_BASE / 8;
10643 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
10644 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
10645 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
10647 if (!broken_loop)
10649 struct loop *loop = alloc_loop ();
10650 loop->header = l1_bb;
10651 loop->latch = cont_bb;
10652 add_loop (loop, l1_bb->loop_father);
10653 loop->safelen = INT_MAX;
10656 /* Pick the correct library function based on the precision of the
10657 induction variable type. */
10658 tree lib_fun = NULL_TREE;
10659 if (TYPE_PRECISION (type) == 32)
10660 lib_fun = cilk_for_32_fndecl;
10661 else if (TYPE_PRECISION (type) == 64)
10662 lib_fun = cilk_for_64_fndecl;
10663 else
10664 gcc_unreachable ();
10666 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
/* WS_ARGS contains the library function flavor to call
   (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32) and the
   user-defined grain value.  If the user does not define one, then zero
   is passed in by the parser.  */
10672 vec_alloc (region->ws_args, 2);
10673 region->ws_args->quick_push (lib_fun);
10674 region->ws_args->quick_push (fd->chunk_size);
/* A subroutine of expand_omp_for.  Generate code for a simd
   non-worksharing loop.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:

   For collapsed loops, given parameters:
     collapse(3)
     for (V1 = N11; V1 cond1 N12; V1 += STEP1)
       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	   BODY;

   we generate pseudocode

	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	V = 0;
	V1 = N11;
	V2 = N21;
	V3 = N31;
	goto L1;
    L0:
	BODY;
	V += 1;
	V3 += STEP3;
	V2 += (V3 cond3 N32) ? 0 : STEP2;
	V3 = (V3 cond3 N32) ? V3 : N31;
	V1 += (V2 cond2 N22) ? 0 : STEP1;
	V2 = (V2 cond2 N22) ? V2 : N21;
    L1:
	if (V < count) goto L0; else goto L2;
    L2:
 */
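/* Illustrative sketch, not part of GCC: the collapse(3) linearization
   above, modelled in plain C for unit steps and zero-based bounds.  A
   single counter v walks the combined iteration space while v3 varies
   fastest and carries into v2 and v1 like the digits of an odometer.
   The function name is hypothetical.  */

static void
example_collapse3_walk (long count1, long count2, long count3)
{
  long count = count1 * count2 * count3;
  long v, v1 = 0, v2 = 0, v3 = 0;
  for (v = 0; v < count; v++)
    {
      /* ... BODY sees the triple (v1, v2, v3) here ...  */
      v3++;
      if (v3 == count3)		/* Innermost index wrapped: carry.  */
	{
	  v3 = 0;
	  v2++;
	}
      if (v2 == count2)		/* Middle index wrapped: carry.  */
	{
	  v2 = 0;
	  v1++;
	}
    }
}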
10737 static void
10738 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
10740 tree type, t;
10741 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
10742 gimple_stmt_iterator gsi;
10743 gimple *stmt;
10744 gcond *cond_stmt;
10745 bool broken_loop = region->cont == NULL;
10746 edge e, ne;
10747 tree *counts = NULL;
10748 int i;
10749 int safelen_int = INT_MAX;
10750 tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10751 OMP_CLAUSE_SAFELEN);
10752 tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10753 OMP_CLAUSE__SIMDUID_);
10754 tree n1, n2;
10756 if (safelen)
10758 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
10759 if (TREE_CODE (safelen) != INTEGER_CST)
10760 safelen_int = 0;
10761 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
10762 safelen_int = tree_to_uhwi (safelen);
10763 if (safelen_int == 1)
10764 safelen_int = 0;
10766 type = TREE_TYPE (fd->loop.v);
10767 entry_bb = region->entry;
10768 cont_bb = region->cont;
10769 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10770 gcc_assert (broken_loop
10771 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10772 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10773 if (!broken_loop)
10775 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10777 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10778 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10780 else
10782 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10783 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10784 l2_bb = single_succ (l1_bb);
10786 exit_bb = region->exit;
10787 l2_dom_bb = NULL;
10789 gsi = gsi_last_bb (entry_bb);
10791 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10792 /* Not needed in SSA form right now. */
10793 gcc_assert (!gimple_in_ssa_p (cfun));
10794 if (fd->collapse > 1)
10796 int first_zero_iter = -1, dummy = -1;
10797 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
10799 counts = XALLOCAVEC (tree, fd->collapse);
10800 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10801 zero_iter_bb, first_zero_iter,
10802 dummy_bb, dummy, l2_dom_bb);
10804 if (l2_dom_bb == NULL)
10805 l2_dom_bb = l1_bb;
10807 n1 = fd->loop.n1;
10808 n2 = fd->loop.n2;
10809 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10811 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10812 OMP_CLAUSE__LOOPTEMP_);
10813 gcc_assert (innerc);
10814 n1 = OMP_CLAUSE_DECL (innerc);
10815 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10816 OMP_CLAUSE__LOOPTEMP_);
10817 gcc_assert (innerc);
10818 n2 = OMP_CLAUSE_DECL (innerc);
10820 tree step = fd->loop.step;
10822 bool is_simt = (safelen_int > 1
10823 && find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10824 OMP_CLAUSE__SIMT_));
10825 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
10826 if (is_simt)
10828 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
10829 simt_lane = create_tmp_var (unsigned_type_node);
10830 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
10831 gimple_call_set_lhs (g, simt_lane);
10832 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10833 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
10834 fold_convert (TREE_TYPE (step), simt_lane));
10835 n1 = fold_convert (type, n1);
10836 if (POINTER_TYPE_P (type))
10837 n1 = fold_build_pointer_plus (n1, offset);
10838 else
10839 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
10841 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
10842 if (fd->collapse > 1)
10843 simt_maxlane = build_one_cst (unsigned_type_node);
10844 else if (safelen_int < omp_max_simt_vf ())
10845 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
10846 tree vf
10847 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
10848 unsigned_type_node, 0);
10849 if (simt_maxlane)
10850 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
10851 vf = fold_convert (TREE_TYPE (step), vf);
10852 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
10855 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
10856 if (fd->collapse > 1)
10858 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10860 gsi_prev (&gsi);
10861 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
10862 gsi_next (&gsi);
10864 else
10865 for (i = 0; i < fd->collapse; i++)
10867 tree itype = TREE_TYPE (fd->loops[i].v);
10868 if (POINTER_TYPE_P (itype))
10869 itype = signed_type_for (itype);
10870 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
10871 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10875 /* Remove the GIMPLE_OMP_FOR statement. */
10876 gsi_remove (&gsi, true);
10878 if (!broken_loop)
10880 /* Code to control the increment goes in the CONT_BB. */
10881 gsi = gsi_last_bb (cont_bb);
10882 stmt = gsi_stmt (gsi);
10883 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10885 if (POINTER_TYPE_P (type))
10886 t = fold_build_pointer_plus (fd->loop.v, step);
10887 else
10888 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
10889 expand_omp_build_assign (&gsi, fd->loop.v, t);
10891 if (fd->collapse > 1)
10893 i = fd->collapse - 1;
10894 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
10896 t = fold_convert (sizetype, fd->loops[i].step);
10897 t = fold_build_pointer_plus (fd->loops[i].v, t);
10899 else
10901 t = fold_convert (TREE_TYPE (fd->loops[i].v),
10902 fd->loops[i].step);
10903 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
10904 fd->loops[i].v, t);
10906 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10908 for (i = fd->collapse - 1; i > 0; i--)
10910 tree itype = TREE_TYPE (fd->loops[i].v);
10911 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
10912 if (POINTER_TYPE_P (itype2))
10913 itype2 = signed_type_for (itype2);
10914 t = build3 (COND_EXPR, itype2,
10915 build2 (fd->loops[i].cond_code, boolean_type_node,
10916 fd->loops[i].v,
10917 fold_convert (itype, fd->loops[i].n2)),
10918 build_int_cst (itype2, 0),
10919 fold_convert (itype2, fd->loops[i - 1].step));
10920 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
10921 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
10922 else
10923 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
10924 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
10926 t = build3 (COND_EXPR, itype,
10927 build2 (fd->loops[i].cond_code, boolean_type_node,
10928 fd->loops[i].v,
10929 fold_convert (itype, fd->loops[i].n2)),
10930 fd->loops[i].v,
10931 fold_convert (itype, fd->loops[i].n1));
10932 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10936 /* Remove GIMPLE_OMP_CONTINUE. */
10937 gsi_remove (&gsi, true);
10940 /* Emit the condition in L1_BB. */
10941 gsi = gsi_start_bb (l1_bb);
10943 t = fold_convert (type, n2);
10944 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10945 false, GSI_CONTINUE_LINKING);
10946 tree v = fd->loop.v;
10947 if (DECL_P (v) && TREE_ADDRESSABLE (v))
10948 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
10949 false, GSI_CONTINUE_LINKING);
10950 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
10951 cond_stmt = gimple_build_cond_empty (t);
10952 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
10953 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
10954 NULL, NULL)
10955 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
10956 NULL, NULL))
10958 gsi = gsi_for_stmt (cond_stmt);
10959 gimple_regimplify_operands (cond_stmt, &gsi);
10962 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
10963 if (is_simt)
10965 gsi = gsi_start_bb (l2_bb);
10966 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
10967 if (POINTER_TYPE_P (type))
10968 t = fold_build_pointer_plus (fd->loop.v, step);
10969 else
10970 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
10971 expand_omp_build_assign (&gsi, fd->loop.v, t);
10974 /* Remove GIMPLE_OMP_RETURN. */
10975 gsi = gsi_last_bb (exit_bb);
10976 gsi_remove (&gsi, true);
10978 /* Connect the new blocks. */
10979 remove_edge (FALLTHRU_EDGE (entry_bb));
10981 if (!broken_loop)
10983 remove_edge (BRANCH_EDGE (entry_bb));
10984 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10986 e = BRANCH_EDGE (l1_bb);
10987 ne = FALLTHRU_EDGE (l1_bb);
10988 e->flags = EDGE_TRUE_VALUE;
10990 else
10992 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10994 ne = single_succ_edge (l1_bb);
10995 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10998 ne->flags = EDGE_FALSE_VALUE;
10999 e->probability = REG_BR_PROB_BASE * 7 / 8;
11000 ne->probability = REG_BR_PROB_BASE / 8;
11002 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
11003 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
11005 if (simt_maxlane)
11007 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
11008 NULL_TREE, NULL_TREE);
11009 gsi = gsi_last_bb (entry_bb);
11010 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
11011 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
11012 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
11013 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
11014 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
11015 l2_dom_bb = entry_bb;
11017 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
11019 if (!broken_loop)
11021 struct loop *loop = alloc_loop ();
11022 loop->header = l1_bb;
11023 loop->latch = cont_bb;
11024 add_loop (loop, l1_bb->loop_father);
11025 loop->safelen = safelen_int;
11026 if (simduid)
11028 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
11029 cfun->has_simduid_loops = true;
11031 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
11032 the loop. */
11033 if ((flag_tree_loop_vectorize
11034 || (!global_options_set.x_flag_tree_loop_vectorize
11035 && !global_options_set.x_flag_tree_vectorize))
11036 && flag_tree_loop_optimize
11037 && loop->safelen > 1)
11039 loop->force_vectorize = true;
11040 cfun->has_force_vectorize_loops = true;
11043 else if (simduid)
11044 cfun->has_simduid_loops = true;
/* The taskloop construct is represented after gimplification as two
   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched between
   them.  This routine expands the outer GIMPLE_OMP_FOR, which should
   just compute all the needed loop temporaries for GIMPLE_OMP_TASK.  */
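/* Illustrative sketch, not part of GCC: the nesting this routine sees
   after gimplification of, e.g., "#pragma omp taskloop".  Only the
   outermost GIMPLE_OMP_FOR is expanded here; it computes the start/end
   temporaries handed to the task through _looptemp_ clauses:

	GIMPLE_OMP_FOR		<-- expanded by this routine
	  GIMPLE_OMP_TASK	<-- expanded separately, per task
	    GIMPLE_OMP_FOR	<-- the user's loop
	      BODY  */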
11053 static void
11054 expand_omp_taskloop_for_outer (struct omp_region *region,
11055 struct omp_for_data *fd,
11056 gimple *inner_stmt)
11058 tree type, bias = NULL_TREE;
11059 basic_block entry_bb, cont_bb, exit_bb;
11060 gimple_stmt_iterator gsi;
11061 gassign *assign_stmt;
11062 tree *counts = NULL;
11063 int i;
11065 gcc_assert (inner_stmt);
11066 gcc_assert (region->cont);
11067 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
11068 && gimple_omp_task_taskloop_p (inner_stmt));
11069 type = TREE_TYPE (fd->loop.v);
11071 /* See if we need to bias by LLONG_MIN. */
11072 if (fd->iter_type == long_long_unsigned_type_node
11073 && TREE_CODE (type) == INTEGER_TYPE
11074 && !TYPE_UNSIGNED (type))
11076 tree n1, n2;
11078 if (fd->loop.cond_code == LT_EXPR)
11080 n1 = fd->loop.n1;
11081 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
11083 else
11085 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
11086 n2 = fd->loop.n1;
11088 if (TREE_CODE (n1) != INTEGER_CST
11089 || TREE_CODE (n2) != INTEGER_CST
11090 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
11091 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
11094 entry_bb = region->entry;
11095 cont_bb = region->cont;
11096 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
11097 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
11098 exit_bb = region->exit;
11100 gsi = gsi_last_bb (entry_bb);
11101 gimple *for_stmt = gsi_stmt (gsi);
11102 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
11103 if (fd->collapse > 1)
11105 int first_zero_iter = -1, dummy = -1;
11106 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
11108 counts = XALLOCAVEC (tree, fd->collapse);
11109 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
11110 zero_iter_bb, first_zero_iter,
11111 dummy_bb, dummy, l2_dom_bb);
11113 if (zero_iter_bb)
11115 /* Some counts[i] vars might be uninitialized if
11116 some loop has zero iterations. But the body shouldn't
11117 be executed in that case, so just avoid uninit warnings. */
11118 for (i = first_zero_iter; i < fd->collapse; i++)
11119 if (SSA_VAR_P (counts[i]))
11120 TREE_NO_WARNING (counts[i]) = 1;
11121 gsi_prev (&gsi);
11122 edge e = split_block (entry_bb, gsi_stmt (gsi));
11123 entry_bb = e->dest;
11124 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
11125 gsi = gsi_last_bb (entry_bb);
11126 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
11127 get_immediate_dominator (CDI_DOMINATORS,
11128 zero_iter_bb));
11132 tree t0, t1;
11133 t1 = fd->loop.n2;
11134 t0 = fd->loop.n1;
11135 if (POINTER_TYPE_P (TREE_TYPE (t0))
11136 && TYPE_PRECISION (TREE_TYPE (t0))
11137 != TYPE_PRECISION (fd->iter_type))
11139 /* Avoid casting pointers to integer of a different size. */
11140 tree itype = signed_type_for (type);
11141 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
11142 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
11144 else
11146 t1 = fold_convert (fd->iter_type, t1);
11147 t0 = fold_convert (fd->iter_type, t0);
11149 if (bias)
11151 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
11152 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
11155 tree innerc = find_omp_clause (gimple_omp_task_clauses (inner_stmt),
11156 OMP_CLAUSE__LOOPTEMP_);
11157 gcc_assert (innerc);
11158 tree startvar = OMP_CLAUSE_DECL (innerc);
11159 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
11160 gcc_assert (innerc);
11161 tree endvar = OMP_CLAUSE_DECL (innerc);
11162 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
11164 gcc_assert (innerc);
11165 for (i = 1; i < fd->collapse; i++)
11167 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
11168 OMP_CLAUSE__LOOPTEMP_);
11169 gcc_assert (innerc);
11171 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
11172 OMP_CLAUSE__LOOPTEMP_);
11173 if (innerc)
11175 /* If needed (inner taskloop has lastprivate clause), propagate
11176 down the total number of iterations. */
11177 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
11178 NULL_TREE, false,
11179 GSI_CONTINUE_LINKING);
11180 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
11181 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11185 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
11186 GSI_CONTINUE_LINKING);
11187 assign_stmt = gimple_build_assign (startvar, t0);
11188 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11190 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
11191 GSI_CONTINUE_LINKING);
11192 assign_stmt = gimple_build_assign (endvar, t1);
11193 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11194 if (fd->collapse > 1)
11195 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
11197 /* Remove the GIMPLE_OMP_FOR statement. */
11198 gsi = gsi_for_stmt (for_stmt);
11199 gsi_remove (&gsi, true);
11201 gsi = gsi_last_bb (cont_bb);
11202 gsi_remove (&gsi, true);
11204 gsi = gsi_last_bb (exit_bb);
11205 gsi_remove (&gsi, true);
11207 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
11208 remove_edge (BRANCH_EDGE (entry_bb));
11209 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
11210 remove_edge (BRANCH_EDGE (cont_bb));
11211 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
11212 set_immediate_dominator (CDI_DOMINATORS, region->entry,
11213 recompute_dominator (CDI_DOMINATORS, region->entry));
11216 /* Taskloop construct is represented after gimplification with
11217 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
11218 in between them. This routine expands the inner GIMPLE_OMP_FOR.
11219 GOMP_taskloop{,_ull} function arranges for each task to be given just
11220 a single range of iterations. */
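/* I.e., per task the expansion is roughly the following sketch (for a
   simple, non-collapsed loop; the _looptemp_ variables carry the range
   that GOMP_taskloop{,_ull} assigned to this task):

     V = _looptemp_1;          // this task's start
     E = _looptemp_2;          // this task's end
     for (; V cond E; V += STEP)
       BODY;
*/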
11222 static void
11223 expand_omp_taskloop_for_inner (struct omp_region *region,
11224 struct omp_for_data *fd,
11225 gimple *inner_stmt)
11227 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
11228 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
11229 basic_block fin_bb;
11230 gimple_stmt_iterator gsi;
11231 edge ep;
11232 bool broken_loop = region->cont == NULL;
11233 tree *counts = NULL;
11234 tree n1, n2, step;
11236 itype = type = TREE_TYPE (fd->loop.v);
11237 if (POINTER_TYPE_P (type))
11238 itype = signed_type_for (type);
11240 /* See if we need to bias by LLONG_MIN. */
11241 if (fd->iter_type == long_long_unsigned_type_node
11242 && TREE_CODE (type) == INTEGER_TYPE
11243 && !TYPE_UNSIGNED (type))
11245 tree n1, n2;
11247 if (fd->loop.cond_code == LT_EXPR)
11249 n1 = fd->loop.n1;
11250 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
11252 else
11254 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
11255 n2 = fd->loop.n1;
11257 if (TREE_CODE (n1) != INTEGER_CST
11258 || TREE_CODE (n2) != INTEGER_CST
11259 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
11260 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
11263 entry_bb = region->entry;
11264 cont_bb = region->cont;
11265 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
11266 fin_bb = BRANCH_EDGE (entry_bb)->dest;
11267 gcc_assert (broken_loop
11268 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
11269 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
11270 if (!broken_loop)
11272 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
11273 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
11275 exit_bb = region->exit;
11277 /* Iteration space partitioning goes in ENTRY_BB. */
11278 gsi = gsi_last_bb (entry_bb);
11279 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
11281 if (fd->collapse > 1)
11283 int first_zero_iter = -1, dummy = -1;
11284 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
11286 counts = XALLOCAVEC (tree, fd->collapse);
11287 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
11288 fin_bb, first_zero_iter,
11289 dummy_bb, dummy, l2_dom_bb);
11290 t = NULL_TREE;
11292 else
11293 t = integer_one_node;
11295 step = fd->loop.step;
11296 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
11297 OMP_CLAUSE__LOOPTEMP_);
11298 gcc_assert (innerc);
11299 n1 = OMP_CLAUSE_DECL (innerc);
11300 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
11301 gcc_assert (innerc);
11302 n2 = OMP_CLAUSE_DECL (innerc);
11303 if (bias)
11305 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
11306 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
11308 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
11309 true, NULL_TREE, true, GSI_SAME_STMT);
11310 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
11311 true, NULL_TREE, true, GSI_SAME_STMT);
11312 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
11313 true, NULL_TREE, true, GSI_SAME_STMT);
11315 tree startvar = fd->loop.v;
11316 tree endvar = NULL_TREE;
11318 if (gimple_omp_for_combined_p (fd->for_stmt))
11320 tree clauses = gimple_omp_for_clauses (inner_stmt);
11321 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
11322 gcc_assert (innerc);
11323 startvar = OMP_CLAUSE_DECL (innerc);
11324 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
11325 OMP_CLAUSE__LOOPTEMP_);
11326 gcc_assert (innerc);
11327 endvar = OMP_CLAUSE_DECL (innerc);
11329 t = fold_convert (TREE_TYPE (startvar), n1);
11330 t = force_gimple_operand_gsi (&gsi, t,
11331 DECL_P (startvar)
11332 && TREE_ADDRESSABLE (startvar),
11333 NULL_TREE, false, GSI_CONTINUE_LINKING);
11334 gimple *assign_stmt = gimple_build_assign (startvar, t);
11335 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11337 t = fold_convert (TREE_TYPE (startvar), n2);
11338 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
11339 false, GSI_CONTINUE_LINKING);
11340 if (endvar)
11342 assign_stmt = gimple_build_assign (endvar, e);
11343 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11344 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
11345 assign_stmt = gimple_build_assign (fd->loop.v, e);
11346 else
11347 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
11348 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11350 if (fd->collapse > 1)
11351 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
11353 if (!broken_loop)
11355 /* The code controlling the sequential loop replaces the
11356 GIMPLE_OMP_CONTINUE. */
11357 gsi = gsi_last_bb (cont_bb);
11358 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11359 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
11360 vmain = gimple_omp_continue_control_use (cont_stmt);
11361 vback = gimple_omp_continue_control_def (cont_stmt);
11363 if (!gimple_omp_for_combined_p (fd->for_stmt))
11365 if (POINTER_TYPE_P (type))
11366 t = fold_build_pointer_plus (vmain, step);
11367 else
11368 t = fold_build2 (PLUS_EXPR, type, vmain, step);
11369 t = force_gimple_operand_gsi (&gsi, t,
11370 DECL_P (vback)
11371 && TREE_ADDRESSABLE (vback),
11372 NULL_TREE, true, GSI_SAME_STMT);
11373 assign_stmt = gimple_build_assign (vback, t);
11374 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
11376 t = build2 (fd->loop.cond_code, boolean_type_node,
11377 DECL_P (vback) && TREE_ADDRESSABLE (vback)
11378 ? t : vback, e);
11379 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
11382 /* Remove the GIMPLE_OMP_CONTINUE statement. */
11383 gsi_remove (&gsi, true);
11385 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
11386 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
11389 /* Remove the GIMPLE_OMP_FOR statement. */
11390 gsi = gsi_for_stmt (fd->for_stmt);
11391 gsi_remove (&gsi, true);
11393 /* Remove the GIMPLE_OMP_RETURN statement. */
11394 gsi = gsi_last_bb (exit_bb);
11395 gsi_remove (&gsi, true);
11397 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
11398 if (!broken_loop)
11399 remove_edge (BRANCH_EDGE (entry_bb));
11400 else
11402 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
11403 region->outer->cont = NULL;
11406 /* Connect all the blocks. */
11407 if (!broken_loop)
11409 ep = find_edge (cont_bb, body_bb);
11410 if (gimple_omp_for_combined_p (fd->for_stmt))
11412 remove_edge (ep);
11413 ep = NULL;
11415 else if (fd->collapse > 1)
11417 remove_edge (ep);
11418 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
11420 else
11421 ep->flags = EDGE_TRUE_VALUE;
11422 find_edge (cont_bb, fin_bb)->flags
11423 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
11426 set_immediate_dominator (CDI_DOMINATORS, body_bb,
11427 recompute_dominator (CDI_DOMINATORS, body_bb));
11428 if (!broken_loop)
11429 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
11430 recompute_dominator (CDI_DOMINATORS, fin_bb));
11432 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
11434 struct loop *loop = alloc_loop ();
11435 loop->header = body_bb;
11436 if (collapse_bb == NULL)
11437 loop->latch = cont_bb;
11438 add_loop (loop, body_bb->loop_father);
11442 /* A subroutine of expand_omp_for. Generate code for an OpenACC
11443 partitioned loop. The lowering here is abstracted, in that the
11444 loop parameters are passed through internal functions, which are
11445 further lowered by oacc_device_lower, once we get to the target
11446 compiler. The loop is of the form:
11448 for (V = B; V LTGT E; V += S) {BODY}
11450 where LTGT is < or >. We may have a specified chunking size, CHUNKING
11451 (constant 0 for no chunking) and we will have a GWV partitioning
11452 mask, specifying dimensions over which the loop is to be
11453 partitioned (see note below). We generate code that looks like:
11455 <entry_bb> [incoming FALL->body, BRANCH->exit]
11456 typedef signedintify (typeof (V)) T; // underlying signed integral type
11457 T range = E - B;
11458 T chunk_no = 0;
11459 T DIR = LTGT == '<' ? +1 : -1;
11460 T chunk_max = GOACC_LOOP_CHUNK (DIR, range, S, CHUNK_SIZE, GWV);
11461 T step = GOACC_LOOP_STEP (DIR, range, S, CHUNK_SIZE, GWV);
11463 <head_bb> [created by splitting end of entry_bb]
11464 T offset = GOACC_LOOP_OFFSET (DIR, range, S, CHUNK_SIZE, GWV, chunk_no);
11465 T bound = GOACC_LOOP_BOUND (DIR, range, S, CHUNK_SIZE, GWV, offset);
11466 if (!(offset LTGT bound)) goto bottom_bb;
11468 <body_bb> [incoming]
11469 V = B + offset;
11470 {BODY}
11472 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
11473 offset += step;
11474 if (offset LTGT bound) goto body_bb; [*]
11476 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
11477 chunk_no++;
11478 if (chunk_no < chunk_max) goto head_bb;
11480 <exit_bb> [incoming]
11481 V = B + ((range -/+ 1) / S +/- 1) * S [*]
11483 [*] Needed if V live at end of loop
11485 Note: CHUNKING & GWV mask are specified explicitly here. This is a
11486 transition, and will be specified by a more general mechanism shortly.
11489 static void
11490 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
11492 tree v = fd->loop.v;
11493 enum tree_code cond_code = fd->loop.cond_code;
11494 enum tree_code plus_code = PLUS_EXPR;
11496 tree chunk_size = integer_minus_one_node;
11497 tree gwv = integer_zero_node;
11498 tree iter_type = TREE_TYPE (v);
11499 tree diff_type = iter_type;
11500 tree plus_type = iter_type;
11501 struct oacc_collapse *counts = NULL;
11503 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
11504 == GF_OMP_FOR_KIND_OACC_LOOP);
11505 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
11506 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
11508 if (POINTER_TYPE_P (iter_type))
11510 plus_code = POINTER_PLUS_EXPR;
11511 plus_type = sizetype;
11513 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
11514 diff_type = signed_type_for (diff_type);
11516 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
11517 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
11518 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
11519 basic_block bottom_bb = NULL;
11521 /* entry_bb has two successors; the branch edge is to the exit
11522 block, fallthrough edge to body. */
11523 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
11524 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
11526 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
11527 body_bb, or to a block whose only successor is the body_bb. Its
11528 fallthrough successor is the final block (same as the branch
11529 successor of the entry_bb). */
11530 if (cont_bb)
11532 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
11533 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
11535 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
11536 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
11538 else
11539 gcc_assert (!gimple_in_ssa_p (cfun));
11541 /* The exit block only has entry_bb and cont_bb as predecessors. */
11542 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
11544 tree chunk_no;
11545 tree chunk_max = NULL_TREE;
11546 tree bound, offset;
11547 tree step = create_tmp_var (diff_type, ".step");
11548 bool up = cond_code == LT_EXPR;
11549 tree dir = build_int_cst (diff_type, up ? +1 : -1);
11550 bool chunking = !gimple_in_ssa_p (cfun);
11551 bool negating;
11553 /* SSA instances. */
11554 tree offset_incr = NULL_TREE;
11555 tree offset_init = NULL_TREE;
11557 gimple_stmt_iterator gsi;
11558 gassign *ass;
11559 gcall *call;
11560 gimple *stmt;
11561 tree expr;
11562 location_t loc;
11563 edge split, be, fte;
11565 /* Split the end of entry_bb to create head_bb. */
11566 split = split_block (entry_bb, last_stmt (entry_bb));
11567 basic_block head_bb = split->dest;
11568 entry_bb = split->src;
11570 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
11571 gsi = gsi_last_bb (entry_bb);
11572 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
11573 loc = gimple_location (for_stmt);
11575 if (gimple_in_ssa_p (cfun))
11577 offset_init = gimple_omp_for_index (for_stmt, 0);
11578 gcc_assert (integer_zerop (fd->loop.n1));
11579 /* The SSA parallelizer does gang parallelism. */
11580 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
11583 if (fd->collapse > 1)
11585 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
11586 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
11587 TREE_TYPE (fd->loop.n2));
11589 if (SSA_VAR_P (fd->loop.n2))
11591 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
11592 true, GSI_SAME_STMT);
11593 ass = gimple_build_assign (fd->loop.n2, total);
11594 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11599 tree b = fd->loop.n1;
11600 tree e = fd->loop.n2;
11601 tree s = fd->loop.step;
11603 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
11604 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
11606 /* Convert the step, avoiding possible unsigned->signed overflow. */
11607 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
11608 if (negating)
11609 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
11610 s = fold_convert (diff_type, s);
11611 if (negating)
11612 s = fold_build1 (NEGATE_EXPR, diff_type, s);
11613 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
11615 if (!chunking)
11616 chunk_size = integer_zero_node;
11617 expr = fold_convert (diff_type, chunk_size);
11618 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
11619 NULL_TREE, true, GSI_SAME_STMT);
11620 /* Determine the range, avoiding possible unsigned->signed overflow. */
11621 negating = !up && TYPE_UNSIGNED (iter_type);
11622 expr = fold_build2 (MINUS_EXPR, plus_type,
11623 fold_convert (plus_type, negating ? b : e),
11624 fold_convert (plus_type, negating ? e : b));
11625 expr = fold_convert (diff_type, expr);
11626 if (negating)
11627 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
11628 tree range = force_gimple_operand_gsi (&gsi, expr, true,
11629 NULL_TREE, true, GSI_SAME_STMT);
11631 chunk_no = build_int_cst (diff_type, 0);
11632 if (chunking)
11634 gcc_assert (!gimple_in_ssa_p (cfun));
11636 expr = chunk_no;
11637 chunk_max = create_tmp_var (diff_type, ".chunk_max");
11638 chunk_no = create_tmp_var (diff_type, ".chunk_no");
11640 ass = gimple_build_assign (chunk_no, expr);
11641 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11643 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11644 build_int_cst (integer_type_node,
11645 IFN_GOACC_LOOP_CHUNKS),
11646 dir, range, s, chunk_size, gwv);
11647 gimple_call_set_lhs (call, chunk_max);
11648 gimple_set_location (call, loc);
11649 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11651 else
11652 chunk_size = chunk_no;
11654 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11655 build_int_cst (integer_type_node,
11656 IFN_GOACC_LOOP_STEP),
11657 dir, range, s, chunk_size, gwv);
11658 gimple_call_set_lhs (call, step);
11659 gimple_set_location (call, loc);
11660 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11662 /* Remove the GIMPLE_OMP_FOR. */
11663 gsi_remove (&gsi, true);
11665 /* Fixup edges from head_bb. */
11666 be = BRANCH_EDGE (head_bb);
11667 fte = FALLTHRU_EDGE (head_bb);
11668 be->flags |= EDGE_FALSE_VALUE;
11669 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
11671 basic_block body_bb = fte->dest;
11673 if (gimple_in_ssa_p (cfun))
11675 gsi = gsi_last_bb (cont_bb);
11676 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11678 offset = gimple_omp_continue_control_use (cont_stmt);
11679 offset_incr = gimple_omp_continue_control_def (cont_stmt);
11681 else
11683 offset = create_tmp_var (diff_type, ".offset");
11684 offset_init = offset_incr = offset;
11686 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
11688 /* Loop offset & bound go into head_bb. */
11689 gsi = gsi_start_bb (head_bb);
11691 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11692 build_int_cst (integer_type_node,
11693 IFN_GOACC_LOOP_OFFSET),
11694 dir, range, s,
11695 chunk_size, gwv, chunk_no);
11696 gimple_call_set_lhs (call, offset_init);
11697 gimple_set_location (call, loc);
11698 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11700 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11701 build_int_cst (integer_type_node,
11702 IFN_GOACC_LOOP_BOUND),
11703 dir, range, s,
11704 chunk_size, gwv, offset_init);
11705 gimple_call_set_lhs (call, bound);
11706 gimple_set_location (call, loc);
11707 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11709 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
11710 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11711 GSI_CONTINUE_LINKING);
11713 /* V assignment goes into body_bb. */
11714 if (!gimple_in_ssa_p (cfun))
11716 gsi = gsi_start_bb (body_bb);
11718 expr = build2 (plus_code, iter_type, b,
11719 fold_convert (plus_type, offset));
11720 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11721 true, GSI_SAME_STMT);
11722 ass = gimple_build_assign (v, expr);
11723 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11724 if (fd->collapse > 1)
11725 expand_oacc_collapse_vars (fd, &gsi, counts, v);
11728 /* Loop increment goes into cont_bb. If this is not a loop, we
11729 will have spawned threads as if it was, and each one will
11730 execute one iteration. The specification is not explicit about
11731 whether such constructs are ill-formed or not, and they can
11732 occur, especially when noreturn routines are involved. */
11733 if (cont_bb)
11735 gsi = gsi_last_bb (cont_bb);
11736 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11737 loc = gimple_location (cont_stmt);
11739 /* Increment offset. */
11740 if (gimple_in_ssa_p (cfun))
11741 expr = build2 (plus_code, iter_type, offset,
11742 fold_convert (plus_type, step));
11743 else
11744 expr = build2 (PLUS_EXPR, diff_type, offset, step);
11745 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11746 true, GSI_SAME_STMT);
11747 ass = gimple_build_assign (offset_incr, expr);
11748 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11749 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
11750 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
11752 /* Remove the GIMPLE_OMP_CONTINUE. */
11753 gsi_remove (&gsi, true);
11755 /* Fixup edges from cont_bb. */
11756 be = BRANCH_EDGE (cont_bb);
11757 fte = FALLTHRU_EDGE (cont_bb);
11758 be->flags |= EDGE_TRUE_VALUE;
11759 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11761 if (chunking)
11763 /* Split the beginning of exit_bb to make bottom_bb. We
11764 need to insert a nop at the start, because splitting is
11765 after a stmt, not before. */
11766 gsi = gsi_start_bb (exit_bb);
11767 stmt = gimple_build_nop ();
11768 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
11769 split = split_block (exit_bb, stmt);
11770 bottom_bb = split->src;
11771 exit_bb = split->dest;
11772 gsi = gsi_last_bb (bottom_bb);
11774 /* Chunk increment and test goes into bottom_bb. */
11775 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
11776 build_int_cst (diff_type, 1));
11777 ass = gimple_build_assign (chunk_no, expr);
11778 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
11780 /* Chunk test at end of bottom_bb. */
11781 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
11782 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11783 GSI_CONTINUE_LINKING);
11785 /* Fixup edges from bottom_bb. */
11786 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11787 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
11791 gsi = gsi_last_bb (exit_bb);
11792 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
11793 loc = gimple_location (gsi_stmt (gsi));
11795 if (!gimple_in_ssa_p (cfun))
11797 /* Insert the final value of V, in case it is live. This is the
11798 value for the only thread that survives past the join. */
11799 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
11800 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
11801 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
11802 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
11803 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
11804 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11805 true, GSI_SAME_STMT);
11806 ass = gimple_build_assign (v, expr);
11807 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11810 /* Remove the OMP_RETURN. */
11811 gsi_remove (&gsi, true);
11813 if (cont_bb)
11815 /* We now have one or two nested loops. Update the loop
11816 structures. */
11817 struct loop *parent = entry_bb->loop_father;
11818 struct loop *body = body_bb->loop_father;
11820 if (chunking)
11822 struct loop *chunk_loop = alloc_loop ();
11823 chunk_loop->header = head_bb;
11824 chunk_loop->latch = bottom_bb;
11825 add_loop (chunk_loop, parent);
11826 parent = chunk_loop;
11828 else if (parent != body)
11830 gcc_assert (body->header == body_bb);
11831 gcc_assert (body->latch == cont_bb
11832 || single_pred (body->latch) == cont_bb);
11833 parent = NULL;
11836 if (parent)
11838 struct loop *body_loop = alloc_loop ();
11839 body_loop->header = body_bb;
11840 body_loop->latch = cont_bb;
11841 add_loop (body_loop, parent);
11846 /* Expand the OMP loop defined by REGION. */
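/* For the generic schedules this ends up emitting calls into libgomp;
   e.g. for schedule(dynamic) the generated code is roughly the
   following sketch (types, casts and the combined-parallel case
   elided):

     if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart, &iend))
       do
         for (V = istart; V cond iend; V += STEP)
           BODY;
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   expand_omp_for_generic builds that shape; the static schedules and
   the SIMD/OpenACC/taskloop variants take their own paths below. */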
11848 static void
11849 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
11851 struct omp_for_data fd;
11852 struct omp_for_data_loop *loops;
11854 loops
11855 = (struct omp_for_data_loop *)
11856 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
11857 * sizeof (struct omp_for_data_loop));
11858 extract_omp_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
11859 &fd, loops);
11860 region->sched_kind = fd.sched_kind;
11861 region->sched_modifiers = fd.sched_modifiers;
11863 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
11864 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11865 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11866 if (region->cont)
11868 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
11869 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11870 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11872 else
11873 /* If there isn't a continue then this is a degenerate case where
11874 the introduction of abnormal edges during lowering will prevent
11875 original loops from being detected. Fix that up. */
11876 loops_state_set (LOOPS_NEED_FIXUP);
11878 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
11879 expand_omp_simd (region, &fd);
11880 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
11881 expand_cilk_for (region, &fd);
11882 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
11884 gcc_assert (!inner_stmt);
11885 expand_oacc_for (region, &fd);
11887 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
11889 if (gimple_omp_for_combined_into_p (fd.for_stmt))
11890 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
11891 else
11892 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
11894 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
11895 && !fd.have_ordered)
11897 if (fd.chunk_size == NULL)
11898 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
11899 else
11900 expand_omp_for_static_chunk (region, &fd, inner_stmt);
11902 else
11904 int fn_index, start_ix, next_ix;
11906 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
11907 == GF_OMP_FOR_KIND_FOR);
11908 if (fd.chunk_size == NULL
11909 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
11910 fd.chunk_size = integer_zero_node;
11911 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
11912 switch (fd.sched_kind)
11914 case OMP_CLAUSE_SCHEDULE_RUNTIME:
11915 fn_index = 3;
11916 break;
11917 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
11918 case OMP_CLAUSE_SCHEDULE_GUIDED:
11919 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
11920 && !fd.ordered
11921 && !fd.have_ordered)
11923 fn_index = 3 + fd.sched_kind;
11924 break;
11926 /* FALLTHRU */
11927 default:
11928 fn_index = fd.sched_kind;
11929 break;
11931 if (!fd.ordered)
11932 fn_index += fd.have_ordered * 6;
11933 if (fd.ordered)
11934 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
11935 else
11936 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
11937 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
11938 if (fd.iter_type == long_long_unsigned_type_node)
11940 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
11941 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
11942 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
11943 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
11945 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
11946 (enum built_in_function) next_ix, inner_stmt);
11949 if (gimple_in_ssa_p (cfun))
11950 update_ssa (TODO_update_ssa_only_virtuals);
11954 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
11956 v = GOMP_sections_start (n);
11958 switch (v)
11960 case 0:
11961 goto L2;
11962 case 1:
11963 section 1;
11964 goto L1;
11965 case 2:
11967 case n:
11969 default:
11970 abort ();
11973 v = GOMP_sections_next ();
11974 goto L0;
11976 reduction;
11978 If this is a combined parallel sections, replace the call to
11979 GOMP_sections_start with call to GOMP_sections_next. */
11981 static void
11982 expand_omp_sections (struct omp_region *region)
11984 tree t, u, vin = NULL, vmain, vnext, l2;
11985 unsigned len;
11986 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
11987 gimple_stmt_iterator si, switch_si;
11988 gomp_sections *sections_stmt;
11989 gimple *stmt;
11990 gomp_continue *cont;
11991 edge_iterator ei;
11992 edge e;
11993 struct omp_region *inner;
11994 unsigned i, casei;
11995 bool exit_reachable = region->cont != NULL;
11997 gcc_assert (region->exit != NULL);
11998 entry_bb = region->entry;
11999 l0_bb = single_succ (entry_bb);
12000 l1_bb = region->cont;
12001 l2_bb = region->exit;
12002 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
12003 l2 = gimple_block_label (l2_bb);
12004 else
12006 /* This can happen if there are reductions. */
12007 len = EDGE_COUNT (l0_bb->succs);
12008 gcc_assert (len > 0);
12009 e = EDGE_SUCC (l0_bb, len - 1);
12010 si = gsi_last_bb (e->dest);
12011 l2 = NULL_TREE;
12012 if (gsi_end_p (si)
12013 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
12014 l2 = gimple_block_label (e->dest);
12015 else
12016 FOR_EACH_EDGE (e, ei, l0_bb->succs)
12018 si = gsi_last_bb (e->dest);
12019 if (gsi_end_p (si)
12020 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
12022 l2 = gimple_block_label (e->dest);
12023 break;
12027 if (exit_reachable)
12028 default_bb = create_empty_bb (l1_bb->prev_bb);
12029 else
12030 default_bb = create_empty_bb (l0_bb);
12032 /* We will build a switch() with enough cases for all the
12033 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
12034 and a default case to abort if something goes wrong. */
12035 len = EDGE_COUNT (l0_bb->succs);
12037 /* Use vec::quick_push on label_vec throughout, since we know the size
12038 in advance. */
12039 auto_vec<tree> label_vec (len);
12041 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
12042 GIMPLE_OMP_SECTIONS statement. */
12043 si = gsi_last_bb (entry_bb);
12044 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
12045 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
12046 vin = gimple_omp_sections_control (sections_stmt);
12047 if (!is_combined_parallel (region))
12049 /* If we are not inside a combined parallel+sections region,
12050 call GOMP_sections_start. */
12051 t = build_int_cst (unsigned_type_node, len - 1);
12052 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
12053 stmt = gimple_build_call (u, 1, t);
12055 else
12057 /* Otherwise, call GOMP_sections_next. */
12058 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
12059 stmt = gimple_build_call (u, 0);
12061 gimple_call_set_lhs (stmt, vin);
12062 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
12063 gsi_remove (&si, true);
12065 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
12066 L0_BB. */
12067 switch_si = gsi_last_bb (l0_bb);
12068 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
12069 if (exit_reachable)
12071 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
12072 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
12073 vmain = gimple_omp_continue_control_use (cont);
12074 vnext = gimple_omp_continue_control_def (cont);
12076 else
12078 vmain = vin;
12079 vnext = NULL_TREE;
12082 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
12083 label_vec.quick_push (t);
12084 i = 1;
12086 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
12087 for (inner = region->inner, casei = 1;
12088 inner;
12089 inner = inner->next, i++, casei++)
12091 basic_block s_entry_bb, s_exit_bb;
12093 /* Skip optional reduction region. */
12094 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
12096 --i;
12097 --casei;
12098 continue;
12101 s_entry_bb = inner->entry;
12102 s_exit_bb = inner->exit;
12104 t = gimple_block_label (s_entry_bb);
12105 u = build_int_cst (unsigned_type_node, casei);
12106 u = build_case_label (u, NULL, t);
12107 label_vec.quick_push (u);
12109 si = gsi_last_bb (s_entry_bb);
12110 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
12111 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
12112 gsi_remove (&si, true);
12113 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
12115 if (s_exit_bb == NULL)
12116 continue;
12118 si = gsi_last_bb (s_exit_bb);
12119 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
12120 gsi_remove (&si, true);
12122 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
12125 /* Error handling code goes in DEFAULT_BB. */
12126 t = gimple_block_label (default_bb);
12127 u = build_case_label (NULL, NULL, t);
12128 make_edge (l0_bb, default_bb, 0);
12129 add_bb_to_loop (default_bb, current_loops->tree_root);
12131 stmt = gimple_build_switch (vmain, u, label_vec);
12132 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
12133 gsi_remove (&switch_si, true);
12135 si = gsi_start_bb (default_bb);
12136 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
12137 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
12139 if (exit_reachable)
12141 tree bfn_decl;
12143 /* Code to get the next section goes in L1_BB. */
12144 si = gsi_last_bb (l1_bb);
12145 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
12147 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
12148 stmt = gimple_build_call (bfn_decl, 0);
12149 gimple_call_set_lhs (stmt, vnext);
12150 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
12151 gsi_remove (&si, true);
12153 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
12156 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
12157 si = gsi_last_bb (l2_bb);
12158 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
12159 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
12160 else if (gimple_omp_return_lhs (gsi_stmt (si)))
12161 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
12162 else
12163 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
12164 stmt = gimple_build_call (t, 0);
12165 if (gimple_omp_return_lhs (gsi_stmt (si)))
12166 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
12167 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
12168 gsi_remove (&si, true);
12170 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
12174 /* Expand code for an OpenMP single directive. We've already expanded
12175 much of the code; here we simply place the GOMP_barrier call. */
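/* I.e., after lowering the region is already roughly

     if (GOMP_single_start ())
       BODY;
     GOMP_barrier ();      // omitted for nowait

   (a sketch; the GOMP_single_start call was emitted during lowering),
   so all that remains here is dropping the markers and emitting the
   barrier. */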
12177 static void
12178 expand_omp_single (struct omp_region *region)
12180 basic_block entry_bb, exit_bb;
12181 gimple_stmt_iterator si;
12183 entry_bb = region->entry;
12184 exit_bb = region->exit;
12186 si = gsi_last_bb (entry_bb);
12187 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
12188 gsi_remove (&si, true);
12189 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
12191 si = gsi_last_bb (exit_bb);
12192 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
12194 tree t = gimple_omp_return_lhs (gsi_stmt (si));
12195 gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT);
12197 gsi_remove (&si, true);
12198 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
12202 /* Generic expansion for OpenMP synchronization directives: master,
12203 ordered and critical. All we need to do here is remove the entry
12204 and exit markers for REGION. */
12206 static void
12207 expand_omp_synch (struct omp_region *region)
12209 basic_block entry_bb, exit_bb;
12210 gimple_stmt_iterator si;
12212 entry_bb = region->entry;
12213 exit_bb = region->exit;
12215 si = gsi_last_bb (entry_bb);
12216 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
12217 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
12218 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
12219 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
12220 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
12221 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
12222 gsi_remove (&si, true);
12223 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
12225 if (exit_bb)
12227 si = gsi_last_bb (exit_bb);
12228 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
12229 gsi_remove (&si, true);
12230 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
12234 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
12235 operation as a normal volatile load. */
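/* I.e., roughly the following sketch, where INDEX selects among the
   size-suffixed __atomic_load_{1,2,4,8,16} builtins:

     loaded_val = __atomic_load_N (addr, MEMMODEL_RELAXED);  // or SEQ_CST

   with a VIEW_CONVERT_EXPR wrapped around the call when the payload
   type differs from the builtin's integral type. */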
12237 static bool
12238 expand_omp_atomic_load (basic_block load_bb, tree addr,
12239 tree loaded_val, int index)
12241 enum built_in_function tmpbase;
12242 gimple_stmt_iterator gsi;
12243 basic_block store_bb;
12244 location_t loc;
12245 gimple *stmt;
12246 tree decl, call, type, itype;
12248 gsi = gsi_last_bb (load_bb);
12249 stmt = gsi_stmt (gsi);
12250 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
12251 loc = gimple_location (stmt);
12253 /* ??? If the target does not implement atomic_load_optab[mode], and mode
12254 is smaller than word size, then expand_atomic_load assumes that the load
12255 is atomic. We could avoid the builtin entirely in this case. */
12257 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
12258 decl = builtin_decl_explicit (tmpbase);
12259 if (decl == NULL_TREE)
12260 return false;
12262 type = TREE_TYPE (loaded_val);
12263 itype = TREE_TYPE (TREE_TYPE (decl));
12265 call = build_call_expr_loc (loc, decl, 2, addr,
12266 build_int_cst (NULL,
12267 gimple_omp_atomic_seq_cst_p (stmt)
12268 ? MEMMODEL_SEQ_CST
12269 : MEMMODEL_RELAXED));
12270 if (!useless_type_conversion_p (type, itype))
12271 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
12272 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
12274 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12275 gsi_remove (&gsi, true);
12277 store_bb = single_succ (load_bb);
12278 gsi = gsi_last_bb (store_bb);
12279 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
12280 gsi_remove (&gsi, true);
12282 if (gimple_in_ssa_p (cfun))
12283 update_ssa (TODO_update_ssa_no_phi);
12285 return true;
12288 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
12289 operation as a normal volatile store. */
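/* I.e., roughly one of the following sketches, depending on whether
   the old value is needed (again using the size-suffixed builtins):

     __atomic_store_N (addr, stored_val, MEMMODEL_RELAXED);

   or, for an exchange,

     loaded_val = __atomic_exchange_N (addr, stored_val, MEMMODEL_RELAXED);
*/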
12291 static bool
12292 expand_omp_atomic_store (basic_block load_bb, tree addr,
12293 tree loaded_val, tree stored_val, int index)
12295 enum built_in_function tmpbase;
12296 gimple_stmt_iterator gsi;
12297 basic_block store_bb = single_succ (load_bb);
12298 location_t loc;
12299 gimple *stmt;
12300 tree decl, call, type, itype;
12301 machine_mode imode;
12302 bool exchange;
12304 gsi = gsi_last_bb (load_bb);
12305 stmt = gsi_stmt (gsi);
12306 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
12308 /* If the load value is needed, then this isn't a store but an exchange. */
12309 exchange = gimple_omp_atomic_need_value_p (stmt);
12311 gsi = gsi_last_bb (store_bb);
12312 stmt = gsi_stmt (gsi);
12313 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
12314 loc = gimple_location (stmt);
12316 /* ??? If the target does not implement atomic_store_optab[mode], and mode
12317 is smaller than word size, then expand_atomic_store assumes that the store
12318 is atomic. We could avoid the builtin entirely in this case. */
12320 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
12321 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
12322 decl = builtin_decl_explicit (tmpbase);
12323 if (decl == NULL_TREE)
12324 return false;
12326 type = TREE_TYPE (stored_val);
12328 /* Dig out the type of the function's second argument. */
12329 itype = TREE_TYPE (decl);
12330 itype = TYPE_ARG_TYPES (itype);
12331 itype = TREE_CHAIN (itype);
12332 itype = TREE_VALUE (itype);
12333 imode = TYPE_MODE (itype);
12335 if (exchange && !can_atomic_exchange_p (imode, true))
12336 return false;
12338 if (!useless_type_conversion_p (itype, type))
12339 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
12340 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
12341 build_int_cst (NULL,
12342 gimple_omp_atomic_seq_cst_p (stmt)
12343 ? MEMMODEL_SEQ_CST
12344 : MEMMODEL_RELAXED));
12345 if (exchange)
12347 if (!useless_type_conversion_p (type, itype))
12348 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
12349 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
12352 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12353 gsi_remove (&gsi, true);
12355 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
12356 gsi = gsi_last_bb (load_bb);
12357 gsi_remove (&gsi, true);
12359 if (gimple_in_ssa_p (cfun))
12360 update_ssa (TODO_update_ssa_no_phi);
12362 return true;
12365 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
12366 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
12367 size of the data type, and thus usable to find the index of the builtin
12368 decl. Returns false if the expression is not of the proper form. */
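/* For example, as a sketch for a 4-byte x, the source-level

     #pragma omp atomic
     x += n;

   becomes

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   (the *_FETCH_* new-value variants are used instead when the updated
   value is needed). */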
12370 static bool
12371 expand_omp_atomic_fetch_op (basic_block load_bb,
12372 tree addr, tree loaded_val,
12373 tree stored_val, int index)
12375 enum built_in_function oldbase, newbase, tmpbase;
12376 tree decl, itype, call;
12377 tree lhs, rhs;
12378 basic_block store_bb = single_succ (load_bb);
12379 gimple_stmt_iterator gsi;
12380 gimple *stmt;
12381 location_t loc;
12382 enum tree_code code;
12383 bool need_old, need_new;
12384 machine_mode imode;
12385 bool seq_cst;
12387 /* We expect to find the following sequences:
12389 load_bb:
12390 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
12392 store_bb:
12393 val = tmp OP something; (or: something OP tmp)
12394 GIMPLE_OMP_ATOMIC_STORE (val)
12396 ??? FIXME: Allow a more flexible sequence.
12397 Perhaps use data flow to pick the statements.
12401 gsi = gsi_after_labels (store_bb);
12402 stmt = gsi_stmt (gsi);
12403 loc = gimple_location (stmt);
12404 if (!is_gimple_assign (stmt))
12405 return false;
12406 gsi_next (&gsi);
12407 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
12408 return false;
12409 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
12410 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
12411 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
12412 gcc_checking_assert (!need_old || !need_new);
12414 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
12415 return false;
12417 /* Check for one of the supported fetch-op operations. */
12418 code = gimple_assign_rhs_code (stmt);
12419 switch (code)
12421 case PLUS_EXPR:
12422 case POINTER_PLUS_EXPR:
12423 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
12424 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
12425 break;
12426 case MINUS_EXPR:
12427 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
12428 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
12429 break;
12430 case BIT_AND_EXPR:
12431 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
12432 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
12433 break;
12434 case BIT_IOR_EXPR:
12435 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
12436 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
12437 break;
12438 case BIT_XOR_EXPR:
12439 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
12440 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
12441 break;
12442 default:
12443 return false;
12446 /* Make sure the expression is of the proper form. */
12447 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
12448 rhs = gimple_assign_rhs2 (stmt);
12449 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
12450 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
12451 rhs = gimple_assign_rhs1 (stmt);
12452 else
12453 return false;
12455 tmpbase = ((enum built_in_function)
12456 ((need_new ? newbase : oldbase) + index + 1));
12457 decl = builtin_decl_explicit (tmpbase);
12458 if (decl == NULL_TREE)
12459 return false;
12460 itype = TREE_TYPE (TREE_TYPE (decl));
12461 imode = TYPE_MODE (itype);
12463 /* We could test all of the various optabs involved, but the fact of the
12464 matter is that (with the exception of i486 vs i586 and xadd) all targets
12465 that support any atomic operation optab also implement compare-and-swap.
12466 Let optabs.c take care of expanding any compare-and-swap loop. */
12467 if (!can_compare_and_swap_p (imode, true))
12468 return false;
12470 gsi = gsi_last_bb (load_bb);
12471 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
12473 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
12474 It only requires that the operation happen atomically. Thus we can
12475 use the RELAXED memory model. */
12476 call = build_call_expr_loc (loc, decl, 3, addr,
12477 fold_convert_loc (loc, itype, rhs),
12478 build_int_cst (NULL,
12479 seq_cst ? MEMMODEL_SEQ_CST
12480 : MEMMODEL_RELAXED));
12482 if (need_old || need_new)
12484 lhs = need_old ? loaded_val : stored_val;
12485 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
12486 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
12488 else
12489 call = fold_convert_loc (loc, void_type_node, call);
12490 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12491 gsi_remove (&gsi, true);
12493 gsi = gsi_last_bb (store_bb);
12494 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
12495 gsi_remove (&gsi, true);
12496 gsi = gsi_last_bb (store_bb);
12497 stmt = gsi_stmt (gsi);
12498 gsi_remove (&gsi, true);
12500 if (gimple_in_ssa_p (cfun))
12502 release_defs (stmt);
12503 update_ssa (TODO_update_ssa_no_phi);
12506 return true;
12509 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12511 oldval = *addr;
12512 repeat:
12513 newval = rhs; // with oldval replacing *addr in rhs
12514 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
12515 if (oldval != newval)
12516 goto repeat;
12518 INDEX is log2 of the size of the data type, and thus usable to find the
12519 index of the builtin decl. */
12521 static bool
12522 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
12523 tree addr, tree loaded_val, tree stored_val,
12524 int index)
12526 tree loadedi, storedi, initial, new_storedi, old_vali;
12527 tree type, itype, cmpxchg, iaddr;
12528 gimple_stmt_iterator si;
12529 basic_block loop_header = single_succ (load_bb);
12530 gimple *phi, *stmt;
12531 edge e;
12532 enum built_in_function fncode;
12534 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
12535 order to use the RELAXED memory model effectively. */
12536 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
12537 + index + 1);
12538 cmpxchg = builtin_decl_explicit (fncode);
12539 if (cmpxchg == NULL_TREE)
12540 return false;
12541 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12542 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
12544 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
12545 return false;
12547 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
12548 si = gsi_last_bb (load_bb);
12549 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12551 /* For floating-point values, we'll need to view-convert them to integers
12552 so that we can perform the atomic compare and swap. Simplify the
12553 following code by always setting up the "i"ntegral variables. */
12554 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
12556 tree iaddr_val;
12558 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
12559 true));
12560 iaddr_val
12561 = force_gimple_operand_gsi (&si,
12562 fold_convert (TREE_TYPE (iaddr), addr),
12563 false, NULL_TREE, true, GSI_SAME_STMT);
12564 stmt = gimple_build_assign (iaddr, iaddr_val);
12565 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12566 loadedi = create_tmp_var (itype);
12567 if (gimple_in_ssa_p (cfun))
12568 loadedi = make_ssa_name (loadedi);
12570 else
12572 iaddr = addr;
12573 loadedi = loaded_val;
12576 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
12577 tree loaddecl = builtin_decl_explicit (fncode);
12578 if (loaddecl)
12579 initial
12580 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
12581 build_call_expr (loaddecl, 2, iaddr,
12582 build_int_cst (NULL_TREE,
12583 MEMMODEL_RELAXED)));
12584 else
12585 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
12586 build_int_cst (TREE_TYPE (iaddr), 0));
12588 initial
12589 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
12590 GSI_SAME_STMT);
12592 /* Move the value to the LOADEDI temporary. */
12593 if (gimple_in_ssa_p (cfun))
12595 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
12596 phi = create_phi_node (loadedi, loop_header);
12597 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
12598 initial);
12600 else
12601 gsi_insert_before (&si,
12602 gimple_build_assign (loadedi, initial),
12603 GSI_SAME_STMT);
12604 if (loadedi != loaded_val)
12606 gimple_stmt_iterator gsi2;
12607 tree x;
12609 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
12610 gsi2 = gsi_start_bb (loop_header);
12611 if (gimple_in_ssa_p (cfun))
12613 gassign *stmt;
12614 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12615 true, GSI_SAME_STMT);
12616 stmt = gimple_build_assign (loaded_val, x);
12617 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
12619 else
12621 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
12622 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12623 true, GSI_SAME_STMT);
12626 gsi_remove (&si, true);
12628 si = gsi_last_bb (store_bb);
12629 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12631 if (iaddr == addr)
12632 storedi = stored_val;
12633 else
12634 storedi =
12635 force_gimple_operand_gsi (&si,
12636 build1 (VIEW_CONVERT_EXPR, itype,
12637 stored_val), true, NULL_TREE, true,
12638 GSI_SAME_STMT);
12640 /* Build the compare&swap statement. */
12641 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
12642 new_storedi = force_gimple_operand_gsi (&si,
12643 fold_convert (TREE_TYPE (loadedi),
12644 new_storedi),
12645 true, NULL_TREE,
12646 true, GSI_SAME_STMT);
12648 if (gimple_in_ssa_p (cfun))
12649 old_vali = loadedi;
12650 else
12652 old_vali = create_tmp_var (TREE_TYPE (loadedi));
12653 stmt = gimple_build_assign (old_vali, loadedi);
12654 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12656 stmt = gimple_build_assign (loadedi, new_storedi);
12657 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12660 /* Note that we always perform the comparison as an integer, even for
12661 floating point. This allows the atomic operation to properly
12662 succeed even with NaNs and -0.0. */
12663 stmt = gimple_build_cond_empty
12664 (build2 (NE_EXPR, boolean_type_node,
12665 new_storedi, old_vali));
12666 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12668 /* Update cfg. */
12669 e = single_succ_edge (store_bb);
12670 e->flags &= ~EDGE_FALLTHRU;
12671 e->flags |= EDGE_FALSE_VALUE;
12673 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
12675 /* Copy the new value to loadedi (we already did that before the condition
12676 if we are not in SSA). */
12677 if (gimple_in_ssa_p (cfun))
12679 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
12680 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
12683 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
12684 gsi_remove (&si, true);
12686 struct loop *loop = alloc_loop ();
12687 loop->header = loop_header;
12688 loop->latch = store_bb;
12689 add_loop (loop, loop_header->loop_father);
12691 if (gimple_in_ssa_p (cfun))
12692 update_ssa (TODO_update_ssa_no_phi);
12694 return true;
12697 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12699 GOMP_atomic_start ();
12700 *addr = rhs;
12701 GOMP_atomic_end ();
12703 The result is not globally atomic, but works so long as all parallel
12704 references are within #pragma omp atomic directives. According to
12705 responses received from omp@openmp.org, this appears to be within spec,
12706 which makes sense, since that's how several other compilers handle
12707 this situation as well.
12708 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
12709 expanding. STORED_VAL is the operand of the matching
12710 GIMPLE_OMP_ATOMIC_STORE.
12712 We replace
12713 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
12714 loaded_val = *addr;
12716 and replace
12717 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
12718 *addr = stored_val;
12721 static bool
12722 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
12723 tree addr, tree loaded_val, tree stored_val)
12725 gimple_stmt_iterator si;
12726 gassign *stmt;
12727 tree t;
12729 si = gsi_last_bb (load_bb);
12730 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12732 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
12733 t = build_call_expr (t, 0);
12734 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12736 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
12737 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12738 gsi_remove (&si, true);
12740 si = gsi_last_bb (store_bb);
12741 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12743 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
12744 stored_val);
12745 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12747 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
12748 t = build_call_expr (t, 0);
12749 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12750 gsi_remove (&si, true);
12752 if (gimple_in_ssa_p (cfun))
12753 update_ssa (TODO_update_ssa_no_phi);
12754 return true;
12757 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
12758    using expand_omp_atomic_fetch_op.  If that fails, we try
12759    expand_omp_atomic_pipeline, and if that fails too, the
12760    ultimate fallback is wrapping the operation in a mutex
12761    (expand_omp_atomic_mutex).  REGION is the atomic region built
12762    by build_omp_regions_1().  */
12764 static void
12765 expand_omp_atomic (struct omp_region *region)
12767 basic_block load_bb = region->entry, store_bb = region->exit;
12768 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
12769 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
12770 tree loaded_val = gimple_omp_atomic_load_lhs (load);
12771 tree addr = gimple_omp_atomic_load_rhs (load);
12772 tree stored_val = gimple_omp_atomic_store_val (store);
12773 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12774 HOST_WIDE_INT index;
12776 /* Make sure the type is one of the supported sizes. */
12777 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
12778 index = exact_log2 (index);
12779 if (index >= 0 && index <= 4)
12781 unsigned int align = TYPE_ALIGN_UNIT (type);
12783 /* __sync builtins require strict data alignment. */
12784 if (exact_log2 (align) >= index)
12786 /* Atomic load. */
12787 if (loaded_val == stored_val
12788 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12789 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12790 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12791 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
12792 return;
12794 /* Atomic store. */
12795 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12796 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12797 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12798 && store_bb == single_succ (load_bb)
12799 && first_stmt (store_bb) == store
12800 && expand_omp_atomic_store (load_bb, addr, loaded_val,
12801 stored_val, index))
12802 return;
12804 /* When possible, use specialized atomic update functions. */
12805 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
12806 && store_bb == single_succ (load_bb)
12807 && expand_omp_atomic_fetch_op (load_bb, addr,
12808 loaded_val, stored_val, index))
12809 return;
12811    /* If we don't have specialized __sync builtins, try to implement
12812       it as a compare and swap loop.  */
12813 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
12814 loaded_val, stored_val, index))
12815 return;
12819 /* The ultimate fallback is wrapping the operation in a mutex. */
12820 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
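/* Worked example for the size/alignment checks above: for a naturally
   aligned 8-byte double, index = exact_log2 (8) = 3, which lies in
   [0, 4], and exact_log2 (align) = 3 >= index, so the __sync-based
   expansions may be tried.  A hypothetical 8-byte type with only
   4-byte alignment fails the alignment test and drops straight to the
   mutex fallback.  */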
12824 /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
12825    macro in gomp-constants.h.  We do not check for overflow.  */
12827 static tree
12828 oacc_launch_pack (unsigned code, tree device, unsigned op)
12830 tree res;
12832 res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
12833 if (device)
12835 device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
12836 device, build_int_cst (unsigned_type_node,
12837 GOMP_LAUNCH_DEVICE_SHIFT));
12838 res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
12840 return res;
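/* A sketch of the encoding produced above, mirroring GOMP_LAUNCH_PACK
   (the shift constants are those defined in gomp-constants.h):

       res = (code << GOMP_LAUNCH_CODE_SHIFT)
             | (device << GOMP_LAUNCH_DEVICE_SHIFT)
             | op;

   DEVICE is folded in separately only because it may be a non-constant
   tree, unlike CODE and OP.  */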
12843 /* Look for compute grid dimension clauses and convert to an attribute
12844 attached to FN. This permits the target-side code to (a) massage
12845 the dimensions, (b) emit that data and (c) optimize. Non-constant
12846 dimensions are pushed onto ARGS.
12848 The attribute value is a TREE_LIST. A set of dimensions is
12849 represented as a list of INTEGER_CST. Those that are runtime
12850 exprs are represented as an INTEGER_CST of zero.
12852    TODO.  Normally the attribute will just contain a single such list.  If
12853 however it contains a list of lists, this will represent the use of
12854 device_type. Each member of the outer list is an assoc list of
12855 dimensions, keyed by the device type. The first entry will be the
12856 default. Well, that's the plan. */
12858 #define OACC_FN_ATTRIB "oacc function"
12860 /* Replace any existing oacc fn attribute with updated dimensions. */
12862 void
12863 replace_oacc_fn_attrib (tree fn, tree dims)
12865 tree ident = get_identifier (OACC_FN_ATTRIB);
12866 tree attribs = DECL_ATTRIBUTES (fn);
12868 /* If we happen to be present as the first attrib, drop it. */
12869 if (attribs && TREE_PURPOSE (attribs) == ident)
12870 attribs = TREE_CHAIN (attribs);
12871 DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
12874 /* Scan CLAUSES for launch dimensions and attach them to the oacc
12875 function attribute. Push any that are non-constant onto the ARGS
12876 list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
12877    true if these are for a kernels region offload function.  */
12879 void
12880 set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
12882 /* Must match GOMP_DIM ordering. */
12883 static const omp_clause_code ids[]
12884 = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
12885 OMP_CLAUSE_VECTOR_LENGTH };
12886 unsigned ix;
12887 tree dims[GOMP_DIM_MAX];
12888 tree attr = NULL_TREE;
12889 unsigned non_const = 0;
12891 for (ix = GOMP_DIM_MAX; ix--;)
12893 tree clause = find_omp_clause (clauses, ids[ix]);
12894 tree dim = NULL_TREE;
12896 if (clause)
12897 dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
12898 dims[ix] = dim;
12899 if (dim && TREE_CODE (dim) != INTEGER_CST)
12901 dim = integer_zero_node;
12902 non_const |= GOMP_DIM_MASK (ix);
12904 attr = tree_cons (NULL_TREE, dim, attr);
12905 /* Note kernelness with TREE_PUBLIC. */
12906 if (is_kernel)
12907 TREE_PUBLIC (attr) = 1;
12910 replace_oacc_fn_attrib (fn, attr);
12912 if (non_const)
12914 /* Push a dynamic argument set. */
12915 args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
12916 NULL_TREE, non_const));
12917 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
12918 if (non_const & GOMP_DIM_MASK (ix))
12919 args->safe_push (dims[ix]);
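/* Illustrative example (clause values hypothetical): given
   "num_gangs (N) num_workers (16) vector_length (128)" where N is not
   a compile-time constant, the attribute value becomes the list
   (0, 16, 128), NON_CONST has the gang bit of GOMP_DIM_MASK set, and
   ARGS receives a GOMP_LAUNCH_DIM tag followed by the tree for N.  */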
12923 /* Process the routine's dimension clauses to generate an attribute
12924    value.  Issue diagnostics as appropriate.  We default to SEQ
12925    (OpenACC 2.5 clarifies this).  All dimensions have a size of zero
12926    (dynamic).  TREE_PURPOSE is set to indicate whether that dimension
12927    can have a loop partitioned on it.  Non-zero indicates
12928    yes, zero indicates no.  By construction, once a non-zero has been
12929    reached, further inner dimensions must also be non-zero.  We set
12930    TREE_VALUE to zero for the dimensions that may be partitioned and
12931    1 for the other ones -- if a loop is (erroneously) spawned at
12932    an outer level, we don't want to try to partition it.  */
12934 tree
12935 build_oacc_routine_dims (tree clauses)
12937 /* Must match GOMP_DIM ordering. */
12938 static const omp_clause_code ids[] =
12939 {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
12940 int ix;
12941 int level = -1;
12943 for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
12944 for (ix = GOMP_DIM_MAX + 1; ix--;)
12945 if (OMP_CLAUSE_CODE (clauses) == ids[ix])
12947 if (level >= 0)
12948 error_at (OMP_CLAUSE_LOCATION (clauses),
12949 "multiple loop axes specified for routine");
12950 level = ix;
12951 break;
12954 /* Default to SEQ. */
12955 if (level < 0)
12956 level = GOMP_DIM_MAX;
12958 tree dims = NULL_TREE;
12960 for (ix = GOMP_DIM_MAX; ix--;)
12961 dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
12962 build_int_cst (integer_type_node, ix < level), dims);
12964 return dims;
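/* Worked example: for "#pragma acc routine worker", LEVEL is the
   worker axis, so the returned list is

       gang:   TREE_PURPOSE 0, TREE_VALUE 1
       worker: TREE_PURPOSE 1, TREE_VALUE 0
       vector: TREE_PURPOSE 1, TREE_VALUE 0

   i.e. loops may be partitioned at worker level and below, while a
   loop erroneously spawned at gang level will not be partitioned.  */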
12967 /* Retrieve the oacc function attrib and return it.  Returns NULL
12968    for non-oacc functions.  */
12970 tree
12971 get_oacc_fn_attrib (tree fn)
12973 return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
12976 /* Return true if this oacc fn attrib is for a kernels offload
12977    region.  We use the TREE_PUBLIC flag of each dimension -- we only
12978    need to check the first one.  */
12980 bool
12981 oacc_fn_attrib_kernels_p (tree attr)
12983 return TREE_PUBLIC (TREE_VALUE (attr));
12986 /* Return level at which oacc routine may spawn a partitioned loop, or
12987 -1 if it is not a routine (i.e. is an offload fn). */
12989 static int
12990 oacc_fn_attrib_level (tree attr)
12992 tree pos = TREE_VALUE (attr);
12994 if (!TREE_PURPOSE (pos))
12995 return -1;
12997 int ix = 0;
12998 for (ix = 0; ix != GOMP_DIM_MAX;
12999 ix++, pos = TREE_CHAIN (pos))
13000 if (!integer_zerop (TREE_PURPOSE (pos)))
13001 break;
13003 return ix;
13006 /* Extract an oacc execution dimension from FN. FN must be an
13007 offloaded function or routine that has already had its execution
13008 dimensions lowered to the target-specific values. */
13010 int
13011 get_oacc_fn_dim_size (tree fn, int axis)
13013 tree attrs = get_oacc_fn_attrib (fn);
13015 gcc_assert (axis < GOMP_DIM_MAX);
13017 tree dims = TREE_VALUE (attrs);
13018 while (axis--)
13019 dims = TREE_CHAIN (dims);
13021 int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
13023 return size;
13026 /* Extract the dimension axis from an IFN_GOACC_DIM_POS or
13027 IFN_GOACC_DIM_SIZE call. */
13029 int
13030 get_oacc_ifn_dim_arg (const gimple *stmt)
13032 gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
13033 || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
13034 tree arg = gimple_call_arg (stmt, 0);
13035 HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);
13037 gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
13038 return (int) axis;
13041 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
13042 at REGION_EXIT. */
13044 static void
13045 mark_loops_in_oacc_kernels_region (basic_block region_entry,
13046 basic_block region_exit)
13048 struct loop *outer = region_entry->loop_father;
13049 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
13051 /* Don't parallelize the kernels region if it contains more than one outer
13052 loop. */
13053 unsigned int nr_outer_loops = 0;
13054 struct loop *single_outer = NULL;
13055 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
13057 gcc_assert (loop_outer (loop) == outer);
13059 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
13060 continue;
13062 if (region_exit != NULL
13063 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
13064 continue;
13066 nr_outer_loops++;
13067 single_outer = loop;
13069 if (nr_outer_loops != 1)
13070 return;
13072 for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
13073 if (loop->next)
13074 return;
13076 /* Mark the loops in the region. */
13077 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
13078 loop->in_oacc_kernels_region = true;
13081 /* Types used to pass grid and work-group sizes to a kernel invocation.  */
13083 struct GTY(()) grid_launch_attributes_trees
13085 tree kernel_dim_array_type;
13086 tree kernel_lattrs_dimnum_decl;
13087 tree kernel_lattrs_grid_decl;
13088 tree kernel_lattrs_group_decl;
13089 tree kernel_launch_attributes_type;
13092 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
13094 /* Create types used to pass kernel launch attributes to target. */
13096 static void
13097 grid_create_kernel_launch_attr_types (void)
13099 if (grid_attr_trees)
13100 return;
13101 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
13103 tree dim_arr_index_type
13104 = build_index_type (build_int_cst (integer_type_node, 2));
13105 grid_attr_trees->kernel_dim_array_type
13106 = build_array_type (uint32_type_node, dim_arr_index_type);
13108 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
13109 grid_attr_trees->kernel_lattrs_dimnum_decl
13110 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
13111 uint32_type_node);
13112 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
13114 grid_attr_trees->kernel_lattrs_grid_decl
13115 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
13116 grid_attr_trees->kernel_dim_array_type);
13117 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
13118 = grid_attr_trees->kernel_lattrs_dimnum_decl;
13119 grid_attr_trees->kernel_lattrs_group_decl
13120 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
13121 grid_attr_trees->kernel_dim_array_type);
13122 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
13123 = grid_attr_trees->kernel_lattrs_grid_decl;
13124 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
13125 "__gomp_kernel_launch_attributes",
13126 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
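/* The record built above corresponds to this C declaration (a sketch;
   it is never emitted as source):

       struct __gomp_kernel_launch_attributes
       {
         uint32_t ndim;
         uint32_t grid_size[3];
         uint32_t group_size[3];
       };

   DIM_ARR_INDEX_TYPE is [0..2], so each array holds one element per
   potential grid dimension.  */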
13129 /* Insert before the current statement in GSI a store of VALUE to INDEX of
13130 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
13131 of type uint32_type_node. */
13133 static void
13134 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
13135 tree fld_decl, int index, tree value)
13137 tree ref = build4 (ARRAY_REF, uint32_type_node,
13138 build3 (COMPONENT_REF,
13139 grid_attr_trees->kernel_dim_array_type,
13140 range_var, fld_decl, NULL_TREE),
13141 build_int_cst (integer_type_node, index),
13142 NULL_TREE, NULL_TREE);
13143 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
13146 /* Return a tree representation of a pointer to a structure with grid and
13147 work-group size information. Statements filling that information will be
13148    inserted before GSI; TGT_STMT is the target statement which has the
13149 necessary information in it. */
13151 static tree
13152 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
13153 gomp_target *tgt_stmt)
13155 grid_create_kernel_launch_attr_types ();
13156 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
13157 "__kernel_launch_attrs");
13159 unsigned max_dim = 0;
13160 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
13161 clause;
13162 clause = OMP_CLAUSE_CHAIN (clause))
13164 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
13165 continue;
13167 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
13168 max_dim = MAX (dim, max_dim);
13170 grid_insert_store_range_dim (gsi, lattrs,
13171 grid_attr_trees->kernel_lattrs_grid_decl,
13172 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
13173 grid_insert_store_range_dim (gsi, lattrs,
13174 grid_attr_trees->kernel_lattrs_group_decl,
13175 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
13178 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
13179 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
13180 gcc_checking_assert (max_dim <= 2);
13181 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
13182 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
13183 GSI_SAME_STMT);
13184 TREE_ADDRESSABLE (lattrs) = 1;
13185 return build_fold_addr_expr (lattrs);
13188 /* Build a target argument identifier from the DEVICE identifier, value
13189 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
13191 static tree
13192 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
13194 tree t = build_int_cst (integer_type_node, device);
13195 if (subsequent_param)
13196 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
13197 build_int_cst (integer_type_node,
13198 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
13199 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
13200 build_int_cst (integer_type_node, id));
13201 return t;
13204 /* Like above but return it in a type that can be directly stored as an element
13205 of the argument array. */
13207 static tree
13208 get_target_argument_identifier (int device, bool subsequent_param, int id)
13210 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
13211 return fold_convert (ptr_type_node, t);
13214 /* Return a target argument consisting of DEVICE identifier, value identifier
13215 ID, and the actual VALUE. */
13217 static tree
13218 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
13219 tree value)
13221 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
13222 fold_convert (integer_type_node, value),
13223 build_int_cst (unsigned_type_node,
13224 GOMP_TARGET_ARG_VALUE_SHIFT));
13225 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
13226 get_target_argument_identifier_1 (device, false, id));
13227 t = fold_convert (ptr_type_node, t);
13228 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
13231 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
13232    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
13233    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
13234 arguments. */
13236 static void
13237 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
13238 int id, tree value, vec <tree> *args)
13240 if (tree_fits_shwi_p (value)
13241 && tree_to_shwi (value) > -(1 << 15)
13242 && tree_to_shwi (value) < (1 << 15))
13243 args->quick_push (get_target_argument_value (gsi, device, id, value));
13244 else
13246 args->quick_push (get_target_argument_identifier (device, true, id));
13247 value = fold_convert (ptr_type_node, value);
13248 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
13249 GSI_SAME_STMT);
13250 args->quick_push (value);
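/* For illustration: a constant such as "num_teams (4)" fits in the
   signed 16-bit value field, so a single pointer-sized argument is
   pushed with device, identifier and value packed together (the value
   shifted by GOMP_TARGET_ARG_VALUE_SHIFT).  A non-constant or
   out-of-range expression instead pushes two arguments: an identifier
   with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value
   itself converted to a pointer.  */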
13254 /* Create an array of arguments that is then passed to GOMP_target. */
13256 static tree
13257 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
13259 auto_vec <tree, 6> args;
13260 tree clauses = gimple_omp_target_clauses (tgt_stmt);
13261 tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
13262 if (c)
13263 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
13264 else
13265 t = integer_minus_one_node;
13266 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
13267 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
13269 c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
13270 if (c)
13271 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
13272 else
13273 t = integer_minus_one_node;
13274 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
13275 GOMP_TARGET_ARG_THREAD_LIMIT, t,
13276 &args);
13278 /* Add HSA-specific grid sizes, if available. */
13279 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13280 OMP_CLAUSE__GRIDDIM_))
13282 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
13283 GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
13284 args.quick_push (t);
13285 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
13288    /* Produce more, perhaps device-specific, arguments here.  */
13290 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
13291 args.length () + 1),
13292 ".omp_target_args");
13293 for (unsigned i = 0; i < args.length (); i++)
13295 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
13296 build_int_cst (integer_type_node, i),
13297 NULL_TREE, NULL_TREE);
13298 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
13299 GSI_SAME_STMT);
13301 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
13302 build_int_cst (integer_type_node, args.length ()),
13303 NULL_TREE, NULL_TREE);
13304 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
13305 GSI_SAME_STMT);
13306 TREE_ADDRESSABLE (argarray) = 1;
13307 return build_fold_addr_expr (argarray);
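/* A sketch of the resulting array for a typical target region without
   HSA grid clauses:

       .omp_target_args[0] = <device-all | num-teams id | value>
       .omp_target_args[1] = <device-all | thread-limit id | value>
       .omp_target_args[2] = NULL          <- terminator

   When OMP_CLAUSE__GRIDDIM_ clauses are present, an identifier and a
   pointer to the launch attributes structure appear before the
   terminating NULL.  */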
13310 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
13312 static void
13313 expand_omp_target (struct omp_region *region)
13315 basic_block entry_bb, exit_bb, new_bb;
13316 struct function *child_cfun;
13317 tree child_fn, block, t;
13318 gimple_stmt_iterator gsi;
13319 gomp_target *entry_stmt;
13320 gimple *stmt;
13321 edge e;
13322 bool offloaded, data_region;
13324 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
13325 new_bb = region->entry;
13327 offloaded = is_gimple_omp_offloaded (entry_stmt);
13328 switch (gimple_omp_target_kind (entry_stmt))
13330 case GF_OMP_TARGET_KIND_REGION:
13331 case GF_OMP_TARGET_KIND_UPDATE:
13332 case GF_OMP_TARGET_KIND_ENTER_DATA:
13333 case GF_OMP_TARGET_KIND_EXIT_DATA:
13334 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13335 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13336 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13337 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13338 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13339 data_region = false;
13340 break;
13341 case GF_OMP_TARGET_KIND_DATA:
13342 case GF_OMP_TARGET_KIND_OACC_DATA:
13343 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13344 data_region = true;
13345 break;
13346 default:
13347 gcc_unreachable ();
13350 child_fn = NULL_TREE;
13351 child_cfun = NULL;
13352 if (offloaded)
13354 child_fn = gimple_omp_target_child_fn (entry_stmt);
13355 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
13358 /* Supported by expand_omp_taskreg, but not here. */
13359 if (child_cfun != NULL)
13360 gcc_checking_assert (!child_cfun->cfg);
13361 gcc_checking_assert (!gimple_in_ssa_p (cfun));
13363 entry_bb = region->entry;
13364 exit_bb = region->exit;
13366 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
13367 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
13369 if (offloaded)
13371 unsigned srcidx, dstidx, num;
13373 /* If the offloading region needs data sent from the parent
13374    function, then the very first statement (except for possible
13375 tree profile counter updates) of the offloading body
13376 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
13377 &.OMP_DATA_O is passed as an argument to the child function,
13378 we need to replace it with the argument as seen by the child
13379 function.
13381 In most cases, this will end up being the identity assignment
13382 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
13383 a function call that has been inlined, the original PARM_DECL
13384 .OMP_DATA_I may have been converted into a different local
13385 variable. In which case, we need to keep the assignment. */
13386 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
13387 if (data_arg)
13389 basic_block entry_succ_bb = single_succ (entry_bb);
13390 gimple_stmt_iterator gsi;
13391 tree arg;
13392 gimple *tgtcopy_stmt = NULL;
13393 tree sender = TREE_VEC_ELT (data_arg, 0);
13395 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
13397 gcc_assert (!gsi_end_p (gsi));
13398 stmt = gsi_stmt (gsi);
13399 if (gimple_code (stmt) != GIMPLE_ASSIGN)
13400 continue;
13402 if (gimple_num_ops (stmt) == 2)
13404 tree arg = gimple_assign_rhs1 (stmt);
13406 /* We're ignoring the subcode because we're
13407 effectively doing a STRIP_NOPS. */
13409 if (TREE_CODE (arg) == ADDR_EXPR
13410 && TREE_OPERAND (arg, 0) == sender)
13412 tgtcopy_stmt = stmt;
13413 break;
13418 gcc_assert (tgtcopy_stmt != NULL);
13419 arg = DECL_ARGUMENTS (child_fn);
13421 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
13422 gsi_remove (&gsi, true);
13425 /* Declare local variables needed in CHILD_CFUN. */
13426 block = DECL_INITIAL (child_fn);
13427 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
13428 /* The gimplifier could record temporaries in the offloading block
13429    rather than in the containing function's local_decls chain,
13430 which would mean cgraph missed finalizing them. Do it now. */
13431 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
13432 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
13433 varpool_node::finalize_decl (t);
13434 DECL_SAVED_TREE (child_fn) = NULL;
13435 /* We'll create a CFG for child_fn, so no gimple body is needed. */
13436 gimple_set_body (child_fn, NULL);
13437 TREE_USED (block) = 1;
13439 /* Reset DECL_CONTEXT on function arguments. */
13440 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
13441 DECL_CONTEXT (t) = child_fn;
13443    /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
13444       so that it can be moved to the child function.  */
13445 gsi = gsi_last_bb (entry_bb);
13446 stmt = gsi_stmt (gsi);
13447 gcc_assert (stmt
13448 && gimple_code (stmt) == gimple_code (entry_stmt));
13449 e = split_block (entry_bb, stmt);
13450 gsi_remove (&gsi, true);
13451 entry_bb = e->dest;
13452 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
13454 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
13455 if (exit_bb)
13457 gsi = gsi_last_bb (exit_bb);
13458 gcc_assert (!gsi_end_p (gsi)
13459 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13460 stmt = gimple_build_return (NULL);
13461 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
13462 gsi_remove (&gsi, true);
13465 /* Move the offloading region into CHILD_CFUN. */
13467 block = gimple_block (entry_stmt);
13469 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
13470 if (exit_bb)
13471 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
13472 /* When the OMP expansion process cannot guarantee an up-to-date
13473    loop tree, arrange for the child function to fix up its loops.  */
13474 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13475 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
13477 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
13478 num = vec_safe_length (child_cfun->local_decls);
13479 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
13481 t = (*child_cfun->local_decls)[srcidx];
13482 if (DECL_CONTEXT (t) == cfun->decl)
13483 continue;
13484 if (srcidx != dstidx)
13485 (*child_cfun->local_decls)[dstidx] = t;
13486 dstidx++;
13488 if (dstidx != num)
13489 vec_safe_truncate (child_cfun->local_decls, dstidx);
13491 /* Inform the callgraph about the new function. */
13492 child_cfun->curr_properties = cfun->curr_properties;
13493 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
13494 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
13495 cgraph_node *node = cgraph_node::get_create (child_fn);
13496 node->parallelized_function = 1;
13497 cgraph_node::add_new_function (child_fn, true);
13499 /* Add the new function to the offload table. */
13500 if (ENABLE_OFFLOADING)
13501 vec_safe_push (offload_funcs, child_fn);
13503 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
13504 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
13506 /* Fix the callgraph edges for child_cfun. Those for cfun will be
13507 fixed in a following pass. */
13508 push_cfun (child_cfun);
13509 if (need_asm)
13510 assign_assembler_name_if_neeeded (child_fn);
13511 cgraph_edge::rebuild_edges ();
13513 /* Some EH regions might become dead, see PR34608. If
13514 pass_cleanup_cfg isn't the first pass to happen with the
13515 new child, these dead EH edges might cause problems.
13516 Clean them up now. */
13517 if (flag_exceptions)
13519 basic_block bb;
13520 bool changed = false;
13522 FOR_EACH_BB_FN (bb, cfun)
13523 changed |= gimple_purge_dead_eh_edges (bb);
13524 if (changed)
13525 cleanup_tree_cfg ();
13527 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13528 verify_loop_structure ();
13529 pop_cfun ();
13531 if (dump_file && !gimple_in_ssa_p (cfun))
13533 omp_any_child_fn_dumped = true;
13534 dump_function_header (dump_file, child_fn, dump_flags);
13535 dump_function_to_file (child_fn, dump_file, dump_flags);
13539 /* Emit a library call to launch the offloading region, or do data
13540 transfers. */
13541 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
13542 enum built_in_function start_ix;
13543 location_t clause_loc;
13544 unsigned int flags_i = 0;
13545 bool oacc_kernels_p = false;
13547 switch (gimple_omp_target_kind (entry_stmt))
13549 case GF_OMP_TARGET_KIND_REGION:
13550 start_ix = BUILT_IN_GOMP_TARGET;
13551 break;
13552 case GF_OMP_TARGET_KIND_DATA:
13553 start_ix = BUILT_IN_GOMP_TARGET_DATA;
13554 break;
13555 case GF_OMP_TARGET_KIND_UPDATE:
13556 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
13557 break;
13558 case GF_OMP_TARGET_KIND_ENTER_DATA:
13559 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13560 break;
13561 case GF_OMP_TARGET_KIND_EXIT_DATA:
13562 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13563 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
13564 break;
13565 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13566 oacc_kernels_p = true;
13567 /* FALLTHROUGH */
13568 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13569 start_ix = BUILT_IN_GOACC_PARALLEL;
13570 break;
13571 case GF_OMP_TARGET_KIND_OACC_DATA:
13572 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13573 start_ix = BUILT_IN_GOACC_DATA_START;
13574 break;
13575 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13576 start_ix = BUILT_IN_GOACC_UPDATE;
13577 break;
13578 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13579 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
13580 break;
13581 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13582 start_ix = BUILT_IN_GOACC_DECLARE;
13583 break;
13584 default:
13585 gcc_unreachable ();
13588 clauses = gimple_omp_target_clauses (entry_stmt);
13590 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
13591    library choose) and there is no conditional.  */
13592 cond = NULL_TREE;
13593 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
13595 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
13596 if (c)
13597 cond = OMP_CLAUSE_IF_EXPR (c);
13599 c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE);
13600 if (c)
13602 /* Even if we pass it to all library function calls, it is currently only
13603 defined/used for the OpenMP target ones. */
13604 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
13605 || start_ix == BUILT_IN_GOMP_TARGET_DATA
13606 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
13607 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
13609 device = OMP_CLAUSE_DEVICE_ID (c);
13610 clause_loc = OMP_CLAUSE_LOCATION (c);
13612 else
13613 clause_loc = gimple_location (entry_stmt);
13615 c = find_omp_clause (clauses, OMP_CLAUSE_NOWAIT);
13616 if (c)
13617 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
13619 /* Ensure 'device' is of the correct type. */
13620 device = fold_convert_loc (clause_loc, integer_type_node, device);
13622 /* If we found the clause 'if (cond)', build
13623 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
13624 if (cond)
13626 cond = gimple_boolify (cond);
13628 basic_block cond_bb, then_bb, else_bb;
13629 edge e;
13630 tree tmp_var;
13632 tmp_var = create_tmp_var (TREE_TYPE (device));
13633 if (offloaded)
13634 e = split_block_after_labels (new_bb);
13635 else
13637 gsi = gsi_last_bb (new_bb);
13638 gsi_prev (&gsi);
13639 e = split_block (new_bb, gsi_stmt (gsi));
13641 cond_bb = e->src;
13642 new_bb = e->dest;
13643 remove_edge (e);
13645 then_bb = create_empty_bb (cond_bb);
13646 else_bb = create_empty_bb (then_bb);
13647 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
13648 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
13650 stmt = gimple_build_cond_empty (cond);
13651 gsi = gsi_last_bb (cond_bb);
13652 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13654 gsi = gsi_start_bb (then_bb);
13655 stmt = gimple_build_assign (tmp_var, device);
13656 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13658 gsi = gsi_start_bb (else_bb);
13659 stmt = gimple_build_assign (tmp_var,
13660 build_int_cst (integer_type_node,
13661 GOMP_DEVICE_HOST_FALLBACK));
13662 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13664 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
13665 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
13666 add_bb_to_loop (then_bb, cond_bb->loop_father);
13667 add_bb_to_loop (else_bb, cond_bb->loop_father);
13668 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
13669 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
13671 device = tmp_var;
13672 gsi = gsi_last_bb (new_bb);
13674 else
13676 gsi = gsi_last_bb (new_bb);
13677 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
13678 true, GSI_SAME_STMT);
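/* To illustrate the CFG built above (block and temporary names are
   hypothetical): for "#pragma omp target if (c) device (d)" the launch
   argument is computed as

       if (c) tmp = d; else tmp = GOMP_DEVICE_HOST_FALLBACK;

   so the runtime call emitted below receives either the requested
   device or the host-fallback device number.  */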
13681 t = gimple_omp_target_data_arg (entry_stmt);
13682 if (t == NULL)
13684 t1 = size_zero_node;
13685 t2 = build_zero_cst (ptr_type_node);
13686 t3 = t2;
13687 t4 = t2;
13689 else
13691 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
13692 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
13693 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
13694 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
13695 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
13698 gimple *g;
13699 bool tagging = false;
13700 /* The maximum number of arguments used by any start_ix, without varargs.  */
13701 auto_vec<tree, 11> args;
13702 args.quick_push (device);
13703 if (offloaded)
13704 args.quick_push (build_fold_addr_expr (child_fn));
13705 args.quick_push (t1);
13706 args.quick_push (t2);
13707 args.quick_push (t3);
13708 args.quick_push (t4);
13709 switch (start_ix)
13711 case BUILT_IN_GOACC_DATA_START:
13712 case BUILT_IN_GOACC_DECLARE:
13713 case BUILT_IN_GOMP_TARGET_DATA:
13714 break;
13715 case BUILT_IN_GOMP_TARGET:
13716 case BUILT_IN_GOMP_TARGET_UPDATE:
13717 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
13718 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
13719 c = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
13720 if (c)
13721 depend = OMP_CLAUSE_DECL (c);
13722 else
13723 depend = build_int_cst (ptr_type_node, 0);
13724 args.quick_push (depend);
13725 if (start_ix == BUILT_IN_GOMP_TARGET)
13726 args.quick_push (get_target_arguments (&gsi, entry_stmt));
13727 break;
13728 case BUILT_IN_GOACC_PARALLEL:
13730 set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
13731 tagging = true;
13733 /* FALLTHRU */
13734 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
13735 case BUILT_IN_GOACC_UPDATE:
13737 tree t_async = NULL_TREE;
13739 /* If present, use the value specified by the respective
13740    clause, making sure it is of the correct type.  */
13741 c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC);
13742 if (c)
13743 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13744 integer_type_node,
13745 OMP_CLAUSE_ASYNC_EXPR (c));
13746 else if (!tagging)
13747 /* Default values for t_async. */
13748 t_async = fold_convert_loc (gimple_location (entry_stmt),
13749 integer_type_node,
13750 build_int_cst (integer_type_node,
13751 GOMP_ASYNC_SYNC));
13752 if (tagging && t_async)
13754 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
13756 if (TREE_CODE (t_async) == INTEGER_CST)
13758    /* See if we can pack the async arg into the tag's
13759 operand. */
13760 i_async = TREE_INT_CST_LOW (t_async);
13761 if (i_async < GOMP_LAUNCH_OP_MAX)
13762 t_async = NULL_TREE;
13763 else
13764 i_async = GOMP_LAUNCH_OP_MAX;
13766 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
13767 i_async));
13769 if (t_async)
13770 args.safe_push (t_async);
13772 /* Save the argument index, and ... */
13773 unsigned t_wait_idx = args.length ();
13774 unsigned num_waits = 0;
13775 c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
13776 if (!tagging || c)
13777 /* ... push a placeholder. */
13778 args.safe_push (integer_zero_node);
13780 for (; c; c = OMP_CLAUSE_CHAIN (c))
13781 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
13783 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13784 integer_type_node,
13785 OMP_CLAUSE_WAIT_EXPR (c)));
13786 num_waits++;
13789 if (!tagging || num_waits)
13791 tree len;
13793 /* Now that we know the number, update the placeholder. */
13794 if (tagging)
13795 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
13796 else
13797 len = build_int_cst (integer_type_node, num_waits);
13798 len = fold_convert_loc (gimple_location (entry_stmt),
13799 unsigned_type_node, len);
13800 args[t_wait_idx] = len;
13803 break;
13804 default:
13805 gcc_unreachable ();
13807 if (tagging)
13808 /* Push terminal marker - zero. */
13809 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
13811 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
13812 gimple_set_location (g, gimple_location (entry_stmt));
13813 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
13814 if (!offloaded)
13816 g = gsi_stmt (gsi);
13817 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
13818 gsi_remove (&gsi, true);
13820 if (data_region && region->exit)
13822 gsi = gsi_last_bb (region->exit);
13823 g = gsi_stmt (gsi);
13824 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
13825 gsi_remove (&gsi, true);
13829 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
13830    the iteration variable derived from the thread number.  INTRA_GROUP means this
13831 is an expansion of a loop iterating over work-items within a separate
13832 iteration over groups. */
13834 static void
13835 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
13837 gimple_stmt_iterator gsi;
13838 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
13839 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
13840 == GF_OMP_FOR_KIND_GRID_LOOP);
13841 size_t collapse = gimple_omp_for_collapse (for_stmt);
13842 struct omp_for_data_loop *loops
13843 = XALLOCAVEC (struct omp_for_data_loop,
13844 gimple_omp_for_collapse (for_stmt));
13845 struct omp_for_data fd;
13847 remove_edge (BRANCH_EDGE (kfor->entry));
13848 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
13850 gcc_assert (kfor->cont);
13851 extract_omp_for_data (for_stmt, &fd, loops);
13853 gsi = gsi_start_bb (body_bb);
13855 for (size_t dim = 0; dim < collapse; dim++)
13857 tree type, itype;
13858 itype = type = TREE_TYPE (fd.loops[dim].v);
13859 if (POINTER_TYPE_P (type))
13860 itype = signed_type_for (type);
13862 tree n1 = fd.loops[dim].n1;
13863 tree step = fd.loops[dim].step;
13864 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
13865 true, NULL_TREE, true, GSI_SAME_STMT);
13866 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
13867 true, NULL_TREE, true, GSI_SAME_STMT);
13868 tree threadid;
13869 if (gimple_omp_for_grid_group_iter (for_stmt))
13871 gcc_checking_assert (!intra_group);
13872 threadid = build_call_expr (builtin_decl_explicit
13873 (BUILT_IN_HSA_WORKGROUPID), 1,
13874 build_int_cstu (unsigned_type_node, dim));
13876 else if (intra_group)
13877 threadid = build_call_expr (builtin_decl_explicit
13878 (BUILT_IN_HSA_WORKITEMID), 1,
13879 build_int_cstu (unsigned_type_node, dim));
13880 else
13881 threadid = build_call_expr (builtin_decl_explicit
13882 (BUILT_IN_HSA_WORKITEMABSID), 1,
13883 build_int_cstu (unsigned_type_node, dim));
13884 threadid = fold_convert (itype, threadid);
13885 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
13886 true, GSI_SAME_STMT);
13888 tree startvar = fd.loops[dim].v;
13889 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
13890 if (POINTER_TYPE_P (type))
13891 t = fold_build_pointer_plus (n1, t);
13892 else
13893 t = fold_build2 (PLUS_EXPR, type, t, n1);
13894 t = fold_convert (type, t);
13895 t = force_gimple_operand_gsi (&gsi, t,
13896 DECL_P (startvar)
13897 && TREE_ADDRESSABLE (startvar),
13898 NULL_TREE, true, GSI_SAME_STMT);
13899 gassign *assign_stmt = gimple_build_assign (startvar, t);
13900 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
13902    /* Remove the GIMPLE_OMP_FOR statement.  */
13903 gsi = gsi_last_bb (kfor->entry);
13904 gsi_remove (&gsi, true);
13906 /* Remove the GIMPLE_OMP_CONTINUE statement. */
13907 gsi = gsi_last_bb (kfor->cont);
13908 gcc_assert (!gsi_end_p (gsi)
13909 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
13910 gsi_remove (&gsi, true);
13912 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
13913 gsi = gsi_last_bb (kfor->exit);
13914 gcc_assert (!gsi_end_p (gsi)
13915 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13916 if (intra_group)
13917 gsi_insert_before (&gsi, build_omp_barrier (NULL_TREE), GSI_SAME_STMT);
13918 gsi_remove (&gsi, true);
13920    /* Fix up the much simpler CFG.  */
13921 remove_edge (find_edge (kfor->cont, body_bb));
13923 if (kfor->cont != body_bb)
13924 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
13925 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
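/* Net effect, sketched for a single collapsed dimension, where
   THREADID stands for the HSA work-group/work-item id builtin selected
   above:

       for (V = N1; V cond N2; V += STEP) { BODY; }

   becomes simply

       V = N1 + THREADID * STEP;
       BODY;

   since the grid launch, rather than a loop, enumerates the iteration
   space.  */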
13928 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
13929 argument_decls. */
13931 struct grid_arg_decl_map
13933 tree old_arg;
13934 tree new_arg;
13937 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
13938    pertaining to the kernel function.  */
13940 static tree
13941 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
13943 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
13944 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
13945 tree t = *tp;
13947 if (t == adm->old_arg)
13948 *tp = adm->new_arg;
13949 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
13950 return NULL_TREE;
13953 static void expand_omp (struct omp_region *region);
13955 /* If the TARGET region contains a kernel body for-loop, remove its region from the
13956 TARGET and expand it in HSA gridified kernel fashion. */
13958 static void
13959 grid_expand_target_grid_body (struct omp_region *target)
13961 if (!hsa_gen_requested_p ())
13962 return;
13964 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
13965 struct omp_region **pp;
13967 for (pp = &target->inner; *pp; pp = &(*pp)->next)
13968 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
13969 break;
13971 struct omp_region *gpukernel = *pp;
13973 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
13974 if (!gpukernel)
13976 /* HSA cannot handle OACC stuff. */
13977 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
13978 return;
13979 gcc_checking_assert (orig_child_fndecl);
13980 gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13981 OMP_CLAUSE__GRIDDIM_));
13982 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
13984 hsa_register_kernel (n);
13985 return;
13988 gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13989 OMP_CLAUSE__GRIDDIM_));
13990 tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry)));
13991 *pp = gpukernel->next;
13992 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
13993 if ((*pp)->type == GIMPLE_OMP_FOR)
13994 break;
13996 struct omp_region *kfor = *pp;
13997 gcc_assert (kfor);
13998 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
13999 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
14000 *pp = kfor->next;
14001 if (kfor->inner)
14003 if (gimple_omp_for_grid_group_iter (for_stmt))
14005 struct omp_region **next_pp;
14006 for (pp = &kfor->inner; *pp; pp = next_pp)
14008 next_pp = &(*pp)->next;
14009 if ((*pp)->type != GIMPLE_OMP_FOR)
14010 continue;
14011 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
14012 gcc_assert (gimple_omp_for_kind (inner)
14013 == GF_OMP_FOR_KIND_GRID_LOOP);
14014 grid_expand_omp_for_loop (*pp, true);
14015 *pp = (*pp)->next;
14016 next_pp = pp;
14019 expand_omp (kfor->inner);
14021 if (gpukernel->inner)
14022 expand_omp (gpukernel->inner);
14024 tree kern_fndecl = copy_node (orig_child_fndecl);
14025 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
14026 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
14027 tree tgtblock = gimple_block (tgt_stmt);
14028 tree fniniblock = make_node (BLOCK);
14029 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
14030 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
14031 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
14032 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
14033 DECL_INITIAL (kern_fndecl) = fniniblock;
14034 push_struct_function (kern_fndecl);
14035 cfun->function_end_locus = gimple_location (tgt_stmt);
14036 init_tree_ssa (cfun);
14037 pop_cfun ();
14039 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
14040 gcc_assert (!DECL_CHAIN (old_parm_decl));
14041 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
14042 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
14043 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
14044 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
14045 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
14046 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
14047 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
14048 kern_cfun->curr_properties = cfun->curr_properties;
14050 grid_expand_omp_for_loop (kfor, false);
14052    /* Remove the GIMPLE_OMP_FOR statement.  */
14053 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
14054 gsi_remove (&gsi, true);
14055 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
14056 return. */
14057 gsi = gsi_last_bb (gpukernel->exit);
14058 gcc_assert (!gsi_end_p (gsi)
14059 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
14060 gimple *ret_stmt = gimple_build_return (NULL);
14061 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
14062 gsi_remove (&gsi, true);
14064 /* Statements in the first BB in the target construct have been produced by
14065 target lowering and must be copied inside the GPUKERNEL, with the two
14066 exceptions of the first OMP statement and the OMP_DATA assignment
14067 statement. */
14068 gsi = gsi_start_bb (single_succ (gpukernel->entry));
14069 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
14070 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
14071 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
14072 !gsi_end_p (tsi); gsi_next (&tsi))
14074 gimple *stmt = gsi_stmt (tsi);
14075 if (is_gimple_omp (stmt))
14076 break;
14077 if (sender
14078 && is_gimple_assign (stmt)
14079 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
14080 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
14081 continue;
14082 gimple *copy = gimple_copy (stmt);
14083 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
14084 gimple_set_block (copy, fniniblock);
14087 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
14088 gpukernel->exit, inside_block);
14090 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
14091 kcn->mark_force_output ();
14092 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
14094 hsa_register_kernel (kcn, orig_child);
14096 cgraph_node::add_new_function (kern_fndecl, true);
14097 push_cfun (kern_cfun);
14098 cgraph_edge::rebuild_edges ();
14100 /* Re-map any mention of the PARM_DECL of the original function to the
14101 PARM_DECL of the new one.
14103 TODO: It would be great if lowering produced references into the GPU
14104 kernel decl straight away and we did not have to do this. */
14105 struct grid_arg_decl_map adm;
14106 adm.old_arg = old_parm_decl;
14107 adm.new_arg = new_parm_decl;
14108 basic_block bb;
14109 FOR_EACH_BB_FN (bb, kern_cfun)
14111 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
14113 gimple *stmt = gsi_stmt (gsi);
14114 struct walk_stmt_info wi;
14115 memset (&wi, 0, sizeof (wi));
14116 wi.info = &adm;
14117 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
14120 pop_cfun ();
14122 return;
14125 /* Expand the parallel region tree rooted at REGION. Expansion
14126 proceeds in depth-first order. Innermost regions are expanded
14127 first. This way, parallel regions that require a new function to
14128 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
14129 internal dependencies in their body. */
14131 static void
14132 expand_omp (struct omp_region *region)
14134 omp_any_child_fn_dumped = false;
14135 while (region)
14137 location_t saved_location;
14138 gimple *inner_stmt = NULL;
14140 /* First, determine whether this is a combined parallel+workshare
14141 region. */
14142 if (region->type == GIMPLE_OMP_PARALLEL)
14143 determine_parallel_type (region);
14144 else if (region->type == GIMPLE_OMP_TARGET)
14145 grid_expand_target_grid_body (region);
14147 if (region->type == GIMPLE_OMP_FOR
14148 && gimple_omp_for_combined_p (last_stmt (region->entry)))
14149 inner_stmt = last_stmt (region->inner->entry);
14151 if (region->inner)
14152 expand_omp (region->inner);
14154 saved_location = input_location;
14155 if (gimple_has_location (last_stmt (region->entry)))
14156 input_location = gimple_location (last_stmt (region->entry));
14158 switch (region->type)
14160 case GIMPLE_OMP_PARALLEL:
14161 case GIMPLE_OMP_TASK:
14162 expand_omp_taskreg (region);
14163 break;
14165 case GIMPLE_OMP_FOR:
14166 expand_omp_for (region, inner_stmt);
14167 break;
14169 case GIMPLE_OMP_SECTIONS:
14170 expand_omp_sections (region);
14171 break;
14173 case GIMPLE_OMP_SECTION:
14174 /* Individual omp sections are handled together with their
14175 parent GIMPLE_OMP_SECTIONS region. */
14176 break;
14178 case GIMPLE_OMP_SINGLE:
14179 expand_omp_single (region);
14180 break;
14182 case GIMPLE_OMP_ORDERED:
14184 gomp_ordered *ord_stmt
14185 = as_a <gomp_ordered *> (last_stmt (region->entry));
14186 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14187 OMP_CLAUSE_DEPEND))
14189 /* We'll expand these when expanding corresponding
14190 worksharing region with ordered(n) clause. */
14191 gcc_assert (region->outer
14192 && region->outer->type == GIMPLE_OMP_FOR);
14193 region->ord_stmt = ord_stmt;
14194 break;
14197 /* FALLTHRU */
14198 case GIMPLE_OMP_MASTER:
14199 case GIMPLE_OMP_TASKGROUP:
14200 case GIMPLE_OMP_CRITICAL:
14201 case GIMPLE_OMP_TEAMS:
14202 expand_omp_synch (region);
14203 break;
14205 case GIMPLE_OMP_ATOMIC_LOAD:
14206 expand_omp_atomic (region);
14207 break;
14209 case GIMPLE_OMP_TARGET:
14210 expand_omp_target (region);
14211 break;
14213 default:
14214 gcc_unreachable ();
14217 input_location = saved_location;
14218 region = region->next;
14220 if (omp_any_child_fn_dumped)
14222 if (dump_file)
14223 dump_function_header (dump_file, current_function_decl, dump_flags);
14224 omp_any_child_fn_dumped = false;
14228 /* Helper for build_omp_regions. Scan the dominator tree starting at
14229 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
14230    true, the function ends once a single tree is built (otherwise, a whole
14231 forest of OMP constructs may be built). */
14233 static void
14234 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
14235 bool single_tree)
14237 gimple_stmt_iterator gsi;
14238 gimple *stmt;
14239 basic_block son;
14241 gsi = gsi_last_bb (bb);
14242 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
14244 struct omp_region *region;
14245 enum gimple_code code;
14247 stmt = gsi_stmt (gsi);
14248 code = gimple_code (stmt);
14249 if (code == GIMPLE_OMP_RETURN)
14251 /* STMT is the return point out of region PARENT. Mark it
14252 as the exit point and make PARENT the immediately
14253 enclosing region. */
14254 gcc_assert (parent);
14255 region = parent;
14256 region->exit = bb;
14257 parent = parent->outer;
14259 else if (code == GIMPLE_OMP_ATOMIC_STORE)
14261    /* GIMPLE_OMP_ATOMIC_STORE is analogous to
14262 GIMPLE_OMP_RETURN, but matches with
14263 GIMPLE_OMP_ATOMIC_LOAD. */
14264 gcc_assert (parent);
14265 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
14266 region = parent;
14267 region->exit = bb;
14268 parent = parent->outer;
14270 else if (code == GIMPLE_OMP_CONTINUE)
14272 gcc_assert (parent);
14273 parent->cont = bb;
14275 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
14277 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
14278 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
14280 else
14282 region = new_omp_region (bb, code, parent);
14283 /* Otherwise... */
14284 if (code == GIMPLE_OMP_TARGET)
14286 switch (gimple_omp_target_kind (stmt))
14288 case GF_OMP_TARGET_KIND_REGION:
14289 case GF_OMP_TARGET_KIND_DATA:
14290 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
14291 case GF_OMP_TARGET_KIND_OACC_KERNELS:
14292 case GF_OMP_TARGET_KIND_OACC_DATA:
14293 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
14294 break;
14295 case GF_OMP_TARGET_KIND_UPDATE:
14296 case GF_OMP_TARGET_KIND_ENTER_DATA:
14297 case GF_OMP_TARGET_KIND_EXIT_DATA:
14298 case GF_OMP_TARGET_KIND_OACC_UPDATE:
14299 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
14300 case GF_OMP_TARGET_KIND_OACC_DECLARE:
14301 /* ..., other than for those stand-alone directives... */
14302 region = NULL;
14303 break;
14304 default:
14305 gcc_unreachable ();
14308 else if (code == GIMPLE_OMP_ORDERED
14309 && find_omp_clause (gimple_omp_ordered_clauses
14310 (as_a <gomp_ordered *> (stmt)),
14311 OMP_CLAUSE_DEPEND))
14312 /* #pragma omp ordered depend is also just a stand-alone
14313 directive. */
14314 region = NULL;
14315 /* ..., this directive becomes the parent for a new region. */
14316 if (region)
14317 parent = region;
14321 if (single_tree && !parent)
14322 return;
14324 for (son = first_dom_son (CDI_DOMINATORS, bb);
14325 son;
14326 son = next_dom_son (CDI_DOMINATORS, son))
14327 build_omp_regions_1 (son, parent, single_tree);
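/* For example, this source produces a two-level region tree: a
   GIMPLE_OMP_PARALLEL region whose inner chain holds a GIMPLE_OMP_FOR
   region, each closed off by its own GIMPLE_OMP_RETURN:

       #pragma omp parallel            <- outer region
       {
         #pragma omp for               <- inner region
         for (...) { ... }
       }
*/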
14330 /* Builds the tree of OMP regions rooted at ROOT, storing it in
14331 root_omp_region. */
14333 static void
14334 build_omp_regions_root (basic_block root)
14336 gcc_assert (root_omp_region == NULL);
14337 build_omp_regions_1 (root, NULL, true);
14338 gcc_assert (root_omp_region != NULL);
14341 /* Expands the omp construct (and its subconstructs) starting at HEAD.  */
14343 void
14344 omp_expand_local (basic_block head)
14346 build_omp_regions_root (head);
14347 if (dump_file && (dump_flags & TDF_DETAILS))
14349 fprintf (dump_file, "\nOMP region tree\n\n");
14350 dump_omp_region (dump_file, root_omp_region, 0);
14351 fprintf (dump_file, "\n");
14354 remove_exit_barriers (root_omp_region);
14355 expand_omp (root_omp_region);
14357 free_omp_regions ();
14360 /* Scan the CFG and build a tree of OMP regions, storing it in
14361    root_omp_region.  */
14363 static void
14364 build_omp_regions (void)
14366 gcc_assert (root_omp_region == NULL);
14367 calculate_dominance_info (CDI_DOMINATORS);
14368 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
14371 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
14373 static unsigned int
14374 execute_expand_omp (void)
14376 build_omp_regions ();
14378 if (!root_omp_region)
14379 return 0;
14381 if (dump_file)
14383 fprintf (dump_file, "\nOMP region tree\n\n");
14384 dump_omp_region (dump_file, root_omp_region, 0);
14385 fprintf (dump_file, "\n");
14388 remove_exit_barriers (root_omp_region);
14390 expand_omp (root_omp_region);
14392 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
14393 verify_loop_structure ();
14394 cleanup_tree_cfg ();
14396 free_omp_regions ();
14398 return 0;
14401 /* OMP expansion -- the default pass, run before creation of SSA form. */
14403 namespace {
14405 const pass_data pass_data_expand_omp =
14407 GIMPLE_PASS, /* type */
14408 "ompexp", /* name */
14409 OPTGROUP_OPENMP, /* optinfo_flags */
14410 TV_NONE, /* tv_id */
14411 PROP_gimple_any, /* properties_required */
14412 PROP_gimple_eomp, /* properties_provided */
14413 0, /* properties_destroyed */
14414 0, /* todo_flags_start */
14415 0, /* todo_flags_finish */
14418 class pass_expand_omp : public gimple_opt_pass
14420 public:
14421 pass_expand_omp (gcc::context *ctxt)
14422 : gimple_opt_pass (pass_data_expand_omp, ctxt)
14425 /* opt_pass methods: */
14426 virtual unsigned int execute (function *)
14428 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
14429 || flag_openmp_simd != 0)
14430 && !seen_error ());
14432 /* This pass always runs, to provide PROP_gimple_eomp.
14433 But often, there is nothing to do. */
14434 if (!gate)
14435 return 0;
14437 return execute_expand_omp ();
14440 }; // class pass_expand_omp
14442 } // anon namespace
14444 gimple_opt_pass *
14445 make_pass_expand_omp (gcc::context *ctxt)
14447 return new pass_expand_omp (ctxt);
14450 namespace {
14452 const pass_data pass_data_expand_omp_ssa =
14454 GIMPLE_PASS, /* type */
14455 "ompexpssa", /* name */
14456 OPTGROUP_OPENMP, /* optinfo_flags */
14457 TV_NONE, /* tv_id */
14458 PROP_cfg | PROP_ssa, /* properties_required */
14459 PROP_gimple_eomp, /* properties_provided */
14460 0, /* properties_destroyed */
14461 0, /* todo_flags_start */
14462 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
14465 class pass_expand_omp_ssa : public gimple_opt_pass
14467 public:
14468 pass_expand_omp_ssa (gcc::context *ctxt)
14469 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
14472 /* opt_pass methods: */
14473 virtual bool gate (function *fun)
14475 return !(fun->curr_properties & PROP_gimple_eomp);
14477 virtual unsigned int execute (function *) { return execute_expand_omp (); }
14478 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
14480 }; // class pass_expand_omp_ssa
14482 } // anon namespace
14484 gimple_opt_pass *
14485 make_pass_expand_omp_ssa (gcc::context *ctxt)
14487 return new pass_expand_omp_ssa (ctxt);
14490 /* Routines to lower OMP directives into OMP-GIMPLE. */
14492 /* If ctx is a worksharing context inside a cancellable parallel
14493    region and it isn't nowait, add an lhs to its GIMPLE_OMP_RETURN
14494    and a conditional branch to the parallel's cancel_label to handle
14495 cancellation in the implicit barrier. */
14497 static void
14498 maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
14500 gimple *omp_return = gimple_seq_last_stmt (*body);
14501 gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
14502 if (gimple_omp_return_nowait_p (omp_return))
14503 return;
14504 if (ctx->outer
14505 && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
14506 && ctx->outer->cancellable)
14508 tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
14509 tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
14510 tree lhs = create_tmp_var (c_bool_type);
14511 gimple_omp_return_set_lhs (omp_return, lhs);
14512 tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
14513 gimple *g = gimple_build_cond (NE_EXPR, lhs,
14514 fold_convert (c_bool_type,
14515 boolean_false_node),
14516 ctx->outer->cancel_label, fallthru_label);
14517 gimple_seq_add_stmt (body, g);
14518 gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
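/* A sketch of the sequence appended above when the enclosing parallel
   is cancellable (identifier names are illustrative only):

     <lhs> = GIMPLE_OMP_RETURN;      // implicit barrier; <lhs> becomes
                                     // nonzero if cancellation occurred
     if (<lhs> != false) goto <ctx->outer->cancel_label>;
     <fallthru_label>:                                                  */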
14522 /* Lower the OpenMP sections directive in the current statement in GSI_P.
14523 CTX is the enclosing OMP context for the current statement. */
14525 static void
14526 lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14528 tree block, control;
14529 gimple_stmt_iterator tgsi;
14530 gomp_sections *stmt;
14531 gimple *t;
14532 gbind *new_stmt, *bind;
14533 gimple_seq ilist, dlist, olist, new_body;
14535 stmt = as_a <gomp_sections *> (gsi_stmt (*gsi_p));
14537 push_gimplify_context ();
14539 dlist = NULL;
14540 ilist = NULL;
14541 lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
14542 &ilist, &dlist, ctx, NULL);
14544 new_body = gimple_omp_body (stmt);
14545 gimple_omp_set_body (stmt, NULL);
14546 tgsi = gsi_start (new_body);
14547 for (; !gsi_end_p (tgsi); gsi_next (&tgsi))
14549 omp_context *sctx;
14550 gimple *sec_start;
14552 sec_start = gsi_stmt (tgsi);
14553 sctx = maybe_lookup_ctx (sec_start);
14554 gcc_assert (sctx);
14556 lower_omp (gimple_omp_body_ptr (sec_start), sctx);
14557 gsi_insert_seq_after (&tgsi, gimple_omp_body (sec_start),
14558 GSI_CONTINUE_LINKING);
14559 gimple_omp_set_body (sec_start, NULL);
14561 if (gsi_one_before_end_p (tgsi))
14563 gimple_seq l = NULL;
14564 lower_lastprivate_clauses (gimple_omp_sections_clauses (stmt), NULL,
14565 &l, ctx);
14566 gsi_insert_seq_after (&tgsi, l, GSI_CONTINUE_LINKING);
14567 gimple_omp_section_set_last (sec_start);
14570 gsi_insert_after (&tgsi, gimple_build_omp_return (false),
14571 GSI_CONTINUE_LINKING);
14574 block = make_node (BLOCK);
14575 bind = gimple_build_bind (NULL, new_body, block);
14577 olist = NULL;
14578 lower_reduction_clauses (gimple_omp_sections_clauses (stmt), &olist, ctx);
14580 block = make_node (BLOCK);
14581 new_stmt = gimple_build_bind (NULL, NULL, block);
14582 gsi_replace (gsi_p, new_stmt, true);
14584 pop_gimplify_context (new_stmt);
14585 gimple_bind_append_vars (new_stmt, ctx->block_vars);
14586 BLOCK_VARS (block) = gimple_bind_vars (bind);
14587 if (BLOCK_VARS (block))
14588 TREE_USED (block) = 1;
14590 new_body = NULL;
14591 gimple_seq_add_seq (&new_body, ilist);
14592 gimple_seq_add_stmt (&new_body, stmt);
14593 gimple_seq_add_stmt (&new_body, gimple_build_omp_sections_switch ());
14594 gimple_seq_add_stmt (&new_body, bind);
14596 control = create_tmp_var (unsigned_type_node, ".section");
14597 t = gimple_build_omp_continue (control, control);
14598 gimple_omp_sections_set_control (stmt, control);
14599 gimple_seq_add_stmt (&new_body, t);
14601 gimple_seq_add_seq (&new_body, olist);
14602 if (ctx->cancellable)
14603 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
14604 gimple_seq_add_seq (&new_body, dlist);
14606 new_body = maybe_catch_exception (new_body);
14608 t = gimple_build_omp_return
14609 (!!find_omp_clause (gimple_omp_sections_clauses (stmt),
14610 OMP_CLAUSE_NOWAIT));
14611 gimple_seq_add_stmt (&new_body, t);
14612 maybe_add_implicit_barrier_cancel (ctx, &new_body);
14614 gimple_bind_set_body (new_stmt, new_body);
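/* Roughly, the replacement bind built above is laid out as follows
   (a sketch of the sequence, not exact GIMPLE):

     <ilist>                           // private/firstprivate setup
     GIMPLE_OMP_SECTIONS <clauses>
     GIMPLE_OMP_SECTIONS_SWITCH
     <bind: section bodies, lastprivate code after the last one>
     GIMPLE_OMP_CONTINUE (.section, .section)
     <olist>                           // reductions
     <dlist>                           // destructors
     GIMPLE_OMP_RETURN [nowait]
     <implicit barrier cancellation check, if needed>                   */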
14618 /* A subroutine of lower_omp_single. Expand the simple form of
14619 a GIMPLE_OMP_SINGLE, without a copyprivate clause:
14621 if (GOMP_single_start ())
14622 BODY;
14623 [ GOMP_barrier (); ] -> unless 'nowait' is present.
14625 FIXME. It may be better to delay expanding the logic of this until
14626 pass_expand_omp. The expanded logic may make the job more difficult
14627 for a synchronization analysis pass. */
14629 static void
14630 lower_omp_single_simple (gomp_single *single_stmt, gimple_seq *pre_p)
14632 location_t loc = gimple_location (single_stmt);
14633 tree tlabel = create_artificial_label (loc);
14634 tree flabel = create_artificial_label (loc);
14635 gimple *call, *cond;
14636 tree lhs, decl;
14638 decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_START);
14639 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
14640 call = gimple_build_call (decl, 0);
14641 gimple_call_set_lhs (call, lhs);
14642 gimple_seq_add_stmt (pre_p, call);
14644 cond = gimple_build_cond (EQ_EXPR, lhs,
14645 fold_convert_loc (loc, TREE_TYPE (lhs),
14646 boolean_true_node),
14647 tlabel, flabel);
14648 gimple_seq_add_stmt (pre_p, cond);
14649 gimple_seq_add_stmt (pre_p, gimple_build_label (tlabel));
14650 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14651 gimple_seq_add_stmt (pre_p, gimple_build_label (flabel));
14655 /* A subroutine of lower_omp_single. Expand the simple form of
14656 a GIMPLE_OMP_SINGLE, with a copyprivate clause:
14658 #pragma omp single copyprivate (a, b, c)
14660 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
14663 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
14665 BODY;
14666 copyout.a = a;
14667 copyout.b = b;
14668 copyout.c = c;
14669 GOMP_single_copy_end (&copyout);
14671 else
14673 a = copyout_p->a;
14674 b = copyout_p->b;
14675 c = copyout_p->c;
14677 GOMP_barrier ();
14680 FIXME. It may be better to delay expanding the logic of this until
14681 pass_expand_omp. The expanded logic may make the job more difficult
14682 for a synchronization analysis pass. */
14684 static void
14685 lower_omp_single_copy (gomp_single *single_stmt, gimple_seq *pre_p,
14686 omp_context *ctx)
14688 tree ptr_type, t, l0, l1, l2, bfn_decl;
14689 gimple_seq copyin_seq;
14690 location_t loc = gimple_location (single_stmt);
14692 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
14694 ptr_type = build_pointer_type (ctx->record_type);
14695 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
14697 l0 = create_artificial_label (loc);
14698 l1 = create_artificial_label (loc);
14699 l2 = create_artificial_label (loc);
14701 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_START);
14702 t = build_call_expr_loc (loc, bfn_decl, 0);
14703 t = fold_convert_loc (loc, ptr_type, t);
14704 gimplify_assign (ctx->receiver_decl, t, pre_p);
14706 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
14707 build_int_cst (ptr_type, 0));
14708 t = build3 (COND_EXPR, void_type_node, t,
14709 build_and_jump (&l0), build_and_jump (&l1));
14710 gimplify_and_add (t, pre_p);
14712 gimple_seq_add_stmt (pre_p, gimple_build_label (l0));
14714 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14716 copyin_seq = NULL;
14717 lower_copyprivate_clauses (gimple_omp_single_clauses (single_stmt), pre_p,
14718 &copyin_seq, ctx);
14720 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
14721 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_END);
14722 t = build_call_expr_loc (loc, bfn_decl, 1, t);
14723 gimplify_and_add (t, pre_p);
14725 t = build_and_jump (&l2);
14726 gimplify_and_add (t, pre_p);
14728 gimple_seq_add_stmt (pre_p, gimple_build_label (l1));
14730 gimple_seq_add_seq (pre_p, copyin_seq);
14732 gimple_seq_add_stmt (pre_p, gimple_build_label (l2));
14736 /* Expand code for an OpenMP single directive. */
14738 static void
14739 lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14741 tree block;
14742 gimple *t;
14743 gomp_single *single_stmt = as_a <gomp_single *> (gsi_stmt (*gsi_p));
14744 gbind *bind;
14745 gimple_seq bind_body, bind_body_tail = NULL, dlist;
14747 push_gimplify_context ();
14749 block = make_node (BLOCK);
14750 bind = gimple_build_bind (NULL, NULL, block);
14751 gsi_replace (gsi_p, bind, true);
14752 bind_body = NULL;
14753 dlist = NULL;
14754 lower_rec_input_clauses (gimple_omp_single_clauses (single_stmt),
14755 &bind_body, &dlist, ctx, NULL);
14756 lower_omp (gimple_omp_body_ptr (single_stmt), ctx);
14758 gimple_seq_add_stmt (&bind_body, single_stmt);
14760 if (ctx->record_type)
14761 lower_omp_single_copy (single_stmt, &bind_body, ctx);
14762 else
14763 lower_omp_single_simple (single_stmt, &bind_body);
14765 gimple_omp_set_body (single_stmt, NULL);
14767 gimple_seq_add_seq (&bind_body, dlist);
14769 bind_body = maybe_catch_exception (bind_body);
14771 t = gimple_build_omp_return
14772 (!!find_omp_clause (gimple_omp_single_clauses (single_stmt),
14773 OMP_CLAUSE_NOWAIT));
14774 gimple_seq_add_stmt (&bind_body_tail, t);
14775 maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
14776 if (ctx->record_type)
14778 gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
14779 tree clobber = build_constructor (ctx->record_type, NULL);
14780 TREE_THIS_VOLATILE (clobber) = 1;
14781 gsi_insert_after (&gsi, gimple_build_assign (ctx->sender_decl,
14782 clobber), GSI_SAME_STMT);
14784 gimple_seq_add_seq (&bind_body, bind_body_tail);
14785 gimple_bind_set_body (bind, bind_body);
14787 pop_gimplify_context (bind);
14789 gimple_bind_append_vars (bind, ctx->block_vars);
14790 BLOCK_VARS (block) = ctx->block_vars;
14791 if (BLOCK_VARS (block))
14792 TREE_USED (block) = 1;
14796 /* Expand code for an OpenMP master directive. */
14798 static void
14799 lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14801 tree block, lab = NULL, x, bfn_decl;
14802 gimple *stmt = gsi_stmt (*gsi_p);
14803 gbind *bind;
14804 location_t loc = gimple_location (stmt);
14805 gimple_seq tseq;
14807 push_gimplify_context ();
14809 block = make_node (BLOCK);
14810 bind = gimple_build_bind (NULL, NULL, block);
14811 gsi_replace (gsi_p, bind, true);
14812 gimple_bind_add_stmt (bind, stmt);
14814 bfn_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
14815 x = build_call_expr_loc (loc, bfn_decl, 0);
14816 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
14817 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
14818 tseq = NULL;
14819 gimplify_and_add (x, &tseq);
14820 gimple_bind_add_seq (bind, tseq);
14822 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14823 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14824 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14825 gimple_omp_set_body (stmt, NULL);
14827 gimple_bind_add_stmt (bind, gimple_build_label (lab));
14829 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14831 pop_gimplify_context (bind);
14833 gimple_bind_append_vars (bind, ctx->block_vars);
14834 BLOCK_VARS (block) = ctx->block_vars;
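/* The lowered master construct is thus a simple guard; as a sketch
   (the label name is invented):

     if (__builtin_omp_get_thread_num () != 0) goto <lab>;
     BODY;
   <lab>:
     GIMPLE_OMP_RETURN (nowait)                                         */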
14838 /* Expand code for an OpenMP taskgroup directive. */
14840 static void
14841 lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14843 gimple *stmt = gsi_stmt (*gsi_p);
14844 gcall *x;
14845 gbind *bind;
14846 tree block = make_node (BLOCK);
14848 bind = gimple_build_bind (NULL, NULL, block);
14849 gsi_replace (gsi_p, bind, true);
14850 gimple_bind_add_stmt (bind, stmt);
14852 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_START),
14853 0);
14854 gimple_bind_add_stmt (bind, x);
14856 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14857 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14858 gimple_omp_set_body (stmt, NULL);
14860 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14862 gimple_bind_append_vars (bind, ctx->block_vars);
14863 BLOCK_VARS (block) = ctx->block_vars;
14867 /* Fold the ordered clauses of the GIMPLE_OMP_ORDERED in ORD_STMT if possible. */
14869 static void
14870 lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt,
14871 omp_context *ctx)
14873 struct omp_for_data fd;
14874 if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
14875 return;
14877 unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
14878 struct omp_for_data_loop *loops = XALLOCAVEC (struct omp_for_data_loop, len);
14879 extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
14880 if (!fd.ordered)
14881 return;
14883 tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14884 tree c = gimple_omp_ordered_clauses (ord_stmt);
14885 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
14886 && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
14888 /* Merge depend clauses from multiple adjacent
14889 #pragma omp ordered depend(sink:...) constructs
14890 into one #pragma omp ordered depend(sink:...), so that
14891 we can optimize them together. */
14892 gimple_stmt_iterator gsi = *gsi_p;
14893 gsi_next (&gsi);
14894 while (!gsi_end_p (gsi))
14896 gimple *stmt = gsi_stmt (gsi);
14897 if (is_gimple_debug (stmt)
14898 || gimple_code (stmt) == GIMPLE_NOP)
14900 gsi_next (&gsi);
14901 continue;
14903 if (gimple_code (stmt) != GIMPLE_OMP_ORDERED)
14904 break;
14905 gomp_ordered *ord_stmt2 = as_a <gomp_ordered *> (stmt);
14906 c = gimple_omp_ordered_clauses (ord_stmt2);
14907 if (c == NULL_TREE
14908 || OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
14909 || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14910 break;
14911 while (*list_p)
14912 list_p = &OMP_CLAUSE_CHAIN (*list_p);
14913 *list_p = c;
14914 gsi_remove (&gsi, true);
14918 /* Canonicalize sink dependence clauses into one folded clause if
14919 possible.
14921 The basic algorithm is to create a sink vector whose first
14922 element is the GCD of all the first elements, and whose remaining
14923 elements are the minimum of the subsequent columns.
14925 We ignore dependence vectors whose first element is zero because
14926 such dependencies are known to be executed by the same thread.
14928 We take into account the direction of the loop, so a minimum
14929 becomes a maximum if the loop is iterating forwards. We also
14930 ignore sink clauses where the loop direction is unknown, or where
14931 the offsets are clearly invalid because they are not a multiple
14932 of the loop increment.
14934 For example:
14936 #pragma omp for ordered(2)
14937 for (i=0; i < N; ++i)
14938 for (j=0; j < M; ++j)
14940 #pragma omp ordered \
14941 depend(sink:i-8,j-2) \
14942 depend(sink:i,j-1) \ // Completely ignored because i+0.
14943 depend(sink:i-4,j-3) \
14944 depend(sink:i-6,j-4)
14945 #pragma omp ordered depend(source)
14948 Folded clause is:
14950 depend(sink:-gcd(8,4,6),-min(2,3,4))
14951 -or-
14952 depend(sink:-2,-2)
14955 /* FIXME: Computing GCDs where the first element is zero is
14956 non-trivial in the presence of collapsed loops. Do this later. */
14957 if (fd.collapse > 1)
14958 return;
14960 wide_int *folded_deps = XALLOCAVEC (wide_int, 2 * len - 1);
14961 memset (folded_deps, 0, sizeof (*folded_deps) * (2 * len - 1));
14962 tree folded_dep = NULL_TREE;
14963 /* TRUE if the first dimension's offset is negative. */
14964 bool neg_offset_p = false;
14966 list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14967 unsigned int i;
14968 while ((c = *list_p) != NULL)
14970 bool remove = false;
14972 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND);
14973 if (OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14974 goto next_ordered_clause;
14976 tree vec;
14977 for (vec = OMP_CLAUSE_DECL (c), i = 0;
14978 vec && TREE_CODE (vec) == TREE_LIST;
14979 vec = TREE_CHAIN (vec), ++i)
14981 gcc_assert (i < len);
14983 /* extract_omp_for_data has canonicalized the condition. */
14984 gcc_assert (fd.loops[i].cond_code == LT_EXPR
14985 || fd.loops[i].cond_code == GT_EXPR);
14986 bool forward = fd.loops[i].cond_code == LT_EXPR;
14987 bool maybe_lexically_later = true;
14989 /* While the committee makes up its mind, bail if we have any
14990 non-constant steps. */
14991 if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
14992 goto lower_omp_ordered_ret;
14994 tree itype = TREE_TYPE (TREE_VALUE (vec));
14995 if (POINTER_TYPE_P (itype))
14996 itype = sizetype;
14997 wide_int offset = wide_int::from (TREE_PURPOSE (vec),
14998 TYPE_PRECISION (itype),
14999 TYPE_SIGN (itype));
15001 /* Ignore invalid offsets that are not multiples of the step. */
15002 if (!wi::multiple_of_p
15003 (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
15004 UNSIGNED))
15006 warning_at (OMP_CLAUSE_LOCATION (c), 0,
15007 "ignoring sink clause with offset that is not "
15008 "a multiple of the loop step");
15009 remove = true;
15010 goto next_ordered_clause;
15013 /* Calculate the first dimension. The first dimension of
15014 the folded dependency vector is the GCD of the first
15015 elements, while ignoring any first elements whose offset
15016 is 0. */
15017 if (i == 0)
15019 /* Ignore dependence vectors whose first dimension is 0. */
15020 if (offset == 0)
15022 remove = true;
15023 goto next_ordered_clause;
15025 else
15027 if (!TYPE_UNSIGNED (itype) && (forward ^ wi::neg_p (offset)))
15029 error_at (OMP_CLAUSE_LOCATION (c),
15030 "first offset must be in opposite direction "
15031 "of loop iterations");
15032 goto lower_omp_ordered_ret;
15034 if (forward)
15035 offset = -offset;
15036 neg_offset_p = forward;
15037 /* Initialize the first time around. */
15038 if (folded_dep == NULL_TREE)
15040 folded_dep = c;
15041 folded_deps[0] = offset;
15043 else
15044 folded_deps[0] = wi::gcd (folded_deps[0],
15045 offset, UNSIGNED);
15048 /* Calculate minimum for the remaining dimensions. */
15049 else
15051 folded_deps[len + i - 1] = offset;
15052 if (folded_dep == c)
15053 folded_deps[i] = offset;
15054 else if (maybe_lexically_later
15055 && !wi::eq_p (folded_deps[i], offset))
15057 if (forward ^ wi::gts_p (folded_deps[i], offset))
15059 unsigned int j;
15060 folded_dep = c;
15061 for (j = 1; j <= i; j++)
15062 folded_deps[j] = folded_deps[len + j - 1];
15064 else
15065 maybe_lexically_later = false;
15069 gcc_assert (i == len);
15071 remove = true;
15073 next_ordered_clause:
15074 if (remove)
15075 *list_p = OMP_CLAUSE_CHAIN (c);
15076 else
15077 list_p = &OMP_CLAUSE_CHAIN (c);
15080 if (folded_dep)
15082 if (neg_offset_p)
15083 folded_deps[0] = -folded_deps[0];
15085 tree itype = TREE_TYPE (TREE_VALUE (OMP_CLAUSE_DECL (folded_dep)));
15086 if (POINTER_TYPE_P (itype))
15087 itype = sizetype;
15089 TREE_PURPOSE (OMP_CLAUSE_DECL (folded_dep))
15090 = wide_int_to_tree (itype, folded_deps[0]);
15091 OMP_CLAUSE_CHAIN (folded_dep) = gimple_omp_ordered_clauses (ord_stmt);
15092 *gimple_omp_ordered_clauses_ptr (ord_stmt) = folded_dep;
15095 lower_omp_ordered_ret:
15097 /* Ordered without clauses is equivalent to #pragma omp ordered threads,
15098 while we want a nop instead if we have removed all clauses. */
15099 if (gimple_omp_ordered_clauses (ord_stmt) == NULL_TREE)
15100 gsi_replace (gsi_p, gimple_build_nop (), true);
15104 /* Expand code for an OpenMP ordered directive. */
15106 static void
15107 lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15109 tree block;
15110 gimple *stmt = gsi_stmt (*gsi_p), *g;
15111 gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
15112 gcall *x;
15113 gbind *bind;
15114 bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
15115 OMP_CLAUSE_SIMD);
15116 /* FIXME: this should check presence of OMP_CLAUSE__SIMT_ on the enclosing
15117 loop. */
15118 bool maybe_simt
15119 = simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1;
15120 bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
15121 OMP_CLAUSE_THREADS);
15123 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
15124 OMP_CLAUSE_DEPEND))
15126 /* FIXME: This needs to be moved to the expansion, to verify various
15127 conditions only testable on a cfg with dominators computed; also,
15128 all the depend clauses still to be merged might need to remain
15129 available for the runtime checks. */
15130 if (0)
15131 lower_omp_ordered_clauses (gsi_p, ord_stmt, ctx);
15132 return;
15135 push_gimplify_context ();
15137 block = make_node (BLOCK);
15138 bind = gimple_build_bind (NULL, NULL, block);
15139 gsi_replace (gsi_p, bind, true);
15140 gimple_bind_add_stmt (bind, stmt);
15142 if (simd)
15144 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 1,
15145 build_int_cst (NULL_TREE, threads));
15146 cfun->has_simduid_loops = true;
15148 else
15149 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
15150 0);
15151 gimple_bind_add_stmt (bind, x);
15153 tree counter = NULL_TREE, test = NULL_TREE, body = NULL_TREE;
15154 if (maybe_simt)
15156 counter = create_tmp_var (integer_type_node);
15157 g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
15158 gimple_call_set_lhs (g, counter);
15159 gimple_bind_add_stmt (bind, g);
15161 body = create_artificial_label (UNKNOWN_LOCATION);
15162 test = create_artificial_label (UNKNOWN_LOCATION);
15163 gimple_bind_add_stmt (bind, gimple_build_label (body));
15165 tree simt_pred = create_tmp_var (integer_type_node);
15166 g = gimple_build_call_internal (IFN_GOMP_SIMT_ORDERED_PRED, 1, counter);
15167 gimple_call_set_lhs (g, simt_pred);
15168 gimple_bind_add_stmt (bind, g);
15170 tree t = create_artificial_label (UNKNOWN_LOCATION);
15171 g = gimple_build_cond (EQ_EXPR, simt_pred, integer_zero_node, t, test);
15172 gimple_bind_add_stmt (bind, g);
15174 gimple_bind_add_stmt (bind, gimple_build_label (t));
15176 lower_omp (gimple_omp_body_ptr (stmt), ctx);
15177 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
15178 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
15179 gimple_omp_set_body (stmt, NULL);
15181 if (maybe_simt)
15183 gimple_bind_add_stmt (bind, gimple_build_label (test));
15184 g = gimple_build_assign (counter, MINUS_EXPR, counter, integer_one_node);
15185 gimple_bind_add_stmt (bind, g);
15187 tree c = build2 (GE_EXPR, boolean_type_node, counter, integer_zero_node);
15188 tree nonneg = create_tmp_var (integer_type_node);
15189 gimple_seq tseq = NULL;
15190 gimplify_assign (nonneg, fold_convert (integer_type_node, c), &tseq);
15191 gimple_bind_add_seq (bind, tseq);
15193 g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY, 1, nonneg);
15194 gimple_call_set_lhs (g, nonneg);
15195 gimple_bind_add_stmt (bind, g);
15197 tree end = create_artificial_label (UNKNOWN_LOCATION);
15198 g = gimple_build_cond (NE_EXPR, nonneg, integer_zero_node, body, end);
15199 gimple_bind_add_stmt (bind, g);
15201 gimple_bind_add_stmt (bind, gimple_build_label (end));
15203 if (simd)
15204 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1,
15205 build_int_cst (NULL_TREE, threads));
15206 else
15207 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END),
15208 0);
15209 gimple_bind_add_stmt (bind, x);
15211 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
15213 pop_gimplify_context (bind);
15215 gimple_bind_append_vars (bind, ctx->block_vars);
15216 BLOCK_VARS (block) = gimple_bind_vars (bind);
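/* In the common case (no depend clauses, no simd or SIMT handling),
   the result of the above is simply, as a sketch:

     GOMP_ordered_start ();
     BODY;
     GOMP_ordered_end ();
     GIMPLE_OMP_RETURN (nowait)                                         */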
15220 /* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
15221 substitution of a couple of function calls. But in the NAMED case,
15222 it requires that languages coordinate a symbol name. It is therefore
15223 best put here in common code. */
15225 static GTY(()) hash_map<tree, tree> *critical_name_mutexes;
15227 static void
15228 lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15230 tree block;
15231 tree name, lock, unlock;
15232 gomp_critical *stmt = as_a <gomp_critical *> (gsi_stmt (*gsi_p));
15233 gbind *bind;
15234 location_t loc = gimple_location (stmt);
15235 gimple_seq tbody;
15237 name = gimple_omp_critical_name (stmt);
15238 if (name)
15240 tree decl;
15242 if (!critical_name_mutexes)
15243 critical_name_mutexes = hash_map<tree, tree>::create_ggc (10);
15245 tree *n = critical_name_mutexes->get (name);
15246 if (n == NULL)
15248 char *new_str;
15250 decl = create_tmp_var_raw (ptr_type_node);
15252 new_str = ACONCAT ((".gomp_critical_user_",
15253 IDENTIFIER_POINTER (name), NULL));
15254 DECL_NAME (decl) = get_identifier (new_str);
15255 TREE_PUBLIC (decl) = 1;
15256 TREE_STATIC (decl) = 1;
15257 DECL_COMMON (decl) = 1;
15258 DECL_ARTIFICIAL (decl) = 1;
15259 DECL_IGNORED_P (decl) = 1;
15261 varpool_node::finalize_decl (decl);
15263 critical_name_mutexes->put (name, decl);
15265 else
15266 decl = *n;
15268 /* If '#pragma omp critical' is inside offloaded region or
15269 inside function marked as offloadable, the symbol must be
15270 marked as offloadable too. */
15271 omp_context *octx;
15272 if (cgraph_node::get (current_function_decl)->offloadable)
15273 varpool_node::get_create (decl)->offloadable = 1;
15274 else
15275 for (octx = ctx->outer; octx; octx = octx->outer)
15276 if (is_gimple_omp_offloaded (octx->stmt))
15278 varpool_node::get_create (decl)->offloadable = 1;
15279 break;
15282 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_START);
15283 lock = build_call_expr_loc (loc, lock, 1, build_fold_addr_expr_loc (loc, decl));
15285 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_END);
15286 unlock = build_call_expr_loc (loc, unlock, 1,
15287 build_fold_addr_expr_loc (loc, decl));
15289 else
15291 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_START);
15292 lock = build_call_expr_loc (loc, lock, 0);
15294 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_END);
15295 unlock = build_call_expr_loc (loc, unlock, 0);
15298 push_gimplify_context ();
15300 block = make_node (BLOCK);
15301 bind = gimple_build_bind (NULL, NULL, block);
15302 gsi_replace (gsi_p, bind, true);
15303 gimple_bind_add_stmt (bind, stmt);
15305 tbody = gimple_bind_body (bind);
15306 gimplify_and_add (lock, &tbody);
15307 gimple_bind_set_body (bind, tbody);
15309 lower_omp (gimple_omp_body_ptr (stmt), ctx);
15310 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
15311 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
15312 gimple_omp_set_body (stmt, NULL);
15314 tbody = gimple_bind_body (bind);
15315 gimplify_and_add (unlock, &tbody);
15316 gimple_bind_set_body (bind, tbody);
15318 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
15320 pop_gimplify_context (bind);
15321 gimple_bind_append_vars (bind, ctx->block_vars);
15322 BLOCK_VARS (block) = gimple_bind_vars (bind);
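/* For example, '#pragma omp critical (foo)' lowers to approximately:

     GOMP_critical_name_start (&.gomp_critical_user_foo);
     BODY;
     GOMP_critical_name_end (&.gomp_critical_user_foo);

   while the unnamed form calls GOMP_critical_start/GOMP_critical_end
   with no arguments.  (A sketch; the bind and GIMPLE_OMP_RETURN
   bookkeeping shown above are also emitted.)                           */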
15325 /* Return the lastprivate predicate for a given gridified loop described by FD.
15326 TODO: When grid stuff is moved to a separate file, move this too. */
15328 static tree
15329 grid_lastprivate_predicate (struct omp_for_data *fd)
15331 /* When dealing with a gridified loop, we need to check up to three collapsed
15332 iteration variables but they are not actually captured in this fd.
15333 Fortunately, we can easily rely on HSA builtins to get this
15334 information. */
15336 tree id, size;
15337 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
15338 && gimple_omp_for_grid_intra_group (fd->for_stmt))
15340 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
15341 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
15343 else
15345 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
15346 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
15348 tree cond = NULL;
15349 for (int dim = 0; dim < fd->collapse; dim++)
15351 tree dim_tree = build_int_cstu (unsigned_type_node, dim);
15352 tree u1 = build_int_cstu (unsigned_type_node, 1);
15353 tree c2
15354 = build2 (EQ_EXPR, boolean_type_node,
15355 build2 (PLUS_EXPR, unsigned_type_node,
15356 build_call_expr (id, 1, dim_tree), u1),
15357 build_call_expr (size, 1, dim_tree));
15358 if (cond)
15359 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
15360 else
15361 cond = c2;
15363 return cond;
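/* E.g. for a loop collapsed to two dimensions, the predicate built
   above is, in effect (using illustrative names for the HSA builtins):

     workitem_abs_id (0) + 1 == grid_size (0)
     && workitem_abs_id (1) + 1 == grid_size (1)

   i.e. "this work item executes the last iteration" in every
   dimension.                                                           */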
15366 /* A subroutine of lower_omp_for. Generate code to emit the predicate
15367 for a lastprivate clause. Given a loop control predicate of (V
15368 cond N2), we gate the clause on (!(V cond N2)). The lowered form
15369 is appended to *DLIST, while iterator initialization is appended to
15370 *BODY_P. */
15372 static void
15373 lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
15374 gimple_seq *dlist, struct omp_context *ctx)
15376 tree clauses, cond, vinit;
15377 enum tree_code cond_code;
15378 gimple_seq stmts;
15380 cond_code = fd->loop.cond_code;
15381 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
15383 /* When possible, use a strict equality expression. This can let VRP
15384 type optimizations deduce the value and remove a copy. */
15385 if (tree_fits_shwi_p (fd->loop.step))
15387 HOST_WIDE_INT step = tree_to_shwi (fd->loop.step);
15388 if (step == 1 || step == -1)
15389 cond_code = EQ_EXPR;
15392 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
15393 || gimple_omp_for_grid_phony (fd->for_stmt))
15394 cond = grid_lastprivate_predicate (fd);
15395 else
15397 tree n2 = fd->loop.n2;
15398 if (fd->collapse > 1
15399 && TREE_CODE (n2) != INTEGER_CST
15400 && gimple_omp_for_combined_into_p (fd->for_stmt))
15402 struct omp_context *taskreg_ctx = NULL;
15403 if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
15405 gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
15406 if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR
15407 || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE)
15409 if (gimple_omp_for_combined_into_p (gfor))
15411 gcc_assert (ctx->outer->outer
15412 && is_parallel_ctx (ctx->outer->outer));
15413 taskreg_ctx = ctx->outer->outer;
15415 else
15417 struct omp_for_data outer_fd;
15418 extract_omp_for_data (gfor, &outer_fd, NULL);
15419 n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
15422 else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
15423 taskreg_ctx = ctx->outer->outer;
15425 else if (is_taskreg_ctx (ctx->outer))
15426 taskreg_ctx = ctx->outer;
15427 if (taskreg_ctx)
15429 int i;
15430 tree taskreg_clauses
15431 = gimple_omp_taskreg_clauses (taskreg_ctx->stmt);
15432 tree innerc = find_omp_clause (taskreg_clauses,
15433 OMP_CLAUSE__LOOPTEMP_);
15434 gcc_assert (innerc);
15435 for (i = 0; i < fd->collapse; i++)
15437 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
15438 OMP_CLAUSE__LOOPTEMP_);
15439 gcc_assert (innerc);
15441 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
15442 OMP_CLAUSE__LOOPTEMP_);
15443 if (innerc)
15444 n2 = fold_convert (TREE_TYPE (n2),
15445 lookup_decl (OMP_CLAUSE_DECL (innerc),
15446 taskreg_ctx));
15449 cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
15452 clauses = gimple_omp_for_clauses (fd->for_stmt);
15453 stmts = NULL;
15454 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
15455 if (!gimple_seq_empty_p (stmts))
15457 gimple_seq_add_seq (&stmts, *dlist);
15458 *dlist = stmts;
15460 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
15461 vinit = fd->loop.n1;
15462 if (cond_code == EQ_EXPR
15463 && tree_fits_shwi_p (fd->loop.n2)
15464 && ! integer_zerop (fd->loop.n2))
15465 vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0);
15466 else
15467 vinit = unshare_expr (vinit);
15469 /* Initialize the iterator variable, so that threads that don't execute
15470 any iterations don't execute the lastprivate clauses by accident. */
15471 gimplify_assign (fd->loop.v, vinit, body_p);
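/* As a concrete sketch: for 'for (i = 0; i < N; i++)' with step 1,
   the gating predicate chosen above is simply 'i == N' (strict
   equality, valid because the step is +-1), and the iterator is
   pre-initialized so that threads which execute no iterations cannot
   satisfy the predicate by accident.                                   */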
15476 /* Lower code for an OMP loop directive. */
15478 static void
15479 lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15481 tree *rhs_p, block;
15482 struct omp_for_data fd, *fdp = NULL;
15483 gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
15484 gbind *new_stmt;
15485 gimple_seq omp_for_body, body, dlist;
15486 gimple_seq oacc_head = NULL, oacc_tail = NULL;
15487 size_t i;
15489 push_gimplify_context ();
15491 lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
15493 block = make_node (BLOCK);
15494 new_stmt = gimple_build_bind (NULL, NULL, block);
15495 /* Replace at gsi right away, so that 'stmt' is no longer a member
15496 of a sequence, as we're going to add it to a different
15497 one below. */
15498 gsi_replace (gsi_p, new_stmt, true);
15500 /* Move declaration of temporaries in the loop body before we make
15501 it go away. */
15502 omp_for_body = gimple_omp_body (stmt);
15503 if (!gimple_seq_empty_p (omp_for_body)
15504 && gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
15506 gbind *inner_bind
15507 = as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
15508 tree vars = gimple_bind_vars (inner_bind);
15509 gimple_bind_append_vars (new_stmt, vars);
15510 /* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
15511 keep them on the inner_bind and its block. */
15512 gimple_bind_set_vars (inner_bind, NULL_TREE);
15513 if (gimple_bind_block (inner_bind))
15514 BLOCK_VARS (gimple_bind_block (inner_bind)) = NULL_TREE;
15517 if (gimple_omp_for_combined_into_p (stmt))
15519 extract_omp_for_data (stmt, &fd, NULL);
15520 fdp = &fd;
15522 /* We need two temporaries with fd.loop.v type (istart/iend)
15523 and then (fd.collapse - 1) temporaries with the same
15524 type for count2 ... countN-1 vars if not constant. */
15525 size_t count = 2;
15526 tree type = fd.iter_type;
15527 if (fd.collapse > 1
15528 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
15529 count += fd.collapse - 1;
15530 bool taskreg_for
15531 = (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
15532 || gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
15533 tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
15534 tree clauses = *pc;
15535 if (taskreg_for)
15536 outerc
15537 = find_omp_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
15538 OMP_CLAUSE__LOOPTEMP_);
15539 for (i = 0; i < count; i++)
15541 tree temp;
15542 if (taskreg_for)
15544 gcc_assert (outerc);
15545 temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer);
15546 outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc),
15547 OMP_CLAUSE__LOOPTEMP_);
15549 else
15551 temp = create_tmp_var (type);
15552 insert_decl_map (&ctx->outer->cb, temp, temp);
15554 *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
15555 OMP_CLAUSE_DECL (*pc) = temp;
15556 pc = &OMP_CLAUSE_CHAIN (*pc);
15558 *pc = clauses;
15561 /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
15562 dlist = NULL;
15563 body = NULL;
15564 lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
15565 fdp);
15566 gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
15568 lower_omp (gimple_omp_body_ptr (stmt), ctx);
15570 /* Lower the header expressions. At this point, we can assume that
15571 the header is of the form:
15573 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
15575 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
15576 using the .omp_data_s mapping, if needed. */
15577 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
15579 rhs_p = gimple_omp_for_initial_ptr (stmt, i);
15580 if (!is_gimple_min_invariant (*rhs_p))
15581 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15583 rhs_p = gimple_omp_for_final_ptr (stmt, i);
15584 if (!is_gimple_min_invariant (*rhs_p))
15585 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15587 rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
15588 if (!is_gimple_min_invariant (*rhs_p))
15589 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15592 /* Once lowered, extract the bounds and clauses. */
15593 extract_omp_for_data (stmt, &fd, NULL);
15595 if (is_gimple_omp_oacc (ctx->stmt)
15596 && !ctx_in_oacc_kernels_region (ctx))
15597 lower_oacc_head_tail (gimple_location (stmt),
15598 gimple_omp_for_clauses (stmt),
15599 &oacc_head, &oacc_tail, ctx);
15601 /* Add OpenACC partitioning and reduction markers just before the loop. */
15602 if (oacc_head)
15603 gimple_seq_add_seq (&body, oacc_head);
15605 lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
15607 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
15608 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
15609 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
15610 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
15612 OMP_CLAUSE_DECL (c) = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
15613 if (DECL_P (OMP_CLAUSE_LINEAR_STEP (c)))
15614 OMP_CLAUSE_LINEAR_STEP (c)
15615 = maybe_lookup_decl_in_outer_ctx (OMP_CLAUSE_LINEAR_STEP (c),
15616 ctx);
15619 bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
15620 && gimple_omp_for_grid_phony (stmt));
15621 if (!phony_loop)
15622 gimple_seq_add_stmt (&body, stmt);
15623 gimple_seq_add_seq (&body, gimple_omp_body (stmt));
15625 if (!phony_loop)
15626 gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
15627 fd.loop.v));
15629 /* After the loop, add exit clauses. */
15630 lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx);
15632 if (ctx->cancellable)
15633 gimple_seq_add_stmt (&body, gimple_build_label (ctx->cancel_label));
15635 gimple_seq_add_seq (&body, dlist);
15637 body = maybe_catch_exception (body);
15639 if (!phony_loop)
15641 /* Region exit marker goes at the end of the loop body. */
15642 gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
15643 maybe_add_implicit_barrier_cancel (ctx, &body);
15646 /* Add OpenACC joining and reduction markers just after the loop. */
15647 if (oacc_tail)
15648 gimple_seq_add_seq (&body, oacc_tail);
15650 pop_gimplify_context (new_stmt);
15652 gimple_bind_append_vars (new_stmt, ctx->block_vars);
15653 BLOCK_VARS (block) = gimple_bind_vars (new_stmt);
15654 if (BLOCK_VARS (block))
15655 TREE_USED (block) = 1;
15657 gimple_bind_set_body (new_stmt, body);
15658 gimple_omp_set_body (stmt, NULL);
15659 gimple_omp_for_set_pre_body (stmt, NULL);
15662 /* Callback for walk_stmts. Check if the current statement only contains
15663 GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS. */
15665 static tree
15666 check_combined_parallel (gimple_stmt_iterator *gsi_p,
15667 bool *handled_ops_p,
15668 struct walk_stmt_info *wi)
15670 int *info = (int *) wi->info;
15671 gimple *stmt = gsi_stmt (*gsi_p);
15673 *handled_ops_p = true;
15674 switch (gimple_code (stmt))
15676 WALK_SUBSTMTS;
15678 case GIMPLE_OMP_FOR:
15679 case GIMPLE_OMP_SECTIONS:
15680 *info = *info == 0 ? 1 : -1;
15681 break;
15682 default:
15683 *info = -1;
15684 break;
15686 return NULL;
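/* Thus for a parallel whose body is exactly one workshare, e.g.

     #pragma omp parallel
     #pragma omp for ...

   *INFO ends up as 1 and the caller marks the parallel as combined;
   any other statement, or a second workshare, forces *INFO to -1.      */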
15689 struct omp_taskcopy_context
15691 /* This field must be at the beginning, as we do "inheritance": Some
15692 callback functions for tree-inline.c (e.g., omp_copy_decl)
15693 receive a copy_body_data pointer that is up-casted to an
15694 omp_taskcopy_context pointer. */
15695 copy_body_data cb;
15696 omp_context *ctx;
15699 static tree
15700 task_copyfn_copy_decl (tree var, copy_body_data *cb)
15702 struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb;
15704 if (splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var))
15705 return create_tmp_var (TREE_TYPE (var));
15707 return var;
15710 static tree
15711 task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type)
15713 tree name, new_fields = NULL, type, f;
15715 type = lang_hooks.types.make_type (RECORD_TYPE);
15716 name = DECL_NAME (TYPE_NAME (orig_type));
15717 name = build_decl (gimple_location (tcctx->ctx->stmt),
15718 TYPE_DECL, name, type);
15719 TYPE_NAME (type) = name;
15721 for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f))
15723 tree new_f = copy_node (f);
15724 DECL_CONTEXT (new_f) = type;
15725 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb);
15726 TREE_CHAIN (new_f) = new_fields;
15727 walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15728 walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15729 walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
15730 &tcctx->cb, NULL);
15731 new_fields = new_f;
15732 tcctx->cb.decl_map->put (f, new_f);
15734 TYPE_FIELDS (type) = nreverse (new_fields);
15735 layout_type (type);
15736 return type;
15739 /* Create task copyfn. */
15741 static void
15742 create_task_copyfn (gomp_task *task_stmt, omp_context *ctx)
15744 struct function *child_cfun;
15745 tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl;
15746 tree record_type, srecord_type, bind, list;
15747 bool record_needs_remap = false, srecord_needs_remap = false;
15748 splay_tree_node n;
15749 struct omp_taskcopy_context tcctx;
15750 location_t loc = gimple_location (task_stmt);
15752 child_fn = gimple_omp_task_copy_fn (task_stmt);
15753 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
15754 gcc_assert (child_cfun->cfg == NULL);
15755 DECL_SAVED_TREE (child_fn) = alloc_stmt_list ();
15757 /* Reset DECL_CONTEXT on function arguments. */
15758 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
15759 DECL_CONTEXT (t) = child_fn;
15761 /* Populate the function. */
15762 push_gimplify_context ();
15763 push_cfun (child_cfun);
15765 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
15766 TREE_SIDE_EFFECTS (bind) = 1;
15767 list = NULL;
15768 DECL_SAVED_TREE (child_fn) = bind;
15769 DECL_SOURCE_LOCATION (child_fn) = gimple_location (task_stmt);
15771 /* Remap src and dst argument types if needed. */
15772 record_type = ctx->record_type;
15773 srecord_type = ctx->srecord_type;
15774 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
15775 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15777 record_needs_remap = true;
15778 break;
15780 for (f = TYPE_FIELDS (srecord_type); f ; f = DECL_CHAIN (f))
15781 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15783 srecord_needs_remap = true;
15784 break;
15787 if (record_needs_remap || srecord_needs_remap)
15789 memset (&tcctx, '\0', sizeof (tcctx));
15790 tcctx.cb.src_fn = ctx->cb.src_fn;
15791 tcctx.cb.dst_fn = child_fn;
15792 tcctx.cb.src_node = cgraph_node::get (tcctx.cb.src_fn);
15793 gcc_checking_assert (tcctx.cb.src_node);
15794 tcctx.cb.dst_node = tcctx.cb.src_node;
15795 tcctx.cb.src_cfun = ctx->cb.src_cfun;
15796 tcctx.cb.copy_decl = task_copyfn_copy_decl;
15797 tcctx.cb.eh_lp_nr = 0;
15798 tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE;
15799 tcctx.cb.decl_map = new hash_map<tree, tree>;
15800 tcctx.ctx = ctx;
15802 if (record_needs_remap)
15803 record_type = task_copyfn_remap_type (&tcctx, record_type);
15804 if (srecord_needs_remap)
15805 srecord_type = task_copyfn_remap_type (&tcctx, srecord_type);
15807 else
15808 tcctx.cb.decl_map = NULL;
15810 arg = DECL_ARGUMENTS (child_fn);
15811 TREE_TYPE (arg) = build_pointer_type (record_type);
15812 sarg = DECL_CHAIN (arg);
15813 TREE_TYPE (sarg) = build_pointer_type (srecord_type);
15815 /* First pass: initialize temporaries used in record_type and srecord_type
15816 sizes and field offsets. */
15817 if (tcctx.cb.decl_map)
15818 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15819 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15821 tree *p;
15823 decl = OMP_CLAUSE_DECL (c);
15824 p = tcctx.cb.decl_map->get (decl);
15825 if (p == NULL)
15826 continue;
15827 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15828 sf = (tree) n->value;
15829 sf = *tcctx.cb.decl_map->get (sf);
15830 src = build_simple_mem_ref_loc (loc, sarg);
15831 src = omp_build_component_ref (src, sf);
15832 t = build2 (MODIFY_EXPR, TREE_TYPE (*p), *p, src);
15833 append_to_statement_list (t, &list);
15836 /* Second pass: copy shared var pointers and copy construct non-VLA
15837 firstprivate vars. */
15838 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15839 switch (OMP_CLAUSE_CODE (c))
15841 splay_tree_key key;
15842 case OMP_CLAUSE_SHARED:
15843 decl = OMP_CLAUSE_DECL (c);
15844 key = (splay_tree_key) decl;
15845 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
15846 key = (splay_tree_key) &DECL_UID (decl);
15847 n = splay_tree_lookup (ctx->field_map, key);
15848 if (n == NULL)
15849 break;
15850 f = (tree) n->value;
15851 if (tcctx.cb.decl_map)
15852 f = *tcctx.cb.decl_map->get (f);
15853 n = splay_tree_lookup (ctx->sfield_map, key);
15854 sf = (tree) n->value;
15855 if (tcctx.cb.decl_map)
15856 sf = *tcctx.cb.decl_map->get (sf);
15857 src = build_simple_mem_ref_loc (loc, sarg);
15858 src = omp_build_component_ref (src, sf);
15859 dst = build_simple_mem_ref_loc (loc, arg);
15860 dst = omp_build_component_ref (dst, f);
15861 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15862 append_to_statement_list (t, &list);
15863 break;
15864 case OMP_CLAUSE_FIRSTPRIVATE:
15865 decl = OMP_CLAUSE_DECL (c);
15866 if (is_variable_sized (decl))
15867 break;
15868 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15869 if (n == NULL)
15870 break;
15871 f = (tree) n->value;
15872 if (tcctx.cb.decl_map)
15873 f = *tcctx.cb.decl_map->get (f);
15874 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15875 if (n != NULL)
15877 sf = (tree) n->value;
15878 if (tcctx.cb.decl_map)
15879 sf = *tcctx.cb.decl_map->get (sf);
15880 src = build_simple_mem_ref_loc (loc, sarg);
15881 src = omp_build_component_ref (src, sf);
15882 if (use_pointer_for_field (decl, NULL) || is_reference (decl))
15883 src = build_simple_mem_ref_loc (loc, src);
15885 else
15886 src = decl;
15887 dst = build_simple_mem_ref_loc (loc, arg);
15888 dst = omp_build_component_ref (dst, f);
15889 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15890 append_to_statement_list (t, &list);
15891 break;
15892 case OMP_CLAUSE_PRIVATE:
15893 if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c))
15894 break;
15895 decl = OMP_CLAUSE_DECL (c);
15896 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15897 f = (tree) n->value;
15898 if (tcctx.cb.decl_map)
15899 f = *tcctx.cb.decl_map->get (f);
15900 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15901 if (n != NULL)
15903 sf = (tree) n->value;
15904 if (tcctx.cb.decl_map)
15905 sf = *tcctx.cb.decl_map->get (sf);
15906 src = build_simple_mem_ref_loc (loc, sarg);
15907 src = omp_build_component_ref (src, sf);
15908 if (use_pointer_for_field (decl, NULL))
15909 src = build_simple_mem_ref_loc (loc, src);
15911 else
15912 src = decl;
15913 dst = build_simple_mem_ref_loc (loc, arg);
15914 dst = omp_build_component_ref (dst, f);
15915 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15916 append_to_statement_list (t, &list);
15917 break;
15918 default:
15919 break;
15922 /* Last pass: handle VLA firstprivates. */
15923 if (tcctx.cb.decl_map)
15924 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15925 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15927 tree ind, ptr, df;
15929 decl = OMP_CLAUSE_DECL (c);
15930 if (!is_variable_sized (decl))
15931 continue;
15932 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15933 if (n == NULL)
15934 continue;
15935 f = (tree) n->value;
15936 f = *tcctx.cb.decl_map->get (f);
15937 gcc_assert (DECL_HAS_VALUE_EXPR_P (decl));
15938 ind = DECL_VALUE_EXPR (decl);
15939 gcc_assert (TREE_CODE (ind) == INDIRECT_REF);
15940 gcc_assert (DECL_P (TREE_OPERAND (ind, 0)));
15941 n = splay_tree_lookup (ctx->sfield_map,
15942 (splay_tree_key) TREE_OPERAND (ind, 0));
15943 sf = (tree) n->value;
15944 sf = *tcctx.cb.decl_map->get (sf);
15945 src = build_simple_mem_ref_loc (loc, sarg);
15946 src = omp_build_component_ref (src, sf);
15947 src = build_simple_mem_ref_loc (loc, src);
15948 dst = build_simple_mem_ref_loc (loc, arg);
15949 dst = omp_build_component_ref (dst, f);
15950 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15951 append_to_statement_list (t, &list);
15952 n = splay_tree_lookup (ctx->field_map,
15953 (splay_tree_key) TREE_OPERAND (ind, 0));
15954 df = (tree) n->value;
15955 df = *tcctx.cb.decl_map->get (df);
15956 ptr = build_simple_mem_ref_loc (loc, arg);
15957 ptr = omp_build_component_ref (ptr, df);
15958 t = build2 (MODIFY_EXPR, TREE_TYPE (ptr), ptr,
15959 build_fold_addr_expr_loc (loc, dst));
15960 append_to_statement_list (t, &list);
15963 t = build1 (RETURN_EXPR, void_type_node, NULL);
15964 append_to_statement_list (t, &list);
15966 if (tcctx.cb.decl_map)
15967 delete tcctx.cb.decl_map;
15968 pop_gimplify_context (NULL);
15969 BIND_EXPR_BODY (bind) = list;
15970 pop_cfun ();
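/* The generated copyfn has roughly this shape (a sketch; the argument
   and field names are invented for illustration):

     void .omp_task_copyfn (struct .omp_data_s *dst,
                            struct .omp_data_s *src)
     {
       dst->fp = src->fp;               // copy-construct firstprivates
       dst->shared_p = src->shared_p;   // forward shared var pointers
       return;
     }                                                                  */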
15973 static void
15974 lower_depend_clauses (tree *pclauses, gimple_seq *iseq, gimple_seq *oseq)
15976 tree c, clauses;
15977 gimple *g;
15978 size_t n_in = 0, n_out = 0, idx = 2, i;
15980 clauses = find_omp_clause (*pclauses, OMP_CLAUSE_DEPEND);
15981 gcc_assert (clauses);
15982 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
15983 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND)
15984 switch (OMP_CLAUSE_DEPEND_KIND (c))
15986 case OMP_CLAUSE_DEPEND_IN:
15987 n_in++;
15988 break;
15989 case OMP_CLAUSE_DEPEND_OUT:
15990 case OMP_CLAUSE_DEPEND_INOUT:
15991 n_out++;
15992 break;
15993 case OMP_CLAUSE_DEPEND_SOURCE:
15994 case OMP_CLAUSE_DEPEND_SINK:
15995 /* FALLTHRU */
15996 default:
15997 gcc_unreachable ();
15999 tree type = build_array_type_nelts (ptr_type_node, n_in + n_out + 2);
16000 tree array = create_tmp_var (type);
16001 TREE_ADDRESSABLE (array) = 1;
16002 tree r = build4 (ARRAY_REF, ptr_type_node, array, size_int (0), NULL_TREE,
16003 NULL_TREE);
16004 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_in + n_out));
16005 gimple_seq_add_stmt (iseq, g);
16006 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (1), NULL_TREE,
16007 NULL_TREE);
16008 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_out));
16009 gimple_seq_add_stmt (iseq, g);
16010 for (i = 0; i < 2; i++)
16012 if ((i ? n_in : n_out) == 0)
16013 continue;
16014 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
16015 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
16016 && ((OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_IN) ^ i))
16018 tree t = OMP_CLAUSE_DECL (c);
16019 t = fold_convert (ptr_type_node, t);
16020 gimplify_expr (&t, iseq, NULL, is_gimple_val, fb_rvalue);
16021 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (idx++),
16022 NULL_TREE, NULL_TREE);
16023 g = gimple_build_assign (r, t);
16024 gimple_seq_add_stmt (iseq, g);
16027 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
16028 OMP_CLAUSE_DECL (c) = build_fold_addr_expr (array);
16029 OMP_CLAUSE_CHAIN (c) = *pclauses;
16030 *pclauses = c;
16031 tree clobber = build_constructor (type, NULL);
16032 TREE_THIS_VOLATILE (clobber) = 1;
16033 g = gimple_build_assign (array, clobber);
16034 gimple_seq_add_stmt (oseq, g);
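/* The array built above follows the GOMP_task depend format.  E.g.
   for depend(out: a) depend(in: b, c) it is initialized, in effect,
   as (a sketch; the counts are really stored as pointer-typed
   constants):

     void *.omp_depend[5];
     .omp_depend[0] = (void *) 3;   // total number of depend items
     .omp_depend[1] = (void *) 1;   // number of out/inout items
     .omp_depend[2] = &a;           // out/inout addresses come first
     .omp_depend[3] = &b;           // then the in addresses
     .omp_depend[4] = &c;

   and the whole array is clobbered in *OSEQ once the region is over.   */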
16037 /* Lower the OpenMP parallel or task directive in the current statement
16038 in GSI_P. CTX holds context information for the directive. */
16040 static void
16041 lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16043 tree clauses;
16044 tree child_fn, t;
16045 gimple *stmt = gsi_stmt (*gsi_p);
16046 gbind *par_bind, *bind, *dep_bind = NULL;
16047 gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body;
16048 location_t loc = gimple_location (stmt);
16050 clauses = gimple_omp_taskreg_clauses (stmt);
16051 par_bind
16052 = as_a <gbind *> (gimple_seq_first_stmt (gimple_omp_body (stmt)));
16053 par_body = gimple_bind_body (par_bind);
16054 child_fn = ctx->cb.dst_fn;
16055 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
16056 && !gimple_omp_parallel_combined_p (stmt))
16058 struct walk_stmt_info wi;
16059 int ws_num = 0;
16061 memset (&wi, 0, sizeof (wi));
16062 wi.info = &ws_num;
16063 wi.val_only = true;
16064 walk_gimple_seq (par_body, check_combined_parallel, NULL, &wi);
16065 if (ws_num == 1)
16066 gimple_omp_parallel_set_combined_p (stmt, true);
16068 gimple_seq dep_ilist = NULL;
16069 gimple_seq dep_olist = NULL;
16070 if (gimple_code (stmt) == GIMPLE_OMP_TASK
16071 && find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
16073 push_gimplify_context ();
16074 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
16075 lower_depend_clauses (gimple_omp_task_clauses_ptr (stmt),
16076 &dep_ilist, &dep_olist);
16079 if (ctx->srecord_type)
16080 create_task_copyfn (as_a <gomp_task *> (stmt), ctx);
16082 push_gimplify_context ();
16084 par_olist = NULL;
16085 par_ilist = NULL;
16086 par_rlist = NULL;
16087 bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL
16088 && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt));
16089 if (phony_construct && ctx->record_type)
16091 gcc_checking_assert (!ctx->receiver_decl);
16092 ctx->receiver_decl = create_tmp_var
16093 (build_reference_type (ctx->record_type), ".omp_rec");
16095 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL);
16096 lower_omp (&par_body, ctx);
16097 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
16098 lower_reduction_clauses (clauses, &par_rlist, ctx);
16100 /* Declare all the variables created by mapping and the variables
16101 declared in the scope of the parallel body. */
16102 record_vars_into (ctx->block_vars, child_fn);
16103 record_vars_into (gimple_bind_vars (par_bind), child_fn);
16105 if (ctx->record_type)
16107 ctx->sender_decl
16108 = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
16109 : ctx->record_type, ".omp_data_o");
16110 DECL_NAMELESS (ctx->sender_decl) = 1;
16111 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
16112 gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
16115 olist = NULL;
16116 ilist = NULL;
16117 lower_send_clauses (clauses, &ilist, &olist, ctx);
16118 lower_send_shared_vars (&ilist, &olist, ctx);
16120 if (ctx->record_type)
16122 tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
16123 TREE_THIS_VOLATILE (clobber) = 1;
16124 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
16125 clobber));
16128 /* Once all the expansions are done, sequence all the different
16129 fragments inside gimple_omp_body. */
16131 new_body = NULL;
16133 if (ctx->record_type)
16135 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
16136 /* fixup_child_record_type might have changed receiver_decl's type. */
16137 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
16138 gimple_seq_add_stmt (&new_body,
16139 gimple_build_assign (ctx->receiver_decl, t));
16142 gimple_seq_add_seq (&new_body, par_ilist);
16143 gimple_seq_add_seq (&new_body, par_body);
16144 gimple_seq_add_seq (&new_body, par_rlist);
16145 if (ctx->cancellable)
16146 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
16147 gimple_seq_add_seq (&new_body, par_olist);
16148 new_body = maybe_catch_exception (new_body);
16149 if (gimple_code (stmt) == GIMPLE_OMP_TASK)
16150 gimple_seq_add_stmt (&new_body,
16151 gimple_build_omp_continue (integer_zero_node,
16152 integer_zero_node));
16153 if (!phony_construct)
16155 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
16156 gimple_omp_set_body (stmt, new_body);
16159 bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
16160 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
16161 gimple_bind_add_seq (bind, ilist);
16162 if (!phony_construct)
16163 gimple_bind_add_stmt (bind, stmt);
16164 else
16165 gimple_bind_add_seq (bind, new_body);
16166 gimple_bind_add_seq (bind, olist);
16168 pop_gimplify_context (NULL);
16170 if (dep_bind)
16172 gimple_bind_add_seq (dep_bind, dep_ilist);
16173 gimple_bind_add_stmt (dep_bind, bind);
16174 gimple_bind_add_seq (dep_bind, dep_olist);
16175 pop_gimplify_context (dep_bind);
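/* After the above, the body of the parallel/task is sequenced roughly
   as (a sketch):

     .omp_data_i = &.omp_data_o;    // only if there is a record type
     <par_ilist>                    // private/firstprivate setup
     <par_body>
     <par_rlist>                    // reductions (parallel only)
     [<cancel_label>:]              // only if cancellable
     <par_olist>
     [GIMPLE_OMP_CONTINUE]          // tasks only
     GIMPLE_OMP_RETURN                                                  */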
16179 /* Lower the GIMPLE_OMP_TARGET in the current statement
16180 in GSI_P. CTX holds context information for the directive. */
16182 static void
16183 lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16185 tree clauses;
16186 tree child_fn, t, c;
16187 gomp_target *stmt = as_a <gomp_target *> (gsi_stmt (*gsi_p));
16188 gbind *tgt_bind, *bind, *dep_bind = NULL;
16189 gimple_seq tgt_body, olist, ilist, fplist, new_body;
16190 location_t loc = gimple_location (stmt);
16191 bool offloaded, data_region;
16192 unsigned int map_cnt = 0;
16194 offloaded = is_gimple_omp_offloaded (stmt);
16195 switch (gimple_omp_target_kind (stmt))
16197 case GF_OMP_TARGET_KIND_REGION:
16198 case GF_OMP_TARGET_KIND_UPDATE:
16199 case GF_OMP_TARGET_KIND_ENTER_DATA:
16200 case GF_OMP_TARGET_KIND_EXIT_DATA:
16201 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
16202 case GF_OMP_TARGET_KIND_OACC_KERNELS:
16203 case GF_OMP_TARGET_KIND_OACC_UPDATE:
16204 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
16205 case GF_OMP_TARGET_KIND_OACC_DECLARE:
16206 data_region = false;
16207 break;
16208 case GF_OMP_TARGET_KIND_DATA:
16209 case GF_OMP_TARGET_KIND_OACC_DATA:
16210 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
16211 data_region = true;
16212 break;
16213 default:
16214 gcc_unreachable ();
16217 clauses = gimple_omp_target_clauses (stmt);
16219 gimple_seq dep_ilist = NULL;
16220 gimple_seq dep_olist = NULL;
16221 if (find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
16223 push_gimplify_context ();
16224 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
16225 lower_depend_clauses (gimple_omp_target_clauses_ptr (stmt),
16226 &dep_ilist, &dep_olist);
16229 tgt_bind = NULL;
16230 tgt_body = NULL;
16231 if (offloaded)
16233 tgt_bind = gimple_seq_first_stmt_as_a_bind (gimple_omp_body (stmt));
16234 tgt_body = gimple_bind_body (tgt_bind);
16236 else if (data_region)
16237 tgt_body = gimple_omp_body (stmt);
16238 child_fn = ctx->cb.dst_fn;
16240 push_gimplify_context ();
16241 fplist = NULL;
16243 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16244 switch (OMP_CLAUSE_CODE (c))
16246 tree var, x;
16248 default:
16249 break;
16250 case OMP_CLAUSE_MAP:
16251 #if CHECKING_P
16252 /* First check what we're prepared to handle in the following. */
16253 switch (OMP_CLAUSE_MAP_KIND (c))
16255 case GOMP_MAP_ALLOC:
16256 case GOMP_MAP_TO:
16257 case GOMP_MAP_FROM:
16258 case GOMP_MAP_TOFROM:
16259 case GOMP_MAP_POINTER:
16260 case GOMP_MAP_TO_PSET:
16261 case GOMP_MAP_DELETE:
16262 case GOMP_MAP_RELEASE:
16263 case GOMP_MAP_ALWAYS_TO:
16264 case GOMP_MAP_ALWAYS_FROM:
16265 case GOMP_MAP_ALWAYS_TOFROM:
16266 case GOMP_MAP_FIRSTPRIVATE_POINTER:
16267 case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
16268 case GOMP_MAP_STRUCT:
16269 case GOMP_MAP_ALWAYS_POINTER:
16270 break;
16271 case GOMP_MAP_FORCE_ALLOC:
16272 case GOMP_MAP_FORCE_TO:
16273 case GOMP_MAP_FORCE_FROM:
16274 case GOMP_MAP_FORCE_TOFROM:
16275 case GOMP_MAP_FORCE_PRESENT:
16276 case GOMP_MAP_FORCE_DEVICEPTR:
16277 case GOMP_MAP_DEVICE_RESIDENT:
16278 case GOMP_MAP_LINK:
16279 gcc_assert (is_gimple_omp_oacc (stmt));
16280 break;
16281 default:
16282 gcc_unreachable ();
16284 #endif
16285 /* FALLTHRU */
16286 case OMP_CLAUSE_TO:
16287 case OMP_CLAUSE_FROM:
16288 oacc_firstprivate:
16289 var = OMP_CLAUSE_DECL (c);
16290 if (!DECL_P (var))
16292 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
16293 || (!OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
16294 && (OMP_CLAUSE_MAP_KIND (c)
16295 != GOMP_MAP_FIRSTPRIVATE_POINTER)))
16296 map_cnt++;
16297 continue;
16300 if (DECL_SIZE (var)
16301 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
16303 tree var2 = DECL_VALUE_EXPR (var);
16304 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
16305 var2 = TREE_OPERAND (var2, 0);
16306 gcc_assert (DECL_P (var2));
16307 var = var2;
16310 if (offloaded
16311 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16312 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
16313 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
16315 if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16317 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
16318 && varpool_node::get_create (var)->offloadable)
16319 continue;
16321 tree type = build_pointer_type (TREE_TYPE (var));
16322 tree new_var = lookup_decl (var, ctx);
16323 x = create_tmp_var_raw (type, get_name (new_var));
16324 gimple_add_tmp_var (x);
16325 x = build_simple_mem_ref (x);
16326 SET_DECL_VALUE_EXPR (new_var, x);
16327 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16329 continue;
16332 if (!maybe_lookup_field (var, ctx))
16333 continue;
16335 /* Don't remap oacc parallel reduction variables, because the
16336 intermediate result must be local to each gang. */
16337 if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16338 && OMP_CLAUSE_MAP_IN_REDUCTION (c)))
16340 x = build_receiver_ref (var, true, ctx);
16341 tree new_var = lookup_decl (var, ctx);
16343 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16344 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
16345 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
16346 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16347 x = build_simple_mem_ref (x);
16348 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16350 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
16351 if (is_reference (new_var))
16353 /* Create a local object to hold the instance
16354 value. */
16355 tree type = TREE_TYPE (TREE_TYPE (new_var));
16356 const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
16357 tree inst = create_tmp_var (type, id);
16358 gimplify_assign (inst, fold_indirect_ref (x), &fplist);
16359 x = build_fold_addr_expr (inst);
16361 gimplify_assign (new_var, x, &fplist);
16363 else if (DECL_P (new_var))
16365 SET_DECL_VALUE_EXPR (new_var, x);
16366 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16368 else
16369 gcc_unreachable ();
16371 map_cnt++;
16372 break;
16374 case OMP_CLAUSE_FIRSTPRIVATE:
16375 if (is_oacc_parallel (ctx))
16376 goto oacc_firstprivate;
16377 map_cnt++;
16378 var = OMP_CLAUSE_DECL (c);
16379 if (!is_reference (var)
16380 && !is_gimple_reg_type (TREE_TYPE (var)))
16382 tree new_var = lookup_decl (var, ctx);
16383 if (is_variable_sized (var))
16385 tree pvar = DECL_VALUE_EXPR (var);
16386 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16387 pvar = TREE_OPERAND (pvar, 0);
16388 gcc_assert (DECL_P (pvar));
16389 tree new_pvar = lookup_decl (pvar, ctx);
16390 x = build_fold_indirect_ref (new_pvar);
16391 TREE_THIS_NOTRAP (x) = 1;
16393 else
16394 x = build_receiver_ref (var, true, ctx);
16395 SET_DECL_VALUE_EXPR (new_var, x);
16396 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16398 break;
16400 case OMP_CLAUSE_PRIVATE:
16401 if (is_gimple_omp_oacc (ctx->stmt))
16402 break;
16403 var = OMP_CLAUSE_DECL (c);
16404 if (is_variable_sized (var))
16406 tree new_var = lookup_decl (var, ctx);
16407 tree pvar = DECL_VALUE_EXPR (var);
16408 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16409 pvar = TREE_OPERAND (pvar, 0);
16410 gcc_assert (DECL_P (pvar));
16411 tree new_pvar = lookup_decl (pvar, ctx);
16412 x = build_fold_indirect_ref (new_pvar);
16413 TREE_THIS_NOTRAP (x) = 1;
16414 SET_DECL_VALUE_EXPR (new_var, x);
16415 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16417 break;
16419 case OMP_CLAUSE_USE_DEVICE_PTR:
16420 case OMP_CLAUSE_IS_DEVICE_PTR:
16421 var = OMP_CLAUSE_DECL (c);
16422 map_cnt++;
16423 if (is_variable_sized (var))
16425 tree new_var = lookup_decl (var, ctx);
16426 tree pvar = DECL_VALUE_EXPR (var);
16427 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16428 pvar = TREE_OPERAND (pvar, 0);
16429 gcc_assert (DECL_P (pvar));
16430 tree new_pvar = lookup_decl (pvar, ctx);
16431 x = build_fold_indirect_ref (new_pvar);
16432 TREE_THIS_NOTRAP (x) = 1;
16433 SET_DECL_VALUE_EXPR (new_var, x);
16434 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16436 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16438 tree new_var = lookup_decl (var, ctx);
16439 tree type = build_pointer_type (TREE_TYPE (var));
16440 x = create_tmp_var_raw (type, get_name (new_var));
16441 gimple_add_tmp_var (x);
16442 x = build_simple_mem_ref (x);
16443 SET_DECL_VALUE_EXPR (new_var, x);
16444 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16446 else
16448 tree new_var = lookup_decl (var, ctx);
16449 x = create_tmp_var_raw (TREE_TYPE (new_var), get_name (new_var));
16450 gimple_add_tmp_var (x);
16451 SET_DECL_VALUE_EXPR (new_var, x);
16452 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
16454 break;
16457 if (offloaded)
16459 target_nesting_level++;
16460 lower_omp (&tgt_body, ctx);
16461 target_nesting_level--;
16463 else if (data_region)
16464 lower_omp (&tgt_body, ctx);
16466 if (offloaded)
16468 /* Declare all the variables created by mapping and the variables
16469 declared in the scope of the target body. */
16470 record_vars_into (ctx->block_vars, child_fn);
16471 record_vars_into (gimple_bind_vars (tgt_bind), child_fn);
16474 olist = NULL;
16475 ilist = NULL;
16476 if (ctx->record_type)
16478 ctx->sender_decl
16479 = create_tmp_var (ctx->record_type, ".omp_data_arr");
16480 DECL_NAMELESS (ctx->sender_decl) = 1;
16481 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
16482 t = make_tree_vec (3);
16483 TREE_VEC_ELT (t, 0) = ctx->sender_decl;
16484 TREE_VEC_ELT (t, 1)
16485 = create_tmp_var (build_array_type_nelts (size_type_node, map_cnt),
16486 ".omp_data_sizes");
16487 DECL_NAMELESS (TREE_VEC_ELT (t, 1)) = 1;
16488 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 1)) = 1;
16489 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 1;
16490 tree tkind_type = short_unsigned_type_node;
16491 int talign_shift = 8;
16492 TREE_VEC_ELT (t, 2)
16493 = create_tmp_var (build_array_type_nelts (tkind_type, map_cnt),
16494 ".omp_data_kinds");
16495 DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
16496 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
16497 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
16498 gimple_omp_target_set_data_arg (stmt, t);
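/* At this point the data argument is a TREE_VEC of three artificial
   decls: .omp_data_arr holding the addresses/values to be mapped,
   .omp_data_sizes holding the size in bytes of each mapping, and
   .omp_data_kinds packing the map kind and alignment of each entry.
   These become the array arguments of the GOMP_target_ext-style
   runtime calls emitted during expansion (illustrative summary).  */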
16500 vec<constructor_elt, va_gc> *vsize;
16501 vec<constructor_elt, va_gc> *vkind;
16502 vec_alloc (vsize, map_cnt);
16503 vec_alloc (vkind, map_cnt);
16504 unsigned int map_idx = 0;
16506 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16507 switch (OMP_CLAUSE_CODE (c))
16509 tree ovar, nc, s, purpose, var, x, type;
16510 unsigned int talign;
16512 default:
16513 break;
16515 case OMP_CLAUSE_MAP:
16516 case OMP_CLAUSE_TO:
16517 case OMP_CLAUSE_FROM:
16518 oacc_firstprivate_map:
16519 nc = c;
16520 ovar = OMP_CLAUSE_DECL (c);
16521 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16522 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
16523 || (OMP_CLAUSE_MAP_KIND (c)
16524 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
16525 break;
16526 if (!DECL_P (ovar))
16528 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16529 && OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
16531 gcc_checking_assert (OMP_CLAUSE_DECL (OMP_CLAUSE_CHAIN (c))
16532 == get_base_address (ovar));
16533 nc = OMP_CLAUSE_CHAIN (c);
16534 ovar = OMP_CLAUSE_DECL (nc);
16536 else
16538 tree x = build_sender_ref (ovar, ctx);
16539 tree v
16540 = build_fold_addr_expr_with_type (ovar, ptr_type_node);
16541 gimplify_assign (x, v, &ilist);
16542 nc = NULL_TREE;
16545 else
16547 if (DECL_SIZE (ovar)
16548 && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST)
16550 tree ovar2 = DECL_VALUE_EXPR (ovar);
16551 gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
16552 ovar2 = TREE_OPERAND (ovar2, 0);
16553 gcc_assert (DECL_P (ovar2));
16554 ovar = ovar2;
16556 if (!maybe_lookup_field (ovar, ctx))
16557 continue;
16560 talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
16561 if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
16562 talign = DECL_ALIGN_UNIT (ovar);
16563 if (nc)
16565 var = lookup_decl_in_outer_ctx (ovar, ctx);
16566 x = build_sender_ref (ovar, ctx);
16568 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16569 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
16570 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
16571 && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE)
16573 gcc_assert (offloaded);
16574 tree avar
16575 = create_tmp_var (TREE_TYPE (TREE_TYPE (x)));
16576 mark_addressable (avar);
16577 gimplify_assign (avar, build_fold_addr_expr (var), &ilist);
16578 talign = DECL_ALIGN_UNIT (avar);
16579 avar = build_fold_addr_expr (avar);
16580 gimplify_assign (x, avar, &ilist);
16582 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16584 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
16585 if (!is_reference (var))
16587 if (is_gimple_reg (var)
16588 && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16589 TREE_NO_WARNING (var) = 1;
16590 var = build_fold_addr_expr (var);
16592 else
16593 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16594 gimplify_assign (x, var, &ilist);
16596 else if (is_gimple_reg (var))
16598 gcc_assert (offloaded);
16599 tree avar = create_tmp_var (TREE_TYPE (var));
16600 mark_addressable (avar);
16601 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
16602 if (GOMP_MAP_COPY_TO_P (map_kind)
16603 || map_kind == GOMP_MAP_POINTER
16604 || map_kind == GOMP_MAP_TO_PSET
16605 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16607 /* If we need to initialize a temporary
16608 with VAR because it is not addressable, and
16609 the variable hasn't been initialized yet, then
16610 we'll get a warning for the store to avar.
16611 Don't warn in that case, the mapping might
16612 be implicit. */
16613 TREE_NO_WARNING (var) = 1;
16614 gimplify_assign (avar, var, &ilist);
16616 avar = build_fold_addr_expr (avar);
16617 gimplify_assign (x, avar, &ilist);
16618 if ((GOMP_MAP_COPY_FROM_P (map_kind)
16619 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16620 && !TYPE_READONLY (TREE_TYPE (var)))
16622 x = unshare_expr (x);
16623 x = build_simple_mem_ref (x);
16624 gimplify_assign (var, x, &olist);
16627 else
16629 var = build_fold_addr_expr (var);
16630 gimplify_assign (x, var, &ilist);
16633 s = NULL_TREE;
16634 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16636 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16637 s = TREE_TYPE (ovar);
16638 if (TREE_CODE (s) == REFERENCE_TYPE)
16639 s = TREE_TYPE (s);
16640 s = TYPE_SIZE_UNIT (s);
16642 else
16643 s = OMP_CLAUSE_SIZE (c);
16644 if (s == NULL_TREE)
16645 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16646 s = fold_convert (size_type_node, s);
16647 purpose = size_int (map_idx++);
16648 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16649 if (TREE_CODE (s) != INTEGER_CST)
16650 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16652 unsigned HOST_WIDE_INT tkind, tkind_zero;
16653 switch (OMP_CLAUSE_CODE (c))
16655 case OMP_CLAUSE_MAP:
16656 tkind = OMP_CLAUSE_MAP_KIND (c);
16657 tkind_zero = tkind;
16658 if (OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c))
16659 switch (tkind)
16661 case GOMP_MAP_ALLOC:
16662 case GOMP_MAP_TO:
16663 case GOMP_MAP_FROM:
16664 case GOMP_MAP_TOFROM:
16665 case GOMP_MAP_ALWAYS_TO:
16666 case GOMP_MAP_ALWAYS_FROM:
16667 case GOMP_MAP_ALWAYS_TOFROM:
16668 case GOMP_MAP_RELEASE:
16669 case GOMP_MAP_FORCE_TO:
16670 case GOMP_MAP_FORCE_FROM:
16671 case GOMP_MAP_FORCE_TOFROM:
16672 case GOMP_MAP_FORCE_PRESENT:
16673 tkind_zero = GOMP_MAP_ZERO_LEN_ARRAY_SECTION;
16674 break;
16675 case GOMP_MAP_DELETE:
16676 tkind_zero = GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION;
16677 default:
16678 break;
16680 if (tkind_zero != tkind)
16682 if (integer_zerop (s))
16683 tkind = tkind_zero;
16684 else if (integer_nonzerop (s))
16685 tkind_zero = tkind;
16687 break;
16688 case OMP_CLAUSE_FIRSTPRIVATE:
16689 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16690 tkind = GOMP_MAP_TO;
16691 tkind_zero = tkind;
16692 break;
16693 case OMP_CLAUSE_TO:
16694 tkind = GOMP_MAP_TO;
16695 tkind_zero = tkind;
16696 break;
16697 case OMP_CLAUSE_FROM:
16698 tkind = GOMP_MAP_FROM;
16699 tkind_zero = tkind;
16700 break;
16701 default:
16702 gcc_unreachable ();
16704 gcc_checking_assert (tkind
16705 < (HOST_WIDE_INT_C (1U) << talign_shift));
16706 gcc_checking_assert (tkind_zero
16707 < (HOST_WIDE_INT_C (1U) << talign_shift));
16708 talign = ceil_log2 (talign);
16709 tkind |= talign << talign_shift;
16710 tkind_zero |= talign << talign_shift;
16711 gcc_checking_assert (tkind
16712 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16713 gcc_checking_assert (tkind_zero
16714 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
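/* Thus each kinds[] element packs the map kind in its low TALIGN_SHIFT
   (i.e. 8) bits and ceil_log2 of the alignment above them; e.g. an
   8-byte aligned GOMP_MAP_TOFROM entry would be encoded as
   (3 << 8) | GOMP_MAP_TOFROM == 0x303 (illustrative).  */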
16715 if (tkind == tkind_zero)
16716 x = build_int_cstu (tkind_type, tkind);
16717 else
16719 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 0;
16720 x = build3 (COND_EXPR, tkind_type,
16721 fold_build2 (EQ_EXPR, boolean_type_node,
16722 unshare_expr (s), size_zero_node),
16723 build_int_cstu (tkind_type, tkind_zero),
16724 build_int_cstu (tkind_type, tkind));
16726 CONSTRUCTOR_APPEND_ELT (vkind, purpose, x);
16727 if (nc && nc != c)
16728 c = nc;
16729 break;
16731 case OMP_CLAUSE_FIRSTPRIVATE:
16732 if (is_oacc_parallel (ctx))
16733 goto oacc_firstprivate_map;
16734 ovar = OMP_CLAUSE_DECL (c);
16735 if (is_reference (ovar))
16736 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16737 else
16738 talign = DECL_ALIGN_UNIT (ovar);
16739 var = lookup_decl_in_outer_ctx (ovar, ctx);
16740 x = build_sender_ref (ovar, ctx);
16741 tkind = GOMP_MAP_FIRSTPRIVATE;
16742 type = TREE_TYPE (ovar);
16743 if (is_reference (ovar))
16744 type = TREE_TYPE (type);
16745 if ((INTEGRAL_TYPE_P (type)
16746 && TYPE_PRECISION (type) <= POINTER_SIZE)
16747 || TREE_CODE (type) == POINTER_TYPE)
16749 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16750 tree t = var;
16751 if (is_reference (var))
16752 t = build_simple_mem_ref (var);
16753 else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16754 TREE_NO_WARNING (var) = 1;
16755 if (TREE_CODE (type) != POINTER_TYPE)
16756 t = fold_convert (pointer_sized_int_node, t);
16757 t = fold_convert (TREE_TYPE (x), t);
16758 gimplify_assign (x, t, &ilist);
16760 else if (is_reference (var))
16761 gimplify_assign (x, var, &ilist);
16762 else if (is_gimple_reg (var))
16764 tree avar = create_tmp_var (TREE_TYPE (var));
16765 mark_addressable (avar);
16766 if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16767 TREE_NO_WARNING (var) = 1;
16768 gimplify_assign (avar, var, &ilist);
16769 avar = build_fold_addr_expr (avar);
16770 gimplify_assign (x, avar, &ilist);
16772 else
16774 var = build_fold_addr_expr (var);
16775 gimplify_assign (x, var, &ilist);
16777 if (tkind == GOMP_MAP_FIRSTPRIVATE_INT)
16778 s = size_int (0);
16779 else if (is_reference (ovar))
16780 s = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16781 else
16782 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16783 s = fold_convert (size_type_node, s);
16784 purpose = size_int (map_idx++);
16785 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16786 if (TREE_CODE (s) != INTEGER_CST)
16787 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16789 gcc_checking_assert (tkind
16790 < (HOST_WIDE_INT_C (1U) << talign_shift));
16791 talign = ceil_log2 (talign);
16792 tkind |= talign << talign_shift;
16793 gcc_checking_assert (tkind
16794 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16795 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16796 build_int_cstu (tkind_type, tkind));
16797 break;
16799 case OMP_CLAUSE_USE_DEVICE_PTR:
16800 case OMP_CLAUSE_IS_DEVICE_PTR:
16801 ovar = OMP_CLAUSE_DECL (c);
16802 var = lookup_decl_in_outer_ctx (ovar, ctx);
16803 x = build_sender_ref (ovar, ctx);
16804 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16805 tkind = GOMP_MAP_USE_DEVICE_PTR;
16806 else
16807 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16808 type = TREE_TYPE (ovar);
16809 if (TREE_CODE (type) == ARRAY_TYPE)
16810 var = build_fold_addr_expr (var);
16811 else
16813 if (is_reference (ovar))
16815 type = TREE_TYPE (type);
16816 if (TREE_CODE (type) != ARRAY_TYPE)
16817 var = build_simple_mem_ref (var);
16818 var = fold_convert (TREE_TYPE (x), var);
16821 gimplify_assign (x, var, &ilist);
16822 s = size_int (0);
16823 purpose = size_int (map_idx++);
16824 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16825 gcc_checking_assert (tkind
16826 < (HOST_WIDE_INT_C (1U) << talign_shift));
16827 gcc_checking_assert (tkind
16828 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16829 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16830 build_int_cstu (tkind_type, tkind));
16831 break;
16834 gcc_assert (map_idx == map_cnt);
16836 DECL_INITIAL (TREE_VEC_ELT (t, 1))
16837 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)), vsize);
16838 DECL_INITIAL (TREE_VEC_ELT (t, 2))
16839 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), vkind);
16840 for (int i = 1; i <= 2; i++)
16841 if (!TREE_STATIC (TREE_VEC_ELT (t, i)))
16843 gimple_seq initlist = NULL;
16844 force_gimple_operand (build1 (DECL_EXPR, void_type_node,
16845 TREE_VEC_ELT (t, i)),
16846 &initlist, true, NULL_TREE);
16847 gimple_seq_add_seq (&ilist, initlist);
16849 tree clobber = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, i)),
16850 NULL);
16851 TREE_THIS_VOLATILE (clobber) = 1;
16852 gimple_seq_add_stmt (&olist,
16853 gimple_build_assign (TREE_VEC_ELT (t, i),
16854 clobber));
16857 tree clobber = build_constructor (ctx->record_type, NULL);
16858 TREE_THIS_VOLATILE (clobber) = 1;
16859 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
16860 clobber));
16863 /* Once all the expansions are done, sequence all the different
16864 fragments inside gimple_omp_body. */
16866 new_body = NULL;
16868 if (offloaded
16869 && ctx->record_type)
16871 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
16872 /* fixup_child_record_type might have changed receiver_decl's type. */
16873 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
16874 gimple_seq_add_stmt (&new_body,
16875 gimple_build_assign (ctx->receiver_decl, t));
16877 gimple_seq_add_seq (&new_body, fplist);
16879 if (offloaded || data_region)
16881 tree prev = NULL_TREE;
16882 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16883 switch (OMP_CLAUSE_CODE (c))
16885 tree var, x;
16886 default:
16887 break;
16888 case OMP_CLAUSE_FIRSTPRIVATE:
16889 if (is_gimple_omp_oacc (ctx->stmt))
16890 break;
16891 var = OMP_CLAUSE_DECL (c);
16892 if (is_reference (var)
16893 || is_gimple_reg_type (TREE_TYPE (var)))
16895 tree new_var = lookup_decl (var, ctx);
16896 tree type;
16897 type = TREE_TYPE (var);
16898 if (is_reference (var))
16899 type = TREE_TYPE (type);
16900 if ((INTEGRAL_TYPE_P (type)
16901 && TYPE_PRECISION (type) <= POINTER_SIZE)
16902 || TREE_CODE (type) == POINTER_TYPE)
16904 x = build_receiver_ref (var, false, ctx);
16905 if (TREE_CODE (type) != POINTER_TYPE)
16906 x = fold_convert (pointer_sized_int_node, x);
16907 x = fold_convert (type, x);
16908 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16909 fb_rvalue);
16910 if (is_reference (var))
16912 tree v = create_tmp_var_raw (type, get_name (var));
16913 gimple_add_tmp_var (v);
16914 TREE_ADDRESSABLE (v) = 1;
16915 gimple_seq_add_stmt (&new_body,
16916 gimple_build_assign (v, x));
16917 x = build_fold_addr_expr (v);
16919 gimple_seq_add_stmt (&new_body,
16920 gimple_build_assign (new_var, x));
16922 else
16924 x = build_receiver_ref (var, !is_reference (var), ctx);
16925 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16926 fb_rvalue);
16927 gimple_seq_add_stmt (&new_body,
16928 gimple_build_assign (new_var, x));
16931 else if (is_variable_sized (var))
16933 tree pvar = DECL_VALUE_EXPR (var);
16934 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16935 pvar = TREE_OPERAND (pvar, 0);
16936 gcc_assert (DECL_P (pvar));
16937 tree new_var = lookup_decl (pvar, ctx);
16938 x = build_receiver_ref (var, false, ctx);
16939 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16940 gimple_seq_add_stmt (&new_body,
16941 gimple_build_assign (new_var, x));
16943 break;
16944 case OMP_CLAUSE_PRIVATE:
16945 if (is_gimple_omp_oacc (ctx->stmt))
16946 break;
16947 var = OMP_CLAUSE_DECL (c);
16948 if (is_reference (var))
16950 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16951 tree new_var = lookup_decl (var, ctx);
16952 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
16953 if (TREE_CONSTANT (x))
16955 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
16956 get_name (var));
16957 gimple_add_tmp_var (x);
16958 TREE_ADDRESSABLE (x) = 1;
16959 x = build_fold_addr_expr_loc (clause_loc, x);
16961 else
16962 break;
16964 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16965 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16966 gimple_seq_add_stmt (&new_body,
16967 gimple_build_assign (new_var, x));
16969 break;
16970 case OMP_CLAUSE_USE_DEVICE_PTR:
16971 case OMP_CLAUSE_IS_DEVICE_PTR:
16972 var = OMP_CLAUSE_DECL (c);
16973 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16974 x = build_sender_ref (var, ctx);
16975 else
16976 x = build_receiver_ref (var, false, ctx);
16977 if (is_variable_sized (var))
16979 tree pvar = DECL_VALUE_EXPR (var);
16980 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16981 pvar = TREE_OPERAND (pvar, 0);
16982 gcc_assert (DECL_P (pvar));
16983 tree new_var = lookup_decl (pvar, ctx);
16984 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16985 gimple_seq_add_stmt (&new_body,
16986 gimple_build_assign (new_var, x));
16988 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16990 tree new_var = lookup_decl (var, ctx);
16991 new_var = DECL_VALUE_EXPR (new_var);
16992 gcc_assert (TREE_CODE (new_var) == MEM_REF);
16993 new_var = TREE_OPERAND (new_var, 0);
16994 gcc_assert (DECL_P (new_var));
16995 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16996 gimple_seq_add_stmt (&new_body,
16997 gimple_build_assign (new_var, x));
16999 else
17001 tree type = TREE_TYPE (var);
17002 tree new_var = lookup_decl (var, ctx);
17003 if (is_reference (var))
17005 type = TREE_TYPE (type);
17006 if (TREE_CODE (type) != ARRAY_TYPE)
17008 tree v = create_tmp_var_raw (type, get_name (var));
17009 gimple_add_tmp_var (v);
17010 TREE_ADDRESSABLE (v) = 1;
17011 x = fold_convert (type, x);
17012 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
17013 fb_rvalue);
17014 gimple_seq_add_stmt (&new_body,
17015 gimple_build_assign (v, x));
17016 x = build_fold_addr_expr (v);
17019 new_var = DECL_VALUE_EXPR (new_var);
17020 x = fold_convert (TREE_TYPE (new_var), x);
17021 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
17022 gimple_seq_add_stmt (&new_body,
17023 gimple_build_assign (new_var, x));
17025 break;
17027 /* Handle GOMP_MAP_FIRSTPRIVATE_{POINTER,REFERENCE} in second pass,
17028 so that firstprivate vars holding OMP_CLAUSE_SIZE, if needed,
17029 are already handled. Similarly OMP_CLAUSE_PRIVATE for VLAs
17030 or references to VLAs. */
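/* For example (an illustrative sketch), given
     int *p;
     #pragma omp target map(tofrom: p[0:n])
   the clause list contains a map of the array section followed by a
   GOMP_MAP_FIRSTPRIVATE_POINTER clause for P itself, whose private
   copy can only be initialized once the section it points to has been
   received, hence the second pass.  */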
17031 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
17032 switch (OMP_CLAUSE_CODE (c))
17034 tree var;
17035 default:
17036 break;
17037 case OMP_CLAUSE_MAP:
17038 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
17039 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
17041 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
17042 HOST_WIDE_INT offset = 0;
17043 gcc_assert (prev);
17044 var = OMP_CLAUSE_DECL (c);
17045 if (DECL_P (var)
17046 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
17047 && is_global_var (maybe_lookup_decl_in_outer_ctx (var,
17048 ctx))
17049 && varpool_node::get_create (var)->offloadable)
17050 break;
17051 if (TREE_CODE (var) == INDIRECT_REF
17052 && TREE_CODE (TREE_OPERAND (var, 0)) == COMPONENT_REF)
17053 var = TREE_OPERAND (var, 0);
17054 if (TREE_CODE (var) == COMPONENT_REF)
17056 var = get_addr_base_and_unit_offset (var, &offset);
17057 gcc_assert (var != NULL_TREE && DECL_P (var));
17059 else if (DECL_SIZE (var)
17060 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
17062 tree var2 = DECL_VALUE_EXPR (var);
17063 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
17064 var2 = TREE_OPERAND (var2, 0);
17065 gcc_assert (DECL_P (var2));
17066 var = var2;
17068 tree new_var = lookup_decl (var, ctx), x;
17069 tree type = TREE_TYPE (new_var);
17070 bool is_ref;
17071 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == INDIRECT_REF
17072 && (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0))
17073 == COMPONENT_REF))
17075 type = TREE_TYPE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0));
17076 is_ref = true;
17077 new_var = build2 (MEM_REF, type,
17078 build_fold_addr_expr (new_var),
17079 build_int_cst (build_pointer_type (type),
17080 offset));
17082 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF)
17084 type = TREE_TYPE (OMP_CLAUSE_DECL (c));
17085 is_ref = TREE_CODE (type) == REFERENCE_TYPE;
17086 new_var = build2 (MEM_REF, type,
17087 build_fold_addr_expr (new_var),
17088 build_int_cst (build_pointer_type (type),
17089 offset));
17091 else
17092 is_ref = is_reference (var);
17093 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
17094 is_ref = false;
17095 bool ref_to_array = false;
17096 if (is_ref)
17098 type = TREE_TYPE (type);
17099 if (TREE_CODE (type) == ARRAY_TYPE)
17101 type = build_pointer_type (type);
17102 ref_to_array = true;
17105 else if (TREE_CODE (type) == ARRAY_TYPE)
17107 tree decl2 = DECL_VALUE_EXPR (new_var);
17108 gcc_assert (TREE_CODE (decl2) == MEM_REF);
17109 decl2 = TREE_OPERAND (decl2, 0);
17110 gcc_assert (DECL_P (decl2));
17111 new_var = decl2;
17112 type = TREE_TYPE (new_var);
17114 x = build_receiver_ref (OMP_CLAUSE_DECL (prev), false, ctx);
17115 x = fold_convert_loc (clause_loc, type, x);
17116 if (!integer_zerop (OMP_CLAUSE_SIZE (c)))
17118 tree bias = OMP_CLAUSE_SIZE (c);
17119 if (DECL_P (bias))
17120 bias = lookup_decl (bias, ctx);
17121 bias = fold_convert_loc (clause_loc, sizetype, bias);
17122 bias = fold_build1_loc (clause_loc, NEGATE_EXPR, sizetype,
17123 bias);
17124 x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
17125 TREE_TYPE (x), x, bias);
17127 if (ref_to_array)
17128 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
17129 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
17130 if (is_ref && !ref_to_array)
17132 tree t = create_tmp_var_raw (type, get_name (var));
17133 gimple_add_tmp_var (t);
17134 TREE_ADDRESSABLE (t) = 1;
17135 gimple_seq_add_stmt (&new_body,
17136 gimple_build_assign (t, x));
17137 x = build_fold_addr_expr_loc (clause_loc, t);
17139 gimple_seq_add_stmt (&new_body,
17140 gimple_build_assign (new_var, x));
17141 prev = NULL_TREE;
17143 else if (OMP_CLAUSE_CHAIN (c)
17144 && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c))
17145 == OMP_CLAUSE_MAP
17146 && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
17147 == GOMP_MAP_FIRSTPRIVATE_POINTER
17148 || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
17149 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
17150 prev = c;
17151 break;
17152 case OMP_CLAUSE_PRIVATE:
17153 var = OMP_CLAUSE_DECL (c);
17154 if (is_variable_sized (var))
17156 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
17157 tree new_var = lookup_decl (var, ctx);
17158 tree pvar = DECL_VALUE_EXPR (var);
17159 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
17160 pvar = TREE_OPERAND (pvar, 0);
17161 gcc_assert (DECL_P (pvar));
17162 tree new_pvar = lookup_decl (pvar, ctx);
17163 tree atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
17164 tree al = size_int (DECL_ALIGN (var));
17165 tree x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
17166 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
17167 x = fold_convert_loc (clause_loc, TREE_TYPE (new_pvar), x);
17168 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
17169 gimple_seq_add_stmt (&new_body,
17170 gimple_build_assign (new_pvar, x));
17172 else if (is_reference (var) && !is_gimple_omp_oacc (ctx->stmt))
17174 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
17175 tree new_var = lookup_decl (var, ctx);
17176 tree x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
17177 if (TREE_CONSTANT (x))
17178 break;
17179 else
17181 tree atmp
17182 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
17183 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
17184 tree al = size_int (TYPE_ALIGN (rtype));
17185 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
17188 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
17189 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
17190 gimple_seq_add_stmt (&new_body,
17191 gimple_build_assign (new_var, x));
17193 break;
17196 gimple_seq fork_seq = NULL;
17197 gimple_seq join_seq = NULL;
17199 if (is_oacc_parallel (ctx))
17201 /* If there are reductions on the offloaded region itself, treat
17202 them as a dummy GANG loop. */
17203 tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
17205 lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
17206 false, NULL, NULL, &fork_seq, &join_seq, ctx);
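/* E.g. a reduction clause on the construct itself, as in
     #pragma acc parallel reduction (+:sum)
   gets its setup emitted into FORK_SEQ and its teardown into JOIN_SEQ,
   just as a reduction on an explicit gang loop would (illustrative).  */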
17209 gimple_seq_add_seq (&new_body, fork_seq);
17210 gimple_seq_add_seq (&new_body, tgt_body);
17211 gimple_seq_add_seq (&new_body, join_seq);
17213 if (offloaded)
17214 new_body = maybe_catch_exception (new_body);
17216 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
17217 gimple_omp_set_body (stmt, new_body);
17220 bind = gimple_build_bind (NULL, NULL,
17221 tgt_bind ? gimple_bind_block (tgt_bind)
17222 : NULL_TREE);
17223 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
17224 gimple_bind_add_seq (bind, ilist);
17225 gimple_bind_add_stmt (bind, stmt);
17226 gimple_bind_add_seq (bind, olist);
17228 pop_gimplify_context (NULL);
17230 if (dep_bind)
17232 gimple_bind_add_seq (dep_bind, dep_ilist);
17233 gimple_bind_add_stmt (dep_bind, bind);
17234 gimple_bind_add_seq (dep_bind, dep_olist);
17235 pop_gimplify_context (dep_bind);
17239 /* Expand code for an OpenMP teams directive. */
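/* A construct such as (illustrative)
     #pragma omp teams num_teams (4) thread_limit (8)
   is lowered to a GOMP_teams (4, 8) runtime call followed by the teams
   body; an omitted clause contributes 0, leaving the choice to the
   runtime.  */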
17241 static void
17242 lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
17244 gomp_teams *teams_stmt = as_a <gomp_teams *> (gsi_stmt (*gsi_p));
17245 push_gimplify_context ();
17247 tree block = make_node (BLOCK);
17248 gbind *bind = gimple_build_bind (NULL, NULL, block);
17249 gsi_replace (gsi_p, bind, true);
17250 gimple_seq bind_body = NULL;
17251 gimple_seq dlist = NULL;
17252 gimple_seq olist = NULL;
17254 tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
17255 OMP_CLAUSE_NUM_TEAMS);
17256 if (num_teams == NULL_TREE)
17257 num_teams = build_int_cst (unsigned_type_node, 0);
17258 else
17260 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
17261 num_teams = fold_convert (unsigned_type_node, num_teams);
17262 gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
17264 tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
17265 OMP_CLAUSE_THREAD_LIMIT);
17266 if (thread_limit == NULL_TREE)
17267 thread_limit = build_int_cst (unsigned_type_node, 0);
17268 else
17270 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
17271 thread_limit = fold_convert (unsigned_type_node, thread_limit);
17272 gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
17273 fb_rvalue);
17276 lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
17277 &bind_body, &dlist, ctx, NULL);
17278 lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
17279 lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, ctx);
17280 if (!gimple_omp_teams_grid_phony (teams_stmt))
17282 gimple_seq_add_stmt (&bind_body, teams_stmt);
17283 location_t loc = gimple_location (teams_stmt);
17284 tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
17285 gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
17286 gimple_set_location (call, loc);
17287 gimple_seq_add_stmt (&bind_body, call);
17290 gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
17291 gimple_omp_set_body (teams_stmt, NULL);
17292 gimple_seq_add_seq (&bind_body, olist);
17293 gimple_seq_add_seq (&bind_body, dlist);
17294 if (!gimple_omp_teams_grid_phony (teams_stmt))
17295 gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
17296 gimple_bind_set_body (bind, bind_body);
17298 pop_gimplify_context (bind);
17300 gimple_bind_append_vars (bind, ctx->block_vars);
17301 BLOCK_VARS (block) = ctx->block_vars;
17302 if (BLOCK_VARS (block))
17303 TREE_USED (block) = 1;
17306 /* Expand code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. */
17308 static void
17309 lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx)
17311 gimple *stmt = gsi_stmt (*gsi_p);
17312 lower_omp (gimple_omp_body_ptr (stmt), ctx);
17313 gimple_seq_add_stmt (gimple_omp_body_ptr (stmt),
17314 gimple_build_omp_return (false));
17318 /* Callback for lower_omp_1. Return non-NULL if *tp needs to be
17319 regimplified. If DATA is non-NULL, lower_omp_1 is outside
17320 of OMP context, but with task_shared_vars set. */
17322 static tree
17323 lower_omp_regimplify_p (tree *tp, int *walk_subtrees,
17324 void *data)
17326 tree t = *tp;
17328 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
17329 if (VAR_P (t) && data == NULL && DECL_HAS_VALUE_EXPR_P (t))
17330 return t;
17332 if (task_shared_vars
17333 && DECL_P (t)
17334 && bitmap_bit_p (task_shared_vars, DECL_UID (t)))
17335 return t;
17337 /* If a global variable has been privatized, TREE_CONSTANT on
17338 ADDR_EXPR might be wrong. */
17339 if (data == NULL && TREE_CODE (t) == ADDR_EXPR)
17340 recompute_tree_invariant_for_addr_expr (t);
17342 *walk_subtrees = !IS_TYPE_OR_DECL_P (t);
17343 return NULL_TREE;
17346 /* Data to be communicated between lower_omp_regimplify_operands and
17347 lower_omp_regimplify_operands_p. */
17349 struct lower_omp_regimplify_operands_data
17351 omp_context *ctx;
17352 vec<tree> *decls;
17355 /* Helper function for lower_omp_regimplify_operands. Find
17356 omp_member_access_dummy_var vars and temporarily adjust their
17357 DECL_VALUE_EXPRs if needed. */
17359 static tree
17360 lower_omp_regimplify_operands_p (tree *tp, int *walk_subtrees,
17361 void *data)
17363 tree t = omp_member_access_dummy_var (*tp);
17364 if (t)
17366 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
17367 lower_omp_regimplify_operands_data *ldata
17368 = (lower_omp_regimplify_operands_data *) wi->info;
17369 tree o = maybe_lookup_decl (t, ldata->ctx);
17370 if (o != t)
17372 ldata->decls->safe_push (DECL_VALUE_EXPR (*tp));
17373 ldata->decls->safe_push (*tp);
17374 tree v = unshare_and_remap (DECL_VALUE_EXPR (*tp), t, o);
17375 SET_DECL_VALUE_EXPR (*tp, v);
17378 *walk_subtrees = !IS_TYPE_OR_DECL_P (*tp);
17379 return NULL_TREE;
17382 /* Wrapper around gimple_regimplify_operands that adjusts DECL_VALUE_EXPRs
17383 of omp_member_access_dummy_var vars during regimplification. */
17385 static void
17386 lower_omp_regimplify_operands (omp_context *ctx, gimple *stmt,
17387 gimple_stmt_iterator *gsi_p)
17389 auto_vec<tree, 10> decls;
17390 if (ctx)
17392 struct walk_stmt_info wi;
17393 memset (&wi, '\0', sizeof (wi));
17394 struct lower_omp_regimplify_operands_data data;
17395 data.ctx = ctx;
17396 data.decls = &decls;
17397 wi.info = &data;
17398 walk_gimple_op (stmt, lower_omp_regimplify_operands_p, &wi);
17400 gimple_regimplify_operands (stmt, gsi_p);
17401 while (!decls.is_empty ())
17403 tree t = decls.pop ();
17404 tree v = decls.pop ();
17405 SET_DECL_VALUE_EXPR (t, v);
17409 static void
17410 lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
17412 gimple *stmt = gsi_stmt (*gsi_p);
17413 struct walk_stmt_info wi;
17414 gcall *call_stmt;
17416 if (gimple_has_location (stmt))
17417 input_location = gimple_location (stmt);
17419 if (task_shared_vars)
17420 memset (&wi, '\0', sizeof (wi));
17422 /* If we have issued syntax errors, avoid doing any heavy lifting.
17423 Just replace the OMP directives with a NOP to avoid
17424 confusing RTL expansion. */
17425 if (seen_error () && is_gimple_omp (stmt))
17427 gsi_replace (gsi_p, gimple_build_nop (), true);
17428 return;
17431 switch (gimple_code (stmt))
17433 case GIMPLE_COND:
17435 gcond *cond_stmt = as_a <gcond *> (stmt);
17436 if ((ctx || task_shared_vars)
17437 && (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
17438 lower_omp_regimplify_p,
17439 ctx ? NULL : &wi, NULL)
17440 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
17441 lower_omp_regimplify_p,
17442 ctx ? NULL : &wi, NULL)))
17443 lower_omp_regimplify_operands (ctx, cond_stmt, gsi_p);
17445 break;
17446 case GIMPLE_CATCH:
17447 lower_omp (gimple_catch_handler_ptr (as_a <gcatch *> (stmt)), ctx);
17448 break;
17449 case GIMPLE_EH_FILTER:
17450 lower_omp (gimple_eh_filter_failure_ptr (stmt), ctx);
17451 break;
17452 case GIMPLE_TRY:
17453 lower_omp (gimple_try_eval_ptr (stmt), ctx);
17454 lower_omp (gimple_try_cleanup_ptr (stmt), ctx);
17455 break;
17456 case GIMPLE_TRANSACTION:
17457 lower_omp (gimple_transaction_body_ptr (
17458 as_a <gtransaction *> (stmt)),
17459 ctx);
17460 break;
17461 case GIMPLE_BIND:
17462 lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
17463 break;
17464 case GIMPLE_OMP_PARALLEL:
17465 case GIMPLE_OMP_TASK:
17466 ctx = maybe_lookup_ctx (stmt);
17467 gcc_assert (ctx);
17468 if (ctx->cancellable)
17469 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17470 lower_omp_taskreg (gsi_p, ctx);
17471 break;
17472 case GIMPLE_OMP_FOR:
17473 ctx = maybe_lookup_ctx (stmt);
17474 gcc_assert (ctx);
17475 if (ctx->cancellable)
17476 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17477 lower_omp_for (gsi_p, ctx);
17478 break;
17479 case GIMPLE_OMP_SECTIONS:
17480 ctx = maybe_lookup_ctx (stmt);
17481 gcc_assert (ctx);
17482 if (ctx->cancellable)
17483 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17484 lower_omp_sections (gsi_p, ctx);
17485 break;
17486 case GIMPLE_OMP_SINGLE:
17487 ctx = maybe_lookup_ctx (stmt);
17488 gcc_assert (ctx);
17489 lower_omp_single (gsi_p, ctx);
17490 break;
17491 case GIMPLE_OMP_MASTER:
17492 ctx = maybe_lookup_ctx (stmt);
17493 gcc_assert (ctx);
17494 lower_omp_master (gsi_p, ctx);
17495 break;
17496 case GIMPLE_OMP_TASKGROUP:
17497 ctx = maybe_lookup_ctx (stmt);
17498 gcc_assert (ctx);
17499 lower_omp_taskgroup (gsi_p, ctx);
17500 break;
17501 case GIMPLE_OMP_ORDERED:
17502 ctx = maybe_lookup_ctx (stmt);
17503 gcc_assert (ctx);
17504 lower_omp_ordered (gsi_p, ctx);
17505 break;
17506 case GIMPLE_OMP_CRITICAL:
17507 ctx = maybe_lookup_ctx (stmt);
17508 gcc_assert (ctx);
17509 lower_omp_critical (gsi_p, ctx);
17510 break;
17511 case GIMPLE_OMP_ATOMIC_LOAD:
17512 if ((ctx || task_shared_vars)
17513 && walk_tree (gimple_omp_atomic_load_rhs_ptr (
17514 as_a <gomp_atomic_load *> (stmt)),
17515 lower_omp_regimplify_p, ctx ? NULL : &wi, NULL))
17516 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17517 break;
17518 case GIMPLE_OMP_TARGET:
17519 ctx = maybe_lookup_ctx (stmt);
17520 gcc_assert (ctx);
17521 lower_omp_target (gsi_p, ctx);
17522 break;
17523 case GIMPLE_OMP_TEAMS:
17524 ctx = maybe_lookup_ctx (stmt);
17525 gcc_assert (ctx);
17526 lower_omp_teams (gsi_p, ctx);
17527 break;
17528 case GIMPLE_OMP_GRID_BODY:
17529 ctx = maybe_lookup_ctx (stmt);
17530 gcc_assert (ctx);
17531 lower_omp_grid_body (gsi_p, ctx);
17532 break;
17533 case GIMPLE_CALL:
17534 tree fndecl;
17535 call_stmt = as_a <gcall *> (stmt);
17536 fndecl = gimple_call_fndecl (call_stmt);
17537 if (fndecl
17538 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
17539 switch (DECL_FUNCTION_CODE (fndecl))
17541 case BUILT_IN_GOMP_BARRIER:
17542 if (ctx == NULL)
17543 break;
17544 /* FALLTHRU */
17545 case BUILT_IN_GOMP_CANCEL:
17546 case BUILT_IN_GOMP_CANCELLATION_POINT:
17547 omp_context *cctx;
17548 cctx = ctx;
17549 if (gimple_code (cctx->stmt) == GIMPLE_OMP_SECTION)
17550 cctx = cctx->outer;
17551 gcc_assert (gimple_call_lhs (call_stmt) == NULL_TREE);
17552 if (!cctx->cancellable)
17554 if (DECL_FUNCTION_CODE (fndecl)
17555 == BUILT_IN_GOMP_CANCELLATION_POINT)
17557 stmt = gimple_build_nop ();
17558 gsi_replace (gsi_p, stmt, false);
17560 break;
17562 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
17564 fndecl = builtin_decl_explicit (BUILT_IN_GOMP_BARRIER_CANCEL);
17565 gimple_call_set_fndecl (call_stmt, fndecl);
17566 gimple_call_set_fntype (call_stmt, TREE_TYPE (fndecl));
17568 tree lhs;
17569 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (fndecl)));
17570 gimple_call_set_lhs (call_stmt, lhs);
17571 tree fallthru_label;
17572 fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
17573 gimple *g;
17574 g = gimple_build_label (fallthru_label);
17575 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17576 g = gimple_build_cond (NE_EXPR, lhs,
17577 fold_convert (TREE_TYPE (lhs),
17578 boolean_false_node),
17579 cctx->cancel_label, fallthru_label);
17580 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17581 break;
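/* The generated shape is thus roughly (illustrative sketch):
     lhs = GOMP_barrier_cancel ();
     if (lhs != 0) goto <cancel_label>; else goto <fallthru_label>;
     <fallthru_label>:
   and similarly for GOMP_cancel and GOMP_cancellation_point, with
   <cancel_label> branching to the construct's cancellation handling.  */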
17582 default:
17583 break;
17585 /* FALLTHRU */
17586 default:
17587 if ((ctx || task_shared_vars)
17588 && walk_gimple_op (stmt, lower_omp_regimplify_p,
17589 ctx ? NULL : &wi))
17591 /* Just remove clobbers; this should happen only if we have
17592 "privatized" local addressable variables in SIMD regions.  The
17593 clobber isn't needed in that case, and gimplifying the address
17594 of the ARRAY_REF into a pointer and creating a MEM_REF based
17595 clobber would create worse code than we get with the clobber
17596 dropped. */
17597 if (gimple_clobber_p (stmt))
17599 gsi_replace (gsi_p, gimple_build_nop (), true);
17600 break;
17602 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17604 break;
17608 static void
17609 lower_omp (gimple_seq *body, omp_context *ctx)
17611 location_t saved_location = input_location;
17612 gimple_stmt_iterator gsi;
17613 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17614 lower_omp_1 (&gsi, ctx);
17615 /* During gimplification, we haven't folded statements inside offloading
17616 or taskreg regions (gimplify.c:maybe_fold_stmt); do that now. */
17617 if (target_nesting_level || taskreg_nesting_level)
17618 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17619 fold_stmt (&gsi);
17620 input_location = saved_location;
17623 /* Structure describing the basic properties of the loop we are analyzing,
17624 both when deciding whether it can be gridified and when gridifying it. */
17626 struct grid_prop
17628 /* True when we are doing tiling gridification, i.e. when there is a distinct
17629 distribute loop over groups and a loop construct over work-items. False
17630 when distribute and parallel for loops form a combined construct. */
17631 bool tiling;
17632 /* Location of the target construct for optimization information
17633 messages. */
17634 location_t target_loc;
17635 /* The collapse clause of the involved loops. Collapse value of all of them
17636 must be the same for gridification to take place. */
17637 size_t collapse;
17638 /* Group sizes, if requested by the user or NULL if not requested. */
17639 tree group_sizes[3];
17642 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
17643 "gridified HSA kernel because "
17645 /* Return true if STMT is an assignment of a register-type into a local
17646 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
17647 any of the trees specifying group sizes there. */
17649 static bool
17650 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
17652 gassign *assign = dyn_cast <gassign *> (stmt);
17653 if (!assign)
17654 return false;
17655 if (gimple_clobber_p (assign))
17656 return true;
17657 tree lhs = gimple_assign_lhs (assign);
17658 if (!VAR_P (lhs)
17659 || !is_gimple_reg_type (TREE_TYPE (lhs))
17660 || is_global_var (lhs))
17661 return false;
17662 if (grid)
17663 for (unsigned i = 0; i < grid->collapse; i++)
17664 if (lhs == grid->group_sizes[i])
17665 return false;
17666 return true;
17669 /* Return true if all statements in SEQ are assignments to local register-type
17670 variables that do not hold group size information. */
17672 static bool
17673 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
17675 if (!seq)
17676 return true;
17678 gimple_stmt_iterator gsi;
17679 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17680 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
17681 return false;
17682 return true;
17685 /* Scan statements in SEQ and call itself recursively on any bind. GRID
17686 describes hitherto discovered properties of the loop that is evaluated for
17687 possible gridification. If during the whole search only assignments to
17688 register-type local variables (that do not overwrite group size information)
17689 and a single OMP statement are encountered, return true, otherwise return
17690 false. RET is where we store any OMP statement encountered. */
17692 static bool
17693 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
17694 const char *name, gimple **ret)
17696 gimple_stmt_iterator gsi;
17697 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17699 gimple *stmt = gsi_stmt (gsi);
17701 if (grid_safe_assignment_p (stmt, grid))
17702 continue;
17703 if (gbind *bind = dyn_cast <gbind *> (stmt))
17705 if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind),
17706 grid, name, ret))
17707 return false;
17709 else if (is_gimple_omp (stmt))
17711 if (*ret)
17713 if (dump_enabled_p ())
17715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17716 GRID_MISSED_MSG_PREFIX "%s construct "
17717 "contains multiple OpenMP constructs\n",
17718 name);
17719 dump_printf_loc (MSG_NOTE, gimple_location (*ret),
17720 "The first OpenMP construct within "
17721 "a parallel\n");
17722 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
17723 "The second OpenMP construct within "
17724 "a parallel\n");
17726 return false;
17728 *ret = stmt;
17730 else
17732 if (dump_enabled_p ())
17734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17735 GRID_MISSED_MSG_PREFIX "%s construct contains "
17736 "a complex statement\n", name);
17737 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
17738 "This statement cannot be analyzed for "
17739 "gridification\n");
17741 return false;
17744 return true;
17747 /* Scan statements in SEQ and make sure that it and any binds in it contain
17748 only assignments to local register-type variables (that do not overwrite
17749 group size information) and one OMP construct. If so, return that
17750 construct, otherwise return NULL. GRID describes hitherto discovered
17751 properties of the loop that is evaluated for possible gridification. If
17752 dumping is enabled and the function fails, use NAME to dump a note with the
17753 reason for failure. */
17755 static gimple *
17756 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
17757 const char *name)
17759 if (!seq)
17761 if (dump_enabled_p ())
17762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17763 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
17764 name);
17765 return NULL;
17768 gimple *ret = NULL;
17769 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
17771 if (!ret && dump_enabled_p ())
17772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17773 GRID_MISSED_MSG_PREFIX "%s construct does not contain "
17774 "any other OpenMP construct\n", name);
17775 return ret;
17777 else
17778 return NULL;
17781 /* Walker function looking for statements that there is no point in
17782 gridifying (and for noreturn function calls, which we cannot gridify).
17783 Return non-NULL if such a statement is found. */
17785 static tree
17786 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
17787 bool *handled_ops_p,
17788 struct walk_stmt_info *wi)
17790 *handled_ops_p = false;
17791 gimple *stmt = gsi_stmt (*gsi);
17792 switch (gimple_code (stmt))
17794 case GIMPLE_CALL:
17795 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
17797 *handled_ops_p = true;
17798 wi->info = stmt;
17799 return error_mark_node;
17801 break;
17803 /* We may reduce the following list if we find a way to implement the
17804 clauses, but for now there is no point trying further. */
17805 case GIMPLE_OMP_CRITICAL:
17806 case GIMPLE_OMP_TASKGROUP:
17807 case GIMPLE_OMP_TASK:
17808 case GIMPLE_OMP_SECTION:
17809 case GIMPLE_OMP_SECTIONS:
17810 case GIMPLE_OMP_SECTIONS_SWITCH:
17811 case GIMPLE_OMP_TARGET:
17812 case GIMPLE_OMP_ORDERED:
17813 *handled_ops_p = true;
17814 wi->info = stmt;
17815 return error_mark_node;
17816 default:
17817 break;
17819 return NULL;
17822 /* Examine clauses of omp parallel statement PAR and if any prevents
17823 gridification, issue a missed-optimization diagnostic and return false,
17824 otherwise return true. TLOC is the location of the target construct,
17825 used for optimization-information messages. */
17827 static bool
17828 grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc)
17830 tree clauses = gimple_omp_parallel_clauses (par);
17831 while (clauses)
17833 switch (OMP_CLAUSE_CODE (clauses))
17835 case OMP_CLAUSE_NUM_THREADS:
17836 if (dump_enabled_p ())
17838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
17839 GRID_MISSED_MSG_PREFIX "because there is "
17840 "a num_threads clause of the parallel "
17841 "construct\n");
17842 dump_printf_loc (MSG_NOTE, gimple_location (par),
17843 "Parallel construct has a num_threads clause\n");
17845 return false;
17847 case OMP_CLAUSE_REDUCTION:
17848 if (dump_enabled_p ())
17850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
17851 GRID_MISSED_MSG_PREFIX "a reduction clause "
17852 "is present\n");
17853 dump_printf_loc (MSG_NOTE, gimple_location (par),
17854 "Parallel construct has a reduction clause\n");
17856 return false;
17858 default:
17859 break;
17861 clauses = OMP_CLAUSE_CHAIN (clauses);
17863 return true;
17866 /* Examine clauses and the body of omp loop statement GFOR and if something
17867 prevents gridification, issue a missed-optimization diagnostic and return
17868 false, otherwise return true. GRID describes hitherto discovered properties
17869 of the loop that is evaluated for possible gridification. */
17871 static bool
17872 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
17874 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
17875 grid))
17877 if (dump_enabled_p ())
17879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17880 GRID_MISSED_MSG_PREFIX "the inner loop "
17881 "loop bounds computation contains a complex "
17882 "statement\n");
17883 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
17884 "Loop construct cannot be analyzed for "
17885 "gridification\n");
17887 return false;
17890 tree clauses = gimple_omp_for_clauses (gfor);
17891 while (clauses)
17893 switch (OMP_CLAUSE_CODE (clauses))
17895 case OMP_CLAUSE_SCHEDULE:
17896 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
17898 if (dump_enabled_p ())
17900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17901 GRID_MISSED_MSG_PREFIX "the inner loop "
17902 "has a non-automatic schedule clause\n");
17903 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
17904 "Loop construct has a non automatic "
17905 "schedule clause\n");
17907 return false;
17909 break;
17911 case OMP_CLAUSE_REDUCTION:
17912 if (dump_enabled_p ())
17914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17915 GRID_MISSED_MSG_PREFIX "a reduction "
17916 "clause is present\n ");
17917 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
17918 "Loop construct has a reduction schedule "
17919 "clause\n");
17921 return false;
17923 default:
17924 break;
17926 clauses = OMP_CLAUSE_CHAIN (clauses);
17928 struct walk_stmt_info wi;
17929 memset (&wi, 0, sizeof (wi));
17930 if (walk_gimple_seq (gimple_omp_body (gfor),
17931 grid_find_ungridifiable_statement,
17932 NULL, &wi))
17934 gimple *bad = (gimple *) wi.info;
17935 if (dump_enabled_p ())
17937 if (is_gimple_call (bad))
17938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17939 GRID_MISSED_MSG_PREFIX "the inner loop contains "
17940 "call to a noreturn function\n");
17941 else
17942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
17943 GRID_MISSED_MSG_PREFIX "the inner loop contains "
17944 "statement %s which cannot be transformed\n",
17945 gimple_code_name[(int) gimple_code (bad)]);
17946 dump_printf_loc (MSG_NOTE, gimple_location (bad),
17947 "This statement cannot be analyzed for "
17948 "gridification\n");
17950 return false;
17952 return true;
17955 /* Given a distribute omp construct represented by DIST, which in the original
17956 source forms a compound construct with a looping construct, return true if it
17957 can be turned into a gridified HSA kernel. Otherwise return false. GRID
17958 describes hitherto discovered properties of the loop that is evaluated for
17959 possible gridification. */
17961 static bool
17962 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
17964 location_t tloc = grid->target_loc;
17965 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
17966 grid, "distribute");
17967 gomp_parallel *par;
17968 if (!stmt
17969 || !(par = dyn_cast <gomp_parallel *> (stmt))
17970 || !grid_parallel_clauses_gridifiable (par, tloc))
17971 return false;
17973 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
17974 "parallel");
17975 gomp_for *gfor;
17976 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
17977 return false;
17979 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
17981 if (dump_enabled_p ())
17982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
17983 GRID_MISSED_MSG_PREFIX "the inner loop is not "
17984 "a simple for loop\n");
17985 return false;
17987 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
17989 if (!grid_inner_loop_gridifiable_p (gfor, grid))
17990 return false;
17992 return true;
17995 /* Given an omp loop statement GFOR, return true if it can participate in
17996 tiling gridification, i.e. in one where the distribute and parallel for
17997 loops do not form a compound statement. GRID describes hitherto discovered
17998 properties of the loop that is evaluated for possible gridification. */
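/* I.e. a tiled source shape along the lines of (illustrative sketch):
     #pragma omp distribute
     for (i0 = 0; i0 < n; i0 += TILE)
       #pragma omp parallel for
       for (i = i0; i < i0 + TILE; i++) ...
   where each inner loop instance iterates over exactly one tile of the
   distribute loop.  */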
18000 static bool
18001 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
18003 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
18005 if (dump_enabled_p ())
18007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18008 GRID_MISSED_MSG_PREFIX "an inner loop is not "
18009 "a simple for loop\n");
18010 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
18011 "This statement is not a simple for loop\n");
18013 return false;
18016 if (!grid_inner_loop_gridifiable_p (gfor, grid))
18017 return false;
18019 if (gimple_omp_for_collapse (gfor) != grid->collapse)
18021 if (dump_enabled_p ())
18023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18024 GRID_MISSED_MSG_PREFIX "an inner loop does not "
18025 "have use the same collapse clause\n");
18026 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
18027 "Loop construct uses a different collapse clause\n");
18029 return false;
18032 struct omp_for_data fd;
18033 struct omp_for_data_loop *loops
18034 = (struct omp_for_data_loop *)alloca (grid->collapse
18035 * sizeof (struct omp_for_data_loop));
18036 extract_omp_for_data (gfor, &fd, loops);
18037 for (unsigned i = 0; i < grid->collapse; i++)
18039 tree itype, type = TREE_TYPE (fd.loops[i].v);
18040 if (POINTER_TYPE_P (type))
18041 itype = signed_type_for (type);
18042 else
18043 itype = type;
18045 tree n1 = fold_convert (itype, fd.loops[i].n1);
18046 tree n2 = fold_convert (itype, fd.loops[i].n2);
18047 tree t = build_int_cst (itype,
18048 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
18049 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
18050 t = fold_build2 (PLUS_EXPR, itype, t, n2);
18051 t = fold_build2 (MINUS_EXPR, itype, t, n1);
18052 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
18053 t = fold_build2 (TRUNC_DIV_EXPR, itype,
18054 fold_build1 (NEGATE_EXPR, itype, t),
18055 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
18056 else
18057 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
18059 if (!operand_equal_p (grid->group_sizes[i], t, 0))
18061 if (dump_enabled_p ())
18063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18064 GRID_MISSED_MSG_PREFIX "the distribute and "
18065 "an internal loop do not agree on tile size\n");
18066 dump_printf_loc (MSG_NOTE, gimple_location (gfor),
18067 "Loop construct does not seem to loop over "
18068 "a tile size\n");
18070 return false;
18073 return true;
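/* A worked example of the trip count check above (illustrative, made-up
   source): for a distribute stepping by 64 and an inner loop

     for (int i = tile; i < tile + 64; i++)

   we get n1 = tile, n2 = tile + 64, step = 1 and cond_code = LT_EXPR, so
   t = (1 + -1 + (tile + 64) - tile) / 1 = 64, which matches the group size
   recorded for the dimension, namely the distribute step 64.  */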
18076 /* Facing a call to FNDECL in the body of a distribute construct, return true
18077 if we can handle it or false if it precludes gridification. */
18079 static bool
18080 grid_call_permissible_in_distribute_p (tree fndecl)
18082 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
18083 return true;
18085 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18086 if (strstr (name, "omp_") != name)
18087 return false;
18089 if ((strcmp (name, "omp_get_thread_num") == 0)
18090 || (strcmp (name, "omp_get_num_threads") == 0)
18091 || (strcmp (name, "omp_get_num_teams") == 0)
18092 || (strcmp (name, "omp_get_team_num") == 0)
18093 || (strcmp (name, "omp_get_level") == 0)
18094 || (strcmp (name, "omp_get_active_level") == 0)
18095 || (strcmp (name, "omp_in_parallel") == 0))
18096 return true;
18098 return false;
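/* Illustrative example: within a distribute body a call such as

     int team = omp_get_team_num ();

   passes the check above, whereas a call to an arbitrary user function that
   is neither pure nor const blocks gridification.  */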
18101 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
18102 of a distribute construct that is pointed at by GSI, modify it as necessary
18103 for gridification. If the statement itself got removed, return true. */
18105 static bool
18106 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
18108 gimple *stmt = gsi_stmt (*gsi);
18109 tree fndecl = gimple_call_fndecl (stmt);
18110 gcc_checking_assert (fndecl);
18111 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
18112 return false;
18114 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18115 if ((strcmp (name, "omp_get_thread_num") == 0)
18116 || (strcmp (name, "omp_get_level") == 0)
18117 || (strcmp (name, "omp_get_active_level") == 0)
18118 || (strcmp (name, "omp_in_parallel") == 0))
18120 tree lhs = gimple_call_lhs (stmt);
18121 if (lhs)
18123 gassign *assign
18124 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
18125 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
18127 gsi_remove (gsi, true);
18128 return true;
18131 /* The rest of the omp functions can stay as they are; the HSA back end will
18132 handle them correctly. */
18133 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
18134 || (strcmp (name, "omp_get_num_teams") == 0)
18135 || (strcmp (name, "omp_get_team_num") == 0));
18136 return false;
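/* Illustrative effect of the rewrite above: in a distribute context (outside
   any parallel) the queried values are necessarily zero, so for example

     tid = omp_get_thread_num ();

   becomes

     tid = 0;

   and the call statement itself is removed.  */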
18139 /* Given a sequence of statements within a distribute omp construct or a
18140 parallel construct, which in the original source does not form a compound
18141 construct with a looping construct, return true if it does not prevent us
18142 from turning it into a gridified HSA kernel. Otherwise return false. GRID
18143 describes hitherto discovered properties of the loop that is evaluated for
18144 possible gridification. IN_PARALLEL must be true if seq is within a
18145 parallel construct and false if it is only within a distribute
18146 construct. */
18148 static bool
18149 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
18150 bool in_parallel)
18152 gimple_stmt_iterator gsi;
18153 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
18155 gimple *stmt = gsi_stmt (gsi);
18157 if (grid_safe_assignment_p (stmt, grid)
18158 || gimple_code (stmt) == GIMPLE_GOTO
18159 || gimple_code (stmt) == GIMPLE_LABEL
18160 || gimple_code (stmt) == GIMPLE_COND)
18161 continue;
18162 else if (gbind *bind = dyn_cast <gbind *> (stmt))
18164 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
18165 grid, in_parallel))
18166 return false;
18167 continue;
18169 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
18171 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
18173 if (dump_enabled_p ())
18175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18176 GRID_MISSED_MSG_PREFIX "the distribute "
18177 "construct contains a try..catch region\n");
18178 dump_printf_loc (MSG_NOTE, gimple_location (try_stmt),
18179 "This statement cannot be analyzed for "
18180 "tiled gridification\n");
18182 return false;
18184 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
18185 grid, in_parallel))
18186 return false;
18187 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
18188 grid, in_parallel))
18189 return false;
18190 continue;
18192 else if (is_gimple_call (stmt))
18194 tree fndecl = gimple_call_fndecl (stmt);
18195 if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
18196 continue;
18198 if (dump_enabled_p ())
18200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18201 GRID_MISSED_MSG_PREFIX "the distribute "
18202 "construct contains a call\n");
18203 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
18204 "This statement cannot be analyzed for "
18205 "tiled gridification\n");
18207 return false;
18209 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
18211 if (in_parallel)
18213 if (dump_enabled_p ())
18215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18216 GRID_MISSED_MSG_PREFIX "a parallel "
18217 "construct contains another parallel "
18218 "construct\n");
18219 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
18220 "This parallel construct is nested in "
18221 "another one\n");
18223 return false;
18225 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
18226 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
18227 grid, true))
18228 return false;
18230 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
18232 if (!in_parallel)
18234 if (dump_enabled_p ())
18236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18237 GRID_MISSED_MSG_PREFIX "a loop "
18238 "construct is not nested within a parallel "
18239 "construct\n");
18240 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
18241 "This loop construct is not nested in "
18242 "a parallel construct\n");
18244 return false;
18246 if (!grid_gfor_follows_tiling_pattern (gfor, grid))
18247 return false;
18249 else
18251 if (dump_enabled_p ())
18253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
18254 GRID_MISSED_MSG_PREFIX "the distribute "
18255 "construct contains a complex statement\n");
18256 dump_printf_loc (MSG_NOTE, gimple_location (stmt),
18257 "This statement cannot be analyzed for "
18258 "tiled gridification\n");
18260 return false;
18263 return true;
18266 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
18267 return true, otherwise return false. In the case of success, also fill in
18268 GRID with information describing the kernel grid. */
18270 static bool
18271 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
18273 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
18274 return false;
18276 location_t tloc = gimple_location (target);
18277 grid->target_loc = tloc;
18278 gimple *stmt
18279 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
18280 grid, "target");
18281 if (!stmt)
18282 return false;
18283 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
18284 tree group_size = NULL;
18285 if (!teams)
18287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18288 GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
18289 "construct in it.\n");
18290 return false;
18293 tree clauses = gimple_omp_teams_clauses (teams);
18294 while (clauses)
18296 switch (OMP_CLAUSE_CODE (clauses))
18298 case OMP_CLAUSE_NUM_TEAMS:
18299 if (dump_enabled_p ())
18300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18301 GRID_MISSED_MSG_PREFIX "the teams construct "
18302 "contains a num_teams clause\n ");
18303 return false;
18305 case OMP_CLAUSE_REDUCTION:
18306 if (dump_enabled_p ())
18307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18308 GRID_MISSED_MSG_PREFIX "a reduction "
18309 "clause is present\n ");
18310 return false;
18312 case OMP_CLAUSE_THREAD_LIMIT:
18313 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
18314 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
18315 break;
18317 default:
18318 break;
18320 clauses = OMP_CLAUSE_CHAIN (clauses);
18323 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
18324 "teams");
18325 if (!stmt)
18326 return false;
18327 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
18328 if (!dist)
18330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18331 GRID_MISSED_MSG_PREFIX "the teams construct does not "
18332 "have a single distribute construct in it.\n");
18333 return false;
18336 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
18338 grid->collapse = gimple_omp_for_collapse (dist);
18339 if (grid->collapse > 3)
18341 if (dump_enabled_p ())
18342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18343 GRID_MISSED_MSG_PREFIX "the distribute construct "
18344 "contains collapse clause with parameter greater "
18345 "than 3\n");
18346 return false;
18349 struct omp_for_data fd;
18350 struct omp_for_data_loop *dist_loops
18351 = (struct omp_for_data_loop *)alloca (grid->collapse
18352 * sizeof (struct omp_for_data_loop));
18353 extract_omp_for_data (dist, &fd, dist_loops);
18354 if (fd.chunk_size)
18356 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
18358 if (dump_enabled_p ())
18359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18360 GRID_MISSED_MSG_PREFIX "the teams "
18361 "thread limit is different from distribute "
18362 "schedule chunk\n");
18363 return false;
18365 group_size = fd.chunk_size;
18367 if (group_size && grid->collapse > 1)
18369 if (dump_enabled_p ())
18370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18371 GRID_MISSED_MSG_PREFIX "group size cannot be "
18372 "set using thread_limit or schedule clauses "
18373 "when also using a collapse clause greater than 1\n");
18374 return false;
18377 if (gimple_omp_for_combined_p (dist))
18379 grid->tiling = false;
18380 grid->group_sizes[0] = group_size;
18381 for (unsigned i = 1; i < grid->collapse; i++)
18382 grid->group_sizes[i] = NULL;
18383 return grid_dist_follows_simple_pattern (dist, grid);
18385 else
18387 grid->tiling = true;
18388 if (group_size)
18390 if (dump_enabled_p ())
18391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
18392 GRID_MISSED_MSG_PREFIX "group size cannot be set "
18393 "using thread_limit or schedule clauses when "
18394 "distribute and loop constructs do not form "
18395 "one combined construct\n");
18396 return false;
18398 for (unsigned i = 0; i < grid->collapse; i++)
18400 if (fd.loops[i].cond_code == GT_EXPR)
18401 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
18402 TREE_TYPE (fd.loops[i].step),
18403 fd.loops[i].step);
18404 else
18405 grid->group_sizes[i] = fd.loops[i].step;
18407 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
18408 false);
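/* An illustrative example (made-up source) of a target satisfying the
   pattern above with an explicit group size:

     #pragma omp target teams thread_limit (64)
     #pragma omp distribute parallel for dist_schedule (static, 64)
     for (int i = 0; i < n; i++)
       a[i] = b[i];

   The thread_limit clause and the dist_schedule chunk agree, so the
   work-group size becomes 64; a num_teams or reduction clause, or a
   collapse parameter greater than 3, would have blocked gridification.  */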
18412 /* Operand walker, used to remap pre-body declarations according to a hash map
18413 provided in DATA. */
18415 static tree
18416 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
18418 tree t = *tp;
18420 if (DECL_P (t) || TYPE_P (t))
18421 *walk_subtrees = 0;
18422 else
18423 *walk_subtrees = 1;
18425 if (VAR_P (t))
18427 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
18428 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
18429 tree *repl = declmap->get (t);
18430 if (repl)
18431 *tp = *repl;
18433 return NULL_TREE;
18436 /* Identifiers of segments into which a particular variable should be placed
18437 when gridifying. */
18439 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
18440 GRID_SEGMENT_GLOBAL};
18442 /* Mark VAR so that it is eventually placed into SEGMENT.  Variables that
18443 are not addressable do not need any marking and are left untouched.  */
18446 static void
18447 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
18449 /* Making variables non-addressable would require that we re-gimplify all
18450 their uses. Fortunately, we do not have to do this because if they are
18451 not addressable, it means they are not used in atomic or parallel
18452 statements and so relaxed GPU consistency rules mean we can just keep them
18453 private. */
18454 if (!TREE_ADDRESSABLE (var))
18455 return;
18457 switch (segment)
18459 case GRID_SEGMENT_GROUP:
18460 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
18461 NULL, DECL_ATTRIBUTES (var));
18462 break;
18463 case GRID_SEGMENT_GLOBAL:
18464 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
18465 NULL, DECL_ATTRIBUTES (var));
18466 break;
18467 default:
18468 gcc_unreachable ();
18471 if (!TREE_STATIC (var))
18473 TREE_STATIC (var) = 1;
18474 varpool_node::finalize_decl (var);
18479 /* Copy leading register-type assignments to local variables in SRC to just
18480 before DST, creating temporaries, adjusting the mapping of operands in WI and
18481 remapping operands as necessary. Add any new temporaries to TGT_BIND.
18482 Return the first statement that does not conform to grid_safe_assignment_p
18483 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
18484 variables in traversed bind statements so that they are put into the
18485 appropriate segment. */
18487 static gimple *
18488 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
18489 gbind *tgt_bind,
18490 enum grid_var_segment var_segment,
18491 struct walk_stmt_info *wi)
18493 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
18494 gimple_stmt_iterator gsi;
18495 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
18497 gimple *stmt = gsi_stmt (gsi);
18498 if (gbind *bind = dyn_cast <gbind *> (stmt))
18500 gimple *r = grid_copy_leading_local_assignments
18501 (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
18503 if (var_segment != GRID_SEGMENT_PRIVATE)
18504 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
18505 grid_mark_variable_segment (var, var_segment);
18506 if (r)
18507 return r;
18508 else
18509 continue;
18511 if (!grid_safe_assignment_p (stmt, NULL))
18512 return stmt;
18513 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
18514 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
18515 TREE_TYPE (lhs));
18516 DECL_CONTEXT (repl) = current_function_decl;
18517 gimple_bind_append_vars (tgt_bind, repl);
18519 declmap->put (lhs, repl);
18520 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
18521 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
18522 gsi_insert_before (dst, copy, GSI_SAME_STMT);
18524 return NULL;
18527 /* Statement walker function to make adjustments to statements within the
18528 gridified kernel copy. */
18530 static tree
18531 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
18532 struct walk_stmt_info *)
18534 *handled_ops_p = false;
18535 gimple *stmt = gsi_stmt (*gsi);
18536 if (gimple_code (stmt) == GIMPLE_OMP_FOR
18537 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
18539 gomp_for *loop = as_a <gomp_for *> (stmt);
18540 tree clauses = gimple_omp_for_clauses (loop);
18541 tree cl = find_omp_clause (clauses, OMP_CLAUSE_SAFELEN);
18542 if (cl)
18543 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
18544 else
18546 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
18547 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
18548 OMP_CLAUSE_CHAIN (c) = clauses;
18549 gimple_omp_for_set_clauses (loop, c);
18552 return NULL_TREE;
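/* Illustrative effect of the walker above: inside the kernel copy a loop

     #pragma omp simd safelen(16)

   is given safelen(1), i.e. no concurrent SIMD lanes may be assumed,
   which presumably matches how a single work-item executes the body.  */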
18555 /* Given a PARLOOP that is a normal for looping construct but also a part of a
18556 combined construct with a simd loop, eliminate the simd loop. */
18558 static void
18559 grid_eliminate_combined_simd_part (gomp_for *parloop)
18561 struct walk_stmt_info wi;
18563 memset (&wi, 0, sizeof (wi));
18564 wi.val_only = true;
18565 enum gf_mask msk = GF_OMP_FOR_SIMD;
18566 wi.info = (void *) &msk;
18567 walk_gimple_seq (gimple_omp_body (parloop), find_combined_for, NULL, &wi);
18568 gimple *stmt = (gimple *) wi.info;
18569 /* We expect that the SIMD is the only statement in the parallel loop. */
18570 gcc_assert (stmt
18571 && gimple_code (stmt) == GIMPLE_OMP_FOR
18572 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
18573 && gimple_omp_for_combined_into_p (stmt)
18574 && !gimple_omp_for_combined_p (stmt));
18575 gomp_for *simd = as_a <gomp_for *> (stmt);
18577 /* Copy over the iteration properties because the body refers to the index in
18578 the bottommost loop. */
18579 unsigned i, collapse = gimple_omp_for_collapse (parloop);
18580 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
18581 for (i = 0; i < collapse; i++)
18583 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
18584 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
18585 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
18586 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
18589 tree *tgt = gimple_omp_for_clauses_ptr (parloop);
18590 while (*tgt)
18591 tgt = &OMP_CLAUSE_CHAIN (*tgt);
18593 /* Copy over all clauses, except for linear clauses, which are turned into
18594 private clauses, and all other simd-specific clauses, which are
18595 ignored. */
18596 tree *pc = gimple_omp_for_clauses_ptr (simd);
18597 while (*pc)
18599 tree c = *pc;
18600 switch (OMP_CLAUSE_CODE (c))
18602 case OMP_CLAUSE_LINEAR:
18604 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
18605 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
18606 OMP_CLAUSE_CHAIN (priv) = NULL;
18607 *tgt = priv;
18608 tgt = &OMP_CLAUSE_CHAIN (priv);
18609 pc = &OMP_CLAUSE_CHAIN (c);
18610 break;
18613 case OMP_CLAUSE_SAFELEN:
18614 case OMP_CLAUSE_SIMDLEN:
18615 case OMP_CLAUSE_ALIGNED:
18616 pc = &OMP_CLAUSE_CHAIN (c);
18617 break;
18619 default:
18620 *pc = OMP_CLAUSE_CHAIN (c);
18621 OMP_CLAUSE_CHAIN (c) = NULL;
18622 *tgt = c;
18623 tgt = &OMP_CLAUSE_CHAIN (c);
18624 break;
18628 /* Finally, throw away the simd and mark the parallel loop as not
18629 combined. */
18630 gimple_omp_set_body (parloop, gimple_omp_body (simd));
18631 gimple_omp_for_set_combined_p (parloop, false);
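/* Illustrative source-level effect of the elimination above: a combined

     #pragma omp parallel for simd linear(j) safelen(8)

   is treated as

     #pragma omp parallel for private(j)

   with the simd-only clauses (safelen, simdlen, aligned) dropped and the
   simd body hoisted into the parallel loop.  */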
18634 /* Statement walker function marking all loops as grid loops that represent
18635 threads of a particular thread group, handling any lastprivate clauses. */
18637 static tree
18638 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
18639 struct walk_stmt_info *wi_in)
18641 *handled_ops_p = false;
18642 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
18644 *handled_ops_p = true;
18645 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
18646 gimple_omp_for_set_grid_intra_group (loop, true);
18647 if (gimple_omp_for_combined_p (loop))
18648 grid_eliminate_combined_simd_part (loop);
18650 struct walk_stmt_info body_wi;
18651 memset (&body_wi, 0, sizeof (body_wi));
18652 walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
18653 grid_process_grid_body, NULL, &body_wi);
18655 gbind *bind = (gbind *) wi_in->info;
18656 tree c;
18657 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
18658 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
18660 push_gimplify_context ();
18661 tree ov = OMP_CLAUSE_DECL (c);
18662 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
18663 TREE_TYPE (ov));
18665 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
18666 DECL_CONTEXT (gv) = current_function_decl;
18667 gimple_bind_append_vars (bind, gv);
18668 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
18669 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
18670 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
18671 gimple_seq l = NULL;
18672 gimplify_and_add (x, &l);
18673 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
18674 pop_gimplify_context (bind);
18677 return NULL_TREE;
18680 /* Statement walker function marking all parallels as grid_phony and loops as
18681 grid ones representing threads of a particular thread group. */
18683 static tree
18684 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
18685 bool *handled_ops_p,
18686 struct walk_stmt_info *wi_in)
18688 *handled_ops_p = false;
18689 wi_in->removed_stmt = false;
18690 gimple *stmt = gsi_stmt (*gsi);
18691 if (gbind *bind = dyn_cast <gbind *> (stmt))
18693 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
18694 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
18696 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
18698 *handled_ops_p = true;
18699 gimple_omp_parallel_set_grid_phony (parallel, true);
18701 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
18702 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
18703 gimple_seq s = NULL;
18704 gimple_seq_add_stmt (&s, new_bind);
18705 gimple_omp_set_body (parallel, s);
18707 struct walk_stmt_info wi_par;
18708 memset (&wi_par, 0, sizeof (wi_par));
18709 wi_par.info = new_bind;
18710 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
18711 grid_mark_tiling_loops, NULL, &wi_par);
18713 else if (is_a <gcall *> (stmt))
18714 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
18715 return NULL_TREE;
18718 /* Given freshly copied top level kernel SEQ, identify the individual OMP
18719 components, mark them as part of the kernel, copy assignments leading to them
18720 just before DST, remapping them using WI and adding new temporaries to
18721 TGT_BIND, and return the loop that will be used for kernel dispatch. */
18723 static gomp_for *
18724 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
18725 gimple_stmt_iterator *dst,
18726 gbind *tgt_bind, struct walk_stmt_info *wi)
18728 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
18729 GRID_SEGMENT_GLOBAL, wi);
18730 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
18731 gcc_assert (teams);
18732 gimple_omp_teams_set_grid_phony (teams, true);
18733 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
18734 tgt_bind, GRID_SEGMENT_GLOBAL, wi);
18735 gcc_checking_assert (stmt);
18736 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
18737 gcc_assert (dist);
18738 gimple_seq prebody = gimple_omp_for_pre_body (dist);
18739 if (prebody)
18740 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
18741 GRID_SEGMENT_GROUP, wi);
18743 if (grid->tiling)
18745 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
18746 gimple_omp_for_set_grid_group_iter (dist, true);
18748 struct walk_stmt_info wi_tiled;
18749 memset (&wi_tiled, 0, sizeof (wi_tiled));
18750 walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
18751 grid_mark_tiling_parallels_and_loops, NULL,
18752 &wi_tiled);
18753 return dist;
18755 else
18757 gimple_omp_for_set_grid_phony (dist, true);
18758 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
18759 tgt_bind,
18760 GRID_SEGMENT_PRIVATE, wi);
18761 gcc_checking_assert (stmt);
18762 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
18763 gimple_omp_parallel_set_grid_phony (parallel, true);
18764 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
18765 dst, tgt_bind,
18766 GRID_SEGMENT_PRIVATE, wi);
18767 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
18768 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
18769 prebody = gimple_omp_for_pre_body (inner_loop);
18770 if (prebody)
18771 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
18772 GRID_SEGMENT_PRIVATE, wi);
18774 if (gimple_omp_for_combined_p (inner_loop))
18775 grid_eliminate_combined_simd_part (inner_loop);
18776 struct walk_stmt_info body_wi;
18777 memset (&body_wi, 0, sizeof (body_wi));
18778 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
18779 grid_process_grid_body, NULL, &body_wi);
18781 return inner_loop;
18785 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
18786 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
18787 is the bind into which temporaries inserted before TARGET should be
18788 added. */
18790 static void
18791 grid_attempt_target_gridification (gomp_target *target,
18792 gimple_stmt_iterator *gsi,
18793 gbind *tgt_bind)
18796 grid_prop grid;
18797 memset (&grid, 0, sizeof (grid));
18798 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
18799 return;
18801 location_t loc = gimple_location (target);
18802 if (dump_enabled_p ())
18803 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
18804 "Target construct will be turned into a gridified HSA "
18805 "kernel\n");
18807 /* Copy target body to a GPUKERNEL construct: */
18808 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
18809 (gimple_omp_body (target));
18811 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
18812 struct walk_stmt_info wi;
18813 memset (&wi, 0, sizeof (struct walk_stmt_info));
18814 wi.info = declmap;
18816 /* Copy assignments in between OMP statements before target, mark OMP
18817 statements within copy appropriately. */
18818 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
18819 tgt_bind, &wi);
18821 gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
18822 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
18823 tree new_block = gimple_bind_block (new_bind);
18824 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
18825 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
18826 BLOCK_SUBBLOCKS (enc_block) = new_block;
18827 BLOCK_SUPERCONTEXT (new_block) = enc_block;
18828 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
18829 gimple_seq_add_stmt
18830 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
18831 gpukernel);
18833 for (size_t i = 0; i < grid.collapse; i++)
18834 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
18835 push_gimplify_context ();
18836 for (size_t i = 0; i < grid.collapse; i++)
18838 tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
18839 if (POINTER_TYPE_P (type))
18840 itype = signed_type_for (type);
18841 else
18842 itype = type;
18844 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
18845 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
18846 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
18847 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
18848 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
18849 adjust_for_condition (loc, &cond_code, &n2);
18850 n1 = fold_convert (itype, n1);
18851 n2 = fold_convert (itype, n2);
18853 tree step
18854 = get_omp_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
18856 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
18857 t = fold_build2 (PLUS_EXPR, itype, step, t);
18858 t = fold_build2 (PLUS_EXPR, itype, t, n2);
18859 t = fold_build2 (MINUS_EXPR, itype, t, n1);
18860 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
18861 t = fold_build2 (TRUNC_DIV_EXPR, itype,
18862 fold_build1 (NEGATE_EXPR, itype, t),
18863 fold_build1 (NEGATE_EXPR, itype, step));
18864 else
18865 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
18866 if (grid.tiling)
18868 if (cond_code == GT_EXPR)
18869 step = fold_build1 (NEGATE_EXPR, itype, step);
18870 t = fold_build2 (MULT_EXPR, itype, t, step);
18873 tree gs = fold_convert (uint32_type_node, t);
18874 gimple_seq tmpseq = NULL;
18875 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
18876 if (!gimple_seq_empty_p (tmpseq))
18877 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
18879 tree ws;
18880 if (grid.group_sizes[i])
18882 ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
18883 tmpseq = NULL;
18884 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
18885 if (!gimple_seq_empty_p (tmpseq))
18886 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
18888 else
18889 ws = build_zero_cst (uint32_type_node);
18891 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
18892 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
18893 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
18894 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
18895 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
18896 gimple_omp_target_set_clauses (target, c);
18898 pop_gimplify_context (tgt_bind);
18899 delete declmap;
18900 return;
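/* Illustratively: for a gridified loop of N iterations in dimension 0 with
   work-group size G, the code above attaches a _GRIDDIM_ clause carrying
   (dimension 0, size N, group G) to the target statement, from which the
   HSA back end can later derive the kernel launch geometry.  */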
18903 /* Walker function doing all the work for grid_gridify_all_targets. */
18905 static tree
18906 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
18907 bool *handled_ops_p,
18908 struct walk_stmt_info *incoming)
18910 *handled_ops_p = false;
18912 gimple *stmt = gsi_stmt (*gsi);
18913 gomp_target *target = dyn_cast <gomp_target *> (stmt);
18914 if (target)
18916 gbind *tgt_bind = (gbind *) incoming->info;
18917 gcc_checking_assert (tgt_bind);
18918 grid_attempt_target_gridification (target, gsi, tgt_bind);
18919 return NULL_TREE;
18921 gbind *bind = dyn_cast <gbind *> (stmt);
18922 if (bind)
18924 *handled_ops_p = true;
18925 struct walk_stmt_info wi;
18926 memset (&wi, 0, sizeof (wi));
18927 wi.info = bind;
18928 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
18929 grid_gridify_all_targets_stmt, NULL, &wi);
18931 return NULL_TREE;
18934 /* Attempt to gridify all target constructs in BODY_P. All such targets will
18935 have their bodies duplicated, with the new copy being put into a
18936 gimple_omp_grid_body statement. All kernel-related constructs within the
18937 grid_body will be marked with phony flags or kernel kinds. Moreover, some
18938 re-structuring is often needed, such as copying pre-bodies before the target
18939 construct so that kernel grid sizes can be computed. */
18941 static void
18942 grid_gridify_all_targets (gimple_seq *body_p)
18944 struct walk_stmt_info wi;
18945 memset (&wi, 0, sizeof (wi));
18946 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
18950 /* Main entry point. */
18952 static unsigned int
18953 execute_lower_omp (void)
18955 gimple_seq body;
18956 int i;
18957 omp_context *ctx;
18959 /* This pass always runs, to provide PROP_gimple_lomp.
18960 But often, there is nothing to do. */
18961 if (flag_cilkplus == 0 && flag_openacc == 0 && flag_openmp == 0
18962 && flag_openmp_simd == 0)
18963 return 0;
18965 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
18966 delete_omp_context);
18968 body = gimple_body (current_function_decl);
18970 if (hsa_gen_requested_p ())
18971 grid_gridify_all_targets (&body);
18973 scan_omp (&body, NULL);
18974 gcc_assert (taskreg_nesting_level == 0);
18975 FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
18976 finish_taskreg_scan (ctx);
18977 taskreg_contexts.release ();
18979 if (all_contexts->root)
18981 if (task_shared_vars)
18982 push_gimplify_context ();
18983 lower_omp (&body, NULL);
18984 if (task_shared_vars)
18985 pop_gimplify_context (NULL);
18988 if (all_contexts)
18990 splay_tree_delete (all_contexts);
18991 all_contexts = NULL;
18993 BITMAP_FREE (task_shared_vars);
18994 return 0;
18997 namespace {
18999 const pass_data pass_data_lower_omp =
19001 GIMPLE_PASS, /* type */
19002 "omplower", /* name */
19003 OPTGROUP_OPENMP, /* optinfo_flags */
19004 TV_NONE, /* tv_id */
19005 PROP_gimple_any, /* properties_required */
19006 PROP_gimple_lomp | PROP_gimple_lomp_dev, /* properties_provided */
19007 0, /* properties_destroyed */
19008 0, /* todo_flags_start */
19009 0, /* todo_flags_finish */
19012 class pass_lower_omp : public gimple_opt_pass
19014 public:
19015 pass_lower_omp (gcc::context *ctxt)
19016 : gimple_opt_pass (pass_data_lower_omp, ctxt)
19019 /* opt_pass methods: */
19020 virtual unsigned int execute (function *) { return execute_lower_omp (); }
19022 }; // class pass_lower_omp
19024 } // anon namespace
19026 gimple_opt_pass *
19027 make_pass_lower_omp (gcc::context *ctxt)
19029 return new pass_lower_omp (ctxt);
19032 /* The following is a utility to diagnose structured block violations.
19033 It is not part of the "omplower" pass, as that's invoked too late. It
19034 should be invoked by the respective front ends after gimplification. */
19036 static splay_tree all_labels;
19038 /* Check for mismatched contexts and generate an error if needed. Return
19039 true if an error is detected. */
19041 static bool
19042 diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
19043 gimple *branch_ctx, gimple *label_ctx)
19045 gcc_checking_assert (!branch_ctx || is_gimple_omp (branch_ctx));
19046 gcc_checking_assert (!label_ctx || is_gimple_omp (label_ctx));
19048 if (label_ctx == branch_ctx)
19049 return false;
19051 const char *kind = NULL;
19053 if (flag_cilkplus)
19055 if ((branch_ctx
19056 && gimple_code (branch_ctx) == GIMPLE_OMP_FOR
19057 && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
19058 || (label_ctx
19059 && gimple_code (label_ctx) == GIMPLE_OMP_FOR
19060 && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
19061 kind = "Cilk Plus";
19063 if (flag_openacc)
19065 if ((branch_ctx && is_gimple_omp_oacc (branch_ctx))
19066 || (label_ctx && is_gimple_omp_oacc (label_ctx)))
19068 gcc_checking_assert (kind == NULL);
19069 kind = "OpenACC";
19072 if (kind == NULL)
19074 gcc_checking_assert (flag_openmp);
19075 kind = "OpenMP";
19079 /* Previously we kept track of the label's entire context in diagnose_sb_[12]
19080 so we could traverse it and issue a correct "exit" or "enter" error
19081 message upon a structured block violation.
19083 We built the context by building a list with tree_cons'ing, but there is
19084 no easy counterpart in gimple tuples. It seems like far too much work
19085 for issuing exit/enter error messages. If someone really misses the
19086 distinct error message... patches welcome. */
19089 #if 0
19090 /* Try to avoid confusing the user by producing an error message
19091 with correct "exit" or "enter" verbiage. We prefer "exit"
19092 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
19093 if (branch_ctx == NULL)
19094 exit_p = false;
19095 else
19097 while (label_ctx)
19099 if (TREE_VALUE (label_ctx) == branch_ctx)
19101 exit_p = false;
19102 break;
19104 label_ctx = TREE_CHAIN (label_ctx);
19108 if (exit_p)
19109 error ("invalid exit from %s structured block", kind);
19110 else
19111 error ("invalid entry to %s structured block", kind);
19112 #endif
19114 /* If it's obvious we have an invalid entry, be specific about the error. */
19115 if (branch_ctx == NULL)
19116 error ("invalid entry to %s structured block", kind);
19117 else
19119 /* Otherwise, be vague and lazy, but efficient. */
19120 error ("invalid branch to/from %s structured block", kind);
19123 gsi_replace (gsi_p, gimple_build_nop (), false);
19124 return true;
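/* An illustrative example of user code that the check above diagnoses:

     #pragma omp parallel
     {
       if (cond)
         goto out;    // error: invalid branch to/from OpenMP structured block
     }
   out:;
*/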
19127 /* Pass 1: Create a minimal tree of structured blocks, and record
19128 where each label is found. */
19130 static tree
19131 diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
19132 struct walk_stmt_info *wi)
19134 gimple *context = (gimple *) wi->info;
19135 gimple *inner_context;
19136 gimple *stmt = gsi_stmt (*gsi_p);
19138 *handled_ops_p = true;
19140 switch (gimple_code (stmt))
19142 WALK_SUBSTMTS;
19144 case GIMPLE_OMP_PARALLEL:
19145 case GIMPLE_OMP_TASK:
19146 case GIMPLE_OMP_SECTIONS:
19147 case GIMPLE_OMP_SINGLE:
19148 case GIMPLE_OMP_SECTION:
19149 case GIMPLE_OMP_MASTER:
19150 case GIMPLE_OMP_ORDERED:
19151 case GIMPLE_OMP_CRITICAL:
19152 case GIMPLE_OMP_TARGET:
19153 case GIMPLE_OMP_TEAMS:
19154 case GIMPLE_OMP_TASKGROUP:
19155 /* The minimal context here is just the current OMP construct. */
19156 inner_context = stmt;
19157 wi->info = inner_context;
19158 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
19159 wi->info = context;
19160 break;
19162 case GIMPLE_OMP_FOR:
19163 inner_context = stmt;
19164 wi->info = inner_context;
19165 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
19166 walk them. */
19167 walk_gimple_seq (gimple_omp_for_pre_body (stmt),
19168 diagnose_sb_1, NULL, wi);
19169 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
19170 wi->info = context;
19171 break;
19173 case GIMPLE_LABEL:
19174 splay_tree_insert (all_labels,
19175 (splay_tree_key) gimple_label_label (
19176 as_a <glabel *> (stmt)),
19177 (splay_tree_value) context);
19178 break;
19180 default:
19181 break;
19184 return NULL_TREE;
19187 /* Pass 2: Check each branch and see if its context differs from that of
19188 the destination label's context. */
19190 static tree
19191 diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
19192 struct walk_stmt_info *wi)
19194 gimple *context = (gimple *) wi->info;
19195 splay_tree_node n;
19196 gimple *stmt = gsi_stmt (*gsi_p);
19198 *handled_ops_p = true;
19200 switch (gimple_code (stmt))
19202 WALK_SUBSTMTS;
19204 case GIMPLE_OMP_PARALLEL:
19205 case GIMPLE_OMP_TASK:
19206 case GIMPLE_OMP_SECTIONS:
19207 case GIMPLE_OMP_SINGLE:
19208 case GIMPLE_OMP_SECTION:
19209 case GIMPLE_OMP_MASTER:
19210 case GIMPLE_OMP_ORDERED:
19211 case GIMPLE_OMP_CRITICAL:
19212 case GIMPLE_OMP_TARGET:
19213 case GIMPLE_OMP_TEAMS:
19214 case GIMPLE_OMP_TASKGROUP:
19215 wi->info = stmt;
19216 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
19217 wi->info = context;
19218 break;
19220 case GIMPLE_OMP_FOR:
19221 wi->info = stmt;
19222 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
19223 walk them. */
19224 walk_gimple_seq_mod (gimple_omp_for_pre_body_ptr (stmt),
19225 diagnose_sb_2, NULL, wi);
19226 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
19227 wi->info = context;
19228 break;
19230 case GIMPLE_COND:
19232 gcond *cond_stmt = as_a <gcond *> (stmt);
19233 tree lab = gimple_cond_true_label (cond_stmt);
19234 if (lab)
19236 n = splay_tree_lookup (all_labels,
19237 (splay_tree_key) lab);
19238 diagnose_sb_0 (gsi_p, context,
19239 n ? (gimple *) n->value : NULL);
19241 lab = gimple_cond_false_label (cond_stmt);
19242 if (lab)
19244 n = splay_tree_lookup (all_labels,
19245 (splay_tree_key) lab);
19246 diagnose_sb_0 (gsi_p, context,
19247 n ? (gimple *) n->value : NULL);
19250 break;
19252 case GIMPLE_GOTO:
19254 tree lab = gimple_goto_dest (stmt);
19255 if (TREE_CODE (lab) != LABEL_DECL)
19256 break;
19258 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
19259 diagnose_sb_0 (gsi_p, context, n ? (gimple *) n->value : NULL);
19261 break;
19263 case GIMPLE_SWITCH:
19265 gswitch *switch_stmt = as_a <gswitch *> (stmt);
19266 unsigned int i;
19267 for (i = 0; i < gimple_switch_num_labels (switch_stmt); ++i)
19269 tree lab = CASE_LABEL (gimple_switch_label (switch_stmt, i));
19270 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
19271 if (n && diagnose_sb_0 (gsi_p, context, (gimple *) n->value))
19272 break;
19275 break;
19277 case GIMPLE_RETURN:
19278 diagnose_sb_0 (gsi_p, context, NULL);
19279 break;
19281 default:
19282 break;
19285 return NULL_TREE;
19288 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
19289 GIMPLE_* codes. */
19290 bool
19291 make_gimple_omp_edges (basic_block bb, struct omp_region **region,
19292 int *region_idx)
19294 gimple *last = last_stmt (bb);
19295 enum gimple_code code = gimple_code (last);
19296 struct omp_region *cur_region = *region;
19297 bool fallthru = false;
19299 switch (code)
19301 case GIMPLE_OMP_PARALLEL:
19302 case GIMPLE_OMP_TASK:
19303 case GIMPLE_OMP_FOR:
19304 case GIMPLE_OMP_SINGLE:
19305 case GIMPLE_OMP_TEAMS:
19306 case GIMPLE_OMP_MASTER:
19307 case GIMPLE_OMP_TASKGROUP:
19308 case GIMPLE_OMP_CRITICAL:
19309 case GIMPLE_OMP_SECTION:
19310 case GIMPLE_OMP_GRID_BODY:
19311 cur_region = new_omp_region (bb, code, cur_region);
19312 fallthru = true;
19313 break;
19315 case GIMPLE_OMP_ORDERED:
19316 cur_region = new_omp_region (bb, code, cur_region);
19317 fallthru = true;
19318 if (find_omp_clause (gimple_omp_ordered_clauses
19319 (as_a <gomp_ordered *> (last)),
19320 OMP_CLAUSE_DEPEND))
19321 cur_region = cur_region->outer;
19322 break;
19324 case GIMPLE_OMP_TARGET:
19325 cur_region = new_omp_region (bb, code, cur_region);
19326 fallthru = true;
19327 switch (gimple_omp_target_kind (last))
19329 case GF_OMP_TARGET_KIND_REGION:
19330 case GF_OMP_TARGET_KIND_DATA:
19331 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
19332 case GF_OMP_TARGET_KIND_OACC_KERNELS:
19333 case GF_OMP_TARGET_KIND_OACC_DATA:
19334 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
19335 break;
19336 case GF_OMP_TARGET_KIND_UPDATE:
19337 case GF_OMP_TARGET_KIND_ENTER_DATA:
19338 case GF_OMP_TARGET_KIND_EXIT_DATA:
19339 case GF_OMP_TARGET_KIND_OACC_UPDATE:
19340 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
19341 case GF_OMP_TARGET_KIND_OACC_DECLARE:
19342 cur_region = cur_region->outer;
19343 break;
19344 default:
19345 gcc_unreachable ();
19347 break;
19349 case GIMPLE_OMP_SECTIONS:
19350 cur_region = new_omp_region (bb, code, cur_region);
19351 fallthru = true;
19352 break;
19354 case GIMPLE_OMP_SECTIONS_SWITCH:
19355 fallthru = false;
19356 break;
19358 case GIMPLE_OMP_ATOMIC_LOAD:
19359 case GIMPLE_OMP_ATOMIC_STORE:
19360 fallthru = true;
19361 break;
19363 case GIMPLE_OMP_RETURN:
19364 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
19365 somewhere other than the next block. This will be
19366 created later. */
19367 cur_region->exit = bb;
19368 if (cur_region->type == GIMPLE_OMP_TASK)
19369 /* Add an edge corresponding to not scheduling the task
19370 immediately. */
19371 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
19372 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
19373 cur_region = cur_region->outer;
19374 break;
19376 case GIMPLE_OMP_CONTINUE:
19377 cur_region->cont = bb;
19378 switch (cur_region->type)
19380 case GIMPLE_OMP_FOR:
19381 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
19382 succs edges as abnormal to prevent splitting
19383 them. */
19384 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
19385 /* Make the loopback edge. */
19386 make_edge (bb, single_succ (cur_region->entry),
19387 EDGE_ABNORMAL);
19389 /* Create an edge from GIMPLE_OMP_FOR to exit, which
19390 corresponds to the case that the body of the loop
19391 is not executed at all. */
19392 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
19393 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
19394 fallthru = false;
19395 break;
19397 case GIMPLE_OMP_SECTIONS:
19398 /* Wire up the edges into and out of the nested sections. */
19400 basic_block switch_bb = single_succ (cur_region->entry);
19402 struct omp_region *i;
19403 for (i = cur_region->inner; i ; i = i->next)
19405 gcc_assert (i->type == GIMPLE_OMP_SECTION);
19406 make_edge (switch_bb, i->entry, 0);
19407 make_edge (i->exit, bb, EDGE_FALLTHRU);
19410 /* Make the loopback edge to the block with
19411 GIMPLE_OMP_SECTIONS_SWITCH. */
19412 make_edge (bb, switch_bb, 0);
19414 /* Make the edge from the switch to exit. */
19415 make_edge (switch_bb, bb->next_bb, 0);
19416 fallthru = false;
19418 break;
19420 case GIMPLE_OMP_TASK:
19421 fallthru = true;
19422 break;
19424 default:
19425 gcc_unreachable ();
19427 break;
19429 default:
19430 gcc_unreachable ();
19433 if (*region != cur_region)
19435 *region = cur_region;
19436 if (cur_region)
19437 *region_idx = cur_region->entry->index;
19438 else
19439 *region_idx = 0;
19442 return fallthru;
19445 static unsigned int
19446 diagnose_omp_structured_block_errors (void)
19448 struct walk_stmt_info wi;
19449 gimple_seq body = gimple_body (current_function_decl);
19451 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
19453 memset (&wi, 0, sizeof (wi));
19454 walk_gimple_seq (body, diagnose_sb_1, NULL, &wi);
19456 memset (&wi, 0, sizeof (wi));
19457 wi.want_locations = true;
19458 walk_gimple_seq_mod (&body, diagnose_sb_2, NULL, &wi);
19460 gimple_set_body (current_function_decl, body);
19462 splay_tree_delete (all_labels);
19463 all_labels = NULL;
19465 return 0;
19468 namespace {
19470 const pass_data pass_data_diagnose_omp_blocks =
19472 GIMPLE_PASS, /* type */
19473 "*diagnose_omp_blocks", /* name */
19474 OPTGROUP_OPENMP, /* optinfo_flags */
19475 TV_NONE, /* tv_id */
19476 PROP_gimple_any, /* properties_required */
19477 0, /* properties_provided */
19478 0, /* properties_destroyed */
19479 0, /* todo_flags_start */
19480 0, /* todo_flags_finish */
19483 class pass_diagnose_omp_blocks : public gimple_opt_pass
19485 public:
19486 pass_diagnose_omp_blocks (gcc::context *ctxt)
19487 : gimple_opt_pass (pass_data_diagnose_omp_blocks, ctxt)
19490 /* opt_pass methods: */
19491 virtual bool gate (function *)
19493 return flag_cilkplus || flag_openacc || flag_openmp;
19495 virtual unsigned int execute (function *)
19497 return diagnose_omp_structured_block_errors ();
19500 }; // class pass_diagnose_omp_blocks
19502 } // anon namespace
19504 gimple_opt_pass *
19505 make_pass_diagnose_omp_blocks (gcc::context *ctxt)
19507 return new pass_diagnose_omp_blocks (ctxt);
19510 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
19511 adds their addresses and sizes to constructor-vector V_CTOR. */
19512 static void
19513 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
19514 vec<constructor_elt, va_gc> *v_ctor)
19516 unsigned len = vec_safe_length (v_decls);
19517 for (unsigned i = 0; i < len; i++)
19519 tree it = (*v_decls)[i];
19520 bool is_var = VAR_P (it);
19521 bool is_link_var
19522 = is_var
19523 #ifdef ACCEL_COMPILER
19524 && DECL_HAS_VALUE_EXPR_P (it)
19525 #endif
19526 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
19528 tree size = NULL_TREE;
19529 if (is_var)
19530 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
19532 tree addr;
19533 if (!is_link_var)
19534 addr = build_fold_addr_expr (it);
19535 else
19537 #ifdef ACCEL_COMPILER
19538 /* For "omp declare target link" vars add address of the pointer to
19539 the target table, instead of address of the var. */
19540 tree value_expr = DECL_VALUE_EXPR (it);
19541 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
19542 varpool_node::finalize_decl (link_ptr_decl);
19543 addr = build_fold_addr_expr (link_ptr_decl);
19544 #else
19545 addr = build_fold_addr_expr (it);
19546 #endif
19548 /* Most significant bit of the size marks "omp declare target link"
19549 vars in host and target tables. */
19550 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
19551 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
19552 * BITS_PER_UNIT - 1);
19553 size = wide_int_to_tree (const_ptr_type_node, isize);
19556 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
19557 if (is_var)
19558 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
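/* For example (illustrative): on a 64-bit target a 4-byte "omp declare
   target link" variable is recorded with size 0x8000000000000004, i.e.
   1ULL << 63 marks the link property while the low bits keep the size.  */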
19562 /* Create new symbols containing (address, size) pairs for global variables,
19563 marked with "omp declare target" attribute, as well as addresses for the
19564 functions, which are outlined offloading regions. */
19565 void
19566 omp_finish_file (void)
19568 unsigned num_funcs = vec_safe_length (offload_funcs);
19569 unsigned num_vars = vec_safe_length (offload_vars);
19571 if (num_funcs == 0 && num_vars == 0)
19572 return;
19574 if (targetm_common.have_named_sections)
19576 vec<constructor_elt, va_gc> *v_f, *v_v;
19577 vec_alloc (v_f, num_funcs);
19578 vec_alloc (v_v, num_vars * 2);
19580 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
19581 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
19583 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
19584 num_vars * 2);
19585 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
19586 num_funcs);
19587 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
19588 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
19589 tree ctor_v = build_constructor (vars_decl_type, v_v);
19590 tree ctor_f = build_constructor (funcs_decl_type, v_f);
19591 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
19592 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
19593 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
19594 get_identifier (".offload_func_table"),
19595 funcs_decl_type);
19596 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
19597 get_identifier (".offload_var_table"),
19598 vars_decl_type);
19599 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
19600 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
19601 otherwise a joint table in a binary will contain padding between
19602 tables from multiple object files. */
19603 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
19604 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
19605 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
19606 DECL_INITIAL (funcs_decl) = ctor_f;
19607 DECL_INITIAL (vars_decl) = ctor_v;
19608 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
19609 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
19611 varpool_node::finalize_decl (vars_decl);
19612 varpool_node::finalize_decl (funcs_decl);
19614 else
19616 for (unsigned i = 0; i < num_funcs; i++)
19618 tree it = (*offload_funcs)[i];
19619 targetm.record_offload_symbol (it);
19621 for (unsigned i = 0; i < num_vars; i++)
19623 tree it = (*offload_vars)[i];
19624 targetm.record_offload_symbol (it);
19629 /* Find the number of threads (POS = false), or thread number (POS =
19630 true) for an OpenACC region partitioned as MASK. Setup code
19631 required for the calculation is added to SEQ. */
19633 static tree
19634 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
19636 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
19637 unsigned ix;
19639 /* Start at gang level, and examine relevant dimension indices. */
19640 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
19641 if (GOMP_DIM_MASK (ix) & mask)
19643 tree arg = build_int_cst (unsigned_type_node, ix);
19645 if (res)
19647 /* We had an outer index, so scale that by the size of
19648 this dimension. */
19649 tree n = create_tmp_var (integer_type_node);
19650 gimple *call
19651 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
19653 gimple_call_set_lhs (call, n);
19654 gimple_seq_add_stmt (seq, call);
19655 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
19657 if (pos)
19659 /* Determine index in this dimension. */
19660 tree id = create_tmp_var (integer_type_node);
19661 gimple *call = gimple_build_call_internal
19662 (IFN_GOACC_DIM_POS, 1, arg);
19664 gimple_call_set_lhs (call, id);
19665 gimple_seq_add_stmt (seq, call);
19666 if (res)
19667 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
19668 else
19669 res = id;
19673 if (res == NULL_TREE)
19674 res = integer_zero_node;
19676 return res;
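/* A worked example of the above (illustrative): for a MASK covering gang and
   worker, the emitted internal calls compute

     POS == false:  GOACC_DIM_SIZE (gang) * GOACC_DIM_SIZE (worker)
     POS == true:   GOACC_DIM_POS (gang) * GOACC_DIM_SIZE (worker)
                    + GOACC_DIM_POS (worker)

   i.e. the total thread count, or the linearized thread number.  */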
19679 /* Transform IFN_GOACC_LOOP calls to actual code. See
19680 expand_oacc_for for where these are generated. At the vector
19681 level, we stride loops, such that each member of a warp will
19682 operate on adjacent iterations. At the worker and gang level,
19683 each gang/warp executes a set of contiguous iterations. Chunking
19684 can override this such that each iteration engine executes a
19685 contiguous chunk, and then moves on to stride to the next chunk. */
19687 static void
19688 oacc_xform_loop (gcall *call)
19690 gimple_stmt_iterator gsi = gsi_for_stmt (call);
19691 enum ifn_goacc_loop_kind code
19692 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
19693 tree dir = gimple_call_arg (call, 1);
19694 tree range = gimple_call_arg (call, 2);
19695 tree step = gimple_call_arg (call, 3);
19696 tree chunk_size = NULL_TREE;
19697 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
19698 tree lhs = gimple_call_lhs (call);
19699 tree type = TREE_TYPE (lhs);
19700 tree diff_type = TREE_TYPE (range);
19701 tree r = NULL_TREE;
19702 gimple_seq seq = NULL;
19703 bool chunking = false, striding = true;
19704 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
19705 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
19707 #ifdef ACCEL_COMPILER
19708 chunk_size = gimple_call_arg (call, 4);
19709 if (integer_minus_onep (chunk_size) /* Force static allocation. */
19710 || integer_zerop (chunk_size)) /* Default (also static). */
19712 /* If we're at the gang level, we want each to execute a
19713 contiguous run of iterations. Otherwise we want each element
19714 to stride. */
19715 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
19716 chunking = false;
19718 else
19720 /* Chunk of size 1 is striding. */
19721 striding = integer_onep (chunk_size);
19722 chunking = !striding;
19724 #endif
19726 /* striding=true, chunking=true
19727 -> invalid.
19728 striding=true, chunking=false
19729 -> chunks=1
19730 striding=false,chunking=true
19731 -> chunks=ceil (range/(chunksize*threads*step))
19732 striding=false,chunking=false
19733 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
19734 push_gimplify_context (true);
19736 switch (code)
19738 default: gcc_unreachable ();
19740 case IFN_GOACC_LOOP_CHUNKS:
19741 if (!chunking)
19742 r = build_int_cst (type, 1);
19743 else
19745 /* chunk_max
19746 = (range - dir + per) / per, where per = num_threads * chunk_size * step */
19747 tree per = oacc_thread_numbers (false, mask, &seq);
19748 per = fold_convert (type, per);
19749 chunk_size = fold_convert (type, chunk_size);
19750 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
19751 per = fold_build2 (MULT_EXPR, type, per, step);
19752 r = build2 (MINUS_EXPR, type, range, dir);
19753 r = build2 (PLUS_EXPR, type, r, per);
19754 r = build2 (TRUNC_DIV_EXPR, type, r, per);
19756 break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      if (striding)
	{
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
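
/* For orientation, a rough sketch of how the four IFN_GOACC_LOOP
   queries lowered above combine in the expanded (chunked) loop; this
   is an approximation of the shape produced earlier by loop
   expansion, with direction and comparison details elided:

     chunks = GOACC_LOOP (CHUNKS, ...);
     step = GOACC_LOOP (STEP, ...);
     for (chunk = 0; chunk < chunks; chunk++)
       {
	 offset = GOACC_LOOP (OFFSET, ..., chunk);
	 bound = GOACC_LOOP (BOUND, ..., offset);
	 for (ix = offset; ix < bound; ix += step)
	   ...loop body...;
       }  */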

/* Default partitioned and minimum partitioned dimensions.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];

/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma-separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }
#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "-fopenacc-dim operand is malformed at '%s'", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
}
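
/* Example (hypothetical operand): -fopenacc-dim=32::4 sets the
   default gang dimension to 32 and the vector dimension to 4; the
   empty middle entry leaves the worker dimension at -1, i.e.
   target-chosen.  */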

/* Validate and update the dimensions for offloaded FN.  ATTRS is the
   raw attribute.  DIMS is an array of dimensions, which is filled in.
   LEVEL is the partitioning level of a routine, or -1 for an offload
   region itself.  USED is the mask of partitioned execution in the
   function.  */

static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);
  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose a
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector-single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partitioning is not
	   1, the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	{
	  pos = tree_cons (purpose[ix],
			   build_int_cst (integer_type_node, dims[ix]),
			   pos);
	  if (is_kernel)
	    TREE_PUBLIC (pos) = 1;
	}
      replace_oacc_fn_attrib (fn, pos);
    }
}

/* Create an empty OpenACC loop structure at LOC.  */

static oacc_loop *
new_oacc_loop_raw (oacc_loop *parent, location_t loc)
{
  oacc_loop *loop = XCNEW (oacc_loop);

  loop->parent = parent;
  loop->child = loop->sibling = NULL;

  if (parent)
    {
      loop->sibling = parent->child;
      parent->child = loop;
    }

  loop->loc = loc;
  loop->marker = NULL;
  memset (loop->heads, 0, sizeof (loop->heads));
  memset (loop->tails, 0, sizeof (loop->tails));
  loop->routine = NULL_TREE;

  loop->mask = loop->flags = loop->inner = 0;
  loop->ifns = 0;
  loop->chunk_size = 0;
  loop->head_end = NULL;

  return loop;
}

/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}

/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}
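
/* Illustrative mapping (hedged, not traced from an actual dump): for
   a "gang(static:4)" clause the lowering phase emits a head marker
   whose argument 3 carries OLF_GANG_STATIC plus the gang dimension
   bit, and whose argument 4 carries the static chunk size, here 4;
   the code above recovers both into the oacc_loop structure.  */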

/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
		^ (GOMP_DIM_MASK (level) - 1));
}
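
/* For example, a worker-level routine (level == GOMP_DIM_WORKER == 1)
   yields mask = 0b111 ^ 0b001 = 0b110: the routine may itself use
   worker and vector parallelism, so the caller must treat those axes
   as occupied.  */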

/* Finish off the current OpenACC loop ending at tail marker TAIL.
   Return the parent loop.  */

static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
  /* If the loop has been collapsed, don't partition it.  */
  if (!loop->ifns)
    loop->mask = loop->flags = 0;
  return loop->parent;
}

/* Free all OpenACC loop structures within LOOP (inclusive).  */

static void
free_oacc_loop (oacc_loop *loop)
{
  if (loop->sibling)
    free_oacc_loop (loop->sibling);
  if (loop->child)
    free_oacc_loop (loop->child);

  free (loop);
}

/* Dump out the OpenACC loop head or tail beginning at FROM.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2, 0);

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Dump OpenACC loops LOOP, its siblings and its children.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
	   loop->flags, loop->mask,
	   LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2, 0);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
	     depth * 2, "", DECL_SOURCE_FILE (loop->routine),
	     DECL_SOURCE_LINE (loop->routine),
	     IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}

void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}

/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = get_oacc_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	  /* Count the goacc loop abstraction fns, to determine if the
	     loop was collapsed already.  */
	  loop->ifns++;
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}

/* LOOP is the first sibling.  Reverse the order in place and return
   the new first sibling.  Recurse to child loops.  */

static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
  oacc_loop *last = NULL;
  do
    {
      if (loop->child)
	loop->child = oacc_loop_sibling_nreverse (loop->child);

      oacc_loop *next = loop->sibling;
      loop->sibling = last;
      last = loop;
      loop = next;
    }
  while (loop);

  return last;
}

/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're
     going to use in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}

/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
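
/* Concretely (illustrative): once a loop has been assigned, say, the
   worker axis, every IFN_UNIQUE (OACC_FORK, ...) and
   IFN_UNIQUE (OACC_JOIN, ...) between its markers has argument 2
   rewritten from the placeholder to the constant GOMP_DIM_WORKER,
   and any IFN_GOACC_REDUCTION receives the same level in
   argument 3.  */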

/* Transform the IFN_GOACC_LOOP internal functions by providing the
   determined partitioning mask and chunking argument.  END_MARKER
   points at the end IFN_HEAD_TAIL call introducing the loop.  IFNS
   is the number of IFN_GOACC_LOOP calls for the loop.  MASK_ARG is
   the replacement partitioning mask and CHUNK_ARG is the replacement
   chunking arg.  */

static void
oacc_loop_xform_loop (gcall *end_marker, unsigned ifns,
		      tree mask_arg, tree chunk_arg)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);

  gcc_checking_assert (ifns);
  for (;;)
    {
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (!is_gimple_call (stmt))
	    continue;

	  gcall *call = as_a <gcall *> (stmt);

	  if (!gimple_call_internal_p (call))
	    continue;

	  if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
	    continue;

	  *gimple_call_arg_ptr (call, 5) = mask_arg;
	  *gimple_call_arg_ptr (call, 4) = chunk_arg;
	  ifns--;
	  if (!ifns)
	    return;
	}

      /* The LOOP_BOUND ifn could be in the single successor
	 block.  */
      basic_block bb = single_succ (gsi_bb (gsi));
      gsi = gsi_start_bb (bb);
    }
}

/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  if (loop->mask && !loop->routine)
    {
      int ix;
      unsigned mask = loop->mask;
      unsigned dim = GOMP_DIM_GANG;
      tree mask_arg = build_int_cst (unsigned_type_node, mask);
      tree chunk_arg = loop->chunk_size;

      oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg);

      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}

/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? "%<seq%> overrides other OpenACC loop specifiers"
		      : "%<auto%> conflicts with other OpenACC loop specifiers");
	  auto_par = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (auto_par && (loop->flags & OLF_INDEPENDENT))
	mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
    }

  if (this_mask & outer_mask)
    {
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if (outer->mask & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			"%s uses same OpenACC parallelism as containing loop",
			loop->routine ? "routine call" : "inner loop");
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    error_at (loop->loc,
		      "%s uses OpenACC parallelism disallowed by containing routine",
		      loop->routine ? "routine call" : "loop");

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  loop->mask = this_mask;
  mask_all |= this_mask;

  if (loop->child)
    {
      loop->inner = oacc_loop_fixed_partitions (loop->child,
						outer_mask | this_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
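
/* Example of the kind of nesting this rejects (illustrative):

     #pragma acc loop gang
     for (...)
       #pragma acc loop gang     // same parallelism as containing loop
       for (...) ...

   The inner loop's gang bit overlaps OUTER_MASK, so it is diagnosed
   and stripped from THIS_MASK above.  */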

/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
    {
      /* Allocate the outermost loop at the outermost available
	 level.  */
      unsigned this_mask = outer_mask + 1;

      if (!(this_mask & loop->inner))
	loop->mask = this_mask;
    }

  if (loop->child)
    {
      unsigned child_mask = outer_mask | loop->mask;

      if (loop->mask || assign)
	child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);

      loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
    }

  if (assign && !loop->mask)
    {
      /* Allocate the loop at the innermost available level.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      if (!this_mask && noisy)
	warning_at (loop->loc, 0,
		    "insufficient partitioning available to parallelize loop");

      loop->mask = this_mask;
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);

  inner_mask |= loop->inner | loop->mask;

  return inner_mask;
}
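
/* Worked example (illustrative): given an outer "acc loop auto
   independent" containing another "acc loop auto independent", the
   outer loop is assigned OUTER_MASK + 1 on the way down, i.e. gang
   when at the top level.  The inner loop is assigned on the way back
   up: nothing below it is partitioned, so the placeholder
   GOMP_DIM_MASK (GOMP_DIM_MAX) shifted right once selects the vector
   axis; the warning above fires only when no axis remains.  */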

/* Walk the OpenACC loop hierarchy to check and assign partitioning
   axes.  Return mask of partitioning.  */

static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
  unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);

  if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
    {
      mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
      mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
    }

  return mask_all;
}

/* Default fork/join early expander.  Delete the function calls if
   there is no RTL expander.  */

bool
default_goacc_fork_join (gcall *ARG_UNUSED (call),
			 const int *ARG_UNUSED (dims), bool is_fork)
{
  if (is_fork)
    return targetm.have_oacc_fork ();
  else
    return targetm.have_oacc_join ();
}

/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'  */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = get_oacc_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
	     ? "Function is kernels offload\n"
	     : fn_level < 0 ? "Function is parallel offload\n"
	     : "Function is routine level %d\n", fn_level);

  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  int dims[GOMP_DIM_MAX];

  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  gcc_unreachable ();

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}

/* Default launch dimension validator.  Force everything to 1.  A
   backend that wants to provide larger dimensions must override this
   hook.  */

bool
default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
			     int ARG_UNUSED (fn_level))
{
  bool changed = false;

  for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      if (dims[ix] != 1)
	{
	  dims[ix] = 1;
	  changed = true;
	}
    }

  return changed;
}

/* Default dimension bound is unknown on accelerator and 1 on host.  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  return 0;
#else
  return 1;
#endif
}

namespace {

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}

/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT
   targets, VF is 1 and LANE is 0; on SIMT targets, VF is folded to a
   constant, and LANE is kept to be expanded to RTL later on.  Also
   cleanup all other SIMT internal functions on non-SIMT targets, and
   likewise some SIMD internal functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
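
/* Summary of the folding above on a non-SIMT target (vf == 1),
   illustrative: GOMP_USE_SIMT -> 0, GOMP_SIMT_LANE -> 0,
   GOMP_SIMT_VF -> 1, and GOMP_SIMT_VOTE_ANY / GOMP_SIMT_XCHG_*
   collapse to their first argument, while the GOMP_SIMD_* queries
   are left untouched here for later SIMD lowering.  */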

namespace {

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *ARG_UNUSED (fun))
    {
      /* FIXME: this should use PROP_gimple_lomp_dev.  */
#ifdef ACCEL_COMPILER
      return true;
#else
      return ENABLE_OFFLOADING && (flag_openmp || in_lto_p);
#endif
    }

  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
21049 /* "omp declare target link" handling pass. */
21051 namespace {
21053 const pass_data pass_data_omp_target_link =
21055 GIMPLE_PASS, /* type */
21056 "omptargetlink", /* name */
21057 OPTGROUP_OPENMP, /* optinfo_flags */
21058 TV_NONE, /* tv_id */
21059 PROP_ssa, /* properties_required */
21060 0, /* properties_provided */
21061 0, /* properties_destroyed */
21062 0, /* todo_flags_start */
21063 TODO_update_ssa, /* todo_flags_finish */
21066 class pass_omp_target_link : public gimple_opt_pass
21068 public:
21069 pass_omp_target_link (gcc::context *ctxt)
21070 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
21073 /* opt_pass methods: */
21074 virtual bool gate (function *fun)
21076 #ifdef ACCEL_COMPILER
21077 tree attrs = DECL_ATTRIBUTES (fun->decl);
21078 return lookup_attribute ("omp declare target", attrs)
21079 || lookup_attribute ("omp target entrypoint", attrs);
21080 #else
21081 (void) fun;
21082 return false;
21083 #endif
21086 virtual unsigned execute (function *);
21089 /* Callback for walk_gimple_stmt used to scan for link var operands. */
21091 static tree
21092 find_link_var_op (tree *tp, int *walk_subtrees, void *)
21094 tree t = *tp;
21096 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)
21097 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
21099 *walk_subtrees = 0;
21100 return t;
21103 return NULL_TREE;
21106 unsigned
21107 pass_omp_target_link::execute (function *fun)
21109 basic_block bb;
21110 FOR_EACH_BB_FN (bb, fun)
21112 gimple_stmt_iterator gsi;
21113 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21114 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
21115 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
21118 return 0;
21121 } // anon namespace
21123 gimple_opt_pass *
21124 make_pass_omp_target_link (gcc::context *ctxt)
21126 return new pass_omp_target_link (ctxt);
21129 #include "gt-omp-low.h"