gcc/omp-low.c

/* Lowering pass for OMP directives.  Converts OMP directives into explicit
   calls to the runtime library (libgomp), data marshalling to implement data
   sharing and copying clauses, offloading to accelerators, and more.

   Contributed by Diego Novillo <dnovillo@redhat.com>

   Copyright (C) 2005-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-iterator.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "flags.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "except.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "common/common-target.h"
#include "omp-low.h"
#include "gimple-low.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "tree-nested.h"
#include "tree-eh.h"
#include "cilk.h"
#include "context.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "symbol-summary.h"
#include "hsa.h"
#include "params.h"

/* Lowering of OMP parallel and workshare constructs proceeds in two
   phases.  The first phase scans the function looking for OMP statements
   and then for variables that must be replaced to satisfy data sharing
   clauses.  The second phase expands code for the constructs, as well as
   re-gimplifying things when variables have been replaced with complex
   expressions.

   Final code generation is done by pass_expand_omp.  The flowgraph is
   scanned for regions which are then moved to a new
   function, to be invoked by the thread library, or offloaded.  */

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Context structure.  Used to store information about each parallel
   directive in the code.  */

struct omp_context
{
  /* This field must be at the beginning, as we do "inheritance": Some
     callback functions for tree-inline.c (e.g., omp_copy_decl)
     receive a copy_body_data pointer that is up-casted to an
     omp_context pointer.  */
  copy_body_data cb;

  /* The tree of contexts corresponding to the encountered constructs.  */
  struct omp_context *outer;
  gimple *stmt;

  /* Map variables to fields in a structure that allows communication
     between sending and receiving threads.  */
  splay_tree field_map;
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

  /* These are used just by task contexts, if task firstprivate fn is
     needed.  srecord_type is used to communicate from the thread
     that encountered the task construct to task firstprivate fn,
     record_type is allocated by GOMP_task, initialized by task firstprivate
     fn and passed to the task body fn.  */
  splay_tree sfield_map;
  tree srecord_type;

  /* A chain of variables to add to the top-level block surrounding the
     construct.  In the case of a parallel, this is in the child function.  */
  tree block_vars;

  /* Label to which GOMP_cancel{,lation_point} and explicit and implicit
     barriers should jump during the omplower pass.  */
  tree cancel_label;

  /* What to do with variables with implicitly determined sharing
     attributes.  */
  enum omp_clause_default_kind default_kind;

  /* Nesting depth of this context.  Used to beautify error messages re
     invalid gotos.  The outermost ctx is depth 1, with depth 0 being
     reserved for the main body of the function.  */
  int depth;

  /* True if this parallel directive is nested within another.  */
  bool is_nested;

  /* True if this construct can be cancelled.  */
  bool cancellable;
};

/* A structure holding the elements of:
   for (V = N1; V cond N2; V += STEP) [...] */

struct omp_for_data_loop
{
  tree v, n1, n2, step;
  enum tree_code cond_code;
};

/* A structure describing the main elements of a parallel loop.  */

struct omp_for_data
{
  struct omp_for_data_loop loop;
  tree chunk_size;
  gomp_for *for_stmt;
  tree pre, iter_type;
  int collapse;
  int ordered;
  bool have_nowait, have_ordered, simd_schedule;
  unsigned char sched_modifiers;
  enum omp_clause_schedule_kind sched_kind;
  struct omp_for_data_loop *loops;
};

/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX]; /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions.  */

  tree routine; /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned flags;  /* Partitioning flags.  */
  unsigned ifns;   /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};

/* Flags for an OpenACC loop.  */

enum oacc_loop_flags {
  OLF_SEQ         = 1u << 0,  /* Explicitly sequential.  */
  OLF_AUTO        = 1u << 1,  /* Compiler chooses axes.  */
  OLF_INDEPENDENT = 1u << 2,  /* Iterations are known independent.  */
  OLF_GANG_STATIC = 1u << 3,  /* Gang partitioning is static (has op).  */

  /* Explicitly specified loop axes.  */
  OLF_DIM_BASE = 4,
  OLF_DIM_GANG   = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
  OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
  OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),

  OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
};
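
/* A worked example (editorial illustration, not from the original
   sources, assuming the usual gomp-constants.h values GOMP_DIM_GANG == 0
   and GOMP_DIM_VECTOR == 2): a "#pragma acc loop gang vector" would
   carry flags = OLF_DIM_GANG | OLF_DIM_VECTOR
	       = (1u << 4) | (1u << 6).  */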

static splay_tree all_contexts;
static int taskreg_nesting_level;
static int target_nesting_level;
static struct omp_region *root_omp_region;
static bitmap task_shared_vars;
static vec<omp_context *> taskreg_contexts;
static bool omp_any_child_fn_dumped;

static void scan_omp (gimple_seq *, omp_context *);
static tree scan_omp_1_op (tree *, int *, void *);
static gphi *find_phi_with_arg_on_edge (tree, edge);

#define WALK_SUBSTMTS  \
    case GIMPLE_BIND: \
    case GIMPLE_TRY: \
    case GIMPLE_CATCH: \
    case GIMPLE_EH_FILTER: \
    case GIMPLE_TRANSACTION: \
      /* The sub-statements for these should be walked.  */ \
      *handled_ops_p = false; \
      break;
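
/* A minimal usage sketch (illustrative only): the macro is meant to be
   pasted into the switch of a walk_gimple_stmt callback, so container
   statements are descended into rather than treated as handled:

     switch (gimple_code (stmt))
       {
       WALK_SUBSTMTS;
       case GIMPLE_OMP_PARALLEL:
	 ... handle the interesting statement ...
       default:
	 break;
       }
*/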

/* Return true if CTX corresponds to an oacc parallel region.  */

static bool
is_oacc_parallel (omp_context *ctx)
{
  enum gimple_code outer_type = gimple_code (ctx->stmt);
  return ((outer_type == GIMPLE_OMP_TARGET)
	  && (gimple_omp_target_kind (ctx->stmt)
	      == GF_OMP_TARGET_KIND_OACC_PARALLEL));
}

/* Return true if CTX corresponds to an oacc kernels region.  */

static bool
is_oacc_kernels (omp_context *ctx)
{
  enum gimple_code outer_type = gimple_code (ctx->stmt);
  return ((outer_type == GIMPLE_OMP_TARGET)
	  && (gimple_omp_target_kind (ctx->stmt)
	      == GF_OMP_TARGET_KIND_OACC_KERNELS));
}

/* If DECL is the artificial dummy VAR_DECL created for non-static
   data member privatization, return the underlying "this" parameter,
   otherwise return NULL.  */

tree
omp_member_access_dummy_var (tree decl)
{
  if (!VAR_P (decl)
      || !DECL_ARTIFICIAL (decl)
      || !DECL_IGNORED_P (decl)
      || !DECL_HAS_VALUE_EXPR_P (decl)
      || !lang_hooks.decls.omp_disregard_value_expr (decl, false))
    return NULL_TREE;

  tree v = DECL_VALUE_EXPR (decl);
  if (TREE_CODE (v) != COMPONENT_REF)
    return NULL_TREE;

  while (1)
    switch (TREE_CODE (v))
      {
      case COMPONENT_REF:
      case MEM_REF:
      case INDIRECT_REF:
      CASE_CONVERT:
      case POINTER_PLUS_EXPR:
	v = TREE_OPERAND (v, 0);
	continue;
      case PARM_DECL:
	if (DECL_CONTEXT (v) == current_function_decl
	    && DECL_ARTIFICIAL (v)
	    && TREE_CODE (TREE_TYPE (v)) == POINTER_TYPE)
	  return v;
	return NULL_TREE;
      default:
	return NULL_TREE;
      }
}

/* Helper for unshare_and_remap, called through walk_tree.  */

static tree
unshare_and_remap_1 (tree *tp, int *walk_subtrees, void *data)
{
  tree *pair = (tree *) data;
  if (*tp == pair[0])
    {
      *tp = unshare_expr (pair[1]);
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Return unshare_expr (X) with all occurrences of FROM
   replaced with TO.  */

static tree
unshare_and_remap (tree x, tree from, tree to)
{
  tree pair[2] = { from, to };
  x = unshare_expr (x);
  walk_tree (&x, unshare_and_remap_1, pair, NULL);
  return x;
}
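
/* For illustration (an editorial example, not part of the original
   file): given X == a + a * 4, unshare_and_remap (x, a, b) returns a
   freshly unshared tree b + b * 4, leaving the input tree untouched.  */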

/* Holds offload tables with decls.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;

/* Convenience function for calling scan_omp_1_op on tree operands.  */

static inline tree
scan_omp_op (tree *tp, omp_context *ctx)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.info = ctx;
  wi.want_locations = true;

  return walk_tree (tp, scan_omp_1_op, &wi, NULL);
}

static void lower_omp (gimple_seq *, omp_context *);
static tree lookup_decl_in_outer_ctx (tree, omp_context *);
static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);

/* Find an OMP clause of type KIND within CLAUSES.  */

tree
find_omp_clause (tree clauses, enum omp_clause_code kind)
{
  for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
    if (OMP_CLAUSE_CODE (clauses) == kind)
      return clauses;

  return NULL_TREE;
}
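
/* Example usage (illustrative): on a parallel statement STMT,
   find_omp_clause (gimple_omp_parallel_clauses (stmt), OMP_CLAUSE_IF)
   yields the if clause when one is present and NULL_TREE otherwise.  */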

/* Return true if CTX is for an omp parallel.  */

static inline bool
is_parallel_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL;
}

/* Return true if CTX is for an omp task.  */

static inline bool
is_task_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_TASK;
}

/* Return true if CTX is for an omp taskloop.  */

static inline bool
is_taskloop_ctx (omp_context *ctx)
{
  return gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
	 && gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP;
}

/* Return true if CTX is for an omp parallel or omp task.  */

static inline bool
is_taskreg_ctx (omp_context *ctx)
{
  return is_parallel_ctx (ctx) || is_task_ctx (ctx);
}

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
   GT_EXPR.  */

static void
adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
{
  switch (*cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
    case NE_EXPR:
      break;
    case LE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
      else
	*n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
      else
	*n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }
}
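
/* A hedged example of the canonicalization above: for a loop written
   "for (i = 0; i <= n; i++)" the LE_EXPR condition becomes LT_EXPR with
   *N2 rewritten to n + 1; for pointer iterators the bound is bumped
   with fold_build_pointer_plus_hwi_loc instead.  */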

/* Return the looping step from INCR, extracted from the step of a gimple omp
   for statement.  */

static tree
get_omp_for_step_from_incr (location_t loc, tree incr)
{
  tree step;
  switch (TREE_CODE (incr))
    {
    case PLUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      break;
    case POINTER_PLUS_EXPR:
      step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
      break;
    case MINUS_EXPR:
      step = TREE_OPERAND (incr, 1);
      step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
      break;
    default:
      gcc_unreachable ();
    }
  return step;
}

/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  */

static void
extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
		      struct omp_for_data_loop *loops)
{
  tree t, var, *collapse_iter, *collapse_count;
  tree count = NULL_TREE, iter_type = long_integer_type_node;
  struct omp_for_data_loop *loop;
  int i;
  struct omp_for_data_loop dummy_loop;
  location_t loc = gimple_location (for_stmt);
  bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
  bool distribute = gimple_omp_for_kind (for_stmt)
		    == GF_OMP_FOR_KIND_DISTRIBUTE;
  bool taskloop = gimple_omp_for_kind (for_stmt)
		  == GF_OMP_FOR_KIND_TASKLOOP;
  tree iterv, countv;

  fd->for_stmt = for_stmt;
  fd->pre = NULL;
  if (gimple_omp_for_collapse (for_stmt) > 1)
    fd->loops = loops;
  else
    fd->loops = &fd->loop;

  fd->have_nowait = distribute || simd;
  fd->have_ordered = false;
  fd->collapse = 1;
  fd->ordered = 0;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->sched_modifiers = 0;
  fd->chunk_size = NULL_TREE;
  fd->simd_schedule = false;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
  collapse_iter = NULL;
  collapse_count = NULL;

  for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
	fd->have_nowait = true;
	break;
      case OMP_CLAUSE_ORDERED:
	fd->have_ordered = true;
	if (OMP_CLAUSE_ORDERED_EXPR (t))
	  fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
	break;
      case OMP_CLAUSE_SCHEDULE:
	gcc_assert (!distribute && !taskloop);
	fd->sched_kind
	  = (enum omp_clause_schedule_kind)
	    (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
	fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
			       & ~OMP_CLAUSE_SCHEDULE_MASK);
	fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
	fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
	break;
      case OMP_CLAUSE_DIST_SCHEDULE:
	gcc_assert (distribute);
	fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
	break;
      case OMP_CLAUSE_COLLAPSE:
	fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
	if (fd->collapse > 1)
	  {
	    collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
	    collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
	  }
	break;
      default:
	break;
      }
  if (fd->ordered && fd->collapse == 1 && loops != NULL)
    {
      fd->loops = loops;
      iterv = NULL_TREE;
      countv = NULL_TREE;
      collapse_iter = &iterv;
      collapse_count = &countv;
    }

  /* FIXME: for now map schedule(auto) to schedule(static).
     There should be analysis to determine whether all iterations
     are approximately the same amount of work (then schedule(static)
     is best) or if it varies (then schedule(dynamic,N) is better).  */
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
    {
      fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
      gcc_assert (fd->chunk_size == NULL);
    }
  gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
  if (taskloop)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
	 static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered)
	fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
			 ? integer_zero_node : integer_one_node;
    }

  int cnt = fd->ordered ? fd->ordered : fd->collapse;
  for (i = 0; i < cnt; i++)
    {
      if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
	loop = &fd->loop;
      else if (loops != NULL)
	loop = loops + i;
      else
	loop = &dummy_loop;

      loop->v = gimple_omp_for_index (for_stmt, i);
      gcc_assert (SSA_VAR_P (loop->v));
      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
      var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
      loop->n1 = gimple_omp_for_initial (for_stmt, i);

      loop->cond_code = gimple_omp_for_cond (for_stmt, i);
      loop->n2 = gimple_omp_for_final (for_stmt, i);
      gcc_assert (loop->cond_code != NE_EXPR
		  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD
		  || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR);
      adjust_for_condition (loc, &loop->cond_code, &loop->n2);

      t = gimple_omp_for_incr (for_stmt, i);
      gcc_assert (TREE_OPERAND (t, 0) == var);
      loop->step = get_omp_for_step_from_incr (loc, t);

      if (simd
	  || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	      && !fd->have_ordered))
	{
	  if (fd->collapse == 1)
	    iter_type = TREE_TYPE (loop->v);
	  else if (i == 0
		   || TYPE_PRECISION (iter_type)
		      < TYPE_PRECISION (TREE_TYPE (loop->v)))
	    iter_type
	      = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
	}
      else if (iter_type != long_long_unsigned_type_node)
	{
	  if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
	    iter_type = long_long_unsigned_type_node;
	  else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
		   && TYPE_PRECISION (TREE_TYPE (loop->v))
		      >= TYPE_PRECISION (iter_type))
	    {
	      tree n;

	      if (loop->cond_code == LT_EXPR)
		n = fold_build2_loc (loc,
				     PLUS_EXPR, TREE_TYPE (loop->v),
				     loop->n2, loop->step);
	      else
		n = loop->n1;
	      if (TREE_CODE (n) != INTEGER_CST
		  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
		iter_type = long_long_unsigned_type_node;
	    }
	  else if (TYPE_PRECISION (TREE_TYPE (loop->v))
		   > TYPE_PRECISION (iter_type))
	    {
	      tree n1, n2;

	      if (loop->cond_code == LT_EXPR)
		{
		  n1 = loop->n1;
		  n2 = fold_build2_loc (loc,
					PLUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		}
	      else
		{
		  n1 = fold_build2_loc (loc,
					MINUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		  n2 = loop->n1;
		}
	      if (TREE_CODE (n1) != INTEGER_CST
		  || TREE_CODE (n2) != INTEGER_CST
		  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
		  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
		iter_type = long_long_unsigned_type_node;
	    }
	}

      if (i >= fd->collapse)
	continue;

      if (collapse_count && *collapse_count == NULL)
	{
	  t = fold_binary (loop->cond_code, boolean_type_node,
			   fold_convert (TREE_TYPE (loop->v), loop->n1),
			   fold_convert (TREE_TYPE (loop->v), loop->n2));
	  if (t && integer_zerop (t))
	    count = build_zero_cst (long_long_unsigned_type_node);
	  else if ((i == 0 || count != NULL_TREE)
		   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		   && TREE_CONSTANT (loop->n1)
		   && TREE_CONSTANT (loop->n2)
		   && TREE_CODE (loop->step) == INTEGER_CST)
	    {
	      tree itype = TREE_TYPE (loop->v);

	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
	      t = fold_build2_loc (loc,
				   PLUS_EXPR, itype,
				   fold_convert_loc (loc, itype, loop->step), t);
	      t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n2));
	      t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n1));
	      if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
		t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
				     fold_build1_loc (loc, NEGATE_EXPR, itype, t),
				     fold_build1_loc (loc, NEGATE_EXPR, itype,
						      fold_convert_loc (loc, itype,
									loop->step)));
	      else
		t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
				     fold_convert_loc (loc, itype, loop->step));
	      t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
	      if (count != NULL_TREE)
		count = fold_build2_loc (loc,
					 MULT_EXPR, long_long_unsigned_type_node,
					 count, t);
	      else
		count = t;
	      if (TREE_CODE (count) != INTEGER_CST)
		count = NULL_TREE;
	    }
	  else if (count && !integer_zerop (count))
	    count = NULL_TREE;
	}
    }

  if (count
      && !simd
      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered))
    {
      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
	iter_type = long_long_unsigned_type_node;
      else
	iter_type = long_integer_type_node;
    }
  else if (collapse_iter && *collapse_iter != NULL)
    iter_type = TREE_TYPE (*collapse_iter);
  fd->iter_type = iter_type;
  if (collapse_iter && *collapse_iter == NULL)
    *collapse_iter = create_tmp_var (iter_type, ".iter");
  if (collapse_count && *collapse_count == NULL)
    {
      if (count)
	*collapse_count = fold_convert_loc (loc, iter_type, count);
      else
	*collapse_count = create_tmp_var (iter_type, ".count");
    }

  if (fd->collapse > 1 || (fd->ordered && loops))
    {
      fd->loop.v = *collapse_iter;
      fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
      fd->loop.n2 = *collapse_count;
      fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
      fd->loop.cond_code = LT_EXPR;
    }
  else if (loops)
    loops[0] = fd->loop;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  extract_omp_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

static int omp_max_vf (void);

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
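
/* Illustrative arithmetic (assuming omp_max_vf returns a power of two):
   with vf == 4, a chunk size of 5 becomes (5 + 3) & -4 == 8, i.e. the
   chunk is rounded up to a multiple of the vectorization factor so a
   simd lane group is never split across chunks.  */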

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      extract_omp_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = find_omp_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
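
/* Illustrative outcome (a sketch, not taken from this file): for
   "#pragma omp parallel for schedule (dynamic)" both REGION and its
   inner region are marked is_combined_parallel, allowing expansion to
   emit one combined libgomp entry point (the GOMP_parallel_loop_*
   family) instead of separate parallel and workshare calls.  */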

/* Return true if EXPR is variable sized.  */

static inline bool
is_variable_sized (const_tree expr)
{
  return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
}

/* Return true if DECL is a reference type.  */

static inline bool
is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}

/* Return the type of a decl.  If the decl is reference type,
   return its base type.  */
static inline tree
get_base_type (tree decl)
{
  tree type = TREE_TYPE (decl);
  if (is_reference (decl))
    type = TREE_TYPE (type);
  return type;
}

/* Lookup variables.  The "maybe" form
   allows for the variable form to not have been entered, otherwise we
   assert that the variable must have been entered.  */

static inline tree
lookup_decl (tree var, omp_context *ctx)
{
  tree *n = ctx->cb.decl_map->get (var);
  return *n;
}

static inline tree
maybe_lookup_decl (const_tree var, omp_context *ctx)
{
  tree *n = ctx->cb.decl_map->get (const_cast<tree> (var));
  return n ? *n : NULL_TREE;
}

static inline tree
lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
  return (tree) n->value;
}

static inline tree
lookup_sfield (splay_tree_key key, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->sfield_map
			 ? ctx->sfield_map : ctx->field_map, key);
  return (tree) n->value;
}

static inline tree
lookup_sfield (tree var, omp_context *ctx)
{
  return lookup_sfield ((splay_tree_key) var, ctx);
}

static inline tree
maybe_lookup_field (splay_tree_key key, omp_context *ctx)
{
  splay_tree_node n;
  n = splay_tree_lookup (ctx->field_map, key);
  return n ? (tree) n->value : NULL_TREE;
}

static inline tree
maybe_lookup_field (tree var, omp_context *ctx)
{
  return maybe_lookup_field ((splay_tree_key) var, ctx);
}

/* Return true if DECL should be copied by pointer.  SHARED_CTX is
   the parallel context if DECL is to be shared.  */

static bool
use_pointer_for_field (tree decl, omp_context *shared_ctx)
{
  if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
    return true;

  /* We can only use copy-in/copy-out semantics for shared variables
     when we know the value is not accessible from an outer scope.  */
  if (shared_ctx)
    {
      gcc_assert (!is_gimple_omp_oacc (shared_ctx->stmt));

      /* ??? Trivially accessible from anywhere.  But why would we even
	 be passing an address in this case?  Should we simply assert
	 this to be false, or should we have a cleanup pass that removes
	 these from the list of mappings?  */
      if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
	return true;

      /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
	 without analyzing the expression whether or not its location
	 is accessible to anyone else.  In the case of nested parallel
	 regions it certainly may be.  */
      if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
	return true;

      /* Do not use copy-in/copy-out for variables that have their
	 address taken.  */
      if (TREE_ADDRESSABLE (decl))
	return true;

      /* lower_send_shared_vars only uses copy-in, but not copy-out
	 for these.  */
      if (TREE_READONLY (decl)
	  || ((TREE_CODE (decl) == RESULT_DECL
	       || TREE_CODE (decl) == PARM_DECL)
	      && DECL_BY_REFERENCE (decl)))
	return false;

      /* Disallow copy-in/out in nested parallel if
	 decl is shared in outer parallel, otherwise
	 each thread could store the shared variable
	 in its own copy-in location, making the
	 variable no longer really shared.  */
      if (shared_ctx->is_nested)
	{
	  omp_context *up;

	  for (up = shared_ctx->outer; up; up = up->outer)
	    if (is_taskreg_ctx (up) && maybe_lookup_decl (decl, up))
	      break;

	  if (up)
	    {
	      tree c;

	      for (c = gimple_omp_taskreg_clauses (up->stmt);
		   c; c = OMP_CLAUSE_CHAIN (c))
		if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
		    && OMP_CLAUSE_DECL (c) == decl)
		  break;

	      if (c)
		goto maybe_mark_addressable_and_ret;
	    }
	}

      /* For tasks avoid using copy-in/out.  As tasks can be
	 deferred or executed in a different thread, when GOMP_task
	 returns, the task hasn't necessarily terminated.  */
      if (is_task_ctx (shared_ctx))
	{
	  tree outer;
	maybe_mark_addressable_and_ret:
	  outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx);
	  if (is_gimple_reg (outer) && !omp_member_access_dummy_var (outer))
	    {
	      /* Taking address of OUTER in lower_send_shared_vars
		 might need regimplification of everything that uses the
		 variable.  */
	      if (!task_shared_vars)
		task_shared_vars = BITMAP_ALLOC (NULL);
	      bitmap_set_bit (task_shared_vars, DECL_UID (outer));
	      TREE_ADDRESSABLE (outer) = 1;
	    }
	  return true;
	}
    }

  return false;
}

/* Construct a new automatic decl similar to VAR.  */

static tree
omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
{
  tree copy = copy_var_decl (var, name, type);

  DECL_CONTEXT (copy) = current_function_decl;
  DECL_CHAIN (copy) = ctx->block_vars;
  /* If VAR is listed in task_shared_vars, it means it wasn't
     originally addressable and is so only because a task needs to take
     its address.  But we don't need to take address of privatizations
     from that var.  */
  if (TREE_ADDRESSABLE (var)
      && task_shared_vars
      && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
    TREE_ADDRESSABLE (copy) = 0;
  ctx->block_vars = copy;

  return copy;
}

static tree
omp_copy_decl_1 (tree var, omp_context *ctx)
{
  return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
}

/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
   as appropriate.  */
static tree
omp_build_component_ref (tree obj, tree field)
{
  tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
  if (TREE_THIS_VOLATILE (field))
    TREE_THIS_VOLATILE (ret) |= 1;
  if (TREE_READONLY (field))
    TREE_READONLY (ret) |= 1;
  return ret;
}

/* Build tree nodes to access the field for VAR on the receiver side.  */

static tree
build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
{
  tree x, field = lookup_field (var, ctx);

  /* If the receiver record type was remapped in the child function,
     remap the field into the new record type.  */
  x = maybe_lookup_field (field, ctx);
  if (x != NULL)
    field = x;

  x = build_simple_mem_ref (ctx->receiver_decl);
  TREE_THIS_NOTRAP (x) = 1;
  x = omp_build_component_ref (x, field);
  if (by_ref)
    {
      x = build_simple_mem_ref (x);
      TREE_THIS_NOTRAP (x) = 1;
    }

  return x;
}

/* Build tree nodes to access VAR in the scope outer to CTX.  In the case
   of a parallel, this is a component reference; for workshare constructs
   this is some variable.  */

static tree
build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false)
{
  tree x;

  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
    x = var;
  else if (is_variable_sized (var))
    {
      x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
      x = build_outer_var_ref (x, ctx, lastprivate);
      x = build_simple_mem_ref (x);
    }
  else if (is_taskreg_ctx (ctx))
    {
      bool by_ref = use_pointer_for_field (var, NULL);
      x = build_receiver_ref (var, by_ref, ctx);
    }
  else if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
	   && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
    {
      /* #pragma omp simd isn't a worksharing construct, and can reference
	 even private vars in its linear etc. clauses.  */
      x = NULL_TREE;
      if (ctx->outer && is_taskreg_ctx (ctx))
	x = lookup_decl (var, ctx->outer);
      else if (ctx->outer)
	x = maybe_lookup_decl_in_outer_ctx (var, ctx);
      if (x == NULL_TREE)
	x = var;
    }
  else if (lastprivate && is_taskloop_ctx (ctx))
    {
      gcc_assert (ctx->outer);
      splay_tree_node n
	= splay_tree_lookup (ctx->outer->field_map,
			     (splay_tree_key) &DECL_UID (var));
      if (n == NULL)
	{
	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
	    x = var;
	  else
	    x = lookup_decl (var, ctx->outer);
	}
      else
	{
	  tree field = (tree) n->value;
	  /* If the receiver record type was remapped in the child function,
	     remap the field into the new record type.  */
	  x = maybe_lookup_field (field, ctx->outer);
	  if (x != NULL)
	    field = x;

	  x = build_simple_mem_ref (ctx->outer->receiver_decl);
	  x = omp_build_component_ref (x, field);
	  if (use_pointer_for_field (var, ctx->outer))
	    x = build_simple_mem_ref (x);
	}
    }
  else if (ctx->outer)
    {
      omp_context *outer = ctx->outer;
      if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
	{
	  outer = outer->outer;
	  gcc_assert (outer
		      && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY);
	}
      x = lookup_decl (var, outer);
    }
  else if (is_reference (var))
    /* This can happen with orphaned constructs.  If var is reference, it is
       possible it is shared and as such valid.  */
    x = var;
  else if (omp_member_access_dummy_var (var))
    x = var;
  else
    gcc_unreachable ();

  if (x == var)
    {
      tree t = omp_member_access_dummy_var (var);
      if (t)
	{
	  x = DECL_VALUE_EXPR (var);
	  tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
	  if (o != t)
	    x = unshare_and_remap (x, t, o);
	  else
	    x = unshare_expr (x);
	}
    }

  if (is_reference (var))
    x = build_simple_mem_ref (x);

  return x;
}

/* Build tree nodes to access the field for VAR on the sender side.  */

static tree
build_sender_ref (splay_tree_key key, omp_context *ctx)
{
  tree field = lookup_sfield (key, ctx);
  return omp_build_component_ref (ctx->sender_decl, field);
}

static tree
build_sender_ref (tree var, omp_context *ctx)
{
  return build_sender_ref ((splay_tree_key) var, ctx);
}

/* Add a new field for VAR inside the structure CTX->SENDER_DECL.  If
   BASE_POINTERS_RESTRICT, declare the field with restrict.  */

static void
install_var_field (tree var, bool by_ref, int mask, omp_context *ctx,
		   bool base_pointers_restrict = false)
{
  tree field, type, sfield = NULL_TREE;
  splay_tree_key key = (splay_tree_key) var;

  if ((mask & 8) != 0)
    {
      key = (splay_tree_key) &DECL_UID (var);
      gcc_checking_assert (key != (splay_tree_key) var);
    }
  gcc_assert ((mask & 1) == 0
	      || !splay_tree_lookup (ctx->field_map, key));
  gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
	      || !splay_tree_lookup (ctx->sfield_map, key));
  gcc_assert ((mask & 3) == 3
	      || !is_gimple_omp_oacc (ctx->stmt));

  type = TREE_TYPE (var);
  /* Prevent redeclaring the var in the split-off function with a restrict
     pointer type.  Note that we only clear type itself, restrict qualifiers in
     the pointed-to type will be ignored by points-to analysis.  */
  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  if (mask & 4)
    {
      gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
      type = build_pointer_type (build_pointer_type (type));
    }
  else if (by_ref)
    {
      type = build_pointer_type (type);
      if (base_pointers_restrict)
	type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
    }
  else if ((mask & 3) == 1 && is_reference (var))
    type = TREE_TYPE (type);

  field = build_decl (DECL_SOURCE_LOCATION (var),
		      FIELD_DECL, DECL_NAME (var), type);

  /* Remember what variable this field was created for.  This does have a
     side effect of making dwarf2out ignore this member, so for helpful
     debugging we clear it later in delete_omp_context.  */
  DECL_ABSTRACT_ORIGIN (field) = var;
  if (type == TREE_TYPE (var))
    {
      DECL_ALIGN (field) = DECL_ALIGN (var);
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    DECL_ALIGN (field) = TYPE_ALIGN (type);

  if ((mask & 3) == 3)
    {
      insert_field_into_struct (ctx->record_type, field);
      if (ctx->srecord_type)
	{
	  sfield = build_decl (DECL_SOURCE_LOCATION (var),
			       FIELD_DECL, DECL_NAME (var), type);
	  DECL_ABSTRACT_ORIGIN (sfield) = var;
	  DECL_ALIGN (sfield) = DECL_ALIGN (field);
	  DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field);
	  TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field);
	  insert_field_into_struct (ctx->srecord_type, sfield);
	}
    }
  else
    {
      if (ctx->srecord_type == NULL_TREE)
	{
	  tree t;

	  ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE);
	  ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
	  for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
	    {
	      sfield = build_decl (DECL_SOURCE_LOCATION (t),
				   FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
	      DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
	      insert_field_into_struct (ctx->srecord_type, sfield);
	      splay_tree_insert (ctx->sfield_map,
				 (splay_tree_key) DECL_ABSTRACT_ORIGIN (t),
				 (splay_tree_value) sfield);
	    }
	}
      sfield = field;
      insert_field_into_struct ((mask & 1) ? ctx->record_type
				: ctx->srecord_type, field);
    }

  if (mask & 1)
    splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
  if ((mask & 2) && ctx->sfield_map)
    splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
}

static tree
install_var_local (tree var, omp_context *ctx)
{
  tree new_var = omp_copy_decl_1 (var, ctx);
  insert_decl_map (&ctx->cb, var, new_var);
  return new_var;
}

/* Adjust the replacement for DECL in CTX for the new context.  This means
   copying the DECL_VALUE_EXPR, and fixing up the type.  */

static void
fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
{
  tree new_decl, size;

  new_decl = lookup_decl (decl, ctx);

  TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);

  if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
      && DECL_HAS_VALUE_EXPR_P (decl))
    {
      tree ve = DECL_VALUE_EXPR (decl);
      walk_tree (&ve, copy_tree_body_r, &ctx->cb, NULL);
      SET_DECL_VALUE_EXPR (new_decl, ve);
      DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
    }

  if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
    {
      size = remap_decl (DECL_SIZE (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE (TREE_TYPE (new_decl));
      DECL_SIZE (new_decl) = size;

      size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
      if (size == error_mark_node)
	size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
      DECL_SIZE_UNIT (new_decl) = size;
    }
}

/* The callback for remap_decl.  Search all containing contexts for a
   mapping of the variable; this avoids having to duplicate the splay
   tree ahead of time.  We know a mapping doesn't already exist in the
   given context.  Create new mappings to implement default semantics.  */

static tree
omp_copy_decl (tree var, copy_body_data *cb)
{
  omp_context *ctx = (omp_context *) cb;
  tree new_var;

  if (TREE_CODE (var) == LABEL_DECL)
    {
      new_var = create_artificial_label (DECL_SOURCE_LOCATION (var));
      DECL_CONTEXT (new_var) = current_function_decl;
      insert_decl_map (&ctx->cb, var, new_var);
      return new_var;
    }

  while (!is_taskreg_ctx (ctx))
    {
      ctx = ctx->outer;
      if (ctx == NULL)
	return var;
      new_var = maybe_lookup_decl (var, ctx);
      if (new_var)
	return new_var;
    }

  if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
    return var;

  return error_mark_node;
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
free_omp_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* Create a new context, with OUTER_CTX being the surrounding context.  */

static omp_context *
new_omp_context (gimple *stmt, omp_context *outer_ctx)
{
  omp_context *ctx = XCNEW (omp_context);

  splay_tree_insert (all_contexts, (splay_tree_key) stmt,
		     (splay_tree_value) ctx);
  ctx->stmt = stmt;

  if (outer_ctx)
    {
      ctx->outer = outer_ctx;
      ctx->cb = outer_ctx->cb;
      ctx->cb.block = NULL;
      ctx->depth = outer_ctx->depth + 1;
    }
  else
    {
      ctx->cb.src_fn = current_function_decl;
      ctx->cb.dst_fn = current_function_decl;
      ctx->cb.src_node = cgraph_node::get (current_function_decl);
      gcc_checking_assert (ctx->cb.src_node);
      ctx->cb.dst_node = ctx->cb.src_node;
      ctx->cb.src_cfun = cfun;
      ctx->cb.copy_decl = omp_copy_decl;
      ctx->cb.eh_lp_nr = 0;
      ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
      ctx->depth = 1;
    }

  ctx->cb.decl_map = new hash_map<tree, tree>;

  return ctx;
}

static gimple_seq maybe_catch_exception (gimple_seq);

/* Finalize task copyfn.  */

static void
finalize_task_copyfn (gomp_task *task_stmt)
{
  struct function *child_cfun;
  tree child_fn;
  gimple_seq seq = NULL, new_seq;
  gbind *bind;

  child_fn = gimple_omp_task_copy_fn (task_stmt);
  if (child_fn == NULL_TREE)
    return;

  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
  DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;

  push_cfun (child_cfun);
  bind = gimplify_body (child_fn, false);
  gimple_seq_add_stmt (&seq, bind);
  new_seq = maybe_catch_exception (seq);
  if (new_seq != seq)
    {
      bind = gimple_build_bind (NULL, new_seq, NULL);
      seq = NULL;
      gimple_seq_add_stmt (&seq, bind);
    }
  gimple_set_body (child_fn, seq);
  pop_cfun ();

  /* Inform the callgraph about the new function.  */
  cgraph_node *node = cgraph_node::get_create (child_fn);
  node->parallelized_function = 1;
  cgraph_node::add_new_function (child_fn, false);
}

/* Destroy an omp_context data structure.  Called through the splay tree
   value delete callback.  */

static void
delete_omp_context (splay_tree_value value)
{
  omp_context *ctx = (omp_context *) value;

  delete ctx->cb.decl_map;

  if (ctx->field_map)
    splay_tree_delete (ctx->field_map);
  if (ctx->sfield_map)
    splay_tree_delete (ctx->sfield_map);

  /* We hijacked DECL_ABSTRACT_ORIGIN earlier.  We need to clear it before
     it produces corrupt debug information.  */
  if (ctx->record_type)
    {
      tree t;
      for (t = TYPE_FIELDS (ctx->record_type); t ; t = DECL_CHAIN (t))
	DECL_ABSTRACT_ORIGIN (t) = NULL;
    }
  if (ctx->srecord_type)
    {
      tree t;
      for (t = TYPE_FIELDS (ctx->srecord_type); t ; t = DECL_CHAIN (t))
	DECL_ABSTRACT_ORIGIN (t) = NULL;
    }

  if (is_task_ctx (ctx))
    finalize_task_copyfn (as_a <gomp_task *> (ctx->stmt));

  XDELETE (ctx);
}

/* Fix up RECEIVER_DECL with a type that has been remapped to the child
   context.  */

static void
fixup_child_record_type (omp_context *ctx)
{
  tree f, type = ctx->record_type;

  if (!ctx->receiver_decl)
    return;
  /* ??? It isn't sufficient to just call remap_type here, because
     variably_modified_type_p doesn't work the way we expect for
     record types.  Testing each field for whether it needs remapping
     and creating a new record by hand works, however.  */
  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
      break;
  if (f)
    {
      tree name, new_fields = NULL;

      type = lang_hooks.types.make_type (RECORD_TYPE);
      name = DECL_NAME (TYPE_NAME (ctx->record_type));
      name = build_decl (DECL_SOURCE_LOCATION (ctx->receiver_decl),
			 TYPE_DECL, name, type);
      TYPE_NAME (type) = name;

      for (f = TYPE_FIELDS (ctx->record_type); f ; f = DECL_CHAIN (f))
	{
	  tree new_f = copy_node (f);
	  DECL_CONTEXT (new_f) = type;
	  TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
	  DECL_CHAIN (new_f) = new_fields;
	  walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &ctx->cb, NULL);
	  walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r,
		     &ctx->cb, NULL);
	  walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
		     &ctx->cb, NULL);
	  new_fields = new_f;

	  /* Arrange to be able to look up the receiver field
	     given the sender field.  */
	  splay_tree_insert (ctx->field_map, (splay_tree_key) f,
			     (splay_tree_value) new_f);
	}
      TYPE_FIELDS (type) = nreverse (new_fields);
      layout_type (type);
    }

  /* In a target region we never modify any of the pointers in *.omp_data_i,
     so attempt to help the optimizers.  */
  if (is_gimple_omp_offloaded (ctx->stmt))
    type = build_qualified_type (type, TYPE_QUAL_CONST);

  TREE_TYPE (ctx->receiver_decl)
    = build_qualified_type (build_reference_type (type), TYPE_QUAL_RESTRICT);
}

/* Instantiate decls as necessary in CTX to satisfy the data sharing
   specified by CLAUSES.  If BASE_POINTERS_RESTRICT, install var field with
   restrict.  */

static void
scan_sharing_clauses (tree clauses, omp_context *ctx,
		      bool base_pointers_restrict = false)
{
  tree c, decl;
  bool scan_array_reductions = false;

  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    {
      bool by_ref;

      switch (OMP_CLAUSE_CODE (c))
	{
	case OMP_CLAUSE_PRIVATE:
	  decl = OMP_CLAUSE_DECL (c);
	  if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
	    goto do_private;
	  else if (!is_variable_sized (decl))
	    install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_SHARED:
	  decl = OMP_CLAUSE_DECL (c);
	  /* Ignore shared directives in teams construct.  */
	  if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
	    {
	      /* Global variables don't need to be copied,
		 the receiver side will use them directly.  */
	      tree odecl = maybe_lookup_decl_in_outer_ctx (decl, ctx);
	      if (is_global_var (odecl))
		break;
	      insert_decl_map (&ctx->cb, decl, odecl);
	      break;
	    }
	  gcc_assert (is_taskreg_ctx (ctx));
	  gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
		      || !is_variable_sized (decl));
	  /* Global variables don't need to be copied,
	     the receiver side will use them directly.  */
	  if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
	    break;
	  if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
	    {
	      use_pointer_for_field (decl, ctx);
	      break;
	    }
	  by_ref = use_pointer_for_field (decl, NULL);
	  if ((! TREE_READONLY (decl) && !OMP_CLAUSE_SHARED_READONLY (c))
	      || TREE_ADDRESSABLE (decl)
	      || by_ref
	      || is_reference (decl))
	    {
	      by_ref = use_pointer_for_field (decl, ctx);
	      install_var_field (decl, by_ref, 3, ctx);
	      install_var_local (decl, ctx);
	      break;
	    }
	  /* We don't need to copy const scalar vars back.  */
	  OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
	  goto do_private;

	case OMP_CLAUSE_REDUCTION:
	  decl = OMP_CLAUSE_DECL (c);
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
	      && TREE_CODE (decl) == MEM_REF)
	    {
	      tree t = TREE_OPERAND (decl, 0);
	      if (TREE_CODE (t) == POINTER_PLUS_EXPR)
		t = TREE_OPERAND (t, 0);
	      if (TREE_CODE (t) == INDIRECT_REF
		  || TREE_CODE (t) == ADDR_EXPR)
		t = TREE_OPERAND (t, 0);
	      install_var_local (t, ctx);
	      if (is_taskreg_ctx (ctx)
		  && !is_global_var (maybe_lookup_decl_in_outer_ctx (t, ctx))
		  && !is_variable_sized (t))
		{
		  by_ref = use_pointer_for_field (t, ctx);
		  install_var_field (t, by_ref, 3, ctx);
		}
	      break;
	    }
	  goto do_private;

	case OMP_CLAUSE_LASTPRIVATE:
	  /* Let the corresponding firstprivate clause create
	     the variable.  */
	  if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
	    break;
	  /* FALLTHRU */

	case OMP_CLAUSE_FIRSTPRIVATE:
	case OMP_CLAUSE_LINEAR:
	  decl = OMP_CLAUSE_DECL (c);
	do_private:
	  if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
	       || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
	      && is_gimple_omp_offloaded (ctx->stmt))
	    {
	      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
		install_var_field (decl, !is_reference (decl), 3, ctx);
	      else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
		install_var_field (decl, true, 3, ctx);
	      else
		install_var_field (decl, false, 3, ctx);
	    }
	  if (is_variable_sized (decl))
	    {
	      if (is_task_ctx (ctx))
		install_var_field (decl, false, 1, ctx);
	      break;
	    }
	  else if (is_taskreg_ctx (ctx))
	    {
	      bool global
		= is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx));
	      by_ref = use_pointer_for_field (decl, NULL);

	      if (is_task_ctx (ctx)
		  && (global || by_ref || is_reference (decl)))
		{
		  install_var_field (decl, false, 1, ctx);
		  if (!global)
		    install_var_field (decl, by_ref, 2, ctx);
		}
	      else if (!global)
		install_var_field (decl, by_ref, 3, ctx);
	    }
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_USE_DEVICE_PTR:
	  decl = OMP_CLAUSE_DECL (c);
	  if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
	    install_var_field (decl, true, 3, ctx);
	  else
	    install_var_field (decl, false, 3, ctx);
	  if (DECL_SIZE (decl)
	      && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
	    {
	      tree decl2 = DECL_VALUE_EXPR (decl);
	      gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
	      decl2 = TREE_OPERAND (decl2, 0);
	      gcc_assert (DECL_P (decl2));
	      install_var_local (decl2, ctx);
	    }
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_IS_DEVICE_PTR:
	  decl = OMP_CLAUSE_DECL (c);
	  goto do_private;

	case OMP_CLAUSE__LOOPTEMP_:
	  gcc_assert (is_taskreg_ctx (ctx));
	  decl = OMP_CLAUSE_DECL (c);
	  install_var_field (decl, false, 3, ctx);
	  install_var_local (decl, ctx);
	  break;

	case OMP_CLAUSE_COPYPRIVATE:
	case OMP_CLAUSE_COPYIN:
	  decl = OMP_CLAUSE_DECL (c);
	  by_ref = use_pointer_for_field (decl, NULL);
	  install_var_field (decl, by_ref, 3, ctx);
	  break;

	case OMP_CLAUSE_DEFAULT:
	  ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
	  break;

	case OMP_CLAUSE_FINAL:
	case OMP_CLAUSE_IF:
	case OMP_CLAUSE_NUM_THREADS:
	case OMP_CLAUSE_NUM_TEAMS:
	case OMP_CLAUSE_THREAD_LIMIT:
	case OMP_CLAUSE_DEVICE:
	case OMP_CLAUSE_SCHEDULE:
	case OMP_CLAUSE_DIST_SCHEDULE:
	case OMP_CLAUSE_DEPEND:
	case OMP_CLAUSE_PRIORITY:
	case OMP_CLAUSE_GRAINSIZE:
	case OMP_CLAUSE_NUM_TASKS:
	case OMP_CLAUSE__CILK_FOR_COUNT_:
	case OMP_CLAUSE_NUM_GANGS:
	case OMP_CLAUSE_NUM_WORKERS:
	case OMP_CLAUSE_VECTOR_LENGTH:
	  if (ctx->outer)
	    scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
	  break;

	case OMP_CLAUSE_TO:
	case OMP_CLAUSE_FROM:
	case OMP_CLAUSE_MAP:
	  if (ctx->outer)
	    scan_omp_op (&OMP_CLAUSE_SIZE (c), ctx->outer);
	  decl = OMP_CLAUSE_DECL (c);
	  /* Global variables with "omp declare target" attribute
	     don't need to be copied, the receiver side will use them
	     directly.  However, global variables with "omp declare target link"
2057 attribute need to be copied. */
2058 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2059 && DECL_P (decl)
2060 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2061 && (OMP_CLAUSE_MAP_KIND (c)
2062 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2063 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2064 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2065 && varpool_node::get_create (decl)->offloadable
2066 && !lookup_attribute ("omp declare target link",
2067 DECL_ATTRIBUTES (decl)))
2068 break;
2069 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2070 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER)
2072 /* Ignore GOMP_MAP_POINTER kind for arrays in regions that are
2073 not offloaded; there is nothing to map for those. */
2074 if (!is_gimple_omp_offloaded (ctx->stmt)
2075 && !POINTER_TYPE_P (TREE_TYPE (decl))
2076 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
2077 break;
2079 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2080 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
2081 || (OMP_CLAUSE_MAP_KIND (c)
2082 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
2084 if (TREE_CODE (decl) == COMPONENT_REF
2085 || (TREE_CODE (decl) == INDIRECT_REF
2086 && TREE_CODE (TREE_OPERAND (decl, 0)) == COMPONENT_REF
2087 && (TREE_CODE (TREE_TYPE (TREE_OPERAND (decl, 0)))
2088 == REFERENCE_TYPE)))
2089 break;
2090 if (DECL_SIZE (decl)
2091 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2093 tree decl2 = DECL_VALUE_EXPR (decl);
2094 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2095 decl2 = TREE_OPERAND (decl2, 0);
2096 gcc_assert (DECL_P (decl2));
2097 install_var_local (decl2, ctx);
2099 install_var_local (decl, ctx);
2100 break;
2102 if (DECL_P (decl))
2104 if (DECL_SIZE (decl)
2105 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2107 tree decl2 = DECL_VALUE_EXPR (decl);
2108 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2109 decl2 = TREE_OPERAND (decl2, 0);
2110 gcc_assert (DECL_P (decl2));
2111 install_var_field (decl2, true, 3, ctx);
2112 install_var_local (decl2, ctx);
2113 install_var_local (decl, ctx);
2115 else
2117 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2118 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2119 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
2120 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2121 install_var_field (decl, true, 7, ctx);
2122 else
2123 install_var_field (decl, true, 3, ctx,
2124 base_pointers_restrict);
2125 if (is_gimple_omp_offloaded (ctx->stmt)
2126 && !OMP_CLAUSE_MAP_IN_REDUCTION (c))
2127 install_var_local (decl, ctx);
2130 else
2132 tree base = get_base_address (decl);
2133 tree nc = OMP_CLAUSE_CHAIN (c);
2134 if (DECL_P (base)
2135 && nc != NULL_TREE
2136 && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
2137 && OMP_CLAUSE_DECL (nc) == base
2138 && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_POINTER
2139 && integer_zerop (OMP_CLAUSE_SIZE (nc)))
2141 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) = 1;
2142 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (nc) = 1;
2144 else
2146 if (ctx->outer)
2148 scan_omp_op (&OMP_CLAUSE_DECL (c), ctx->outer);
2149 decl = OMP_CLAUSE_DECL (c);
2151 gcc_assert (!splay_tree_lookup (ctx->field_map,
2152 (splay_tree_key) decl));
2153 tree field
2154 = build_decl (OMP_CLAUSE_LOCATION (c),
2155 FIELD_DECL, NULL_TREE, ptr_type_node);
2156 DECL_ALIGN (field) = TYPE_ALIGN (ptr_type_node);
2157 insert_field_into_struct (ctx->record_type, field);
2158 splay_tree_insert (ctx->field_map, (splay_tree_key) decl,
2159 (splay_tree_value) field);
2162 break;
2164 case OMP_CLAUSE__GRIDDIM_:
2165 if (ctx->outer)
2167 scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer);
2168 scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer);
2170 break;
2172 case OMP_CLAUSE_NOWAIT:
2173 case OMP_CLAUSE_ORDERED:
2174 case OMP_CLAUSE_COLLAPSE:
2175 case OMP_CLAUSE_UNTIED:
2176 case OMP_CLAUSE_MERGEABLE:
2177 case OMP_CLAUSE_PROC_BIND:
2178 case OMP_CLAUSE_SAFELEN:
2179 case OMP_CLAUSE_SIMDLEN:
2180 case OMP_CLAUSE_THREADS:
2181 case OMP_CLAUSE_SIMD:
2182 case OMP_CLAUSE_NOGROUP:
2183 case OMP_CLAUSE_DEFAULTMAP:
2184 case OMP_CLAUSE_ASYNC:
2185 case OMP_CLAUSE_WAIT:
2186 case OMP_CLAUSE_GANG:
2187 case OMP_CLAUSE_WORKER:
2188 case OMP_CLAUSE_VECTOR:
2189 case OMP_CLAUSE_TILE:
2190 case OMP_CLAUSE_INDEPENDENT:
2191 case OMP_CLAUSE_AUTO:
2192 case OMP_CLAUSE_SEQ:
2193 break;
2195 case OMP_CLAUSE_ALIGNED:
2196 decl = OMP_CLAUSE_DECL (c);
2197 if (is_global_var (decl)
2198 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2199 install_var_local (decl, ctx);
2200 break;
2202 case OMP_CLAUSE_DEVICE_RESIDENT:
2203 case OMP_CLAUSE__CACHE_:
2204 sorry ("Clause not supported yet");
2205 break;
2207 default:
2208 gcc_unreachable ();
2212 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2214 switch (OMP_CLAUSE_CODE (c))
2216 case OMP_CLAUSE_LASTPRIVATE:
2217 /* Let the corresponding firstprivate clause create
2218 the variable. */
2219 if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2220 scan_array_reductions = true;
2221 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2222 break;
2223 /* FALLTHRU */
2225 case OMP_CLAUSE_FIRSTPRIVATE:
2226 case OMP_CLAUSE_PRIVATE:
2227 case OMP_CLAUSE_LINEAR:
2228 case OMP_CLAUSE_IS_DEVICE_PTR:
2229 decl = OMP_CLAUSE_DECL (c);
2230 if (is_variable_sized (decl))
2232 if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
2233 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
2234 && is_gimple_omp_offloaded (ctx->stmt))
2236 tree decl2 = DECL_VALUE_EXPR (decl);
2237 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2238 decl2 = TREE_OPERAND (decl2, 0);
2239 gcc_assert (DECL_P (decl2));
2240 install_var_local (decl2, ctx);
2241 fixup_remapped_decl (decl2, ctx, false);
2243 install_var_local (decl, ctx);
2245 fixup_remapped_decl (decl, ctx,
2246 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
2247 && OMP_CLAUSE_PRIVATE_DEBUG (c));
2248 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2249 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2250 scan_array_reductions = true;
2251 break;
2253 case OMP_CLAUSE_REDUCTION:
2254 decl = OMP_CLAUSE_DECL (c);
2255 if (TREE_CODE (decl) != MEM_REF)
2257 if (is_variable_sized (decl))
2258 install_var_local (decl, ctx);
2259 fixup_remapped_decl (decl, ctx, false);
2261 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2262 scan_array_reductions = true;
2263 break;
2265 case OMP_CLAUSE_SHARED:
2266 /* Ignore shared directives in teams construct. */
2267 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
2268 break;
2269 decl = OMP_CLAUSE_DECL (c);
2270 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2271 break;
2272 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2274 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
2275 ctx->outer)))
2276 break;
2277 bool by_ref = use_pointer_for_field (decl, ctx);
2278 install_var_field (decl, by_ref, 11, ctx);
2279 break;
2281 fixup_remapped_decl (decl, ctx, false);
2282 break;
2284 case OMP_CLAUSE_MAP:
2285 if (!is_gimple_omp_offloaded (ctx->stmt))
2286 break;
2287 decl = OMP_CLAUSE_DECL (c);
2288 if (DECL_P (decl)
2289 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2290 && (OMP_CLAUSE_MAP_KIND (c)
2291 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2292 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2293 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2294 && varpool_node::get_create (decl)->offloadable)
2295 break;
2296 if (DECL_P (decl))
2298 if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2299 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
2300 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
2301 && !COMPLETE_TYPE_P (TREE_TYPE (decl)))
2303 tree new_decl = lookup_decl (decl, ctx);
2304 TREE_TYPE (new_decl)
2305 = remap_type (TREE_TYPE (decl), &ctx->cb);
2307 else if (DECL_SIZE (decl)
2308 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2310 tree decl2 = DECL_VALUE_EXPR (decl);
2311 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2312 decl2 = TREE_OPERAND (decl2, 0);
2313 gcc_assert (DECL_P (decl2));
2314 fixup_remapped_decl (decl2, ctx, false);
2315 fixup_remapped_decl (decl, ctx, true);
2317 else
2318 fixup_remapped_decl (decl, ctx, false);
2320 break;
2322 case OMP_CLAUSE_COPYPRIVATE:
2323 case OMP_CLAUSE_COPYIN:
2324 case OMP_CLAUSE_DEFAULT:
2325 case OMP_CLAUSE_IF:
2326 case OMP_CLAUSE_NUM_THREADS:
2327 case OMP_CLAUSE_NUM_TEAMS:
2328 case OMP_CLAUSE_THREAD_LIMIT:
2329 case OMP_CLAUSE_DEVICE:
2330 case OMP_CLAUSE_SCHEDULE:
2331 case OMP_CLAUSE_DIST_SCHEDULE:
2332 case OMP_CLAUSE_NOWAIT:
2333 case OMP_CLAUSE_ORDERED:
2334 case OMP_CLAUSE_COLLAPSE:
2335 case OMP_CLAUSE_UNTIED:
2336 case OMP_CLAUSE_FINAL:
2337 case OMP_CLAUSE_MERGEABLE:
2338 case OMP_CLAUSE_PROC_BIND:
2339 case OMP_CLAUSE_SAFELEN:
2340 case OMP_CLAUSE_SIMDLEN:
2341 case OMP_CLAUSE_ALIGNED:
2342 case OMP_CLAUSE_DEPEND:
2343 case OMP_CLAUSE__LOOPTEMP_:
2344 case OMP_CLAUSE_TO:
2345 case OMP_CLAUSE_FROM:
2346 case OMP_CLAUSE_PRIORITY:
2347 case OMP_CLAUSE_GRAINSIZE:
2348 case OMP_CLAUSE_NUM_TASKS:
2349 case OMP_CLAUSE_THREADS:
2350 case OMP_CLAUSE_SIMD:
2351 case OMP_CLAUSE_NOGROUP:
2352 case OMP_CLAUSE_DEFAULTMAP:
2353 case OMP_CLAUSE_USE_DEVICE_PTR:
2354 case OMP_CLAUSE__CILK_FOR_COUNT_:
2355 case OMP_CLAUSE_ASYNC:
2356 case OMP_CLAUSE_WAIT:
2357 case OMP_CLAUSE_NUM_GANGS:
2358 case OMP_CLAUSE_NUM_WORKERS:
2359 case OMP_CLAUSE_VECTOR_LENGTH:
2360 case OMP_CLAUSE_GANG:
2361 case OMP_CLAUSE_WORKER:
2362 case OMP_CLAUSE_VECTOR:
2363 case OMP_CLAUSE_TILE:
2364 case OMP_CLAUSE_INDEPENDENT:
2365 case OMP_CLAUSE_AUTO:
2366 case OMP_CLAUSE_SEQ:
2367 case OMP_CLAUSE__GRIDDIM_:
2368 break;
2370 case OMP_CLAUSE_DEVICE_RESIDENT:
2371 case OMP_CLAUSE__CACHE_:
2372 sorry ("Clause not supported yet");
2373 break;
2375 default:
2376 gcc_unreachable ();
2380 gcc_checking_assert (!scan_array_reductions
2381 || !is_gimple_omp_oacc (ctx->stmt));
2382 if (scan_array_reductions)
2384 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2385 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
2386 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2388 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
2389 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
2391 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
2392 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2393 scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
2394 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2395 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2396 scan_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
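/* As a concrete sketch of the two passes above (illustrative, with
   hypothetical variables S, F and R):

     #pragma omp parallel shared(s) firstprivate(f) reduction(+:r)

   the first pass installs record fields for S (by reference when
   use_pointer_for_field says so), F and R, plus local copies where
   needed; the second pass fixes up the remapped decls and, via
   scan_array_reductions, scans any reduction init/merge sequences.  */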
2400 /* Create a new name for the omp child function. Returns an identifier. If
2401 IS_CILK_FOR is true then the suffix for the child function is
2402 "_cilk_for_fn." */
2404 static tree
2405 create_omp_child_function_name (bool task_copy, bool is_cilk_for)
2407 if (is_cilk_for)
2408 return clone_function_name (current_function_decl, "_cilk_for_fn");
2409 return clone_function_name (current_function_decl,
2410 task_copy ? "_omp_cpyfn" : "_omp_fn");
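/* For example, the first parallel region in foo typically yields a
   child function named "foo._omp_fn.0" and a task copy function
   "foo._omp_cpyfn.1"; the trailing numbers come from
   clone_function_name's counter, so the exact values shown here are
   illustrative.  */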
2413 /* Returns the type of the induction variable for the child function for
2414 _Cilk_for and the types for _high and _low variables based on TYPE. */
2416 static tree
2417 cilk_for_check_loop_diff_type (tree type)
2419 if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
2421 if (TYPE_UNSIGNED (type))
2422 return uint32_type_node;
2423 else
2424 return integer_type_node;
2426 else
2428 if (TYPE_UNSIGNED (type))
2429 return uint64_type_node;
2430 else
2431 return long_long_integer_type_node;
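/* A sketch of the mapping this implements, assuming 32-bit int and the
   usual type precisions:

     _Cilk_for (short i = ...)               -> int
     _Cilk_for (unsigned int i = ...)        -> uint32
     _Cilk_for (long long i = ...)           -> long long
     _Cilk_for (unsigned long long i = ...)  -> uint64  */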
2435 /* Build a decl for the omp child function. It will not contain a body
2436 yet, just the bare decl. */
2438 static void
2439 create_omp_child_function (omp_context *ctx, bool task_copy)
2441 tree decl, type, name, t;
2443 tree cilk_for_count
2444 = (flag_cilkplus && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2445 ? find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
2446 OMP_CLAUSE__CILK_FOR_COUNT_) : NULL_TREE;
2447 tree cilk_var_type = NULL_TREE;
2449 name = create_omp_child_function_name (task_copy,
2450 cilk_for_count != NULL_TREE);
2451 if (task_copy)
2452 type = build_function_type_list (void_type_node, ptr_type_node,
2453 ptr_type_node, NULL_TREE);
2454 else if (cilk_for_count)
2456 type = TREE_TYPE (OMP_CLAUSE_OPERAND (cilk_for_count, 0));
2457 cilk_var_type = cilk_for_check_loop_diff_type (type);
2458 type = build_function_type_list (void_type_node, ptr_type_node,
2459 cilk_var_type, cilk_var_type, NULL_TREE);
2461 else
2462 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2464 decl = build_decl (gimple_location (ctx->stmt), FUNCTION_DECL, name, type);
2466 gcc_checking_assert (!is_gimple_omp_oacc (ctx->stmt)
2467 || !task_copy);
2468 if (!task_copy)
2469 ctx->cb.dst_fn = decl;
2470 else
2471 gimple_omp_task_set_copy_fn (ctx->stmt, decl);
2473 TREE_STATIC (decl) = 1;
2474 TREE_USED (decl) = 1;
2475 DECL_ARTIFICIAL (decl) = 1;
2476 DECL_IGNORED_P (decl) = 0;
2477 TREE_PUBLIC (decl) = 0;
2478 DECL_UNINLINABLE (decl) = 1;
2479 DECL_EXTERNAL (decl) = 0;
2480 DECL_CONTEXT (decl) = NULL_TREE;
2481 DECL_INITIAL (decl) = make_node (BLOCK);
2482 if (cgraph_node::get (current_function_decl)->offloadable)
2483 cgraph_node::get_create (decl)->offloadable = 1;
2484 else
2486 omp_context *octx;
2487 for (octx = ctx; octx; octx = octx->outer)
2488 if (is_gimple_omp_offloaded (octx->stmt))
2490 cgraph_node::get_create (decl)->offloadable = 1;
2491 if (ENABLE_OFFLOADING)
2492 g->have_offload = true;
2494 break;
2498 if (cgraph_node::get_create (decl)->offloadable
2499 && !lookup_attribute ("omp declare target",
2500 DECL_ATTRIBUTES (current_function_decl)))
2501 DECL_ATTRIBUTES (decl)
2502 = tree_cons (get_identifier ("omp target entrypoint"),
2503 NULL_TREE, DECL_ATTRIBUTES (decl));
2505 t = build_decl (DECL_SOURCE_LOCATION (decl),
2506 RESULT_DECL, NULL_TREE, void_type_node);
2507 DECL_ARTIFICIAL (t) = 1;
2508 DECL_IGNORED_P (t) = 1;
2509 DECL_CONTEXT (t) = decl;
2510 DECL_RESULT (decl) = t;
2512 /* _Cilk_for's child function requires two extra parameters called
2513 __low and __high that are set by the Cilk runtime when it calls this
2514 function. */
2515 if (cilk_for_count)
2517 t = build_decl (DECL_SOURCE_LOCATION (decl),
2518 PARM_DECL, get_identifier ("__high"), cilk_var_type);
2519 DECL_ARTIFICIAL (t) = 1;
2520 DECL_NAMELESS (t) = 1;
2521 DECL_ARG_TYPE (t) = ptr_type_node;
2522 DECL_CONTEXT (t) = current_function_decl;
2523 TREE_USED (t) = 1;
2524 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2525 DECL_ARGUMENTS (decl) = t;
2527 t = build_decl (DECL_SOURCE_LOCATION (decl),
2528 PARM_DECL, get_identifier ("__low"), cilk_var_type);
2529 DECL_ARTIFICIAL (t) = 1;
2530 DECL_NAMELESS (t) = 1;
2531 DECL_ARG_TYPE (t) = ptr_type_node;
2532 DECL_CONTEXT (t) = current_function_decl;
2533 TREE_USED (t) = 1;
2534 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2535 DECL_ARGUMENTS (decl) = t;
2538 tree data_name = get_identifier (".omp_data_i");
2539 t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
2540 ptr_type_node);
2541 DECL_ARTIFICIAL (t) = 1;
2542 DECL_NAMELESS (t) = 1;
2543 DECL_ARG_TYPE (t) = ptr_type_node;
2544 DECL_CONTEXT (t) = current_function_decl;
2545 TREE_USED (t) = 1;
2546 TREE_READONLY (t) = 1;
2547 if (cilk_for_count)
2548 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2549 DECL_ARGUMENTS (decl) = t;
2550 if (!task_copy)
2551 ctx->receiver_decl = t;
2552 else
2554 t = build_decl (DECL_SOURCE_LOCATION (decl),
2555 PARM_DECL, get_identifier (".omp_data_o"),
2556 ptr_type_node);
2557 DECL_ARTIFICIAL (t) = 1;
2558 DECL_NAMELESS (t) = 1;
2559 DECL_ARG_TYPE (t) = ptr_type_node;
2560 DECL_CONTEXT (t) = current_function_decl;
2561 TREE_USED (t) = 1;
2562 TREE_ADDRESSABLE (t) = 1;
2563 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2564 DECL_ARGUMENTS (decl) = t;
2567 /* Allocate memory for the function structure. The call to
2568 allocate_struct_function clobbers CFUN, so we need to restore
2569 it afterward. */
2570 push_struct_function (decl);
2571 cfun->function_end_locus = gimple_location (ctx->stmt);
2572 pop_cfun ();
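/* The decl built above therefore has one of these shapes (illustrative
   prototypes; the clone numbers are arbitrary):

     void foo._omp_fn.0 (void *.omp_data_i);
     void foo._omp_cpyfn.1 (void *.omp_data_o, void *.omp_data_i);
     void foo._cilk_for_fn.2 (void *.omp_data_i, T __low, T __high);

   where T is the type chosen by cilk_for_check_loop_diff_type.  */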
2575 /* Callback for walk_gimple_seq. Check whether a combined parallel
2576 contains an OMP_FOR for which gimple_omp_for_combined_into_p holds. */
2578 static tree
2579 find_combined_for (gimple_stmt_iterator *gsi_p,
2580 bool *handled_ops_p,
2581 struct walk_stmt_info *wi)
2583 gimple *stmt = gsi_stmt (*gsi_p);
2585 *handled_ops_p = true;
2586 switch (gimple_code (stmt))
2588 WALK_SUBSTMTS;
2590 case GIMPLE_OMP_FOR:
2591 if (gimple_omp_for_combined_into_p (stmt)
2592 && gimple_omp_for_kind (stmt)
2593 == *(const enum gf_mask *) (wi->info))
2595 wi->info = stmt;
2596 return integer_zero_node;
2598 break;
2599 default:
2600 break;
2602 return NULL;
2605 /* Add _LOOPTEMP_ clauses on OpenMP parallel or task. */
2607 static void
2608 add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt,
2609 omp_context *outer_ctx)
2611 struct walk_stmt_info wi;
2613 memset (&wi, 0, sizeof (wi));
2614 wi.val_only = true;
2615 wi.info = (void *) &msk;
2616 walk_gimple_seq (gimple_omp_body (stmt), find_combined_for, NULL, &wi);
2617 if (wi.info != (void *) &msk)
2619 gomp_for *for_stmt = as_a <gomp_for *> ((gimple *) wi.info);
2620 struct omp_for_data fd;
2621 extract_omp_for_data (for_stmt, &fd, NULL);
2622 /* We need two temporaries with fd.loop.v type (istart/iend)
2623 and then (fd.collapse - 1) temporaries with the same
2624 type for count2 ... countN-1 vars if not constant. */
2625 size_t count = 2, i;
2626 tree type = fd.iter_type;
2627 if (fd.collapse > 1
2628 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
2630 count += fd.collapse - 1;
2631 /* If there are lastprivate clauses on the inner
2632 GIMPLE_OMP_FOR, add one more temporary for the total number
2633 of iterations (product of count1 ... countN-1). */
2634 if (find_omp_clause (gimple_omp_for_clauses (for_stmt),
2635 OMP_CLAUSE_LASTPRIVATE))
2636 count++;
2637 else if (msk == GF_OMP_FOR_KIND_FOR
2638 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2639 OMP_CLAUSE_LASTPRIVATE))
2640 count++;
2642 for (i = 0; i < count; i++)
2644 tree temp = create_tmp_var (type);
2645 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
2646 insert_decl_map (&outer_ctx->cb, temp, temp);
2647 OMP_CLAUSE_DECL (c) = temp;
2648 OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
2649 gimple_omp_taskreg_set_clauses (stmt, c);
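/* Worked example (illustrative): for

     #pragma omp parallel for collapse(2) lastprivate(x)

   with a non-constant iteration count, count is 2 (istart/iend)
   + 1 (count2) + 1 (the lastprivate total-iterations temporary) = 4,
   so four _LOOPTEMP_ clauses are prepended to the parallel.  */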
2654 /* Scan an OpenMP parallel directive. */
2656 static void
2657 scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2659 omp_context *ctx;
2660 tree name;
2661 gomp_parallel *stmt = as_a <gomp_parallel *> (gsi_stmt (*gsi));
2663 /* Ignore parallel directives with empty bodies, unless there
2664 are copyin clauses. */
2665 if (optimize > 0
2666 && empty_body_p (gimple_omp_body (stmt))
2667 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2668 OMP_CLAUSE_COPYIN) == NULL)
2670 gsi_replace (gsi, gimple_build_nop (), false);
2671 return;
2674 if (gimple_omp_parallel_combined_p (stmt))
2675 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_FOR, stmt, outer_ctx);
2677 ctx = new_omp_context (stmt, outer_ctx);
2678 taskreg_contexts.safe_push (ctx);
2679 if (taskreg_nesting_level > 1)
2680 ctx->is_nested = true;
2681 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2682 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2683 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2684 name = create_tmp_var_name (".omp_data_s");
2685 name = build_decl (gimple_location (stmt),
2686 TYPE_DECL, name, ctx->record_type);
2687 DECL_ARTIFICIAL (name) = 1;
2688 DECL_NAMELESS (name) = 1;
2689 TYPE_NAME (ctx->record_type) = name;
2690 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2691 if (!gimple_omp_parallel_grid_phony (stmt))
2693 create_omp_child_function (ctx, false);
2694 gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
2697 scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
2698 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2700 if (TYPE_FIELDS (ctx->record_type) == NULL)
2701 ctx->record_type = ctx->receiver_decl = NULL;
2704 /* Scan an OpenMP task directive. */
2706 static void
2707 scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2709 omp_context *ctx;
2710 tree name, t;
2711 gomp_task *stmt = as_a <gomp_task *> (gsi_stmt (*gsi));
2713 /* Ignore task directives with empty bodies. */
2714 if (optimize > 0
2715 && empty_body_p (gimple_omp_body (stmt)))
2717 gsi_replace (gsi, gimple_build_nop (), false);
2718 return;
2721 if (gimple_omp_task_taskloop_p (stmt))
2722 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_TASKLOOP, stmt, outer_ctx);
2724 ctx = new_omp_context (stmt, outer_ctx);
2725 taskreg_contexts.safe_push (ctx);
2726 if (taskreg_nesting_level > 1)
2727 ctx->is_nested = true;
2728 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2729 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2730 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2731 name = create_tmp_var_name (".omp_data_s");
2732 name = build_decl (gimple_location (stmt),
2733 TYPE_DECL, name, ctx->record_type);
2734 DECL_ARTIFICIAL (name) = 1;
2735 DECL_NAMELESS (name) = 1;
2736 TYPE_NAME (ctx->record_type) = name;
2737 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2738 create_omp_child_function (ctx, false);
2739 gimple_omp_task_set_child_fn (stmt, ctx->cb.dst_fn);
2741 scan_sharing_clauses (gimple_omp_task_clauses (stmt), ctx);
2743 if (ctx->srecord_type)
2745 name = create_tmp_var_name (".omp_data_a");
2746 name = build_decl (gimple_location (stmt),
2747 TYPE_DECL, name, ctx->srecord_type);
2748 DECL_ARTIFICIAL (name) = 1;
2749 DECL_NAMELESS (name) = 1;
2750 TYPE_NAME (ctx->srecord_type) = name;
2751 TYPE_ARTIFICIAL (ctx->srecord_type) = 1;
2752 create_omp_child_function (ctx, true);
2755 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2757 if (TYPE_FIELDS (ctx->record_type) == NULL)
2759 ctx->record_type = ctx->receiver_decl = NULL;
2760 t = build_int_cst (long_integer_type_node, 0);
2761 gimple_omp_task_set_arg_size (stmt, t);
2762 t = build_int_cst (long_integer_type_node, 1);
2763 gimple_omp_task_set_arg_align (stmt, t);
2768 /* If any decls have been made addressable during scan_omp,
2769 adjust their fields if needed, and lay out the record types
2770 of parallel/task constructs. */
2772 static void
2773 finish_taskreg_scan (omp_context *ctx)
2775 if (ctx->record_type == NULL_TREE)
2776 return;
2778 /* If any task_shared_vars were needed, verify for all
2779 OMP_CLAUSE_SHARED clauses on GIMPLE_OMP_{PARALLEL,TASK}
2780 statements whether use_pointer_for_field has changed
2781 because of that. If it did, update the field types now. */
2782 if (task_shared_vars)
2784 tree c;
2786 for (c = gimple_omp_taskreg_clauses (ctx->stmt);
2787 c; c = OMP_CLAUSE_CHAIN (c))
2788 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
2789 && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2791 tree decl = OMP_CLAUSE_DECL (c);
2793 /* Global variables don't need to be copied,
2794 the receiver side will use them directly. */
2795 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2796 continue;
2797 if (!bitmap_bit_p (task_shared_vars, DECL_UID (decl))
2798 || !use_pointer_for_field (decl, ctx))
2799 continue;
2800 tree field = lookup_field (decl, ctx);
2801 if (TREE_CODE (TREE_TYPE (field)) == POINTER_TYPE
2802 && TREE_TYPE (TREE_TYPE (field)) == TREE_TYPE (decl))
2803 continue;
2804 TREE_TYPE (field) = build_pointer_type (TREE_TYPE (decl));
2805 TREE_THIS_VOLATILE (field) = 0;
2806 DECL_USER_ALIGN (field) = 0;
2807 DECL_ALIGN (field) = TYPE_ALIGN (TREE_TYPE (field));
2808 if (TYPE_ALIGN (ctx->record_type) < DECL_ALIGN (field))
2809 TYPE_ALIGN (ctx->record_type) = DECL_ALIGN (field);
2810 if (ctx->srecord_type)
2812 tree sfield = lookup_sfield (decl, ctx);
2813 TREE_TYPE (sfield) = TREE_TYPE (field);
2814 TREE_THIS_VOLATILE (sfield) = 0;
2815 DECL_USER_ALIGN (sfield) = 0;
2816 DECL_ALIGN (sfield) = DECL_ALIGN (field);
2817 if (TYPE_ALIGN (ctx->srecord_type) < DECL_ALIGN (sfield))
2818 TYPE_ALIGN (ctx->srecord_type) = DECL_ALIGN (sfield);
2823 if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2825 layout_type (ctx->record_type);
2826 fixup_child_record_type (ctx);
2828 else
2830 location_t loc = gimple_location (ctx->stmt);
2831 tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
2832 /* Move VLA fields to the end. */
2833 p = &TYPE_FIELDS (ctx->record_type);
2834 while (*p)
2835 if (!TYPE_SIZE_UNIT (TREE_TYPE (*p))
2836 || ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p))))
2838 *q = *p;
2839 *p = TREE_CHAIN (*p);
2840 TREE_CHAIN (*q) = NULL_TREE;
2841 q = &TREE_CHAIN (*q);
2843 else
2844 p = &DECL_CHAIN (*p);
2845 *p = vla_fields;
2846 if (gimple_omp_task_taskloop_p (ctx->stmt))
2848 /* Move fields corresponding to first and second _looptemp_
2849 clause first. These are filled by GOMP_taskloop
2850 and thus need to be in specific positions. */
2851 tree c1 = gimple_omp_task_clauses (ctx->stmt);
2852 c1 = find_omp_clause (c1, OMP_CLAUSE__LOOPTEMP_);
2853 tree c2 = find_omp_clause (OMP_CLAUSE_CHAIN (c1),
2854 OMP_CLAUSE__LOOPTEMP_);
2855 tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx);
2856 tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx);
2857 p = &TYPE_FIELDS (ctx->record_type);
2858 while (*p)
2859 if (*p == f1 || *p == f2)
2860 *p = DECL_CHAIN (*p);
2861 else
2862 p = &DECL_CHAIN (*p);
2863 DECL_CHAIN (f1) = f2;
2864 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->record_type);
2865 TYPE_FIELDS (ctx->record_type) = f1;
2866 if (ctx->srecord_type)
2868 f1 = lookup_sfield (OMP_CLAUSE_DECL (c1), ctx);
2869 f2 = lookup_sfield (OMP_CLAUSE_DECL (c2), ctx);
2870 p = &TYPE_FIELDS (ctx->srecord_type);
2871 while (*p)
2872 if (*p == f1 || *p == f2)
2873 *p = DECL_CHAIN (*p);
2874 else
2875 p = &DECL_CHAIN (*p);
2876 DECL_CHAIN (f1) = f2;
2877 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->srecord_type);
2878 TYPE_FIELDS (ctx->srecord_type) = f1;
2881 layout_type (ctx->record_type);
2882 fixup_child_record_type (ctx);
2883 if (ctx->srecord_type)
2884 layout_type (ctx->srecord_type);
2885 tree t = fold_convert_loc (loc, long_integer_type_node,
2886 TYPE_SIZE_UNIT (ctx->record_type));
2887 gimple_omp_task_set_arg_size (ctx->stmt, t);
2888 t = build_int_cst (long_integer_type_node,
2889 TYPE_ALIGN_UNIT (ctx->record_type));
2890 gimple_omp_task_set_arg_align (ctx->stmt, t);
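/* To sketch the reordering for a taskloop (illustrative field names):
   starting from fields { v, t1, t2, a }, where V has variable size and
   T1/T2 belong to the first two _looptemp_ clauses, the code above
   first moves V to the end and then pulls T1/T2 to the front, giving
   { t1, t2, a, v } -- the positions GOMP_taskloop fills in.  */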
2894 /* Find the enclosing offload context. */
2896 static omp_context *
2897 enclosing_target_ctx (omp_context *ctx)
2899 for (; ctx; ctx = ctx->outer)
2900 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET)
2901 break;
2903 return ctx;
2906 /* Return true if ctx is part of an oacc kernels region. */
2908 static bool
2909 ctx_in_oacc_kernels_region (omp_context *ctx)
2911 for (;ctx != NULL; ctx = ctx->outer)
2913 gimple *stmt = ctx->stmt;
2914 if (gimple_code (stmt) == GIMPLE_OMP_TARGET
2915 && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
2916 return true;
2919 return false;
2922 /* Check the parallelism clauses inside a kernels region.
2923 Until kernels handling moves to use the same loop indirection
2924 scheme as parallel, we need to do this checking early. */
2926 static unsigned
2927 check_oacc_kernel_gwv (gomp_for *stmt, omp_context *ctx)
2929 bool checking = true;
2930 unsigned outer_mask = 0;
2931 unsigned this_mask = 0;
2932 bool has_seq = false, has_auto = false;
2934 if (ctx->outer)
2935 outer_mask = check_oacc_kernel_gwv (NULL, ctx->outer);
2936 if (!stmt)
2938 checking = false;
2939 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR)
2940 return outer_mask;
2941 stmt = as_a <gomp_for *> (ctx->stmt);
2944 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
2946 switch (OMP_CLAUSE_CODE (c))
2948 case OMP_CLAUSE_GANG:
2949 this_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2950 break;
2951 case OMP_CLAUSE_WORKER:
2952 this_mask |= GOMP_DIM_MASK (GOMP_DIM_WORKER);
2953 break;
2954 case OMP_CLAUSE_VECTOR:
2955 this_mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
2956 break;
2957 case OMP_CLAUSE_SEQ:
2958 has_seq = true;
2959 break;
2960 case OMP_CLAUSE_AUTO:
2961 has_auto = true;
2962 break;
2963 default:
2964 break;
2968 if (checking)
2970 if (has_seq && (this_mask || has_auto))
2971 error_at (gimple_location (stmt), "%<seq%> overrides other"
2972 " OpenACC loop specifiers");
2973 else if (has_auto && this_mask)
2974 error_at (gimple_location (stmt), "%<auto%> conflicts with other"
2975 " OpenACC loop specifiers");
2977 if (this_mask & outer_mask)
2978 error_at (gimple_location (stmt), "inner loop uses same"
2979 " OpenACC parallelism as containing loop");
2982 return outer_mask | this_mask;
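/* Illustrative inputs the checks above reject:

     #pragma acc loop seq gang       seq overrides other loop specifiers
     #pragma acc loop auto worker    auto conflicts with other specifiers

   and a gang-partitioned loop nested inside another gang loop of the
   same kernels region hits the "inner loop uses same OpenACC
   parallelism" diagnostic.  */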
2985 /* Scan a GIMPLE_OMP_FOR. */
2987 static void
2988 scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
2990 omp_context *ctx;
2991 size_t i;
2992 tree clauses = gimple_omp_for_clauses (stmt);
2994 ctx = new_omp_context (stmt, outer_ctx);
2996 if (is_gimple_omp_oacc (stmt))
2998 omp_context *tgt = enclosing_target_ctx (outer_ctx);
3000 if (!tgt || is_oacc_parallel (tgt))
3001 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3003 char const *check = NULL;
3005 switch (OMP_CLAUSE_CODE (c))
3007 case OMP_CLAUSE_GANG:
3008 check = "gang";
3009 break;
3011 case OMP_CLAUSE_WORKER:
3012 check = "worker";
3013 break;
3015 case OMP_CLAUSE_VECTOR:
3016 check = "vector";
3017 break;
3019 default:
3020 break;
3023 if (check && OMP_CLAUSE_OPERAND (c, 0))
3024 error_at (gimple_location (stmt),
3025 "argument not permitted on %qs clause in"
3026 " OpenACC %<parallel%>", check);
3029 if (tgt && is_oacc_kernels (tgt))
3031 /* Strip out reductions, as they are not handled yet. */
3032 tree *prev_ptr = &clauses;
3034 while (tree probe = *prev_ptr)
3036 tree *next_ptr = &OMP_CLAUSE_CHAIN (probe);
3038 if (OMP_CLAUSE_CODE (probe) == OMP_CLAUSE_REDUCTION)
3039 *prev_ptr = *next_ptr;
3040 else
3041 prev_ptr = next_ptr;
3044 gimple_omp_for_set_clauses (stmt, clauses);
3045 check_oacc_kernel_gwv (stmt, ctx);
3049 scan_sharing_clauses (clauses, ctx);
3051 scan_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
3052 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
3054 scan_omp_op (gimple_omp_for_index_ptr (stmt, i), ctx);
3055 scan_omp_op (gimple_omp_for_initial_ptr (stmt, i), ctx);
3056 scan_omp_op (gimple_omp_for_final_ptr (stmt, i), ctx);
3057 scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx);
3059 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3062 /* Scan an OpenMP sections directive. */
3064 static void
3065 scan_omp_sections (gomp_sections *stmt, omp_context *outer_ctx)
3067 omp_context *ctx;
3069 ctx = new_omp_context (stmt, outer_ctx);
3070 scan_sharing_clauses (gimple_omp_sections_clauses (stmt), ctx);
3071 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3074 /* Scan an OpenMP single directive. */
3076 static void
3077 scan_omp_single (gomp_single *stmt, omp_context *outer_ctx)
3079 omp_context *ctx;
3080 tree name;
3082 ctx = new_omp_context (stmt, outer_ctx);
3083 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3084 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3085 name = create_tmp_var_name (".omp_copy_s");
3086 name = build_decl (gimple_location (stmt),
3087 TYPE_DECL, name, ctx->record_type);
3088 TYPE_NAME (ctx->record_type) = name;
3090 scan_sharing_clauses (gimple_omp_single_clauses (stmt), ctx);
3091 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3093 if (TYPE_FIELDS (ctx->record_type) == NULL)
3094 ctx->record_type = NULL;
3095 else
3096 layout_type (ctx->record_type);
3099 /* Return true if the CLAUSES of an omp target guarantee that the base pointers
3100 used in the corresponding offloaded function are restrict. */
3102 static bool
3103 omp_target_base_pointers_restrict_p (tree clauses)
3105 /* The analysis relies on the GOMP_MAP_FORCE_* mapping kinds, which are only
3106 used by OpenACC. */
3107 if (flag_openacc == 0)
3108 return false;
3110 /* I. Basic example:
3112 void foo (void)
3114 unsigned int a[2], b[2];
3116 #pragma acc kernels \
3117 copyout (a) \
3118 copyout (b)
3120 a[0] = 0;
3121 b[0] = 1;
3125 After gimplification, we have:
3127 #pragma omp target oacc_kernels \
3128 map(force_from:a [len: 8]) \
3129 map(force_from:b [len: 8])
3131 a[0] = 0;
3132 b[0] = 1;
3135 Because both mappings have the force prefix, we know that they will be
3136 allocated when calling the corresponding offloaded function, which means we
3137 can mark the base pointers for a and b in the offloaded function as
3138 restrict. */
3140 tree c;
3141 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3143 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
3144 return false;
3146 switch (OMP_CLAUSE_MAP_KIND (c))
3148 case GOMP_MAP_FORCE_ALLOC:
3149 case GOMP_MAP_FORCE_TO:
3150 case GOMP_MAP_FORCE_FROM:
3151 case GOMP_MAP_FORCE_TOFROM:
3152 break;
3153 default:
3154 return false;
3158 return true;
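/* The test is deliberately conservative: with OpenACC disabled, with
   any non-map clause present, or with any map kind outside the four
   GOMP_MAP_FORCE_* kinds above (e.g. the GOMP_MAP_TOFROM produced by
   OpenMP's map(tofrom: a)), the function returns false and no restrict
   qualification is applied.  */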
3161 /* Scan a GIMPLE_OMP_TARGET. */
3163 static void
3164 scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
3166 omp_context *ctx;
3167 tree name;
3168 bool offloaded = is_gimple_omp_offloaded (stmt);
3169 tree clauses = gimple_omp_target_clauses (stmt);
3171 ctx = new_omp_context (stmt, outer_ctx);
3172 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3173 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
3174 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3175 name = create_tmp_var_name (".omp_data_t");
3176 name = build_decl (gimple_location (stmt),
3177 TYPE_DECL, name, ctx->record_type);
3178 DECL_ARTIFICIAL (name) = 1;
3179 DECL_NAMELESS (name) = 1;
3180 TYPE_NAME (ctx->record_type) = name;
3181 TYPE_ARTIFICIAL (ctx->record_type) = 1;
3183 bool base_pointers_restrict = false;
3184 if (offloaded)
3186 create_omp_child_function (ctx, false);
3187 gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
3189 base_pointers_restrict = omp_target_base_pointers_restrict_p (clauses);
3190 if (base_pointers_restrict
3191 && dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file,
3193 "Base pointers in offloaded function are restrict\n");
3196 scan_sharing_clauses (clauses, ctx, base_pointers_restrict);
3197 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3199 if (TYPE_FIELDS (ctx->record_type) == NULL)
3200 ctx->record_type = ctx->receiver_decl = NULL;
3201 else
3203 TYPE_FIELDS (ctx->record_type)
3204 = nreverse (TYPE_FIELDS (ctx->record_type));
3205 if (flag_checking)
3207 unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
3208 for (tree field = TYPE_FIELDS (ctx->record_type);
3209 field;
3210 field = DECL_CHAIN (field))
3211 gcc_assert (DECL_ALIGN (field) == align);
3213 layout_type (ctx->record_type);
3214 if (offloaded)
3215 fixup_child_record_type (ctx);
3219 /* Scan an OpenMP teams directive. */
3221 static void
3222 scan_omp_teams (gomp_teams *stmt, omp_context *outer_ctx)
3224 omp_context *ctx = new_omp_context (stmt, outer_ctx);
3225 scan_sharing_clauses (gimple_omp_teams_clauses (stmt), ctx);
3226 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3229 /* Check nesting restrictions. */
3230 static bool
3231 check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
3233 tree c;
3235 if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY)
3236 /* GRID_BODY is an artificial construct; nesting rules will be checked in
3237 the original copy of its contents. */
3238 return true;
3240 /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin)
3241 inside an OpenACC CTX. */
3242 if (!(is_gimple_omp (stmt)
3243 && is_gimple_omp_oacc (stmt))
3244 /* Except for atomic codes that we share with OpenMP. */
3245 && !(gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
3246 || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
3248 if (get_oacc_fn_attrib (cfun->decl) != NULL)
3250 error_at (gimple_location (stmt),
3251 "non-OpenACC construct inside of OpenACC routine");
3252 return false;
3254 else
3255 for (omp_context *octx = ctx; octx != NULL; octx = octx->outer)
3256 if (is_gimple_omp (octx->stmt)
3257 && is_gimple_omp_oacc (octx->stmt))
3259 error_at (gimple_location (stmt),
3260 "non-OpenACC construct inside of OpenACC region");
3261 return false;
3265 if (ctx != NULL)
3267 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3268 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3270 c = NULL_TREE;
3271 if (gimple_code (stmt) == GIMPLE_OMP_ORDERED)
3273 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3274 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3276 if (find_omp_clause (c, OMP_CLAUSE_THREADS)
3277 && (ctx->outer == NULL
3278 || !gimple_omp_for_combined_into_p (ctx->stmt)
3279 || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR
3280 || (gimple_omp_for_kind (ctx->outer->stmt)
3281 != GF_OMP_FOR_KIND_FOR)
3282 || !gimple_omp_for_combined_p (ctx->outer->stmt)))
3284 error_at (gimple_location (stmt),
3285 "%<ordered simd threads%> must be closely "
3286 "nested inside of %<for simd%> region");
3287 return false;
3289 return true;
3292 error_at (gimple_location (stmt),
3293 "OpenMP constructs other than %<#pragma omp ordered simd%>"
3294 " may not be nested inside %<simd%> region");
3295 return false;
3297 else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
3299 if ((gimple_code (stmt) != GIMPLE_OMP_FOR
3300 || (gimple_omp_for_kind (stmt)
3301 != GF_OMP_FOR_KIND_DISTRIBUTE))
3302 && gimple_code (stmt) != GIMPLE_OMP_PARALLEL)
3304 error_at (gimple_location (stmt),
3305 "only %<distribute%> or %<parallel%> regions are "
3306 "allowed to be strictly nested inside %<teams%> "
3307 "region");
3308 return false;
3312 switch (gimple_code (stmt))
3314 case GIMPLE_OMP_FOR:
3315 if (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)
3316 return true;
3317 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3319 if (ctx != NULL && gimple_code (ctx->stmt) != GIMPLE_OMP_TEAMS)
3321 error_at (gimple_location (stmt),
3322 "%<distribute%> region must be strictly nested "
3323 "inside %<teams%> construct");
3324 return false;
3326 return true;
3328 /* We split a taskloop into a task with a nested taskloop in it. */
3329 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3330 return true;
3331 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
3333 bool ok = false;
3335 if (ctx)
3336 switch (gimple_code (ctx->stmt))
3338 case GIMPLE_OMP_FOR:
3339 ok = (gimple_omp_for_kind (ctx->stmt)
3340 == GF_OMP_FOR_KIND_OACC_LOOP);
3341 break;
3343 case GIMPLE_OMP_TARGET:
3344 switch (gimple_omp_target_kind (ctx->stmt))
3346 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3347 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3348 ok = true;
3349 break;
3351 default:
3352 break;
3355 default:
3356 break;
3358 else if (get_oacc_fn_attrib (current_function_decl))
3359 ok = true;
3360 if (!ok)
3362 error_at (gimple_location (stmt),
3363 "OpenACC loop directive must be associated with"
3364 " an OpenACC compute region");
3365 return false;
3368 /* FALLTHRU */
3369 case GIMPLE_CALL:
3370 if (is_gimple_call (stmt)
3371 && (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3372 == BUILT_IN_GOMP_CANCEL
3373 || DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3374 == BUILT_IN_GOMP_CANCELLATION_POINT))
3376 const char *bad = NULL;
3377 const char *kind = NULL;
3378 const char *construct
3379 = (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3380 == BUILT_IN_GOMP_CANCEL)
3381 ? "#pragma omp cancel"
3382 : "#pragma omp cancellation point";
3383 if (ctx == NULL)
3385 error_at (gimple_location (stmt), "orphaned %qs construct",
3386 construct);
3387 return false;
3389 switch (tree_fits_shwi_p (gimple_call_arg (stmt, 0))
3390 ? tree_to_shwi (gimple_call_arg (stmt, 0))
3391 : 0)
3393 case 1:
3394 if (gimple_code (ctx->stmt) != GIMPLE_OMP_PARALLEL)
3395 bad = "#pragma omp parallel";
3396 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3397 == BUILT_IN_GOMP_CANCEL
3398 && !integer_zerop (gimple_call_arg (stmt, 1)))
3399 ctx->cancellable = true;
3400 kind = "parallel";
3401 break;
3402 case 2:
3403 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3404 || gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR)
3405 bad = "#pragma omp for";
3406 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3407 == BUILT_IN_GOMP_CANCEL
3408 && !integer_zerop (gimple_call_arg (stmt, 1)))
3410 ctx->cancellable = true;
3411 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3412 OMP_CLAUSE_NOWAIT))
3413 warning_at (gimple_location (stmt), 0,
3414 "%<#pragma omp cancel for%> inside "
3415 "%<nowait%> for construct");
3416 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3417 OMP_CLAUSE_ORDERED))
3418 warning_at (gimple_location (stmt), 0,
3419 "%<#pragma omp cancel for%> inside "
3420 "%<ordered%> for construct");
3422 kind = "for";
3423 break;
3424 case 4:
3425 if (gimple_code (ctx->stmt) != GIMPLE_OMP_SECTIONS
3426 && gimple_code (ctx->stmt) != GIMPLE_OMP_SECTION)
3427 bad = "#pragma omp sections";
3428 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3429 == BUILT_IN_GOMP_CANCEL
3430 && !integer_zerop (gimple_call_arg (stmt, 1)))
3432 if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS)
3434 ctx->cancellable = true;
3435 if (find_omp_clause (gimple_omp_sections_clauses
3436 (ctx->stmt),
3437 OMP_CLAUSE_NOWAIT))
3438 warning_at (gimple_location (stmt), 0,
3439 "%<#pragma omp cancel sections%> inside "
3440 "%<nowait%> sections construct");
3442 else
3444 gcc_assert (ctx->outer
3445 && gimple_code (ctx->outer->stmt)
3446 == GIMPLE_OMP_SECTIONS);
3447 ctx->outer->cancellable = true;
3448 if (find_omp_clause (gimple_omp_sections_clauses
3449 (ctx->outer->stmt),
3450 OMP_CLAUSE_NOWAIT))
3451 warning_at (gimple_location (stmt), 0,
3452 "%<#pragma omp cancel sections%> inside "
3453 "%<nowait%> sections construct");
3456 kind = "sections";
3457 break;
3458 case 8:
3459 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TASK)
3460 bad = "#pragma omp task";
3461 else
3463 for (omp_context *octx = ctx->outer;
3464 octx; octx = octx->outer)
3466 switch (gimple_code (octx->stmt))
3468 case GIMPLE_OMP_TASKGROUP:
3469 break;
3470 case GIMPLE_OMP_TARGET:
3471 if (gimple_omp_target_kind (octx->stmt)
3472 != GF_OMP_TARGET_KIND_REGION)
3473 continue;
3474 /* FALLTHRU */
3475 case GIMPLE_OMP_PARALLEL:
3476 case GIMPLE_OMP_TEAMS:
3477 error_at (gimple_location (stmt),
3478 "%<%s taskgroup%> construct not closely "
3479 "nested inside of %<taskgroup%> region",
3480 construct);
3481 return false;
3482 default:
3483 continue;
3485 break;
3487 ctx->cancellable = true;
3489 kind = "taskgroup";
3490 break;
3491 default:
3492 error_at (gimple_location (stmt), "invalid arguments");
3493 return false;
3495 if (bad)
3497 error_at (gimple_location (stmt),
3498 "%<%s %s%> construct not closely nested inside of %qs",
3499 construct, kind, bad);
3500 return false;
3503 /* FALLTHRU */
3504 case GIMPLE_OMP_SECTIONS:
3505 case GIMPLE_OMP_SINGLE:
3506 for (; ctx != NULL; ctx = ctx->outer)
3507 switch (gimple_code (ctx->stmt))
3509 case GIMPLE_OMP_FOR:
3510 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3511 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3512 break;
3513 /* FALLTHRU */
3514 case GIMPLE_OMP_SECTIONS:
3515 case GIMPLE_OMP_SINGLE:
3516 case GIMPLE_OMP_ORDERED:
3517 case GIMPLE_OMP_MASTER:
3518 case GIMPLE_OMP_TASK:
3519 case GIMPLE_OMP_CRITICAL:
3520 if (is_gimple_call (stmt))
3522 if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3523 != BUILT_IN_GOMP_BARRIER)
3524 return true;
3525 error_at (gimple_location (stmt),
3526 "barrier region may not be closely nested inside "
3527 "of work-sharing, %<critical%>, %<ordered%>, "
3528 "%<master%>, explicit %<task%> or %<taskloop%> "
3529 "region");
3530 return false;
3532 error_at (gimple_location (stmt),
3533 "work-sharing region may not be closely nested inside "
3534 "of work-sharing, %<critical%>, %<ordered%>, "
3535 "%<master%>, explicit %<task%> or %<taskloop%> region");
3536 return false;
3537 case GIMPLE_OMP_PARALLEL:
3538 case GIMPLE_OMP_TEAMS:
3539 return true;
3540 case GIMPLE_OMP_TARGET:
3541 if (gimple_omp_target_kind (ctx->stmt)
3542 == GF_OMP_TARGET_KIND_REGION)
3543 return true;
3544 break;
3545 default:
3546 break;
3548 break;
3549 case GIMPLE_OMP_MASTER:
3550 for (; ctx != NULL; ctx = ctx->outer)
3551 switch (gimple_code (ctx->stmt))
3553 case GIMPLE_OMP_FOR:
3554 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3555 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3556 break;
3557 /* FALLTHRU */
3558 case GIMPLE_OMP_SECTIONS:
3559 case GIMPLE_OMP_SINGLE:
3560 case GIMPLE_OMP_TASK:
3561 error_at (gimple_location (stmt),
3562 "%<master%> region may not be closely nested inside "
3563 "of work-sharing, explicit %<task%> or %<taskloop%> "
3564 "region");
3565 return false;
3566 case GIMPLE_OMP_PARALLEL:
3567 case GIMPLE_OMP_TEAMS:
3568 return true;
3569 case GIMPLE_OMP_TARGET:
3570 if (gimple_omp_target_kind (ctx->stmt)
3571 == GF_OMP_TARGET_KIND_REGION)
3572 return true;
3573 break;
3574 default:
3575 break;
3577 break;
3578 case GIMPLE_OMP_TASK:
3579 for (c = gimple_omp_task_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3580 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3581 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3582 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3584 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3585 error_at (OMP_CLAUSE_LOCATION (c),
3586 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3587 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3588 return false;
3590 break;
3591 case GIMPLE_OMP_ORDERED:
3592 for (c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3593 c; c = OMP_CLAUSE_CHAIN (c))
3595 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND)
3597 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_THREADS
3598 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SIMD);
3599 continue;
3601 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3602 if (kind == OMP_CLAUSE_DEPEND_SOURCE
3603 || kind == OMP_CLAUSE_DEPEND_SINK)
3605 tree oclause;
3606 /* Look for containing ordered(N) loop. */
3607 if (ctx == NULL
3608 || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3609 || (oclause
3610 = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3611 OMP_CLAUSE_ORDERED)) == NULL_TREE)
3613 error_at (OMP_CLAUSE_LOCATION (c),
3614 "%<ordered%> construct with %<depend%> clause "
3615 "must be closely nested inside an %<ordered%> "
3616 "loop");
3617 return false;
3619 else if (OMP_CLAUSE_ORDERED_EXPR (oclause) == NULL_TREE)
3621 error_at (OMP_CLAUSE_LOCATION (c),
3622 "%<ordered%> construct with %<depend%> clause "
3623 "must be closely nested inside a loop with "
3624 "%<ordered%> clause with a parameter");
3625 return false;
3628 else
3630 error_at (OMP_CLAUSE_LOCATION (c),
3631 "invalid depend kind in omp %<ordered%> %<depend%>");
3632 return false;
3635 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3636 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3638 /* An ordered simd must be closely nested inside a simd region,
3639 and a simd region must not contain constructs other than
3640 ordered simd; therefore an ordered simd is either orphaned,
3641 or ctx->stmt must be a simd. The latter case was already
3642 handled earlier. */
3643 if (ctx != NULL)
3645 error_at (gimple_location (stmt),
3646 "%<ordered%> %<simd%> must be closely nested inside "
3647 "%<simd%> region");
3648 return false;
3651 for (; ctx != NULL; ctx = ctx->outer)
3652 switch (gimple_code (ctx->stmt))
3654 case GIMPLE_OMP_CRITICAL:
3655 case GIMPLE_OMP_TASK:
3656 case GIMPLE_OMP_ORDERED:
3657 ordered_in_taskloop:
3658 error_at (gimple_location (stmt),
3659 "%<ordered%> region may not be closely nested inside "
3660 "of %<critical%>, %<ordered%>, explicit %<task%> or "
3661 "%<taskloop%> region");
3662 return false;
3663 case GIMPLE_OMP_FOR:
3664 if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3665 goto ordered_in_taskloop;
3666 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3667 OMP_CLAUSE_ORDERED) == NULL)
3669 error_at (gimple_location (stmt),
3670 "%<ordered%> region must be closely nested inside "
3671 "a loop region with an %<ordered%> clause");
3672 return false;
3674 return true;
3675 case GIMPLE_OMP_TARGET:
3676 if (gimple_omp_target_kind (ctx->stmt)
3677 != GF_OMP_TARGET_KIND_REGION)
3678 break;
3679 /* FALLTHRU */
3680 case GIMPLE_OMP_PARALLEL:
3681 case GIMPLE_OMP_TEAMS:
3682 error_at (gimple_location (stmt),
3683 "%<ordered%> region must be closely nested inside "
3684 "a loop region with an %<ordered%> clause");
3685 return false;
3686 default:
3687 break;
3689 break;
3690 case GIMPLE_OMP_CRITICAL:
3692 tree this_stmt_name
3693 = gimple_omp_critical_name (as_a <gomp_critical *> (stmt));
3694 for (; ctx != NULL; ctx = ctx->outer)
3695 if (gomp_critical *other_crit
3696 = dyn_cast <gomp_critical *> (ctx->stmt))
3697 if (this_stmt_name == gimple_omp_critical_name (other_crit))
3699 error_at (gimple_location (stmt),
3700 "%<critical%> region may not be nested inside "
3701 "a %<critical%> region with the same name");
3702 return false;
3705 break;
3706 case GIMPLE_OMP_TEAMS:
3707 if (ctx == NULL
3708 || gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET
3709 || gimple_omp_target_kind (ctx->stmt) != GF_OMP_TARGET_KIND_REGION)
3711 error_at (gimple_location (stmt),
3712 "%<teams%> construct not closely nested inside of "
3713 "%<target%> construct");
3714 return false;
3716 break;
3717 case GIMPLE_OMP_TARGET:
3718 for (c = gimple_omp_target_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3719 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3720 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3721 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3723 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3724 error_at (OMP_CLAUSE_LOCATION (c),
3725 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3726 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3727 return false;
3729 if (is_gimple_omp_offloaded (stmt)
3730 && get_oacc_fn_attrib (cfun->decl) != NULL)
3732 error_at (gimple_location (stmt),
3733 "OpenACC region inside of OpenACC routine, nested "
3734 "parallelism not supported yet");
3735 return false;
3737 for (; ctx != NULL; ctx = ctx->outer)
3739 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET)
3741 if (is_gimple_omp (stmt)
3742 && is_gimple_omp_oacc (stmt)
3743 && is_gimple_omp (ctx->stmt))
3745 error_at (gimple_location (stmt),
3746 "OpenACC construct inside of non-OpenACC region");
3747 return false;
3749 continue;
3752 const char *stmt_name, *ctx_stmt_name;
3753 switch (gimple_omp_target_kind (stmt))
3755 case GF_OMP_TARGET_KIND_REGION: stmt_name = "target"; break;
3756 case GF_OMP_TARGET_KIND_DATA: stmt_name = "target data"; break;
3757 case GF_OMP_TARGET_KIND_UPDATE: stmt_name = "target update"; break;
3758 case GF_OMP_TARGET_KIND_ENTER_DATA:
3759 stmt_name = "target enter data"; break;
3760 case GF_OMP_TARGET_KIND_EXIT_DATA:
3761 stmt_name = "target exit data"; break;
3762 case GF_OMP_TARGET_KIND_OACC_PARALLEL: stmt_name = "parallel"; break;
3763 case GF_OMP_TARGET_KIND_OACC_KERNELS: stmt_name = "kernels"; break;
3764 case GF_OMP_TARGET_KIND_OACC_DATA: stmt_name = "data"; break;
3765 case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
3766 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
3767 stmt_name = "enter/exit data"; break;
3768 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
3769 break;
3770 default: gcc_unreachable ();
3772 switch (gimple_omp_target_kind (ctx->stmt))
3774 case GF_OMP_TARGET_KIND_REGION: ctx_stmt_name = "target"; break;
3775 case GF_OMP_TARGET_KIND_DATA: ctx_stmt_name = "target data"; break;
3776 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3777 ctx_stmt_name = "parallel"; break;
3778 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3779 ctx_stmt_name = "kernels"; break;
3780 case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
3781 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
3782 ctx_stmt_name = "host_data"; break;
3783 default: gcc_unreachable ();
3786 /* OpenACC/OpenMP mismatch? */
3787 if (is_gimple_omp_oacc (stmt)
3788 != is_gimple_omp_oacc (ctx->stmt))
3790 error_at (gimple_location (stmt),
3791 "%s %qs construct inside of %s %qs region",
3792 (is_gimple_omp_oacc (stmt)
3793 ? "OpenACC" : "OpenMP"), stmt_name,
3794 (is_gimple_omp_oacc (ctx->stmt)
3795 ? "OpenACC" : "OpenMP"), ctx_stmt_name);
3796 return false;
3798 if (is_gimple_omp_offloaded (ctx->stmt))
3800 /* No GIMPLE_OMP_TARGET inside offloaded OpenACC CTX. */
3801 if (is_gimple_omp_oacc (ctx->stmt))
3803 error_at (gimple_location (stmt),
3804 "%qs construct inside of %qs region",
3805 stmt_name, ctx_stmt_name);
3806 return false;
3808 else
3810 warning_at (gimple_location (stmt), 0,
3811 "%qs construct inside of %qs region",
3812 stmt_name, ctx_stmt_name);
3816 break;
3817 default:
3818 break;
3820 return true;
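/* Some illustrative nestings the checks above reject:

     - "#pragma omp single" directly inside "#pragma omp teams"
       (only distribute or parallel may be strictly nested there);
     - "#pragma omp distribute" anywhere but strictly inside teams;
     - "#pragma omp barrier" inside "#pragma omp critical";
     - "#pragma omp ordered" in a loop without an ordered clause.

   These are abbreviated; the precise wording of each diagnostic is in
   the error_at calls above.  */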
3824 /* Helper function for scan_omp.
3826 Callback for walk_tree or operators in walk_gimple_stmt used to
3827 scan for OMP directives in TP. */
3829 static tree
3830 scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
3832 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
3833 omp_context *ctx = (omp_context *) wi->info;
3834 tree t = *tp;
3836 switch (TREE_CODE (t))
3838 case VAR_DECL:
3839 case PARM_DECL:
3840 case LABEL_DECL:
3841 case RESULT_DECL:
3842 if (ctx)
3844 tree repl = remap_decl (t, &ctx->cb);
3845 gcc_checking_assert (TREE_CODE (repl) != ERROR_MARK);
3846 *tp = repl;
3848 break;
3850 default:
3851 if (ctx && TYPE_P (t))
3852 *tp = remap_type (t, &ctx->cb);
3853 else if (!DECL_P (t))
3855 *walk_subtrees = 1;
3856 if (ctx)
3858 tree tem = remap_type (TREE_TYPE (t), &ctx->cb);
3859 if (tem != TREE_TYPE (t))
3861 if (TREE_CODE (t) == INTEGER_CST)
3862 *tp = wide_int_to_tree (tem, t);
3863 else
3864 TREE_TYPE (t) = tem;
3868 break;
3871 return NULL_TREE;
3874 /* Return true if FNDECL is a setjmp or a longjmp. */
3876 static bool
3877 setjmp_or_longjmp_p (const_tree fndecl)
3879 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3880 && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SETJMP
3881 || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LONGJMP))
3882 return true;
3884 tree declname = DECL_NAME (fndecl);
3885 if (!declname)
3886 return false;
3887 const char *name = IDENTIFIER_POINTER (declname);
3888 return !strcmp (name, "setjmp") || !strcmp (name, "longjmp");
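/* A sketch of what the predicate above matches: the recognized builtins
   (BUILT_IN_SETJMP / BUILT_IN_LONGJMP) as well as any function that is
   merely *named* "setjmp" or "longjmp", e.g. a plain declaration

     extern int setjmp (jmp_buf);  // caught by the DECL_NAME fallback

   so scan_omp_1_stmt below can reject such calls inside simd regions.  */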
3892 /* Helper function for scan_omp.
3894 Callback for walk_gimple_stmt used to scan for OMP directives in
3895 the current statement in GSI. */
3897 static tree
3898 scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
3899 struct walk_stmt_info *wi)
3901 gimple *stmt = gsi_stmt (*gsi);
3902 omp_context *ctx = (omp_context *) wi->info;
3904 if (gimple_has_location (stmt))
3905 input_location = gimple_location (stmt);
3907 /* Check the nesting restrictions. */
3908 bool remove = false;
3909 if (is_gimple_omp (stmt))
3910 remove = !check_omp_nesting_restrictions (stmt, ctx);
3911 else if (is_gimple_call (stmt))
3913 tree fndecl = gimple_call_fndecl (stmt);
3914 if (fndecl)
3916 if (setjmp_or_longjmp_p (fndecl)
3917 && ctx
3918 && gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3919 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3921 remove = true;
3922 error_at (gimple_location (stmt),
3923 "setjmp/longjmp inside simd construct");
3925 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3926 switch (DECL_FUNCTION_CODE (fndecl))
3928 case BUILT_IN_GOMP_BARRIER:
3929 case BUILT_IN_GOMP_CANCEL:
3930 case BUILT_IN_GOMP_CANCELLATION_POINT:
3931 case BUILT_IN_GOMP_TASKYIELD:
3932 case BUILT_IN_GOMP_TASKWAIT:
3933 case BUILT_IN_GOMP_TASKGROUP_START:
3934 case BUILT_IN_GOMP_TASKGROUP_END:
3935 remove = !check_omp_nesting_restrictions (stmt, ctx);
3936 break;
3937 default:
3938 break;
3942 if (remove)
3944 stmt = gimple_build_nop ();
3945 gsi_replace (gsi, stmt, false);
3948 *handled_ops_p = true;
3950 switch (gimple_code (stmt))
3952 case GIMPLE_OMP_PARALLEL:
3953 taskreg_nesting_level++;
3954 scan_omp_parallel (gsi, ctx);
3955 taskreg_nesting_level--;
3956 break;
3958 case GIMPLE_OMP_TASK:
3959 taskreg_nesting_level++;
3960 scan_omp_task (gsi, ctx);
3961 taskreg_nesting_level--;
3962 break;
3964 case GIMPLE_OMP_FOR:
3965 scan_omp_for (as_a <gomp_for *> (stmt), ctx);
3966 break;
3968 case GIMPLE_OMP_SECTIONS:
3969 scan_omp_sections (as_a <gomp_sections *> (stmt), ctx);
3970 break;
3972 case GIMPLE_OMP_SINGLE:
3973 scan_omp_single (as_a <gomp_single *> (stmt), ctx);
3974 break;
3976 case GIMPLE_OMP_SECTION:
3977 case GIMPLE_OMP_MASTER:
3978 case GIMPLE_OMP_TASKGROUP:
3979 case GIMPLE_OMP_ORDERED:
3980 case GIMPLE_OMP_CRITICAL:
3981 case GIMPLE_OMP_GRID_BODY:
3982 ctx = new_omp_context (stmt, ctx);
3983 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3984 break;
3986 case GIMPLE_OMP_TARGET:
3987 scan_omp_target (as_a <gomp_target *> (stmt), ctx);
3988 break;
3990 case GIMPLE_OMP_TEAMS:
3991 scan_omp_teams (as_a <gomp_teams *> (stmt), ctx);
3992 break;
3994 case GIMPLE_BIND:
3996 tree var;
3998 *handled_ops_p = false;
3999 if (ctx)
4000 for (var = gimple_bind_vars (as_a <gbind *> (stmt));
4001 var ;
4002 var = DECL_CHAIN (var))
4003 insert_decl_map (&ctx->cb, var, var);
4005 break;
4006 default:
4007 *handled_ops_p = false;
4008 break;
4011 return NULL_TREE;
4015 /* Scan all the statements starting at the current statement. CTX
4016 contains context information about the OMP directives and
4017 clauses found during the scan. */
4019 static void
4020 scan_omp (gimple_seq *body_p, omp_context *ctx)
4022 location_t saved_location;
4023 struct walk_stmt_info wi;
4025 memset (&wi, 0, sizeof (wi));
4026 wi.info = ctx;
4027 wi.want_locations = true;
4029 saved_location = input_location;
4030 walk_gimple_seq_mod (body_p, scan_omp_1_stmt, scan_omp_1_op, &wi);
4031 input_location = saved_location;
4034 /* Re-gimplification and code generation routines. */
4036 /* Build a call to GOMP_barrier. */
4038 static gimple *
4039 build_omp_barrier (tree lhs)
4041 tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
4042 : BUILT_IN_GOMP_BARRIER);
4043 gcall *g = gimple_build_call (fndecl, 0);
4044 if (lhs)
4045 gimple_call_set_lhs (g, lhs);
4046 return g;
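/* For illustration: a plain "#pragma omp barrier" is lowered to a call
   to GOMP_barrier (), while in a cancellable region a result is wanted
   and the cancelling variant is built instead, roughly

     cancelled = GOMP_barrier_cancel ();

   letting the caller branch out of the region when CANCELLED is set.  */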
4049 /* If a context was created for STMT when it was scanned, return it. */
4051 static omp_context *
4052 maybe_lookup_ctx (gimple *stmt)
4054 splay_tree_node n;
4055 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
4056 return n ? (omp_context *) n->value : NULL;
4060 /* Find the mapping for DECL in CTX or the immediately enclosing
4061 context that has a mapping for DECL.
4063 If CTX is a nested parallel directive, we may have to use the decl
4064 mappings created in CTX's parent context. Suppose that we have the
4065 following parallel nesting (variable UIDs shown for clarity):
4067 iD.1562 = 0;
4068 #omp parallel shared(iD.1562) -> outer parallel
4069 iD.1562 = iD.1562 + 1;
4071 #omp parallel shared (iD.1562) -> inner parallel
4072 iD.1562 = iD.1562 - 1;
4074 Each parallel structure will create a distinct .omp_data_s structure
4075 for copying iD.1562 in/out of the directive:
4077 outer parallel .omp_data_s.1.i -> iD.1562
4078 inner parallel .omp_data_s.2.i -> iD.1562
4080 A shared variable mapping will produce a copy-out operation before
4081 the parallel directive and a copy-in operation after it. So, in
4082 this case we would have:
4084 iD.1562 = 0;
4085 .omp_data_o.1.i = iD.1562;
4086 #omp parallel shared(iD.1562) -> outer parallel
4087 .omp_data_i.1 = &.omp_data_o.1
4088 .omp_data_i.1->i = .omp_data_i.1->i + 1;
4090 .omp_data_o.2.i = iD.1562; -> **
4091 #omp parallel shared(iD.1562) -> inner parallel
4092 .omp_data_i.2 = &.omp_data_o.2
4093 .omp_data_i.2->i = .omp_data_i.2->i - 1;
4096 ** This is a problem. The symbol iD.1562 cannot be referenced
4097 inside the body of the outer parallel region. But since we are
4098 emitting this copy operation while expanding the inner parallel
4099 directive, we need to access the CTX structure of the outer
4100 parallel directive to get the correct mapping:
4102 .omp_data_o.2.i = .omp_data_i.1->i
4104 Since there may be other workshare or parallel directives enclosing
4105 the parallel directive, it may be necessary to walk up the context
4106 parent chain. This is not a problem in general because nested
4107 parallelism happens only rarely. */
4109 static tree
4110 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4112 tree t;
4113 omp_context *up;
4115 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4116 t = maybe_lookup_decl (decl, up);
4118 gcc_assert (!ctx->is_nested || t || is_global_var (decl));
4120 return t ? t : decl;
4124 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
4125 in outer contexts. */
4127 static tree
4128 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4130 tree t = NULL;
4131 omp_context *up;
4133 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4134 t = maybe_lookup_decl (decl, up);
4136 return t ? t : decl;
4140 /* Construct the initialization value for reduction operation OP. */
4142 tree
4143 omp_reduction_init_op (location_t loc, enum tree_code op, tree type)
4145 switch (op)
4147 case PLUS_EXPR:
4148 case MINUS_EXPR:
4149 case BIT_IOR_EXPR:
4150 case BIT_XOR_EXPR:
4151 case TRUTH_OR_EXPR:
4152 case TRUTH_ORIF_EXPR:
4153 case TRUTH_XOR_EXPR:
4154 case NE_EXPR:
4155 return build_zero_cst (type);
4157 case MULT_EXPR:
4158 case TRUTH_AND_EXPR:
4159 case TRUTH_ANDIF_EXPR:
4160 case EQ_EXPR:
4161 return fold_convert_loc (loc, type, integer_one_node);
4163 case BIT_AND_EXPR:
4164 return fold_convert_loc (loc, type, integer_minus_one_node);
4166 case MAX_EXPR:
4167 if (SCALAR_FLOAT_TYPE_P (type))
4169 REAL_VALUE_TYPE max, min;
4170 if (HONOR_INFINITIES (type))
4172 real_inf (&max);
4173 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
4175 else
4176 real_maxval (&min, 1, TYPE_MODE (type));
4177 return build_real (type, min);
4179 else if (POINTER_TYPE_P (type))
4181 wide_int min
4182 = wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4183 return wide_int_to_tree (type, min);
4185 else
4187 gcc_assert (INTEGRAL_TYPE_P (type));
4188 return TYPE_MIN_VALUE (type);
4191 case MIN_EXPR:
4192 if (SCALAR_FLOAT_TYPE_P (type))
4194 REAL_VALUE_TYPE max;
4195 if (HONOR_INFINITIES (type))
4196 real_inf (&max);
4197 else
4198 real_maxval (&max, 0, TYPE_MODE (type));
4199 return build_real (type, max);
4201 else if (POINTER_TYPE_P (type))
4203 wide_int max
4204 = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4205 return wide_int_to_tree (type, max);
4207 else
4209 gcc_assert (INTEGRAL_TYPE_P (type));
4210 return TYPE_MAX_VALUE (type);
4213 default:
4214 gcc_unreachable ();
4218 /* Construct the initialization value for reduction CLAUSE. */
4220 tree
4221 omp_reduction_init (tree clause, tree type)
4223 return omp_reduction_init_op (OMP_CLAUSE_LOCATION (clause),
4224 OMP_CLAUSE_REDUCTION_CODE (clause), type);
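/* For example, per omp_reduction_init_op above the private copies
   start out as (sketch):

     reduction(+:x)   ->  x_priv = 0
     reduction(*:x)   ->  x_priv = 1
     reduction(&:x)   ->  x_priv = ~0  (all bits set)
     reduction(max:x) ->  x_priv = minimum of x's type
                          (-inf if infinities are honored)
     reduction(min:x) ->  x_priv = maximum of x's type
                          (+inf if infinities are honored)  */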
4227 /* Return alignment to be assumed for var in CLAUSE, which should be
4228 OMP_CLAUSE_ALIGNED. */
4230 static tree
4231 omp_clause_aligned_alignment (tree clause)
4233 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (clause))
4234 return OMP_CLAUSE_ALIGNED_ALIGNMENT (clause);
4236 /* Otherwise return the implementation-defined alignment. */
4237 unsigned int al = 1;
4238 machine_mode mode, vmode;
4239 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4240 if (vs)
4241 vs = 1 << floor_log2 (vs);
4242 static enum mode_class classes[]
4243 = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
4244 for (int i = 0; i < 4; i += 2)
4245 for (mode = GET_CLASS_NARROWEST_MODE (classes[i]);
4246 mode != VOIDmode;
4247 mode = GET_MODE_WIDER_MODE (mode))
4249 vmode = targetm.vectorize.preferred_simd_mode (mode);
4250 if (GET_MODE_CLASS (vmode) != classes[i + 1])
4251 continue;
4252 while (vs
4253 && GET_MODE_SIZE (vmode) < vs
4254 && GET_MODE_2XWIDER_MODE (vmode) != VOIDmode)
4255 vmode = GET_MODE_2XWIDER_MODE (vmode);
4257 tree type = lang_hooks.types.type_for_mode (mode, 1);
4258 if (type == NULL_TREE || TYPE_MODE (type) != mode)
4259 continue;
4260 type = build_vector_type (type, GET_MODE_SIZE (vmode)
4261 / GET_MODE_SIZE (mode));
4262 if (TYPE_MODE (type) != vmode)
4263 continue;
4264 if (TYPE_ALIGN_UNIT (type) > al)
4265 al = TYPE_ALIGN_UNIT (type);
4267 return build_int_cst (integer_type_node, al);
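/* Illustrative use: for "#pragma omp simd aligned (p : 32)" the explicit
   32 is returned directly; for plain "aligned (p)" the loop above picks
   the largest unit alignment among the target's preferred SIMD vector
   types, so the lowering below can emit something like

     p = __builtin_assume_aligned (p, al);  */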
4270 /* Return maximum possible vectorization factor for the target. */
4272 static int
4273 omp_max_vf (void)
4275 if (!optimize
4276 || optimize_debug
4277 || !flag_tree_loop_optimize
4278 || (!flag_tree_loop_vectorize
4279 && (global_options_set.x_flag_tree_loop_vectorize
4280 || global_options_set.x_flag_tree_vectorize)))
4281 return 1;
4283 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4284 if (vs)
4286 vs = 1 << floor_log2 (vs);
4287 return vs;
4289 machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
4290 if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
4291 return GET_MODE_NUNITS (vqimode);
4292 return 1;
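/* A hypothetical example: if a target's autovectorize_vector_sizes hook
   returned 48 (i.e. 32-byte | 16-byte vectors), the code above rounds
   down to a power of two and yields 32; a target without the hook falls
   back to the element count of the preferred QImode SIMD mode.  */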
4295 /* Helper function of lower_rec_input_clauses, used for #pragma omp simd
4296 privatization. */
4298 static bool
4299 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf,
4300 tree &idx, tree &lane, tree &ivar, tree &lvar)
4302 if (max_vf == 0)
4304 max_vf = omp_max_vf ();
4305 if (max_vf > 1)
4307 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
4308 OMP_CLAUSE_SAFELEN);
4309 if (c && TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) != INTEGER_CST)
4310 max_vf = 1;
4311 else if (c && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
4312 max_vf) == -1)
4313 max_vf = tree_to_shwi (OMP_CLAUSE_SAFELEN_EXPR (c));
4315 if (max_vf > 1)
4317 idx = create_tmp_var (unsigned_type_node);
4318 lane = create_tmp_var (unsigned_type_node);
4321 if (max_vf == 1)
4322 return false;
4324 tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
4325 tree avar = create_tmp_var_raw (atype);
4326 if (TREE_ADDRESSABLE (new_var))
4327 TREE_ADDRESSABLE (avar) = 1;
4328 DECL_ATTRIBUTES (avar)
4329 = tree_cons (get_identifier ("omp simd array"), NULL,
4330 DECL_ATTRIBUTES (avar));
4331 gimple_add_tmp_var (avar);
4332 ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, idx,
4333 NULL_TREE, NULL_TREE);
4334 lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, lane,
4335 NULL_TREE, NULL_TREE);
4336 if (DECL_P (new_var))
4338 SET_DECL_VALUE_EXPR (new_var, lvar);
4339 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4341 return true;
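/* A sketch of the resulting privatization (names invented): a privatized
   scalar D in a simd loop is backed by an "omp simd array"

     D_arr[max_vf];

   the loop body accesses D_arr[idx] (IVAR), while the original decl's
   DECL_VALUE_EXPR becomes D_arr[lane] (LVAR), so the vectorizer can
   later give each SIMD lane its own slot.  */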
4344 /* Helper function of lower_rec_input_clauses. For a reference
4345 in a simd reduction, allocate an underlying variable for it to reference. */
4347 static void
4348 handle_simd_reference (location_t loc, tree new_vard, gimple_seq *ilist)
4350 tree z = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_vard)));
4351 if (TREE_CONSTANT (z))
4353 z = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_vard)),
4354 get_name (new_vard));
4355 gimple_add_tmp_var (z);
4356 TREE_ADDRESSABLE (z) = 1;
4357 z = build_fold_addr_expr_loc (loc, z);
4358 gimplify_assign (new_vard, z, ilist);
4362 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
4363 from the receiver (aka child) side and initializers for REFERENCE_TYPE
4364 private variables. Initialization statements go in ILIST, while calls
4365 to destructors go in DLIST. */
4367 static void
4368 lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
4369 omp_context *ctx, struct omp_for_data *fd)
4371 tree c, dtor, copyin_seq, x, ptr;
4372 bool copyin_by_ref = false;
4373 bool lastprivate_firstprivate = false;
4374 bool reduction_omp_orig_ref = false;
4375 int pass;
4376 bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
4377 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
4378 int max_vf = 0;
4379 tree lane = NULL_TREE, idx = NULL_TREE;
4380 tree ivar = NULL_TREE, lvar = NULL_TREE;
4381 gimple_seq llist[2] = { NULL, NULL };
4383 copyin_seq = NULL;
4385 /* Set max_vf=1 (which will later enforce safelen=1) in simd loops
4386 with data sharing clauses referencing variable sized vars. That
4387 is unnecessarily hard to support and very unlikely to result in
4388 vectorized code anyway. */
4389 if (is_simd)
4390 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4391 switch (OMP_CLAUSE_CODE (c))
4393 case OMP_CLAUSE_LINEAR:
4394 if (OMP_CLAUSE_LINEAR_ARRAY (c))
4395 max_vf = 1;
4396 /* FALLTHRU */
4397 case OMP_CLAUSE_PRIVATE:
4398 case OMP_CLAUSE_FIRSTPRIVATE:
4399 case OMP_CLAUSE_LASTPRIVATE:
4400 if (is_variable_sized (OMP_CLAUSE_DECL (c)))
4401 max_vf = 1;
4402 break;
4403 case OMP_CLAUSE_REDUCTION:
4404 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF
4405 || is_variable_sized (OMP_CLAUSE_DECL (c)))
4406 max_vf = 1;
4407 break;
4408 default:
4409 continue;
4412 /* Do all the fixed sized types in the first pass, and the variable sized
4413 types in the second pass. This makes sure that the scalar arguments to
4414 the variable sized types are processed before we use them in the
4415 variable sized operations. */
4416 for (pass = 0; pass < 2; ++pass)
4418 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4420 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
4421 tree var, new_var;
4422 bool by_ref;
4423 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
4425 switch (c_kind)
4427 case OMP_CLAUSE_PRIVATE:
4428 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
4429 continue;
4430 break;
4431 case OMP_CLAUSE_SHARED:
4432 /* Ignore shared directives in teams construct. */
4433 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
4434 continue;
4435 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
4437 gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
4438 || is_global_var (OMP_CLAUSE_DECL (c)));
4439 continue;
4441 case OMP_CLAUSE_FIRSTPRIVATE:
4442 case OMP_CLAUSE_COPYIN:
4443 break;
4444 case OMP_CLAUSE_LINEAR:
4445 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c)
4446 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
4447 lastprivate_firstprivate = true;
4448 break;
4449 case OMP_CLAUSE_REDUCTION:
4450 if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4451 reduction_omp_orig_ref = true;
4452 break;
4453 case OMP_CLAUSE__LOOPTEMP_:
4454 /* Handle _looptemp_ clauses only on parallel/task. */
4455 if (fd)
4456 continue;
4457 break;
4458 case OMP_CLAUSE_LASTPRIVATE:
4459 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
4461 lastprivate_firstprivate = true;
4462 if (pass != 0 || is_taskloop_ctx (ctx))
4463 continue;
4465 /* Even without corresponding firstprivate, if
4466 decl is Fortran allocatable, it needs outer var
4467 reference. */
4468 else if (pass == 0
4469 && lang_hooks.decls.omp_private_outer_ref
4470 (OMP_CLAUSE_DECL (c)))
4471 lastprivate_firstprivate = true;
4472 break;
4473 case OMP_CLAUSE_ALIGNED:
4474 if (pass == 0)
4475 continue;
4476 var = OMP_CLAUSE_DECL (c);
4477 if (TREE_CODE (TREE_TYPE (var)) == POINTER_TYPE
4478 && !is_global_var (var))
4480 new_var = maybe_lookup_decl (var, ctx);
4481 if (new_var == NULL_TREE)
4482 new_var = maybe_lookup_decl_in_outer_ctx (var, ctx);
4483 x = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4484 x = build_call_expr_loc (clause_loc, x, 2, new_var,
4485 omp_clause_aligned_alignment (c));
4486 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4487 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
4488 gimplify_and_add (x, ilist);
4490 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
4491 && is_global_var (var))
4493 tree ptype = build_pointer_type (TREE_TYPE (var)), t, t2;
4494 new_var = lookup_decl (var, ctx);
4495 t = maybe_lookup_decl_in_outer_ctx (var, ctx);
4496 t = build_fold_addr_expr_loc (clause_loc, t);
4497 t2 = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4498 t = build_call_expr_loc (clause_loc, t2, 2, t,
4499 omp_clause_aligned_alignment (c));
4500 t = fold_convert_loc (clause_loc, ptype, t);
4501 x = create_tmp_var (ptype);
4502 t = build2 (MODIFY_EXPR, ptype, x, t);
4503 gimplify_and_add (t, ilist);
4504 t = build_simple_mem_ref_loc (clause_loc, x);
4505 SET_DECL_VALUE_EXPR (new_var, t);
4506 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4508 continue;
4509 default:
4510 continue;
4513 new_var = var = OMP_CLAUSE_DECL (c);
4514 if (c_kind == OMP_CLAUSE_REDUCTION && TREE_CODE (var) == MEM_REF)
4516 var = TREE_OPERAND (var, 0);
4517 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
4518 var = TREE_OPERAND (var, 0);
4519 if (TREE_CODE (var) == INDIRECT_REF
4520 || TREE_CODE (var) == ADDR_EXPR)
4521 var = TREE_OPERAND (var, 0);
4522 if (is_variable_sized (var))
4524 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
4525 var = DECL_VALUE_EXPR (var);
4526 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
4527 var = TREE_OPERAND (var, 0);
4528 gcc_assert (DECL_P (var));
4530 new_var = var;
4532 if (c_kind != OMP_CLAUSE_COPYIN)
4533 new_var = lookup_decl (var, ctx);
4535 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
4537 if (pass != 0)
4538 continue;
4540 /* C/C++ array section reductions. */
4541 else if (c_kind == OMP_CLAUSE_REDUCTION
4542 && var != OMP_CLAUSE_DECL (c))
4544 if (pass == 0)
4545 continue;
4547 tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1);
4548 tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
4549 if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR)
4551 tree b = TREE_OPERAND (orig_var, 1);
4552 b = maybe_lookup_decl (b, ctx);
4553 if (b == NULL)
4555 b = TREE_OPERAND (orig_var, 1);
4556 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
4558 if (integer_zerop (bias))
4559 bias = b;
4560 else
4562 bias = fold_convert_loc (clause_loc,
4563 TREE_TYPE (b), bias);
4564 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
4565 TREE_TYPE (b), b, bias);
4567 orig_var = TREE_OPERAND (orig_var, 0);
4569 if (TREE_CODE (orig_var) == INDIRECT_REF
4570 || TREE_CODE (orig_var) == ADDR_EXPR)
4571 orig_var = TREE_OPERAND (orig_var, 0);
4572 tree d = OMP_CLAUSE_DECL (c);
4573 tree type = TREE_TYPE (d);
4574 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
4575 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
4576 const char *name = get_name (orig_var);
4577 if (TREE_CONSTANT (v))
4579 x = create_tmp_var_raw (type, name);
4580 gimple_add_tmp_var (x);
4581 TREE_ADDRESSABLE (x) = 1;
4582 x = build_fold_addr_expr_loc (clause_loc, x);
4584 else
4586 tree atmp
4587 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4588 tree t = maybe_lookup_decl (v, ctx);
4589 if (t)
4590 v = t;
4591 else
4592 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
4593 gimplify_expr (&v, ilist, NULL, is_gimple_val, fb_rvalue);
4594 t = fold_build2_loc (clause_loc, PLUS_EXPR,
4595 TREE_TYPE (v), v,
4596 build_int_cst (TREE_TYPE (v), 1));
4597 t = fold_build2_loc (clause_loc, MULT_EXPR,
4598 TREE_TYPE (v), t,
4599 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4600 tree al = size_int (TYPE_ALIGN (TREE_TYPE (type)));
4601 x = build_call_expr_loc (clause_loc, atmp, 2, t, al);
4604 tree ptype = build_pointer_type (TREE_TYPE (type));
4605 x = fold_convert_loc (clause_loc, ptype, x);
4606 tree y = create_tmp_var (ptype, name);
4607 gimplify_assign (y, x, ilist);
4608 x = y;
4609 tree yb = y;
4611 if (!integer_zerop (bias))
4613 bias = fold_convert_loc (clause_loc, pointer_sized_int_node,
4614 bias);
4615 yb = fold_convert_loc (clause_loc, pointer_sized_int_node,
4616 x);
4617 yb = fold_build2_loc (clause_loc, MINUS_EXPR,
4618 pointer_sized_int_node, yb, bias);
4619 x = fold_convert_loc (clause_loc, TREE_TYPE (x), yb);
4620 yb = create_tmp_var (ptype, name);
4621 gimplify_assign (yb, x, ilist);
4622 x = yb;
4625 d = TREE_OPERAND (d, 0);
4626 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
4627 d = TREE_OPERAND (d, 0);
4628 if (TREE_CODE (d) == ADDR_EXPR)
4630 if (orig_var != var)
4632 gcc_assert (is_variable_sized (orig_var));
4633 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var),
4634 x);
4635 gimplify_assign (new_var, x, ilist);
4636 tree new_orig_var = lookup_decl (orig_var, ctx);
4637 tree t = build_fold_indirect_ref (new_var);
4638 DECL_IGNORED_P (new_var) = 0;
4639 TREE_THIS_NOTRAP (t) = 1;
4640 SET_DECL_VALUE_EXPR (new_orig_var, t);
4641 DECL_HAS_VALUE_EXPR_P (new_orig_var) = 1;
4643 else
4645 x = build2 (MEM_REF, TREE_TYPE (new_var), x,
4646 build_int_cst (ptype, 0));
4647 SET_DECL_VALUE_EXPR (new_var, x);
4648 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4651 else
4653 gcc_assert (orig_var == var);
4654 if (TREE_CODE (d) == INDIRECT_REF)
4656 x = create_tmp_var (ptype, name);
4657 TREE_ADDRESSABLE (x) = 1;
4658 gimplify_assign (x, yb, ilist);
4659 x = build_fold_addr_expr_loc (clause_loc, x);
4661 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4662 gimplify_assign (new_var, x, ilist);
4664 tree y1 = create_tmp_var (ptype, NULL);
4665 gimplify_assign (y1, y, ilist);
4666 tree i2 = NULL_TREE, y2 = NULL_TREE;
4667 tree body2 = NULL_TREE, end2 = NULL_TREE;
4668 tree y3 = NULL_TREE, y4 = NULL_TREE;
4669 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd)
4671 y2 = create_tmp_var (ptype, NULL);
4672 gimplify_assign (y2, y, ilist);
4673 tree ref = build_outer_var_ref (var, ctx);
4674 /* For references, build_outer_var_ref already performs this. */
4675 if (TREE_CODE (d) == INDIRECT_REF)
4676 gcc_assert (is_reference (var));
4677 else if (TREE_CODE (d) == ADDR_EXPR)
4678 ref = build_fold_addr_expr (ref);
4679 else if (is_reference (var))
4680 ref = build_fold_addr_expr (ref);
4681 ref = fold_convert_loc (clause_loc, ptype, ref);
4682 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
4683 && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4685 y3 = create_tmp_var (ptype, NULL);
4686 gimplify_assign (y3, unshare_expr (ref), ilist);
4688 if (is_simd)
4690 y4 = create_tmp_var (ptype, NULL);
4691 gimplify_assign (y4, ref, dlist);
4694 tree i = create_tmp_var (TREE_TYPE (v), NULL);
4695 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), ilist);
4696 tree body = create_artificial_label (UNKNOWN_LOCATION);
4697 tree end = create_artificial_label (UNKNOWN_LOCATION);
4698 gimple_seq_add_stmt (ilist, gimple_build_label (body));
4699 if (y2)
4701 i2 = create_tmp_var (TREE_TYPE (v), NULL);
4702 gimplify_assign (i2, build_int_cst (TREE_TYPE (v), 0), dlist);
4703 body2 = create_artificial_label (UNKNOWN_LOCATION);
4704 end2 = create_artificial_label (UNKNOWN_LOCATION);
4705 gimple_seq_add_stmt (dlist, gimple_build_label (body2));
4707 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4709 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
4710 tree decl_placeholder
4711 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
4712 SET_DECL_VALUE_EXPR (decl_placeholder,
4713 build_simple_mem_ref (y1));
4714 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
4715 SET_DECL_VALUE_EXPR (placeholder,
4716 y3 ? build_simple_mem_ref (y3)
4717 : error_mark_node);
4718 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
4719 x = lang_hooks.decls.omp_clause_default_ctor
4720 (c, build_simple_mem_ref (y1),
4721 y3 ? build_simple_mem_ref (y3) : NULL_TREE);
4722 if (x)
4723 gimplify_and_add (x, ilist);
4724 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
4726 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
4727 lower_omp (&tseq, ctx);
4728 gimple_seq_add_seq (ilist, tseq);
4730 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
4731 if (is_simd)
4733 SET_DECL_VALUE_EXPR (decl_placeholder,
4734 build_simple_mem_ref (y2));
4735 SET_DECL_VALUE_EXPR (placeholder,
4736 build_simple_mem_ref (y4));
4737 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
4738 lower_omp (&tseq, ctx);
4739 gimple_seq_add_seq (dlist, tseq);
4740 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
4742 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
4743 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 0;
4744 x = lang_hooks.decls.omp_clause_dtor
4745 (c, build_simple_mem_ref (y2));
4746 if (x)
4748 gimple_seq tseq = NULL;
4749 dtor = x;
4750 gimplify_stmt (&dtor, &tseq);
4751 gimple_seq_add_seq (dlist, tseq);
4754 else
4756 x = omp_reduction_init (c, TREE_TYPE (type));
4757 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
4759 /* reduction(-:var) sums up the partial results, so it
4760 acts identically to reduction(+:var). */
4761 if (code == MINUS_EXPR)
4762 code = PLUS_EXPR;
4764 gimplify_assign (build_simple_mem_ref (y1), x, ilist);
4765 if (is_simd)
4767 x = build2 (code, TREE_TYPE (type),
4768 build_simple_mem_ref (y4),
4769 build_simple_mem_ref (y2));
4770 gimplify_assign (build_simple_mem_ref (y4), x, dlist);
4773 gimple *g
4774 = gimple_build_assign (y1, POINTER_PLUS_EXPR, y1,
4775 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4776 gimple_seq_add_stmt (ilist, g);
4777 if (y3)
4779 g = gimple_build_assign (y3, POINTER_PLUS_EXPR, y3,
4780 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4781 gimple_seq_add_stmt (ilist, g);
4783 g = gimple_build_assign (i, PLUS_EXPR, i,
4784 build_int_cst (TREE_TYPE (i), 1));
4785 gimple_seq_add_stmt (ilist, g);
4786 g = gimple_build_cond (LE_EXPR, i, v, body, end);
4787 gimple_seq_add_stmt (ilist, g);
4788 gimple_seq_add_stmt (ilist, gimple_build_label (end));
4789 if (y2)
4791 g = gimple_build_assign (y2, POINTER_PLUS_EXPR, y2,
4792 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4793 gimple_seq_add_stmt (dlist, g);
4794 if (y4)
4796 g = gimple_build_assign
4797 (y4, POINTER_PLUS_EXPR, y4,
4798 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4799 gimple_seq_add_stmt (dlist, g);
4801 g = gimple_build_assign (i2, PLUS_EXPR, i2,
4802 build_int_cst (TREE_TYPE (i2), 1));
4803 gimple_seq_add_stmt (dlist, g);
4804 g = gimple_build_cond (LE_EXPR, i2, v, body2, end2);
4805 gimple_seq_add_stmt (dlist, g);
4806 gimple_seq_add_stmt (dlist, gimple_build_label (end2));
4808 continue;
4810 else if (is_variable_sized (var))
4812 /* For variable sized types, we need to allocate the
4813 actual storage here. Call alloca and store the
4814 result in the pointer decl that we created elsewhere. */
4815 if (pass == 0)
4816 continue;
4818 if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx))
4820 gcall *stmt;
4821 tree tmp, atmp;
4823 ptr = DECL_VALUE_EXPR (new_var);
4824 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
4825 ptr = TREE_OPERAND (ptr, 0);
4826 gcc_assert (DECL_P (ptr));
4827 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
4829 /* void *tmp = __builtin_alloca */
4830 atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4831 stmt = gimple_build_call (atmp, 2, x,
4832 size_int (DECL_ALIGN (var)));
4833 tmp = create_tmp_var_raw (ptr_type_node);
4834 gimple_add_tmp_var (tmp);
4835 gimple_call_set_lhs (stmt, tmp);
4837 gimple_seq_add_stmt (ilist, stmt);
4839 x = fold_convert_loc (clause_loc, TREE_TYPE (ptr), tmp);
4840 gimplify_assign (ptr, x, ilist);
4843 else if (is_reference (var))
4845 /* For references that are being privatized for Fortran,
4846 allocate new backing storage for the new pointer
4847 variable. This allows us to avoid changing all the
4848 code that expects a pointer to something that expects
4849 a direct variable. */
4850 if (pass == 0)
4851 continue;
4853 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
4854 if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx))
4856 x = build_receiver_ref (var, false, ctx);
4857 x = build_fold_addr_expr_loc (clause_loc, x);
4859 else if (TREE_CONSTANT (x))
4861 /* For a reduction in a SIMD loop, defer adding the
4862 initialization of the reference, because if we decide
4863 to use a SIMD array for it, the initialization could cause
4864 an expansion ICE. */
4865 if (c_kind == OMP_CLAUSE_REDUCTION && is_simd)
4866 x = NULL_TREE;
4867 else
4869 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
4870 get_name (var));
4871 gimple_add_tmp_var (x);
4872 TREE_ADDRESSABLE (x) = 1;
4873 x = build_fold_addr_expr_loc (clause_loc, x);
4876 else
4878 tree atmp
4879 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4880 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
4881 tree al = size_int (TYPE_ALIGN (rtype));
4882 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
4885 if (x)
4887 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4888 gimplify_assign (new_var, x, ilist);
4891 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
4893 else if (c_kind == OMP_CLAUSE_REDUCTION
4894 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4896 if (pass == 0)
4897 continue;
4899 else if (pass != 0)
4900 continue;
4902 switch (OMP_CLAUSE_CODE (c))
4904 case OMP_CLAUSE_SHARED:
4905 /* Ignore shared directives in teams construct. */
4906 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
4907 continue;
4908 /* Shared global vars are just accessed directly. */
4909 if (is_global_var (new_var))
4910 break;
4911 /* For taskloop firstprivate/lastprivate, represented
4912 as firstprivate and shared clause on the task, new_var
4913 is the firstprivate var. */
4914 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
4915 break;
4916 /* Set up the DECL_VALUE_EXPR for shared variables now. This
4917 needs to be delayed until after fixup_child_record_type so
4918 that we get the correct type during the dereference. */
4919 by_ref = use_pointer_for_field (var, ctx);
4920 x = build_receiver_ref (var, by_ref, ctx);
4921 SET_DECL_VALUE_EXPR (new_var, x);
4922 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4924 /* ??? If VAR is not passed by reference, and the variable
4925 hasn't been initialized yet, then we'll get a warning for
4926 the store into the omp_data_s structure. Ideally, we'd be
4927 able to notice this and not store anything at all, but
4928 we're generating code too early. Suppress the warning. */
4929 if (!by_ref)
4930 TREE_NO_WARNING (var) = 1;
4931 break;
4933 case OMP_CLAUSE_LASTPRIVATE:
4934 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
4935 break;
4936 /* FALLTHRU */
4938 case OMP_CLAUSE_PRIVATE:
4939 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE)
4940 x = build_outer_var_ref (var, ctx);
4941 else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
4943 if (is_task_ctx (ctx))
4944 x = build_receiver_ref (var, false, ctx);
4945 else
4946 x = build_outer_var_ref (var, ctx);
4948 else
4949 x = NULL;
4950 do_private:
4951 tree nx;
4952 nx = lang_hooks.decls.omp_clause_default_ctor
4953 (c, unshare_expr (new_var), x);
4954 if (is_simd)
4956 tree y = lang_hooks.decls.omp_clause_dtor (c, new_var);
4957 if ((TREE_ADDRESSABLE (new_var) || nx || y
4958 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
4959 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
4960 idx, lane, ivar, lvar))
4962 if (nx)
4963 x = lang_hooks.decls.omp_clause_default_ctor
4964 (c, unshare_expr (ivar), x);
4965 if (nx && x)
4966 gimplify_and_add (x, &llist[0]);
4967 if (y)
4969 y = lang_hooks.decls.omp_clause_dtor (c, ivar);
4970 if (y)
4972 gimple_seq tseq = NULL;
4974 dtor = y;
4975 gimplify_stmt (&dtor, &tseq);
4976 gimple_seq_add_seq (&llist[1], tseq);
4979 break;
4982 if (nx)
4983 gimplify_and_add (nx, ilist);
4984 /* FALLTHRU */
4986 do_dtor:
4987 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
4988 if (x)
4990 gimple_seq tseq = NULL;
4992 dtor = x;
4993 gimplify_stmt (&dtor, &tseq);
4994 gimple_seq_add_seq (dlist, tseq);
4996 break;
4998 case OMP_CLAUSE_LINEAR:
4999 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5000 goto do_firstprivate;
5001 if (OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
5002 x = NULL;
5003 else
5004 x = build_outer_var_ref (var, ctx);
5005 goto do_private;
5007 case OMP_CLAUSE_FIRSTPRIVATE:
5008 if (is_task_ctx (ctx))
5010 if (is_reference (var) || is_variable_sized (var))
5011 goto do_dtor;
5012 else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
5013 ctx))
5014 || use_pointer_for_field (var, NULL))
5016 x = build_receiver_ref (var, false, ctx);
5017 SET_DECL_VALUE_EXPR (new_var, x);
5018 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
5019 goto do_dtor;
5022 do_firstprivate:
5023 x = build_outer_var_ref (var, ctx);
5024 if (is_simd)
5026 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5027 && gimple_omp_for_combined_into_p (ctx->stmt))
5029 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5030 tree stept = TREE_TYPE (t);
5031 tree ct = find_omp_clause (clauses,
5032 OMP_CLAUSE__LOOPTEMP_);
5033 gcc_assert (ct);
5034 tree l = OMP_CLAUSE_DECL (ct);
5035 tree n1 = fd->loop.n1;
5036 tree step = fd->loop.step;
5037 tree itype = TREE_TYPE (l);
5038 if (POINTER_TYPE_P (itype))
5039 itype = signed_type_for (itype);
5040 l = fold_build2 (MINUS_EXPR, itype, l, n1);
5041 if (TYPE_UNSIGNED (itype)
5042 && fd->loop.cond_code == GT_EXPR)
5043 l = fold_build2 (TRUNC_DIV_EXPR, itype,
5044 fold_build1 (NEGATE_EXPR, itype, l),
5045 fold_build1 (NEGATE_EXPR,
5046 itype, step));
5047 else
5048 l = fold_build2 (TRUNC_DIV_EXPR, itype, l, step);
5049 t = fold_build2 (MULT_EXPR, stept,
5050 fold_convert (stept, l), t);
5052 if (OMP_CLAUSE_LINEAR_ARRAY (c))
5054 x = lang_hooks.decls.omp_clause_linear_ctor
5055 (c, new_var, x, t);
5056 gimplify_and_add (x, ilist);
5057 goto do_dtor;
5060 if (POINTER_TYPE_P (TREE_TYPE (x)))
5061 x = fold_build2 (POINTER_PLUS_EXPR,
5062 TREE_TYPE (x), x, t);
5063 else
5064 x = fold_build2 (PLUS_EXPR, TREE_TYPE (x), x, t);
5067 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR
5068 || TREE_ADDRESSABLE (new_var))
5069 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5070 idx, lane, ivar, lvar))
5072 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
5074 tree iv = create_tmp_var (TREE_TYPE (new_var));
5075 x = lang_hooks.decls.omp_clause_copy_ctor (c, iv, x);
5076 gimplify_and_add (x, ilist);
5077 gimple_stmt_iterator gsi
5078 = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5079 gassign *g
5080 = gimple_build_assign (unshare_expr (lvar), iv);
5081 gsi_insert_before_without_update (&gsi, g,
5082 GSI_SAME_STMT);
5083 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5084 enum tree_code code = PLUS_EXPR;
5085 if (POINTER_TYPE_P (TREE_TYPE (new_var)))
5086 code = POINTER_PLUS_EXPR;
5087 g = gimple_build_assign (iv, code, iv, t);
5088 gsi_insert_before_without_update (&gsi, g,
5089 GSI_SAME_STMT);
5090 break;
5092 x = lang_hooks.decls.omp_clause_copy_ctor
5093 (c, unshare_expr (ivar), x);
5094 gimplify_and_add (x, &llist[0]);
5095 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5096 if (x)
5098 gimple_seq tseq = NULL;
5100 dtor = x;
5101 gimplify_stmt (&dtor, &tseq);
5102 gimple_seq_add_seq (&llist[1], tseq);
5104 break;
5107 x = lang_hooks.decls.omp_clause_copy_ctor
5108 (c, unshare_expr (new_var), x);
5109 gimplify_and_add (x, ilist);
5110 goto do_dtor;
5112 case OMP_CLAUSE__LOOPTEMP_:
5113 gcc_assert (is_taskreg_ctx (ctx));
5114 x = build_outer_var_ref (var, ctx);
5115 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
5116 gimplify_and_add (x, ilist);
5117 break;
5119 case OMP_CLAUSE_COPYIN:
5120 by_ref = use_pointer_for_field (var, NULL);
5121 x = build_receiver_ref (var, by_ref, ctx);
5122 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
5123 append_to_statement_list (x, &copyin_seq);
5124 copyin_by_ref |= by_ref;
5125 break;
5127 case OMP_CLAUSE_REDUCTION:
5128 /* OpenACC reductions are initialized using the
5129 GOACC_REDUCTION internal function. */
5130 if (is_gimple_omp_oacc (ctx->stmt))
5131 break;
5132 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5134 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5135 gimple *tseq;
5136 x = build_outer_var_ref (var, ctx);
5138 if (is_reference (var)
5139 && !useless_type_conversion_p (TREE_TYPE (placeholder),
5140 TREE_TYPE (x)))
5141 x = build_fold_addr_expr_loc (clause_loc, x);
5142 SET_DECL_VALUE_EXPR (placeholder, x);
5143 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5144 tree new_vard = new_var;
5145 if (is_reference (var))
5147 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5148 new_vard = TREE_OPERAND (new_var, 0);
5149 gcc_assert (DECL_P (new_vard));
5151 if (is_simd
5152 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5153 idx, lane, ivar, lvar))
5155 if (new_vard == new_var)
5157 gcc_assert (DECL_VALUE_EXPR (new_var) == lvar);
5158 SET_DECL_VALUE_EXPR (new_var, ivar);
5160 else
5162 SET_DECL_VALUE_EXPR (new_vard,
5163 build_fold_addr_expr (ivar));
5164 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5166 x = lang_hooks.decls.omp_clause_default_ctor
5167 (c, unshare_expr (ivar),
5168 build_outer_var_ref (var, ctx));
5169 if (x)
5170 gimplify_and_add (x, &llist[0]);
5171 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5173 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5174 lower_omp (&tseq, ctx);
5175 gimple_seq_add_seq (&llist[0], tseq);
5177 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5178 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5179 lower_omp (&tseq, ctx);
5180 gimple_seq_add_seq (&llist[1], tseq);
5181 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5182 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5183 if (new_vard == new_var)
5184 SET_DECL_VALUE_EXPR (new_var, lvar);
5185 else
5186 SET_DECL_VALUE_EXPR (new_vard,
5187 build_fold_addr_expr (lvar));
5188 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5189 if (x)
5191 tseq = NULL;
5192 dtor = x;
5193 gimplify_stmt (&dtor, &tseq);
5194 gimple_seq_add_seq (&llist[1], tseq);
5196 break;
5198 /* If this is a reference to a constant-size reduction var
5199 with a placeholder, we haven't emitted the initializer
5200 for it yet, because doing so is undesirable if SIMD
5201 arrays are used. But if they aren't used, we need to
5202 emit the deferred initialization now. */
5203 else if (is_reference (var) && is_simd)
5204 handle_simd_reference (clause_loc, new_vard, ilist);
5205 x = lang_hooks.decls.omp_clause_default_ctor
5206 (c, unshare_expr (new_var),
5207 build_outer_var_ref (var, ctx));
5208 if (x)
5209 gimplify_and_add (x, ilist);
5210 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5212 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5213 lower_omp (&tseq, ctx);
5214 gimple_seq_add_seq (ilist, tseq);
5216 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5217 if (is_simd)
5219 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5220 lower_omp (&tseq, ctx);
5221 gimple_seq_add_seq (dlist, tseq);
5222 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5224 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5225 goto do_dtor;
5227 else
5229 x = omp_reduction_init (c, TREE_TYPE (new_var));
5230 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
5231 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
5233 /* reduction(-:var) sums up the partial results, so it
5234 acts identically to reduction(+:var). */
5235 if (code == MINUS_EXPR)
5236 code = PLUS_EXPR;
5238 tree new_vard = new_var;
5239 if (is_simd && is_reference (var))
5241 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5242 new_vard = TREE_OPERAND (new_var, 0);
5243 gcc_assert (DECL_P (new_vard));
5245 if (is_simd
5246 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5247 idx, lane, ivar, lvar))
5249 tree ref = build_outer_var_ref (var, ctx);
5251 gimplify_assign (unshare_expr (ivar), x, &llist[0]);
5253 x = build2 (code, TREE_TYPE (ref), ref, ivar);
5254 ref = build_outer_var_ref (var, ctx);
5255 gimplify_assign (ref, x, &llist[1]);
5257 if (new_vard != new_var)
5259 SET_DECL_VALUE_EXPR (new_vard,
5260 build_fold_addr_expr (lvar));
5261 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5264 else
5266 if (is_reference (var) && is_simd)
5267 handle_simd_reference (clause_loc, new_vard, ilist);
5268 gimplify_assign (new_var, x, ilist);
5269 if (is_simd)
5271 tree ref = build_outer_var_ref (var, ctx);
5273 x = build2 (code, TREE_TYPE (ref), ref, new_var);
5274 ref = build_outer_var_ref (var, ctx);
5275 gimplify_assign (ref, x, dlist);
5279 break;
5281 default:
5282 gcc_unreachable ();
5287 if (lane)
5289 tree uid = create_tmp_var (ptr_type_node, "simduid");
5290 /* Suppress uninitialized-use warnings on simduid: it is always
5291 uninitialized, and we use only its DECL_UID, never its value. */
5292 TREE_NO_WARNING (uid) = 1;
5293 gimple *g
5294 = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
5295 gimple_call_set_lhs (g, lane);
5296 gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5297 gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
5298 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
5299 OMP_CLAUSE__SIMDUID__DECL (c) = uid;
5300 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5301 gimple_omp_for_set_clauses (ctx->stmt, c);
5302 g = gimple_build_assign (lane, INTEGER_CST,
5303 build_int_cst (unsigned_type_node, 0));
5304 gimple_seq_add_stmt (ilist, g);
5305 for (int i = 0; i < 2; i++)
5306 if (llist[i])
5308 tree vf = create_tmp_var (unsigned_type_node);
5309 g = gimple_build_call_internal (IFN_GOMP_SIMD_VF, 1, uid);
5310 gimple_call_set_lhs (g, vf);
5311 gimple_seq *seq = i == 0 ? ilist : dlist;
5312 gimple_seq_add_stmt (seq, g);
5313 tree t = build_int_cst (unsigned_type_node, 0);
5314 g = gimple_build_assign (idx, INTEGER_CST, t);
5315 gimple_seq_add_stmt (seq, g);
5316 tree body = create_artificial_label (UNKNOWN_LOCATION);
5317 tree header = create_artificial_label (UNKNOWN_LOCATION);
5318 tree end = create_artificial_label (UNKNOWN_LOCATION);
5319 gimple_seq_add_stmt (seq, gimple_build_goto (header));
5320 gimple_seq_add_stmt (seq, gimple_build_label (body));
5321 gimple_seq_add_seq (seq, llist[i]);
5322 t = build_int_cst (unsigned_type_node, 1);
5323 g = gimple_build_assign (idx, PLUS_EXPR, idx, t);
5324 gimple_seq_add_stmt (seq, g);
5325 gimple_seq_add_stmt (seq, gimple_build_label (header));
5326 g = gimple_build_cond (LT_EXPR, idx, vf, body, end);
5327 gimple_seq_add_stmt (seq, g);
5328 gimple_seq_add_stmt (seq, gimple_build_label (end));
5332 /* The copyin sequence must not be executed by the main thread, since
5333 that would result in self-copies. Such a self-copy might not be
5334 visible for scalars, but it certainly is for C++ operator=. */
5335 if (copyin_seq)
5337 x = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM),
5338 0);
5339 x = build2 (NE_EXPR, boolean_type_node, x,
5340 build_int_cst (TREE_TYPE (x), 0));
5341 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
5342 gimplify_and_add (x, ilist);
5345 /* If any copyin variable is passed by reference, we must ensure the
5346 master thread doesn't modify it before it is copied over in all
5347 threads. Similarly for variables in both firstprivate and
5348 lastprivate clauses we need to ensure the lastprivate copying
5349 happens after firstprivate copying in all threads. And similarly
5350 for UDRs if initializer expression refers to omp_orig. */
5351 if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
5353 /* Don't add any barrier for #pragma omp simd or
5354 #pragma omp distribute. */
5355 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
5356 || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)
5357 gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE));
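/* E.g. (illustrative) for

     #pragma omp for firstprivate (a) lastprivate (a)

   every thread must read the outer A for its firstprivate copy before
   the thread running the last iteration stores A back, which is what
   the barrier emitted above enforces.  */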
5360 /* If max_vf is non-zero, then we can use only a vectorization factor
5361 up to the max_vf we chose. So stick it into the safelen clause. */
5362 if (max_vf)
5364 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
5365 OMP_CLAUSE_SAFELEN);
5366 if (c == NULL_TREE
5367 || (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) == INTEGER_CST
5368 && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
5369 max_vf) == 1))
5371 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
5372 OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
5373 max_vf);
5374 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5375 gimple_omp_for_set_clauses (ctx->stmt, c);
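/* For instance (sketch): if the user wrote safelen(64) but max_vf was
   chosen as 16, the code above chains a new safelen(16) clause in front
   of the existing ones; find_omp_clause sees the new clause first, so
   the vectorizer honors the smaller bound.  */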
5381 /* Generate code to implement the LASTPRIVATE clauses. This is used for
5382 both parallel and workshare constructs. PREDICATE may be NULL if it's
5383 always true. */
5385 static void
5386 lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
5387 omp_context *ctx)
5389 tree x, c, label = NULL, orig_clauses = clauses;
5390 bool par_clauses = false;
5391 tree simduid = NULL, lastlane = NULL;
5393 /* Early exit if there are no lastprivate or linear clauses. */
5394 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
5395 if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LASTPRIVATE
5396 || (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LINEAR
5397 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (clauses)))
5398 break;
5399 if (clauses == NULL)
5401 /* If this was a workshare clause, see if it had been combined
5402 with its parallel. In that case, look for the clauses on the
5403 parallel statement itself. */
5404 if (is_parallel_ctx (ctx))
5405 return;
5407 ctx = ctx->outer;
5408 if (ctx == NULL || !is_parallel_ctx (ctx))
5409 return;
5411 clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5412 OMP_CLAUSE_LASTPRIVATE);
5413 if (clauses == NULL)
5414 return;
5415 par_clauses = true;
5418 if (predicate)
5420 gcond *stmt;
5421 tree label_true, arm1, arm2;
5423 label = create_artificial_label (UNKNOWN_LOCATION);
5424 label_true = create_artificial_label (UNKNOWN_LOCATION);
5425 arm1 = TREE_OPERAND (predicate, 0);
5426 arm2 = TREE_OPERAND (predicate, 1);
5427 gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue);
5428 gimplify_expr (&arm2, stmt_list, NULL, is_gimple_val, fb_rvalue);
5429 stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2,
5430 label_true, label);
5431 gimple_seq_add_stmt (stmt_list, stmt);
5432 gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true));
5435 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5436 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5438 simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
5439 if (simduid)
5440 simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5443 for (c = clauses; c ;)
5445 tree var, new_var;
5446 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5448 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5449 || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5450 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
5452 var = OMP_CLAUSE_DECL (c);
5453 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5454 && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
5455 && is_taskloop_ctx (ctx))
5457 gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
5458 new_var = lookup_decl (var, ctx->outer);
5460 else
5461 new_var = lookup_decl (var, ctx);
5463 if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
5465 tree val = DECL_VALUE_EXPR (new_var);
5466 if (TREE_CODE (val) == ARRAY_REF
5467 && VAR_P (TREE_OPERAND (val, 0))
5468 && lookup_attribute ("omp simd array",
5469 DECL_ATTRIBUTES (TREE_OPERAND (val,
5470 0))))
5472 if (lastlane == NULL)
5474 lastlane = create_tmp_var (unsigned_type_node);
5475 gcall *g
5476 = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
5477 2, simduid,
5478 TREE_OPERAND (val, 1));
5479 gimple_call_set_lhs (g, lastlane);
5480 gimple_seq_add_stmt (stmt_list, g);
5482 new_var = build4 (ARRAY_REF, TREE_TYPE (val),
5483 TREE_OPERAND (val, 0), lastlane,
5484 NULL_TREE, NULL_TREE);
5488 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5489 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
5491 lower_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
5492 gimple_seq_add_seq (stmt_list,
5493 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
5494 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c) = NULL;
5496 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5497 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
5499 lower_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
5500 gimple_seq_add_seq (stmt_list,
5501 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c));
5502 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
5505 x = NULL_TREE;
5506 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5507 && OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV (c))
5509 gcc_checking_assert (is_taskloop_ctx (ctx));
5510 tree ovar = maybe_lookup_decl_in_outer_ctx (var,
5511 ctx->outer->outer);
5512 if (is_global_var (ovar))
5513 x = ovar;
5515 if (!x)
5516 x = build_outer_var_ref (var, ctx, true);
5517 if (is_reference (var))
5518 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5519 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
5520 gimplify_and_add (x, stmt_list);
5522 c = OMP_CLAUSE_CHAIN (c);
5523 if (c == NULL && !par_clauses)
5525 /* If this was a workshare clause, see if it had been combined
5526 with its parallel. In that case, continue looking for the
5527 clauses also on the parallel statement itself. */
5528 if (is_parallel_ctx (ctx))
5529 break;
5531 ctx = ctx->outer;
5532 if (ctx == NULL || !is_parallel_ctx (ctx))
5533 break;
5535 c = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5536 OMP_CLAUSE_LASTPRIVATE);
5537 par_clauses = true;
5541 if (label)
5542 gimple_seq_add_stmt (stmt_list, gimple_build_label (label));
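/* Roughly (illustrative), for "#pragma omp for lastprivate (x)" the
   sequence built above amounts to

     if (<this thread ran the last iteration>)
       x = x_priv;   // via omp_clause_assign_op

   and in simd loops X_PRIV is first redirected to the "omp simd array"
   slot selected by IFN_GOMP_SIMD_LAST_LANE.  */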
5545 /* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
5546 (which might be a placeholder). INNER is true if this is an inner
5547 axis of a multi-axis loop. FORK and JOIN are (optional) fork and
5548 join markers. Generate the before-loop forking sequence in
5549 FORK_SEQ and the after-loop joining sequence in JOIN_SEQ. The
5550 general form of these sequences is
5552 GOACC_REDUCTION_SETUP
5553 GOACC_FORK
5554 GOACC_REDUCTION_INIT
5556 GOACC_REDUCTION_FINI
5557 GOACC_JOIN
5558 GOACC_REDUCTION_TEARDOWN. */
5560 static void
5561 lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
5562 gcall *fork, gcall *join, gimple_seq *fork_seq,
5563 gimple_seq *join_seq, omp_context *ctx)
5565 gimple_seq before_fork = NULL;
5566 gimple_seq after_fork = NULL;
5567 gimple_seq before_join = NULL;
5568 gimple_seq after_join = NULL;
5569 tree init_code = NULL_TREE, fini_code = NULL_TREE,
5570 setup_code = NULL_TREE, teardown_code = NULL_TREE;
5571 unsigned offset = 0;
5573 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5574 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5576 tree orig = OMP_CLAUSE_DECL (c);
5577 tree var = maybe_lookup_decl (orig, ctx);
5578 tree ref_to_res = NULL_TREE;
5579 tree incoming, outgoing, v1, v2, v3;
5580 bool is_private = false;
5582 enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
5583 if (rcode == MINUS_EXPR)
5584 rcode = PLUS_EXPR;
5585 else if (rcode == TRUTH_ANDIF_EXPR)
5586 rcode = BIT_AND_EXPR;
5587 else if (rcode == TRUTH_ORIF_EXPR)
5588 rcode = BIT_IOR_EXPR;
5589 tree op = build_int_cst (unsigned_type_node, rcode);
5591 if (!var)
5592 var = orig;
5594 incoming = outgoing = var;
5596 if (!inner)
5598 /* See if an outer construct also reduces this variable. */
5599 omp_context *outer = ctx;
5601 while (omp_context *probe = outer->outer)
5603 enum gimple_code type = gimple_code (probe->stmt);
5604 tree cls;
5606 switch (type)
5608 case GIMPLE_OMP_FOR:
5609 cls = gimple_omp_for_clauses (probe->stmt);
5610 break;
5612 case GIMPLE_OMP_TARGET:
5613 if (gimple_omp_target_kind (probe->stmt)
5614 != GF_OMP_TARGET_KIND_OACC_PARALLEL)
5615 goto do_lookup;
5617 cls = gimple_omp_target_clauses (probe->stmt);
5618 break;
5620 default:
5621 goto do_lookup;
5624 outer = probe;
5625 for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
5626 if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
5627 && orig == OMP_CLAUSE_DECL (cls))
5629 incoming = outgoing = lookup_decl (orig, probe);
5630 goto has_outer_reduction;
5632 else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
5633 || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
5634 && orig == OMP_CLAUSE_DECL (cls))
5636 is_private = true;
5637 goto do_lookup;
5641 do_lookup:
5642 /* This is the outermost construct with this reduction,
5643 see if there's a mapping for it. */
5644 if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
5645 && maybe_lookup_field (orig, outer) && !is_private)
5647 ref_to_res = build_receiver_ref (orig, false, outer);
5648 if (is_reference (orig))
5649 ref_to_res = build_simple_mem_ref (ref_to_res);
5651 tree type = TREE_TYPE (var);
5652 if (POINTER_TYPE_P (type))
5653 type = TREE_TYPE (type);
5655 outgoing = var;
5656 incoming = omp_reduction_init_op (loc, rcode, type);
5658 else if (ctx->outer)
5659 incoming = outgoing = lookup_decl (orig, ctx->outer);
5660 else
5661 incoming = outgoing = orig;
5663 has_outer_reduction:;
5666 if (!ref_to_res)
5667 ref_to_res = integer_zero_node;
5669 if (is_reference (orig))
5671 tree type = TREE_TYPE (var);
5672 const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
5674 if (!inner)
5676 tree x = create_tmp_var (TREE_TYPE (type), id);
5677 gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
5680 v1 = create_tmp_var (type, id);
5681 v2 = create_tmp_var (type, id);
5682 v3 = create_tmp_var (type, id);
5684 gimplify_assign (v1, var, fork_seq);
5685 gimplify_assign (v2, var, fork_seq);
5686 gimplify_assign (v3, var, fork_seq);
5688 var = build_simple_mem_ref (var);
5689 v1 = build_simple_mem_ref (v1);
5690 v2 = build_simple_mem_ref (v2);
5691 v3 = build_simple_mem_ref (v3);
5692 outgoing = build_simple_mem_ref (outgoing);
5694 if (TREE_CODE (incoming) != INTEGER_CST)
5695 incoming = build_simple_mem_ref (incoming);
5697 else
5698 v1 = v2 = v3 = var;
5700 /* Determine position in reduction buffer, which may be used
5701 by the target. */
5702 enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
5703 unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
5704 offset = (offset + align - 1) & ~(align - 1);
5705 tree off = build_int_cst (sizetype, offset);
5706 offset += GET_MODE_SIZE (mode);
5708 if (!init_code)
5710 init_code = build_int_cst (integer_type_node,
5711 IFN_GOACC_REDUCTION_INIT);
5712 fini_code = build_int_cst (integer_type_node,
5713 IFN_GOACC_REDUCTION_FINI);
5714 setup_code = build_int_cst (integer_type_node,
5715 IFN_GOACC_REDUCTION_SETUP);
5716 teardown_code = build_int_cst (integer_type_node,
5717 IFN_GOACC_REDUCTION_TEARDOWN);
5720 tree setup_call
5721 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5722 TREE_TYPE (var), 6, setup_code,
5723 unshare_expr (ref_to_res),
5724 incoming, level, op, off);
5725 tree init_call
5726 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5727 TREE_TYPE (var), 6, init_code,
5728 unshare_expr (ref_to_res),
5729 v1, level, op, off);
5730 tree fini_call
5731 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5732 TREE_TYPE (var), 6, fini_code,
5733 unshare_expr (ref_to_res),
5734 v2, level, op, off);
5735 tree teardown_call
5736 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5737 TREE_TYPE (var), 6, teardown_code,
5738 ref_to_res, v3, level, op, off);
5740 gimplify_assign (v1, setup_call, &before_fork);
5741 gimplify_assign (v2, init_call, &after_fork);
5742 gimplify_assign (v3, fini_call, &before_join);
5743 gimplify_assign (outgoing, teardown_call, &after_join);
5746 /* Now stitch things together. */
5747 gimple_seq_add_seq (fork_seq, before_fork);
5748 if (fork)
5749 gimple_seq_add_stmt (fork_seq, fork);
5750 gimple_seq_add_seq (fork_seq, after_fork);
5752 gimple_seq_add_seq (join_seq, before_join);
5753 if (join)
5754 gimple_seq_add_stmt (join_seq, join);
5755 gimple_seq_add_seq (join_seq, after_join);
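/* An illustrative expansion for "#pragma acc parallel loop
   reduction (+:sum)" (operands abbreviated):

     v1  = GOACC_REDUCTION (SETUP,    ref_to_res, sum, level, +, off);
     GOACC_FORK
     v2  = GOACC_REDUCTION (INIT,     ref_to_res, v1,  level, +, off);
     ...   loop body updating the private copy   ...
     v3  = GOACC_REDUCTION (FINI,     ref_to_res, v2,  level, +, off);
     GOACC_JOIN
     sum = GOACC_REDUCTION (TEARDOWN, ref_to_res, v3,  level, +, off);  */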
5758 /* Generate code to implement the REDUCTION clauses. */
5760 static void
5761 lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
5763 gimple_seq sub_seq = NULL;
5764 gimple *stmt;
5765 tree x, c;
5766 int count = 0;
5768 /* OpenACC loop reductions are handled elsewhere. */
5769 if (is_gimple_omp_oacc (ctx->stmt))
5770 return;
5772 /* SIMD reductions are handled in lower_rec_input_clauses. */
5773 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5774 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5775 return;
5777 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
5778 update in that case, otherwise use a lock. */
5779 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
5780 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5782 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
5783 || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
5785 /* Never use OMP_ATOMIC for array reductions or UDRs. */
5786 count = -1;
5787 break;
5789 count++;
5792 if (count == 0)
5793 return;
5795 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
5797 tree var, ref, new_var, orig_var;
5798 enum tree_code code;
5799 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5801 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
5802 continue;
5804 orig_var = var = OMP_CLAUSE_DECL (c);
5805 if (TREE_CODE (var) == MEM_REF)
5807 var = TREE_OPERAND (var, 0);
5808 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
5809 var = TREE_OPERAND (var, 0);
5810 if (TREE_CODE (var) == INDIRECT_REF
5811 || TREE_CODE (var) == ADDR_EXPR)
5812 var = TREE_OPERAND (var, 0);
5813 orig_var = var;
5814 if (is_variable_sized (var))
5816 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
5817 var = DECL_VALUE_EXPR (var);
5818 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
5819 var = TREE_OPERAND (var, 0);
5820 gcc_assert (DECL_P (var));
5823 new_var = lookup_decl (var, ctx);
5824 if (var == OMP_CLAUSE_DECL (c) && is_reference (var))
5825 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5826 ref = build_outer_var_ref (var, ctx);
5827 code = OMP_CLAUSE_REDUCTION_CODE (c);
5829 /* reduction(-:var) sums up the partial results, so it acts
5830 identically to reduction(+:var). */
5831 if (code == MINUS_EXPR)
5832 code = PLUS_EXPR;
5834 if (count == 1)
5836 tree addr = build_fold_addr_expr_loc (clause_loc, ref);
5838 addr = save_expr (addr);
5839 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
5840 x = fold_build2_loc (clause_loc, code, TREE_TYPE (ref), ref, new_var);
5841 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
5842 gimplify_and_add (x, stmt_seqp);
5843 return;
5845 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
5847 tree d = OMP_CLAUSE_DECL (c);
5848 tree type = TREE_TYPE (d);
5849 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
5850 tree i = create_tmp_var (TREE_TYPE (v), NULL);
5851 tree ptype = build_pointer_type (TREE_TYPE (type));
5852 tree bias = TREE_OPERAND (d, 1);
5853 d = TREE_OPERAND (d, 0);
5854 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
5856 tree b = TREE_OPERAND (d, 1);
5857 b = maybe_lookup_decl (b, ctx);
5858 if (b == NULL)
5860 b = TREE_OPERAND (d, 1);
5861 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
5863 if (integer_zerop (bias))
5864 bias = b;
5865 else
5867 bias = fold_convert_loc (clause_loc, TREE_TYPE (b), bias);
5868 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
5869 TREE_TYPE (b), b, bias);
5871 d = TREE_OPERAND (d, 0);
5873 /* For REF, build_outer_var_ref already performs this, so
5874 only NEW_VAR needs a dereference. */
5875 if (TREE_CODE (d) == INDIRECT_REF)
5877 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5878 gcc_assert (is_reference (var) && var == orig_var);
5880 else if (TREE_CODE (d) == ADDR_EXPR)
5882 if (orig_var == var)
5884 new_var = build_fold_addr_expr (new_var);
5885 ref = build_fold_addr_expr (ref);
5888 else
5890 gcc_assert (orig_var == var);
5891 if (is_reference (var))
5892 ref = build_fold_addr_expr (ref);
5894 if (DECL_P (v))
5896 tree t = maybe_lookup_decl (v, ctx);
5897 if (t)
5898 v = t;
5899 else
5900 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
5901 gimplify_expr (&v, stmt_seqp, NULL, is_gimple_val, fb_rvalue);
5903 if (!integer_zerop (bias))
5905 bias = fold_convert_loc (clause_loc, sizetype, bias);
5906 new_var = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
5907 TREE_TYPE (new_var), new_var,
5908 unshare_expr (bias));
5909 ref = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
5910 TREE_TYPE (ref), ref, bias);
5912 new_var = fold_convert_loc (clause_loc, ptype, new_var);
5913 ref = fold_convert_loc (clause_loc, ptype, ref);
5914 tree m = create_tmp_var (ptype, NULL);
5915 gimplify_assign (m, new_var, stmt_seqp);
5916 new_var = m;
5917 m = create_tmp_var (ptype, NULL);
5918 gimplify_assign (m, ref, stmt_seqp);
5919 ref = m;
5920 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), stmt_seqp);
5921 tree body = create_artificial_label (UNKNOWN_LOCATION);
5922 tree end = create_artificial_label (UNKNOWN_LOCATION);
5923 gimple_seq_add_stmt (&sub_seq, gimple_build_label (body));
5924 tree priv = build_simple_mem_ref_loc (clause_loc, new_var);
5925 tree out = build_simple_mem_ref_loc (clause_loc, ref);
5926 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5928 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5929 tree decl_placeholder
5930 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
5931 SET_DECL_VALUE_EXPR (placeholder, out);
5932 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5933 SET_DECL_VALUE_EXPR (decl_placeholder, priv);
5934 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
5935 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
5936 gimple_seq_add_seq (&sub_seq,
5937 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
5938 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5939 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
5940 OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL;
5942 else
5944 x = build2 (code, TREE_TYPE (out), out, priv);
5945 out = unshare_expr (out);
5946 gimplify_assign (out, x, &sub_seq);
5948 gimple *g = gimple_build_assign (new_var, POINTER_PLUS_EXPR, new_var,
5949 TYPE_SIZE_UNIT (TREE_TYPE (type)));
5950 gimple_seq_add_stmt (&sub_seq, g);
5951 g = gimple_build_assign (ref, POINTER_PLUS_EXPR, ref,
5952 TYPE_SIZE_UNIT (TREE_TYPE (type)));
5953 gimple_seq_add_stmt (&sub_seq, g);
5954 g = gimple_build_assign (i, PLUS_EXPR, i,
5955 build_int_cst (TREE_TYPE (i), 1));
5956 gimple_seq_add_stmt (&sub_seq, g);
5957 g = gimple_build_cond (LE_EXPR, i, v, body, end);
5958 gimple_seq_add_stmt (&sub_seq, g);
5959 gimple_seq_add_stmt (&sub_seq, gimple_build_label (end));
5961 else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5963 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5965 if (is_reference (var)
5966 && !useless_type_conversion_p (TREE_TYPE (placeholder),
5967 TREE_TYPE (ref)))
5968 ref = build_fold_addr_expr_loc (clause_loc, ref);
5969 SET_DECL_VALUE_EXPR (placeholder, ref);
5970 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5971 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
5972 gimple_seq_add_seq (&sub_seq, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
5973 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5974 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
5976 else
5978 x = build2 (code, TREE_TYPE (ref), ref, new_var);
5979 ref = build_outer_var_ref (var, ctx);
5980 gimplify_assign (ref, x, &sub_seq);
5984 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
5986 gimple_seq_add_stmt (stmt_seqp, stmt);
5988 gimple_seq_add_seq (stmt_seqp, sub_seq);
5990 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END),
5992 gimple_seq_add_stmt (stmt_seqp, stmt);
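/* For illustration: with a single "reduction (+:x)" clause the merge
   emitted above is one atomic update, roughly

     #pragma omp atomic
     x_outer = x_outer + x_private;

   while two or more reduction clauses share a single lock:

     GOMP_atomic_start ();
     x_outer = x_outer + x_private;
     y_outer = y_outer * y_private;
     GOMP_atomic_end ();

   (names illustrative; array-section and UDR reductions always take
   the locked path, as COUNT is forced to -1 for them above).  */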
5996 /* Generate code to implement the COPYPRIVATE clauses. */
5998 static void
5999 lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist,
6000 omp_context *ctx)
6002 tree c;
6004 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
6006 tree var, new_var, ref, x;
6007 bool by_ref;
6008 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6010 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
6011 continue;
6013 var = OMP_CLAUSE_DECL (c);
6014 by_ref = use_pointer_for_field (var, NULL);
6016 ref = build_sender_ref (var, ctx);
6017 x = new_var = lookup_decl_in_outer_ctx (var, ctx);
6018 if (by_ref)
6020 x = build_fold_addr_expr_loc (clause_loc, new_var);
6021 x = fold_convert_loc (clause_loc, TREE_TYPE (ref), x);
6023 gimplify_assign (ref, x, slist);
6025 ref = build_receiver_ref (var, false, ctx);
6026 if (by_ref)
6028 ref = fold_convert_loc (clause_loc,
6029 build_pointer_type (TREE_TYPE (new_var)),
6030 ref);
6031 ref = build_fold_indirect_ref_loc (clause_loc, ref);
6033 if (is_reference (var))
6035 ref = fold_convert_loc (clause_loc, TREE_TYPE (new_var), ref);
6036 ref = build_simple_mem_ref_loc (clause_loc, ref);
6037 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
6039 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, ref);
6040 gimplify_and_add (x, rlist);
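/* E.g. for "copyprivate (x)" on a single construct, the code built
   here is roughly (record and field names illustrative)

     .omp_copy_o.x = &x;	   executed by the single thread (SLIST)
     x = *.omp_copy_i.x;	   executed by the other threads (RLIST)

   with the enclosing single lowering providing the copied-in record
   via GOMP_single_copy_start/GOMP_single_copy_end.  */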
6045 /* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
6046 and REDUCTION from the sender (aka parent) side. */
6048 static void
6049 lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
6050 omp_context *ctx)
6052 tree c, t;
6053 int ignored_looptemp = 0;
6054 bool is_taskloop = false;
6056 /* For taskloop, ignore the first two _looptemp_ clauses; those are initialized
6057 by GOMP_taskloop. */
6058 if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt))
6060 ignored_looptemp = 2;
6061 is_taskloop = true;
6064 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
6066 tree val, ref, x, var;
6067 bool by_ref, do_in = false, do_out = false;
6068 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6070 switch (OMP_CLAUSE_CODE (c))
6072 case OMP_CLAUSE_PRIVATE:
6073 if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
6074 break;
6075 continue;
6076 case OMP_CLAUSE_FIRSTPRIVATE:
6077 case OMP_CLAUSE_COPYIN:
6078 case OMP_CLAUSE_LASTPRIVATE:
6079 case OMP_CLAUSE_REDUCTION:
6080 break;
6081 case OMP_CLAUSE_SHARED:
6082 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6083 break;
6084 continue;
6085 case OMP_CLAUSE__LOOPTEMP_:
6086 if (ignored_looptemp)
6088 ignored_looptemp--;
6089 continue;
6091 break;
6092 default:
6093 continue;
6096 val = OMP_CLAUSE_DECL (c);
6097 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
6098 && TREE_CODE (val) == MEM_REF)
6100 val = TREE_OPERAND (val, 0);
6101 if (TREE_CODE (val) == POINTER_PLUS_EXPR)
6102 val = TREE_OPERAND (val, 0);
6103 if (TREE_CODE (val) == INDIRECT_REF
6104 || TREE_CODE (val) == ADDR_EXPR)
6105 val = TREE_OPERAND (val, 0);
6106 if (is_variable_sized (val))
6107 continue;
6110 /* For OMP_CLAUSE_SHARED_FIRSTPRIVATE, look beyond the
6111 outer taskloop region. */
6112 omp_context *ctx_for_o = ctx;
6113 if (is_taskloop
6114 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
6115 && OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6116 ctx_for_o = ctx->outer;
6118 var = lookup_decl_in_outer_ctx (val, ctx_for_o);
6120 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
6121 && is_global_var (var))
6122 continue;
6124 t = omp_member_access_dummy_var (var);
6125 if (t)
6127 var = DECL_VALUE_EXPR (var);
6128 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx_for_o);
6129 if (o != t)
6130 var = unshare_and_remap (var, t, o);
6131 else
6132 var = unshare_expr (var);
6135 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
6137 /* Handle taskloop firstprivate/lastprivate, where the
6138 lastprivate on GIMPLE_OMP_TASK is represented as
6139 OMP_CLAUSE_SHARED_FIRSTPRIVATE. */
6140 tree f = lookup_sfield ((splay_tree_key) &DECL_UID (val), ctx);
6141 x = omp_build_component_ref (ctx->sender_decl, f);
6142 if (use_pointer_for_field (val, ctx))
6143 var = build_fold_addr_expr (var);
6144 gimplify_assign (x, var, ilist);
6145 DECL_ABSTRACT_ORIGIN (f) = NULL;
6146 continue;
6149 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
6150 || val == OMP_CLAUSE_DECL (c))
6151 && is_variable_sized (val))
6152 continue;
6153 by_ref = use_pointer_for_field (val, NULL);
6155 switch (OMP_CLAUSE_CODE (c))
6157 case OMP_CLAUSE_FIRSTPRIVATE:
6158 if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)
6159 && !by_ref
6160 && is_task_ctx (ctx))
6161 TREE_NO_WARNING (var) = 1;
6162 do_in = true;
6163 break;
6165 case OMP_CLAUSE_PRIVATE:
6166 case OMP_CLAUSE_COPYIN:
6167 case OMP_CLAUSE__LOOPTEMP_:
6168 do_in = true;
6169 break;
6171 case OMP_CLAUSE_LASTPRIVATE:
6172 if (by_ref || is_reference (val))
6174 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
6175 continue;
6176 do_in = true;
6178 else
6180 do_out = true;
6181 if (lang_hooks.decls.omp_private_outer_ref (val))
6182 do_in = true;
6184 break;
6186 case OMP_CLAUSE_REDUCTION:
6187 do_in = true;
6188 if (val == OMP_CLAUSE_DECL (c))
6189 do_out = !(by_ref || is_reference (val));
6190 else
6191 by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE;
6192 break;
6194 default:
6195 gcc_unreachable ();
6198 if (do_in)
6200 ref = build_sender_ref (val, ctx);
6201 x = by_ref ? build_fold_addr_expr_loc (clause_loc, var) : var;
6202 gimplify_assign (ref, x, ilist);
6203 if (is_task_ctx (ctx))
6204 DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
6207 if (do_out)
6209 ref = build_sender_ref (val, ctx);
6210 gimplify_assign (var, ref, olist);
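/* The net effect for, say, "firstprivate (a) lastprivate (b)" is
   roughly (field names illustrative)

     .omp_data_o.a = a;	     ILIST, before the region starts
     ...region executes...
     b = .omp_data_o.b;	     OLIST, after the region completes

   whether a field carries the value or its address is decided by
   use_pointer_for_field above.  */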
6215 /* Generate code to implement SHARED from the sender (aka parent)
6216 side. This is trickier, since GIMPLE_OMP_PARALLEL_CLAUSES doesn't
6217 list things that got automatically shared. */
6219 static void
6220 lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx)
6222 tree var, ovar, nvar, t, f, x, record_type;
6224 if (ctx->record_type == NULL)
6225 return;
6227 record_type = ctx->srecord_type ? ctx->srecord_type : ctx->record_type;
6228 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
6230 ovar = DECL_ABSTRACT_ORIGIN (f);
6231 if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
6232 continue;
6234 nvar = maybe_lookup_decl (ovar, ctx);
6235 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
6236 continue;
6238 /* If CTX is a nested parallel directive, find the immediately
6239 enclosing parallel or workshare construct that contains a
6240 mapping for OVAR. */
6241 var = lookup_decl_in_outer_ctx (ovar, ctx);
6243 t = omp_member_access_dummy_var (var);
6244 if (t)
6246 var = DECL_VALUE_EXPR (var);
6247 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
6248 if (o != t)
6249 var = unshare_and_remap (var, t, o);
6250 else
6251 var = unshare_expr (var);
6254 if (use_pointer_for_field (ovar, ctx))
6256 x = build_sender_ref (ovar, ctx);
6257 var = build_fold_addr_expr (var);
6258 gimplify_assign (x, var, ilist);
6260 else
6262 x = build_sender_ref (ovar, ctx);
6263 gimplify_assign (x, var, ilist);
6265 if (!TREE_READONLY (var)
6266 /* We don't need to receive a new reference to a result
6267 or parm decl. In fact we may not store to it as we will
6268 invalidate any pending RSO and generate wrong gimple
6269 during inlining. */
6270 && !((TREE_CODE (var) == RESULT_DECL
6271 || TREE_CODE (var) == PARM_DECL)
6272 && DECL_BY_REFERENCE (var)))
6274 x = build_sender_ref (ovar, ctx);
6275 gimplify_assign (var, x, olist);
6281 /* Emit an OpenACC head marker call, encapsulating the partitioning and
6282 other information that must be processed by the target compiler.
6283 Return the maximum number of dimensions the associated loop might
6284 be partitioned over. */
6286 static unsigned
6287 lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
6288 gimple_seq *seq, omp_context *ctx)
6290 unsigned levels = 0;
6291 unsigned tag = 0;
6292 tree gang_static = NULL_TREE;
6293 auto_vec<tree, 5> args;
6295 args.quick_push (build_int_cst
6296 (integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
6297 args.quick_push (ddvar);
6298 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6300 switch (OMP_CLAUSE_CODE (c))
6302 case OMP_CLAUSE_GANG:
6303 tag |= OLF_DIM_GANG;
6304 gang_static = OMP_CLAUSE_GANG_STATIC_EXPR (c);
6305 /* static:* is represented by -1, and we can ignore it, as
6306 scheduling is always static. */
6307 if (gang_static && integer_minus_onep (gang_static))
6308 gang_static = NULL_TREE;
6309 levels++;
6310 break;
6312 case OMP_CLAUSE_WORKER:
6313 tag |= OLF_DIM_WORKER;
6314 levels++;
6315 break;
6317 case OMP_CLAUSE_VECTOR:
6318 tag |= OLF_DIM_VECTOR;
6319 levels++;
6320 break;
6322 case OMP_CLAUSE_SEQ:
6323 tag |= OLF_SEQ;
6324 break;
6326 case OMP_CLAUSE_AUTO:
6327 tag |= OLF_AUTO;
6328 break;
6330 case OMP_CLAUSE_INDEPENDENT:
6331 tag |= OLF_INDEPENDENT;
6332 break;
6334 default:
6335 continue;
6339 if (gang_static)
6341 if (DECL_P (gang_static))
6342 gang_static = build_outer_var_ref (gang_static, ctx);
6343 tag |= OLF_GANG_STATIC;
6346 /* In a parallel region, loops are implicitly INDEPENDENT. */
6347 omp_context *tgt = enclosing_target_ctx (ctx);
6348 if (!tgt || is_oacc_parallel (tgt))
6349 tag |= OLF_INDEPENDENT;
6351 /* A loop lacking SEQ, GANG, WORKER and/or VECTOR is implicitly AUTO. */
6352 if (!(tag & (((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE)
6353 | OLF_SEQ)))
6354 tag |= OLF_AUTO;
6356 /* Ensure at least one level. */
6357 if (!levels)
6358 levels++;
6360 args.quick_push (build_int_cst (integer_type_node, levels));
6361 args.quick_push (build_int_cst (integer_type_node, tag));
6362 if (gang_static)
6363 args.quick_push (gang_static);
6365 gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
6366 gimple_set_location (call, loc);
6367 gimple_set_lhs (call, ddvar);
6368 gimple_seq_add_stmt (seq, call);
6370 return levels;
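/* The marker emitted above thus has the rough shape

     .data_dep = IFN_UNIQUE (OACC_HEAD_MARK, .data_dep, LEVELS, TAG
			     [, GANG_STATIC]);

   so that, e.g., "#pragma acc loop gang vector" inside a parallel
   region gives LEVELS == 2 and a TAG containing OLF_DIM_GANG,
   OLF_DIM_VECTOR and OLF_INDEPENDENT (a sketch; the exact argument
   list is assembled in ARGS above).  */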
6373 /* Emit an OpenACC loop head or tail marker to SEQ. TOFOLLOW, if
6374 non-NULL, is the partitioning level of the enclosed region. */
6376 static void
6377 lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
6378 tree tofollow, gimple_seq *seq)
6380 int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
6381 : IFN_UNIQUE_OACC_TAIL_MARK);
6382 tree marker = build_int_cst (integer_type_node, marker_kind);
6383 int nargs = 2 + (tofollow != NULL_TREE);
6384 gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
6385 marker, ddvar, tofollow);
6386 gimple_set_location (call, loc);
6387 gimple_set_lhs (call, ddvar);
6388 gimple_seq_add_stmt (seq, call);
6391 /* Generate the before and after OpenACC loop sequences. CLAUSES are
6392 the loop clauses, from which we extract reductions. Initialize
6393 HEAD and TAIL. */
6395 static void
6396 lower_oacc_head_tail (location_t loc, tree clauses,
6397 gimple_seq *head, gimple_seq *tail, omp_context *ctx)
6399 bool inner = false;
6400 tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
6401 gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
6403 unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
6404 if (!count)
6405 lower_oacc_loop_marker (loc, ddvar, false, integer_zero_node, tail);
6407 tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
6408 tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
6410 for (unsigned done = 1; count; count--, done++)
6412 gimple_seq fork_seq = NULL;
6413 gimple_seq join_seq = NULL;
6415 tree place = build_int_cst (integer_type_node, -1);
6416 gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
6417 fork_kind, ddvar, place);
6418 gimple_set_location (fork, loc);
6419 gimple_set_lhs (fork, ddvar);
6421 gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
6422 join_kind, ddvar, place);
6423 gimple_set_location (join, loc);
6424 gimple_set_lhs (join, ddvar);
6426 /* Mark the beginning of this level sequence. */
6427 if (inner)
6428 lower_oacc_loop_marker (loc, ddvar, true,
6429 build_int_cst (integer_type_node, count),
6430 &fork_seq);
6431 lower_oacc_loop_marker (loc, ddvar, false,
6432 build_int_cst (integer_type_node, done),
6433 &join_seq);
6435 lower_oacc_reductions (loc, clauses, place, inner,
6436 fork, join, &fork_seq, &join_seq, ctx);
6438 /* Append this level to head. */
6439 gimple_seq_add_seq (head, fork_seq);
6440 /* Prepend it to tail. */
6441 gimple_seq_add_seq (&join_seq, *tail);
6442 *tail = join_seq;
6444 inner = true;
6447 /* Mark the end of the sequence. */
6448 lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
6449 lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
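/* For a loop partitioned over two levels, the sequences built above
   nest roughly as follows (a sketch; setup/init/fini/teardown are the
   GOACC_REDUCTION calls from lower_oacc_reductions):

     HEAD:  OACC_HEAD_MARK (levels, tag)
	    setup / FORK / init		outer level
	    OACC_HEAD_MARK (1)
	    setup / FORK / init		inner level
	    OACC_HEAD_MARK (end)
       ...loop body...
     TAIL:  OACC_TAIL_MARK (2)
	    fini / JOIN / teardown	inner level
	    OACC_TAIL_MARK (1)
	    fini / JOIN / teardown	outer level
	    OACC_TAIL_MARK (end)  */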
6452 /* A convenience function to build an empty GIMPLE_COND with just the
6453 condition. */
6455 static gcond *
6456 gimple_build_cond_empty (tree cond)
6458 enum tree_code pred_code;
6459 tree lhs, rhs;
6461 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
6462 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
6465 /* Return true if a parallel REGION is within a declare target function or
6466 within a target region and is not a part of a gridified target. */
6468 static bool
6469 parallel_needs_hsa_kernel_p (struct omp_region *region)
6471 bool indirect = false;
6472 for (region = region->outer; region; region = region->outer)
6474 if (region->type == GIMPLE_OMP_PARALLEL)
6475 indirect = true;
6476 else if (region->type == GIMPLE_OMP_TARGET)
6478 gomp_target *tgt_stmt
6479 = as_a <gomp_target *> (last_stmt (region->entry));
6481 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
6482 OMP_CLAUSE__GRIDDIM_))
6483 return indirect;
6484 else
6485 return true;
6489 if (lookup_attribute ("omp declare target",
6490 DECL_ATTRIBUTES (current_function_decl)))
6491 return true;
6493 return false;
6496 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
6497 bool = false);
6499 /* Build the function calls to GOMP_parallel etc. to actually
6500 generate the parallel operation. REGION is the parallel region
6501 being expanded. BB is the block where the code is to be inserted.
6502 WS_ARGS will be set if this is a call to a combined
6503 parallel+workshare construct; it contains the list of additional
6504 arguments needed by the workshare construct. */
6506 static void
6507 expand_parallel_call (struct omp_region *region, basic_block bb,
6508 gomp_parallel *entry_stmt,
6509 vec<tree, va_gc> *ws_args)
6511 tree t, t1, t2, val, cond, c, clauses, flags;
6512 gimple_stmt_iterator gsi;
6513 gimple *stmt;
6514 enum built_in_function start_ix;
6515 int start_ix2;
6516 location_t clause_loc;
6517 vec<tree, va_gc> *args;
6519 clauses = gimple_omp_parallel_clauses (entry_stmt);
6521 /* Determine what flavor of GOMP_parallel we will be
6522 emitting. */
6523 start_ix = BUILT_IN_GOMP_PARALLEL;
6524 if (is_combined_parallel (region))
6526 switch (region->inner->type)
6528 case GIMPLE_OMP_FOR:
6529 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
6530 switch (region->inner->sched_kind)
6532 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6533 start_ix2 = 3;
6534 break;
6535 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6536 case OMP_CLAUSE_SCHEDULE_GUIDED:
6537 if (region->inner->sched_modifiers
6538 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
6540 start_ix2 = 3 + region->inner->sched_kind;
6541 break;
6543 /* FALLTHRU */
6544 default:
6545 start_ix2 = region->inner->sched_kind;
6546 break;
6548 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
6549 start_ix = (enum built_in_function) start_ix2;
6550 break;
6551 case GIMPLE_OMP_SECTIONS:
6552 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
6553 break;
6554 default:
6555 gcc_unreachable ();
6559 /* By default, the value of NUM_THREADS is zero (selected at run time)
6560 and there is no conditional. */
6561 cond = NULL_TREE;
6562 val = build_int_cst (unsigned_type_node, 0);
6563 flags = build_int_cst (unsigned_type_node, 0);
6565 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
6566 if (c)
6567 cond = OMP_CLAUSE_IF_EXPR (c);
6569 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
6570 if (c)
6572 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
6573 clause_loc = OMP_CLAUSE_LOCATION (c);
6575 else
6576 clause_loc = gimple_location (entry_stmt);
6578 c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND);
6579 if (c)
6580 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
6582 /* Ensure 'val' is of the correct type. */
6583 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
6585 /* If we found the clause 'if (cond)', build either
6586 (cond != 0) or (cond ? val : 1u). */
6587 if (cond)
6589 cond = gimple_boolify (cond);
6591 if (integer_zerop (val))
6592 val = fold_build2_loc (clause_loc,
6593 EQ_EXPR, unsigned_type_node, cond,
6594 build_int_cst (TREE_TYPE (cond), 0));
6595 else
6597 basic_block cond_bb, then_bb, else_bb;
6598 edge e, e_then, e_else;
6599 tree tmp_then, tmp_else, tmp_join, tmp_var;
6601 tmp_var = create_tmp_var (TREE_TYPE (val));
6602 if (gimple_in_ssa_p (cfun))
6604 tmp_then = make_ssa_name (tmp_var);
6605 tmp_else = make_ssa_name (tmp_var);
6606 tmp_join = make_ssa_name (tmp_var);
6608 else
6610 tmp_then = tmp_var;
6611 tmp_else = tmp_var;
6612 tmp_join = tmp_var;
6615 e = split_block_after_labels (bb);
6616 cond_bb = e->src;
6617 bb = e->dest;
6618 remove_edge (e);
6620 then_bb = create_empty_bb (cond_bb);
6621 else_bb = create_empty_bb (then_bb);
6622 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
6623 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
6625 stmt = gimple_build_cond_empty (cond);
6626 gsi = gsi_start_bb (cond_bb);
6627 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
6629 gsi = gsi_start_bb (then_bb);
6630 expand_omp_build_assign (&gsi, tmp_then, val, true);
6632 gsi = gsi_start_bb (else_bb);
6633 expand_omp_build_assign (&gsi, tmp_else,
6634 build_int_cst (unsigned_type_node, 1),
6635 true);
6637 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
6638 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
6639 add_bb_to_loop (then_bb, cond_bb->loop_father);
6640 add_bb_to_loop (else_bb, cond_bb->loop_father);
6641 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
6642 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
6644 if (gimple_in_ssa_p (cfun))
6646 gphi *phi = create_phi_node (tmp_join, bb);
6647 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
6648 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
6651 val = tmp_join;
6654 gsi = gsi_start_bb (bb);
6655 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
6656 false, GSI_CONTINUE_LINKING);
6659 gsi = gsi_last_bb (bb);
6660 t = gimple_omp_parallel_data_arg (entry_stmt);
6661 if (t == NULL)
6662 t1 = null_pointer_node;
6663 else
6664 t1 = build_fold_addr_expr (t);
6665 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
6666 t2 = build_fold_addr_expr (child_fndecl);
6668 vec_alloc (args, 4 + vec_safe_length (ws_args));
6669 args->quick_push (t2);
6670 args->quick_push (t1);
6671 args->quick_push (val);
6672 if (ws_args)
6673 args->splice (*ws_args);
6674 args->quick_push (flags);
6676 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
6677 builtin_decl_explicit (start_ix), args);
6679 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6680 false, GSI_CONTINUE_LINKING);
6682 if (hsa_gen_requested_p ()
6683 && parallel_needs_hsa_kernel_p (region))
6685 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
6686 hsa_register_kernel (child_cnode);
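/* The call emitted above is thus roughly

     GOMP_parallel (main._omp_fn.N, &.omp_data_o, NUM_THREADS, FLAGS);

   or, for a combined parallel loop, a GOMP_parallel_loop_* variant
   with the workshare arguments from WS_ARGS spliced in between
   NUM_THREADS and FLAGS.  With an IF clause, NUM_THREADS has been
   folded above to "(cond == 0)" -- i.e. 1 when the condition is
   false, 0 (runtime choice) otherwise -- or to "cond ? val : 1u"
   when an explicit NUM_THREADS expression VAL was also given.
   Names are illustrative.  */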
6690 /* Insert a call to the function named FUNC_NAME, carrying the information
6691 from ENTRY_STMT, into the basic block BB. */
6693 static void
6694 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
6695 vec <tree, va_gc> *ws_args)
6697 tree t, t1, t2;
6698 gimple_stmt_iterator gsi;
6699 vec <tree, va_gc> *args;
6701 gcc_assert (vec_safe_length (ws_args) == 2);
6702 tree func_name = (*ws_args)[0];
6703 tree grain = (*ws_args)[1];
6705 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
6706 tree count = find_omp_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
6707 gcc_assert (count != NULL_TREE);
6708 count = OMP_CLAUSE_OPERAND (count, 0);
6710 gsi = gsi_last_bb (bb);
6711 t = gimple_omp_parallel_data_arg (entry_stmt);
6712 if (t == NULL)
6713 t1 = null_pointer_node;
6714 else
6715 t1 = build_fold_addr_expr (t);
6716 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
6718 vec_alloc (args, 4);
6719 args->quick_push (t2);
6720 args->quick_push (t1);
6721 args->quick_push (count);
6722 args->quick_push (grain);
6723 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
6725 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
6726 GSI_CONTINUE_LINKING);
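/* The generated call is roughly

     FUNC_NAME (child_fn, &.omp_data_o, COUNT, GRAIN);

   where FUNC_NAME and GRAIN were stashed in WS_ARGS by the earlier
   workshare analysis -- typically one of the libcilkrts
   __cilkrts_cilk_for_32/_64 entry points (an assumption based on the
   Cilk runtime; names illustrative).  */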
6729 /* Build the function call to GOMP_task to actually
6730 generate the task operation. BB is the block where the code is to be inserted. */
6732 static void
6733 expand_task_call (struct omp_region *region, basic_block bb,
6734 gomp_task *entry_stmt)
6736 tree t1, t2, t3;
6737 gimple_stmt_iterator gsi;
6738 location_t loc = gimple_location (entry_stmt);
6740 tree clauses = gimple_omp_task_clauses (entry_stmt);
6742 tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF);
6743 tree untied = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
6744 tree mergeable = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE);
6745 tree depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
6746 tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
6747 tree priority = find_omp_clause (clauses, OMP_CLAUSE_PRIORITY);
6749 unsigned int iflags
6750 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
6751 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
6752 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
6754 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
6755 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
6756 tree num_tasks = NULL_TREE;
6757 bool ull = false;
6758 if (taskloop_p)
6760 gimple *g = last_stmt (region->outer->entry);
6761 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
6762 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
6763 struct omp_for_data fd;
6764 extract_omp_for_data (as_a <gomp_for *> (g), &fd, NULL);
6765 startvar = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6766 endvar = find_omp_clause (OMP_CLAUSE_CHAIN (startvar),
6767 OMP_CLAUSE__LOOPTEMP_);
6768 startvar = OMP_CLAUSE_DECL (startvar);
6769 endvar = OMP_CLAUSE_DECL (endvar);
6770 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
6771 if (fd.loop.cond_code == LT_EXPR)
6772 iflags |= GOMP_TASK_FLAG_UP;
6773 tree tclauses = gimple_omp_for_clauses (g);
6774 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
6775 if (num_tasks)
6776 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
6777 else
6779 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
6780 if (num_tasks)
6782 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
6783 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
6785 else
6786 num_tasks = integer_zero_node;
6788 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
6789 if (ifc == NULL_TREE)
6790 iflags |= GOMP_TASK_FLAG_IF;
6791 if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
6792 iflags |= GOMP_TASK_FLAG_NOGROUP;
6793 ull = fd.iter_type == long_long_unsigned_type_node;
6795 else if (priority)
6796 iflags |= GOMP_TASK_FLAG_PRIORITY;
6798 tree flags = build_int_cst (unsigned_type_node, iflags);
6800 tree cond = boolean_true_node;
6801 if (ifc)
6803 if (taskloop_p)
6805 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
6806 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
6807 build_int_cst (unsigned_type_node,
6808 GOMP_TASK_FLAG_IF),
6809 build_int_cst (unsigned_type_node, 0));
6810 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
6811 flags, t);
6813 else
6814 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
6817 if (finalc)
6819 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
6820 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
6821 build_int_cst (unsigned_type_node,
6822 GOMP_TASK_FLAG_FINAL),
6823 build_int_cst (unsigned_type_node, 0));
6824 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
6826 if (depend)
6827 depend = OMP_CLAUSE_DECL (depend);
6828 else
6829 depend = build_int_cst (ptr_type_node, 0);
6830 if (priority)
6831 priority = fold_convert (integer_type_node,
6832 OMP_CLAUSE_PRIORITY_EXPR (priority));
6833 else
6834 priority = integer_zero_node;
6836 gsi = gsi_last_bb (bb);
6837 tree t = gimple_omp_task_data_arg (entry_stmt);
6838 if (t == NULL)
6839 t2 = null_pointer_node;
6840 else
6841 t2 = build_fold_addr_expr_loc (loc, t);
6842 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
6843 t = gimple_omp_task_copy_fn (entry_stmt);
6844 if (t == NULL)
6845 t3 = null_pointer_node;
6846 else
6847 t3 = build_fold_addr_expr_loc (loc, t);
6849 if (taskloop_p)
6850 t = build_call_expr (ull
6851 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
6852 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
6853 11, t1, t2, t3,
6854 gimple_omp_task_arg_size (entry_stmt),
6855 gimple_omp_task_arg_align (entry_stmt), flags,
6856 num_tasks, priority, startvar, endvar, step);
6857 else
6858 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
6859 9, t1, t2, t3,
6860 gimple_omp_task_arg_size (entry_stmt),
6861 gimple_omp_task_arg_align (entry_stmt), cond, flags,
6862 depend, priority);
6864 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6865 false, GSI_CONTINUE_LINKING);
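/* I.e. the task directive has become roughly

     GOMP_task (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
		cond, flags, depend, priority);

   for a plain task, or

     GOMP_taskloop (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
		    flags, num_tasks, priority, start, end, step);

   for a taskloop (the _ull variant for unsigned long long iterators);
   a sketch matching the argument lists built above.  */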
6869 /* If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW
6870 catch handler and return it. This prevents programs from violating the
6871 structured block semantics with throws. */
6873 static gimple_seq
6874 maybe_catch_exception (gimple_seq body)
6876 gimple *g;
6877 tree decl;
6879 if (!flag_exceptions)
6880 return body;
6882 if (lang_hooks.eh_protect_cleanup_actions != NULL)
6883 decl = lang_hooks.eh_protect_cleanup_actions ();
6884 else
6885 decl = builtin_decl_explicit (BUILT_IN_TRAP);
6887 g = gimple_build_eh_must_not_throw (decl);
6888 g = gimple_build_try (body, gimple_seq_alloc_with_stmt (g),
6889 GIMPLE_TRY_CATCH);
6891 return gimple_seq_alloc_with_stmt (g);
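/* Conceptually the result is

     try { BODY } catch (...) { MUST_NOT_THROW: DECL (); }

   where DECL is the language's cleanup action (e.g. terminate) or
   __builtin_trap as the fallback -- an illustrative sketch of the
   GIMPLE_TRY_CATCH built above.  */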
6894 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
6896 static tree
6897 vec2chain (vec<tree, va_gc> *v)
6899 tree chain = NULL_TREE, t;
6900 unsigned ix;
6902 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
6904 DECL_CHAIN (t) = chain;
6905 chain = t;
6908 return chain;
6912 /* Remove barriers in REGION->EXIT's block. Note that this is only
6913 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
6914 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
6915 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
6916 removed. */
6918 static void
6919 remove_exit_barrier (struct omp_region *region)
6921 gimple_stmt_iterator gsi;
6922 basic_block exit_bb;
6923 edge_iterator ei;
6924 edge e;
6925 gimple *stmt;
6926 int any_addressable_vars = -1;
6928 exit_bb = region->exit;
6930 /* If the parallel region doesn't return, we don't have REGION->EXIT
6931 block at all. */
6932 if (! exit_bb)
6933 return;
6935 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
6936 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
6937 statements that can appear in between are extremely limited -- no
6938 memory operations at all. Here, we allow nothing at all, so the
6939 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
6940 gsi = gsi_last_bb (exit_bb);
6941 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6942 gsi_prev (&gsi);
6943 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
6944 return;
6946 FOR_EACH_EDGE (e, ei, exit_bb->preds)
6948 gsi = gsi_last_bb (e->src);
6949 if (gsi_end_p (gsi))
6950 continue;
6951 stmt = gsi_stmt (gsi);
6952 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
6953 && !gimple_omp_return_nowait_p (stmt))
6955 /* OpenMP 3.0 tasks unfortunately prevent this optimization
6956 in many cases. If there could be tasks queued, the barrier
6957 might be needed to let the tasks run before some local
6958 variable of the parallel that the task uses as shared
6959 runs out of scope. The task can be spawned either
6960 from within the current function (this would be easy to check)
6961 or from some function it calls and gets passed an address
6962 of such a variable. */
6963 if (any_addressable_vars < 0)
6965 gomp_parallel *parallel_stmt
6966 = as_a <gomp_parallel *> (last_stmt (region->entry));
6967 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
6968 tree local_decls, block, decl;
6969 unsigned ix;
6971 any_addressable_vars = 0;
6972 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
6973 if (TREE_ADDRESSABLE (decl))
6975 any_addressable_vars = 1;
6976 break;
6978 for (block = gimple_block (stmt);
6979 !any_addressable_vars
6980 && block
6981 && TREE_CODE (block) == BLOCK;
6982 block = BLOCK_SUPERCONTEXT (block))
6984 for (local_decls = BLOCK_VARS (block);
6985 local_decls;
6986 local_decls = DECL_CHAIN (local_decls))
6987 if (TREE_ADDRESSABLE (local_decls))
6989 any_addressable_vars = 1;
6990 break;
6992 if (block == gimple_block (parallel_stmt))
6993 break;
6996 if (!any_addressable_vars)
6997 gimple_omp_return_set_nowait (stmt);
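/* For example, in

     #pragma omp parallel
     {
       #pragma omp for	     (implicit barrier at the end of the for)
       for (i = 0; i < n; i++)
	 ...
     }			     (implicit barrier at the end of the parallel)

   the workshare's barrier is redundant and is marked nowait here,
   unless an addressable local might still be live in a queued task,
   as checked above.  */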
7002 static void
7003 remove_exit_barriers (struct omp_region *region)
7005 if (region->type == GIMPLE_OMP_PARALLEL)
7006 remove_exit_barrier (region);
7008 if (region->inner)
7010 region = region->inner;
7011 remove_exit_barriers (region);
7012 while (region->next)
7014 region = region->next;
7015 remove_exit_barriers (region);
7020 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
7021 calls. These can't be declared as const functions, but
7022 within one parallel body they are constant, so they can be
7023 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
7024 which are declared const. Similarly for a task body, except
7025 that in an untied task omp_get_thread_num () can change at any task
7026 scheduling point. */
7028 static void
7029 optimize_omp_library_calls (gimple *entry_stmt)
7031 basic_block bb;
7032 gimple_stmt_iterator gsi;
7033 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
7034 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
7035 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
7036 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
7037 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
7038 && find_omp_clause (gimple_omp_task_clauses (entry_stmt),
7039 OMP_CLAUSE_UNTIED) != NULL);
7041 FOR_EACH_BB_FN (bb, cfun)
7042 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7044 gimple *call = gsi_stmt (gsi);
7045 tree decl;
7047 if (is_gimple_call (call)
7048 && (decl = gimple_call_fndecl (call))
7049 && DECL_EXTERNAL (decl)
7050 && TREE_PUBLIC (decl)
7051 && DECL_INITIAL (decl) == NULL)
7053 tree built_in;
7055 if (DECL_NAME (decl) == thr_num_id)
7057 /* In #pragma omp task untied omp_get_thread_num () can change
7058 during the execution of the task region. */
7059 if (untied_task)
7060 continue;
7061 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
7063 else if (DECL_NAME (decl) == num_thr_id)
7064 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
7065 else
7066 continue;
7068 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
7069 || gimple_call_num_args (call) != 0)
7070 continue;
7072 if (flag_exceptions && !TREE_NOTHROW (decl))
7073 continue;
7075 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
7076 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
7077 TREE_TYPE (TREE_TYPE (built_in))))
7078 continue;
7080 gimple_call_set_fndecl (call, built_in);
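/* I.e. within a parallel body (or a tied task body) a call such as

     n = omp_get_num_threads ();

   becomes

     n = __builtin_omp_get_num_threads ();

   which is declared const, so later passes may CSE or hoist it.  */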
7085 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
7086 regimplified. */
7088 static tree
7089 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
7091 tree t = *tp;
7093 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
7094 if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t))
7095 return t;
7097 if (TREE_CODE (t) == ADDR_EXPR)
7098 recompute_tree_invariant_for_addr_expr (t);
7100 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7101 return NULL_TREE;
7104 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
7106 static void
7107 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
7108 bool after)
7110 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
7111 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
7112 !after, after ? GSI_CONTINUE_LINKING
7113 : GSI_SAME_STMT);
7114 gimple *stmt = gimple_build_assign (to, from);
7115 if (after)
7116 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
7117 else
7118 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
7119 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
7120 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
7122 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
7123 gimple_regimplify_operands (stmt, &gsi);
7127 /* Expand the OpenMP parallel or task directive starting at REGION. */
7129 static void
7130 expand_omp_taskreg (struct omp_region *region)
7132 basic_block entry_bb, exit_bb, new_bb;
7133 struct function *child_cfun;
7134 tree child_fn, block, t;
7135 gimple_stmt_iterator gsi;
7136 gimple *entry_stmt, *stmt;
7137 edge e;
7138 vec<tree, va_gc> *ws_args;
7140 entry_stmt = last_stmt (region->entry);
7141 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
7142 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7144 entry_bb = region->entry;
7145 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
7146 exit_bb = region->cont;
7147 else
7148 exit_bb = region->exit;
7150 bool is_cilk_for
7151 = (flag_cilkplus
7152 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
7153 && find_omp_clause (gimple_omp_parallel_clauses (entry_stmt),
7154 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
7156 if (is_cilk_for)
7157 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
7158 and the inner statement contains the name of the built-in function
7159 and grain. */
7160 ws_args = region->inner->ws_args;
7161 else if (is_combined_parallel (region))
7162 ws_args = region->ws_args;
7163 else
7164 ws_args = NULL;
7166 if (child_cfun->cfg)
7168 /* Due to inlining, it may happen that we have already outlined
7169 the region, in which case all we need to do is make the
7170 sub-graph unreachable and emit the parallel call. */
7171 edge entry_succ_e, exit_succ_e;
7173 entry_succ_e = single_succ_edge (entry_bb);
7175 gsi = gsi_last_bb (entry_bb);
7176 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
7177 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
7178 gsi_remove (&gsi, true);
7180 new_bb = entry_bb;
7181 if (exit_bb)
7183 exit_succ_e = single_succ_edge (exit_bb);
7184 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
7186 remove_edge_and_dominated_blocks (entry_succ_e);
7188 else
7190 unsigned srcidx, dstidx, num;
7192 /* If the parallel region needs data sent from the parent
7193 function, then the very first statement (except possible
7194 tree profile counter updates) of the parallel body
7195 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7196 &.OMP_DATA_O is passed as an argument to the child function,
7197 we need to replace it with the argument as seen by the child
7198 function.
7200 In most cases, this will end up being the identity assignment
7201 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
7202 a function call that has been inlined, the original PARM_DECL
7203 .OMP_DATA_I may have been converted into a different local
7204 variable. In which case, we need to keep the assignment. */
7205 if (gimple_omp_taskreg_data_arg (entry_stmt))
7207 basic_block entry_succ_bb
7208 = single_succ_p (entry_bb) ? single_succ (entry_bb)
7209 : FALLTHRU_EDGE (entry_bb)->dest;
7210 tree arg;
7211 gimple *parcopy_stmt = NULL;
7213 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7215 gimple *stmt;
7217 gcc_assert (!gsi_end_p (gsi));
7218 stmt = gsi_stmt (gsi);
7219 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7220 continue;
7222 if (gimple_num_ops (stmt) == 2)
7224 tree arg = gimple_assign_rhs1 (stmt);
7226 /* We're ignoring the subcode because we're
7227 effectively doing a STRIP_NOPS. */
7229 if (TREE_CODE (arg) == ADDR_EXPR
7230 && TREE_OPERAND (arg, 0)
7231 == gimple_omp_taskreg_data_arg (entry_stmt))
7233 parcopy_stmt = stmt;
7234 break;
7239 gcc_assert (parcopy_stmt != NULL);
7240 arg = DECL_ARGUMENTS (child_fn);
7242 if (!gimple_in_ssa_p (cfun))
7244 if (gimple_assign_lhs (parcopy_stmt) == arg)
7245 gsi_remove (&gsi, true);
7246 else
7248 /* ?? Is setting the subcode really necessary ?? */
7249 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
7250 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7253 else
7255 tree lhs = gimple_assign_lhs (parcopy_stmt);
7256 gcc_assert (SSA_NAME_VAR (lhs) == arg);
7257 /* We'd like to set the rhs to the default def in the child_fn,
7258 but it's too early to create ssa names in the child_fn.
7259 Instead, we set the rhs to the parm. In
7260 move_sese_region_to_fn, we introduce a default def for the
7261 parm, map the parm to its default def, and once we encounter
7262 this stmt, replace the parm with the default def. */
7263 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7264 update_stmt (parcopy_stmt);
7268 /* Declare local variables needed in CHILD_CFUN. */
7269 block = DECL_INITIAL (child_fn);
7270 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7271 /* The gimplifier could record temporaries in parallel/task block
7272 rather than in the containing function's local_decls chain,
7273 which would mean cgraph missed finalizing them. Do it now. */
7274 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7275 if (TREE_CODE (t) == VAR_DECL
7276 && TREE_STATIC (t)
7277 && !DECL_EXTERNAL (t))
7278 varpool_node::finalize_decl (t);
7279 DECL_SAVED_TREE (child_fn) = NULL;
7280 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7281 gimple_set_body (child_fn, NULL);
7282 TREE_USED (block) = 1;
7284 /* Reset DECL_CONTEXT on function arguments. */
7285 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7286 DECL_CONTEXT (t) = child_fn;
7288 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
7289 so that it can be moved to the child function. */
7290 gsi = gsi_last_bb (entry_bb);
7291 stmt = gsi_stmt (gsi);
7292 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
7293 || gimple_code (stmt) == GIMPLE_OMP_TASK));
7294 e = split_block (entry_bb, stmt);
7295 gsi_remove (&gsi, true);
7296 entry_bb = e->dest;
7297 edge e2 = NULL;
7298 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7299 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7300 else
7302 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
7303 gcc_assert (e2->dest == region->exit);
7304 remove_edge (BRANCH_EDGE (entry_bb));
7305 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
7306 gsi = gsi_last_bb (region->exit);
7307 gcc_assert (!gsi_end_p (gsi)
7308 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7309 gsi_remove (&gsi, true);
7312 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
7313 if (exit_bb)
7315 gsi = gsi_last_bb (exit_bb);
7316 gcc_assert (!gsi_end_p (gsi)
7317 && (gimple_code (gsi_stmt (gsi))
7318 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
7319 stmt = gimple_build_return (NULL);
7320 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7321 gsi_remove (&gsi, true);
7324 /* Move the parallel region into CHILD_CFUN. */
7326 if (gimple_in_ssa_p (cfun))
7328 init_tree_ssa (child_cfun);
7329 init_ssa_operands (child_cfun);
7330 child_cfun->gimple_df->in_ssa_p = true;
7331 block = NULL_TREE;
7333 else
7334 block = gimple_block (entry_stmt);
7336 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7337 if (exit_bb)
7338 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7339 if (e2)
7341 basic_block dest_bb = e2->dest;
7342 if (!exit_bb)
7343 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
7344 remove_edge (e2);
7345 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
7347 /* When the OMP expansion process cannot guarantee an up-to-date
7348 loop tree, arrange for the child function to fix up loops. */
7349 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7350 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7352 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7353 num = vec_safe_length (child_cfun->local_decls);
7354 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7356 t = (*child_cfun->local_decls)[srcidx];
7357 if (DECL_CONTEXT (t) == cfun->decl)
7358 continue;
7359 if (srcidx != dstidx)
7360 (*child_cfun->local_decls)[dstidx] = t;
7361 dstidx++;
7363 if (dstidx != num)
7364 vec_safe_truncate (child_cfun->local_decls, dstidx);
7366 /* Inform the callgraph about the new function. */
7367 child_cfun->curr_properties = cfun->curr_properties;
7368 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7369 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7370 cgraph_node *node = cgraph_node::get_create (child_fn);
7371 node->parallelized_function = 1;
7372 cgraph_node::add_new_function (child_fn, true);
7374 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7375 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7377 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7378 fixed in a following pass. */
7379 push_cfun (child_cfun);
7380 if (need_asm)
7381 assign_assembler_name_if_neeeded (child_fn);
7383 if (optimize)
7384 optimize_omp_library_calls (entry_stmt);
7385 cgraph_edge::rebuild_edges ();
7387 /* Some EH regions might become dead, see PR34608. If
7388 pass_cleanup_cfg isn't the first pass to happen with the
7389 new child, these dead EH edges might cause problems.
7390 Clean them up now. */
7391 if (flag_exceptions)
7393 basic_block bb;
7394 bool changed = false;
7396 FOR_EACH_BB_FN (bb, cfun)
7397 changed |= gimple_purge_dead_eh_edges (bb);
7398 if (changed)
7399 cleanup_tree_cfg ();
7401 if (gimple_in_ssa_p (cfun))
7402 update_ssa (TODO_update_ssa);
7403 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7404 verify_loop_structure ();
7405 pop_cfun ();
7407 if (dump_file && !gimple_in_ssa_p (cfun))
7409 omp_any_child_fn_dumped = true;
7410 dump_function_header (dump_file, child_fn, dump_flags);
7411 dump_function_to_file (child_fn, dump_file, dump_flags);
7415 /* Emit a library call to launch the children threads. */
7416 if (is_cilk_for)
7417 expand_cilk_for_call (new_bb,
7418 as_a <gomp_parallel *> (entry_stmt), ws_args);
7419 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7420 expand_parallel_call (region, new_bb,
7421 as_a <gomp_parallel *> (entry_stmt), ws_args);
7422 else
7423 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
7424 if (gimple_in_ssa_p (cfun))
7425 update_ssa (TODO_update_ssa_only_virtuals);
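/* Summarizing the transformation (names illustrative): a region such as

     #pragma omp parallel shared (n)
       body (n);

   has by this point been outlined into

     static void main._omp_fn.0 (struct .omp_data_s *ptr)
     {
       body (ptr->n);
     }

   with the directive itself replaced by roughly

     .omp_data_o.n = n;
     GOMP_parallel (main._omp_fn.0, &.omp_data_o, 0, 0);  */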
7428 /* Information about members of an OpenACC collapsed loop nest. */
7430 struct oacc_collapse
7432 tree base; /* Base value. */
7433 tree iters; /* Number of steps. */
7434 tree step; /* Step size. */
7437 /* Helper for expand_oacc_for. Determine collapsed loop information.
7438 Fill in COUNTS array. Emit any initialization code before GSI.
7439 Return the calculated outer loop bound of BOUND_TYPE. */
7441 static tree
7442 expand_oacc_collapse_init (const struct omp_for_data *fd,
7443 gimple_stmt_iterator *gsi,
7444 oacc_collapse *counts, tree bound_type)
7446 tree total = build_int_cst (bound_type, 1);
7447 int ix;
7449 gcc_assert (integer_onep (fd->loop.step));
7450 gcc_assert (integer_zerop (fd->loop.n1));
7452 for (ix = 0; ix != fd->collapse; ix++)
7454 const omp_for_data_loop *loop = &fd->loops[ix];
7456 tree iter_type = TREE_TYPE (loop->v);
7457 tree diff_type = iter_type;
7458 tree plus_type = iter_type;
7460 gcc_assert (loop->cond_code == fd->loop.cond_code);
7462 if (POINTER_TYPE_P (iter_type))
7463 plus_type = sizetype;
7464 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7465 diff_type = signed_type_for (diff_type);
7467 tree b = loop->n1;
7468 tree e = loop->n2;
7469 tree s = loop->step;
7470 bool up = loop->cond_code == LT_EXPR;
7471 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7472 bool negating;
7473 tree expr;
7475 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
7476 true, GSI_SAME_STMT);
7477 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
7478 true, GSI_SAME_STMT);
7480 /* Convert the step, avoiding possible unsigned->signed overflow. */
7481 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7482 if (negating)
7483 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7484 s = fold_convert (diff_type, s);
7485 if (negating)
7486 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7487 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
7488 true, GSI_SAME_STMT);
7490 /* Determine the range, avoiding possible unsigned->signed overflow. */
7491 negating = !up && TYPE_UNSIGNED (iter_type);
7492 expr = fold_build2 (MINUS_EXPR, plus_type,
7493 fold_convert (plus_type, negating ? b : e),
7494 fold_convert (plus_type, negating ? e : b));
7495 expr = fold_convert (diff_type, expr);
7496 if (negating)
7497 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7498 tree range = force_gimple_operand_gsi
7499 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
7501 /* Determine number of iterations. */
7502 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7503 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7504 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7506 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7507 true, GSI_SAME_STMT);
7509 counts[ix].base = b;
7510 counts[ix].iters = iters;
7511 counts[ix].step = s;
7513 total = fold_build2 (MULT_EXPR, bound_type, total,
7514 fold_convert (bound_type, iters));
7517 return total;
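/* The per-loop count computed above follows

     ITERS = (RANGE - DIR + STEP) / STEP

   e.g. for "for (i = 0; i < 10; i += 3)" this gives RANGE == 10,
   DIR == 1, STEP == 3, hence ITERS == (10 - 1 + 3) / 3 == 4, matching
   the iterations i = 0, 3, 6, 9; TOTAL is the product of the per-loop
   ITERS.  */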
7520 /* Emit initializers for collapsed loop members. IVAR is the outer
7521 loop iteration variable, from which collapsed loop iteration values
7522 are calculated. COUNTS array has been initialized by
7523 expand_oacc_collapse_init. */
7525 static void
7526 expand_oacc_collapse_vars (const struct omp_for_data *fd,
7527 gimple_stmt_iterator *gsi,
7528 const oacc_collapse *counts, tree ivar)
7530 tree ivar_type = TREE_TYPE (ivar);
7532 /* The most rapidly changing iteration variable is the innermost
7533 one. */
7534 for (int ix = fd->collapse; ix--;)
7536 const omp_for_data_loop *loop = &fd->loops[ix];
7537 const oacc_collapse *collapse = &counts[ix];
7538 tree iter_type = TREE_TYPE (loop->v);
7539 tree diff_type = TREE_TYPE (collapse->step);
7540 tree plus_type = iter_type;
7541 enum tree_code plus_code = PLUS_EXPR;
7542 tree expr;
7544 if (POINTER_TYPE_P (iter_type))
7546 plus_code = POINTER_PLUS_EXPR;
7547 plus_type = sizetype;
7550 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
7551 fold_convert (ivar_type, collapse->iters));
7552 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
7553 collapse->step);
7554 expr = fold_build2 (plus_code, iter_type, collapse->base,
7555 fold_convert (plus_type, expr));
7556 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
7557 true, GSI_SAME_STMT);
7558 gassign *ass = gimple_build_assign (loop->v, expr);
7559 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
7561 if (ix)
7563 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
7564 fold_convert (ivar_type, collapse->iters));
7565 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7566 true, GSI_SAME_STMT);
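/* For a collapse of two loops this amounts to a mixed-radix decode of
   the single counter, roughly

     v_inner = base_inner + (IVAR % iters_inner) * step_inner;
     IVAR' = IVAR / iters_inner;
     v_outer = base_outer + (IVAR' % iters_outer) * step_outer;

   (a sketch; pointer iterators use POINTER_PLUS_EXPR with sizetype
   conversions, as above).  */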
7572 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
7573 of the combined collapse > 1 loop constructs, generate code like:
7574 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
7575 if (cond3 is <)
7576 adj = STEP3 - 1;
7577 else
7578 adj = STEP3 + 1;
7579 count3 = (adj + N32 - N31) / STEP3;
7580 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
7581 if (cond2 is <)
7582 adj = STEP2 - 1;
7583 else
7584 adj = STEP2 + 1;
7585 count2 = (adj + N22 - N21) / STEP2;
7586 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
7587 if (cond1 is <)
7588 adj = STEP1 - 1;
7589 else
7590 adj = STEP1 + 1;
7591 count1 = (adj + N12 - N11) / STEP1;
7592 count = count1 * count2 * count3;
7593 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
7594 count = 0;
7595 and set ZERO_ITER_BB to that bb. If this isn't the outermost
7596 of the combined loop constructs, just initialize COUNTS array
7597 from the _looptemp_ clauses. */
7599 /* NOTE: It *could* be better to moosh all of the BBs together,
7600 creating one larger BB with all the computation and the unexpected
7601 jump at the end. I.e.
7603 bool zero3, zero2, zero1, zero;
7605 zero3 = N32 c3 N31;
7606 count3 = (N32 - N31) /[cl] STEP3;
7607 zero2 = N22 c2 N21;
7608 count2 = (N22 - N21) /[cl] STEP2;
7609 zero1 = N12 c1 N11;
7610 count1 = (N12 - N11) /[cl] STEP1;
7611 zero = zero3 || zero2 || zero1;
7612 count = count1 * count2 * count3;
7613 if (__builtin_expect(zero, false)) goto zero_iter_bb;
7615 After all, we expect zero to be false, and thus we expect to have to
7616 evaluate all of the comparison expressions, so short-circuiting
7617 oughtn't be a win. Since the condition isn't protecting a
7618 denominator, we're not concerned about divide-by-zero, so we can
7619 fully evaluate count even if a numerator turned out to be wrong.
7621 It seems like putting this all together would create much better
7622 scheduling opportunities, and less pressure on the chip's branch
7623 predictor. */
7625 static void
7626 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7627 basic_block &entry_bb, tree *counts,
7628 basic_block &zero_iter1_bb, int &first_zero_iter1,
7629 basic_block &zero_iter2_bb, int &first_zero_iter2,
7630 basic_block &l2_dom_bb)
7632 tree t, type = TREE_TYPE (fd->loop.v);
7633 edge e, ne;
7634 int i;
7636 /* Collapsed loops need work for expansion into SSA form. */
7637 gcc_assert (!gimple_in_ssa_p (cfun));
7639 if (gimple_omp_for_combined_into_p (fd->for_stmt)
7640 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7642 gcc_assert (fd->ordered == 0);
7643 /* First two _looptemp_ clauses are for istart/iend, counts[0]
7644 isn't supposed to be handled, as the inner loop doesn't
7645 use it. */
7646 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
7647 OMP_CLAUSE__LOOPTEMP_);
7648 gcc_assert (innerc);
7649 for (i = 0; i < fd->collapse; i++)
7651 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
7652 OMP_CLAUSE__LOOPTEMP_);
7653 gcc_assert (innerc);
7654 if (i)
7655 counts[i] = OMP_CLAUSE_DECL (innerc);
7656 else
7657 counts[0] = NULL_TREE;
7659 return;
7662 for (i = fd->collapse; i < fd->ordered; i++)
7664 tree itype = TREE_TYPE (fd->loops[i].v);
7665 counts[i] = NULL_TREE;
7666 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7667 fold_convert (itype, fd->loops[i].n1),
7668 fold_convert (itype, fd->loops[i].n2));
7669 if (t && integer_zerop (t))
7671 for (i = fd->collapse; i < fd->ordered; i++)
7672 counts[i] = build_int_cst (type, 0);
7673 break;
7676 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
7678 tree itype = TREE_TYPE (fd->loops[i].v);
7680 if (i >= fd->collapse && counts[i])
7681 continue;
7682 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
7683 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7684 fold_convert (itype, fd->loops[i].n1),
7685 fold_convert (itype, fd->loops[i].n2)))
7686 == NULL_TREE || !integer_onep (t)))
7688 gcond *cond_stmt;
7689 tree n1, n2;
7690 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
7691 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
7692 true, GSI_SAME_STMT);
7693 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
7694 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
7695 true, GSI_SAME_STMT);
7696 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
7697 NULL_TREE, NULL_TREE);
7698 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
7699 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
7700 expand_omp_regimplify_p, NULL, NULL)
7701 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
7702 expand_omp_regimplify_p, NULL, NULL))
7704 *gsi = gsi_for_stmt (cond_stmt);
7705 gimple_regimplify_operands (cond_stmt, gsi);
7707 e = split_block (entry_bb, cond_stmt);
7708 basic_block &zero_iter_bb
7709 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
7710 int &first_zero_iter
7711 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
7712 if (zero_iter_bb == NULL)
7714 gassign *assign_stmt;
7715 first_zero_iter = i;
7716 zero_iter_bb = create_empty_bb (entry_bb);
7717 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
7718 *gsi = gsi_after_labels (zero_iter_bb);
7719 if (i < fd->collapse)
7720 assign_stmt = gimple_build_assign (fd->loop.n2,
7721 build_zero_cst (type));
7722 else
7724 counts[i] = create_tmp_reg (type, ".count");
7725 assign_stmt
7726 = gimple_build_assign (counts[i], build_zero_cst (type));
7728 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
7729 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
7730 entry_bb);
7732 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
7733 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
7734 e->flags = EDGE_TRUE_VALUE;
7735 e->probability = REG_BR_PROB_BASE - ne->probability;
7736 if (l2_dom_bb == NULL)
7737 l2_dom_bb = entry_bb;
7738 entry_bb = e->dest;
7739 *gsi = gsi_last_bb (entry_bb);
7742 if (POINTER_TYPE_P (itype))
7743 itype = signed_type_for (itype);
7744 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
7745 ? -1 : 1));
7746 t = fold_build2 (PLUS_EXPR, itype,
7747 fold_convert (itype, fd->loops[i].step), t);
7748 t = fold_build2 (PLUS_EXPR, itype, t,
7749 fold_convert (itype, fd->loops[i].n2));
7750 t = fold_build2 (MINUS_EXPR, itype, t,
7751 fold_convert (itype, fd->loops[i].n1));
7752 /* ?? We could probably use CEIL_DIV_EXPR instead of
7753 TRUNC_DIV_EXPR and adjusting by hand, unless that can't
7754 generate the same code in the end, because generically we
7755 don't know that the values involved must be negative for
7756 GT. ?? */
7757 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
7758 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7759 fold_build1 (NEGATE_EXPR, itype, t),
7760 fold_build1 (NEGATE_EXPR, itype,
7761 fold_convert (itype,
7762 fd->loops[i].step)));
7763 else
7764 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
7765 fold_convert (itype, fd->loops[i].step));
7766 t = fold_convert (type, t);
7767 if (TREE_CODE (t) == INTEGER_CST)
7768 counts[i] = t;
7769 else
7771 if (i < fd->collapse || i != first_zero_iter2)
7772 counts[i] = create_tmp_reg (type, ".count");
7773 expand_omp_build_assign (gsi, counts[i], t);
7775 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
7777 if (i == 0)
7778 t = counts[0];
7779 else
7780 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
7781 expand_omp_build_assign (gsi, fd->loop.n2, t);
7787 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
7788 T = V;
7789 V3 = N31 + (T % count3) * STEP3;
7790 T = T / count3;
7791 V2 = N21 + (T % count2) * STEP2;
7792 T = T / count2;
7793 V1 = N11 + T * STEP1;
7794 if this loop doesn't have an inner loop construct combined with it.
7795 If it does have an inner loop construct combined with it and the
7796 iteration count isn't known constant, store values from counts array
7797 into its _looptemp_ temporaries instead. */
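/* Worked example (editorial sketch): with count3 = 3 and count2 = 4,
   logical iteration T = 17 decomposes as
     V3 = N31 + (17 % 3) * STEP3;  T = 17 / 3 = 5;
     V2 = N21 + (5 % 4) * STEP2;   T = 5 / 4 = 1;
     V1 = N11 + 1 * STEP1;
   which is the mixed-radix inverse of
     T = (idx1 * count2 + idx2) * count3 + idx3.  */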
7799 static void
7800 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7801 tree *counts, gimple *inner_stmt, tree startvar)
7803 int i;
7804 if (gimple_omp_for_combined_p (fd->for_stmt))
7806 /* If fd->loop.n2 is constant, then no propagation of the counts
7807 is needed, they are constant. */
7808 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
7809 return;
7811 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
7812 ? gimple_omp_taskreg_clauses (inner_stmt)
7813 : gimple_omp_for_clauses (inner_stmt);
7814 /* First two _looptemp_ clauses are for istart/iend, counts[0]
7815 isn't supposed to be handled, as the inner loop doesn't
7816 use it. */
7817 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7818 gcc_assert (innerc);
7819 for (i = 0; i < fd->collapse; i++)
7821 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
7822 OMP_CLAUSE__LOOPTEMP_);
7823 gcc_assert (innerc);
7824 if (i)
7826 tree tem = OMP_CLAUSE_DECL (innerc);
7827 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
7828 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
7829 false, GSI_CONTINUE_LINKING);
7830 gassign *stmt = gimple_build_assign (tem, t);
7831 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7834 return;
7837 tree type = TREE_TYPE (fd->loop.v);
7838 tree tem = create_tmp_reg (type, ".tem");
7839 gassign *stmt = gimple_build_assign (tem, startvar);
7840 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7842 for (i = fd->collapse - 1; i >= 0; i--)
7844 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
7845 itype = vtype;
7846 if (POINTER_TYPE_P (vtype))
7847 itype = signed_type_for (vtype);
7848 if (i != 0)
7849 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
7850 else
7851 t = tem;
7852 t = fold_convert (itype, t);
7853 t = fold_build2 (MULT_EXPR, itype, t,
7854 fold_convert (itype, fd->loops[i].step));
7855 if (POINTER_TYPE_P (vtype))
7856 t = fold_build_pointer_plus (fd->loops[i].n1, t);
7857 else
7858 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
7859 t = force_gimple_operand_gsi (gsi, t,
7860 DECL_P (fd->loops[i].v)
7861 && TREE_ADDRESSABLE (fd->loops[i].v),
7862 NULL_TREE, false,
7863 GSI_CONTINUE_LINKING);
7864 stmt = gimple_build_assign (fd->loops[i].v, t);
7865 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7866 if (i != 0)
7868 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
7869 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
7870 false, GSI_CONTINUE_LINKING);
7871 stmt = gimple_build_assign (tem, t);
7872 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7878 /* Helper function for expand_omp_for_*. Generate code like:
7879 L10:
7880 V3 += STEP3;
7881 if (V3 cond3 N32) goto BODY_BB; else goto L11;
7882 L11:
7883 V3 = N31;
7884 V2 += STEP2;
7885 if (V2 cond2 N22) goto BODY_BB; else goto L12;
7886 L12:
7887 V2 = N21;
7888 V1 += STEP1;
7889 goto BODY_BB; */
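/* In plain C this is the usual odometer increment; for a two-deep
   nest it amounts to (editorial sketch, not emitted verbatim):

     v2 += step2;
     if (!(v2 cond2 n22))
       {
         v2 = n21;      // inner wheel wraps around ...
         v1 += step1;   // ... and carries into the outer one
       }

   The blocks built below form this chain, innermost wheel first.  */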
7891 static basic_block
7892 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
7893 basic_block body_bb)
7895 basic_block last_bb, bb, collapse_bb = NULL;
7896 int i;
7897 gimple_stmt_iterator gsi;
7898 edge e;
7899 tree t;
7900 gimple *stmt;
7902 last_bb = cont_bb;
7903 for (i = fd->collapse - 1; i >= 0; i--)
7905 tree vtype = TREE_TYPE (fd->loops[i].v);
7907 bb = create_empty_bb (last_bb);
7908 add_bb_to_loop (bb, last_bb->loop_father);
7909 gsi = gsi_start_bb (bb);
7911 if (i < fd->collapse - 1)
7913 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
7914 e->probability = REG_BR_PROB_BASE / 8;
7916 t = fd->loops[i + 1].n1;
7917 t = force_gimple_operand_gsi (&gsi, t,
7918 DECL_P (fd->loops[i + 1].v)
7919 && TREE_ADDRESSABLE (fd->loops[i
7920 + 1].v),
7921 NULL_TREE, false,
7922 GSI_CONTINUE_LINKING);
7923 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
7924 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7926 else
7927 collapse_bb = bb;
7929 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
7931 if (POINTER_TYPE_P (vtype))
7932 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
7933 else
7934 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
7935 t = force_gimple_operand_gsi (&gsi, t,
7936 DECL_P (fd->loops[i].v)
7937 && TREE_ADDRESSABLE (fd->loops[i].v),
7938 NULL_TREE, false, GSI_CONTINUE_LINKING);
7939 stmt = gimple_build_assign (fd->loops[i].v, t);
7940 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7942 if (i > 0)
7944 t = fd->loops[i].n2;
7945 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7946 false, GSI_CONTINUE_LINKING);
7947 tree v = fd->loops[i].v;
7948 if (DECL_P (v) && TREE_ADDRESSABLE (v))
7949 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
7950 false, GSI_CONTINUE_LINKING);
7951 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
7952 stmt = gimple_build_cond_empty (t);
7953 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7954 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
7955 e->probability = REG_BR_PROB_BASE * 7 / 8;
7957 else
7958 make_edge (bb, body_bb, EDGE_FALLTHRU);
7959 last_bb = bb;
7962 return collapse_bb;
7966 /* Expand #pragma omp ordered depend(source). */
7968 static void
7969 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
7970 tree *counts, location_t loc)
7972 enum built_in_function source_ix
7973 = fd->iter_type == long_integer_type_node
7974 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
7975 gimple *g
7976 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
7977 build_fold_addr_expr (counts[fd->ordered]));
7978 gimple_set_location (g, loc);
7979 gsi_insert_before (gsi, g, GSI_SAME_STMT);
7982 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
7984 static void
7985 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
7986 tree *counts, tree c, location_t loc)
7988 auto_vec<tree, 10> args;
7989 enum built_in_function sink_ix
7990 = fd->iter_type == long_integer_type_node
7991 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
7992 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
7993 int i;
7994 gimple_stmt_iterator gsi2 = *gsi;
7995 bool warned_step = false;
7997 for (i = 0; i < fd->ordered; i++)
7999 off = TREE_PURPOSE (deps);
8000 if (!integer_zerop (off))
8002 gcc_assert (fd->loops[i].cond_code == LT_EXPR
8003 || fd->loops[i].cond_code == GT_EXPR);
8004 bool forward = fd->loops[i].cond_code == LT_EXPR;
8005 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8006 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
8007 "lexically later iteration");
8008 break;
8010 deps = TREE_CHAIN (deps);
8012 /* If all offsets corresponding to the collapsed loops are zero,
8013 this depend clause can be ignored. FIXME: but there is still a
8014 flush needed. We need to emit one __sync_synchronize () for it
8015 though (perhaps conditionally)? Solve this together with the
8016 conservative dependence folding optimization.
8017 if (i >= fd->collapse)
8018 return; */
8020 deps = OMP_CLAUSE_DECL (c);
8021 gsi_prev (&gsi2);
8022 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
8023 edge e2 = split_block_after_labels (e1->dest);
8025 *gsi = gsi_after_labels (e1->dest);
8026 for (i = 0; i < fd->ordered; i++)
8028 tree itype = TREE_TYPE (fd->loops[i].v);
8029 if (POINTER_TYPE_P (itype))
8030 itype = sizetype;
8031 if (i)
8032 deps = TREE_CHAIN (deps);
8033 off = TREE_PURPOSE (deps);
8034 tree s = fold_convert_loc (loc, itype, fd->loops[i].step);
8036 if (integer_zerop (off))
8037 t = boolean_true_node;
8038 else
8040 tree a;
8041 tree co = fold_convert_loc (loc, itype, off);
8042 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
8044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8045 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
8046 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
8047 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
8048 co);
8050 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8051 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8052 fd->loops[i].v, co);
8053 else
8054 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
8055 fd->loops[i].v, co);
8056 if (fd->loops[i].cond_code == LT_EXPR)
8058 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8059 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
8060 fd->loops[i].n1);
8061 else
8062 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
8063 fd->loops[i].n2);
8065 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8066 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
8067 fd->loops[i].n2);
8068 else
8069 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
8070 fd->loops[i].n1);
8072 if (cond)
8073 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
8074 else
8075 cond = t;
8077 off = fold_convert_loc (loc, itype, off);
8079 if (fd->loops[i].cond_code == LT_EXPR
8080 ? !integer_onep (fd->loops[i].step)
8081 : !integer_minus_onep (fd->loops[i].step))
8083 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
8084 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
8085 fold_build1_loc (loc, NEGATE_EXPR, itype,
8086 s));
8087 else
8088 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, s);
8089 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
8090 build_int_cst (itype, 0));
8091 if (integer_zerop (t) && !warned_step)
8093 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
8094 "in the iteration space");
8095 warned_step = true;
8097 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
8098 cond, t);
8101 if (i <= fd->collapse - 1 && fd->collapse > 1)
8102 t = fd->loop.v;
8103 else if (counts[i])
8104 t = counts[i];
8105 else
8107 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8108 fd->loops[i].v, fd->loops[i].n1);
8109 t = fold_convert_loc (loc, fd->iter_type, t);
8111 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
8112 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
8113 fold_build1_loc (loc, NEGATE_EXPR, itype,
8114 s));
8115 else
8116 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
8117 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8118 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
8119 off = fold_convert_loc (loc, fd->iter_type, off);
8120 if (i <= fd->collapse - 1 && fd->collapse > 1)
8122 if (i)
8123 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
8124 off);
8125 if (i < fd->collapse - 1)
8127 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
8128 counts[i]);
8129 continue;
8132 off = unshare_expr (off);
8133 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
8134 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
8135 true, GSI_SAME_STMT);
8136 args.safe_push (t);
8138 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
8139 gimple_set_location (g, loc);
8140 gsi_insert_before (gsi, g, GSI_SAME_STMT);
8142 *gsi = gsi_last_bb (e1->src);
8143 cond = unshare_expr (cond);
8144 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
8145 GSI_CONTINUE_LINKING);
8146 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
8147 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
8148 e3->probability = REG_BR_PROB_BASE / 8;
8149 e1->probability = REG_BR_PROB_BASE - e3->probability;
8150 e1->flags = EDGE_TRUE_VALUE;
8151 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
8153 *gsi = gsi_after_labels (e2->dest);
8156 /* Expand all #pragma omp ordered depend(source) and
8157 #pragma omp ordered depend(sink:...) constructs in the current
8158 #pragma omp for ordered(n) region. */
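/* A user-level doacross example that the two expanders above handle
   (editorial sketch):

     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
         {
     #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
           a[i][j] = f (a[i-1][j], a[i][j-1]);
     #pragma omp ordered depend(source)
         }

   Each depend(sink) clause becomes a GOMP_doacross_wait call guarded
   by the bounds checks built in expand_omp_ordered_sink, and the
   depend(source) becomes a GOMP_doacross_post of the current counts
   array.  */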
8160 static void
8161 expand_omp_ordered_source_sink (struct omp_region *region,
8162 struct omp_for_data *fd, tree *counts,
8163 basic_block cont_bb)
8165 struct omp_region *inner;
8166 int i;
8167 for (i = fd->collapse - 1; i < fd->ordered; i++)
8168 if (i == fd->collapse - 1 && fd->collapse > 1)
8169 counts[i] = NULL_TREE;
8170 else if (i >= fd->collapse && !cont_bb)
8171 counts[i] = build_zero_cst (fd->iter_type);
8172 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
8173 && integer_onep (fd->loops[i].step))
8174 counts[i] = NULL_TREE;
8175 else
8176 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
8177 tree atype
8178 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
8179 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
8180 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
8182 for (inner = region->inner; inner; inner = inner->next)
8183 if (inner->type == GIMPLE_OMP_ORDERED)
8185 gomp_ordered *ord_stmt = inner->ord_stmt;
8186 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
8187 location_t loc = gimple_location (ord_stmt);
8188 tree c;
8189 for (c = gimple_omp_ordered_clauses (ord_stmt);
8190 c; c = OMP_CLAUSE_CHAIN (c))
8191 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
8192 break;
8193 if (c)
8194 expand_omp_ordered_source (&gsi, fd, counts, loc);
8195 for (c = gimple_omp_ordered_clauses (ord_stmt);
8196 c; c = OMP_CLAUSE_CHAIN (c))
8197 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
8198 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
8199 gsi_remove (&gsi, true);
8203 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
8204 collapsed. */
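/* E.g. (editorial note) for '#pragma omp for ordered(2)' with no
   collapse clause, fd->collapse is 1 and fd->ordered is 2, so the
   body is wrapped in one sequential j-loop here while the outermost
   loop remains the workshared one.  */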
8206 static basic_block
8207 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
8208 basic_block cont_bb, basic_block body_bb,
8209 bool ordered_lastprivate)
8211 if (fd->ordered == fd->collapse)
8212 return cont_bb;
8214 if (!cont_bb)
8216 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8217 for (int i = fd->collapse; i < fd->ordered; i++)
8219 tree type = TREE_TYPE (fd->loops[i].v);
8220 tree n1 = fold_convert (type, fd->loops[i].n1);
8221 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
8222 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8223 size_int (i - fd->collapse + 1),
8224 NULL_TREE, NULL_TREE);
8225 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8227 return NULL;
8230 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
8232 tree t, type = TREE_TYPE (fd->loops[i].v);
8233 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8234 expand_omp_build_assign (&gsi, fd->loops[i].v,
8235 fold_convert (type, fd->loops[i].n1));
8236 if (counts[i])
8237 expand_omp_build_assign (&gsi, counts[i],
8238 build_zero_cst (fd->iter_type));
8239 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8240 size_int (i - fd->collapse + 1),
8241 NULL_TREE, NULL_TREE);
8242 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8243 if (!gsi_end_p (gsi))
8244 gsi_prev (&gsi);
8245 else
8246 gsi = gsi_last_bb (body_bb);
8247 edge e1 = split_block (body_bb, gsi_stmt (gsi));
8248 basic_block new_body = e1->dest;
8249 if (body_bb == cont_bb)
8250 cont_bb = new_body;
8251 edge e2 = NULL;
8252 basic_block new_header;
8253 if (EDGE_COUNT (cont_bb->preds) > 0)
8255 gsi = gsi_last_bb (cont_bb);
8256 if (POINTER_TYPE_P (type))
8257 t = fold_build_pointer_plus (fd->loops[i].v,
8258 fold_convert (sizetype,
8259 fd->loops[i].step));
8260 else
8261 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
8262 fold_convert (type, fd->loops[i].step));
8263 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
8264 if (counts[i])
8266 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
8267 build_int_cst (fd->iter_type, 1));
8268 expand_omp_build_assign (&gsi, counts[i], t);
8269 t = counts[i];
8271 else
8273 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8274 fd->loops[i].v, fd->loops[i].n1);
8275 t = fold_convert (fd->iter_type, t);
8276 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8277 true, GSI_SAME_STMT);
8279 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8280 size_int (i - fd->collapse + 1),
8281 NULL_TREE, NULL_TREE);
8282 expand_omp_build_assign (&gsi, aref, t);
8283 gsi_prev (&gsi);
8284 e2 = split_block (cont_bb, gsi_stmt (gsi));
8285 new_header = e2->dest;
8287 else
8288 new_header = cont_bb;
8289 gsi = gsi_after_labels (new_header);
8290 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
8291 true, GSI_SAME_STMT);
8292 tree n2
8293 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
8294 true, NULL_TREE, true, GSI_SAME_STMT);
8295 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
8296 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
8297 edge e3 = split_block (new_header, gsi_stmt (gsi));
8298 cont_bb = e3->dest;
8299 remove_edge (e1);
8300 make_edge (body_bb, new_header, EDGE_FALLTHRU);
8301 e3->flags = EDGE_FALSE_VALUE;
8302 e3->probability = REG_BR_PROB_BASE / 8;
8303 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
8304 e1->probability = REG_BR_PROB_BASE - e3->probability;
8306 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
8307 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
8309 if (e2)
8311 struct loop *loop = alloc_loop ();
8312 loop->header = new_header;
8313 loop->latch = e2->src;
8314 add_loop (loop, body_bb->loop_father);
8318 /* If there are any lastprivate clauses and it is possible some loops
8319 might have zero iterations, ensure all the decls are initialized,
8320 otherwise we could crash evaluating C++ class iterators with lastprivate
8321 clauses. */
8322 bool need_inits = false;
8323 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
8324 if (need_inits)
8326 tree type = TREE_TYPE (fd->loops[i].v);
8327 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8328 expand_omp_build_assign (&gsi, fd->loops[i].v,
8329 fold_convert (type, fd->loops[i].n1));
8331 else
8333 tree type = TREE_TYPE (fd->loops[i].v);
8334 tree this_cond = fold_build2 (fd->loops[i].cond_code,
8335 boolean_type_node,
8336 fold_convert (type, fd->loops[i].n1),
8337 fold_convert (type, fd->loops[i].n2));
8338 if (!integer_onep (this_cond))
8339 need_inits = true;
8342 return cont_bb;
8346 /* A subroutine of expand_omp_for. Generate code for a parallel
8347 loop with any schedule. Given parameters:
8349 for (V = N1; V cond N2; V += STEP) BODY;
8351 where COND is "<" or ">", we generate pseudocode
8353 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
8354 if (more) goto L0; else goto L3;
8356 V = istart0;
8357 iend = iend0;
8359 BODY;
8360 V += STEP;
8361 if (V cond iend) goto L1; else goto L2;
8363 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
8366 If this is a combined omp parallel loop, instead of the call to
8367 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
8368 If this is gimple_omp_for_combined_p loop, then instead of assigning
8369 V and iend in L0 we assign the first two _looptemp_ clause decls of the
8370 inner GIMPLE_OMP_FOR and V += STEP; and
8371 if (V cond iend) goto L1; else goto L2; are removed.
8373 For collapsed loops, given parameters:
8374 collapse(3)
8375 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
8376 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
8377 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
8378 BODY;
8380 we generate pseudocode
8382 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
8383 if (cond3 is <)
8384 adj = STEP3 - 1;
8385 else
8386 adj = STEP3 + 1;
8387 count3 = (adj + N32 - N31) / STEP3;
8388 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
8389 if (cond2 is <)
8390 adj = STEP2 - 1;
8391 else
8392 adj = STEP2 + 1;
8393 count2 = (adj + N22 - N21) / STEP2;
8394 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
8395 if (cond1 is <)
8396 adj = STEP1 - 1;
8397 else
8398 adj = STEP1 + 1;
8399 count1 = (adj + N12 - N11) / STEP1;
8400 count = count1 * count2 * count3;
8401 goto Z1;
8403 count = 0;
8405 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
8406 if (more) goto L0; else goto L3;
8408 V = istart0;
8409 T = V;
8410 V3 = N31 + (T % count3) * STEP3;
8411 T = T / count3;
8412 V2 = N21 + (T % count2) * STEP2;
8413 T = T / count2;
8414 V1 = N11 + T * STEP1;
8415 iend = iend0;
8417 BODY;
8418 V += 1;
8419 if (V < iend) goto L10; else goto L2;
8420 L10:
8421 V3 += STEP3;
8422 if (V3 cond3 N32) goto L1; else goto L11;
8423 L11:
8424 V3 = N31;
8425 V2 += STEP2;
8426 if (V2 cond2 N22) goto L1; else goto L12;
8427 L12:
8428 V2 = N21;
8429 V1 += STEP1;
8430 goto L1;
8432 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
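/* Runtime view (editorial sketch; GOMP_loop_dynamic_* stands in for
   whichever GOMP_loop_foo_* pair START_FN/NEXT_FN name):

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (n1, n2, step, chunk, &istart0, &iend0))
       do
         for (long v = istart0; v < iend0; v += step)
           body (v);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   Each thread runs this fetch-a-chunk loop, which is the L0/L1/L2/L3
   control flow constructed below.  */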
8437 static void
8438 expand_omp_for_generic (struct omp_region *region,
8439 struct omp_for_data *fd,
8440 enum built_in_function start_fn,
8441 enum built_in_function next_fn,
8442 gimple *inner_stmt)
8444 tree type, istart0, iend0, iend;
8445 tree t, vmain, vback, bias = NULL_TREE;
8446 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
8447 basic_block l2_bb = NULL, l3_bb = NULL;
8448 gimple_stmt_iterator gsi;
8449 gassign *assign_stmt;
8450 bool in_combined_parallel = is_combined_parallel (region);
8451 bool broken_loop = region->cont == NULL;
8452 edge e, ne;
8453 tree *counts = NULL;
8454 int i;
8455 bool ordered_lastprivate = false;
8457 gcc_assert (!broken_loop || !in_combined_parallel);
8458 gcc_assert (fd->iter_type == long_integer_type_node
8459 || !in_combined_parallel);
8461 entry_bb = region->entry;
8462 cont_bb = region->cont;
8463 collapse_bb = NULL;
8464 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
8465 gcc_assert (broken_loop
8466 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
8467 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
8468 l1_bb = single_succ (l0_bb);
8469 if (!broken_loop)
8471 l2_bb = create_empty_bb (cont_bb);
8472 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
8473 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
8474 == l1_bb));
8475 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
8477 else
8478 l2_bb = NULL;
8479 l3_bb = BRANCH_EDGE (entry_bb)->dest;
8480 exit_bb = region->exit;
8482 gsi = gsi_last_bb (entry_bb);
8484 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
8485 if (fd->ordered
8486 && find_omp_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
8487 OMP_CLAUSE_LASTPRIVATE))
8488 ordered_lastprivate = true;
8489 if (fd->collapse > 1 || fd->ordered)
8491 int first_zero_iter1 = -1, first_zero_iter2 = -1;
8492 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
8494 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
8495 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
8496 zero_iter1_bb, first_zero_iter1,
8497 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
8499 if (zero_iter1_bb)
8501 /* Some counts[i] vars might be uninitialized if
8502 some loop has zero iterations. But the body shouldn't
8503 be executed in that case, so just avoid uninit warnings. */
8504 for (i = first_zero_iter1;
8505 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
8506 if (SSA_VAR_P (counts[i]))
8507 TREE_NO_WARNING (counts[i]) = 1;
8508 gsi_prev (&gsi);
8509 e = split_block (entry_bb, gsi_stmt (gsi));
8510 entry_bb = e->dest;
8511 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
8512 gsi = gsi_last_bb (entry_bb);
8513 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8514 get_immediate_dominator (CDI_DOMINATORS,
8515 zero_iter1_bb));
8517 if (zero_iter2_bb)
8519 /* Some counts[i] vars might be uninitialized if
8520 some loop has zero iterations. But the body shouldn't
8521 be executed in that case, so just avoid uninit warnings. */
8522 for (i = first_zero_iter2; i < fd->ordered; i++)
8523 if (SSA_VAR_P (counts[i]))
8524 TREE_NO_WARNING (counts[i]) = 1;
8525 if (zero_iter1_bb)
8526 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8527 else
8529 gsi_prev (&gsi);
8530 e = split_block (entry_bb, gsi_stmt (gsi));
8531 entry_bb = e->dest;
8532 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8533 gsi = gsi_last_bb (entry_bb);
8534 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8535 get_immediate_dominator
8536 (CDI_DOMINATORS, zero_iter2_bb));
8539 if (fd->collapse == 1)
8541 counts[0] = fd->loop.n2;
8542 fd->loop = fd->loops[0];
8546 type = TREE_TYPE (fd->loop.v);
8547 istart0 = create_tmp_var (fd->iter_type, ".istart0");
8548 iend0 = create_tmp_var (fd->iter_type, ".iend0");
8549 TREE_ADDRESSABLE (istart0) = 1;
8550 TREE_ADDRESSABLE (iend0) = 1;
8552 /* See if we need to bias by LLONG_MIN. */
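/* Editorial note: the GOMP_loop_ull_* entry points operate on unsigned
   long long, so a signed range that straddles zero can't be passed
   through unchanged; adding TYPE_MIN_VALUE below shifts the whole
   range into unsigned space while preserving its ordering, and the
   bias is subtracted again when istart0/iend0 come back.  */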
8553 if (fd->iter_type == long_long_unsigned_type_node
8554 && TREE_CODE (type) == INTEGER_TYPE
8555 && !TYPE_UNSIGNED (type)
8556 && fd->ordered == 0)
8558 tree n1, n2;
8560 if (fd->loop.cond_code == LT_EXPR)
8562 n1 = fd->loop.n1;
8563 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
8565 else
8567 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
8568 n2 = fd->loop.n1;
8570 if (TREE_CODE (n1) != INTEGER_CST
8571 || TREE_CODE (n2) != INTEGER_CST
8572 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
8573 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
8576 gimple_stmt_iterator gsif = gsi;
8577 gsi_prev (&gsif);
8579 tree arr = NULL_TREE;
8580 if (in_combined_parallel)
8582 gcc_assert (fd->ordered == 0);
8583 /* In a combined parallel loop, emit a call to
8584 GOMP_loop_foo_next. */
8585 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
8586 build_fold_addr_expr (istart0),
8587 build_fold_addr_expr (iend0));
8589 else
8591 tree t0, t1, t2, t3, t4;
8592 /* If this is not a combined parallel loop, emit a call to
8593 GOMP_loop_foo_start in ENTRY_BB. */
8594 t4 = build_fold_addr_expr (iend0);
8595 t3 = build_fold_addr_expr (istart0);
8596 if (fd->ordered)
8598 t0 = build_int_cst (unsigned_type_node,
8599 fd->ordered - fd->collapse + 1);
8600 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
8601 fd->ordered
8602 - fd->collapse + 1),
8603 ".omp_counts");
8604 DECL_NAMELESS (arr) = 1;
8605 TREE_ADDRESSABLE (arr) = 1;
8606 TREE_STATIC (arr) = 1;
8607 vec<constructor_elt, va_gc> *v;
8608 vec_alloc (v, fd->ordered - fd->collapse + 1);
8609 int idx;
8611 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
8613 tree c;
8614 if (idx == 0 && fd->collapse > 1)
8615 c = fd->loop.n2;
8616 else
8617 c = counts[idx + fd->collapse - 1];
8618 tree purpose = size_int (idx);
8619 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
8620 if (TREE_CODE (c) != INTEGER_CST)
8621 TREE_STATIC (arr) = 0;
8624 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
8625 if (!TREE_STATIC (arr))
8626 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
8627 void_type_node, arr),
8628 true, NULL_TREE, true, GSI_SAME_STMT);
8629 t1 = build_fold_addr_expr (arr);
8630 t2 = NULL_TREE;
8632 else
8634 t2 = fold_convert (fd->iter_type, fd->loop.step);
8635 t1 = fd->loop.n2;
8636 t0 = fd->loop.n1;
8637 if (gimple_omp_for_combined_into_p (fd->for_stmt))
8639 tree innerc
8640 = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
8641 OMP_CLAUSE__LOOPTEMP_);
8642 gcc_assert (innerc);
8643 t0 = OMP_CLAUSE_DECL (innerc);
8644 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8645 OMP_CLAUSE__LOOPTEMP_);
8646 gcc_assert (innerc);
8647 t1 = OMP_CLAUSE_DECL (innerc);
8649 if (POINTER_TYPE_P (TREE_TYPE (t0))
8650 && TYPE_PRECISION (TREE_TYPE (t0))
8651 != TYPE_PRECISION (fd->iter_type))
8653 /* Avoid casting pointers to an integer of a different size. */
8654 tree itype = signed_type_for (type);
8655 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
8656 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
8658 else
8660 t1 = fold_convert (fd->iter_type, t1);
8661 t0 = fold_convert (fd->iter_type, t0);
8663 if (bias)
8665 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
8666 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
8669 if (fd->iter_type == long_integer_type_node || fd->ordered)
8671 if (fd->chunk_size)
8673 t = fold_convert (fd->iter_type, fd->chunk_size);
8674 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8675 if (fd->ordered)
8676 t = build_call_expr (builtin_decl_explicit (start_fn),
8677 5, t0, t1, t, t3, t4);
8678 else
8679 t = build_call_expr (builtin_decl_explicit (start_fn),
8680 6, t0, t1, t2, t, t3, t4);
8682 else if (fd->ordered)
8683 t = build_call_expr (builtin_decl_explicit (start_fn),
8684 4, t0, t1, t3, t4);
8685 else
8686 t = build_call_expr (builtin_decl_explicit (start_fn),
8687 5, t0, t1, t2, t3, t4);
8689 else
8691 tree t5;
8692 tree c_bool_type;
8693 tree bfn_decl;
8695 /* The GOMP_loop_ull_*start functions have an additional boolean
8696 argument, true for < loops and false for > loops.
8697 In Fortran, the C bool type can be different from
8698 boolean_type_node. */
8699 bfn_decl = builtin_decl_explicit (start_fn);
8700 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
8701 t5 = build_int_cst (c_bool_type,
8702 fd->loop.cond_code == LT_EXPR ? 1 : 0);
8703 if (fd->chunk_size)
8705 tree bfn_decl = builtin_decl_explicit (start_fn);
8706 t = fold_convert (fd->iter_type, fd->chunk_size);
8707 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8708 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
8710 else
8711 t = build_call_expr (builtin_decl_explicit (start_fn),
8712 6, t5, t0, t1, t2, t3, t4);
8715 if (TREE_TYPE (t) != boolean_type_node)
8716 t = fold_build2 (NE_EXPR, boolean_type_node,
8717 t, build_int_cst (TREE_TYPE (t), 0));
8718 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8719 true, GSI_SAME_STMT);
8720 if (arr && !TREE_STATIC (arr))
8722 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
8723 TREE_THIS_VOLATILE (clobber) = 1;
8724 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
8725 GSI_SAME_STMT);
8727 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
8729 /* Remove the GIMPLE_OMP_FOR statement. */
8730 gsi_remove (&gsi, true);
8732 if (gsi_end_p (gsif))
8733 gsif = gsi_after_labels (gsi_bb (gsif));
8734 gsi_next (&gsif);
8736 /* Iteration setup for sequential loop goes in L0_BB. */
8737 tree startvar = fd->loop.v;
8738 tree endvar = NULL_TREE;
8740 if (gimple_omp_for_combined_p (fd->for_stmt))
8742 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
8743 && gimple_omp_for_kind (inner_stmt)
8744 == GF_OMP_FOR_KIND_SIMD);
8745 tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt),
8746 OMP_CLAUSE__LOOPTEMP_);
8747 gcc_assert (innerc);
8748 startvar = OMP_CLAUSE_DECL (innerc);
8749 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8750 OMP_CLAUSE__LOOPTEMP_);
8751 gcc_assert (innerc);
8752 endvar = OMP_CLAUSE_DECL (innerc);
8755 gsi = gsi_start_bb (l0_bb);
8756 t = istart0;
8757 if (fd->ordered && fd->collapse == 1)
8758 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
8759 fold_convert (fd->iter_type, fd->loop.step));
8760 else if (bias)
8761 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
8762 if (fd->ordered && fd->collapse == 1)
8764 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8765 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
8766 fd->loop.n1, fold_convert (sizetype, t));
8767 else
8769 t = fold_convert (TREE_TYPE (startvar), t);
8770 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
8771 fd->loop.n1, t);
8774 else
8776 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8777 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
8778 t = fold_convert (TREE_TYPE (startvar), t);
8780 t = force_gimple_operand_gsi (&gsi, t,
8781 DECL_P (startvar)
8782 && TREE_ADDRESSABLE (startvar),
8783 NULL_TREE, false, GSI_CONTINUE_LINKING);
8784 assign_stmt = gimple_build_assign (startvar, t);
8785 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8787 t = iend0;
8788 if (fd->ordered && fd->collapse == 1)
8789 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
8790 fold_convert (fd->iter_type, fd->loop.step));
8791 else if (bias)
8792 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
8793 if (fd->ordered && fd->collapse == 1)
8795 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8796 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
8797 fd->loop.n1, fold_convert (sizetype, t));
8798 else
8800 t = fold_convert (TREE_TYPE (startvar), t);
8801 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
8802 fd->loop.n1, t);
8805 else
8807 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8808 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
8809 t = fold_convert (TREE_TYPE (startvar), t);
8811 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8812 false, GSI_CONTINUE_LINKING);
8813 if (endvar)
8815 assign_stmt = gimple_build_assign (endvar, iend);
8816 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8817 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
8818 assign_stmt = gimple_build_assign (fd->loop.v, iend);
8819 else
8820 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
8821 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8823 /* Handle linear clause adjustments. */
8824 tree itercnt = NULL_TREE;
8825 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
8826 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
8827 c; c = OMP_CLAUSE_CHAIN (c))
8828 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
8829 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
8831 tree d = OMP_CLAUSE_DECL (c);
8832 bool is_ref = is_reference (d);
8833 tree t = d, a, dest;
8834 if (is_ref)
8835 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
8836 tree type = TREE_TYPE (t);
8837 if (POINTER_TYPE_P (type))
8838 type = sizetype;
8839 dest = unshare_expr (t);
8840 tree v = create_tmp_var (TREE_TYPE (t), NULL);
8841 expand_omp_build_assign (&gsif, v, t);
8842 if (itercnt == NULL_TREE)
8844 itercnt = startvar;
8845 tree n1 = fd->loop.n1;
8846 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
8848 itercnt
8849 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
8850 itercnt);
8851 n1 = fold_convert (TREE_TYPE (itercnt), n1);
8853 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
8854 itercnt, n1);
8855 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
8856 itercnt, fd->loop.step);
8857 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
8858 NULL_TREE, false,
8859 GSI_CONTINUE_LINKING);
8861 a = fold_build2 (MULT_EXPR, type,
8862 fold_convert (type, itercnt),
8863 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
8864 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
8865 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
8866 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8867 false, GSI_CONTINUE_LINKING);
8868 assign_stmt = gimple_build_assign (dest, t);
8869 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8871 if (fd->collapse > 1)
8872 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
8874 if (fd->ordered)
8876 /* Until now, the counts array contained the number of iterations
8877 (or a variable holding it) for the ith loop. From now on, we need
8878 those counts only for the collapsed loops, and only from the 2nd
8879 to the last collapsed one. Move them one element earlier;
8880 we'll use counts[fd->collapse - 1] for the first source/sink
8881 iteration counter and so on, and counts[fd->ordered]
8882 as the array holding the current counter values for
8883 depend(source). */
8884 if (fd->collapse > 1)
8885 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
8886 if (broken_loop)
8888 int i;
8889 for (i = fd->collapse; i < fd->ordered; i++)
8891 tree type = TREE_TYPE (fd->loops[i].v);
8892 tree this_cond
8893 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
8894 fold_convert (type, fd->loops[i].n1),
8895 fold_convert (type, fd->loops[i].n2));
8896 if (!integer_onep (this_cond))
8897 break;
8899 if (i < fd->ordered)
8901 cont_bb
8902 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
8903 add_bb_to_loop (cont_bb, l1_bb->loop_father);
8904 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
8905 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
8906 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8907 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
8908 make_edge (cont_bb, l1_bb, 0);
8909 l2_bb = create_empty_bb (cont_bb);
8910 broken_loop = false;
8913 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
8914 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
8915 ordered_lastprivate);
8916 if (counts[fd->collapse - 1])
8918 gcc_assert (fd->collapse == 1);
8919 gsi = gsi_last_bb (l0_bb);
8920 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
8921 istart0, true);
8922 gsi = gsi_last_bb (cont_bb);
8923 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
8924 build_int_cst (fd->iter_type, 1));
8925 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
8926 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8927 size_zero_node, NULL_TREE, NULL_TREE);
8928 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
8929 t = counts[fd->collapse - 1];
8931 else if (fd->collapse > 1)
8932 t = fd->loop.v;
8933 else
8935 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
8936 fd->loops[0].v, fd->loops[0].n1);
8937 t = fold_convert (fd->iter_type, t);
8939 gsi = gsi_last_bb (l0_bb);
8940 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8941 size_zero_node, NULL_TREE, NULL_TREE);
8942 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8943 false, GSI_CONTINUE_LINKING);
8944 expand_omp_build_assign (&gsi, aref, t, true);
8947 if (!broken_loop)
8949 /* Code to control the increment and predicate for the sequential
8950 loop goes in the CONT_BB. */
8951 gsi = gsi_last_bb (cont_bb);
8952 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
8953 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
8954 vmain = gimple_omp_continue_control_use (cont_stmt);
8955 vback = gimple_omp_continue_control_def (cont_stmt);
8957 if (!gimple_omp_for_combined_p (fd->for_stmt))
8959 if (POINTER_TYPE_P (type))
8960 t = fold_build_pointer_plus (vmain, fd->loop.step);
8961 else
8962 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
8963 t = force_gimple_operand_gsi (&gsi, t,
8964 DECL_P (vback)
8965 && TREE_ADDRESSABLE (vback),
8966 NULL_TREE, true, GSI_SAME_STMT);
8967 assign_stmt = gimple_build_assign (vback, t);
8968 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8970 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
8972 if (fd->collapse > 1)
8973 t = fd->loop.v;
8974 else
8976 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
8977 fd->loops[0].v, fd->loops[0].n1);
8978 t = fold_convert (fd->iter_type, t);
8980 tree aref = build4 (ARRAY_REF, fd->iter_type,
8981 counts[fd->ordered], size_zero_node,
8982 NULL_TREE, NULL_TREE);
8983 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8984 true, GSI_SAME_STMT);
8985 expand_omp_build_assign (&gsi, aref, t);
8988 t = build2 (fd->loop.cond_code, boolean_type_node,
8989 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
8990 iend);
8991 gcond *cond_stmt = gimple_build_cond_empty (t);
8992 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
8995 /* Remove GIMPLE_OMP_CONTINUE. */
8996 gsi_remove (&gsi, true);
8998 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
8999 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
9001 /* Emit code to get the next parallel iteration in L2_BB. */
9002 gsi = gsi_start_bb (l2_bb);
9004 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
9005 build_fold_addr_expr (istart0),
9006 build_fold_addr_expr (iend0));
9007 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9008 false, GSI_CONTINUE_LINKING);
9009 if (TREE_TYPE (t) != boolean_type_node)
9010 t = fold_build2 (NE_EXPR, boolean_type_node,
9011 t, build_int_cst (TREE_TYPE (t), 0));
9012 gcond *cond_stmt = gimple_build_cond_empty (t);
9013 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
9016 /* Add the loop cleanup function. */
9017 gsi = gsi_last_bb (exit_bb);
9018 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
9019 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
9020 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
9021 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
9022 else
9023 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
9024 gcall *call_stmt = gimple_build_call (t, 0);
9025 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
9026 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
9027 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
9028 if (fd->ordered)
9030 tree arr = counts[fd->ordered];
9031 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
9032 TREE_THIS_VOLATILE (clobber) = 1;
9033 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
9034 GSI_SAME_STMT);
9036 gsi_remove (&gsi, true);
9038 /* Connect the new blocks. */
9039 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
9040 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
9042 if (!broken_loop)
9044 gimple_seq phis;
9046 e = find_edge (cont_bb, l3_bb);
9047 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
9049 phis = phi_nodes (l3_bb);
9050 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
9052 gimple *phi = gsi_stmt (gsi);
9053 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
9054 PHI_ARG_DEF_FROM_EDGE (phi, e));
9056 remove_edge (e);
9058 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
9059 e = find_edge (cont_bb, l1_bb);
9060 if (e == NULL)
9062 e = BRANCH_EDGE (cont_bb);
9063 gcc_assert (single_succ (e->dest) == l1_bb);
9065 if (gimple_omp_for_combined_p (fd->for_stmt))
9067 remove_edge (e);
9068 e = NULL;
9070 else if (fd->collapse > 1)
9072 remove_edge (e);
9073 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9075 else
9076 e->flags = EDGE_TRUE_VALUE;
9077 if (e)
9079 e->probability = REG_BR_PROB_BASE * 7 / 8;
9080 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
9082 else
9084 e = find_edge (cont_bb, l2_bb);
9085 e->flags = EDGE_FALLTHRU;
9087 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
9089 if (gimple_in_ssa_p (cfun))
9091 /* Add phis to the outer loop that connect to the phis in the inner,
9092 original loop, and move the loop entry value of the inner phi to
9093 the loop entry value of the outer phi. */
9094 gphi_iterator psi;
9095 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
9097 source_location locus;
9098 gphi *nphi;
9099 gphi *exit_phi = psi.phi ();
9101 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
9102 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
9104 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
9105 edge latch_to_l1 = find_edge (latch, l1_bb);
9106 gphi *inner_phi
9107 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
9109 tree t = gimple_phi_result (exit_phi);
9110 tree new_res = copy_ssa_name (t, NULL);
9111 nphi = create_phi_node (new_res, l0_bb);
9113 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
9114 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
9115 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
9116 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
9117 add_phi_arg (nphi, t, entry_to_l0, locus);
9119 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
9120 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
9122 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
9126 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
9127 recompute_dominator (CDI_DOMINATORS, l2_bb));
9128 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
9129 recompute_dominator (CDI_DOMINATORS, l3_bb));
9130 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
9131 recompute_dominator (CDI_DOMINATORS, l0_bb));
9132 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
9133 recompute_dominator (CDI_DOMINATORS, l1_bb));
9135 /* We enter expand_omp_for_generic with a loop. This original loop may
9136 have its own loop struct, or it may be part of an outer loop struct
9137 (which may be the fake loop). */
9138 struct loop *outer_loop = entry_bb->loop_father;
9139 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
9141 add_bb_to_loop (l2_bb, outer_loop);
9143 /* We've added a new loop around the original loop. Allocate the
9144 corresponding loop struct. */
9145 struct loop *new_loop = alloc_loop ();
9146 new_loop->header = l0_bb;
9147 new_loop->latch = l2_bb;
9148 add_loop (new_loop, outer_loop);
9150 /* Allocate a loop structure for the original loop unless we already
9151 had one. */
9152 if (!orig_loop_has_loop_struct
9153 && !gimple_omp_for_combined_p (fd->for_stmt))
9155 struct loop *orig_loop = alloc_loop ();
9156 orig_loop->header = l1_bb;
9157 /* The loop may have multiple latches. */
9158 add_loop (orig_loop, new_loop);
9164 /* A subroutine of expand_omp_for. Generate code for a parallel
9165 loop with static schedule and no specified chunk size. Given
9166 parameters:
9168 for (V = N1; V cond N2; V += STEP) BODY;
9170 where COND is "<" or ">", we generate pseudocode
9172 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
9173 if (cond is <)
9174 adj = STEP - 1;
9175 else
9176 adj = STEP + 1;
9177 if ((__typeof (V)) -1 > 0 && cond is >)
9178 n = -(adj + N2 - N1) / -STEP;
9179 else
9180 n = (adj + N2 - N1) / STEP;
9181 q = n / nthreads;
9182 tt = n % nthreads;
9183 if (threadid < tt) goto L3; else goto L4;
9185 tt = 0;
9186 q = q + 1;
9188 s0 = q * threadid + tt;
9189 e0 = s0 + q;
9190 V = s0 * STEP + N1;
9191 if (s0 >= e0) goto L2; else goto L0;
9193 e = e0 * STEP + N1;
9195 BODY;
9196 V += STEP;
9197 if (V cond e) goto L1;
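/* Worked example (editorial sketch): n = 10 iterations over
   nthreads = 4 gives q = 2, tt = 2, so threads 0 and 1 take
   q + 1 = 3 iterations each ([0,3) and [3,6)) while threads 2 and 3
   take 2 each ([6,8) and [8,10)); every thread computes its own range
   with no synchronization, which is why no chunk size is needed.  */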
9201 static void
9202 expand_omp_for_static_nochunk (struct omp_region *region,
9203 struct omp_for_data *fd,
9204 gimple *inner_stmt)
9206 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
9207 tree type, itype, vmain, vback;
9208 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
9209 basic_block body_bb, cont_bb, collapse_bb = NULL;
9210 basic_block fin_bb;
9211 gimple_stmt_iterator gsi;
9212 edge ep;
9213 bool broken_loop = region->cont == NULL;
9214 tree *counts = NULL;
9215 tree n1, n2, step;
9217 itype = type = TREE_TYPE (fd->loop.v);
9218 if (POINTER_TYPE_P (type))
9219 itype = signed_type_for (type);
9221 entry_bb = region->entry;
9222 cont_bb = region->cont;
9223 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
9224 fin_bb = BRANCH_EDGE (entry_bb)->dest;
9225 gcc_assert (broken_loop
9226 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
9227 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
9228 body_bb = single_succ (seq_start_bb);
9229 if (!broken_loop)
9231 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9232 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9233 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9235 exit_bb = region->exit;
9237 /* Iteration space partitioning goes in ENTRY_BB. */
9238 gsi = gsi_last_bb (entry_bb);
9239 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9241 if (fd->collapse > 1)
9243 int first_zero_iter = -1, dummy = -1;
9244 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9246 counts = XALLOCAVEC (tree, fd->collapse);
9247 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9248 fin_bb, first_zero_iter,
9249 dummy_bb, dummy, l2_dom_bb);
9250 t = NULL_TREE;
9252 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9253 t = integer_one_node;
9254 else
9255 t = fold_binary (fd->loop.cond_code, boolean_type_node,
9256 fold_convert (type, fd->loop.n1),
9257 fold_convert (type, fd->loop.n2));
9258 if (fd->collapse == 1
9259 && TYPE_UNSIGNED (type)
9260 && (t == NULL_TREE || !integer_onep (t)))
9262 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
9263 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
9264 true, GSI_SAME_STMT);
9265 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
9266 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
9267 true, GSI_SAME_STMT);
9268 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9269 NULL_TREE, NULL_TREE);
9270 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9271 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
9272 expand_omp_regimplify_p, NULL, NULL)
9273 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
9274 expand_omp_regimplify_p, NULL, NULL))
9276 gsi = gsi_for_stmt (cond_stmt);
9277 gimple_regimplify_operands (cond_stmt, &gsi);
9279 ep = split_block (entry_bb, cond_stmt);
9280 ep->flags = EDGE_TRUE_VALUE;
9281 entry_bb = ep->dest;
9282 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
9283 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
9284 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
9285 if (gimple_in_ssa_p (cfun))
9287 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
9288 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
9289 !gsi_end_p (gpi); gsi_next (&gpi))
9291 gphi *phi = gpi.phi ();
9292 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
9293 ep, UNKNOWN_LOCATION);
9296 gsi = gsi_last_bb (entry_bb);
9299 switch (gimple_omp_for_kind (fd->for_stmt))
9301 case GF_OMP_FOR_KIND_FOR:
9302 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
9303 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
9304 break;
9305 case GF_OMP_FOR_KIND_DISTRIBUTE:
9306 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
9307 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
9308 break;
9309 default:
9310 gcc_unreachable ();
9312 nthreads = build_call_expr (nthreads, 0);
9313 nthreads = fold_convert (itype, nthreads);
9314 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
9315 true, GSI_SAME_STMT);
9316 threadid = build_call_expr (threadid, 0);
9317 threadid = fold_convert (itype, threadid);
9318 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9319 true, GSI_SAME_STMT);
9321 n1 = fd->loop.n1;
9322 n2 = fd->loop.n2;
9323 step = fd->loop.step;
9324 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9326 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
9327 OMP_CLAUSE__LOOPTEMP_);
9328 gcc_assert (innerc);
9329 n1 = OMP_CLAUSE_DECL (innerc);
9330 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9331 OMP_CLAUSE__LOOPTEMP_);
9332 gcc_assert (innerc);
9333 n2 = OMP_CLAUSE_DECL (innerc);
9335 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9336 true, NULL_TREE, true, GSI_SAME_STMT);
9337 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
9338 true, NULL_TREE, true, GSI_SAME_STMT);
9339 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9340 true, NULL_TREE, true, GSI_SAME_STMT);
9342 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
9343 t = fold_build2 (PLUS_EXPR, itype, step, t);
9344 t = fold_build2 (PLUS_EXPR, itype, t, n2);
9345 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
9346 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
9347 t = fold_build2 (TRUNC_DIV_EXPR, itype,
9348 fold_build1 (NEGATE_EXPR, itype, t),
9349 fold_build1 (NEGATE_EXPR, itype, step));
9350 else
9351 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
9352 t = fold_convert (itype, t);
9353 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9355 q = create_tmp_reg (itype, "q");
9356 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
9357 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9358 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
9360 tt = create_tmp_reg (itype, "tt");
9361 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
9362 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9363 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
9365 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
9366 gcond *cond_stmt = gimple_build_cond_empty (t);
9367 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9369 second_bb = split_block (entry_bb, cond_stmt)->dest;
9370 gsi = gsi_last_bb (second_bb);
9371 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9373 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
9374 GSI_SAME_STMT);
9375 gassign *assign_stmt
9376 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
9377 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9379 third_bb = split_block (second_bb, assign_stmt)->dest;
9380 gsi = gsi_last_bb (third_bb);
9381 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9383 t = build2 (MULT_EXPR, itype, q, threadid);
9384 t = build2 (PLUS_EXPR, itype, t, tt);
9385 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9387 t = fold_build2 (PLUS_EXPR, itype, s0, q);
9388 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9390 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
9391 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
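/* Illustrative note, not part of the original source: a minimal worked
   example of the split above, assuming n = 10 and nthreads = 4:

       q  = 10 / 4 = 2     (base iterations per thread)
       tt = 10 % 4 = 2     (threads with threadid < tt take q + 1)

       threadid 0: s0 = 0, e0 = 3
       threadid 1: s0 = 3, e0 = 6
       threadid 2: s0 = 6, e0 = 8
       threadid 3: s0 = 8, e0 = 10

   Threads with s0 >= e0 skip the body via the GE_EXPR condition just
   inserted.  */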
9393 /* Remove the GIMPLE_OMP_FOR statement. */
9394 gsi_remove (&gsi, true);
9396 /* Setup code for sequential iteration goes in SEQ_START_BB. */
9397 gsi = gsi_start_bb (seq_start_bb);
9399 tree startvar = fd->loop.v;
9400 tree endvar = NULL_TREE;
9402 if (gimple_omp_for_combined_p (fd->for_stmt))
9404 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
9405 ? gimple_omp_parallel_clauses (inner_stmt)
9406 : gimple_omp_for_clauses (inner_stmt);
9407 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
9408 gcc_assert (innerc);
9409 startvar = OMP_CLAUSE_DECL (innerc);
9410 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9411 OMP_CLAUSE__LOOPTEMP_);
9412 gcc_assert (innerc);
9413 endvar = OMP_CLAUSE_DECL (innerc);
9414 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
9415 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
9417 int i;
9418 for (i = 1; i < fd->collapse; i++)
9420 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9421 OMP_CLAUSE__LOOPTEMP_);
9422 gcc_assert (innerc);
9424 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9425 OMP_CLAUSE__LOOPTEMP_);
9426 if (innerc)
9428 /* If needed (distribute parallel for with lastprivate),
9429 propagate down the total number of iterations. */
9430 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
9431 fd->loop.n2);
9432 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
9433 GSI_CONTINUE_LINKING);
9434 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
9435 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9439 t = fold_convert (itype, s0);
9440 t = fold_build2 (MULT_EXPR, itype, t, step);
9441 if (POINTER_TYPE_P (type))
9442 t = fold_build_pointer_plus (n1, t);
9443 else
9444 t = fold_build2 (PLUS_EXPR, type, t, n1);
9445 t = fold_convert (TREE_TYPE (startvar), t);
9446 t = force_gimple_operand_gsi (&gsi, t,
9447 DECL_P (startvar)
9448 && TREE_ADDRESSABLE (startvar),
9449 NULL_TREE, false, GSI_CONTINUE_LINKING);
9450 assign_stmt = gimple_build_assign (startvar, t);
9451 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9453 t = fold_convert (itype, e0);
9454 t = fold_build2 (MULT_EXPR, itype, t, step);
9455 if (POINTER_TYPE_P (type))
9456 t = fold_build_pointer_plus (n1, t);
9457 else
9458 t = fold_build2 (PLUS_EXPR, type, t, n1);
9459 t = fold_convert (TREE_TYPE (startvar), t);
9460 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9461 false, GSI_CONTINUE_LINKING);
9462 if (endvar)
9464 assign_stmt = gimple_build_assign (endvar, e);
9465 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9466 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
9467 assign_stmt = gimple_build_assign (fd->loop.v, e);
9468 else
9469 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
9470 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9472 /* Handle linear clause adjustments. */
9473 tree itercnt = NULL_TREE;
9474 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9475 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9476 c; c = OMP_CLAUSE_CHAIN (c))
9477 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9478 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9480 tree d = OMP_CLAUSE_DECL (c);
9481 bool is_ref = is_reference (d);
9482 tree t = d, a, dest;
9483 if (is_ref)
9484 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9485 if (itercnt == NULL_TREE)
9487 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9489 itercnt = fold_build2 (MINUS_EXPR, itype,
9490 fold_convert (itype, n1),
9491 fold_convert (itype, fd->loop.n1));
9492 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
9493 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
9494 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
9495 NULL_TREE, false,
9496 GSI_CONTINUE_LINKING);
9498 else
9499 itercnt = s0;
9501 tree type = TREE_TYPE (t);
9502 if (POINTER_TYPE_P (type))
9503 type = sizetype;
9504 a = fold_build2 (MULT_EXPR, type,
9505 fold_convert (type, itercnt),
9506 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
9507 dest = unshare_expr (t);
9508 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
9509 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
9510 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9511 false, GSI_CONTINUE_LINKING);
9512 assign_stmt = gimple_build_assign (dest, t);
9513 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
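/* Illustrative note, not part of the original source: the adjustment
   above serves loops such as this hypothetical fragment, assuming
   -fopenmp:

       int j = 0;
       #pragma omp parallel for linear(j:2)
       for (int i = 0; i < n; i++)
	 j += 2;

   A thread entering its range at logical iteration ITERCNT must see
   j == j_initial + ITERCNT * 2, which is the value stored into DEST
   above.  */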
9515 if (fd->collapse > 1)
9516 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
9518 if (!broken_loop)
9520 /* The code controlling the sequential loop replaces the
9521 GIMPLE_OMP_CONTINUE. */
9522 gsi = gsi_last_bb (cont_bb);
9523 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
9524 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
9525 vmain = gimple_omp_continue_control_use (cont_stmt);
9526 vback = gimple_omp_continue_control_def (cont_stmt);
9528 if (!gimple_omp_for_combined_p (fd->for_stmt))
9530 if (POINTER_TYPE_P (type))
9531 t = fold_build_pointer_plus (vmain, step);
9532 else
9533 t = fold_build2 (PLUS_EXPR, type, vmain, step);
9534 t = force_gimple_operand_gsi (&gsi, t,
9535 DECL_P (vback)
9536 && TREE_ADDRESSABLE (vback),
9537 NULL_TREE, true, GSI_SAME_STMT);
9538 assign_stmt = gimple_build_assign (vback, t);
9539 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9541 t = build2 (fd->loop.cond_code, boolean_type_node,
9542 DECL_P (vback) && TREE_ADDRESSABLE (vback)
9543 ? t : vback, e);
9544 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9547 /* Remove the GIMPLE_OMP_CONTINUE statement. */
9548 gsi_remove (&gsi, true);
9550 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
9551 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
9554 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
9555 gsi = gsi_last_bb (exit_bb);
9556 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
9558 t = gimple_omp_return_lhs (gsi_stmt (gsi));
9559 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
9561 gsi_remove (&gsi, true);
9563 /* Connect all the blocks. */
9564 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
9565 ep->probability = REG_BR_PROB_BASE / 4 * 3;
9566 ep = find_edge (entry_bb, second_bb);
9567 ep->flags = EDGE_TRUE_VALUE;
9568 ep->probability = REG_BR_PROB_BASE / 4;
9569 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
9570 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
9572 if (!broken_loop)
9574 ep = find_edge (cont_bb, body_bb);
9575 if (ep == NULL)
9577 ep = BRANCH_EDGE (cont_bb);
9578 gcc_assert (single_succ (ep->dest) == body_bb);
9580 if (gimple_omp_for_combined_p (fd->for_stmt))
9582 remove_edge (ep);
9583 ep = NULL;
9585 else if (fd->collapse > 1)
9587 remove_edge (ep);
9588 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9590 else
9591 ep->flags = EDGE_TRUE_VALUE;
9592 find_edge (cont_bb, fin_bb)->flags
9593 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
9596 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
9597 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
9598 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
9600 set_immediate_dominator (CDI_DOMINATORS, body_bb,
9601 recompute_dominator (CDI_DOMINATORS, body_bb));
9602 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
9603 recompute_dominator (CDI_DOMINATORS, fin_bb));
9605 struct loop *loop = body_bb->loop_father;
9606 if (loop != entry_bb->loop_father)
9608 gcc_assert (loop->header == body_bb);
9609 gcc_assert (broken_loop
9610 || loop->latch == region->cont
9611 || single_pred (loop->latch) == region->cont);
9612 return;
9615 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
9617 loop = alloc_loop ();
9618 loop->header = body_bb;
9619 if (collapse_bb == NULL)
9620 loop->latch = cont_bb;
9621 add_loop (loop, body_bb->loop_father);
9625 /* Return phi in E->DEST with ARG on edge E. */
9627 static gphi *
9628 find_phi_with_arg_on_edge (tree arg, edge e)
9630 basic_block bb = e->dest;
9632 for (gphi_iterator gpi = gsi_start_phis (bb);
9633 !gsi_end_p (gpi);
9634 gsi_next (&gpi))
9636 gphi *phi = gpi.phi ();
9637 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
9638 return phi;
9641 return NULL;
9644 /* A subroutine of expand_omp_for. Generate code for a parallel
9645 loop with static schedule and a specified chunk size. Given
9646 parameters:
9648 for (V = N1; V cond N2; V += STEP) BODY;
9650 where COND is "<" or ">", we generate pseudocode
9652 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
9653 if (cond is <)
9654 adj = STEP - 1;
9655 else
9656 adj = STEP + 1;
9657 if ((__typeof (V)) -1 > 0 && cond is >)
9658 n = -(adj + N2 - N1) / -STEP;
9659 else
9660 n = (adj + N2 - N1) / STEP;
9661 trip = 0;
9662 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
9663 here so that V is defined
9664 if the loop is not entered
9665 L0:
9666 s0 = (trip * nthreads + threadid) * CHUNK;
9667 e0 = min(s0 + CHUNK, n);
9668 if (s0 < n) goto L1; else goto L4;
9669 L1:
9670 V = s0 * STEP + N1;
9671 e = e0 * STEP + N1;
9672 L2:
9673 BODY;
9674 V += STEP;
9675 if (V cond e) goto L2; else goto L3;
9676 L3:
9677 trip += 1;
9678 goto L0;
9679 L4:
9680 */
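/* Illustrative note, not part of the original source: a hypothetical
   user-level loop that reaches this expansion, assuming -fopenmp:

       #pragma omp parallel for schedule(static, 4)
       for (int i = 0; i < n; i++)
	 body (i);

   With CHUNK == 4, thread THREADID executes the iterations
   [(trip * nthreads + threadid) * 4, (trip * nthreads + threadid) * 4 + 4)
   on each pass through the outer "trip" loop of the pseudocode above.  */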
9682 static void
9683 expand_omp_for_static_chunk (struct omp_region *region,
9684 struct omp_for_data *fd, gimple *inner_stmt)
9686 tree n, s0, e0, e, t;
9687 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
9688 tree type, itype, vmain, vback, vextra;
9689 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
9690 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
9691 gimple_stmt_iterator gsi;
9692 edge se;
9693 bool broken_loop = region->cont == NULL;
9694 tree *counts = NULL;
9695 tree n1, n2, step;
9697 itype = type = TREE_TYPE (fd->loop.v);
9698 if (POINTER_TYPE_P (type))
9699 itype = signed_type_for (type);
9701 entry_bb = region->entry;
9702 se = split_block (entry_bb, last_stmt (entry_bb));
9703 entry_bb = se->src;
9704 iter_part_bb = se->dest;
9705 cont_bb = region->cont;
9706 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
9707 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
9708 gcc_assert (broken_loop
9709 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
9710 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
9711 body_bb = single_succ (seq_start_bb);
9712 if (!broken_loop)
9714 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9715 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9716 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9717 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
9719 exit_bb = region->exit;
9721 /* Trip and adjustment setup goes in ENTRY_BB. */
9722 gsi = gsi_last_bb (entry_bb);
9723 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9725 if (fd->collapse > 1)
9727 int first_zero_iter = -1, dummy = -1;
9728 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9730 counts = XALLOCAVEC (tree, fd->collapse);
9731 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9732 fin_bb, first_zero_iter,
9733 dummy_bb, dummy, l2_dom_bb);
9734 t = NULL_TREE;
9736 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9737 t = integer_one_node;
9738 else
9739 t = fold_binary (fd->loop.cond_code, boolean_type_node,
9740 fold_convert (type, fd->loop.n1),
9741 fold_convert (type, fd->loop.n2));
9742 if (fd->collapse == 1
9743 && TYPE_UNSIGNED (type)
9744 && (t == NULL_TREE || !integer_onep (t)))
9745 {
9746 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
9747 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
9748 true, GSI_SAME_STMT);
9749 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
9750 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
9751 true, GSI_SAME_STMT);
9752 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9753 NULL_TREE, NULL_TREE);
9754 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9755 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
9756 expand_omp_regimplify_p, NULL, NULL)
9757 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
9758 expand_omp_regimplify_p, NULL, NULL))
9759 {
9760 gsi = gsi_for_stmt (cond_stmt);
9761 gimple_regimplify_operands (cond_stmt, &gsi);
9762 }
9763 se = split_block (entry_bb, cond_stmt);
9764 se->flags = EDGE_TRUE_VALUE;
9765 entry_bb = se->dest;
9766 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
9767 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
9768 se->probability = REG_BR_PROB_BASE / 2000 - 1;
9769 if (gimple_in_ssa_p (cfun))
9770 {
9771 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
9772 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
9773 !gsi_end_p (gpi); gsi_next (&gpi))
9774 {
9775 gphi *phi = gpi.phi ();
9776 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
9777 se, UNKNOWN_LOCATION);
9778 }
9779 }
9780 gsi = gsi_last_bb (entry_bb);
9781 }
9783 switch (gimple_omp_for_kind (fd->for_stmt))
9785 case GF_OMP_FOR_KIND_FOR:
9786 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
9787 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
9788 break;
9789 case GF_OMP_FOR_KIND_DISTRIBUTE:
9790 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
9791 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
9792 break;
9793 default:
9794 gcc_unreachable ();
9796 nthreads = build_call_expr (nthreads, 0);
9797 nthreads = fold_convert (itype, nthreads);
9798 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
9799 true, GSI_SAME_STMT);
9800 threadid = build_call_expr (threadid, 0);
9801 threadid = fold_convert (itype, threadid);
9802 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9803 true, GSI_SAME_STMT);
9805 n1 = fd->loop.n1;
9806 n2 = fd->loop.n2;
9807 step = fd->loop.step;
9808 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9810 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
9811 OMP_CLAUSE__LOOPTEMP_);
9812 gcc_assert (innerc);
9813 n1 = OMP_CLAUSE_DECL (innerc);
9814 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9815 OMP_CLAUSE__LOOPTEMP_);
9816 gcc_assert (innerc);
9817 n2 = OMP_CLAUSE_DECL (innerc);
9819 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9820 true, NULL_TREE, true, GSI_SAME_STMT);
9821 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
9822 true, NULL_TREE, true, GSI_SAME_STMT);
9823 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9824 true, NULL_TREE, true, GSI_SAME_STMT);
9825 tree chunk_size = fold_convert (itype, fd->chunk_size);
9826 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
9827 chunk_size
9828 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
9829 GSI_SAME_STMT);
9831 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
9832 t = fold_build2 (PLUS_EXPR, itype, step, t);
9833 t = fold_build2 (PLUS_EXPR, itype, t, n2);
9834 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
9835 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
9836 t = fold_build2 (TRUNC_DIV_EXPR, itype,
9837 fold_build1 (NEGATE_EXPR, itype, t),
9838 fold_build1 (NEGATE_EXPR, itype, step));
9839 else
9840 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
9841 t = fold_convert (itype, t);
9842 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9843 true, GSI_SAME_STMT);
9845 trip_var = create_tmp_reg (itype, ".trip");
9846 if (gimple_in_ssa_p (cfun))
9848 trip_init = make_ssa_name (trip_var);
9849 trip_main = make_ssa_name (trip_var);
9850 trip_back = make_ssa_name (trip_var);
9852 else
9854 trip_init = trip_var;
9855 trip_main = trip_var;
9856 trip_back = trip_var;
9859 gassign *assign_stmt
9860 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
9861 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9863 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
9864 t = fold_build2 (MULT_EXPR, itype, t, step);
9865 if (POINTER_TYPE_P (type))
9866 t = fold_build_pointer_plus (n1, t);
9867 else
9868 t = fold_build2 (PLUS_EXPR, type, t, n1);
9869 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9870 true, GSI_SAME_STMT);
9872 /* Remove the GIMPLE_OMP_FOR. */
9873 gsi_remove (&gsi, true);
9875 gimple_stmt_iterator gsif = gsi;
9877 /* Iteration space partitioning goes in ITER_PART_BB. */
9878 gsi = gsi_last_bb (iter_part_bb);
9880 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
9881 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
9882 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
9883 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9884 false, GSI_CONTINUE_LINKING);
9886 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
9887 t = fold_build2 (MIN_EXPR, itype, t, n);
9888 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9889 false, GSI_CONTINUE_LINKING);
9891 t = build2 (LT_EXPR, boolean_type_node, s0, n);
9892 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
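/* Illustrative note, not part of the original source: a minimal worked
   example of the partitioning above, assuming n = 10, nthreads = 2 and
   chunk_size = 3:

       trip 0, thread 0: s0 = 0,  e0 = 3     thread 1: s0 = 3,  e0 = 6
       trip 1, thread 0: s0 = 6,  e0 = 9     thread 1: s0 = 9,  e0 = 10
       trip 2, thread 0: s0 = 12 (>= n)      thread 1: s0 = 15 (>= n)

   The LT_EXPR condition just emitted sends threads whose s0 >= n to
   FIN_BB.  */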
9894 /* Setup code for sequential iteration goes in SEQ_START_BB. */
9895 gsi = gsi_start_bb (seq_start_bb);
9897 tree startvar = fd->loop.v;
9898 tree endvar = NULL_TREE;
9900 if (gimple_omp_for_combined_p (fd->for_stmt))
9902 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
9903 ? gimple_omp_parallel_clauses (inner_stmt)
9904 : gimple_omp_for_clauses (inner_stmt);
9905 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
9906 gcc_assert (innerc);
9907 startvar = OMP_CLAUSE_DECL (innerc);
9908 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9909 OMP_CLAUSE__LOOPTEMP_);
9910 gcc_assert (innerc);
9911 endvar = OMP_CLAUSE_DECL (innerc);
9912 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
9913 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
9915 int i;
9916 for (i = 1; i < fd->collapse; i++)
9918 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9919 OMP_CLAUSE__LOOPTEMP_);
9920 gcc_assert (innerc);
9922 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9923 OMP_CLAUSE__LOOPTEMP_);
9924 if (innerc)
9926 /* If needed (distribute parallel for with lastprivate),
9927 propagate down the total number of iterations. */
9928 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
9929 fd->loop.n2);
9930 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
9931 GSI_CONTINUE_LINKING);
9932 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
9933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9938 t = fold_convert (itype, s0);
9939 t = fold_build2 (MULT_EXPR, itype, t, step);
9940 if (POINTER_TYPE_P (type))
9941 t = fold_build_pointer_plus (n1, t);
9942 else
9943 t = fold_build2 (PLUS_EXPR, type, t, n1);
9944 t = fold_convert (TREE_TYPE (startvar), t);
9945 t = force_gimple_operand_gsi (&gsi, t,
9946 DECL_P (startvar)
9947 && TREE_ADDRESSABLE (startvar),
9948 NULL_TREE, false, GSI_CONTINUE_LINKING);
9949 assign_stmt = gimple_build_assign (startvar, t);
9950 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9952 t = fold_convert (itype, e0);
9953 t = fold_build2 (MULT_EXPR, itype, t, step);
9954 if (POINTER_TYPE_P (type))
9955 t = fold_build_pointer_plus (n1, t);
9956 else
9957 t = fold_build2 (PLUS_EXPR, type, t, n1);
9958 t = fold_convert (TREE_TYPE (startvar), t);
9959 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9960 false, GSI_CONTINUE_LINKING);
9961 if (endvar)
9963 assign_stmt = gimple_build_assign (endvar, e);
9964 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9965 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
9966 assign_stmt = gimple_build_assign (fd->loop.v, e);
9967 else
9968 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
9969 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9971 /* Handle linear clause adjustments. */
9972 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
9973 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9974 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9975 c; c = OMP_CLAUSE_CHAIN (c))
9976 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9977 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9979 tree d = OMP_CLAUSE_DECL (c);
9980 bool is_ref = is_reference (d);
9981 tree t = d, a, dest;
9982 if (is_ref)
9983 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9984 tree type = TREE_TYPE (t);
9985 if (POINTER_TYPE_P (type))
9986 type = sizetype;
9987 dest = unshare_expr (t);
9988 tree v = create_tmp_var (TREE_TYPE (t), NULL);
9989 expand_omp_build_assign (&gsif, v, t);
9990 if (itercnt == NULL_TREE)
9992 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9994 itercntbias
9995 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
9996 fold_convert (itype, fd->loop.n1));
9997 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
9998 itercntbias, step);
9999 itercntbias
10000 = force_gimple_operand_gsi (&gsif, itercntbias, true,
10001 NULL_TREE, true,
10002 GSI_SAME_STMT);
10003 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
10004 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
10005 NULL_TREE, false,
10006 GSI_CONTINUE_LINKING);
10008 else
10009 itercnt = s0;
10011 a = fold_build2 (MULT_EXPR, type,
10012 fold_convert (type, itercnt),
10013 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
10014 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
10015 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
10016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10017 false, GSI_CONTINUE_LINKING);
10018 assign_stmt = gimple_build_assign (dest, t);
10019 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10021 if (fd->collapse > 1)
10022 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
10024 if (!broken_loop)
10026 /* The code controlling the sequential loop goes in CONT_BB,
10027 replacing the GIMPLE_OMP_CONTINUE. */
10028 gsi = gsi_last_bb (cont_bb);
10029 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
10030 vmain = gimple_omp_continue_control_use (cont_stmt);
10031 vback = gimple_omp_continue_control_def (cont_stmt);
10033 if (!gimple_omp_for_combined_p (fd->for_stmt))
10035 if (POINTER_TYPE_P (type))
10036 t = fold_build_pointer_plus (vmain, step);
10037 else
10038 t = fold_build2 (PLUS_EXPR, type, vmain, step);
10039 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
10040 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10041 true, GSI_SAME_STMT);
10042 assign_stmt = gimple_build_assign (vback, t);
10043 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
10045 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
10046 t = build2 (EQ_EXPR, boolean_type_node,
10047 build_int_cst (itype, 0),
10048 build_int_cst (itype, 1));
10049 else
10050 t = build2 (fd->loop.cond_code, boolean_type_node,
10051 DECL_P (vback) && TREE_ADDRESSABLE (vback)
10052 ? t : vback, e);
10053 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
10056 /* Remove GIMPLE_OMP_CONTINUE. */
10057 gsi_remove (&gsi, true);
10059 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
10060 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
10062 /* Trip update code goes into TRIP_UPDATE_BB. */
10063 gsi = gsi_start_bb (trip_update_bb);
10065 t = build_int_cst (itype, 1);
10066 t = build2 (PLUS_EXPR, itype, trip_main, t);
10067 assign_stmt = gimple_build_assign (trip_back, t);
10068 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10071 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
10072 gsi = gsi_last_bb (exit_bb);
10073 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
10075 t = gimple_omp_return_lhs (gsi_stmt (gsi));
10076 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
10078 gsi_remove (&gsi, true);
10080 /* Connect the new blocks. */
10081 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
10082 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
10084 if (!broken_loop)
10086 se = find_edge (cont_bb, body_bb);
10087 if (se == NULL)
10089 se = BRANCH_EDGE (cont_bb);
10090 gcc_assert (single_succ (se->dest) == body_bb);
10092 if (gimple_omp_for_combined_p (fd->for_stmt))
10094 remove_edge (se);
10095 se = NULL;
10097 else if (fd->collapse > 1)
10099 remove_edge (se);
10100 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
10102 else
10103 se->flags = EDGE_TRUE_VALUE;
10104 find_edge (cont_bb, trip_update_bb)->flags
10105 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
10107 redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
10110 if (gimple_in_ssa_p (cfun))
10112 gphi_iterator psi;
10113 gphi *phi;
10114 edge re, ene;
10115 edge_var_map *vm;
10116 size_t i;
10118 gcc_assert (fd->collapse == 1 && !broken_loop);
10120 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
10121 remove arguments of the phi nodes in fin_bb. We need to create
10122 appropriate phi nodes in iter_part_bb instead. */
10123 se = find_edge (iter_part_bb, fin_bb);
10124 re = single_succ_edge (trip_update_bb);
10125 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
10126 ene = single_succ_edge (entry_bb);
10128 psi = gsi_start_phis (fin_bb);
10129 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
10130 gsi_next (&psi), ++i)
10132 gphi *nphi;
10133 source_location locus;
10135 phi = psi.phi ();
10136 t = gimple_phi_result (phi);
10137 gcc_assert (t == redirect_edge_var_map_result (vm));
10139 if (!single_pred_p (fin_bb))
10140 t = copy_ssa_name (t, phi);
10142 nphi = create_phi_node (t, iter_part_bb);
10144 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
10145 locus = gimple_phi_arg_location_from_edge (phi, se);
10147 /* A special case -- fd->loop.v is not yet computed in
10148 iter_part_bb, so we need to use vextra instead. */
10149 if (t == fd->loop.v)
10150 t = vextra;
10151 add_phi_arg (nphi, t, ene, locus);
10152 locus = redirect_edge_var_map_location (vm);
10153 tree back_arg = redirect_edge_var_map_def (vm);
10154 add_phi_arg (nphi, back_arg, re, locus);
10155 edge ce = find_edge (cont_bb, body_bb);
10156 if (ce == NULL)
10158 ce = BRANCH_EDGE (cont_bb);
10159 gcc_assert (single_succ (ce->dest) == body_bb);
10160 ce = single_succ_edge (ce->dest);
10162 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
10163 gcc_assert (inner_loop_phi != NULL);
10164 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
10165 find_edge (seq_start_bb, body_bb), locus);
10167 if (!single_pred_p (fin_bb))
10168 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
10170 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
10171 redirect_edge_var_map_clear (re);
10172 if (single_pred_p (fin_bb))
10173 while (1)
10175 psi = gsi_start_phis (fin_bb);
10176 if (gsi_end_p (psi))
10177 break;
10178 remove_phi_node (&psi, false);
10181 /* Make phi node for trip. */
10182 phi = create_phi_node (trip_main, iter_part_bb);
10183 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
10184 UNKNOWN_LOCATION);
10185 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
10186 UNKNOWN_LOCATION);
10189 if (!broken_loop)
10190 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
10191 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
10192 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
10193 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
10194 recompute_dominator (CDI_DOMINATORS, fin_bb));
10195 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
10196 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
10197 set_immediate_dominator (CDI_DOMINATORS, body_bb,
10198 recompute_dominator (CDI_DOMINATORS, body_bb));
10200 if (!broken_loop)
10202 struct loop *loop = body_bb->loop_father;
10203 struct loop *trip_loop = alloc_loop ();
10204 trip_loop->header = iter_part_bb;
10205 trip_loop->latch = trip_update_bb;
10206 add_loop (trip_loop, iter_part_bb->loop_father);
10208 if (loop != entry_bb->loop_father)
10210 gcc_assert (loop->header == body_bb);
10211 gcc_assert (loop->latch == region->cont
10212 || single_pred (loop->latch) == region->cont);
10213 trip_loop->inner = loop;
10214 return;
10217 if (!gimple_omp_for_combined_p (fd->for_stmt))
10219 loop = alloc_loop ();
10220 loop->header = body_bb;
10221 if (collapse_bb == NULL)
10222 loop->latch = cont_bb;
10223 add_loop (loop, trip_loop);
10228 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
10229 Given parameters:
10230 for (V = N1; V cond N2; V += STEP) BODY;
10232 where COND is "<" or ">" or "!=", we generate pseudocode
10234 for (ind_var = low; ind_var < high; ind_var++)
10235 {
10236 V = n1 + (ind_var * STEP)
10238 <BODY>
10239 }
10241 In the above pseudocode, low and high are function parameters of the
10242 child function. In the function below, we insert a temporary
10243 variable and placeholder calls to two OMP functions that cannot
10244 otherwise appear in the body of a _Cilk_for (since OMP_FOR cannot be
10245 mixed with _Cilk_for). These placeholder calls are replaced with low
10246 and high by the function that handles taskreg. */
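/* Illustrative note, not part of the original source: a hypothetical
   source loop handled here, assuming -fcilkplus:

       _Cilk_for (int i = 0; i < n; i++)
	 body (i);

   The Cilk runtime invokes the outlined child function with a half-open
   [__low, __high) range of IND_VAR values, and V is recomputed from
   IND_VAR on every iteration as shown in the pseudocode above.  */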
10249 static void
10250 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
10252 bool broken_loop = region->cont == NULL;
10253 basic_block entry_bb = region->entry;
10254 basic_block cont_bb = region->cont;
10256 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10257 gcc_assert (broken_loop
10258 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10259 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10260 basic_block l1_bb, l2_bb;
10262 if (!broken_loop)
10264 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10266 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10267 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10269 else
10271 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10272 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10273 l2_bb = single_succ (l1_bb);
10275 basic_block exit_bb = region->exit;
10276 basic_block l2_dom_bb = NULL;
10278 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
10280 /* The statements below, up to the "tree high_val = ..." line, are pseudo
10281 statements used to pass information to expand_omp_taskreg.
10282 low_val and high_val will be replaced by the __low and __high
10283 parameters of the child function.
10285 The call_exprs part is a place-holder; it is mainly used
10286 to identify distinctly to the top-level part that this is
10287 where we should put low and high (reasoning given in header
10288 comment). */
10290 tree child_fndecl
10291 = gimple_omp_parallel_child_fn (
10292 as_a <gomp_parallel *> (last_stmt (region->outer->entry)));
10293 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
10294 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
10296 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
10297 high_val = t;
10298 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
10299 low_val = t;
10301 gcc_assert (low_val && high_val);
10303 tree type = TREE_TYPE (low_val);
10304 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
10305 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10307 /* Not needed in SSA form right now. */
10308 gcc_assert (!gimple_in_ssa_p (cfun));
10309 if (l2_dom_bb == NULL)
10310 l2_dom_bb = l1_bb;
10312 tree n1 = low_val;
10313 tree n2 = high_val;
10315 gimple *stmt = gimple_build_assign (ind_var, n1);
10317 /* Replace the GIMPLE_OMP_FOR statement. */
10318 gsi_replace (&gsi, stmt, true);
10320 if (!broken_loop)
10322 /* Code to control the increment goes in the CONT_BB. */
10323 gsi = gsi_last_bb (cont_bb);
10324 stmt = gsi_stmt (gsi);
10325 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10326 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
10327 build_one_cst (type));
10329 /* Replace GIMPLE_OMP_CONTINUE. */
10330 gsi_replace (&gsi, stmt, true);
10333 /* Emit the condition in L1_BB. */
10334 gsi = gsi_after_labels (l1_bb);
10335 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
10336 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
10337 fd->loop.step);
10338 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
10339 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10340 fd->loop.n1, fold_convert (sizetype, t));
10341 else
10342 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10343 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
10344 t = fold_convert (TREE_TYPE (fd->loop.v), t);
10345 expand_omp_build_assign (&gsi, fd->loop.v, t);
10347 /* The condition is always '<' since the runtime will fill in the low
10348 and high values. */
10349 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
10350 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10352 /* Remove GIMPLE_OMP_RETURN. */
10353 gsi = gsi_last_bb (exit_bb);
10354 gsi_remove (&gsi, true);
10356 /* Connect the new blocks. */
10357 remove_edge (FALLTHRU_EDGE (entry_bb));
10359 edge e, ne;
10360 if (!broken_loop)
10362 remove_edge (BRANCH_EDGE (entry_bb));
10363 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10365 e = BRANCH_EDGE (l1_bb);
10366 ne = FALLTHRU_EDGE (l1_bb);
10367 e->flags = EDGE_TRUE_VALUE;
10369 else
10371 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10373 ne = single_succ_edge (l1_bb);
10374 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10377 ne->flags = EDGE_FALSE_VALUE;
10378 e->probability = REG_BR_PROB_BASE * 7 / 8;
10379 ne->probability = REG_BR_PROB_BASE / 8;
10381 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
10382 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
10383 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
10385 if (!broken_loop)
10387 struct loop *loop = alloc_loop ();
10388 loop->header = l1_bb;
10389 loop->latch = cont_bb;
10390 add_loop (loop, l1_bb->loop_father);
10391 loop->safelen = INT_MAX;
10394 /* Pick the correct library function based on the precision of the
10395 induction variable type. */
10396 tree lib_fun = NULL_TREE;
10397 if (TYPE_PRECISION (type) == 32)
10398 lib_fun = cilk_for_32_fndecl;
10399 else if (TYPE_PRECISION (type) == 64)
10400 lib_fun = cilk_for_64_fndecl;
10401 else
10402 gcc_unreachable ();
10404 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
10406 /* WS_ARGS contains the library function flavor to call:
10407 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
10408 user-defined grain value. If the user does not define one, then zero
10409 is passed in by the parser. */
10410 vec_alloc (region->ws_args, 2);
10411 region->ws_args->quick_push (lib_fun);
10412 region->ws_args->quick_push (fd->chunk_size);
10415 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
10416 loop. Given parameters:
10418 for (V = N1; V cond N2; V += STEP) BODY;
10420 where COND is "<" or ">", we generate pseudocode
10422 V = N1;
10423 goto L1;
10424 L0:
10425 BODY;
10426 V += STEP;
10427 L1:
10428 if (V cond N2) goto L0; else goto L2;
10429 L2:
10431 For collapsed loops, given parameters:
10432 collapse(3)
10433 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
10434 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
10435 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
10436 BODY;
10438 we generate pseudocode
10440 if (cond3 is <)
10441 adj = STEP3 - 1;
10442 else
10443 adj = STEP3 + 1;
10444 count3 = (adj + N32 - N31) / STEP3;
10445 if (cond2 is <)
10446 adj = STEP2 - 1;
10447 else
10448 adj = STEP2 + 1;
10449 count2 = (adj + N22 - N21) / STEP2;
10450 if (cond1 is <)
10451 adj = STEP1 - 1;
10452 else
10453 adj = STEP1 + 1;
10454 count1 = (adj + N12 - N11) / STEP1;
10455 count = count1 * count2 * count3;
10456 V = 0;
10457 V1 = N11;
10458 V2 = N21;
10459 V3 = N31;
10460 goto L1;
10461 L0:
10462 BODY;
10463 V += 1;
10464 V3 += STEP3;
10465 V2 += (V3 cond3 N32) ? 0 : STEP2;
10466 V3 = (V3 cond3 N32) ? V3 : N31;
10467 V1 += (V2 cond2 N22) ? 0 : STEP1;
10468 V2 = (V2 cond2 N22) ? V2 : N21;
10469 L1:
10470 if (V < count) goto L0; else goto L2;
10471 L2:
10473 */
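/* Illustrative note, not part of the original source: a hypothetical
   loop nest lowered here, assuming -fopenmp:

       #pragma omp simd collapse(2) safelen(8)
       for (int i = 0; i < n; i++)
	 for (int j = 0; j < m; j++)
	   body (i, j);

   The single logical induction variable V runs over n * m iterations
   while i and j are stepped and wrapped as in the pseudocode above.  */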
10475 static void
10476 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
10478 tree type, t;
10479 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
10480 gimple_stmt_iterator gsi;
10481 gimple *stmt;
10482 gcond *cond_stmt;
10483 bool broken_loop = region->cont == NULL;
10484 edge e, ne;
10485 tree *counts = NULL;
10486 int i;
10487 tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10488 OMP_CLAUSE_SAFELEN);
10489 tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10490 OMP_CLAUSE__SIMDUID_);
10491 tree n1, n2;
10493 type = TREE_TYPE (fd->loop.v);
10494 entry_bb = region->entry;
10495 cont_bb = region->cont;
10496 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10497 gcc_assert (broken_loop
10498 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10499 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10500 if (!broken_loop)
10502 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10503 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10504 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10505 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10507 else
10509 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10510 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10511 l2_bb = single_succ (l1_bb);
10513 exit_bb = region->exit;
10514 l2_dom_bb = NULL;
10516 gsi = gsi_last_bb (entry_bb);
10518 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10519 /* Not needed in SSA form right now. */
10520 gcc_assert (!gimple_in_ssa_p (cfun));
10521 if (fd->collapse > 1)
10523 int first_zero_iter = -1, dummy = -1;
10524 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
10526 counts = XALLOCAVEC (tree, fd->collapse);
10527 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10528 zero_iter_bb, first_zero_iter,
10529 dummy_bb, dummy, l2_dom_bb);
10531 if (l2_dom_bb == NULL)
10532 l2_dom_bb = l1_bb;
10534 n1 = fd->loop.n1;
10535 n2 = fd->loop.n2;
10536 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10538 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10539 OMP_CLAUSE__LOOPTEMP_);
10540 gcc_assert (innerc);
10541 n1 = OMP_CLAUSE_DECL (innerc);
10542 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10543 OMP_CLAUSE__LOOPTEMP_);
10544 gcc_assert (innerc);
10545 n2 = OMP_CLAUSE_DECL (innerc);
10546 expand_omp_build_assign (&gsi, fd->loop.v,
10547 fold_convert (type, n1));
10548 if (fd->collapse > 1)
10550 gsi_prev (&gsi);
10551 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
10552 gsi_next (&gsi);
10555 else
10557 expand_omp_build_assign (&gsi, fd->loop.v,
10558 fold_convert (type, fd->loop.n1));
10559 if (fd->collapse > 1)
10560 for (i = 0; i < fd->collapse; i++)
10562 tree itype = TREE_TYPE (fd->loops[i].v);
10563 if (POINTER_TYPE_P (itype))
10564 itype = signed_type_for (itype);
10565 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
10566 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10570 /* Remove the GIMPLE_OMP_FOR statement. */
10571 gsi_remove (&gsi, true);
10573 if (!broken_loop)
10575 /* Code to control the increment goes in the CONT_BB. */
10576 gsi = gsi_last_bb (cont_bb);
10577 stmt = gsi_stmt (gsi);
10578 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10580 if (POINTER_TYPE_P (type))
10581 t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
10582 else
10583 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
10584 expand_omp_build_assign (&gsi, fd->loop.v, t);
10586 if (fd->collapse > 1)
10588 i = fd->collapse - 1;
10589 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
10591 t = fold_convert (sizetype, fd->loops[i].step);
10592 t = fold_build_pointer_plus (fd->loops[i].v, t);
10594 else
10596 t = fold_convert (TREE_TYPE (fd->loops[i].v),
10597 fd->loops[i].step);
10598 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
10599 fd->loops[i].v, t);
10601 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10603 for (i = fd->collapse - 1; i > 0; i--)
10605 tree itype = TREE_TYPE (fd->loops[i].v);
10606 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
10607 if (POINTER_TYPE_P (itype2))
10608 itype2 = signed_type_for (itype2);
10609 t = build3 (COND_EXPR, itype2,
10610 build2 (fd->loops[i].cond_code, boolean_type_node,
10611 fd->loops[i].v,
10612 fold_convert (itype, fd->loops[i].n2)),
10613 build_int_cst (itype2, 0),
10614 fold_convert (itype2, fd->loops[i - 1].step));
10615 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
10616 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
10617 else
10618 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
10619 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
10621 t = build3 (COND_EXPR, itype,
10622 build2 (fd->loops[i].cond_code, boolean_type_node,
10623 fd->loops[i].v,
10624 fold_convert (itype, fd->loops[i].n2)),
10625 fd->loops[i].v,
10626 fold_convert (itype, fd->loops[i].n1));
10627 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10631 /* Remove GIMPLE_OMP_CONTINUE. */
10632 gsi_remove (&gsi, true);
10635 /* Emit the condition in L1_BB. */
10636 gsi = gsi_start_bb (l1_bb);
10638 t = fold_convert (type, n2);
10639 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10640 false, GSI_CONTINUE_LINKING);
10641 tree v = fd->loop.v;
10642 if (DECL_P (v) && TREE_ADDRESSABLE (v))
10643 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
10644 false, GSI_CONTINUE_LINKING);
10645 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
10646 cond_stmt = gimple_build_cond_empty (t);
10647 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
10648 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
10649 NULL, NULL)
10650 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
10651 NULL, NULL))
10653 gsi = gsi_for_stmt (cond_stmt);
10654 gimple_regimplify_operands (cond_stmt, &gsi);
10657 /* Remove GIMPLE_OMP_RETURN. */
10658 gsi = gsi_last_bb (exit_bb);
10659 gsi_remove (&gsi, true);
10661 /* Connect the new blocks. */
10662 remove_edge (FALLTHRU_EDGE (entry_bb));
10664 if (!broken_loop)
10666 remove_edge (BRANCH_EDGE (entry_bb));
10667 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10669 e = BRANCH_EDGE (l1_bb);
10670 ne = FALLTHRU_EDGE (l1_bb);
10671 e->flags = EDGE_TRUE_VALUE;
10673 else
10675 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10677 ne = single_succ_edge (l1_bb);
10678 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10681 ne->flags = EDGE_FALSE_VALUE;
10682 e->probability = REG_BR_PROB_BASE * 7 / 8;
10683 ne->probability = REG_BR_PROB_BASE / 8;
10685 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
10686 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
10687 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
10689 if (!broken_loop)
10691 struct loop *loop = alloc_loop ();
10692 loop->header = l1_bb;
10693 loop->latch = cont_bb;
10694 add_loop (loop, l1_bb->loop_father);
10695 if (safelen == NULL_TREE)
10696 loop->safelen = INT_MAX;
10697 else
10699 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
10700 if (TREE_CODE (safelen) != INTEGER_CST)
10701 loop->safelen = 0;
10702 else if (!tree_fits_uhwi_p (safelen)
10703 || tree_to_uhwi (safelen) > INT_MAX)
10704 loop->safelen = INT_MAX;
10705 else
10706 loop->safelen = tree_to_uhwi (safelen);
10707 if (loop->safelen == 1)
10708 loop->safelen = 0;
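/* Illustrative note, not part of the original source: e.g. a hypothetical
   "#pragma omp simd safelen(8)" yields loop->safelen == 8, telling the
   vectorizer that iterations up to 8 apart may execute concurrently;
   the degenerate safelen(1) case, handled just above, is treated as no
   simd parallelism at all.  */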
10710 if (simduid)
10712 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
10713 cfun->has_simduid_loops = true;
10715 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
10716 the loop. */
10717 if ((flag_tree_loop_vectorize
10718 || (!global_options_set.x_flag_tree_loop_vectorize
10719 && !global_options_set.x_flag_tree_vectorize))
10720 && flag_tree_loop_optimize
10721 && loop->safelen > 1)
10723 loop->force_vectorize = true;
10724 cfun->has_force_vectorize_loops = true;
10727 else if (simduid)
10728 cfun->has_simduid_loops = true;
10731 /* Taskloop construct is represented after gimplification with
10732 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
10733 in between them. This routine expands the outer GIMPLE_OMP_FOR,
10734 which should just compute all the needed loop temporaries
10735 for GIMPLE_OMP_TASK. */
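/* Illustrative note, not part of the original source: a hypothetical
   taskloop that reaches this expansion, assuming -fopenmp:

       #pragma omp taskloop grainsize(64)
       for (long i = 0; i < n; i++)
	 body (i);

   The outer GIMPLE_OMP_FOR expanded here only materializes n1/n2 (and,
   for collapsed loops, the total iteration count) into _looptemp_
   clauses consumed by the GIMPLE_OMP_TASK.  */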
10737 static void
10738 expand_omp_taskloop_for_outer (struct omp_region *region,
10739 struct omp_for_data *fd,
10740 gimple *inner_stmt)
10742 tree type, bias = NULL_TREE;
10743 basic_block entry_bb, cont_bb, exit_bb;
10744 gimple_stmt_iterator gsi;
10745 gassign *assign_stmt;
10746 tree *counts = NULL;
10747 int i;
10749 gcc_assert (inner_stmt);
10750 gcc_assert (region->cont);
10751 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
10752 && gimple_omp_task_taskloop_p (inner_stmt));
10753 type = TREE_TYPE (fd->loop.v);
10755 /* See if we need to bias by LLONG_MIN. */
10756 if (fd->iter_type == long_long_unsigned_type_node
10757 && TREE_CODE (type) == INTEGER_TYPE
10758 && !TYPE_UNSIGNED (type))
10760 tree n1, n2;
10762 if (fd->loop.cond_code == LT_EXPR)
10764 n1 = fd->loop.n1;
10765 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
10767 else
10769 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
10770 n2 = fd->loop.n1;
10772 if (TREE_CODE (n1) != INTEGER_CST
10773 || TREE_CODE (n2) != INTEGER_CST
10774 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
10775 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
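/* Illustrative note, not part of the original source: the bias exists
   because the runtime iterates in an unsigned long long space.  For a
   signed 64-bit V, adding TYPE_MIN_VALUE (i.e. LLONG_MIN) maps
   [LLONG_MIN, LLONG_MAX] monotonically onto [0, ULLONG_MAX], so the
   runtime's unsigned comparisons still order the iterations correctly;
   the inner taskloop expansion undoes the bias before values flow back
   into V's type.  */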
10778 entry_bb = region->entry;
10779 cont_bb = region->cont;
10780 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10781 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10782 exit_bb = region->exit;
10784 gsi = gsi_last_bb (entry_bb);
10785 gimple *for_stmt = gsi_stmt (gsi);
10786 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
10787 if (fd->collapse > 1)
10789 int first_zero_iter = -1, dummy = -1;
10790 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
10792 counts = XALLOCAVEC (tree, fd->collapse);
10793 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10794 zero_iter_bb, first_zero_iter,
10795 dummy_bb, dummy, l2_dom_bb);
10797 if (zero_iter_bb)
10799 /* Some counts[i] vars might be uninitialized if
10800 some loop has zero iterations. But the body shouldn't
10801 be executed in that case, so just avoid uninit warnings. */
10802 for (i = first_zero_iter; i < fd->collapse; i++)
10803 if (SSA_VAR_P (counts[i]))
10804 TREE_NO_WARNING (counts[i]) = 1;
10805 gsi_prev (&gsi);
10806 edge e = split_block (entry_bb, gsi_stmt (gsi));
10807 entry_bb = e->dest;
10808 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
10809 gsi = gsi_last_bb (entry_bb);
10810 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
10811 get_immediate_dominator (CDI_DOMINATORS,
10812 zero_iter_bb));
10816 tree t0, t1;
10817 t1 = fd->loop.n2;
10818 t0 = fd->loop.n1;
10819 if (POINTER_TYPE_P (TREE_TYPE (t0))
10820 && TYPE_PRECISION (TREE_TYPE (t0))
10821 != TYPE_PRECISION (fd->iter_type))
10823 /* Avoid casting pointers to integer of a different size. */
10824 tree itype = signed_type_for (type);
10825 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
10826 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
10828 else
10830 t1 = fold_convert (fd->iter_type, t1);
10831 t0 = fold_convert (fd->iter_type, t0);
10833 if (bias)
10835 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
10836 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
10839 tree innerc = find_omp_clause (gimple_omp_task_clauses (inner_stmt),
10840 OMP_CLAUSE__LOOPTEMP_);
10841 gcc_assert (innerc);
10842 tree startvar = OMP_CLAUSE_DECL (innerc);
10843 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
10844 gcc_assert (innerc);
10845 tree endvar = OMP_CLAUSE_DECL (innerc);
10846 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
10848 gcc_assert (innerc);
10849 for (i = 1; i < fd->collapse; i++)
10851 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10852 OMP_CLAUSE__LOOPTEMP_);
10853 gcc_assert (innerc);
10855 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10856 OMP_CLAUSE__LOOPTEMP_);
10857 if (innerc)
10859 /* If needed (inner taskloop has lastprivate clause), propagate
10860 down the total number of iterations. */
10861 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
10862 NULL_TREE, false,
10863 GSI_CONTINUE_LINKING);
10864 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
10865 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10869 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
10870 GSI_CONTINUE_LINKING);
10871 assign_stmt = gimple_build_assign (startvar, t0);
10872 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10874 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
10875 GSI_CONTINUE_LINKING);
10876 assign_stmt = gimple_build_assign (endvar, t1);
10877 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10878 if (fd->collapse > 1)
10879 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
10881 /* Remove the GIMPLE_OMP_FOR statement. */
10882 gsi = gsi_for_stmt (for_stmt);
10883 gsi_remove (&gsi, true);
10885 gsi = gsi_last_bb (cont_bb);
10886 gsi_remove (&gsi, true);
10888 gsi = gsi_last_bb (exit_bb);
10889 gsi_remove (&gsi, true);
10891 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
10892 remove_edge (BRANCH_EDGE (entry_bb));
10893 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
10894 remove_edge (BRANCH_EDGE (cont_bb));
10895 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
10896 set_immediate_dominator (CDI_DOMINATORS, region->entry,
10897 recompute_dominator (CDI_DOMINATORS, region->entry));
10900 /* Taskloop construct is represented after gimplification with
10901 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
10902 in between them. This routine expands the inner GIMPLE_OMP_FOR.
10903 The GOMP_taskloop{,_ull} function arranges for each task to be given just
10904 a single range of iterations. */
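/* Illustrative note, not part of the original source: conceptually, if
   the runtime splits an n-iteration taskloop into tasks of 64 iterations
   each, every created task receives its own [start, end) pair through
   the two _looptemp_ clauses, and the code expanded below simply runs V
   across that one range.  */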
10906 static void
10907 expand_omp_taskloop_for_inner (struct omp_region *region,
10908 struct omp_for_data *fd,
10909 gimple *inner_stmt)
10911 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
10912 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
10913 basic_block fin_bb;
10914 gimple_stmt_iterator gsi;
10915 edge ep;
10916 bool broken_loop = region->cont == NULL;
10917 tree *counts = NULL;
10918 tree n1, n2, step;
10920 itype = type = TREE_TYPE (fd->loop.v);
10921 if (POINTER_TYPE_P (type))
10922 itype = signed_type_for (type);
10924 /* See if we need to bias by LLONG_MIN. */
10925 if (fd->iter_type == long_long_unsigned_type_node
10926 && TREE_CODE (type) == INTEGER_TYPE
10927 && !TYPE_UNSIGNED (type))
10929 tree n1, n2;
10931 if (fd->loop.cond_code == LT_EXPR)
10933 n1 = fd->loop.n1;
10934 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
10936 else
10938 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
10939 n2 = fd->loop.n1;
10941 if (TREE_CODE (n1) != INTEGER_CST
10942 || TREE_CODE (n2) != INTEGER_CST
10943 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
10944 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
10947 entry_bb = region->entry;
10948 cont_bb = region->cont;
10949 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10950 fin_bb = BRANCH_EDGE (entry_bb)->dest;
10951 gcc_assert (broken_loop
10952 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
10953 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
10954 if (!broken_loop)
10956 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
10957 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10959 exit_bb = region->exit;
10961 /* Iteration space partitioning goes in ENTRY_BB. */
10962 gsi = gsi_last_bb (entry_bb);
10963 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10965 if (fd->collapse > 1)
10967 int first_zero_iter = -1, dummy = -1;
10968 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
10970 counts = XALLOCAVEC (tree, fd->collapse);
10971 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10972 fin_bb, first_zero_iter,
10973 dummy_bb, dummy, l2_dom_bb);
10974 t = NULL_TREE;
10976 else
10977 t = integer_one_node;
10979 step = fd->loop.step;
10980 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10981 OMP_CLAUSE__LOOPTEMP_);
10982 gcc_assert (innerc);
10983 n1 = OMP_CLAUSE_DECL (innerc);
10984 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
10985 gcc_assert (innerc);
10986 n2 = OMP_CLAUSE_DECL (innerc);
10987 if (bias)
10989 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
10990 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
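/* Illustrative note, not part of the original source: the task's start
   and end were biased by LLONG_MIN in the outer expansion; adding the
   same bias again here cancels it modulo 2^64, recovering the original
   signed values before the conversion back to TYPE below.  */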
10992 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
10993 true, NULL_TREE, true, GSI_SAME_STMT);
10994 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
10995 true, NULL_TREE, true, GSI_SAME_STMT);
10996 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
10997 true, NULL_TREE, true, GSI_SAME_STMT);
10999 tree startvar = fd->loop.v;
11000 tree endvar = NULL_TREE;
11002 if (gimple_omp_for_combined_p (fd->for_stmt))
11004 tree clauses = gimple_omp_for_clauses (inner_stmt);
11005 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
11006 gcc_assert (innerc);
11007 startvar = OMP_CLAUSE_DECL (innerc);
11008 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
11009 OMP_CLAUSE__LOOPTEMP_);
11010 gcc_assert (innerc);
11011 endvar = OMP_CLAUSE_DECL (innerc);
11013 t = fold_convert (TREE_TYPE (startvar), n1);
11014 t = force_gimple_operand_gsi (&gsi, t,
11015 DECL_P (startvar)
11016 && TREE_ADDRESSABLE (startvar),
11017 NULL_TREE, false, GSI_CONTINUE_LINKING);
11018 gimple *assign_stmt = gimple_build_assign (startvar, t);
11019 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11021 t = fold_convert (TREE_TYPE (startvar), n2);
11022 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
11023 false, GSI_CONTINUE_LINKING);
11024 if (endvar)
11026 assign_stmt = gimple_build_assign (endvar, e);
11027 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11028 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
11029 assign_stmt = gimple_build_assign (fd->loop.v, e);
11030 else
11031 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
11032 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
11034 if (fd->collapse > 1)
11035 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
11037 if (!broken_loop)
11039 /* The code controlling the sequential loop replaces the
11040 GIMPLE_OMP_CONTINUE. */
11041 gsi = gsi_last_bb (cont_bb);
11042 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11043 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
11044 vmain = gimple_omp_continue_control_use (cont_stmt);
11045 vback = gimple_omp_continue_control_def (cont_stmt);
11047 if (!gimple_omp_for_combined_p (fd->for_stmt))
11049 if (POINTER_TYPE_P (type))
11050 t = fold_build_pointer_plus (vmain, step);
11051 else
11052 t = fold_build2 (PLUS_EXPR, type, vmain, step);
11053 t = force_gimple_operand_gsi (&gsi, t,
11054 DECL_P (vback)
11055 && TREE_ADDRESSABLE (vback),
11056 NULL_TREE, true, GSI_SAME_STMT);
11057 assign_stmt = gimple_build_assign (vback, t);
11058 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
11060 t = build2 (fd->loop.cond_code, boolean_type_node,
11061 DECL_P (vback) && TREE_ADDRESSABLE (vback)
11062 ? t : vback, e);
11063 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
11066 /* Remove the GIMPLE_OMP_CONTINUE statement. */
11067 gsi_remove (&gsi, true);
11069 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
11070 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
11073 /* Remove the GIMPLE_OMP_FOR statement. */
11074 gsi = gsi_for_stmt (fd->for_stmt);
11075 gsi_remove (&gsi, true);
11077 /* Remove the GIMPLE_OMP_RETURN statement. */
11078 gsi = gsi_last_bb (exit_bb);
11079 gsi_remove (&gsi, true);
11081 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
11082 if (!broken_loop)
11083 remove_edge (BRANCH_EDGE (entry_bb));
11084 else
11086 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
11087 region->outer->cont = NULL;
11090 /* Connect all the blocks. */
11091 if (!broken_loop)
11093 ep = find_edge (cont_bb, body_bb);
11094 if (gimple_omp_for_combined_p (fd->for_stmt))
11096 remove_edge (ep);
11097 ep = NULL;
11099 else if (fd->collapse > 1)
11101 remove_edge (ep);
11102 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
11104 else
11105 ep->flags = EDGE_TRUE_VALUE;
11106 find_edge (cont_bb, fin_bb)->flags
11107 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
11110 set_immediate_dominator (CDI_DOMINATORS, body_bb,
11111 recompute_dominator (CDI_DOMINATORS, body_bb));
11112 if (!broken_loop)
11113 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
11114 recompute_dominator (CDI_DOMINATORS, fin_bb));
11116 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
11118 struct loop *loop = alloc_loop ();
11119 loop->header = body_bb;
11120 if (collapse_bb == NULL)
11121 loop->latch = cont_bb;
11122 add_loop (loop, body_bb->loop_father);
11126 /* A subroutine of expand_omp_for. Generate code for an OpenACC
11127 partitioned loop. The lowering here is abstracted, in that the
11128 loop parameters are passed through internal functions, which are
11129 further lowered by oacc_device_lower, once we get to the target
11130 compiler. The loop is of the form:
11132 for (V = B; V LTGT E; V += S) {BODY}
11134 where LTGT is < or >. We may have a specified chunking size, CHUNKING
11135 (constant 0 for no chunking) and we will have a GWV partitioning
11136 mask, specifying dimensions over which the loop is to be
11137 partitioned (see note below). We generate code that looks like:
11139 <entry_bb> [incoming FALL->body, BRANCH->exit]
11140 typedef signedintify (typeof (V)) T; // underlying signed integral type
11141 T range = E - B;
11142 T chunk_no = 0;
11143 T DIR = LTGT == '<' ? +1 : -1;
11144 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
11145 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
11147 <head_bb> [created by splitting end of entry_bb]
11148 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
11149 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
11150 if (!(offset LTGT bound)) goto bottom_bb;
11152 <body_bb> [incoming]
11153 V = B + offset;
11154 {BODY}
11156 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
11157 offset += step;
11158 if (offset LTGT bound) goto body_bb; [*]
11160 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
11161 chunk_no++;
11162 	 if (chunk_no < chunk_max) goto head_bb;
11164 <exit_bb> [incoming]
11165 V = B + ((range -/+ 1) / S +/- 1) * S [*]
11167 [*] Needed if V live at end of loop
11169 Note: CHUNKING & GWV mask are specified explicitly here. This is a
11170 	 transition, and will be specified by a more general mechanism shortly.  */
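/* As an illustrative sketch (an assumed example, not from the testsuite),
   a source loop such as

     #pragma acc parallel loop gang
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   reaches this function as "for (V = 0; V < n; V += 1)" and is rewritten
   into the chunked form above; the IFN_GOACC_LOOP_* internal calls are
   only turned into per-dimension code later, by oacc_device_lower.  */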
11173 static void
11174 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
11176 tree v = fd->loop.v;
11177 enum tree_code cond_code = fd->loop.cond_code;
11178 enum tree_code plus_code = PLUS_EXPR;
11180 tree chunk_size = integer_minus_one_node;
11181 tree gwv = integer_zero_node;
11182 tree iter_type = TREE_TYPE (v);
11183 tree diff_type = iter_type;
11184 tree plus_type = iter_type;
11185 struct oacc_collapse *counts = NULL;
11187 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
11188 == GF_OMP_FOR_KIND_OACC_LOOP);
11189 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
11190 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
11192 if (POINTER_TYPE_P (iter_type))
11194 plus_code = POINTER_PLUS_EXPR;
11195 plus_type = sizetype;
11197 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
11198 diff_type = signed_type_for (diff_type);
11200 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
11201 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
11202 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
11203 basic_block bottom_bb = NULL;
11205 	 /* entry_bb has two successors; the branch edge is to the exit
11206 block, fallthrough edge to body. */
11207 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
11208 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
11210 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
11211 body_bb, or to a block whose only successor is the body_bb. Its
11212 fallthrough successor is the final block (same as the branch
11213 successor of the entry_bb). */
11214 if (cont_bb)
11216 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
11217 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
11219 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
11220 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
11222 else
11223 gcc_assert (!gimple_in_ssa_p (cfun));
11225 /* The exit block only has entry_bb and cont_bb as predecessors. */
11226 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
11228 tree chunk_no;
11229 tree chunk_max = NULL_TREE;
11230 tree bound, offset;
11231 tree step = create_tmp_var (diff_type, ".step");
11232 bool up = cond_code == LT_EXPR;
11233 tree dir = build_int_cst (diff_type, up ? +1 : -1);
11234 	 bool chunking = !gimple_in_ssa_p (cfun);
11235 bool negating;
11237 /* SSA instances. */
11238 tree offset_incr = NULL_TREE;
11239 tree offset_init = NULL_TREE;
11241 gimple_stmt_iterator gsi;
11242 gassign *ass;
11243 gcall *call;
11244 gimple *stmt;
11245 tree expr;
11246 location_t loc;
11247 edge split, be, fte;
11249 /* Split the end of entry_bb to create head_bb. */
11250 split = split_block (entry_bb, last_stmt (entry_bb));
11251 basic_block head_bb = split->dest;
11252 entry_bb = split->src;
11254 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
11255 gsi = gsi_last_bb (entry_bb);
11256 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
11257 loc = gimple_location (for_stmt);
11259 if (gimple_in_ssa_p (cfun))
11261 offset_init = gimple_omp_for_index (for_stmt, 0);
11262 gcc_assert (integer_zerop (fd->loop.n1));
11263 /* The SSA parallelizer does gang parallelism. */
11264 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
11267 if (fd->collapse > 1)
11269 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
11270 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
11271 TREE_TYPE (fd->loop.n2));
11273 if (SSA_VAR_P (fd->loop.n2))
11275 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
11276 true, GSI_SAME_STMT);
11277 ass = gimple_build_assign (fd->loop.n2, total);
11278 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11283 tree b = fd->loop.n1;
11284 tree e = fd->loop.n2;
11285 tree s = fd->loop.step;
11287 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
11288 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
11290 /* Convert the step, avoiding possible unsigned->signed overflow. */
11291 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
11292 if (negating)
11293 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
11294 s = fold_convert (diff_type, s);
11295 if (negating)
11296 s = fold_build1 (NEGATE_EXPR, diff_type, s);
11297 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
11299 if (!chunking)
11300 chunk_size = integer_zero_node;
11301 expr = fold_convert (diff_type, chunk_size);
11302 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
11303 NULL_TREE, true, GSI_SAME_STMT);
11304 /* Determine the range, avoiding possible unsigned->signed overflow. */
11305 negating = !up && TYPE_UNSIGNED (iter_type);
11306 expr = fold_build2 (MINUS_EXPR, plus_type,
11307 fold_convert (plus_type, negating ? b : e),
11308 fold_convert (plus_type, negating ? e : b));
11309 expr = fold_convert (diff_type, expr);
11310 if (negating)
11311 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
11312 tree range = force_gimple_operand_gsi (&gsi, expr, true,
11313 NULL_TREE, true, GSI_SAME_STMT);
11315 chunk_no = build_int_cst (diff_type, 0);
11316 if (chunking)
11318 gcc_assert (!gimple_in_ssa_p (cfun));
11320 expr = chunk_no;
11321 chunk_max = create_tmp_var (diff_type, ".chunk_max");
11322 chunk_no = create_tmp_var (diff_type, ".chunk_no");
11324 ass = gimple_build_assign (chunk_no, expr);
11325 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11327 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11328 build_int_cst (integer_type_node,
11329 IFN_GOACC_LOOP_CHUNKS),
11330 dir, range, s, chunk_size, gwv);
11331 gimple_call_set_lhs (call, chunk_max);
11332 gimple_set_location (call, loc);
11333 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11335 else
11336 chunk_size = chunk_no;
11338 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11339 build_int_cst (integer_type_node,
11340 IFN_GOACC_LOOP_STEP),
11341 dir, range, s, chunk_size, gwv);
11342 gimple_call_set_lhs (call, step);
11343 gimple_set_location (call, loc);
11344 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11346 /* Remove the GIMPLE_OMP_FOR. */
11347 gsi_remove (&gsi, true);
11349 /* Fixup edges from head_bb */
11350 be = BRANCH_EDGE (head_bb);
11351 fte = FALLTHRU_EDGE (head_bb);
11352 be->flags |= EDGE_FALSE_VALUE;
11353 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
11355 basic_block body_bb = fte->dest;
11357 if (gimple_in_ssa_p (cfun))
11359 gsi = gsi_last_bb (cont_bb);
11360 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11362 offset = gimple_omp_continue_control_use (cont_stmt);
11363 offset_incr = gimple_omp_continue_control_def (cont_stmt);
11365 else
11367 offset = create_tmp_var (diff_type, ".offset");
11368 offset_init = offset_incr = offset;
11370 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
11372 /* Loop offset & bound go into head_bb. */
11373 gsi = gsi_start_bb (head_bb);
11375 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11376 build_int_cst (integer_type_node,
11377 IFN_GOACC_LOOP_OFFSET),
11378 dir, range, s,
11379 chunk_size, gwv, chunk_no);
11380 gimple_call_set_lhs (call, offset_init);
11381 gimple_set_location (call, loc);
11382 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11384 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11385 build_int_cst (integer_type_node,
11386 IFN_GOACC_LOOP_BOUND),
11387 dir, range, s,
11388 chunk_size, gwv, offset_init);
11389 gimple_call_set_lhs (call, bound);
11390 gimple_set_location (call, loc);
11391 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11393 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
11394 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11395 GSI_CONTINUE_LINKING);
11397 /* V assignment goes into body_bb. */
11398 if (!gimple_in_ssa_p (cfun))
11400 gsi = gsi_start_bb (body_bb);
11402 expr = build2 (plus_code, iter_type, b,
11403 fold_convert (plus_type, offset));
11404 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11405 true, GSI_SAME_STMT);
11406 ass = gimple_build_assign (v, expr);
11407 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11408 if (fd->collapse > 1)
11409 expand_oacc_collapse_vars (fd, &gsi, counts, v);
11412 /* Loop increment goes into cont_bb. If this is not a loop, we
11413 will have spawned threads as if it was, and each one will
11414 execute one iteration. The specification is not explicit about
11415 whether such constructs are ill-formed or not, and they can
11416 occur, especially when noreturn routines are involved. */
11417 if (cont_bb)
11419 gsi = gsi_last_bb (cont_bb);
11420 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11421 loc = gimple_location (cont_stmt);
11423 /* Increment offset. */
11424 if (gimple_in_ssa_p (cfun))
11425 	 expr = build2 (plus_code, iter_type, offset,
11426 fold_convert (plus_type, step));
11427 else
11428 expr = build2 (PLUS_EXPR, diff_type, offset, step);
11429 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11430 true, GSI_SAME_STMT);
11431 ass = gimple_build_assign (offset_incr, expr);
11432 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11433 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
11434 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
11436 /* Remove the GIMPLE_OMP_CONTINUE. */
11437 gsi_remove (&gsi, true);
11439 /* Fixup edges from cont_bb */
11440 be = BRANCH_EDGE (cont_bb);
11441 fte = FALLTHRU_EDGE (cont_bb);
11442 be->flags |= EDGE_TRUE_VALUE;
11443 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11445 if (chunking)
11447 /* Split the beginning of exit_bb to make bottom_bb. We
11448 need to insert a nop at the start, because splitting is
11449 after a stmt, not before. */
11450 gsi = gsi_start_bb (exit_bb);
11451 stmt = gimple_build_nop ();
11452 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
11453 split = split_block (exit_bb, stmt);
11454 bottom_bb = split->src;
11455 exit_bb = split->dest;
11456 gsi = gsi_last_bb (bottom_bb);
11458 /* Chunk increment and test goes into bottom_bb. */
11459 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
11460 build_int_cst (diff_type, 1));
11461 ass = gimple_build_assign (chunk_no, expr);
11462 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
11464 /* Chunk test at end of bottom_bb. */
11465 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
11466 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11467 GSI_CONTINUE_LINKING);
11469 /* Fixup edges from bottom_bb. */
11470 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11471 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
11475 gsi = gsi_last_bb (exit_bb);
11476 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
11477 loc = gimple_location (gsi_stmt (gsi));
11479 if (!gimple_in_ssa_p (cfun))
11481 /* Insert the final value of V, in case it is live. This is the
11482 value for the only thread that survives past the join. */
11483 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
11484 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
11485 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
11486 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
11487 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
11488 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11489 true, GSI_SAME_STMT);
11490 ass = gimple_build_assign (v, expr);
11491 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11494 /* Remove the OMP_RETURN. */
11495 gsi_remove (&gsi, true);
11497 if (cont_bb)
11499 /* We now have one or two nested loops. Update the loop
11500 structures. */
11501 struct loop *parent = entry_bb->loop_father;
11502 struct loop *body = body_bb->loop_father;
11504 if (chunking)
11506 struct loop *chunk_loop = alloc_loop ();
11507 chunk_loop->header = head_bb;
11508 chunk_loop->latch = bottom_bb;
11509 add_loop (chunk_loop, parent);
11510 parent = chunk_loop;
11512 else if (parent != body)
11514 gcc_assert (body->header == body_bb);
11515 gcc_assert (body->latch == cont_bb
11516 || single_pred (body->latch) == cont_bb);
11517 parent = NULL;
11520 if (parent)
11522 struct loop *body_loop = alloc_loop ();
11523 body_loop->header = body_bb;
11524 body_loop->latch = cont_bb;
11525 add_loop (body_loop, parent);
11530 /* Expand the OMP loop defined by REGION. */
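/* For example (a sketch of the dispatch below, not an exhaustive table),
   a loop with "schedule(dynamic, 4)" and no ordered clause is routed to
   BUILT_IN_GOMP_LOOP_DYNAMIC_START / BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT via
   expand_omp_for_generic, while plain "schedule(static)" without ordered
   takes the specialized expand_omp_for_static_nochunk path instead.  */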
11532 static void
11533 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
11535 struct omp_for_data fd;
11536 struct omp_for_data_loop *loops;
11538 loops
11539 = (struct omp_for_data_loop *)
11540 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
11541 * sizeof (struct omp_for_data_loop));
11542 extract_omp_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
11543 &fd, loops);
11544 region->sched_kind = fd.sched_kind;
11545 region->sched_modifiers = fd.sched_modifiers;
11547 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
11548 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11549 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11550 if (region->cont)
11552 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
11553 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11554 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11556 else
11557 	 /* If there isn't a continue then this is a degenerate case where
11558 the introduction of abnormal edges during lowering will prevent
11559 original loops from being detected. Fix that up. */
11560 loops_state_set (LOOPS_NEED_FIXUP);
11562 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
11563 expand_omp_simd (region, &fd);
11564 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
11565 expand_cilk_for (region, &fd);
11566 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
11568 gcc_assert (!inner_stmt);
11569 expand_oacc_for (region, &fd);
11571 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
11573 if (gimple_omp_for_combined_into_p (fd.for_stmt))
11574 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
11575 else
11576 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
11578 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
11579 && !fd.have_ordered)
11581 if (fd.chunk_size == NULL)
11582 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
11583 else
11584 expand_omp_for_static_chunk (region, &fd, inner_stmt);
11586 else
11588 int fn_index, start_ix, next_ix;
11590 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
11591 == GF_OMP_FOR_KIND_FOR);
11592 if (fd.chunk_size == NULL
11593 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
11594 fd.chunk_size = integer_zero_node;
11595 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
11596 switch (fd.sched_kind)
11598 case OMP_CLAUSE_SCHEDULE_RUNTIME:
11599 fn_index = 3;
11600 break;
11601 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
11602 case OMP_CLAUSE_SCHEDULE_GUIDED:
11603 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
11604 && !fd.ordered
11605 && !fd.have_ordered)
11607 fn_index = 3 + fd.sched_kind;
11608 break;
11610 /* FALLTHRU */
11611 default:
11612 fn_index = fd.sched_kind;
11613 break;
11615 if (!fd.ordered)
11616 fn_index += fd.have_ordered * 6;
11617 if (fd.ordered)
11618 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
11619 else
11620 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
11621 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
11622 if (fd.iter_type == long_long_unsigned_type_node)
11624 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
11625 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
11626 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
11627 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
11629 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
11630 (enum built_in_function) next_ix, inner_stmt);
11633 if (gimple_in_ssa_p (cfun))
11634 update_ssa (TODO_update_ssa_only_virtuals);
11638 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
11640 v = GOMP_sections_start (n);
11642 switch (v)
11644 case 0:
11645 goto L2;
11646 case 1:
11647 section 1;
11648 goto L1;
11649 case 2:
11651 case n:
11653 default:
11654 abort ();
11657 v = GOMP_sections_next ();
11658 goto L0;
11660 reduction;
11662 If this is a combined parallel sections, replace the call to
11663 GOMP_sections_start with call to GOMP_sections_next. */
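/* As an assumed example, for

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   the switch gets two case labels besides the exit case: case 1 runs
   foo (), case 2 runs bar (), and each jumps back to fetch the next
   section index from GOMP_sections_next.  */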
11665 static void
11666 expand_omp_sections (struct omp_region *region)
11668 tree t, u, vin = NULL, vmain, vnext, l2;
11669 unsigned len;
11670 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
11671 gimple_stmt_iterator si, switch_si;
11672 gomp_sections *sections_stmt;
11673 gimple *stmt;
11674 gomp_continue *cont;
11675 edge_iterator ei;
11676 edge e;
11677 struct omp_region *inner;
11678 unsigned i, casei;
11679 bool exit_reachable = region->cont != NULL;
11681 gcc_assert (region->exit != NULL);
11682 entry_bb = region->entry;
11683 l0_bb = single_succ (entry_bb);
11684 l1_bb = region->cont;
11685 l2_bb = region->exit;
11686 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
11687 l2 = gimple_block_label (l2_bb);
11688 else
11690 /* This can happen if there are reductions. */
11691 len = EDGE_COUNT (l0_bb->succs);
11692 gcc_assert (len > 0);
11693 e = EDGE_SUCC (l0_bb, len - 1);
11694 si = gsi_last_bb (e->dest);
11695 l2 = NULL_TREE;
11696 if (gsi_end_p (si)
11697 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
11698 l2 = gimple_block_label (e->dest);
11699 else
11700 FOR_EACH_EDGE (e, ei, l0_bb->succs)
11702 si = gsi_last_bb (e->dest);
11703 if (gsi_end_p (si)
11704 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
11706 l2 = gimple_block_label (e->dest);
11707 break;
11711 if (exit_reachable)
11712 default_bb = create_empty_bb (l1_bb->prev_bb);
11713 else
11714 default_bb = create_empty_bb (l0_bb);
11716 /* We will build a switch() with enough cases for all the
11717 	 GIMPLE_OMP_SECTION regions, a '0' case taken when no more work remains,
11718 	 and a default case to abort if something goes wrong. */
11719 len = EDGE_COUNT (l0_bb->succs);
11721 /* Use vec::quick_push on label_vec throughout, since we know the size
11722 in advance. */
11723 auto_vec<tree> label_vec (len);
11725 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
11726 GIMPLE_OMP_SECTIONS statement. */
11727 si = gsi_last_bb (entry_bb);
11728 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
11729 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
11730 vin = gimple_omp_sections_control (sections_stmt);
11731 if (!is_combined_parallel (region))
11733 /* If we are not inside a combined parallel+sections region,
11734 call GOMP_sections_start. */
11735 t = build_int_cst (unsigned_type_node, len - 1);
11736 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
11737 stmt = gimple_build_call (u, 1, t);
11739 else
11741 /* Otherwise, call GOMP_sections_next. */
11742 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
11743 stmt = gimple_build_call (u, 0);
11745 gimple_call_set_lhs (stmt, vin);
11746 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11747 gsi_remove (&si, true);
11749 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
11750 L0_BB. */
11751 switch_si = gsi_last_bb (l0_bb);
11752 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
11753 if (exit_reachable)
11755 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
11756 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
11757 vmain = gimple_omp_continue_control_use (cont);
11758 vnext = gimple_omp_continue_control_def (cont);
11760 else
11762 vmain = vin;
11763 vnext = NULL_TREE;
11766 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
11767 label_vec.quick_push (t);
11768 i = 1;
11770 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
11771 for (inner = region->inner, casei = 1;
11772 inner;
11773 inner = inner->next, i++, casei++)
11775 basic_block s_entry_bb, s_exit_bb;
11777 /* Skip optional reduction region. */
11778 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
11780 --i;
11781 --casei;
11782 continue;
11785 s_entry_bb = inner->entry;
11786 s_exit_bb = inner->exit;
11788 t = gimple_block_label (s_entry_bb);
11789 u = build_int_cst (unsigned_type_node, casei);
11790 u = build_case_label (u, NULL, t);
11791 label_vec.quick_push (u);
11793 si = gsi_last_bb (s_entry_bb);
11794 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
11795 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
11796 gsi_remove (&si, true);
11797 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
11799 if (s_exit_bb == NULL)
11800 continue;
11802 si = gsi_last_bb (s_exit_bb);
11803 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
11804 gsi_remove (&si, true);
11806 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
11809 /* Error handling code goes in DEFAULT_BB. */
11810 t = gimple_block_label (default_bb);
11811 u = build_case_label (NULL, NULL, t);
11812 make_edge (l0_bb, default_bb, 0);
11813 add_bb_to_loop (default_bb, current_loops->tree_root);
11815 stmt = gimple_build_switch (vmain, u, label_vec);
11816 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
11817 gsi_remove (&switch_si, true);
11819 si = gsi_start_bb (default_bb);
11820 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
11821 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
11823 if (exit_reachable)
11825 tree bfn_decl;
11827 /* Code to get the next section goes in L1_BB. */
11828 si = gsi_last_bb (l1_bb);
11829 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
11831 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
11832 stmt = gimple_build_call (bfn_decl, 0);
11833 gimple_call_set_lhs (stmt, vnext);
11834 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11835 gsi_remove (&si, true);
11837 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
11840 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
11841 si = gsi_last_bb (l2_bb);
11842 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
11843 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
11844 else if (gimple_omp_return_lhs (gsi_stmt (si)))
11845 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
11846 else
11847 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
11848 stmt = gimple_build_call (t, 0);
11849 if (gimple_omp_return_lhs (gsi_stmt (si)))
11850 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
11851 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11852 gsi_remove (&si, true);
11854 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
11858 /* Expand code for an OpenMP single directive. We've already expanded
11859 much of the code, here we simply place the GOMP_barrier call. */
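/* A sketch of the effect: for

     #pragma omp single
     x = init ();

   the entry and exit markers are simply deleted and, absent a nowait
   clause, a GOMP_barrier call is placed at the exit so the other
   threads wait for the one executing the single body.  */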
11861 static void
11862 expand_omp_single (struct omp_region *region)
11864 basic_block entry_bb, exit_bb;
11865 gimple_stmt_iterator si;
11867 entry_bb = region->entry;
11868 exit_bb = region->exit;
11870 si = gsi_last_bb (entry_bb);
11871 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
11872 gsi_remove (&si, true);
11873 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
11875 si = gsi_last_bb (exit_bb);
11876 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
11878 tree t = gimple_omp_return_lhs (gsi_stmt (si));
11879 gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT);
11881 gsi_remove (&si, true);
11882 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
11886 /* Generic expansion for OpenMP synchronization directives: master,
11887 ordered and critical. All we need to do here is remove the entry
11888 and exit markers for REGION. */
11890 static void
11891 expand_omp_synch (struct omp_region *region)
11893 basic_block entry_bb, exit_bb;
11894 gimple_stmt_iterator si;
11896 entry_bb = region->entry;
11897 exit_bb = region->exit;
11899 si = gsi_last_bb (entry_bb);
11900 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
11901 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
11902 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
11903 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
11904 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
11905 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
11906 gsi_remove (&si, true);
11907 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
11909 if (exit_bb)
11911 si = gsi_last_bb (exit_bb);
11912 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
11913 gsi_remove (&si, true);
11914 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
11918 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
11919 operation as a normal volatile load. */
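/* A sketch, assuming a 4-byte type: "#pragma omp atomic read" on
   "v = *p;" is emitted as a call to the sized atomic load builtin,

     v = __atomic_load_4 (p, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST instead when the seq_cst clause was given.  */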
11921 static bool
11922 expand_omp_atomic_load (basic_block load_bb, tree addr,
11923 tree loaded_val, int index)
11925 enum built_in_function tmpbase;
11926 gimple_stmt_iterator gsi;
11927 basic_block store_bb;
11928 location_t loc;
11929 gimple *stmt;
11930 tree decl, call, type, itype;
11932 gsi = gsi_last_bb (load_bb);
11933 stmt = gsi_stmt (gsi);
11934 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
11935 loc = gimple_location (stmt);
11937 /* ??? If the target does not implement atomic_load_optab[mode], and mode
11938 is smaller than word size, then expand_atomic_load assumes that the load
11939 is atomic. We could avoid the builtin entirely in this case. */
11941 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
11942 decl = builtin_decl_explicit (tmpbase);
11943 if (decl == NULL_TREE)
11944 return false;
11946 type = TREE_TYPE (loaded_val);
11947 itype = TREE_TYPE (TREE_TYPE (decl));
11949 call = build_call_expr_loc (loc, decl, 2, addr,
11950 build_int_cst (NULL,
11951 gimple_omp_atomic_seq_cst_p (stmt)
11952 ? MEMMODEL_SEQ_CST
11953 : MEMMODEL_RELAXED));
11954 if (!useless_type_conversion_p (type, itype))
11955 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
11956 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
11958 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
11959 gsi_remove (&gsi, true);
11961 store_bb = single_succ (load_bb);
11962 gsi = gsi_last_bb (store_bb);
11963 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
11964 gsi_remove (&gsi, true);
11966 if (gimple_in_ssa_p (cfun))
11967 update_ssa (TODO_update_ssa_no_phi);
11969 return true;
11972 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
11973 operation as a normal volatile store. */
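/* Similarly sketched for a 4-byte type: "#pragma omp atomic write" on
   "*p = v;" becomes

     __atomic_store_4 (p, v, MEMMODEL_RELAXED);

   and a capture form that still needs the old value is emitted as
   __atomic_exchange_4 instead.  */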
11975 static bool
11976 expand_omp_atomic_store (basic_block load_bb, tree addr,
11977 tree loaded_val, tree stored_val, int index)
11979 enum built_in_function tmpbase;
11980 gimple_stmt_iterator gsi;
11981 basic_block store_bb = single_succ (load_bb);
11982 location_t loc;
11983 gimple *stmt;
11984 tree decl, call, type, itype;
11985 machine_mode imode;
11986 bool exchange;
11988 gsi = gsi_last_bb (load_bb);
11989 stmt = gsi_stmt (gsi);
11990 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
11992 /* If the load value is needed, then this isn't a store but an exchange. */
11993 exchange = gimple_omp_atomic_need_value_p (stmt);
11995 gsi = gsi_last_bb (store_bb);
11996 stmt = gsi_stmt (gsi);
11997 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
11998 loc = gimple_location (stmt);
12000 /* ??? If the target does not implement atomic_store_optab[mode], and mode
12001 is smaller than word size, then expand_atomic_store assumes that the store
12002 is atomic. We could avoid the builtin entirely in this case. */
12004 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
12005 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
12006 decl = builtin_decl_explicit (tmpbase);
12007 if (decl == NULL_TREE)
12008 return false;
12010 type = TREE_TYPE (stored_val);
12012 /* Dig out the type of the function's second argument. */
12013 itype = TREE_TYPE (decl);
12014 itype = TYPE_ARG_TYPES (itype);
12015 itype = TREE_CHAIN (itype);
12016 itype = TREE_VALUE (itype);
12017 imode = TYPE_MODE (itype);
12019 if (exchange && !can_atomic_exchange_p (imode, true))
12020 return false;
12022 if (!useless_type_conversion_p (itype, type))
12023 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
12024 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
12025 build_int_cst (NULL,
12026 gimple_omp_atomic_seq_cst_p (stmt)
12027 ? MEMMODEL_SEQ_CST
12028 : MEMMODEL_RELAXED));
12029 if (exchange)
12031 if (!useless_type_conversion_p (type, itype))
12032 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
12033 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
12036 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12037 gsi_remove (&gsi, true);
12039 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
12040 gsi = gsi_last_bb (load_bb);
12041 gsi_remove (&gsi, true);
12043 if (gimple_in_ssa_p (cfun))
12044 update_ssa (TODO_update_ssa_no_phi);
12046 return true;
12049 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
12050 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
12051 size of the data type, and thus usable to find the index of the builtin
12052 decl. Returns false if the expression is not of the proper form. */
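/* An assumed example: for

     #pragma omp atomic
     x += 3;

   with a 4-byte X this emits __atomic_fetch_add_4 (&x, 3,
   MEMMODEL_RELAXED); when the new value is needed, the
   __atomic_add_fetch_4 form is chosen instead.  */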
12054 static bool
12055 expand_omp_atomic_fetch_op (basic_block load_bb,
12056 tree addr, tree loaded_val,
12057 tree stored_val, int index)
12059 enum built_in_function oldbase, newbase, tmpbase;
12060 tree decl, itype, call;
12061 tree lhs, rhs;
12062 basic_block store_bb = single_succ (load_bb);
12063 gimple_stmt_iterator gsi;
12064 gimple *stmt;
12065 location_t loc;
12066 enum tree_code code;
12067 bool need_old, need_new;
12068 machine_mode imode;
12069 bool seq_cst;
12071 /* We expect to find the following sequences:
12073 load_bb:
12074 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
12076 store_bb:
12077 val = tmp OP something; (or: something OP tmp)
12078 GIMPLE_OMP_STORE (val)
12080 ???FIXME: Allow a more flexible sequence.
12081 Perhaps use data flow to pick the statements.
12085 gsi = gsi_after_labels (store_bb);
12086 stmt = gsi_stmt (gsi);
12087 loc = gimple_location (stmt);
12088 if (!is_gimple_assign (stmt))
12089 return false;
12090 gsi_next (&gsi);
12091 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
12092 return false;
12093 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
12094 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
12095 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
12096 gcc_checking_assert (!need_old || !need_new);
12098 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
12099 return false;
12101 /* Check for one of the supported fetch-op operations. */
12102 code = gimple_assign_rhs_code (stmt);
12103 switch (code)
12105 case PLUS_EXPR:
12106 case POINTER_PLUS_EXPR:
12107 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
12108 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
12109 break;
12110 case MINUS_EXPR:
12111 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
12112 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
12113 break;
12114 case BIT_AND_EXPR:
12115 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
12116 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
12117 break;
12118 case BIT_IOR_EXPR:
12119 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
12120 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
12121 break;
12122 case BIT_XOR_EXPR:
12123 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
12124 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
12125 break;
12126 default:
12127 return false;
12130 /* Make sure the expression is of the proper form. */
12131 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
12132 rhs = gimple_assign_rhs2 (stmt);
12133 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
12134 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
12135 rhs = gimple_assign_rhs1 (stmt);
12136 else
12137 return false;
12139 tmpbase = ((enum built_in_function)
12140 ((need_new ? newbase : oldbase) + index + 1));
12141 decl = builtin_decl_explicit (tmpbase);
12142 if (decl == NULL_TREE)
12143 return false;
12144 itype = TREE_TYPE (TREE_TYPE (decl));
12145 imode = TYPE_MODE (itype);
12147 /* We could test all of the various optabs involved, but the fact of the
12148 matter is that (with the exception of i486 vs i586 and xadd) all targets
12149 	 that support any atomic operation optab also implement compare-and-swap.
12150 Let optabs.c take care of expanding any compare-and-swap loop. */
12151 if (!can_compare_and_swap_p (imode, true))
12152 return false;
12154 gsi = gsi_last_bb (load_bb);
12155 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
12157 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
12158 It only requires that the operation happen atomically. Thus we can
12159 use the RELAXED memory model. */
12160 call = build_call_expr_loc (loc, decl, 3, addr,
12161 fold_convert_loc (loc, itype, rhs),
12162 build_int_cst (NULL,
12163 seq_cst ? MEMMODEL_SEQ_CST
12164 : MEMMODEL_RELAXED));
12166 if (need_old || need_new)
12168 lhs = need_old ? loaded_val : stored_val;
12169 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
12170 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
12172 else
12173 call = fold_convert_loc (loc, void_type_node, call);
12174 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12175 gsi_remove (&gsi, true);
12177 gsi = gsi_last_bb (store_bb);
12178 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
12179 gsi_remove (&gsi, true);
12180 gsi = gsi_last_bb (store_bb);
12181 stmt = gsi_stmt (gsi);
12182 gsi_remove (&gsi, true);
12184 if (gimple_in_ssa_p (cfun))
12186 release_defs (stmt);
12187 update_ssa (TODO_update_ssa_no_phi);
12190 return true;
12193 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12195 oldval = *addr;
12196 repeat:
12197 newval = rhs; // with oldval replacing *addr in rhs
12198 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
12199 if (oldval != newval)
12200 goto repeat;
12202 INDEX is log2 of the size of the data type, and thus usable to find the
12203 index of the builtin decl. */
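/* A rough sketch for a 4-byte float X under "#pragma omp atomic": the
   value is view-converted to a same-sized integer and the loop built
   below behaves like

     oldi = *iaddr;
   repeat:
     newi = VIEW_CONVERT (f (VIEW_CONVERT (oldi)));
     reti = __sync_val_compare_and_swap_4 (iaddr, oldi, newi);
     if (reti != oldi) { oldi = reti; goto repeat; }

   comparing as integers so that NaNs and -0.0 cannot prevent
   termination.  */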
12205 static bool
12206 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
12207 tree addr, tree loaded_val, tree stored_val,
12208 int index)
12210 tree loadedi, storedi, initial, new_storedi, old_vali;
12211 tree type, itype, cmpxchg, iaddr;
12212 gimple_stmt_iterator si;
12213 basic_block loop_header = single_succ (load_bb);
12214 gimple *phi, *stmt;
12215 edge e;
12216 enum built_in_function fncode;
12218 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
12219 order to use the RELAXED memory model effectively. */
12220 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
12221 + index + 1);
12222 cmpxchg = builtin_decl_explicit (fncode);
12223 if (cmpxchg == NULL_TREE)
12224 return false;
12225 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12226 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
12228 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
12229 return false;
12231 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
12232 si = gsi_last_bb (load_bb);
12233 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12235 /* For floating-point values, we'll need to view-convert them to integers
12236 so that we can perform the atomic compare and swap. Simplify the
12237 following code by always setting up the "i"ntegral variables. */
12238 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
12240 tree iaddr_val;
12242 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
12243 true));
12244 iaddr_val
12245 = force_gimple_operand_gsi (&si,
12246 fold_convert (TREE_TYPE (iaddr), addr),
12247 false, NULL_TREE, true, GSI_SAME_STMT);
12248 stmt = gimple_build_assign (iaddr, iaddr_val);
12249 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12250 loadedi = create_tmp_var (itype);
12251 if (gimple_in_ssa_p (cfun))
12252 loadedi = make_ssa_name (loadedi);
12254 else
12256 iaddr = addr;
12257 loadedi = loaded_val;
12260 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
12261 tree loaddecl = builtin_decl_explicit (fncode);
12262 if (loaddecl)
12263 initial
12264 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
12265 build_call_expr (loaddecl, 2, iaddr,
12266 build_int_cst (NULL_TREE,
12267 MEMMODEL_RELAXED)));
12268 else
12269 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
12270 build_int_cst (TREE_TYPE (iaddr), 0));
12272 initial
12273 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
12274 GSI_SAME_STMT);
12276 /* Move the value to the LOADEDI temporary. */
12277 if (gimple_in_ssa_p (cfun))
12279 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
12280 phi = create_phi_node (loadedi, loop_header);
12281 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
12282 initial);
12284 else
12285 gsi_insert_before (&si,
12286 gimple_build_assign (loadedi, initial),
12287 GSI_SAME_STMT);
12288 if (loadedi != loaded_val)
12290 gimple_stmt_iterator gsi2;
12291 tree x;
12293 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
12294 gsi2 = gsi_start_bb (loop_header);
12295 if (gimple_in_ssa_p (cfun))
12297 gassign *stmt;
12298 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12299 true, GSI_SAME_STMT);
12300 stmt = gimple_build_assign (loaded_val, x);
12301 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
12303 else
12305 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
12306 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12307 true, GSI_SAME_STMT);
12310 gsi_remove (&si, true);
12312 si = gsi_last_bb (store_bb);
12313 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12315 if (iaddr == addr)
12316 storedi = stored_val;
12317 else
12318 storedi =
12319 force_gimple_operand_gsi (&si,
12320 build1 (VIEW_CONVERT_EXPR, itype,
12321 stored_val), true, NULL_TREE, true,
12322 GSI_SAME_STMT);
12324 /* Build the compare&swap statement. */
12325 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
12326 new_storedi = force_gimple_operand_gsi (&si,
12327 fold_convert (TREE_TYPE (loadedi),
12328 new_storedi),
12329 true, NULL_TREE,
12330 true, GSI_SAME_STMT);
12332 if (gimple_in_ssa_p (cfun))
12333 old_vali = loadedi;
12334 else
12336 old_vali = create_tmp_var (TREE_TYPE (loadedi));
12337 stmt = gimple_build_assign (old_vali, loadedi);
12338 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12340 stmt = gimple_build_assign (loadedi, new_storedi);
12341 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12344 /* Note that we always perform the comparison as an integer, even for
12345 floating point. This allows the atomic operation to properly
12346 succeed even with NaNs and -0.0. */
12347 stmt = gimple_build_cond_empty
12348 (build2 (NE_EXPR, boolean_type_node,
12349 new_storedi, old_vali));
12350 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12352 /* Update cfg. */
12353 e = single_succ_edge (store_bb);
12354 e->flags &= ~EDGE_FALLTHRU;
12355 e->flags |= EDGE_FALSE_VALUE;
12357 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
12359 /* Copy the new value to loadedi (we already did that before the condition
12360 if we are not in SSA). */
12361 if (gimple_in_ssa_p (cfun))
12363 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
12364 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
12367 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
12368 gsi_remove (&si, true);
12370 struct loop *loop = alloc_loop ();
12371 loop->header = loop_header;
12372 loop->latch = store_bb;
12373 add_loop (loop, loop_header->loop_father);
12375 if (gimple_in_ssa_p (cfun))
12376 update_ssa (TODO_update_ssa_no_phi);
12378 return true;
12381 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12383 GOMP_atomic_start ();
12384 *addr = rhs;
12385 GOMP_atomic_end ();
12387 The result is not globally atomic, but works so long as all parallel
12388 references are within #pragma omp atomic directives. According to
12389 	 responses received from omp@openmp.org, this appears to be within spec.
12390 Which makes sense, since that's how several other compilers handle
12391 this situation as well.
12392 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
12393 expanding. STORED_VAL is the operand of the matching
12394 GIMPLE_OMP_ATOMIC_STORE.
12396 We replace
12397 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
12398 loaded_val = *addr;
12400 and replace
12401 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
12402 	 *addr = stored_val;  */
12405 static bool
12406 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
12407 tree addr, tree loaded_val, tree stored_val)
12409 gimple_stmt_iterator si;
12410 gassign *stmt;
12411 tree t;
12413 si = gsi_last_bb (load_bb);
12414 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12416 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
12417 t = build_call_expr (t, 0);
12418 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12420 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
12421 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12422 gsi_remove (&si, true);
12424 si = gsi_last_bb (store_bb);
12425 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12427 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
12428 stored_val);
12429 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12431 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
12432 t = build_call_expr (t, 0);
12433 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12434 gsi_remove (&si, true);
12436 if (gimple_in_ssa_p (cfun))
12437 update_ssa (TODO_update_ssa_no_phi);
12438 return true;
12441 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
12442 	 using expand_omp_atomic_fetch_op. If that fails, we try to
12443 	 call expand_omp_atomic_pipeline, and if that fails too, the
12444 ultimate fallback is wrapping the operation in a mutex
12445 (expand_omp_atomic_mutex). REGION is the atomic region built
12446 by build_omp_regions_1(). */
12448 static void
12449 expand_omp_atomic (struct omp_region *region)
12451 basic_block load_bb = region->entry, store_bb = region->exit;
12452 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
12453 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
12454 tree loaded_val = gimple_omp_atomic_load_lhs (load);
12455 tree addr = gimple_omp_atomic_load_rhs (load);
12456 tree stored_val = gimple_omp_atomic_store_val (store);
12457 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12458 HOST_WIDE_INT index;
12460 /* Make sure the type is one of the supported sizes. */
12461 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
12462 index = exact_log2 (index);
12463 if (index >= 0 && index <= 4)
12465 unsigned int align = TYPE_ALIGN_UNIT (type);
12467 /* __sync builtins require strict data alignment. */
12468 if (exact_log2 (align) >= index)
12470 /* Atomic load. */
12471 if (loaded_val == stored_val
12472 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12473 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12474 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12475 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
12476 return;
12478 /* Atomic store. */
12479 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12480 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12481 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12482 && store_bb == single_succ (load_bb)
12483 && first_stmt (store_bb) == store
12484 && expand_omp_atomic_store (load_bb, addr, loaded_val,
12485 stored_val, index))
12486 return;
12488 /* When possible, use specialized atomic update functions. */
12489 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
12490 && store_bb == single_succ (load_bb)
12491 && expand_omp_atomic_fetch_op (load_bb, addr,
12492 loaded_val, stored_val, index))
12493 return;
12495 /* If we don't have specialized __sync builtins, try and implement
12496 as a compare and swap loop. */
12497 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
12498 loaded_val, stored_val, index))
12499 return;
12503 /* The ultimate fallback is wrapping the operation in a mutex. */
12504 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
12508 /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
12509 	 macro in gomp-constants.h. We do not check for overflow. */
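/* E.g. (a sketch) oacc_launch_pack (GOMP_LAUNCH_DIM, NULL_TREE, mask)
   yields the same constant as GOMP_LAUNCH_PACK (GOMP_LAUNCH_DIM, 0, mask);
   a non-NULL DEVICE is shifted into the device field and IORed in.  */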
12511 static tree
12512 oacc_launch_pack (unsigned code, tree device, unsigned op)
12514 tree res;
12516 res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
12517 if (device)
12519 device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
12520 device, build_int_cst (unsigned_type_node,
12521 GOMP_LAUNCH_DEVICE_SHIFT));
12522 res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
12524 return res;
12527 /* Look for compute grid dimension clauses and convert to an attribute
12528 attached to FN. This permits the target-side code to (a) massage
12529 the dimensions, (b) emit that data and (c) optimize. Non-constant
12530 dimensions are pushed onto ARGS.
12532 The attribute value is a TREE_LIST. A set of dimensions is
12533 represented as a list of INTEGER_CST. Those that are runtime
12534 exprs are represented as an INTEGER_CST of zero.
12536 	 TODO. Normally the attribute will just contain a single such list. If
12537 however it contains a list of lists, this will represent the use of
12538 device_type. Each member of the outer list is an assoc list of
12539 dimensions, keyed by the device type. The first entry will be the
12540 default. Well, that's the plan. */
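/* As an assumed example, "num_gangs (32) vector_length (128)" with a
   runtime num_workers expression yields the list (32, 0, 128); the zero
   marks the dynamic worker dimension, whose value set_oacc_fn_attrib
   pushes onto ARGS under a GOMP_LAUNCH_DIM tag.  */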
12542 #define OACC_FN_ATTRIB "oacc function"
12544 /* Replace any existing oacc fn attribute with updated dimensions. */
12546 void
12547 replace_oacc_fn_attrib (tree fn, tree dims)
12549 tree ident = get_identifier (OACC_FN_ATTRIB);
12550 tree attribs = DECL_ATTRIBUTES (fn);
12552 /* If we happen to be present as the first attrib, drop it. */
12553 if (attribs && TREE_PURPOSE (attribs) == ident)
12554 attribs = TREE_CHAIN (attribs);
12555 DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
12558 /* Scan CLAUSES for launch dimensions and attach them to the oacc
12559 function attribute. Push any that are non-constant onto the ARGS
12560 list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
12561 	 true if these are for a kernels region offload function. */
12563 void
12564 set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
12566 /* Must match GOMP_DIM ordering. */
12567 static const omp_clause_code ids[]
12568 = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
12569 OMP_CLAUSE_VECTOR_LENGTH };
12570 unsigned ix;
12571 tree dims[GOMP_DIM_MAX];
12572 tree attr = NULL_TREE;
12573 unsigned non_const = 0;
12575 for (ix = GOMP_DIM_MAX; ix--;)
12577 tree clause = find_omp_clause (clauses, ids[ix]);
12578 tree dim = NULL_TREE;
12580 if (clause)
12581 dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
12582 dims[ix] = dim;
12583 if (dim && TREE_CODE (dim) != INTEGER_CST)
12585 dim = integer_zero_node;
12586 non_const |= GOMP_DIM_MASK (ix);
12588 attr = tree_cons (NULL_TREE, dim, attr);
12589 /* Note kernelness with TREE_PUBLIC. */
12590 if (is_kernel)
12591 TREE_PUBLIC (attr) = 1;
12594 replace_oacc_fn_attrib (fn, attr);
12596 if (non_const)
12598 /* Push a dynamic argument set. */
12599 args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
12600 NULL_TREE, non_const));
12601 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
12602 if (non_const & GOMP_DIM_MASK (ix))
12603 args->safe_push (dims[ix]);
12607 /* Process the routine's dimension clauses to generate an attribute
12608 value. Issue diagnostics as appropriate. We default to SEQ
12609 (OpenACC 2.5 clarifies this). All dimensions have a size of zero
12610 (dynamic). TREE_PURPOSE is set to indicate whether that dimension
12611 	 can have a loop partitioned on it. Non-zero indicates
12612 yes, zero indicates no. By construction once a non-zero has been
12613 reached, further inner dimensions must also be non-zero. We set
12614 TREE_VALUE to zero for the dimensions that may be partitioned and
12615 1 for the other ones -- if a loop is (erroneously) spawned at
12616 an outer level, we don't want to try and partition it. */
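/* E.g. "#pragma acc routine worker" gives level 1, so the list built
   below is roughly (0,1) for gang, (1,0) for worker and (1,0) for
   vector as (TREE_PURPOSE, TREE_VALUE) pairs: partitioning is allowed
   at worker level and below only (a sketch of the encoding; all sizes
   stay dynamic).  */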
12618 tree
12619 build_oacc_routine_dims (tree clauses)
12621 /* Must match GOMP_DIM ordering. */
12622 static const omp_clause_code ids[] =
12623 {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
12624 int ix;
12625 int level = -1;
12627 for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
12628 for (ix = GOMP_DIM_MAX + 1; ix--;)
12629 if (OMP_CLAUSE_CODE (clauses) == ids[ix])
12631 if (level >= 0)
12632 error_at (OMP_CLAUSE_LOCATION (clauses),
12633 "multiple loop axes specified for routine");
12634 level = ix;
12635 break;
12638 /* Default to SEQ. */
12639 if (level < 0)
12640 level = GOMP_DIM_MAX;
12642 tree dims = NULL_TREE;
12644 for (ix = GOMP_DIM_MAX; ix--;)
12645 dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
12646 build_int_cst (integer_type_node, ix < level), dims);
12648 return dims;
12651 /* Retrieve the oacc function attrib and return it. Non-oacc
12652 functions will return NULL. */
12654 tree
12655 get_oacc_fn_attrib (tree fn)
12657 return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
12660 /* Return true if this oacc fn attrib is for a kernels offload
12661 region. We use the TREE_PUBLIC flag of each dimension -- only
12662 need to check the first one. */
12664 bool
12665 oacc_fn_attrib_kernels_p (tree attr)
12667 return TREE_PUBLIC (TREE_VALUE (attr));
12670 /* Return level at which oacc routine may spawn a partitioned loop, or
12671 -1 if it is not a routine (i.e. is an offload fn). */
12673 static int
12674 oacc_fn_attrib_level (tree attr)
12676 tree pos = TREE_VALUE (attr);
12678 if (!TREE_PURPOSE (pos))
12679 return -1;
12681 int ix = 0;
12682 for (ix = 0; ix != GOMP_DIM_MAX;
12683 ix++, pos = TREE_CHAIN (pos))
12684 if (!integer_zerop (TREE_PURPOSE (pos)))
12685 break;
12687 return ix;
12690 /* Extract an oacc execution dimension from FN. FN must be an
12691 offloaded function or routine that has already had its execution
12692 dimensions lowered to the target-specific values. */
12694 int
12695 get_oacc_fn_dim_size (tree fn, int axis)
12697 tree attrs = get_oacc_fn_attrib (fn);
12699 gcc_assert (axis < GOMP_DIM_MAX);
12701 tree dims = TREE_VALUE (attrs);
12702 while (axis--)
12703 dims = TREE_CHAIN (dims);
12705 int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
12707 return size;
12710 /* Extract the dimension axis from an IFN_GOACC_DIM_POS or
12711 IFN_GOACC_DIM_SIZE call. */
12713 int
12714 get_oacc_ifn_dim_arg (const gimple *stmt)
12716 gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
12717 || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
12718 tree arg = gimple_call_arg (stmt, 0);
12719 HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);
12721 gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
12722 return (int) axis;
12725 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
12726 at REGION_EXIT. */
12728 static void
12729 mark_loops_in_oacc_kernels_region (basic_block region_entry,
12730 basic_block region_exit)
12732 struct loop *outer = region_entry->loop_father;
12733 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
12735 /* Don't parallelize the kernels region if it contains more than one outer
12736 loop. */
12737 unsigned int nr_outer_loops = 0;
12738 struct loop *single_outer = NULL;
12739 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
12741 gcc_assert (loop_outer (loop) == outer);
12743 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
12744 continue;
12746 if (region_exit != NULL
12747 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
12748 continue;
12750 nr_outer_loops++;
12751 single_outer = loop;
12753 if (nr_outer_loops != 1)
12754 return;
12756 for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
12757 if (loop->next)
12758 return;
12760 /* Mark the loops in the region. */
12761 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
12762 loop->in_oacc_kernels_region = true;
12765 /* Types used to pass grid and workgroup sizes to kernel invocation. */
12767 struct GTY(()) grid_launch_attributes_trees
12769 tree kernel_dim_array_type;
12770 tree kernel_lattrs_dimnum_decl;
12771 tree kernel_lattrs_grid_decl;
12772 tree kernel_lattrs_group_decl;
12773 	 tree kernel_launch_attributes_type;
12774 };
12776 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
12778 /* Create types used to pass kernel launch attributes to target. */
12780 static void
12781 grid_create_kernel_launch_attr_types (void)
12783 if (grid_attr_trees)
12784 return;
12785 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
12787 tree dim_arr_index_type
12788 = build_index_type (build_int_cst (integer_type_node, 2));
12789 grid_attr_trees->kernel_dim_array_type
12790 = build_array_type (uint32_type_node, dim_arr_index_type);
12792 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
12793 grid_attr_trees->kernel_lattrs_dimnum_decl
12794 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
12795 uint32_type_node);
12796 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
12798 grid_attr_trees->kernel_lattrs_grid_decl
12799 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
12800 grid_attr_trees->kernel_dim_array_type);
12801 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
12802 = grid_attr_trees->kernel_lattrs_dimnum_decl;
12803 grid_attr_trees->kernel_lattrs_group_decl
12804 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
12805 grid_attr_trees->kernel_dim_array_type);
12806 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
12807 = grid_attr_trees->kernel_lattrs_grid_decl;
12808 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
12809 "__gomp_kernel_launch_attributes",
12810 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
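/* For illustration, the record built above corresponds to this C layout
   (three elements per array, from the 0..2 index type; a matching
   consumer structure is assumed to exist on the libgomp side):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   Note that finish_builtin_struct reverses the field chain, so the
   fields end up in the order shown.  */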
12813 /* Insert before the current statement in GSI a store of VALUE into element
12814 INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
12815 RANGE_VAR. VALUE must be of type uint32_type_node. */
12817 static void
12818 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
12819 tree fld_decl, int index, tree value)
12821 tree ref = build4 (ARRAY_REF, uint32_type_node,
12822 build3 (COMPONENT_REF,
12823 grid_attr_trees->kernel_dim_array_type,
12824 range_var, fld_decl, NULL_TREE),
12825 build_int_cst (integer_type_node, index),
12826 NULL_TREE, NULL_TREE);
12827 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
12830 /* Return a tree representation of a pointer to a structure with grid and
12831 work-group size information. Statements filling that information will be
12832 inserted before GSI, TGT_STMT is the target statement which has the
12833 necessary information in it. */
12835 static tree
12836 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
12837 gomp_target *tgt_stmt)
12839 grid_create_kernel_launch_attr_types ();
12840 tree u32_one = build_one_cst (uint32_type_node);
12841 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
12842 "__kernel_launch_attrs");
12844 unsigned max_dim = 0;
12845 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
12846 clause;
12847 clause = OMP_CLAUSE_CHAIN (clause))
12849 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
12850 continue;
12852 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
12853 max_dim = MAX (dim, max_dim);
12855 grid_insert_store_range_dim (gsi, lattrs,
12856 grid_attr_trees->kernel_lattrs_grid_decl,
12857 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
12858 grid_insert_store_range_dim (gsi, lattrs,
12859 grid_attr_trees->kernel_lattrs_group_decl,
12860 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
12863 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
12864 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
12865 /* At this moment we cannot gridify a loop with a collapse clause. */
12866 /* TODO: Adjust when we support bigger collapse. */
12867 gcc_assert (max_dim == 0);
12868 gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT);
12869 TREE_ADDRESSABLE (lattrs) = 1;
12870 return build_fold_addr_expr (lattrs);
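/* For illustration, with the single-dimension restriction asserted in the
   function above, the statements it inserts amount to

     __kernel_launch_attrs.grid_size[0] = <_griddim_ size>;
     __kernel_launch_attrs.group_size[0] = <_griddim_ group>;
     __kernel_launch_attrs.ndim = 1;

   and the caller receives &__kernel_launch_attrs.  */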
12873 /* Build target argument identifier from the DEVICE identifier, value
12874 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
12876 static tree
12877 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
12879 tree t = build_int_cst (integer_type_node, device);
12880 if (subsequent_param)
12881 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12882 build_int_cst (integer_type_node,
12883 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
12884 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12885 build_int_cst (integer_type_node, id));
12886 return t;
12889 /* Like above but return it in type that can be directly stored as an element
12890 of the argument array. */
12892 static tree
12893 get_target_argument_identifier (int device, bool subsequent_param, int id)
12895 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
12896 return fold_convert (ptr_type_node, t);
12899 /* Return a target argument consisting of DEVICE identifier, value identifier
12900 ID, and the actual VALUE. */
12902 static tree
12903 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
12904 tree value)
12906 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
12907 fold_convert (integer_type_node, value),
12908 build_int_cst (unsigned_type_node,
12909 GOMP_TARGET_ARG_VALUE_SHIFT));
12910 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12911 get_target_argument_identifier_1 (device, false, id));
12912 t = fold_convert (ptr_type_node, t);
12913 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
12916 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
12917 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
12918 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
12919 separate arguments. */
12921 static void
12922 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
12923 int id, tree value, vec <tree> *args)
12925 if (tree_fits_shwi_p (value)
12926 && tree_to_shwi (value) > -(1 << 15)
12927 && tree_to_shwi (value) < (1 << 15))
12928 args->quick_push (get_target_argument_value (gsi, device, id, value));
12929 else
12931 args->quick_push (get_target_argument_identifier (device, true, id));
12932 value = fold_convert (ptr_type_node, value);
12933 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
12934 GSI_SAME_STMT);
12935 args->quick_push (value);
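/* For illustration: a sketch of how a consumer could decode one element
   produced above, mirroring the gomp-constants.h layout -- device in the
   low bits, GOMP_TARGET_ARG_SUBSEQUENT_PARAM as a flag above them, the
   value identifier above that, and a 16-bit immediate shifted by
   GOMP_TARGET_ARG_VALUE_SHIFT.  The two *_MASK macros are assumptions of
   this sketch.  */
static void ATTRIBUTE_UNUSED
example_decode_target_arg (uintptr_t arg, int *device, int *id,
			   int *value, bool *value_in_next_arg)
{
  *device = arg & GOMP_TARGET_ARG_DEVICE_MASK;
  *value_in_next_arg = (arg & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) != 0;
  *id = arg & GOMP_TARGET_ARG_ID_MASK;
  /* Sign-extend the embedded 16-bit immediate.  */
  *value = (int) arg >> GOMP_TARGET_ARG_VALUE_SHIFT;
}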
12939 /* Create an array of arguments that is then passed to GOMP_target. */
12941 static tree
12942 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
12944 auto_vec <tree, 6> args;
12945 tree clauses = gimple_omp_target_clauses (tgt_stmt);
12946 tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
12947 if (c)
12948 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
12949 else
12950 t = integer_minus_one_node;
12951 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
12952 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
12954 c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
12955 if (c)
12956 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
12957 else
12958 t = integer_minus_one_node;
12959 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
12960 GOMP_TARGET_ARG_THREAD_LIMIT, t,
12961 &args);
12963 /* Add HSA-specific grid sizes, if available. */
12964 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
12965 OMP_CLAUSE__GRIDDIM_))
12967 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
12968 GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
12969 args.quick_push (t);
12970 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
12973 /* Produce more, perhaps device specific, arguments here. */
12975 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
12976 args.length () + 1),
12977 ".omp_target_args");
12978 for (unsigned i = 0; i < args.length (); i++)
12980 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
12981 build_int_cst (integer_type_node, i),
12982 NULL_TREE, NULL_TREE);
12983 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
12984 GSI_SAME_STMT);
12986 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
12987 build_int_cst (integer_type_node, args.length ()),
12988 NULL_TREE, NULL_TREE);
12989 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
12990 GSI_SAME_STMT);
12991 TREE_ADDRESSABLE (argarray) = 1;
12992 return build_fold_addr_expr (argarray);
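/* For illustration: for

     #pragma omp target teams num_teams(4) thread_limit(64)

   the array built above would hold (a sketch; each element converted to
   pointer type, with GOMP_TARGET_ARG_DEVICE_ALL implicit in both
   identifiers):

     { GOMP_TARGET_ARG_NUM_TEAMS | (4 << GOMP_TARGET_ARG_VALUE_SHIFT),
       GOMP_TARGET_ARG_THREAD_LIMIT | (64 << GOMP_TARGET_ARG_VALUE_SHIFT),
       NULL }  */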
12995 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
12997 static void
12998 expand_omp_target (struct omp_region *region)
13000 basic_block entry_bb, exit_bb, new_bb;
13001 struct function *child_cfun;
13002 tree child_fn, block, t;
13003 gimple_stmt_iterator gsi;
13004 gomp_target *entry_stmt;
13005 gimple *stmt;
13006 edge e;
13007 bool offloaded, data_region;
13009 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
13010 new_bb = region->entry;
13012 offloaded = is_gimple_omp_offloaded (entry_stmt);
13013 switch (gimple_omp_target_kind (entry_stmt))
13015 case GF_OMP_TARGET_KIND_REGION:
13016 case GF_OMP_TARGET_KIND_UPDATE:
13017 case GF_OMP_TARGET_KIND_ENTER_DATA:
13018 case GF_OMP_TARGET_KIND_EXIT_DATA:
13019 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13020 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13021 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13022 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13023 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13024 data_region = false;
13025 break;
13026 case GF_OMP_TARGET_KIND_DATA:
13027 case GF_OMP_TARGET_KIND_OACC_DATA:
13028 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13029 data_region = true;
13030 break;
13031 default:
13032 gcc_unreachable ();
13035 child_fn = NULL_TREE;
13036 child_cfun = NULL;
13037 if (offloaded)
13039 child_fn = gimple_omp_target_child_fn (entry_stmt);
13040 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
13043 /* Supported by expand_omp_taskreg, but not here. */
13044 if (child_cfun != NULL)
13045 gcc_checking_assert (!child_cfun->cfg);
13046 gcc_checking_assert (!gimple_in_ssa_p (cfun));
13048 entry_bb = region->entry;
13049 exit_bb = region->exit;
13051 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
13052 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
13054 if (offloaded)
13056 unsigned srcidx, dstidx, num;
13058 /* If the offloading region needs data sent from the parent
13059 function, then the very first statement (except possible
13060 tree profile counter updates) of the offloading body
13061 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
13062 &.OMP_DATA_O is passed as an argument to the child function,
13063 we need to replace it with the argument as seen by the child
13064 function.
13066 In most cases, this will end up being the identity assignment
13067 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
13068 a function call that has been inlined, the original PARM_DECL
13069 .OMP_DATA_I may have been converted into a different local
13070 variable, in which case we need to keep the assignment. */
13071 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
13072 if (data_arg)
13074 basic_block entry_succ_bb = single_succ (entry_bb);
13075 gimple_stmt_iterator gsi;
13076 tree arg;
13077 gimple *tgtcopy_stmt = NULL;
13078 tree sender = TREE_VEC_ELT (data_arg, 0);
13080 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
13082 gcc_assert (!gsi_end_p (gsi));
13083 stmt = gsi_stmt (gsi);
13084 if (gimple_code (stmt) != GIMPLE_ASSIGN)
13085 continue;
13087 if (gimple_num_ops (stmt) == 2)
13089 tree arg = gimple_assign_rhs1 (stmt);
13091 /* We're ignoring the subcode because we're
13092 effectively doing a STRIP_NOPS. */
13094 if (TREE_CODE (arg) == ADDR_EXPR
13095 && TREE_OPERAND (arg, 0) == sender)
13097 tgtcopy_stmt = stmt;
13098 break;
13103 gcc_assert (tgtcopy_stmt != NULL);
13104 arg = DECL_ARGUMENTS (child_fn);
13106 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
13107 gsi_remove (&gsi, true);
13110 /* Declare local variables needed in CHILD_CFUN. */
13111 block = DECL_INITIAL (child_fn);
13112 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
13113 /* The gimplifier could record temporaries in the offloading block
13114 rather than in the containing function's local_decls chain,
13115 which would mean cgraph missed finalizing them. Do it now. */
13116 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
13117 if (TREE_CODE (t) == VAR_DECL
13118 && TREE_STATIC (t)
13119 && !DECL_EXTERNAL (t))
13120 varpool_node::finalize_decl (t);
13121 DECL_SAVED_TREE (child_fn) = NULL;
13122 /* We'll create a CFG for child_fn, so no gimple body is needed. */
13123 gimple_set_body (child_fn, NULL);
13124 TREE_USED (block) = 1;
13126 /* Reset DECL_CONTEXT on function arguments. */
13127 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
13128 DECL_CONTEXT (t) = child_fn;
13130 /* Split ENTRY_BB at GIMPLE_OMP_TARGET,
13131 so that it can be moved to the child function. */
13132 gsi = gsi_last_bb (entry_bb);
13133 stmt = gsi_stmt (gsi);
13134 gcc_assert (stmt
13135 && gimple_code (stmt) == gimple_code (entry_stmt));
13136 e = split_block (entry_bb, stmt);
13137 gsi_remove (&gsi, true);
13138 entry_bb = e->dest;
13139 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
13141 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
13142 if (exit_bb)
13144 gsi = gsi_last_bb (exit_bb);
13145 gcc_assert (!gsi_end_p (gsi)
13146 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13147 stmt = gimple_build_return (NULL);
13148 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
13149 gsi_remove (&gsi, true);
13152 /* Move the offloading region into CHILD_CFUN. */
13154 block = gimple_block (entry_stmt);
13156 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
13157 if (exit_bb)
13158 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
13159 /* When the OMP expansion process cannot guarantee an up-to-date
13160 loop tree, arrange for the child function to fix up loops. */
13161 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13162 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
13164 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
13165 num = vec_safe_length (child_cfun->local_decls);
13166 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
13168 t = (*child_cfun->local_decls)[srcidx];
13169 if (DECL_CONTEXT (t) == cfun->decl)
13170 continue;
13171 if (srcidx != dstidx)
13172 (*child_cfun->local_decls)[dstidx] = t;
13173 dstidx++;
13175 if (dstidx != num)
13176 vec_safe_truncate (child_cfun->local_decls, dstidx);
13178 /* Inform the callgraph about the new function. */
13179 child_cfun->curr_properties = cfun->curr_properties;
13180 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
13181 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
13182 cgraph_node *node = cgraph_node::get_create (child_fn);
13183 node->parallelized_function = 1;
13184 cgraph_node::add_new_function (child_fn, true);
13186 /* Add the new function to the offload table. */
13187 if (ENABLE_OFFLOADING)
13188 vec_safe_push (offload_funcs, child_fn);
13190 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
13191 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
13193 /* Fix the callgraph edges for child_cfun. Those for cfun will be
13194 fixed in a following pass. */
13195 push_cfun (child_cfun);
13196 if (need_asm)
13197 assign_assembler_name_if_neeeded (child_fn);
13198 cgraph_edge::rebuild_edges ();
13200 /* Some EH regions might become dead, see PR34608. If
13201 pass_cleanup_cfg isn't the first pass to happen with the
13202 new child, these dead EH edges might cause problems.
13203 Clean them up now. */
13204 if (flag_exceptions)
13206 basic_block bb;
13207 bool changed = false;
13209 FOR_EACH_BB_FN (bb, cfun)
13210 changed |= gimple_purge_dead_eh_edges (bb);
13211 if (changed)
13212 cleanup_tree_cfg ();
13214 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13215 verify_loop_structure ();
13216 pop_cfun ();
13218 if (dump_file && !gimple_in_ssa_p (cfun))
13220 omp_any_child_fn_dumped = true;
13221 dump_function_header (dump_file, child_fn, dump_flags);
13222 dump_function_to_file (child_fn, dump_file, dump_flags);
13226 /* Emit a library call to launch the offloading region, or do data
13227 transfers. */
13228 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
13229 enum built_in_function start_ix;
13230 location_t clause_loc;
13231 unsigned int flags_i = 0;
13232 bool oacc_kernels_p = false;
13234 switch (gimple_omp_target_kind (entry_stmt))
13236 case GF_OMP_TARGET_KIND_REGION:
13237 start_ix = BUILT_IN_GOMP_TARGET;
13238 break;
13239 case GF_OMP_TARGET_KIND_DATA:
13240 start_ix = BUILT_IN_GOMP_TARGET_DATA;
13241 break;
13242 case GF_OMP_TARGET_KIND_UPDATE:
13243 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
13244 break;
13245 case GF_OMP_TARGET_KIND_ENTER_DATA:
13246 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13247 break;
13248 case GF_OMP_TARGET_KIND_EXIT_DATA:
13249 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13250 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
13251 break;
13252 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13253 oacc_kernels_p = true;
13254 /* FALLTHROUGH */
13255 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13256 start_ix = BUILT_IN_GOACC_PARALLEL;
13257 break;
13258 case GF_OMP_TARGET_KIND_OACC_DATA:
13259 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13260 start_ix = BUILT_IN_GOACC_DATA_START;
13261 break;
13262 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13263 start_ix = BUILT_IN_GOACC_UPDATE;
13264 break;
13265 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13266 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
13267 break;
13268 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13269 start_ix = BUILT_IN_GOACC_DECLARE;
13270 break;
13271 default:
13272 gcc_unreachable ();
13275 clauses = gimple_omp_target_clauses (entry_stmt);
13277 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
13278 library choose) and there is no conditional. */
13279 cond = NULL_TREE;
13280 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
13282 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
13283 if (c)
13284 cond = OMP_CLAUSE_IF_EXPR (c);
13286 c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE);
13287 if (c)
13289 /* Even if we pass it to all library function calls, it is currently only
13290 defined/used for the OpenMP target ones. */
13291 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
13292 || start_ix == BUILT_IN_GOMP_TARGET_DATA
13293 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
13294 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
13296 device = OMP_CLAUSE_DEVICE_ID (c);
13297 clause_loc = OMP_CLAUSE_LOCATION (c);
13299 else
13300 clause_loc = gimple_location (entry_stmt);
13302 c = find_omp_clause (clauses, OMP_CLAUSE_NOWAIT);
13303 if (c)
13304 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
13306 /* Ensure 'device' is of the correct type. */
13307 device = fold_convert_loc (clause_loc, integer_type_node, device);
13309 /* If we found the clause 'if (cond)', build
13310 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
13311 if (cond)
13313 cond = gimple_boolify (cond);
13315 basic_block cond_bb, then_bb, else_bb;
13316 edge e;
13317 tree tmp_var;
13319 tmp_var = create_tmp_var (TREE_TYPE (device));
13320 if (offloaded)
13321 e = split_block_after_labels (new_bb);
13322 else
13324 gsi = gsi_last_bb (new_bb);
13325 gsi_prev (&gsi);
13326 e = split_block (new_bb, gsi_stmt (gsi));
13328 cond_bb = e->src;
13329 new_bb = e->dest;
13330 remove_edge (e);
13332 then_bb = create_empty_bb (cond_bb);
13333 else_bb = create_empty_bb (then_bb);
13334 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
13335 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
13337 stmt = gimple_build_cond_empty (cond);
13338 gsi = gsi_last_bb (cond_bb);
13339 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13341 gsi = gsi_start_bb (then_bb);
13342 stmt = gimple_build_assign (tmp_var, device);
13343 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13345 gsi = gsi_start_bb (else_bb);
13346 stmt = gimple_build_assign (tmp_var,
13347 build_int_cst (integer_type_node,
13348 GOMP_DEVICE_HOST_FALLBACK));
13349 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13351 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
13352 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
13353 add_bb_to_loop (then_bb, cond_bb->loop_father);
13354 add_bb_to_loop (else_bb, cond_bb->loop_father);
13355 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
13356 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
13358 device = tmp_var;
13361 gsi = gsi_last_bb (new_bb);
13362 t = gimple_omp_target_data_arg (entry_stmt);
13363 if (t == NULL)
13365 t1 = size_zero_node;
13366 t2 = build_zero_cst (ptr_type_node);
13367 t3 = t2;
13368 t4 = t2;
13370 else
13372 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
13373 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
13374 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
13375 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
13376 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
13379 gimple *g;
13380 bool tagging = false;
13381 /* The maximum number of arguments used by any start_ix, without varargs. */
13382 auto_vec<tree, 11> args;
13383 args.quick_push (device);
13384 if (offloaded)
13385 args.quick_push (build_fold_addr_expr (child_fn));
13386 args.quick_push (t1);
13387 args.quick_push (t2);
13388 args.quick_push (t3);
13389 args.quick_push (t4);
13390 switch (start_ix)
13392 case BUILT_IN_GOACC_DATA_START:
13393 case BUILT_IN_GOACC_DECLARE:
13394 case BUILT_IN_GOMP_TARGET_DATA:
13395 break;
13396 case BUILT_IN_GOMP_TARGET:
13397 case BUILT_IN_GOMP_TARGET_UPDATE:
13398 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
13399 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
13400 c = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
13401 if (c)
13402 depend = OMP_CLAUSE_DECL (c);
13403 else
13404 depend = build_int_cst (ptr_type_node, 0);
13405 args.quick_push (depend);
13406 if (start_ix == BUILT_IN_GOMP_TARGET)
13407 args.quick_push (get_target_arguments (&gsi, entry_stmt));
13408 break;
13409 case BUILT_IN_GOACC_PARALLEL:
13411 set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
13412 tagging = true;
13414 /* FALLTHRU */
13415 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
13416 case BUILT_IN_GOACC_UPDATE:
13418 tree t_async = NULL_TREE;
13420 /* If present, use the value specified by the respective
13421 clause, making sure it is of the correct type. */
13422 c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC);
13423 if (c)
13424 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13425 integer_type_node,
13426 OMP_CLAUSE_ASYNC_EXPR (c));
13427 else if (!tagging)
13428 /* Default values for t_async. */
13429 t_async = fold_convert_loc (gimple_location (entry_stmt),
13430 integer_type_node,
13431 build_int_cst (integer_type_node,
13432 GOMP_ASYNC_SYNC));
13433 if (tagging && t_async)
13435 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
13437 if (TREE_CODE (t_async) == INTEGER_CST)
13439 /* See if we can pack the async arg into the tag's
13440 operand. */
13441 i_async = TREE_INT_CST_LOW (t_async);
13442 if (i_async < GOMP_LAUNCH_OP_MAX)
13443 t_async = NULL_TREE;
13444 else
13445 i_async = GOMP_LAUNCH_OP_MAX;
13447 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
13448 i_async));
13450 if (t_async)
13451 args.safe_push (t_async);
13453 /* Save the argument index, and ... */
13454 unsigned t_wait_idx = args.length ();
13455 unsigned num_waits = 0;
13456 c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
13457 if (!tagging || c)
13458 /* ... push a placeholder. */
13459 args.safe_push (integer_zero_node);
13461 for (; c; c = OMP_CLAUSE_CHAIN (c))
13462 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
13464 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13465 integer_type_node,
13466 OMP_CLAUSE_WAIT_EXPR (c)));
13467 num_waits++;
13470 if (!tagging || num_waits)
13472 tree len;
13474 /* Now that we know the number, update the placeholder. */
13475 if (tagging)
13476 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
13477 else
13478 len = build_int_cst (integer_type_node, num_waits);
13479 len = fold_convert_loc (gimple_location (entry_stmt),
13480 unsigned_type_node, len);
13481 args[t_wait_idx] = len;
13484 break;
13485 default:
13486 gcc_unreachable ();
13488 if (tagging)
13489 /* Push terminal marker - zero. */
13490 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
13492 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
13493 gimple_set_location (g, gimple_location (entry_stmt));
13494 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
13495 if (!offloaded)
13497 g = gsi_stmt (gsi);
13498 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
13499 gsi_remove (&gsi, true);
13501 if (data_region && region->exit)
13503 gsi = gsi_last_bb (region->exit);
13504 g = gsi_stmt (gsi);
13505 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
13506 gsi_remove (&gsi, true);
13510 /* Expand the KFOR loop as a GPGPU kernel, i.e. as a body only, with the
13511 iteration variable derived from the thread number. */
13513 static void
13514 grid_expand_omp_for_loop (struct omp_region *kfor)
13516 tree t, threadid;
13517 tree type, itype;
13518 gimple_stmt_iterator gsi;
13519 tree n1, step;
13520 struct omp_for_data fd;
13522 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
13523 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
13524 == GF_OMP_FOR_KIND_GRID_LOOP);
13525 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
13527 gcc_assert (gimple_omp_for_collapse (for_stmt) == 1);
13528 gcc_assert (kfor->cont);
13529 extract_omp_for_data (for_stmt, &fd, NULL);
13531 itype = type = TREE_TYPE (fd.loop.v);
13532 if (POINTER_TYPE_P (type))
13533 itype = signed_type_for (type);
13535 gsi = gsi_start_bb (body_bb);
13537 n1 = fd.loop.n1;
13538 step = fd.loop.step;
13539 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
13540 true, NULL_TREE, true, GSI_SAME_STMT);
13541 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
13542 true, NULL_TREE, true, GSI_SAME_STMT);
13543 threadid = build_call_expr (builtin_decl_explicit
13544 (BUILT_IN_OMP_GET_THREAD_NUM), 0);
13545 threadid = fold_convert (itype, threadid);
13546 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
13547 true, GSI_SAME_STMT);
13549 tree startvar = fd.loop.v;
13550 t = fold_build2 (MULT_EXPR, itype, threadid, step);
13551 if (POINTER_TYPE_P (type))
13552 t = fold_build_pointer_plus (n1, t);
13553 else
13554 t = fold_build2 (PLUS_EXPR, type, t, n1);
13555 t = fold_convert (type, t);
13556 t = force_gimple_operand_gsi (&gsi, t,
13557 DECL_P (startvar)
13558 && TREE_ADDRESSABLE (startvar),
13559 NULL_TREE, true, GSI_SAME_STMT);
13560 gassign *assign_stmt = gimple_build_assign (startvar, t);
13561 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
13563 /* Remove the omp for statement. */
13564 gsi = gsi_last_bb (kfor->entry);
13565 gsi_remove (&gsi, true);
13567 /* Remove the GIMPLE_OMP_CONTINUE statement. */
13568 gsi = gsi_last_bb (kfor->cont);
13569 gcc_assert (!gsi_end_p (gsi)
13570 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
13571 gsi_remove (&gsi, true);
13573 /* Replace the GIMPLE_OMP_RETURN with a real return. */
13574 gsi = gsi_last_bb (kfor->exit);
13575 gcc_assert (!gsi_end_p (gsi)
13576 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13577 gsi_remove (&gsi, true);
13579 /* Fixup the much simpler CFG. */
13580 remove_edge (find_edge (kfor->cont, body_bb));
13582 if (kfor->cont != body_bb)
13583 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
13584 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
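/* For illustration, in source terms the rewrite above turns

     for (v = N1; v <cond> N2; v += STEP)
       BODY;

   into the body-only form

     v = N1 + (itype) omp_get_thread_num () * STEP;
     BODY;

   with the back edge removed, so each work-item of the grid executes
   exactly one iteration.  */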
13587 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
13588 argument_decls. */
13590 struct grid_arg_decl_map
13592 tree old_arg;
13593 tree new_arg;
13596 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
13597 pertaining to kernel function. */
13599 static tree
13600 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
13602 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
13603 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
13604 tree t = *tp;
13606 if (t == adm->old_arg)
13607 *tp = adm->new_arg;
13608 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
13609 return NULL_TREE;
13612 static void expand_omp (struct omp_region *region);
13614 /* If the TARGET region contains a kernel body 'for' loop, remove its region
13615 from the TARGET and expand it in GPGPU kernel fashion. */
13617 static void
13618 grid_expand_target_grid_body (struct omp_region *target)
13620 if (!hsa_gen_requested_p ())
13621 return;
13623 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
13624 struct omp_region **pp;
13626 for (pp = &target->inner; *pp; pp = &(*pp)->next)
13627 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
13628 break;
13630 struct omp_region *gpukernel = *pp;
13632 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
13633 if (!gpukernel)
13635 /* HSA cannot handle OACC stuff. */
13636 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
13637 return;
13638 gcc_checking_assert (orig_child_fndecl);
13639 gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13640 OMP_CLAUSE__GRIDDIM_));
13641 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
13643 hsa_register_kernel (n);
13644 return;
13647 gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13648 OMP_CLAUSE__GRIDDIM_));
13649 tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry)));
13650 *pp = gpukernel->next;
13651 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
13652 if ((*pp)->type == GIMPLE_OMP_FOR)
13653 break;
13655 struct omp_region *kfor = *pp;
13656 gcc_assert (kfor);
13657 gcc_assert (gimple_omp_for_kind (last_stmt ((kfor)->entry))
13658 == GF_OMP_FOR_KIND_GRID_LOOP);
13659 *pp = kfor->next;
13660 if (kfor->inner)
13661 expand_omp (kfor->inner);
13662 if (gpukernel->inner)
13663 expand_omp (gpukernel->inner);
13665 tree kern_fndecl = copy_node (orig_child_fndecl);
13666 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
13667 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
13668 tree tgtblock = gimple_block (tgt_stmt);
13669 tree fniniblock = make_node (BLOCK);
13670 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
13671 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
13672 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
13673 DECL_INITIAL (kern_fndecl) = fniniblock;
13674 push_struct_function (kern_fndecl);
13675 cfun->function_end_locus = gimple_location (tgt_stmt);
13676 pop_cfun ();
13678 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
13679 gcc_assert (!DECL_CHAIN (old_parm_decl));
13680 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
13681 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
13682 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
13683 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
13684 kern_cfun->curr_properties = cfun->curr_properties;
13686 remove_edge (BRANCH_EDGE (kfor->entry));
13687 grid_expand_omp_for_loop (kfor);
13689 /* Remove the omp for statement. */
13690 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
13691 gsi_remove (&gsi, true);
13692 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
13693 return. */
13694 gsi = gsi_last_bb (gpukernel->exit);
13695 gcc_assert (!gsi_end_p (gsi)
13696 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13697 gimple *ret_stmt = gimple_build_return (NULL);
13698 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
13699 gsi_remove (&gsi, true);
13701 /* Statements in the first BB in the target construct have been produced by
13702 target lowering and must be copied inside the GPUKERNEL, with the two
13703 exceptions of the first OMP statement and the OMP_DATA assignment
13704 statement. */
13705 gsi = gsi_start_bb (single_succ (gpukernel->entry));
13706 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
13707 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
13708 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
13709 !gsi_end_p (tsi); gsi_next (&tsi))
13711 gimple *stmt = gsi_stmt (tsi);
13712 if (is_gimple_omp (stmt))
13713 break;
13714 if (sender
13715 && is_gimple_assign (stmt)
13716 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
13717 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
13718 continue;
13719 gimple *copy = gimple_copy (stmt);
13720 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
13721 gimple_set_block (copy, fniniblock);
13724 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
13725 gpukernel->exit, inside_block);
13727 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
13728 kcn->mark_force_output ();
13729 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
13731 hsa_register_kernel (kcn, orig_child);
13733 cgraph_node::add_new_function (kern_fndecl, true);
13734 push_cfun (kern_cfun);
13735 cgraph_edge::rebuild_edges ();
13737 /* Re-map any mention of the PARM_DECL of the original function to the
13738 PARM_DECL of the new one.
13740 TODO: It would be great if lowering produced references into the GPU
13741 kernel decl straight away and we did not have to do this. */
13742 struct grid_arg_decl_map adm;
13743 adm.old_arg = old_parm_decl;
13744 adm.new_arg = new_parm_decl;
13745 basic_block bb;
13746 FOR_EACH_BB_FN (bb, kern_cfun)
13748 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
13750 gimple *stmt = gsi_stmt (gsi);
13751 struct walk_stmt_info wi;
13752 memset (&wi, 0, sizeof (wi));
13753 wi.info = &adm;
13754 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
13757 pop_cfun ();
13759 return;
13762 /* Expand the parallel region tree rooted at REGION. Expansion
13763 proceeds in depth-first order. Innermost regions are expanded
13764 first. This way, parallel regions that require a new function to
13765 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
13766 internal dependencies in their body. */
13768 static void
13769 expand_omp (struct omp_region *region)
13771 omp_any_child_fn_dumped = false;
13772 while (region)
13774 location_t saved_location;
13775 gimple *inner_stmt = NULL;
13777 /* First, determine whether this is a combined parallel+workshare
13778 region. */
13779 if (region->type == GIMPLE_OMP_PARALLEL)
13780 determine_parallel_type (region);
13781 else if (region->type == GIMPLE_OMP_TARGET)
13782 grid_expand_target_grid_body (region);
13784 if (region->type == GIMPLE_OMP_FOR
13785 && gimple_omp_for_combined_p (last_stmt (region->entry)))
13786 inner_stmt = last_stmt (region->inner->entry);
13788 if (region->inner)
13789 expand_omp (region->inner);
13791 saved_location = input_location;
13792 if (gimple_has_location (last_stmt (region->entry)))
13793 input_location = gimple_location (last_stmt (region->entry));
13795 switch (region->type)
13797 case GIMPLE_OMP_PARALLEL:
13798 case GIMPLE_OMP_TASK:
13799 expand_omp_taskreg (region);
13800 break;
13802 case GIMPLE_OMP_FOR:
13803 expand_omp_for (region, inner_stmt);
13804 break;
13806 case GIMPLE_OMP_SECTIONS:
13807 expand_omp_sections (region);
13808 break;
13810 case GIMPLE_OMP_SECTION:
13811 /* Individual omp sections are handled together with their
13812 parent GIMPLE_OMP_SECTIONS region. */
13813 break;
13815 case GIMPLE_OMP_SINGLE:
13816 expand_omp_single (region);
13817 break;
13819 case GIMPLE_OMP_ORDERED:
13821 gomp_ordered *ord_stmt
13822 = as_a <gomp_ordered *> (last_stmt (region->entry));
13823 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
13824 OMP_CLAUSE_DEPEND))
13826 /* We'll expand these when expanding the corresponding
13827 worksharing region with an ordered(n) clause. */
13828 gcc_assert (region->outer
13829 && region->outer->type == GIMPLE_OMP_FOR);
13830 region->ord_stmt = ord_stmt;
13831 break;
13834 /* FALLTHRU */
13835 case GIMPLE_OMP_MASTER:
13836 case GIMPLE_OMP_TASKGROUP:
13837 case GIMPLE_OMP_CRITICAL:
13838 case GIMPLE_OMP_TEAMS:
13839 expand_omp_synch (region);
13840 break;
13842 case GIMPLE_OMP_ATOMIC_LOAD:
13843 expand_omp_atomic (region);
13844 break;
13846 case GIMPLE_OMP_TARGET:
13847 expand_omp_target (region);
13848 break;
13850 default:
13851 gcc_unreachable ();
13854 input_location = saved_location;
13855 region = region->next;
13857 if (omp_any_child_fn_dumped)
13859 if (dump_file)
13860 dump_function_header (dump_file, current_function_decl, dump_flags);
13861 omp_any_child_fn_dumped = false;
13866 /* Helper for build_omp_regions. Scan the dominator tree starting at
13867 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
13868 true, the function ends once a single tree is built (otherwise, a
13869 whole forest of OMP constructs may be built). */
13871 static void
13872 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
13873 bool single_tree)
13875 gimple_stmt_iterator gsi;
13876 gimple *stmt;
13877 basic_block son;
13879 gsi = gsi_last_bb (bb);
13880 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
13882 struct omp_region *region;
13883 enum gimple_code code;
13885 stmt = gsi_stmt (gsi);
13886 code = gimple_code (stmt);
13887 if (code == GIMPLE_OMP_RETURN)
13889 /* STMT is the return point out of region PARENT. Mark it
13890 as the exit point and make PARENT the immediately
13891 enclosing region. */
13892 gcc_assert (parent);
13893 region = parent;
13894 region->exit = bb;
13895 parent = parent->outer;
13897 else if (code == GIMPLE_OMP_ATOMIC_STORE)
13899 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
13900 GIMPLE_OMP_RETURN, but matches with
13901 GIMPLE_OMP_ATOMIC_LOAD. */
13902 gcc_assert (parent);
13903 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
13904 region = parent;
13905 region->exit = bb;
13906 parent = parent->outer;
13908 else if (code == GIMPLE_OMP_CONTINUE)
13910 gcc_assert (parent);
13911 parent->cont = bb;
13913 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
13915 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
13916 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
13918 else
13920 region = new_omp_region (bb, code, parent);
13921 /* Otherwise... */
13922 if (code == GIMPLE_OMP_TARGET)
13924 switch (gimple_omp_target_kind (stmt))
13926 case GF_OMP_TARGET_KIND_REGION:
13927 case GF_OMP_TARGET_KIND_DATA:
13928 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13929 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13930 case GF_OMP_TARGET_KIND_OACC_DATA:
13931 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13932 break;
13933 case GF_OMP_TARGET_KIND_UPDATE:
13934 case GF_OMP_TARGET_KIND_ENTER_DATA:
13935 case GF_OMP_TARGET_KIND_EXIT_DATA:
13936 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13937 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13938 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13939 /* ..., other than for those stand-alone directives... */
13940 region = NULL;
13941 break;
13942 default:
13943 gcc_unreachable ();
13946 else if (code == GIMPLE_OMP_ORDERED
13947 && find_omp_clause (gimple_omp_ordered_clauses
13948 (as_a <gomp_ordered *> (stmt)),
13949 OMP_CLAUSE_DEPEND))
13950 /* #pragma omp ordered depend is also just a stand-alone
13951 directive. */
13952 region = NULL;
13953 /* ..., this directive becomes the parent for a new region. */
13954 if (region)
13955 parent = region;
13959 if (single_tree && !parent)
13960 return;
13962 for (son = first_dom_son (CDI_DOMINATORS, bb);
13963 son;
13964 son = next_dom_son (CDI_DOMINATORS, son))
13965 build_omp_regions_1 (son, parent, single_tree);
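/* For illustration, for

     #pragma omp parallel
     #pragma omp for
     for (...) ...

   this walk produces the region tree

     GIMPLE_OMP_PARALLEL
       inner: GIMPLE_OMP_FOR

   with each region's exit set to the block holding its GIMPLE_OMP_RETURN,
   while stand-alone directives (target update, ordered depend, ...) get
   no region at all.  */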
13968 /* Builds the tree of OMP regions rooted at ROOT, storing it to
13969 root_omp_region. */
13971 static void
13972 build_omp_regions_root (basic_block root)
13974 gcc_assert (root_omp_region == NULL);
13975 build_omp_regions_1 (root, NULL, true);
13976 gcc_assert (root_omp_region != NULL);
13979 /* Expands omp construct (and its subconstructs) starting in HEAD. */
13981 void
13982 omp_expand_local (basic_block head)
13984 build_omp_regions_root (head);
13985 if (dump_file && (dump_flags & TDF_DETAILS))
13987 fprintf (dump_file, "\nOMP region tree\n\n");
13988 dump_omp_region (dump_file, root_omp_region, 0);
13989 fprintf (dump_file, "\n");
13992 remove_exit_barriers (root_omp_region);
13993 expand_omp (root_omp_region);
13995 free_omp_regions ();
13998 /* Scan the CFG and build a tree of OMP regions, storing the root
13999 in root_omp_region. */
14001 static void
14002 build_omp_regions (void)
14004 gcc_assert (root_omp_region == NULL);
14005 calculate_dominance_info (CDI_DOMINATORS);
14006 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
14009 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
14011 static unsigned int
14012 execute_expand_omp (void)
14014 build_omp_regions ();
14016 if (!root_omp_region)
14017 return 0;
14019 if (dump_file)
14021 fprintf (dump_file, "\nOMP region tree\n\n");
14022 dump_omp_region (dump_file, root_omp_region, 0);
14023 fprintf (dump_file, "\n");
14026 remove_exit_barriers (root_omp_region);
14028 expand_omp (root_omp_region);
14030 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
14031 verify_loop_structure ();
14032 cleanup_tree_cfg ();
14034 free_omp_regions ();
14036 return 0;
14039 /* OMP expansion -- the default pass, run before creation of SSA form. */
14041 namespace {
14043 const pass_data pass_data_expand_omp =
14045 GIMPLE_PASS, /* type */
14046 "ompexp", /* name */
14047 OPTGROUP_NONE, /* optinfo_flags */
14048 TV_NONE, /* tv_id */
14049 PROP_gimple_any, /* properties_required */
14050 PROP_gimple_eomp, /* properties_provided */
14051 0, /* properties_destroyed */
14052 0, /* todo_flags_start */
14053 0, /* todo_flags_finish */
14056 class pass_expand_omp : public gimple_opt_pass
14058 public:
14059 pass_expand_omp (gcc::context *ctxt)
14060 : gimple_opt_pass (pass_data_expand_omp, ctxt)
14063 /* opt_pass methods: */
14064 virtual unsigned int execute (function *)
14066 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
14067 || flag_openmp_simd != 0)
14068 && !seen_error ());
14070 /* This pass always runs, to provide PROP_gimple_eomp.
14071 But often, there is nothing to do. */
14072 if (!gate)
14073 return 0;
14075 return execute_expand_omp ();
14078 }; // class pass_expand_omp
14080 } // anon namespace
14082 gimple_opt_pass *
14083 make_pass_expand_omp (gcc::context *ctxt)
14085 return new pass_expand_omp (ctxt);
14088 namespace {
14090 const pass_data pass_data_expand_omp_ssa =
14092 GIMPLE_PASS, /* type */
14093 "ompexpssa", /* name */
14094 OPTGROUP_NONE, /* optinfo_flags */
14095 TV_NONE, /* tv_id */
14096 PROP_cfg | PROP_ssa, /* properties_required */
14097 PROP_gimple_eomp, /* properties_provided */
14098 0, /* properties_destroyed */
14099 0, /* todo_flags_start */
14100 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
14103 class pass_expand_omp_ssa : public gimple_opt_pass
14105 public:
14106 pass_expand_omp_ssa (gcc::context *ctxt)
14107 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
14110 /* opt_pass methods: */
14111 virtual bool gate (function *fun)
14113 return !(fun->curr_properties & PROP_gimple_eomp);
14115 virtual unsigned int execute (function *) { return execute_expand_omp (); }
14116 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
14118 }; // class pass_expand_omp_ssa
14120 } // anon namespace
14122 gimple_opt_pass *
14123 make_pass_expand_omp_ssa (gcc::context *ctxt)
14125 return new pass_expand_omp_ssa (ctxt);
14128 /* Routines to lower OMP directives into OMP-GIMPLE. */
14130 /* If CTX is a worksharing context inside of a cancellable parallel
14131 region and it isn't nowait, add an LHS to its GIMPLE_OMP_RETURN
14132 and a conditional branch to the parallel's cancel_label to handle
14133 cancellation in the implicit barrier. */
14135 static void
14136 maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
14138 gimple *omp_return = gimple_seq_last_stmt (*body);
14139 gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
14140 if (gimple_omp_return_nowait_p (omp_return))
14141 return;
14142 if (ctx->outer
14143 && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
14144 && ctx->outer->cancellable)
14146 tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
14147 tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
14148 tree lhs = create_tmp_var (c_bool_type);
14149 gimple_omp_return_set_lhs (omp_return, lhs);
14150 tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
14151 gimple *g = gimple_build_cond (NE_EXPR, lhs,
14152 fold_convert (c_bool_type,
14153 boolean_false_node),
14154 ctx->outer->cancel_label, fallthru_label);
14155 gimple_seq_add_stmt (body, g);
14156 gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
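/* For illustration, the statements appended above give the implicit
   barrier the shape

     <bool> D.1 = GIMPLE_OMP_RETURN;   <-- barrier that reports cancellation
     if (D.1 != 0) goto <cancel_label>; else goto <fallthru_label>;
     <fallthru_label>:

   so a thread observing cancellation branches to the enclosing
   parallel's cancel_label.  */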
14160 /* Lower the OpenMP sections directive in the current statement in GSI_P.
14161 CTX is the enclosing OMP context for the current statement. */
14163 static void
14164 lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14166 tree block, control;
14167 gimple_stmt_iterator tgsi;
14168 gomp_sections *stmt;
14169 gimple *t;
14170 gbind *new_stmt, *bind;
14171 gimple_seq ilist, dlist, olist, new_body;
14173 stmt = as_a <gomp_sections *> (gsi_stmt (*gsi_p));
14175 push_gimplify_context ();
14177 dlist = NULL;
14178 ilist = NULL;
14179 lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
14180 &ilist, &dlist, ctx, NULL);
14182 new_body = gimple_omp_body (stmt);
14183 gimple_omp_set_body (stmt, NULL);
14184 tgsi = gsi_start (new_body);
14185 for (; !gsi_end_p (tgsi); gsi_next (&tgsi))
14187 omp_context *sctx;
14188 gimple *sec_start;
14190 sec_start = gsi_stmt (tgsi);
14191 sctx = maybe_lookup_ctx (sec_start);
14192 gcc_assert (sctx);
14194 lower_omp (gimple_omp_body_ptr (sec_start), sctx);
14195 gsi_insert_seq_after (&tgsi, gimple_omp_body (sec_start),
14196 GSI_CONTINUE_LINKING);
14197 gimple_omp_set_body (sec_start, NULL);
14199 if (gsi_one_before_end_p (tgsi))
14201 gimple_seq l = NULL;
14202 lower_lastprivate_clauses (gimple_omp_sections_clauses (stmt), NULL,
14203 &l, ctx);
14204 gsi_insert_seq_after (&tgsi, l, GSI_CONTINUE_LINKING);
14205 gimple_omp_section_set_last (sec_start);
14208 gsi_insert_after (&tgsi, gimple_build_omp_return (false),
14209 GSI_CONTINUE_LINKING);
14212 block = make_node (BLOCK);
14213 bind = gimple_build_bind (NULL, new_body, block);
14215 olist = NULL;
14216 lower_reduction_clauses (gimple_omp_sections_clauses (stmt), &olist, ctx);
14218 block = make_node (BLOCK);
14219 new_stmt = gimple_build_bind (NULL, NULL, block);
14220 gsi_replace (gsi_p, new_stmt, true);
14222 pop_gimplify_context (new_stmt);
14223 gimple_bind_append_vars (new_stmt, ctx->block_vars);
14224 BLOCK_VARS (block) = gimple_bind_vars (bind);
14225 if (BLOCK_VARS (block))
14226 TREE_USED (block) = 1;
14228 new_body = NULL;
14229 gimple_seq_add_seq (&new_body, ilist);
14230 gimple_seq_add_stmt (&new_body, stmt);
14231 gimple_seq_add_stmt (&new_body, gimple_build_omp_sections_switch ());
14232 gimple_seq_add_stmt (&new_body, bind);
14234 control = create_tmp_var (unsigned_type_node, ".section");
14235 t = gimple_build_omp_continue (control, control);
14236 gimple_omp_sections_set_control (stmt, control);
14237 gimple_seq_add_stmt (&new_body, t);
14239 gimple_seq_add_seq (&new_body, olist);
14240 if (ctx->cancellable)
14241 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
14242 gimple_seq_add_seq (&new_body, dlist);
14244 new_body = maybe_catch_exception (new_body);
14246 t = gimple_build_omp_return
14247 (!!find_omp_clause (gimple_omp_sections_clauses (stmt),
14248 OMP_CLAUSE_NOWAIT));
14249 gimple_seq_add_stmt (&new_body, t);
14250 maybe_add_implicit_barrier_cancel (ctx, &new_body);
14252 gimple_bind_set_body (new_stmt, new_body);
14256 /* A subroutine of lower_omp_single. Expand the simple form of
14257 a GIMPLE_OMP_SINGLE, without a copyprivate clause:
14259 if (GOMP_single_start ())
14260 BODY;
14261 [ GOMP_barrier (); ] -> unless 'nowait' is present.
14263 FIXME. It may be better to delay expanding the logic of this until
14264 pass_expand_omp. The expanded logic may make the job more difficult
14265 for a synchronization analysis pass. */
14267 static void
14268 lower_omp_single_simple (gomp_single *single_stmt, gimple_seq *pre_p)
14270 location_t loc = gimple_location (single_stmt);
14271 tree tlabel = create_artificial_label (loc);
14272 tree flabel = create_artificial_label (loc);
14273 gimple *call, *cond;
14274 tree lhs, decl;
14276 decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_START);
14277 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
14278 call = gimple_build_call (decl, 0);
14279 gimple_call_set_lhs (call, lhs);
14280 gimple_seq_add_stmt (pre_p, call);
14282 cond = gimple_build_cond (EQ_EXPR, lhs,
14283 fold_convert_loc (loc, TREE_TYPE (lhs),
14284 boolean_true_node),
14285 tlabel, flabel);
14286 gimple_seq_add_stmt (pre_p, cond);
14287 gimple_seq_add_stmt (pre_p, gimple_build_label (tlabel));
14288 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14289 gimple_seq_add_stmt (pre_p, gimple_build_label (flabel));
14293 /* A subroutine of lower_omp_single. Expand the simple form of
14294 a GIMPLE_OMP_SINGLE, with a copyprivate clause:
14296 #pragma omp single copyprivate (a, b, c)
14298 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
14301 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
14303 BODY;
14304 copyout.a = a;
14305 copyout.b = b;
14306 copyout.c = c;
14307 GOMP_single_copy_end (&copyout);
14309 else
14311 a = copyout_p->a;
14312 b = copyout_p->b;
14313 c = copyout_p->c;
14315 GOMP_barrier ();
14318 FIXME. It may be better to delay expanding the logic of this until
14319 pass_expand_omp. The expanded logic may make the job more difficult
14320 for a synchronization analysis pass. */
14322 static void
14323 lower_omp_single_copy (gomp_single *single_stmt, gimple_seq *pre_p,
14324 omp_context *ctx)
14326 tree ptr_type, t, l0, l1, l2, bfn_decl;
14327 gimple_seq copyin_seq;
14328 location_t loc = gimple_location (single_stmt);
14330 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
14332 ptr_type = build_pointer_type (ctx->record_type);
14333 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
14335 l0 = create_artificial_label (loc);
14336 l1 = create_artificial_label (loc);
14337 l2 = create_artificial_label (loc);
14339 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_START);
14340 t = build_call_expr_loc (loc, bfn_decl, 0);
14341 t = fold_convert_loc (loc, ptr_type, t);
14342 gimplify_assign (ctx->receiver_decl, t, pre_p);
14344 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
14345 build_int_cst (ptr_type, 0));
14346 t = build3 (COND_EXPR, void_type_node, t,
14347 build_and_jump (&l0), build_and_jump (&l1));
14348 gimplify_and_add (t, pre_p);
14350 gimple_seq_add_stmt (pre_p, gimple_build_label (l0));
14352 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14354 copyin_seq = NULL;
14355 lower_copyprivate_clauses (gimple_omp_single_clauses (single_stmt), pre_p,
14356 &copyin_seq, ctx);
14358 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
14359 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_END);
14360 t = build_call_expr_loc (loc, bfn_decl, 1, t);
14361 gimplify_and_add (t, pre_p);
14363 t = build_and_jump (&l2);
14364 gimplify_and_add (t, pre_p);
14366 gimple_seq_add_stmt (pre_p, gimple_build_label (l1));
14368 gimple_seq_add_seq (pre_p, copyin_seq);
14370 gimple_seq_add_stmt (pre_p, gimple_build_label (l2));
14374 /* Expand code for an OpenMP single directive. */
14376 static void
14377 lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14379 tree block;
14380 gimple *t;
14381 gomp_single *single_stmt = as_a <gomp_single *> (gsi_stmt (*gsi_p));
14382 gbind *bind;
14383 gimple_seq bind_body, bind_body_tail = NULL, dlist;
14385 push_gimplify_context ();
14387 block = make_node (BLOCK);
14388 bind = gimple_build_bind (NULL, NULL, block);
14389 gsi_replace (gsi_p, bind, true);
14390 bind_body = NULL;
14391 dlist = NULL;
14392 lower_rec_input_clauses (gimple_omp_single_clauses (single_stmt),
14393 &bind_body, &dlist, ctx, NULL);
14394 lower_omp (gimple_omp_body_ptr (single_stmt), ctx);
14396 gimple_seq_add_stmt (&bind_body, single_stmt);
14398 if (ctx->record_type)
14399 lower_omp_single_copy (single_stmt, &bind_body, ctx);
14400 else
14401 lower_omp_single_simple (single_stmt, &bind_body);
14403 gimple_omp_set_body (single_stmt, NULL);
14405 gimple_seq_add_seq (&bind_body, dlist);
14407 bind_body = maybe_catch_exception (bind_body);
14409 t = gimple_build_omp_return
14410 (!!find_omp_clause (gimple_omp_single_clauses (single_stmt),
14411 OMP_CLAUSE_NOWAIT));
14412 gimple_seq_add_stmt (&bind_body_tail, t);
14413 maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
14414 if (ctx->record_type)
14416 gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
14417 tree clobber = build_constructor (ctx->record_type, NULL);
14418 TREE_THIS_VOLATILE (clobber) = 1;
14419 gsi_insert_after (&gsi, gimple_build_assign (ctx->sender_decl,
14420 clobber), GSI_SAME_STMT);
14422 gimple_seq_add_seq (&bind_body, bind_body_tail);
14423 gimple_bind_set_body (bind, bind_body);
14425 pop_gimplify_context (bind);
14427 gimple_bind_append_vars (bind, ctx->block_vars);
14428 BLOCK_VARS (block) = ctx->block_vars;
14429 if (BLOCK_VARS (block))
14430 TREE_USED (block) = 1;
14434 /* Expand code for an OpenMP master directive. */
14436 static void
14437 lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14439 tree block, lab = NULL, x, bfn_decl;
14440 gimple *stmt = gsi_stmt (*gsi_p);
14441 gbind *bind;
14442 location_t loc = gimple_location (stmt);
14443 gimple_seq tseq;
14445 push_gimplify_context ();
14447 block = make_node (BLOCK);
14448 bind = gimple_build_bind (NULL, NULL, block);
14449 gsi_replace (gsi_p, bind, true);
14450 gimple_bind_add_stmt (bind, stmt);
14452 bfn_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
14453 x = build_call_expr_loc (loc, bfn_decl, 0);
14454 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
14455 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
14456 tseq = NULL;
14457 gimplify_and_add (x, &tseq);
14458 gimple_bind_add_seq (bind, tseq);
14460 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14461 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14462 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14463 gimple_omp_set_body (stmt, NULL);
14465 gimple_bind_add_stmt (bind, gimple_build_label (lab));
14467 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14469 pop_gimplify_context (bind);
14471 gimple_bind_append_vars (bind, ctx->block_vars);
14472 BLOCK_VARS (block) = ctx->block_vars;
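/* For illustration, the lowering above turns "#pragma omp master BODY"
   into

     if (omp_get_thread_num () != 0) goto lab;
     BODY;
     lab:
     GIMPLE_OMP_RETURN (nowait);   <-- master implies no barrier

   where the COND_EXPR with a NULL then-branch is exactly this
   skip-around jump.  */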
14476 /* Expand code for an OpenMP taskgroup directive. */
14478 static void
14479 lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14481 gimple *stmt = gsi_stmt (*gsi_p);
14482 gcall *x;
14483 gbind *bind;
14484 tree block = make_node (BLOCK);
14486 bind = gimple_build_bind (NULL, NULL, block);
14487 gsi_replace (gsi_p, bind, true);
14488 gimple_bind_add_stmt (bind, stmt);
14490 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_START), 0);
14492 gimple_bind_add_stmt (bind, x);
14494 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14495 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14496 gimple_omp_set_body (stmt, NULL);
14498 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14500 gimple_bind_append_vars (bind, ctx->block_vars);
14501 BLOCK_VARS (block) = ctx->block_vars;
14505 /* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible. */
14507 static void
14508 lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt,
14509 omp_context *ctx)
14511 struct omp_for_data fd;
14512 if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
14513 return;
14515 unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
14516 struct omp_for_data_loop *loops = XALLOCAVEC (struct omp_for_data_loop, len);
14517 extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
14518 if (!fd.ordered)
14519 return;
14521 tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14522 tree c = gimple_omp_ordered_clauses (ord_stmt);
14523 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
14524 && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
14526 /* Merge depend clauses from multiple adjacent
14527 #pragma omp ordered depend(sink:...) constructs
14528 into one #pragma omp ordered depend(sink:...), so that
14529 we can optimize them together. */
14530 gimple_stmt_iterator gsi = *gsi_p;
14531 gsi_next (&gsi);
14532 while (!gsi_end_p (gsi))
14534 gimple *stmt = gsi_stmt (gsi);
14535 if (is_gimple_debug (stmt)
14536 || gimple_code (stmt) == GIMPLE_NOP)
14538 gsi_next (&gsi);
14539 continue;
14541 if (gimple_code (stmt) != GIMPLE_OMP_ORDERED)
14542 break;
14543 gomp_ordered *ord_stmt2 = as_a <gomp_ordered *> (stmt);
14544 c = gimple_omp_ordered_clauses (ord_stmt2);
14545 if (c == NULL_TREE
14546 || OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
14547 || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14548 break;
14549 while (*list_p)
14550 list_p = &OMP_CLAUSE_CHAIN (*list_p);
14551 *list_p = c;
14552 gsi_remove (&gsi, true);
14556 /* Canonicalize sink dependence clauses into one folded clause if
14557 possible.
14559 The basic algorithm is to create a sink vector whose first
14560 element is the GCD of all the first elements, and whose remaining
14561 elements are the minimum of the subsequent columns.
14563 We ignore dependence vectors whose first element is zero because
14564 such dependencies are known to be executed by the same thread.
14566 We take into account the direction of the loop, so a minimum
14567 becomes a maximum if the loop is iterating forwards. We also
14568 ignore sink clauses where the loop direction is unknown, or where
14569 the offsets are clearly invalid because they are not a multiple
14570 of the loop increment.
14572 For example:
14574 #pragma omp for ordered(2)
14575 for (i=0; i < N; ++i)
14576 for (j=0; j < M; ++j)
14578 #pragma omp ordered \
14579 depend(sink:i-8,j-2) \
14580 depend(sink:i,j-1) \ // Completely ignored because i+0.
14581 depend(sink:i-4,j-3) \
14582 depend(sink:i-6,j-4)
14583 #pragma omp ordered depend(source)
14586 Folded clause is:
14588 depend(sink:-gcd(8,4,6),-min(2,3,4))
14589 -or-
14590 depend(sink:-2,-2)  */
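/* Working through the numbers above: the first elements 8, 4 and 6
   fold to gcd (8, gcd (4, 6)) = 2, and the second column 2, 3, 4
   folds to min (2, 3, 4) = 2, which (negated, since the loops run
   forwards) yields the depend(sink:-2,-2) shown.  */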
14593 /* FIXME: Computing GCDs where the first element is zero is
14594 non-trivial in the presence of collapsed loops. Do this later. */
14595 if (fd.collapse > 1)
14596 return;
14598 wide_int *folded_deps = XALLOCAVEC (wide_int, 2 * len - 1);
14599 memset (folded_deps, 0, sizeof (*folded_deps) * (2 * len - 1));
14600 tree folded_dep = NULL_TREE;
14601 /* TRUE if the first dimension's offset is negative. */
14602 bool neg_offset_p = false;
14604 list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14605 unsigned int i;
14606 while ((c = *list_p) != NULL)
14608 bool remove = false;
14610 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND);
14611 if (OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14612 goto next_ordered_clause;
14614 tree vec;
14615 for (vec = OMP_CLAUSE_DECL (c), i = 0;
14616 vec && TREE_CODE (vec) == TREE_LIST;
14617 vec = TREE_CHAIN (vec), ++i)
14619 gcc_assert (i < len);
14621 /* extract_omp_for_data has canonicalized the condition. */
14622 gcc_assert (fd.loops[i].cond_code == LT_EXPR
14623 || fd.loops[i].cond_code == GT_EXPR);
14624 bool forward = fd.loops[i].cond_code == LT_EXPR;
14625 bool maybe_lexically_later = true;
14627 /* While the committee makes up its mind, bail if we have any
14628 non-constant steps. */
14629 if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
14630 goto lower_omp_ordered_ret;
14632 tree itype = TREE_TYPE (TREE_VALUE (vec));
14633 if (POINTER_TYPE_P (itype))
14634 itype = sizetype;
14635 wide_int offset = wide_int::from (TREE_PURPOSE (vec),
14636 TYPE_PRECISION (itype),
14637 TYPE_SIGN (itype));
14639 /* Ignore invalid offsets that are not multiples of the step. */
14640 if (!wi::multiple_of_p
14641 (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
14642 UNSIGNED))
14644 warning_at (OMP_CLAUSE_LOCATION (c), 0,
14645 "ignoring sink clause with offset that is not "
14646 "a multiple of the loop step");
14647 remove = true;
14648 goto next_ordered_clause;
14651 /* Calculate the first dimension. The first dimension of
14652 the folded dependency vector is the GCD of the first
14653 elements, while ignoring any first elements whose offset
14654 is 0. */
14655 if (i == 0)
14657 /* Ignore dependence vectors whose first dimension is 0. */
14658 if (offset == 0)
14660 remove = true;
14661 goto next_ordered_clause;
14663 else
14665 if (!TYPE_UNSIGNED (itype) && (forward ^ wi::neg_p (offset)))
14667 error_at (OMP_CLAUSE_LOCATION (c),
14668 "first offset must be in opposite direction "
14669 "of loop iterations");
14670 goto lower_omp_ordered_ret;
14672 if (forward)
14673 offset = -offset;
14674 neg_offset_p = forward;
14675 /* Initialize the first time around. */
14676 if (folded_dep == NULL_TREE)
14678 folded_dep = c;
14679 folded_deps[0] = offset;
14681 else
14682 folded_deps[0] = wi::gcd (folded_deps[0],
14683 offset, UNSIGNED);
14686 /* Calculate minimum for the remaining dimensions. */
14687 else
14689 folded_deps[len + i - 1] = offset;
14690 if (folded_dep == c)
14691 folded_deps[i] = offset;
14692 else if (maybe_lexically_later
14693 && !wi::eq_p (folded_deps[i], offset))
14695 if (forward ^ wi::gts_p (folded_deps[i], offset))
14697 unsigned int j;
14698 folded_dep = c;
14699 for (j = 1; j <= i; j++)
14700 folded_deps[j] = folded_deps[len + j - 1];
14702 else
14703 maybe_lexically_later = false;
14707 gcc_assert (i == len);
14709 remove = true;
14711 next_ordered_clause:
14712 if (remove)
14713 *list_p = OMP_CLAUSE_CHAIN (c);
14714 else
14715 list_p = &OMP_CLAUSE_CHAIN (c);
14718 if (folded_dep)
14720 if (neg_offset_p)
14721 folded_deps[0] = -folded_deps[0];
14723 tree itype = TREE_TYPE (TREE_VALUE (OMP_CLAUSE_DECL (folded_dep)));
14724 if (POINTER_TYPE_P (itype))
14725 itype = sizetype;
14727 TREE_PURPOSE (OMP_CLAUSE_DECL (folded_dep))
14728 = wide_int_to_tree (itype, folded_deps[0]);
14729 OMP_CLAUSE_CHAIN (folded_dep) = gimple_omp_ordered_clauses (ord_stmt);
14730 *gimple_omp_ordered_clauses_ptr (ord_stmt) = folded_dep;
14733 lower_omp_ordered_ret:
14735 /* An ordered construct without clauses is #pragma omp ordered threads,
14736 while we want a nop instead if we remove all clauses. */
14737 if (gimple_omp_ordered_clauses (ord_stmt) == NULL_TREE)
14738 gsi_replace (gsi_p, gimple_build_nop (), true);
14742 /* Lower code for an OpenMP ordered directive. */
14744 static void
14745 lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14747 tree block;
14748 gimple *stmt = gsi_stmt (*gsi_p);
14749 gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
14750 gcall *x;
14751 gbind *bind;
14752 bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14753 OMP_CLAUSE_SIMD);
14754 bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14755 OMP_CLAUSE_THREADS);
14757 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14758 OMP_CLAUSE_DEPEND))
14760 /* FIXME: This needs to be moved to the expansion phase to verify the
14761 various conditions that are only testable on a cfg with dominators
14762 computed, and also because all the depend clauses to be merged might
14763 still need to be available for the runtime checks. */
14764 if (0)
14765 lower_omp_ordered_clauses (gsi_p, ord_stmt, ctx);
14766 return;
14769 push_gimplify_context ();
14771 block = make_node (BLOCK);
14772 bind = gimple_build_bind (NULL, NULL, block);
14773 gsi_replace (gsi_p, bind, true);
14774 gimple_bind_add_stmt (bind, stmt);
14776 if (simd)
14778 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 1,
14779 build_int_cst (NULL_TREE, threads));
14780 cfun->has_simduid_loops = true;
14782 else
14783 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
14784 			0);
14785 gimple_bind_add_stmt (bind, x);
14787 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14788 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14789 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14790 gimple_omp_set_body (stmt, NULL);
14792 if (simd)
14793 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1,
14794 build_int_cst (NULL_TREE, threads));
14795 else
14796 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END),
14797 			0);
14798 gimple_bind_add_stmt (bind, x);
14800 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14802 pop_gimplify_context (bind);
14804 gimple_bind_append_vars (bind, ctx->block_vars);
14805 BLOCK_VARS (block) = gimple_bind_vars (bind);
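/* Illustrative sketch of the lowering above: a plain "#pragma omp ordered"
   body becomes roughly

     GOMP_ordered_start ();
     <body>;
     GOMP_ordered_end ();

   while "ordered simd" uses the IFN_GOMP_SIMD_ORDERED_START/END internal
   functions instead, so the construct stays visible to the later SIMD
   passes rather than becoming an opaque runtime call.  */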
14809 /* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
14810 substitution of a couple of function calls. But the NAMED case
14811 requires that languages coordinate on a symbol name. It is therefore
14812 best put here in common code. */
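/* Illustrative sketch: for "#pragma omp critical (foo)" the lowering
   below produces roughly

     GOMP_critical_name_start (&.gomp_critical_user_foo);
     <body>;
     GOMP_critical_name_end (&.gomp_critical_user_foo);

   where .gomp_critical_user_foo is the public, common variable created
   once per name; the unnamed form calls GOMP_critical_start () and
   GOMP_critical_end () with no argument.  */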
14814 static GTY(()) hash_map<tree, tree> *critical_name_mutexes;
14816 static void
14817 lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14819 tree block;
14820 tree name, lock, unlock;
14821 gomp_critical *stmt = as_a <gomp_critical *> (gsi_stmt (*gsi_p));
14822 gbind *bind;
14823 location_t loc = gimple_location (stmt);
14824 gimple_seq tbody;
14826 name = gimple_omp_critical_name (stmt);
14827 if (name)
14829 tree decl;
14831 if (!critical_name_mutexes)
14832 critical_name_mutexes = hash_map<tree, tree>::create_ggc (10);
14834 tree *n = critical_name_mutexes->get (name);
14835 if (n == NULL)
14837 char *new_str;
14839 decl = create_tmp_var_raw (ptr_type_node);
14841 new_str = ACONCAT ((".gomp_critical_user_",
14842 IDENTIFIER_POINTER (name), NULL));
14843 DECL_NAME (decl) = get_identifier (new_str);
14844 TREE_PUBLIC (decl) = 1;
14845 TREE_STATIC (decl) = 1;
14846 DECL_COMMON (decl) = 1;
14847 DECL_ARTIFICIAL (decl) = 1;
14848 DECL_IGNORED_P (decl) = 1;
14850 varpool_node::finalize_decl (decl);
14852 critical_name_mutexes->put (name, decl);
14854 else
14855 decl = *n;
14857 /* If '#pragma omp critical' is inside an offloaded region or
14858 inside a function marked as offloadable, the symbol must be
14859 marked as offloadable too. */
14860 omp_context *octx;
14861 if (cgraph_node::get (current_function_decl)->offloadable)
14862 varpool_node::get_create (decl)->offloadable = 1;
14863 else
14864 for (octx = ctx->outer; octx; octx = octx->outer)
14865 if (is_gimple_omp_offloaded (octx->stmt))
14867 varpool_node::get_create (decl)->offloadable = 1;
14868 break;
14871 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_START);
14872 lock = build_call_expr_loc (loc, lock, 1, build_fold_addr_expr_loc (loc, decl));
14874 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_END);
14875 unlock = build_call_expr_loc (loc, unlock, 1,
14876 build_fold_addr_expr_loc (loc, decl));
14878 else
14880 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_START);
14881 lock = build_call_expr_loc (loc, lock, 0);
14883 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_END);
14884 unlock = build_call_expr_loc (loc, unlock, 0);
14887 push_gimplify_context ();
14889 block = make_node (BLOCK);
14890 bind = gimple_build_bind (NULL, NULL, block);
14891 gsi_replace (gsi_p, bind, true);
14892 gimple_bind_add_stmt (bind, stmt);
14894 tbody = gimple_bind_body (bind);
14895 gimplify_and_add (lock, &tbody);
14896 gimple_bind_set_body (bind, tbody);
14898 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14899 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14900 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14901 gimple_omp_set_body (stmt, NULL);
14903 tbody = gimple_bind_body (bind);
14904 gimplify_and_add (unlock, &tbody);
14905 gimple_bind_set_body (bind, tbody);
14907 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14909 pop_gimplify_context (bind);
14910 gimple_bind_append_vars (bind, ctx->block_vars);
14911 BLOCK_VARS (block) = gimple_bind_vars (bind);
14915 /* A subroutine of lower_omp_for. Generate code to emit the predicate
14916 for a lastprivate clause. Given a loop control predicate of (V
14917 cond N2), we gate the clause on (!(V cond N2)). The lowered form
14918 is appended to *DLIST, iterator initialization is appended to
14919 *BODY_P. */
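/* For example (a sketch, with x_priv standing for the privatized copy):
   for "#pragma omp for lastprivate(x)" over "for (i = 0; i < n; i++)",
   this emits roughly

     i = 0;              // appended to *BODY_P
     ...
     if (i >= n)         // appended to *DLIST
       x = x_priv;

   and when the step is +1 or -1 the cheaper test "i == n" is used
   instead (see below).  */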
14921 static void
14922 lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
14923 gimple_seq *dlist, struct omp_context *ctx)
14925 tree clauses, cond, vinit;
14926 enum tree_code cond_code;
14927 gimple_seq stmts;
14929 cond_code = fd->loop.cond_code;
14930 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
14932 /* When possible, use a strict equality expression. This can let VRP
14933 and similar optimizations deduce the value and remove a copy. */
14934 if (tree_fits_shwi_p (fd->loop.step))
14936 HOST_WIDE_INT step = tree_to_shwi (fd->loop.step);
14937 if (step == 1 || step == -1)
14938 cond_code = EQ_EXPR;
14941 tree n2 = fd->loop.n2;
14942 if (fd->collapse > 1
14943 && TREE_CODE (n2) != INTEGER_CST
14944 && gimple_omp_for_combined_into_p (fd->for_stmt))
14946 struct omp_context *taskreg_ctx = NULL;
14947 if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
14949 gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
14950 if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR
14951 || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE)
14953 if (gimple_omp_for_combined_into_p (gfor))
14955 gcc_assert (ctx->outer->outer
14956 && is_parallel_ctx (ctx->outer->outer));
14957 taskreg_ctx = ctx->outer->outer;
14959 else
14961 struct omp_for_data outer_fd;
14962 extract_omp_for_data (gfor, &outer_fd, NULL);
14963 n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
14966 else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
14967 taskreg_ctx = ctx->outer->outer;
14969 else if (is_taskreg_ctx (ctx->outer))
14970 taskreg_ctx = ctx->outer;
14971 if (taskreg_ctx)
14973 int i;
14974 tree innerc
14975 = find_omp_clause (gimple_omp_taskreg_clauses (taskreg_ctx->stmt),
14976 OMP_CLAUSE__LOOPTEMP_);
14977 gcc_assert (innerc);
14978 for (i = 0; i < fd->collapse; i++)
14980 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
14981 OMP_CLAUSE__LOOPTEMP_);
14982 gcc_assert (innerc);
14984 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
14985 OMP_CLAUSE__LOOPTEMP_);
14986 if (innerc)
14987 n2 = fold_convert (TREE_TYPE (n2),
14988 lookup_decl (OMP_CLAUSE_DECL (innerc),
14989 taskreg_ctx));
14992 cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
14994 clauses = gimple_omp_for_clauses (fd->for_stmt);
14995 stmts = NULL;
14996 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
14997 if (!gimple_seq_empty_p (stmts))
14999 gimple_seq_add_seq (&stmts, *dlist);
15000 *dlist = stmts;
15002 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
15003 vinit = fd->loop.n1;
15004 if (cond_code == EQ_EXPR
15005 && tree_fits_shwi_p (fd->loop.n2)
15006 && ! integer_zerop (fd->loop.n2))
15007 vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0);
15008 else
15009 vinit = unshare_expr (vinit);
15011 /* Initialize the iterator variable, so that threads that don't execute
15012 any iterations don't execute the lastprivate clauses by accident. */
15013 gimplify_assign (fd->loop.v, vinit, body_p);
15018 /* Lower code for an OMP loop directive. */
15020 static void
15021 lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15023 tree *rhs_p, block;
15024 struct omp_for_data fd, *fdp = NULL;
15025 gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
15026 gbind *new_stmt;
15027 gimple_seq omp_for_body, body, dlist;
15028 gimple_seq oacc_head = NULL, oacc_tail = NULL;
15029 size_t i;
15031 push_gimplify_context ();
15033 lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
15035 block = make_node (BLOCK);
15036 new_stmt = gimple_build_bind (NULL, NULL, block);
15037 /* Replace at gsi right away, so that 'stmt' is no longer a member
15038 of a sequence, as we're going to add it to a different
15039 one below. */
15040 gsi_replace (gsi_p, new_stmt, true);
15042 /* Move declarations of temporaries out of the loop body before we
15043 make it go away. */
15044 omp_for_body = gimple_omp_body (stmt);
15045 if (!gimple_seq_empty_p (omp_for_body)
15046 && gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
15048 gbind *inner_bind
15049 = as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
15050 tree vars = gimple_bind_vars (inner_bind);
15051 gimple_bind_append_vars (new_stmt, vars);
15052 /* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
15053 keep them on the inner_bind and its block. */
15054 gimple_bind_set_vars (inner_bind, NULL_TREE);
15055 if (gimple_bind_block (inner_bind))
15056 BLOCK_VARS (gimple_bind_block (inner_bind)) = NULL_TREE;
15059 if (gimple_omp_for_combined_into_p (stmt))
15061 extract_omp_for_data (stmt, &fd, NULL);
15062 fdp = &fd;
15064 /* We need two temporaries with fd.loop.v type (istart/iend)
15065 and then (fd.collapse - 1) temporaries with the same
15066 type for count2 ... countN-1 vars if not constant. */
15067 size_t count = 2;
15068 tree type = fd.iter_type;
15069 if (fd.collapse > 1
15070 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
15071 count += fd.collapse - 1;
15072 bool taskreg_for
15073 = (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
15074 || gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
15075 tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
15076 tree clauses = *pc;
15077 if (taskreg_for)
15078 outerc
15079 = find_omp_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
15080 OMP_CLAUSE__LOOPTEMP_);
15081 for (i = 0; i < count; i++)
15083 tree temp;
15084 if (taskreg_for)
15086 gcc_assert (outerc);
15087 temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer);
15088 outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc),
15089 OMP_CLAUSE__LOOPTEMP_);
15091 else
15093 temp = create_tmp_var (type);
15094 insert_decl_map (&ctx->outer->cb, temp, temp);
15096 *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
15097 OMP_CLAUSE_DECL (*pc) = temp;
15098 pc = &OMP_CLAUSE_CHAIN (*pc);
15100 *pc = clauses;
15103 /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
15104 dlist = NULL;
15105 body = NULL;
15106 lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
15107 fdp);
15108 gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
15110 lower_omp (gimple_omp_body_ptr (stmt), ctx);
15112 /* Lower the header expressions. At this point, we can assume that
15113 the header is of the form:
15115 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
15117 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
15118 using the .omp_data_s mapping, if needed. */
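/* E.g. (a sketch, names hypothetical): a non-invariant bound such as
   "V < n * 2" is rewritten below as

     D.tmp = n * 2;
     ... V < D.tmp ...

   where D.tmp is the formal temporary produced by get_formal_tmp_var.  */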
15119 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
15121 rhs_p = gimple_omp_for_initial_ptr (stmt, i);
15122 if (!is_gimple_min_invariant (*rhs_p))
15123 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15125 rhs_p = gimple_omp_for_final_ptr (stmt, i);
15126 if (!is_gimple_min_invariant (*rhs_p))
15127 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15129 rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
15130 if (!is_gimple_min_invariant (*rhs_p))
15131 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15134 /* Once lowered, extract the bounds and clauses. */
15135 extract_omp_for_data (stmt, &fd, NULL);
15137 if (is_gimple_omp_oacc (ctx->stmt)
15138 && !ctx_in_oacc_kernels_region (ctx))
15139 lower_oacc_head_tail (gimple_location (stmt),
15140 gimple_omp_for_clauses (stmt),
15141 &oacc_head, &oacc_tail, ctx);
15143 /* Add OpenACC partitioning and reduction markers just before the loop. */
15144 if (oacc_head)
15145 gimple_seq_add_seq (&body, oacc_head);
15147 lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
15149 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
15150 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
15151 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
15152 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
15154 OMP_CLAUSE_DECL (c) = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
15155 if (DECL_P (OMP_CLAUSE_LINEAR_STEP (c)))
15156 OMP_CLAUSE_LINEAR_STEP (c)
15157 = maybe_lookup_decl_in_outer_ctx (OMP_CLAUSE_LINEAR_STEP (c),
15158 ctx);
15161 if (!gimple_omp_for_grid_phony (stmt))
15162 gimple_seq_add_stmt (&body, stmt);
15163 gimple_seq_add_seq (&body, gimple_omp_body (stmt));
15165 if (!gimple_omp_for_grid_phony (stmt))
15166 gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
15167 fd.loop.v));
15169 /* After the loop, add exit clauses. */
15170 lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx);
15172 if (ctx->cancellable)
15173 gimple_seq_add_stmt (&body, gimple_build_label (ctx->cancel_label));
15175 gimple_seq_add_seq (&body, dlist);
15177 body = maybe_catch_exception (body);
15179 if (!gimple_omp_for_grid_phony (stmt))
15181 /* Region exit marker goes at the end of the loop body. */
15182 gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
15183 maybe_add_implicit_barrier_cancel (ctx, &body);
15186 /* Add OpenACC joining and reduction markers just after the loop. */
15187 if (oacc_tail)
15188 gimple_seq_add_seq (&body, oacc_tail);
15190 pop_gimplify_context (new_stmt);
15192 gimple_bind_append_vars (new_stmt, ctx->block_vars);
15193 BLOCK_VARS (block) = gimple_bind_vars (new_stmt);
15194 if (BLOCK_VARS (block))
15195 TREE_USED (block) = 1;
15197 gimple_bind_set_body (new_stmt, body);
15198 gimple_omp_set_body (stmt, NULL);
15199 gimple_omp_for_set_pre_body (stmt, NULL);
15202 /* Callback for walk_stmts. Check whether the current statement contains
15203 nothing but a single GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS. */
15205 static tree
15206 check_combined_parallel (gimple_stmt_iterator *gsi_p,
15207 bool *handled_ops_p,
15208 struct walk_stmt_info *wi)
15210 int *info = (int *) wi->info;
15211 gimple *stmt = gsi_stmt (*gsi_p);
15213 *handled_ops_p = true;
15214 switch (gimple_code (stmt))
15216 WALK_SUBSTMTS;
15218 case GIMPLE_OMP_FOR:
15219 case GIMPLE_OMP_SECTIONS:
15220 *info = *info == 0 ? 1 : -1;
15221 break;
15222 default:
15223 *info = -1;
15224 break;
15226 return NULL;
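/* Illustrative sketch: for

     #pragma omp parallel
     #pragma omp for
     for (...) ...

   the walk leaves *INFO at 1 and the caller marks the parallel as
   combined; a second workshare, or any statement other than the
   containers handled by WALK_SUBSTMTS, forces *INFO to -1.  */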
15229 struct omp_taskcopy_context
15231 /* This field must be at the beginning, as we do "inheritance": Some
15232 callback functions for tree-inline.c (e.g., omp_copy_decl)
15233 receive a copy_body_data pointer that is cast back to an
15234 omp_taskcopy_context pointer. */
15235 copy_body_data cb;
15236 omp_context *ctx;
15239 static tree
15240 task_copyfn_copy_decl (tree var, copy_body_data *cb)
15242 struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb;
15244 if (splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var))
15245 return create_tmp_var (TREE_TYPE (var));
15247 return var;
15250 static tree
15251 task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type)
15253 tree name, new_fields = NULL, type, f;
15255 type = lang_hooks.types.make_type (RECORD_TYPE);
15256 name = DECL_NAME (TYPE_NAME (orig_type));
15257 name = build_decl (gimple_location (tcctx->ctx->stmt),
15258 TYPE_DECL, name, type);
15259 TYPE_NAME (type) = name;
15261 for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f))
15263 tree new_f = copy_node (f);
15264 DECL_CONTEXT (new_f) = type;
15265 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb);
15266 TREE_CHAIN (new_f) = new_fields;
15267 walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15268 walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15269 walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
15270 &tcctx->cb, NULL);
15271 new_fields = new_f;
15272 tcctx->cb.decl_map->put (f, new_f);
15274 TYPE_FIELDS (type) = nreverse (new_fields);
15275 layout_type (type);
15276 return type;
15279 /* Create task copyfn. */
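/* Illustrative sketch (record and field names hypothetical): for
   "#pragma omp task shared(s) firstprivate(x)" the generated copy
   function looks roughly like

     void task_copyfn (struct .omp_data_t *dst, struct .omp_data_s *src)
     {
       dst->s = src->s;   // copy the shared var's address
       dst->x = src->x;   // copy-construct the firstprivate var
     }

   with the extra passes below handling VLA firstprivates and types
   that must be remapped because they are variably modified.  */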
15281 static void
15282 create_task_copyfn (gomp_task *task_stmt, omp_context *ctx)
15284 struct function *child_cfun;
15285 tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl;
15286 tree record_type, srecord_type, bind, list;
15287 bool record_needs_remap = false, srecord_needs_remap = false;
15288 splay_tree_node n;
15289 struct omp_taskcopy_context tcctx;
15290 location_t loc = gimple_location (task_stmt);
15292 child_fn = gimple_omp_task_copy_fn (task_stmt);
15293 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
15294 gcc_assert (child_cfun->cfg == NULL);
15295 DECL_SAVED_TREE (child_fn) = alloc_stmt_list ();
15297 /* Reset DECL_CONTEXT on function arguments. */
15298 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
15299 DECL_CONTEXT (t) = child_fn;
15301 /* Populate the function. */
15302 push_gimplify_context ();
15303 push_cfun (child_cfun);
15305 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
15306 TREE_SIDE_EFFECTS (bind) = 1;
15307 list = NULL;
15308 DECL_SAVED_TREE (child_fn) = bind;
15309 DECL_SOURCE_LOCATION (child_fn) = gimple_location (task_stmt);
15311 /* Remap src and dst argument types if needed. */
15312 record_type = ctx->record_type;
15313 srecord_type = ctx->srecord_type;
15314 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
15315 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15317 record_needs_remap = true;
15318 break;
15320 for (f = TYPE_FIELDS (srecord_type); f ; f = DECL_CHAIN (f))
15321 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15323 srecord_needs_remap = true;
15324 break;
15327 if (record_needs_remap || srecord_needs_remap)
15329 memset (&tcctx, '\0', sizeof (tcctx));
15330 tcctx.cb.src_fn = ctx->cb.src_fn;
15331 tcctx.cb.dst_fn = child_fn;
15332 tcctx.cb.src_node = cgraph_node::get (tcctx.cb.src_fn);
15333 gcc_checking_assert (tcctx.cb.src_node);
15334 tcctx.cb.dst_node = tcctx.cb.src_node;
15335 tcctx.cb.src_cfun = ctx->cb.src_cfun;
15336 tcctx.cb.copy_decl = task_copyfn_copy_decl;
15337 tcctx.cb.eh_lp_nr = 0;
15338 tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE;
15339 tcctx.cb.decl_map = new hash_map<tree, tree>;
15340 tcctx.ctx = ctx;
15342 if (record_needs_remap)
15343 record_type = task_copyfn_remap_type (&tcctx, record_type);
15344 if (srecord_needs_remap)
15345 srecord_type = task_copyfn_remap_type (&tcctx, srecord_type);
15347 else
15348 tcctx.cb.decl_map = NULL;
15350 arg = DECL_ARGUMENTS (child_fn);
15351 TREE_TYPE (arg) = build_pointer_type (record_type);
15352 sarg = DECL_CHAIN (arg);
15353 TREE_TYPE (sarg) = build_pointer_type (srecord_type);
15355 /* First pass: initialize temporaries used in record_type and srecord_type
15356 sizes and field offsets. */
15357 if (tcctx.cb.decl_map)
15358 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15359 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15361 tree *p;
15363 decl = OMP_CLAUSE_DECL (c);
15364 p = tcctx.cb.decl_map->get (decl);
15365 if (p == NULL)
15366 continue;
15367 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15368 sf = (tree) n->value;
15369 sf = *tcctx.cb.decl_map->get (sf);
15370 src = build_simple_mem_ref_loc (loc, sarg);
15371 src = omp_build_component_ref (src, sf);
15372 t = build2 (MODIFY_EXPR, TREE_TYPE (*p), *p, src);
15373 append_to_statement_list (t, &list);
15376 /* Second pass: copy shared var pointers and copy-construct non-VLA
15377 firstprivate vars. */
15378 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15379 switch (OMP_CLAUSE_CODE (c))
15381 splay_tree_key key;
15382 case OMP_CLAUSE_SHARED:
15383 decl = OMP_CLAUSE_DECL (c);
15384 key = (splay_tree_key) decl;
15385 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
15386 key = (splay_tree_key) &DECL_UID (decl);
15387 n = splay_tree_lookup (ctx->field_map, key);
15388 if (n == NULL)
15389 break;
15390 f = (tree) n->value;
15391 if (tcctx.cb.decl_map)
15392 f = *tcctx.cb.decl_map->get (f);
15393 n = splay_tree_lookup (ctx->sfield_map, key);
15394 sf = (tree) n->value;
15395 if (tcctx.cb.decl_map)
15396 sf = *tcctx.cb.decl_map->get (sf);
15397 src = build_simple_mem_ref_loc (loc, sarg);
15398 src = omp_build_component_ref (src, sf);
15399 dst = build_simple_mem_ref_loc (loc, arg);
15400 dst = omp_build_component_ref (dst, f);
15401 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15402 append_to_statement_list (t, &list);
15403 break;
15404 case OMP_CLAUSE_FIRSTPRIVATE:
15405 decl = OMP_CLAUSE_DECL (c);
15406 if (is_variable_sized (decl))
15407 break;
15408 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15409 if (n == NULL)
15410 break;
15411 f = (tree) n->value;
15412 if (tcctx.cb.decl_map)
15413 f = *tcctx.cb.decl_map->get (f);
15414 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15415 if (n != NULL)
15417 sf = (tree) n->value;
15418 if (tcctx.cb.decl_map)
15419 sf = *tcctx.cb.decl_map->get (sf);
15420 src = build_simple_mem_ref_loc (loc, sarg);
15421 src = omp_build_component_ref (src, sf);
15422 if (use_pointer_for_field (decl, NULL) || is_reference (decl))
15423 src = build_simple_mem_ref_loc (loc, src);
15425 else
15426 src = decl;
15427 dst = build_simple_mem_ref_loc (loc, arg);
15428 dst = omp_build_component_ref (dst, f);
15429 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15430 append_to_statement_list (t, &list);
15431 break;
15432 case OMP_CLAUSE_PRIVATE:
15433 if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c))
15434 break;
15435 decl = OMP_CLAUSE_DECL (c);
15436 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15437 f = (tree) n->value;
15438 if (tcctx.cb.decl_map)
15439 f = *tcctx.cb.decl_map->get (f);
15440 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15441 if (n != NULL)
15443 sf = (tree) n->value;
15444 if (tcctx.cb.decl_map)
15445 sf = *tcctx.cb.decl_map->get (sf);
15446 src = build_simple_mem_ref_loc (loc, sarg);
15447 src = omp_build_component_ref (src, sf);
15448 if (use_pointer_for_field (decl, NULL))
15449 src = build_simple_mem_ref_loc (loc, src);
15451 else
15452 src = decl;
15453 dst = build_simple_mem_ref_loc (loc, arg);
15454 dst = omp_build_component_ref (dst, f);
15455 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15456 append_to_statement_list (t, &list);
15457 break;
15458 default:
15459 break;
15462 /* Last pass: handle VLA firstprivates. */
15463 if (tcctx.cb.decl_map)
15464 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15465 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15467 tree ind, ptr, df;
15469 decl = OMP_CLAUSE_DECL (c);
15470 if (!is_variable_sized (decl))
15471 continue;
15472 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15473 if (n == NULL)
15474 continue;
15475 f = (tree) n->value;
15476 f = *tcctx.cb.decl_map->get (f);
15477 gcc_assert (DECL_HAS_VALUE_EXPR_P (decl));
15478 ind = DECL_VALUE_EXPR (decl);
15479 gcc_assert (TREE_CODE (ind) == INDIRECT_REF);
15480 gcc_assert (DECL_P (TREE_OPERAND (ind, 0)));
15481 n = splay_tree_lookup (ctx->sfield_map,
15482 (splay_tree_key) TREE_OPERAND (ind, 0));
15483 sf = (tree) n->value;
15484 sf = *tcctx.cb.decl_map->get (sf);
15485 src = build_simple_mem_ref_loc (loc, sarg);
15486 src = omp_build_component_ref (src, sf);
15487 src = build_simple_mem_ref_loc (loc, src);
15488 dst = build_simple_mem_ref_loc (loc, arg);
15489 dst = omp_build_component_ref (dst, f);
15490 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15491 append_to_statement_list (t, &list);
15492 n = splay_tree_lookup (ctx->field_map,
15493 (splay_tree_key) TREE_OPERAND (ind, 0));
15494 df = (tree) n->value;
15495 df = *tcctx.cb.decl_map->get (df);
15496 ptr = build_simple_mem_ref_loc (loc, arg);
15497 ptr = omp_build_component_ref (ptr, df);
15498 t = build2 (MODIFY_EXPR, TREE_TYPE (ptr), ptr,
15499 build_fold_addr_expr_loc (loc, dst));
15500 append_to_statement_list (t, &list);
15503 t = build1 (RETURN_EXPR, void_type_node, NULL);
15504 append_to_statement_list (t, &list);
15506 if (tcctx.cb.decl_map)
15507 delete tcctx.cb.decl_map;
15508 pop_gimplify_context (NULL);
15509 BIND_EXPR_BODY (bind) = list;
15510 pop_cfun ();
15513 static void
15514 lower_depend_clauses (tree *pclauses, gimple_seq *iseq, gimple_seq *oseq)
15516 tree c, clauses;
15517 gimple *g;
15518 size_t n_in = 0, n_out = 0, idx = 2, i;
15520 clauses = find_omp_clause (*pclauses, OMP_CLAUSE_DEPEND);
15521 gcc_assert (clauses);
15522 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
15523 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND)
15524 switch (OMP_CLAUSE_DEPEND_KIND (c))
15526 case OMP_CLAUSE_DEPEND_IN:
15527 n_in++;
15528 break;
15529 case OMP_CLAUSE_DEPEND_OUT:
15530 case OMP_CLAUSE_DEPEND_INOUT:
15531 n_out++;
15532 break;
15533 case OMP_CLAUSE_DEPEND_SOURCE:
15534 case OMP_CLAUSE_DEPEND_SINK:
15535 /* FALLTHRU */
15536 default:
15537 gcc_unreachable ();
15539 tree type = build_array_type_nelts (ptr_type_node, n_in + n_out + 2);
15540 tree array = create_tmp_var (type);
15541 TREE_ADDRESSABLE (array) = 1;
15542 tree r = build4 (ARRAY_REF, ptr_type_node, array, size_int (0), NULL_TREE,
15543 NULL_TREE);
15544 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_in + n_out));
15545 gimple_seq_add_stmt (iseq, g);
15546 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (1), NULL_TREE,
15547 NULL_TREE);
15548 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_out));
15549 gimple_seq_add_stmt (iseq, g);
15550 for (i = 0; i < 2; i++)
15552 if ((i ? n_in : n_out) == 0)
15553 continue;
15554 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
15555 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
15556 && ((OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_IN) ^ i))
15558 tree t = OMP_CLAUSE_DECL (c);
15559 t = fold_convert (ptr_type_node, t);
15560 gimplify_expr (&t, iseq, NULL, is_gimple_val, fb_rvalue);
15561 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (idx++),
15562 NULL_TREE, NULL_TREE);
15563 g = gimple_build_assign (r, t);
15564 gimple_seq_add_stmt (iseq, g);
15567 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
15568 OMP_CLAUSE_DECL (c) = build_fold_addr_expr (array);
15569 OMP_CLAUSE_CHAIN (c) = *pclauses;
15570 *pclauses = c;
15571 tree clobber = build_constructor (type, NULL);
15572 TREE_THIS_VOLATILE (clobber) = 1;
15573 g = gimple_build_assign (array, clobber);
15574 gimple_seq_add_stmt (oseq, g);
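/* For example (a sketch, array name hypothetical): for
   "depend(out: a) depend(in: b, c)" the code above builds

     void *D.deps[5] = { (void *) 3, (void *) 1, &a, &b, &c };

   i.e. element 0 is the total number of addresses, element 1 the
   number of out/inout entries, followed by the out/inout addresses
   and then the in addresses; *OSEQ clobbers the array once the
   construct is done with it.  */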
15577 /* Lower the OpenMP parallel or task directive in the current statement
15578 in GSI_P. CTX holds context information for the directive. */
15580 static void
15581 lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15583 tree clauses;
15584 tree child_fn, t;
15585 gimple *stmt = gsi_stmt (*gsi_p);
15586 gbind *par_bind, *bind, *dep_bind = NULL;
15587 gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body;
15588 location_t loc = gimple_location (stmt);
15590 clauses = gimple_omp_taskreg_clauses (stmt);
15591 par_bind
15592 = as_a <gbind *> (gimple_seq_first_stmt (gimple_omp_body (stmt)));
15593 par_body = gimple_bind_body (par_bind);
15594 child_fn = ctx->cb.dst_fn;
15595 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
15596 && !gimple_omp_parallel_combined_p (stmt))
15598 struct walk_stmt_info wi;
15599 int ws_num = 0;
15601 memset (&wi, 0, sizeof (wi));
15602 wi.info = &ws_num;
15603 wi.val_only = true;
15604 walk_gimple_seq (par_body, check_combined_parallel, NULL, &wi);
15605 if (ws_num == 1)
15606 gimple_omp_parallel_set_combined_p (stmt, true);
15608 gimple_seq dep_ilist = NULL;
15609 gimple_seq dep_olist = NULL;
15610 if (gimple_code (stmt) == GIMPLE_OMP_TASK
15611 && find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
15613 push_gimplify_context ();
15614 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
15615 lower_depend_clauses (gimple_omp_task_clauses_ptr (stmt),
15616 &dep_ilist, &dep_olist);
15619 if (ctx->srecord_type)
15620 create_task_copyfn (as_a <gomp_task *> (stmt), ctx);
15622 push_gimplify_context ();
15624 par_olist = NULL;
15625 par_ilist = NULL;
15626 par_rlist = NULL;
15627 bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL
15628 && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt));
15629 if (phony_construct && ctx->record_type)
15631 gcc_checking_assert (!ctx->receiver_decl);
15632 ctx->receiver_decl = create_tmp_var
15633 (build_reference_type (ctx->record_type), ".omp_rec");
15635 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL);
15636 lower_omp (&par_body, ctx);
15637 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
15638 lower_reduction_clauses (clauses, &par_rlist, ctx);
15640 /* Declare all the variables created by mapping and the variables
15641 declared in the scope of the parallel body. */
15642 record_vars_into (ctx->block_vars, child_fn);
15643 record_vars_into (gimple_bind_vars (par_bind), child_fn);
15645 if (ctx->record_type)
15647 ctx->sender_decl
15648 = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
15649 : ctx->record_type, ".omp_data_o");
15650 DECL_NAMELESS (ctx->sender_decl) = 1;
15651 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
15652 gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
15655 olist = NULL;
15656 ilist = NULL;
15657 lower_send_clauses (clauses, &ilist, &olist, ctx);
15658 lower_send_shared_vars (&ilist, &olist, ctx);
15660 if (ctx->record_type)
15662 tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
15663 TREE_THIS_VOLATILE (clobber) = 1;
15664 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
15665 clobber));
15668 /* Once all the expansions are done, sequence all the different
15669 fragments inside gimple_omp_body. */
15671 new_body = NULL;
15673 if (ctx->record_type)
15675 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
15676 /* fixup_child_record_type might have changed receiver_decl's type. */
15677 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
15678 gimple_seq_add_stmt (&new_body,
15679 gimple_build_assign (ctx->receiver_decl, t));
15682 gimple_seq_add_seq (&new_body, par_ilist);
15683 gimple_seq_add_seq (&new_body, par_body);
15684 gimple_seq_add_seq (&new_body, par_rlist);
15685 if (ctx->cancellable)
15686 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
15687 gimple_seq_add_seq (&new_body, par_olist);
15688 new_body = maybe_catch_exception (new_body);
15689 if (gimple_code (stmt) == GIMPLE_OMP_TASK)
15690 gimple_seq_add_stmt (&new_body,
15691 gimple_build_omp_continue (integer_zero_node,
15692 integer_zero_node));
15693 if (!phony_construct)
15695 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
15696 gimple_omp_set_body (stmt, new_body);
15699 bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
15700 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
15701 gimple_bind_add_seq (bind, ilist);
15702 if (!phony_construct)
15703 gimple_bind_add_stmt (bind, stmt);
15704 else
15705 gimple_bind_add_seq (bind, new_body);
15706 gimple_bind_add_seq (bind, olist);
15708 pop_gimplify_context (NULL);
15710 if (dep_bind)
15712 gimple_bind_add_seq (dep_bind, dep_ilist);
15713 gimple_bind_add_stmt (dep_bind, bind);
15714 gimple_bind_add_seq (dep_bind, dep_olist);
15715 pop_gimplify_context (dep_bind);
15719 /* Lower the GIMPLE_OMP_TARGET in the current statement
15720 in GSI_P. CTX holds context information for the directive. */
15722 static void
15723 lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15725 tree clauses;
15726 tree child_fn, t, c;
15727 gomp_target *stmt = as_a <gomp_target *> (gsi_stmt (*gsi_p));
15728 gbind *tgt_bind, *bind, *dep_bind = NULL;
15729 gimple_seq tgt_body, olist, ilist, fplist, new_body;
15730 location_t loc = gimple_location (stmt);
15731 bool offloaded, data_region;
15732 unsigned int map_cnt = 0;
15733 bool has_depend = false;
15735 offloaded = is_gimple_omp_offloaded (stmt);
15736 switch (gimple_omp_target_kind (stmt))
15738 case GF_OMP_TARGET_KIND_REGION:
15739 case GF_OMP_TARGET_KIND_UPDATE:
15740 case GF_OMP_TARGET_KIND_ENTER_DATA:
15741 case GF_OMP_TARGET_KIND_EXIT_DATA:
15742 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
15743 case GF_OMP_TARGET_KIND_OACC_KERNELS:
15744 case GF_OMP_TARGET_KIND_OACC_UPDATE:
15745 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
15746 case GF_OMP_TARGET_KIND_OACC_DECLARE:
15747 data_region = false;
15748 break;
15749 case GF_OMP_TARGET_KIND_DATA:
15750 case GF_OMP_TARGET_KIND_OACC_DATA:
15751 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
15752 data_region = true;
15753 break;
15754 default:
15755 gcc_unreachable ();
15758 clauses = gimple_omp_target_clauses (stmt);
15760 gimple_seq dep_ilist = NULL;
15761 gimple_seq dep_olist = NULL;
15762 if (find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
15764 push_gimplify_context ();
15765 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
15766 lower_depend_clauses (gimple_omp_target_clauses_ptr (stmt),
15767 &dep_ilist, &dep_olist);
15768 has_depend = true;
15771 tgt_bind = NULL;
15772 tgt_body = NULL;
15773 if (offloaded)
15775 tgt_bind = gimple_seq_first_stmt_as_a_bind (gimple_omp_body (stmt));
15776 tgt_body = gimple_bind_body (tgt_bind);
15778 else if (data_region)
15779 tgt_body = gimple_omp_body (stmt);
15780 child_fn = ctx->cb.dst_fn;
15782 push_gimplify_context ();
15783 fplist = NULL;
15785 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
15786 switch (OMP_CLAUSE_CODE (c))
15788 tree var, x;
15790 default:
15791 break;
15792 case OMP_CLAUSE_MAP:
15793 #if CHECKING_P
15794 /* First check what we're prepared to handle in the following. */
15795 switch (OMP_CLAUSE_MAP_KIND (c))
15797 case GOMP_MAP_ALLOC:
15798 case GOMP_MAP_TO:
15799 case GOMP_MAP_FROM:
15800 case GOMP_MAP_TOFROM:
15801 case GOMP_MAP_POINTER:
15802 case GOMP_MAP_TO_PSET:
15803 case GOMP_MAP_DELETE:
15804 case GOMP_MAP_RELEASE:
15805 case GOMP_MAP_ALWAYS_TO:
15806 case GOMP_MAP_ALWAYS_FROM:
15807 case GOMP_MAP_ALWAYS_TOFROM:
15808 case GOMP_MAP_FIRSTPRIVATE_POINTER:
15809 case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
15810 case GOMP_MAP_STRUCT:
15811 case GOMP_MAP_ALWAYS_POINTER:
15812 break;
15813 case GOMP_MAP_FORCE_ALLOC:
15814 case GOMP_MAP_FORCE_TO:
15815 case GOMP_MAP_FORCE_FROM:
15816 case GOMP_MAP_FORCE_TOFROM:
15817 case GOMP_MAP_FORCE_PRESENT:
15818 case GOMP_MAP_FORCE_DEVICEPTR:
15819 case GOMP_MAP_DEVICE_RESIDENT:
15820 case GOMP_MAP_LINK:
15821 gcc_assert (is_gimple_omp_oacc (stmt));
15822 break;
15823 default:
15824 gcc_unreachable ();
15826 #endif
15827 /* FALLTHRU */
15828 case OMP_CLAUSE_TO:
15829 case OMP_CLAUSE_FROM:
15830 oacc_firstprivate:
15831 var = OMP_CLAUSE_DECL (c);
15832 if (!DECL_P (var))
15834 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
15835 || (!OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
15836 && (OMP_CLAUSE_MAP_KIND (c)
15837 != GOMP_MAP_FIRSTPRIVATE_POINTER)))
15838 map_cnt++;
15839 continue;
15842 if (DECL_SIZE (var)
15843 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
15845 tree var2 = DECL_VALUE_EXPR (var);
15846 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
15847 var2 = TREE_OPERAND (var2, 0);
15848 gcc_assert (DECL_P (var2));
15849 var = var2;
15852 if (offloaded
15853 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15854 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
15855 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
15857 if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15859 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
15860 && varpool_node::get_create (var)->offloadable)
15861 continue;
15863 tree type = build_pointer_type (TREE_TYPE (var));
15864 tree new_var = lookup_decl (var, ctx);
15865 x = create_tmp_var_raw (type, get_name (new_var));
15866 gimple_add_tmp_var (x);
15867 x = build_simple_mem_ref (x);
15868 SET_DECL_VALUE_EXPR (new_var, x);
15869 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15871 continue;
15874 if (!maybe_lookup_field (var, ctx))
15875 continue;
15877 /* Don't remap oacc parallel reduction variables, because the
15878 intermediate result must be local to each gang. */
15879 if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15880 && OMP_CLAUSE_MAP_IN_REDUCTION (c)))
15882 x = build_receiver_ref (var, true, ctx);
15883 tree new_var = lookup_decl (var, ctx);
15885 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15886 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
15887 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
15888 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15889 x = build_simple_mem_ref (x);
15890 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15892 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
15893 if (is_reference (new_var))
15895 /* Create a local object to hold the instance
15896 value. */
15897 tree type = TREE_TYPE (TREE_TYPE (new_var));
15898 const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
15899 tree inst = create_tmp_var (type, id);
15900 gimplify_assign (inst, fold_indirect_ref (x), &fplist);
15901 x = build_fold_addr_expr (inst);
15903 gimplify_assign (new_var, x, &fplist);
15905 else if (DECL_P (new_var))
15907 SET_DECL_VALUE_EXPR (new_var, x);
15908 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15910 else
15911 gcc_unreachable ();
15913 map_cnt++;
15914 break;
15916 case OMP_CLAUSE_FIRSTPRIVATE:
15917 if (is_oacc_parallel (ctx))
15918 goto oacc_firstprivate;
15919 map_cnt++;
15920 var = OMP_CLAUSE_DECL (c);
15921 if (!is_reference (var)
15922 && !is_gimple_reg_type (TREE_TYPE (var)))
15924 tree new_var = lookup_decl (var, ctx);
15925 if (is_variable_sized (var))
15927 tree pvar = DECL_VALUE_EXPR (var);
15928 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15929 pvar = TREE_OPERAND (pvar, 0);
15930 gcc_assert (DECL_P (pvar));
15931 tree new_pvar = lookup_decl (pvar, ctx);
15932 x = build_fold_indirect_ref (new_pvar);
15933 TREE_THIS_NOTRAP (x) = 1;
15935 else
15936 x = build_receiver_ref (var, true, ctx);
15937 SET_DECL_VALUE_EXPR (new_var, x);
15938 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15940 break;
15942 case OMP_CLAUSE_PRIVATE:
15943 if (is_gimple_omp_oacc (ctx->stmt))
15944 break;
15945 var = OMP_CLAUSE_DECL (c);
15946 if (is_variable_sized (var))
15948 tree new_var = lookup_decl (var, ctx);
15949 tree pvar = DECL_VALUE_EXPR (var);
15950 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15951 pvar = TREE_OPERAND (pvar, 0);
15952 gcc_assert (DECL_P (pvar));
15953 tree new_pvar = lookup_decl (pvar, ctx);
15954 x = build_fold_indirect_ref (new_pvar);
15955 TREE_THIS_NOTRAP (x) = 1;
15956 SET_DECL_VALUE_EXPR (new_var, x);
15957 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15959 break;
15961 case OMP_CLAUSE_USE_DEVICE_PTR:
15962 case OMP_CLAUSE_IS_DEVICE_PTR:
15963 var = OMP_CLAUSE_DECL (c);
15964 map_cnt++;
15965 if (is_variable_sized (var))
15967 tree new_var = lookup_decl (var, ctx);
15968 tree pvar = DECL_VALUE_EXPR (var);
15969 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15970 pvar = TREE_OPERAND (pvar, 0);
15971 gcc_assert (DECL_P (pvar));
15972 tree new_pvar = lookup_decl (pvar, ctx);
15973 x = build_fold_indirect_ref (new_pvar);
15974 TREE_THIS_NOTRAP (x) = 1;
15975 SET_DECL_VALUE_EXPR (new_var, x);
15976 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15978 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15980 tree new_var = lookup_decl (var, ctx);
15981 tree type = build_pointer_type (TREE_TYPE (var));
15982 x = create_tmp_var_raw (type, get_name (new_var));
15983 gimple_add_tmp_var (x);
15984 x = build_simple_mem_ref (x);
15985 SET_DECL_VALUE_EXPR (new_var, x);
15986 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15988 else
15990 tree new_var = lookup_decl (var, ctx);
15991 x = create_tmp_var_raw (TREE_TYPE (new_var), get_name (new_var));
15992 gimple_add_tmp_var (x);
15993 SET_DECL_VALUE_EXPR (new_var, x);
15994 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15996 break;
15999 if (offloaded)
16001 target_nesting_level++;
16002 lower_omp (&tgt_body, ctx);
16003 target_nesting_level--;
16005 else if (data_region)
16006 lower_omp (&tgt_body, ctx);
16008 if (offloaded)
16010 /* Declare all the variables created by mapping and the variables
16011 declared in the scope of the target body. */
16012 record_vars_into (ctx->block_vars, child_fn);
16013 record_vars_into (gimple_bind_vars (tgt_bind), child_fn);
16016 olist = NULL;
16017 ilist = NULL;
16018 if (ctx->record_type)
16020 ctx->sender_decl
16021 = create_tmp_var (ctx->record_type, ".omp_data_arr");
16022 DECL_NAMELESS (ctx->sender_decl) = 1;
16023 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
16024 t = make_tree_vec (3);
16025 TREE_VEC_ELT (t, 0) = ctx->sender_decl;
16026 TREE_VEC_ELT (t, 1)
16027 = create_tmp_var (build_array_type_nelts (size_type_node, map_cnt),
16028 ".omp_data_sizes");
16029 DECL_NAMELESS (TREE_VEC_ELT (t, 1)) = 1;
16030 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 1)) = 1;
16031 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 1;
16032 tree tkind_type = short_unsigned_type_node;
16033 int talign_shift = 8;
16034 TREE_VEC_ELT (t, 2)
16035 = create_tmp_var (build_array_type_nelts (tkind_type, map_cnt),
16036 ".omp_data_kinds");
16037 DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
16038 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
16039 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
16040 gimple_omp_target_set_data_arg (stmt, t);
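/* A sketch of the resulting data argument (values hypothetical): for
   "map(tofrom: x)" with a 4-byte int x, the three vectors would hold
   roughly

     .omp_data_arr   = { &x };
     .omp_data_sizes = { 4 };
     .omp_data_kinds = { GOMP_MAP_TOFROM | alignment << talign_shift };

   sizes and kinds become static initializers when every entry is a
   compile-time constant.  */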
16042 vec<constructor_elt, va_gc> *vsize;
16043 vec<constructor_elt, va_gc> *vkind;
16044 vec_alloc (vsize, map_cnt);
16045 vec_alloc (vkind, map_cnt);
16046 unsigned int map_idx = 0;
16048 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16049 switch (OMP_CLAUSE_CODE (c))
16051 tree ovar, nc, s, purpose, var, x, type;
16052 unsigned int talign;
16054 default:
16055 break;
16057 case OMP_CLAUSE_MAP:
16058 case OMP_CLAUSE_TO:
16059 case OMP_CLAUSE_FROM:
16060 oacc_firstprivate_map:
16061 nc = c;
16062 ovar = OMP_CLAUSE_DECL (c);
16063 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16064 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
16065 || (OMP_CLAUSE_MAP_KIND (c)
16066 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
16067 break;
16068 if (!DECL_P (ovar))
16070 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16071 && OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
16073 gcc_checking_assert (OMP_CLAUSE_DECL (OMP_CLAUSE_CHAIN (c))
16074 == get_base_address (ovar));
16075 nc = OMP_CLAUSE_CHAIN (c);
16076 ovar = OMP_CLAUSE_DECL (nc);
16078 else
16080 tree x = build_sender_ref (ovar, ctx);
16081 tree v
16082 = build_fold_addr_expr_with_type (ovar, ptr_type_node);
16083 gimplify_assign (x, v, &ilist);
16084 nc = NULL_TREE;
16087 else
16089 if (DECL_SIZE (ovar)
16090 && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST)
16092 tree ovar2 = DECL_VALUE_EXPR (ovar);
16093 gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
16094 ovar2 = TREE_OPERAND (ovar2, 0);
16095 gcc_assert (DECL_P (ovar2));
16096 ovar = ovar2;
16098 if (!maybe_lookup_field (ovar, ctx))
16099 continue;
16102 talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
16103 if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
16104 talign = DECL_ALIGN_UNIT (ovar);
16105 if (nc)
16107 var = lookup_decl_in_outer_ctx (ovar, ctx);
16108 x = build_sender_ref (ovar, ctx);
16110 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16111 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
16112 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
16113 && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE)
16115 gcc_assert (offloaded);
16116 tree avar
16117 = create_tmp_var (TREE_TYPE (TREE_TYPE (x)));
16118 mark_addressable (avar);
16119 gimplify_assign (avar, build_fold_addr_expr (var), &ilist);
16120 talign = DECL_ALIGN_UNIT (avar);
16121 avar = build_fold_addr_expr (avar);
16122 gimplify_assign (x, avar, &ilist);
16124 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16126 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
16127 if (!is_reference (var))
16129 if (is_gimple_reg (var)
16130 && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16131 TREE_NO_WARNING (var) = 1;
16132 var = build_fold_addr_expr (var);
16134 else
16135 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16136 gimplify_assign (x, var, &ilist);
16138 else if (is_gimple_reg (var))
16140 gcc_assert (offloaded);
16141 tree avar = create_tmp_var (TREE_TYPE (var));
16142 mark_addressable (avar);
16143 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
16144 if (GOMP_MAP_COPY_TO_P (map_kind)
16145 || map_kind == GOMP_MAP_POINTER
16146 || map_kind == GOMP_MAP_TO_PSET
16147 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16149 /* If we need to initialize a temporary
16150 with VAR because it is not addressable, and
16151 the variable hasn't been initialized yet, then
16152 we'll get a warning for the store to avar.
16153 Don't warn in that case; the mapping might
16154 be implicit. */
16155 TREE_NO_WARNING (var) = 1;
16156 gimplify_assign (avar, var, &ilist);
16158 avar = build_fold_addr_expr (avar);
16159 gimplify_assign (x, avar, &ilist);
16160 if ((GOMP_MAP_COPY_FROM_P (map_kind)
16161 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16162 && !TYPE_READONLY (TREE_TYPE (var)))
16164 x = unshare_expr (x);
16165 x = build_simple_mem_ref (x);
16166 gimplify_assign (var, x, &olist);
16169 else
16171 var = build_fold_addr_expr (var);
16172 gimplify_assign (x, var, &ilist);
16175 s = NULL_TREE;
16176 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16178 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16179 s = TREE_TYPE (ovar);
16180 if (TREE_CODE (s) == REFERENCE_TYPE)
16181 s = TREE_TYPE (s);
16182 s = TYPE_SIZE_UNIT (s);
16184 else
16185 s = OMP_CLAUSE_SIZE (c);
16186 if (s == NULL_TREE)
16187 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16188 s = fold_convert (size_type_node, s);
16189 purpose = size_int (map_idx++);
16190 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16191 if (TREE_CODE (s) != INTEGER_CST)
16192 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16194 unsigned HOST_WIDE_INT tkind, tkind_zero;
16195 switch (OMP_CLAUSE_CODE (c))
16197 case OMP_CLAUSE_MAP:
16198 tkind = OMP_CLAUSE_MAP_KIND (c);
16199 tkind_zero = tkind;
16200 if (OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c))
16201 switch (tkind)
16203 case GOMP_MAP_ALLOC:
16204 case GOMP_MAP_TO:
16205 case GOMP_MAP_FROM:
16206 case GOMP_MAP_TOFROM:
16207 case GOMP_MAP_ALWAYS_TO:
16208 case GOMP_MAP_ALWAYS_FROM:
16209 case GOMP_MAP_ALWAYS_TOFROM:
16210 case GOMP_MAP_RELEASE:
16211 tkind_zero = GOMP_MAP_ZERO_LEN_ARRAY_SECTION;
16212 break;
16213 case GOMP_MAP_DELETE:
16214 tkind_zero = GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION;
16215 default:
16216 break;
16218 if (tkind_zero != tkind)
16220 if (integer_zerop (s))
16221 tkind = tkind_zero;
16222 else if (integer_nonzerop (s))
16223 tkind_zero = tkind;
16225 break;
16226 case OMP_CLAUSE_FIRSTPRIVATE:
16227 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16228 tkind = GOMP_MAP_TO;
16229 tkind_zero = tkind;
16230 break;
16231 case OMP_CLAUSE_TO:
16232 tkind = GOMP_MAP_TO;
16233 tkind_zero = tkind;
16234 break;
16235 case OMP_CLAUSE_FROM:
16236 tkind = GOMP_MAP_FROM;
16237 tkind_zero = tkind;
16238 break;
16239 default:
16240 gcc_unreachable ();
16242 gcc_checking_assert (tkind
16243 < (HOST_WIDE_INT_C (1U) << talign_shift));
16244 gcc_checking_assert (tkind_zero
16245 < (HOST_WIDE_INT_C (1U) << talign_shift));
16246 talign = ceil_log2 (talign);
16247 tkind |= talign << talign_shift;
16248 tkind_zero |= talign << talign_shift;
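/* E.g., mapping a double (alignment 8) tofrom packs as
   GOMP_MAP_TOFROM | (ceil_log2 (8) << talign_shift),
   i.e. 3 | 0x300 == 0x303.  */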
16249 gcc_checking_assert (tkind
16250 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16251 gcc_checking_assert (tkind_zero
16252 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16253 if (tkind == tkind_zero)
16254 x = build_int_cstu (tkind_type, tkind);
16255 else
16257 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 0;
16258 x = build3 (COND_EXPR, tkind_type,
16259 fold_build2 (EQ_EXPR, boolean_type_node,
16260 unshare_expr (s), size_zero_node),
16261 build_int_cstu (tkind_type, tkind_zero),
16262 build_int_cstu (tkind_type, tkind));
16264 CONSTRUCTOR_APPEND_ELT (vkind, purpose, x);
16265 if (nc && nc != c)
16266 c = nc;
16267 break;
16269 case OMP_CLAUSE_FIRSTPRIVATE:
16270 if (is_oacc_parallel (ctx))
16271 goto oacc_firstprivate_map;
16272 ovar = OMP_CLAUSE_DECL (c);
16273 if (is_reference (ovar))
16274 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16275 else
16276 talign = DECL_ALIGN_UNIT (ovar);
16277 var = lookup_decl_in_outer_ctx (ovar, ctx);
16278 x = build_sender_ref (ovar, ctx);
16279 tkind = GOMP_MAP_FIRSTPRIVATE;
16280 type = TREE_TYPE (ovar);
16281 if (is_reference (ovar))
16282 type = TREE_TYPE (type);
16283 bool use_firstprivate_int, force_addr;
16284 use_firstprivate_int = false;
16285 force_addr = false;
16286 if ((INTEGRAL_TYPE_P (type)
16287 && TYPE_PRECISION (type) <= POINTER_SIZE)
16288 || TREE_CODE (type) == POINTER_TYPE)
16289 use_firstprivate_int = true;
16290 if (has_depend)
16292 if (is_reference (var))
16293 use_firstprivate_int = false;
16294 else if (is_gimple_reg (var))
16296 if (DECL_HAS_VALUE_EXPR_P (var))
16298 tree v = get_base_address (var);
16299 if (DECL_P (v) && TREE_ADDRESSABLE (v))
16301 use_firstprivate_int = false;
16302 force_addr = true;
16304 else
16305 switch (TREE_CODE (v))
16307 case INDIRECT_REF:
16308 case MEM_REF:
16309 use_firstprivate_int = false;
16310 force_addr = true;
16311 break;
16312 default:
16313 break;
16317 else
16318 use_firstprivate_int = false;
16320 if (use_firstprivate_int)
16322 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16323 tree t = var;
16324 if (is_reference (var))
16325 t = build_simple_mem_ref (var);
16326 else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16327 TREE_NO_WARNING (var) = 1;
16328 if (TREE_CODE (type) != POINTER_TYPE)
16329 t = fold_convert (pointer_sized_int_node, t);
16330 t = fold_convert (TREE_TYPE (x), t);
16331 gimplify_assign (x, t, &ilist);
16333 else if (is_reference (var))
16334 gimplify_assign (x, var, &ilist);
16335 else if (!force_addr && is_gimple_reg (var))
16337 tree avar = create_tmp_var (TREE_TYPE (var));
16338 mark_addressable (avar);
16339 if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c))
16340 TREE_NO_WARNING (var) = 1;
16341 gimplify_assign (avar, var, &ilist);
16342 avar = build_fold_addr_expr (avar);
16343 gimplify_assign (x, avar, &ilist);
16345 else
16347 var = build_fold_addr_expr (var);
16348 gimplify_assign (x, var, &ilist);
16350 if (tkind == GOMP_MAP_FIRSTPRIVATE_INT)
16351 s = size_int (0);
16352 else if (is_reference (var))
16353 s = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16354 else
16355 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16356 s = fold_convert (size_type_node, s);
16357 purpose = size_int (map_idx++);
16358 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16359 if (TREE_CODE (s) != INTEGER_CST)
16360 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16362 gcc_checking_assert (tkind
16363 < (HOST_WIDE_INT_C (1U) << talign_shift));
16364 talign = ceil_log2 (talign);
16365 tkind |= talign << talign_shift;
16366 gcc_checking_assert (tkind
16367 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16368 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16369 build_int_cstu (tkind_type, tkind));
16370 break;
16372 case OMP_CLAUSE_USE_DEVICE_PTR:
16373 case OMP_CLAUSE_IS_DEVICE_PTR:
16374 ovar = OMP_CLAUSE_DECL (c);
16375 var = lookup_decl_in_outer_ctx (ovar, ctx);
16376 x = build_sender_ref (ovar, ctx);
16377 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16378 tkind = GOMP_MAP_USE_DEVICE_PTR;
16379 else
16380 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16381 type = TREE_TYPE (ovar);
16382 if (TREE_CODE (type) == ARRAY_TYPE)
16383 var = build_fold_addr_expr (var);
16384 else
16386 if (is_reference (ovar))
16388 type = TREE_TYPE (type);
16389 if (TREE_CODE (type) != ARRAY_TYPE)
16390 var = build_simple_mem_ref (var);
16391 var = fold_convert (TREE_TYPE (x), var);
16394 gimplify_assign (x, var, &ilist);
16395 s = size_int (0);
16396 purpose = size_int (map_idx++);
16397 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16398 gcc_checking_assert (tkind
16399 < (HOST_WIDE_INT_C (1U) << talign_shift));
16400 gcc_checking_assert (tkind
16401 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16402 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16403 build_int_cstu (tkind_type, tkind));
16404 break;
16407 gcc_assert (map_idx == map_cnt);
16409 DECL_INITIAL (TREE_VEC_ELT (t, 1))
16410 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)), vsize);
16411 DECL_INITIAL (TREE_VEC_ELT (t, 2))
16412 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), vkind);
16413 for (int i = 1; i <= 2; i++)
16414 if (!TREE_STATIC (TREE_VEC_ELT (t, i)))
16416 gimple_seq initlist = NULL;
16417 force_gimple_operand (build1 (DECL_EXPR, void_type_node,
16418 TREE_VEC_ELT (t, i)),
16419 &initlist, true, NULL_TREE);
16420 gimple_seq_add_seq (&ilist, initlist);
16422 tree clobber = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, i)),
16423 NULL);
16424 TREE_THIS_VOLATILE (clobber) = 1;
16425 gimple_seq_add_stmt (&olist,
16426 gimple_build_assign (TREE_VEC_ELT (t, i),
16427 clobber));
16430 tree clobber = build_constructor (ctx->record_type, NULL);
16431 TREE_THIS_VOLATILE (clobber) = 1;
16432 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
16433 clobber));
16436 /* Once all the expansions are done, sequence all the different
16437 fragments inside gimple_omp_body. */
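/* For orientation, the sequencing below produces, in order: the
   receiver_decl initialization (for offloaded regions with a non-empty
   record type), the firstprivate pointer setup sequence FPLIST, the
   second-phase per-clause assignments, an OpenACC fork/join pair around
   the target body where applicable, and a closing GIMPLE_OMP_RETURN.  */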
16439 new_body = NULL;
16441 if (offloaded
16442 && ctx->record_type)
16444 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
16445 /* fixup_child_record_type might have changed receiver_decl's type. */
16446 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
16447 gimple_seq_add_stmt (&new_body,
16448 gimple_build_assign (ctx->receiver_decl, t));
16450 gimple_seq_add_seq (&new_body, fplist);
16452 if (offloaded || data_region)
16454 tree prev = NULL_TREE;
16455 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16456 switch (OMP_CLAUSE_CODE (c))
16458 tree var, x;
16459 default:
16460 break;
16461 case OMP_CLAUSE_FIRSTPRIVATE:
16462 if (is_gimple_omp_oacc (ctx->stmt))
16463 break;
16464 var = OMP_CLAUSE_DECL (c);
16465 if (is_reference (var)
16466 || is_gimple_reg_type (TREE_TYPE (var)))
16468 tree new_var = lookup_decl (var, ctx);
16469 tree type;
16470 type = TREE_TYPE (var);
16471 if (is_reference (var))
16472 type = TREE_TYPE (type);
16473 bool use_firstprivate_int;
16474 use_firstprivate_int = false;
16475 if ((INTEGRAL_TYPE_P (type)
16476 && TYPE_PRECISION (type) <= POINTER_SIZE)
16477 || TREE_CODE (type) == POINTER_TYPE)
16478 use_firstprivate_int = true;
16479 if (has_depend)
16481 tree v = lookup_decl_in_outer_ctx (var, ctx);
16482 if (is_reference (v))
16483 use_firstprivate_int = false;
16484 else if (is_gimple_reg (v))
16486 if (DECL_HAS_VALUE_EXPR_P (v))
16488 v = get_base_address (v);
16489 if (DECL_P (v) && TREE_ADDRESSABLE (v))
16490 use_firstprivate_int = false;
16491 else
16492 switch (TREE_CODE (v))
16494 case INDIRECT_REF:
16495 case MEM_REF:
16496 use_firstprivate_int = false;
16497 break;
16498 default:
16499 break;
16503 else
16504 use_firstprivate_int = false;
16506 if (use_firstprivate_int)
16508 x = build_receiver_ref (var, false, ctx);
16509 if (TREE_CODE (type) != POINTER_TYPE)
16510 x = fold_convert (pointer_sized_int_node, x);
16511 x = fold_convert (type, x);
16512 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16513 fb_rvalue);
16514 if (is_reference (var))
16516 tree v = create_tmp_var_raw (type, get_name (var));
16517 gimple_add_tmp_var (v);
16518 TREE_ADDRESSABLE (v) = 1;
16519 gimple_seq_add_stmt (&new_body,
16520 gimple_build_assign (v, x));
16521 x = build_fold_addr_expr (v);
16523 gimple_seq_add_stmt (&new_body,
16524 gimple_build_assign (new_var, x));
16526 else
16528 x = build_receiver_ref (var, !is_reference (var), ctx);
16529 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16530 fb_rvalue);
16531 gimple_seq_add_stmt (&new_body,
16532 gimple_build_assign (new_var, x));
16535 else if (is_variable_sized (var))
16537 tree pvar = DECL_VALUE_EXPR (var);
16538 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16539 pvar = TREE_OPERAND (pvar, 0);
16540 gcc_assert (DECL_P (pvar));
16541 tree new_var = lookup_decl (pvar, ctx);
16542 x = build_receiver_ref (var, false, ctx);
16543 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16544 gimple_seq_add_stmt (&new_body,
16545 gimple_build_assign (new_var, x));
16547 break;
16548 case OMP_CLAUSE_PRIVATE:
16549 if (is_gimple_omp_oacc (ctx->stmt))
16550 break;
16551 var = OMP_CLAUSE_DECL (c);
16552 if (is_reference (var))
16554 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16555 tree new_var = lookup_decl (var, ctx);
16556 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
16557 if (TREE_CONSTANT (x))
16559 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
16560 get_name (var));
16561 gimple_add_tmp_var (x);
16562 TREE_ADDRESSABLE (x) = 1;
16563 x = build_fold_addr_expr_loc (clause_loc, x);
16565 else
16566 break;
16568 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16569 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16570 gimple_seq_add_stmt (&new_body,
16571 gimple_build_assign (new_var, x));
16573 break;
16574 case OMP_CLAUSE_USE_DEVICE_PTR:
16575 case OMP_CLAUSE_IS_DEVICE_PTR:
16576 var = OMP_CLAUSE_DECL (c);
16577 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16578 x = build_sender_ref (var, ctx);
16579 else
16580 x = build_receiver_ref (var, false, ctx);
16581 if (is_variable_sized (var))
16583 tree pvar = DECL_VALUE_EXPR (var);
16584 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16585 pvar = TREE_OPERAND (pvar, 0);
16586 gcc_assert (DECL_P (pvar));
16587 tree new_var = lookup_decl (pvar, ctx);
16588 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16589 gimple_seq_add_stmt (&new_body,
16590 gimple_build_assign (new_var, x));
16592 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16594 tree new_var = lookup_decl (var, ctx);
16595 new_var = DECL_VALUE_EXPR (new_var);
16596 gcc_assert (TREE_CODE (new_var) == MEM_REF);
16597 new_var = TREE_OPERAND (new_var, 0);
16598 gcc_assert (DECL_P (new_var));
16599 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16600 gimple_seq_add_stmt (&new_body,
16601 gimple_build_assign (new_var, x));
16603 else
16605 tree type = TREE_TYPE (var);
16606 tree new_var = lookup_decl (var, ctx);
16607 if (is_reference (var))
16609 type = TREE_TYPE (type);
16610 if (TREE_CODE (type) != ARRAY_TYPE)
16612 tree v = create_tmp_var_raw (type, get_name (var));
16613 gimple_add_tmp_var (v);
16614 TREE_ADDRESSABLE (v) = 1;
16615 x = fold_convert (type, x);
16616 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16617 fb_rvalue);
16618 gimple_seq_add_stmt (&new_body,
16619 gimple_build_assign (v, x));
16620 x = build_fold_addr_expr (v);
16623 new_var = DECL_VALUE_EXPR (new_var);
16624 x = fold_convert (TREE_TYPE (new_var), x);
16625 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16626 gimple_seq_add_stmt (&new_body,
16627 gimple_build_assign (new_var, x));
16629 break;
16631 /* Handle GOMP_MAP_FIRSTPRIVATE_{POINTER,REFERENCE} in a second pass,
16632 so that any firstprivate vars needed to hold OMP_CLAUSE_SIZE have
16633 already been handled. Similarly for OMP_CLAUSE_PRIVATE on VLAs
16634 or references to VLAs. */
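/* As an illustrative (not normative) example: for "int *p;" and
   "#pragma omp target map(p[0:n])" the front end is expected to emit a
   map clause for the array section followed by a
   GOMP_MAP_FIRSTPRIVATE_POINTER clause for P itself; the loop below then
   initializes the private copy of P from the received section address,
   adjusted by the recorded bias.  */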
16635 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
16636 switch (OMP_CLAUSE_CODE (c))
16638 tree var;
16639 default:
16640 break;
16641 case OMP_CLAUSE_MAP:
16642 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
16643 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
16645 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16646 HOST_WIDE_INT offset = 0;
16647 gcc_assert (prev);
16648 var = OMP_CLAUSE_DECL (c);
16649 if (DECL_P (var)
16650 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
16651 && is_global_var (maybe_lookup_decl_in_outer_ctx (var,
16652 ctx))
16653 && varpool_node::get_create (var)->offloadable)
16654 break;
16655 if (TREE_CODE (var) == INDIRECT_REF
16656 && TREE_CODE (TREE_OPERAND (var, 0)) == COMPONENT_REF)
16657 var = TREE_OPERAND (var, 0);
16658 if (TREE_CODE (var) == COMPONENT_REF)
16660 var = get_addr_base_and_unit_offset (var, &offset);
16661 gcc_assert (var != NULL_TREE && DECL_P (var));
16663 else if (DECL_SIZE (var)
16664 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
16666 tree var2 = DECL_VALUE_EXPR (var);
16667 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
16668 var2 = TREE_OPERAND (var2, 0);
16669 gcc_assert (DECL_P (var2));
16670 var = var2;
16672 tree new_var = lookup_decl (var, ctx), x;
16673 tree type = TREE_TYPE (new_var);
16674 bool is_ref;
16675 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == INDIRECT_REF
16676 && (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0))
16677 == COMPONENT_REF))
16679 type = TREE_TYPE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0));
16680 is_ref = true;
16681 new_var = build2 (MEM_REF, type,
16682 build_fold_addr_expr (new_var),
16683 build_int_cst (build_pointer_type (type),
16684 offset));
16686 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF)
16688 type = TREE_TYPE (OMP_CLAUSE_DECL (c));
16689 is_ref = TREE_CODE (type) == REFERENCE_TYPE;
16690 new_var = build2 (MEM_REF, type,
16691 build_fold_addr_expr (new_var),
16692 build_int_cst (build_pointer_type (type),
16693 offset));
16695 else
16696 is_ref = is_reference (var);
16697 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
16698 is_ref = false;
16699 bool ref_to_array = false;
16700 if (is_ref)
16702 type = TREE_TYPE (type);
16703 if (TREE_CODE (type) == ARRAY_TYPE)
16705 type = build_pointer_type (type);
16706 ref_to_array = true;
16709 else if (TREE_CODE (type) == ARRAY_TYPE)
16711 tree decl2 = DECL_VALUE_EXPR (new_var);
16712 gcc_assert (TREE_CODE (decl2) == MEM_REF);
16713 decl2 = TREE_OPERAND (decl2, 0);
16714 gcc_assert (DECL_P (decl2));
16715 new_var = decl2;
16716 type = TREE_TYPE (new_var);
16718 x = build_receiver_ref (OMP_CLAUSE_DECL (prev), false, ctx);
16719 x = fold_convert_loc (clause_loc, type, x);
16720 if (!integer_zerop (OMP_CLAUSE_SIZE (c)))
16722 tree bias = OMP_CLAUSE_SIZE (c);
16723 if (DECL_P (bias))
16724 bias = lookup_decl (bias, ctx);
16725 bias = fold_convert_loc (clause_loc, sizetype, bias);
16726 bias = fold_build1_loc (clause_loc, NEGATE_EXPR, sizetype,
16727 bias);
16728 x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
16729 TREE_TYPE (x), x, bias);
16731 if (ref_to_array)
16732 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16733 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16734 if (is_ref && !ref_to_array)
16736 tree t = create_tmp_var_raw (type, get_name (var));
16737 gimple_add_tmp_var (t);
16738 TREE_ADDRESSABLE (t) = 1;
16739 gimple_seq_add_stmt (&new_body,
16740 gimple_build_assign (t, x));
16741 x = build_fold_addr_expr_loc (clause_loc, t);
16743 gimple_seq_add_stmt (&new_body,
16744 gimple_build_assign (new_var, x));
16745 prev = NULL_TREE;
16747 else if (OMP_CLAUSE_CHAIN (c)
16748 && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c))
16749 == OMP_CLAUSE_MAP
16750 && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
16751 == GOMP_MAP_FIRSTPRIVATE_POINTER
16752 || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
16753 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
16754 prev = c;
16755 break;
16756 case OMP_CLAUSE_PRIVATE:
16757 var = OMP_CLAUSE_DECL (c);
16758 if (is_variable_sized (var))
16760 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16761 tree new_var = lookup_decl (var, ctx);
16762 tree pvar = DECL_VALUE_EXPR (var);
16763 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16764 pvar = TREE_OPERAND (pvar, 0);
16765 gcc_assert (DECL_P (pvar));
16766 tree new_pvar = lookup_decl (pvar, ctx);
16767 tree atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
16768 tree al = size_int (DECL_ALIGN (var));
16769 tree x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
16770 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
16771 x = fold_convert_loc (clause_loc, TREE_TYPE (new_pvar), x);
16772 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16773 gimple_seq_add_stmt (&new_body,
16774 gimple_build_assign (new_pvar, x));
16776 else if (is_reference (var) && !is_gimple_omp_oacc (ctx->stmt))
16778 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16779 tree new_var = lookup_decl (var, ctx);
16780 tree x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
16781 if (TREE_CONSTANT (x))
16782 break;
16783 else
16785 tree atmp
16786 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
16787 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
16788 tree al = size_int (TYPE_ALIGN (rtype));
16789 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
16792 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16793 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16794 gimple_seq_add_stmt (&new_body,
16795 gimple_build_assign (new_var, x));
16797 break;
16800 gimple_seq fork_seq = NULL;
16801 gimple_seq join_seq = NULL;
16803 if (is_oacc_parallel (ctx))
16805 /* If there are reductions on the offloaded region itself, treat
16806 them as a dummy GANG loop. */
16807 tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
16809 lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
16810 false, NULL, NULL, &fork_seq, &join_seq, ctx);
16813 gimple_seq_add_seq (&new_body, fork_seq);
16814 gimple_seq_add_seq (&new_body, tgt_body);
16815 gimple_seq_add_seq (&new_body, join_seq);
16817 if (offloaded)
16818 new_body = maybe_catch_exception (new_body);
16820 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
16821 gimple_omp_set_body (stmt, new_body);
16824 bind = gimple_build_bind (NULL, NULL,
16825 tgt_bind ? gimple_bind_block (tgt_bind)
16826 : NULL_TREE);
16827 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
16828 gimple_bind_add_seq (bind, ilist);
16829 gimple_bind_add_stmt (bind, stmt);
16830 gimple_bind_add_seq (bind, olist);
16832 pop_gimplify_context (NULL);
16834 if (dep_bind)
16836 gimple_bind_add_seq (dep_bind, dep_ilist);
16837 gimple_bind_add_stmt (dep_bind, bind);
16838 gimple_bind_add_seq (dep_bind, dep_olist);
16839 pop_gimplify_context (dep_bind);
16843 /* Expand code for an OpenMP teams directive. */
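/* A rough sketch of the lowering done here, assuming a directive such as
   "#pragma omp teams num_teams (4) thread_limit (64)": the teams body is
   preceded by a call to the GOMP_teams runtime entry point, roughly
   "__builtin_GOMP_teams (4, 64);", with either argument defaulting to 0
   when the corresponding clause is absent.  */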
16845 static void
16846 lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16848 gomp_teams *teams_stmt = as_a <gomp_teams *> (gsi_stmt (*gsi_p));
16849 push_gimplify_context ();
16851 tree block = make_node (BLOCK);
16852 gbind *bind = gimple_build_bind (NULL, NULL, block);
16853 gsi_replace (gsi_p, bind, true);
16854 gimple_seq bind_body = NULL;
16855 gimple_seq dlist = NULL;
16856 gimple_seq olist = NULL;
16858 tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
16859 OMP_CLAUSE_NUM_TEAMS);
16860 if (num_teams == NULL_TREE)
16861 num_teams = build_int_cst (unsigned_type_node, 0);
16862 else
16864 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
16865 num_teams = fold_convert (unsigned_type_node, num_teams);
16866 gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
16868 tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
16869 OMP_CLAUSE_THREAD_LIMIT);
16870 if (thread_limit == NULL_TREE)
16871 thread_limit = build_int_cst (unsigned_type_node, 0);
16872 else
16874 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
16875 thread_limit = fold_convert (unsigned_type_node, thread_limit);
16876 gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
16877 fb_rvalue);
16880 lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
16881 &bind_body, &dlist, ctx, NULL);
16882 lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
16883 lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, ctx);
16884 if (!gimple_omp_teams_grid_phony (teams_stmt))
16886 gimple_seq_add_stmt (&bind_body, teams_stmt);
16887 location_t loc = gimple_location (teams_stmt);
16888 tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
16889 gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
16890 gimple_set_location (call, loc);
16891 gimple_seq_add_stmt (&bind_body, call);
16894 gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
16895 gimple_omp_set_body (teams_stmt, NULL);
16896 gimple_seq_add_seq (&bind_body, olist);
16897 gimple_seq_add_seq (&bind_body, dlist);
16898 if (!gimple_omp_teams_grid_phony (teams_stmt))
16899 gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
16900 gimple_bind_set_body (bind, bind_body);
16902 pop_gimplify_context (bind);
16904 gimple_bind_append_vars (bind, ctx->block_vars);
16905 BLOCK_VARS (block) = ctx->block_vars;
16906 if (BLOCK_VARS (block))
16907 TREE_USED (block) = 1;
16910 /* Expand code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. */
16912 static void
16913 lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16915 gimple *stmt = gsi_stmt (*gsi_p);
16916 lower_omp (gimple_omp_body_ptr (stmt), ctx);
16917 gimple_seq_add_stmt (gimple_omp_body_ptr (stmt),
16918 gimple_build_omp_return (false));
16922 /* Callback for lower_omp_1. Return non-NULL if *tp needs to be
16923 regimplified. If DATA is non-NULL, lower_omp_1 is outside
16924 of OMP context, but with task_shared_vars set. */
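/* For example (illustrative only): a shared variable X that has been
   remapped into the marshalling record gets a DECL_VALUE_EXPR along the
   lines of ".omp_data_i->x", so a statement still mentioning X directly
   must be regimplified to load through that expression instead.  */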
16926 static tree
16927 lower_omp_regimplify_p (tree *tp, int *walk_subtrees,
16928 void *data)
16930 tree t = *tp;
16932 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
16933 if (TREE_CODE (t) == VAR_DECL && data == NULL && DECL_HAS_VALUE_EXPR_P (t))
16934 return t;
16936 if (task_shared_vars
16937 && DECL_P (t)
16938 && bitmap_bit_p (task_shared_vars, DECL_UID (t)))
16939 return t;
16941 /* If a global variable has been privatized, TREE_CONSTANT on
16942 ADDR_EXPR might be wrong. */
16943 if (data == NULL && TREE_CODE (t) == ADDR_EXPR)
16944 recompute_tree_invariant_for_addr_expr (t);
16946 *walk_subtrees = !IS_TYPE_OR_DECL_P (t);
16947 return NULL_TREE;
16950 /* Data to be communicated between lower_omp_regimplify_operands and
16951 lower_omp_regimplify_operands_p. */
16953 struct lower_omp_regimplify_operands_data
16955 omp_context *ctx;
16956 vec<tree> *decls;
16959 /* Helper function for lower_omp_regimplify_operands. Find
16960 omp_member_access_dummy_var vars and temporarily adjust their
16961 DECL_VALUE_EXPRs if needed. */
16963 static tree
16964 lower_omp_regimplify_operands_p (tree *tp, int *walk_subtrees,
16965 void *data)
16967 tree t = omp_member_access_dummy_var (*tp);
16968 if (t)
16970 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
16971 lower_omp_regimplify_operands_data *ldata
16972 = (lower_omp_regimplify_operands_data *) wi->info;
16973 tree o = maybe_lookup_decl (t, ldata->ctx);
16974 if (o != t)
16976 ldata->decls->safe_push (DECL_VALUE_EXPR (*tp));
16977 ldata->decls->safe_push (*tp);
16978 tree v = unshare_and_remap (DECL_VALUE_EXPR (*tp), t, o);
16979 SET_DECL_VALUE_EXPR (*tp, v);
16982 *walk_subtrees = !IS_TYPE_OR_DECL_P (*tp);
16983 return NULL_TREE;
16986 /* Wrapper around gimple_regimplify_operands that adjusts DECL_VALUE_EXPRs
16987 of omp_member_access_dummy_var vars during regimplification. */
16989 static void
16990 lower_omp_regimplify_operands (omp_context *ctx, gimple *stmt,
16991 gimple_stmt_iterator *gsi_p)
16993 auto_vec<tree, 10> decls;
16994 if (ctx)
16996 struct walk_stmt_info wi;
16997 memset (&wi, '\0', sizeof (wi));
16998 struct lower_omp_regimplify_operands_data data;
16999 data.ctx = ctx;
17000 data.decls = &decls;
17001 wi.info = &data;
17002 walk_gimple_op (stmt, lower_omp_regimplify_operands_p, &wi);
17004 gimple_regimplify_operands (stmt, gsi_p);
17005 while (!decls.is_empty ())
17007 tree t = decls.pop ();
17008 tree v = decls.pop ();
17009 SET_DECL_VALUE_EXPR (t, v);
17013 static void
17014 lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
17016 gimple *stmt = gsi_stmt (*gsi_p);
17017 struct walk_stmt_info wi;
17018 gcall *call_stmt;
17020 if (gimple_has_location (stmt))
17021 input_location = gimple_location (stmt);
17023 if (task_shared_vars)
17024 memset (&wi, '\0', sizeof (wi));
17026 /* If we have issued syntax errors, avoid doing any heavy lifting.
17027 Just replace the OMP directives with a NOP to avoid
17028 confusing RTL expansion. */
17029 if (seen_error () && is_gimple_omp (stmt))
17031 gsi_replace (gsi_p, gimple_build_nop (), true);
17032 return;
17035 switch (gimple_code (stmt))
17037 case GIMPLE_COND:
17039 gcond *cond_stmt = as_a <gcond *> (stmt);
17040 if ((ctx || task_shared_vars)
17041 && (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
17042 lower_omp_regimplify_p,
17043 ctx ? NULL : &wi, NULL)
17044 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
17045 lower_omp_regimplify_p,
17046 ctx ? NULL : &wi, NULL)))
17047 lower_omp_regimplify_operands (ctx, cond_stmt, gsi_p);
17049 break;
17050 case GIMPLE_CATCH:
17051 lower_omp (gimple_catch_handler_ptr (as_a <gcatch *> (stmt)), ctx);
17052 break;
17053 case GIMPLE_EH_FILTER:
17054 lower_omp (gimple_eh_filter_failure_ptr (stmt), ctx);
17055 break;
17056 case GIMPLE_TRY:
17057 lower_omp (gimple_try_eval_ptr (stmt), ctx);
17058 lower_omp (gimple_try_cleanup_ptr (stmt), ctx);
17059 break;
17060 case GIMPLE_TRANSACTION:
17061 lower_omp (gimple_transaction_body_ptr (
17062 as_a <gtransaction *> (stmt)),
17063 ctx);
17064 break;
17065 case GIMPLE_BIND:
17066 lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
17067 break;
17068 case GIMPLE_OMP_PARALLEL:
17069 case GIMPLE_OMP_TASK:
17070 ctx = maybe_lookup_ctx (stmt);
17071 gcc_assert (ctx);
17072 if (ctx->cancellable)
17073 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17074 lower_omp_taskreg (gsi_p, ctx);
17075 break;
17076 case GIMPLE_OMP_FOR:
17077 ctx = maybe_lookup_ctx (stmt);
17078 gcc_assert (ctx);
17079 if (ctx->cancellable)
17080 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17081 lower_omp_for (gsi_p, ctx);
17082 break;
17083 case GIMPLE_OMP_SECTIONS:
17084 ctx = maybe_lookup_ctx (stmt);
17085 gcc_assert (ctx);
17086 if (ctx->cancellable)
17087 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
17088 lower_omp_sections (gsi_p, ctx);
17089 break;
17090 case GIMPLE_OMP_SINGLE:
17091 ctx = maybe_lookup_ctx (stmt);
17092 gcc_assert (ctx);
17093 lower_omp_single (gsi_p, ctx);
17094 break;
17095 case GIMPLE_OMP_MASTER:
17096 ctx = maybe_lookup_ctx (stmt);
17097 gcc_assert (ctx);
17098 lower_omp_master (gsi_p, ctx);
17099 break;
17100 case GIMPLE_OMP_TASKGROUP:
17101 ctx = maybe_lookup_ctx (stmt);
17102 gcc_assert (ctx);
17103 lower_omp_taskgroup (gsi_p, ctx);
17104 break;
17105 case GIMPLE_OMP_ORDERED:
17106 ctx = maybe_lookup_ctx (stmt);
17107 gcc_assert (ctx);
17108 lower_omp_ordered (gsi_p, ctx);
17109 break;
17110 case GIMPLE_OMP_CRITICAL:
17111 ctx = maybe_lookup_ctx (stmt);
17112 gcc_assert (ctx);
17113 lower_omp_critical (gsi_p, ctx);
17114 break;
17115 case GIMPLE_OMP_ATOMIC_LOAD:
17116 if ((ctx || task_shared_vars)
17117 && walk_tree (gimple_omp_atomic_load_rhs_ptr (
17118 as_a <gomp_atomic_load *> (stmt)),
17119 lower_omp_regimplify_p, ctx ? NULL : &wi, NULL))
17120 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17121 break;
17122 case GIMPLE_OMP_TARGET:
17123 ctx = maybe_lookup_ctx (stmt);
17124 gcc_assert (ctx);
17125 lower_omp_target (gsi_p, ctx);
17126 break;
17127 case GIMPLE_OMP_TEAMS:
17128 ctx = maybe_lookup_ctx (stmt);
17129 gcc_assert (ctx);
17130 lower_omp_teams (gsi_p, ctx);
17131 break;
17132 case GIMPLE_OMP_GRID_BODY:
17133 ctx = maybe_lookup_ctx (stmt);
17134 gcc_assert (ctx);
17135 lower_omp_grid_body (gsi_p, ctx);
17136 break;
17137 case GIMPLE_CALL:
17138 tree fndecl;
17139 call_stmt = as_a <gcall *> (stmt);
17140 fndecl = gimple_call_fndecl (call_stmt);
17141 if (fndecl
17142 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
17143 switch (DECL_FUNCTION_CODE (fndecl))
17145 case BUILT_IN_GOMP_BARRIER:
17146 if (ctx == NULL)
17147 break;
17148 /* FALLTHRU */
17149 case BUILT_IN_GOMP_CANCEL:
17150 case BUILT_IN_GOMP_CANCELLATION_POINT:
17151 omp_context *cctx;
17152 cctx = ctx;
17153 if (gimple_code (cctx->stmt) == GIMPLE_OMP_SECTION)
17154 cctx = cctx->outer;
17155 gcc_assert (gimple_call_lhs (call_stmt) == NULL_TREE);
17156 if (!cctx->cancellable)
17158 if (DECL_FUNCTION_CODE (fndecl)
17159 == BUILT_IN_GOMP_CANCELLATION_POINT)
17161 stmt = gimple_build_nop ();
17162 gsi_replace (gsi_p, stmt, false);
17164 break;
17166 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
17168 fndecl = builtin_decl_explicit (BUILT_IN_GOMP_BARRIER_CANCEL);
17169 gimple_call_set_fndecl (call_stmt, fndecl);
17170 gimple_call_set_fntype (call_stmt, TREE_TYPE (fndecl));
17172 tree lhs;
17173 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (fndecl)));
17174 gimple_call_set_lhs (call_stmt, lhs);
17175 tree fallthru_label;
17176 fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
17177 gimple *g;
17178 g = gimple_build_label (fallthru_label);
17179 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17180 g = gimple_build_cond (NE_EXPR, lhs,
17181 fold_convert (TREE_TYPE (lhs),
17182 boolean_false_node),
17183 cctx->cancel_label, fallthru_label);
17184 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17185 break;
17186 default:
17187 break;
17189 /* FALLTHRU */
17190 default:
17191 if ((ctx || task_shared_vars)
17192 && walk_gimple_op (stmt, lower_omp_regimplify_p,
17193 ctx ? NULL : &wi))
17195 /* Just remove clobbers, this should happen only if we have
17196 "privatized" local addressable variables in SIMD regions,
17197 the clobber isn't needed in that case and gimplifying address
17198 of the ARRAY_REF into a pointer and creating MEM_REF based
17199 clobber would create worse code than we get with the clobber
17200 dropped. */
17201 if (gimple_clobber_p (stmt))
17203 gsi_replace (gsi_p, gimple_build_nop (), true);
17204 break;
17206 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17208 break;
17212 static void
17213 lower_omp (gimple_seq *body, omp_context *ctx)
17215 location_t saved_location = input_location;
17216 gimple_stmt_iterator gsi;
17217 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17218 lower_omp_1 (&gsi, ctx);
17219 /* During gimplification, we haven't folded statements inside offloading
17220 or taskreg regions (gimplify.c:maybe_fold_stmt); do that now. */
17221 if (target_nesting_level || taskreg_nesting_level)
17222 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17223 fold_stmt (&gsi);
17224 input_location = saved_location;
17227 /* Return true if STMT is an assignment of a register-type value to a local
17228 VAR_DECL. */
17230 static bool
17231 grid_reg_assignment_to_local_var_p (gimple *stmt)
17233 gassign *assign = dyn_cast <gassign *> (stmt);
17234 if (!assign)
17235 return false;
17236 tree lhs = gimple_assign_lhs (assign);
17237 if (TREE_CODE (lhs) != VAR_DECL
17238 || !is_gimple_reg_type (TREE_TYPE (lhs))
17239 || is_global_var (lhs))
17240 return false;
17241 return true;
17244 /* Return true if all statements in SEQ are assignments to local register-type
17245 variables. */
17247 static bool
17248 grid_seq_only_contains_local_assignments (gimple_seq seq)
17250 if (!seq)
17251 return true;
17253 gimple_stmt_iterator gsi;
17254 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17255 if (!grid_reg_assignment_to_local_var_p (gsi_stmt (gsi)))
17256 return false;
17257 return true;
17260 /* Scan statements in SEQ, recursing into any bind. If during the whole
17261 search only assignments to register-type local variables and a single
17262 OMP statement are encountered, return true; otherwise return false.
17263 RET is where we store any OMP statement encountered. TARGET_LOC and NAME
17264 are used for dumping a note about a failure. */
17266 static bool
17267 grid_find_single_omp_among_assignments_1 (gimple_seq seq, location_t target_loc,
17268 const char *name, gimple **ret)
17270 gimple_stmt_iterator gsi;
17271 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17273 gimple *stmt = gsi_stmt (gsi);
17275 if (grid_reg_assignment_to_local_var_p (stmt))
17276 continue;
17277 if (gbind *bind = dyn_cast <gbind *> (stmt))
17279 if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind),
17280 target_loc, name, ret))
17281 return false;
17283 else if (is_gimple_omp (stmt))
17285 if (*ret)
17287 if (dump_enabled_p ())
17288 dump_printf_loc (MSG_NOTE, target_loc,
17289 "Will not turn target construct into a simple "
17290 "GPGPU kernel because %s construct contains "
17291 "multiple OpenMP constructs\n", name);
17292 return false;
17294 *ret = stmt;
17296 else
17298 if (dump_enabled_p ())
17299 dump_printf_loc (MSG_NOTE, target_loc,
17300 "Will not turn target construct into a simple "
17301 "GPGPU kernel because %s construct contains "
17302 "a complex statement\n", name);
17303 return false;
17306 return true;
17309 /* Scan statements in SEQ and make sure that it and any binds in it contain
17310 only assignments to local register-type variables and one OMP construct. If
17311 so, return that construct, otherwise return NULL. If dumping is enabled and
17312 the function fails, use TARGET_LOC and NAME to dump a note with the reason
17313 failure. */
17315 static gimple *
17316 grid_find_single_omp_among_assignments (gimple_seq seq, location_t target_loc,
17317 const char *name)
17319 if (!seq)
17321 if (dump_enabled_p ())
17322 dump_printf_loc (MSG_NOTE, target_loc,
17323 "Will not turn target construct into a simple "
17324 "GPGPU kernel because %s construct has empty "
17325 "body\n",
17326 name);
17327 return NULL;
17330 gimple *ret = NULL;
17331 if (grid_find_single_omp_among_assignments_1 (seq, target_loc, name, &ret))
17333 if (!ret && dump_enabled_p ())
17334 dump_printf_loc (MSG_NOTE, target_loc,
17335 "Will not turn target construct into a simple "
17336 "GPGPU kernel because %s construct does not contain"
17337 "any other OpenMP construct\n", name);
17338 return ret;
17340 else
17341 return NULL;
17344 /* Walker function looking for statements that there is no point in gridifying
17345 (and for noreturn function calls, which we cannot gridify at all). Return
17346 non-NULL if such a statement is found. */
17348 static tree
17349 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
17350 bool *handled_ops_p,
17351 struct walk_stmt_info *wi)
17353 *handled_ops_p = false;
17354 gimple *stmt = gsi_stmt (*gsi);
17355 switch (gimple_code (stmt))
17357 case GIMPLE_CALL:
17358 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
17360 *handled_ops_p = true;
17361 wi->info = stmt;
17362 return error_mark_node;
17364 break;
17366 /* We may reduce the following list if we find a way to implement the
17367 clauses, but for now there is no point in trying further. */
17368 case GIMPLE_OMP_CRITICAL:
17369 case GIMPLE_OMP_TASKGROUP:
17370 case GIMPLE_OMP_TASK:
17371 case GIMPLE_OMP_SECTION:
17372 case GIMPLE_OMP_SECTIONS:
17373 case GIMPLE_OMP_SECTIONS_SWITCH:
17374 case GIMPLE_OMP_TARGET:
17375 case GIMPLE_OMP_ORDERED:
17376 *handled_ops_p = true;
17377 wi->info = stmt;
17378 return error_mark_node;
17380 case GIMPLE_OMP_FOR:
17381 if ((gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)
17382 && gimple_omp_for_combined_into_p (stmt))
17384 *handled_ops_p = true;
17385 wi->info = stmt;
17386 return error_mark_node;
17388 break;
17390 default:
17391 break;
17393 return NULL;
17397 /* If TARGET follows a pattern that can be turned into a gridified GPGPU
17398 kernel, return true, otherwise return false. In the case of success, also
17399 fill in GROUP_SIZE_P with the requested group size or NULL if there is
17400 none. */
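/* The shape being matched is a target construct whose body (apart from
   leading assignments to local register-type variables at each level)
   consists of a sole teams construct, containing a sole distribute,
   containing a sole parallel, containing a sole work-sharing loop; e.g.
   the combined form

     #pragma omp target teams distribute parallel for thread_limit (N)
     for (...) { ... }

   with no reduction, lastprivate, num_teams or num_threads clauses, no
   collapse on either loop, and any schedule clause restricted to
   schedule (auto).  */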
17402 static bool
17403 grid_target_follows_gridifiable_pattern (gomp_target *target, tree *group_size_p)
17405 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
17406 return false;
17408 location_t tloc = gimple_location (target);
17409 gimple *stmt
17410 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
17411 tloc, "target");
17412 if (!stmt)
17413 return false;
17414 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
17415 tree group_size = NULL;
17416 if (!teams)
17418 dump_printf_loc (MSG_NOTE, tloc,
17419 "Will not turn target construct into a simple "
17420 "GPGPU kernel because it does not have a sole teams "
17421 "construct in it.\n");
17422 return false;
17425 tree clauses = gimple_omp_teams_clauses (teams);
17426 while (clauses)
17428 switch (OMP_CLAUSE_CODE (clauses))
17430 case OMP_CLAUSE_NUM_TEAMS:
17431 if (dump_enabled_p ())
17432 dump_printf_loc (MSG_NOTE, tloc,
17433 "Will not turn target construct into a "
17434 "gridified GPGPU kernel because we cannot "
17435 "handle num_teams clause of teams "
17436 "construct\n ");
17437 return false;
17439 case OMP_CLAUSE_REDUCTION:
17440 if (dump_enabled_p ())
17441 dump_printf_loc (MSG_NOTE, tloc,
17442 "Will not turn target construct into a "
17443 "gridified GPGPU kernel because a reduction "
17444 "clause is present\n ");
17445 return false;
17447 case OMP_CLAUSE_LASTPRIVATE:
17448 if (dump_enabled_p ())
17449 dump_printf_loc (MSG_NOTE, tloc,
17450 "Will not turn target construct into a "
17451 "gridified GPGPU kernel because a lastprivate "
17452 "clause is present\n ");
17453 return false;
17455 case OMP_CLAUSE_THREAD_LIMIT:
17456 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
17457 break;
17459 default:
17460 break;
17462 clauses = OMP_CLAUSE_CHAIN (clauses);
17465 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), tloc,
17466 "teams");
17467 if (!stmt)
17468 return false;
17469 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
17470 if (!dist)
17472 dump_printf_loc (MSG_NOTE, tloc,
17473 "Will not turn target construct into a simple "
17474 "GPGPU kernel because the teams construct does not have "
17475 "a sole distribute construct in it.\n");
17476 return false;
17479 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
17480 if (!gimple_omp_for_combined_p (dist))
17482 if (dump_enabled_p ())
17483 dump_printf_loc (MSG_NOTE, tloc,
17484 "Will not turn target construct into a gridified GPGPU "
17485 "kernel because we cannot handle a standalone "
17486 "distribute construct\n ");
17487 return false;
17489 if (dist->collapse > 1)
17491 if (dump_enabled_p ())
17492 dump_printf_loc (MSG_NOTE, tloc,
17493 "Will not turn target construct into a gridified GPGPU "
17494 "kernel because the distribute construct contains "
17495 "collapse clause\n");
17496 return false;
17498 struct omp_for_data fd;
17499 extract_omp_for_data (dist, &fd, NULL);
17500 if (fd.chunk_size)
17502 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
17504 if (dump_enabled_p ())
17505 dump_printf_loc (MSG_NOTE, tloc,
17506 "Will not turn target construct into a "
17507 "gridified GPGPU kernel because the teams "
17508 "thread limit is different from distribute "
17509 "schedule chunk\n");
17510 return false;
17512 group_size = fd.chunk_size;
17514 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), tloc,
17515 "distribute");
17516 gomp_parallel *par;
17517 if (!stmt || !(par = dyn_cast <gomp_parallel *> (stmt)))
17518 return false;
17520 clauses = gimple_omp_parallel_clauses (par);
17521 while (clauses)
17523 switch (OMP_CLAUSE_CODE (clauses))
17525 case OMP_CLAUSE_NUM_THREADS:
17526 if (dump_enabled_p ())
17527 dump_printf_loc (MSG_NOTE, tloc,
17528 "Will not turn target construct into a gridified"
17529 "GPGPU kernel because there is a num_threads "
17530 "clause of the parallel construct\n");
17531 return false;
17533 case OMP_CLAUSE_REDUCTION:
17534 if (dump_enabled_p ())
17535 dump_printf_loc (MSG_NOTE, tloc,
17536 "Will not turn target construct into a "
17537 "gridified GPGPU kernel because a reduction "
17538 "clause is present\n ");
17539 return false;
17541 case OMP_CLAUSE_LASTPRIVATE:
17542 if (dump_enabled_p ())
17543 dump_printf_loc (MSG_NOTE, tloc,
17544 "Will not turn target construct into a "
17545 "gridified GPGPU kernel because a lastprivate "
17546 "clause is present\n ");
17547 return false;
17549 default:
17550 break;
17552 clauses = OMP_CLAUSE_CHAIN (clauses);
17555 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), tloc,
17556 "parallel");
17557 gomp_for *gfor;
17558 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
17559 return false;
17561 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
17563 if (dump_enabled_p ())
17564 dump_printf_loc (MSG_NOTE, tloc,
17565 "Will not turn target construct into a gridified GPGPU "
17566 "kernel because the inner loop is not a simple for "
17567 "loop\n");
17568 return false;
17570 if (gfor->collapse > 1)
17572 if (dump_enabled_p ())
17573 dump_printf_loc (MSG_NOTE, tloc,
17574 "Will not turn target construct into a gridified GPGPU "
17575 "kernel because the inner loop contains collapse "
17576 "clause\n");
17577 return false;
17580 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor)))
17582 if (dump_enabled_p ())
17583 dump_printf_loc (MSG_NOTE, tloc,
17584 "Will not turn target construct into a gridified GPGPU "
17585 "kernel because the inner loop pre_body contains"
17586 "a complex instruction\n");
17587 return false;
17590 clauses = gimple_omp_for_clauses (gfor);
17591 while (clauses)
17593 switch (OMP_CLAUSE_CODE (clauses))
17595 case OMP_CLAUSE_SCHEDULE:
17596 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
17598 if (dump_enabled_p ())
17599 dump_printf_loc (MSG_NOTE, tloc,
17600 "Will not turn target construct into a "
17601 "gridified GPGPU kernel because the inner "
17602 "loop has a non-automatic scheduling clause\n");
17603 return false;
17605 break;
17607 case OMP_CLAUSE_REDUCTION:
17608 if (dump_enabled_p ())
17609 dump_printf_loc (MSG_NOTE, tloc,
17610 "Will not turn target construct into a "
17611 "gridified GPGPU kernel because a reduction "
17612 "clause is present\n ");
17613 return false;
17615 case OMP_CLAUSE_LASTPRIVATE:
17616 if (dump_enabled_p ())
17617 dump_printf_loc (MSG_NOTE, tloc,
17618 "Will not turn target construct into a "
17619 "gridified GPGPU kernel because a lastprivate "
17620 "clause is present\n ");
17621 return false;
17623 default:
17624 break;
17626 clauses = OMP_CLAUSE_CHAIN (clauses);
17629 struct walk_stmt_info wi;
17630 memset (&wi, 0, sizeof (wi));
17631 if (walk_gimple_seq (gimple_omp_body (gfor),
17632 grid_find_ungridifiable_statement,
17633 NULL, &wi))
17635 gimple *bad = (gimple *) wi.info;
17636 if (dump_enabled_p ())
17638 if (is_gimple_call (bad))
17639 dump_printf_loc (MSG_NOTE, tloc,
17640 "Will not turn target construct into a gridified "
17641 " GPGPU kernel because the inner loop contains "
17642 "call to a noreturn function\n");
17643 if (gimple_code (bad) == GIMPLE_OMP_FOR)
17644 dump_printf_loc (MSG_NOTE, tloc,
17645 "Will not turn target construct into a gridified "
17646 " GPGPU kernel because the inner loop contains "
17647 "a simd construct\n");
17648 else
17649 dump_printf_loc (MSG_NOTE, tloc,
17650 "Will not turn target construct into a gridified "
17651 "GPGPU kernel because the inner loop contains "
17652 "statement %s which cannot be transformed\n",
17653 gimple_code_name[(int) gimple_code (bad)]);
17655 return false;
17658 *group_size_p = group_size;
17659 return true;
17662 /* Operand walker, used to remap pre-body declarations according to a hash map
17663 provided in DATA. */
17665 static tree
17666 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
17668 tree t = *tp;
17670 if (DECL_P (t) || TYPE_P (t))
17671 *walk_subtrees = 0;
17672 else
17673 *walk_subtrees = 1;
17675 if (TREE_CODE (t) == VAR_DECL)
17677 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
17678 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
17679 tree *repl = declmap->get (t);
17680 if (repl)
17681 *tp = *repl;
17683 return NULL_TREE;
17686 /* Copy leading register-type assignments to local variables in SRC to just
17687 before DST, creating temporaries, adjusting the mapping of operands in WI and
17688 remapping operands as necessary. Add any new temporaries to TGT_BIND.
17689 Return the first statement that does not conform to
17690 grid_reg_assignment_to_local_var_p or NULL. */
17692 static gimple *
17693 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
17694 gbind *tgt_bind, struct walk_stmt_info *wi)
17696 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
17697 gimple_stmt_iterator gsi;
17698 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
17700 gimple *stmt = gsi_stmt (gsi);
17701 if (gbind *bind = dyn_cast <gbind *> (stmt))
17703 gimple *r = grid_copy_leading_local_assignments
17704 (gimple_bind_body (bind), dst, tgt_bind, wi);
17705 if (r)
17706 return r;
17707 else
17708 continue;
17710 if (!grid_reg_assignment_to_local_var_p (stmt))
17711 return stmt;
17712 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
17713 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
17714 TREE_TYPE (lhs));
17715 DECL_CONTEXT (repl) = current_function_decl;
17716 gimple_bind_append_vars (tgt_bind, repl);
17718 declmap->put (lhs, repl);
17719 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
17720 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
17721 gsi_insert_before (dst, copy, GSI_SAME_STMT);
17723 return NULL;
17726 /* Given freshly copied top level kernel SEQ, identify the individual OMP
17727 components, mark them as part of the kernel, return the inner loop, and copy
17728 the assignments leading to them to just before DST, remapping them using WI and
17729 adding new temporaries to TGT_BIND. */
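/* After this runs, the copied teams, distribute and parallel statements
   are marked grid-phony (so they expand to nothing in the kernel copy)
   and the innermost loop is re-marked GF_OMP_FOR_KIND_GRID_LOOP; that
   inner loop is what the function returns.  */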
17731 static gomp_for *
17732 grid_process_kernel_body_copy (gimple_seq seq, gimple_stmt_iterator *dst,
17733 gbind *tgt_bind, struct walk_stmt_info *wi)
17735 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, wi);
17736 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
17737 gcc_assert (teams);
17738 gimple_omp_teams_set_grid_phony (teams, true);
17739 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
17740 tgt_bind, wi);
17741 gcc_checking_assert (stmt);
17742 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
17743 gcc_assert (dist);
17744 gimple_seq prebody = gimple_omp_for_pre_body (dist);
17745 if (prebody)
17746 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
17747 gimple_omp_for_set_grid_phony (dist, true);
17748 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
17749 tgt_bind, wi);
17750 gcc_checking_assert (stmt);
17752 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
17753 gimple_omp_parallel_set_grid_phony (parallel, true);
17754 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), dst,
17755 tgt_bind, wi);
17756 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
17757 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
17758 prebody = gimple_omp_for_pre_body (inner_loop);
17759 if (prebody)
17760 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
17762 return inner_loop;
17765 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
17766 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
17767 is the bind into which temporaries inserted before TARGET should be
17768 added. */
17770 static void
17771 grid_attempt_target_gridification (gomp_target *target,
17772 gimple_stmt_iterator *gsi,
17773 gbind *tgt_bind)
17775 tree group_size;
17776 if (!target || !grid_target_follows_gridifiable_pattern (target, &group_size))
17777 return;
17779 location_t loc = gimple_location (target);
17780 if (dump_enabled_p ())
17781 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
17782 "Target construct will be turned into a gridified GPGPU "
17783 "kernel\n");
17785 /* Copy target body to a GPUKERNEL construct: */
17786 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
17787 (gimple_omp_body (target));
17789 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
17790 struct walk_stmt_info wi;
17791 memset (&wi, 0, sizeof (struct walk_stmt_info));
17792 wi.info = declmap;
17794 /* Copy the assignments in between the OMP statements to just before the
17795 target, and mark OMP statements within the copy appropriately. */
17796 gomp_for *inner_loop = grid_process_kernel_body_copy (kernel_seq, gsi,
17797 tgt_bind, &wi);
17799 gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
17800 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
17801 tree new_block = gimple_bind_block (new_bind);
17802 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
17803 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
17804 BLOCK_SUBBLOCKS (enc_block) = new_block;
17805 BLOCK_SUPERCONTEXT (new_block) = enc_block;
17806 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
17807 gimple_seq_add_stmt
17808 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
17809 gpukernel);
17811 walk_tree (&group_size, grid_remap_prebody_decls, &wi, NULL);
17812 push_gimplify_context ();
17813 size_t collapse = gimple_omp_for_collapse (inner_loop);
17814 for (size_t i = 0; i < collapse; i++)
17816 tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
17817 if (POINTER_TYPE_P (type))
17818 itype = signed_type_for (type);
17819 else
17820 itype = type;
17822 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
17823 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
17824 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
17825 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
17826 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
17827 adjust_for_condition (loc, &cond_code, &n2);
17828 tree step;
17829 step = get_omp_for_step_from_incr (loc,
17830 gimple_omp_for_incr (inner_loop, i));
17831 gimple_seq tmpseq = NULL;
17832 n1 = fold_convert (itype, n1);
17833 n2 = fold_convert (itype, n2);
17834 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
17835 t = fold_build2 (PLUS_EXPR, itype, step, t);
17836 t = fold_build2 (PLUS_EXPR, itype, t, n2);
17837 t = fold_build2 (MINUS_EXPR, itype, t, n1);
17838 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
17839 t = fold_build2 (TRUNC_DIV_EXPR, itype,
17840 fold_build1 (NEGATE_EXPR, itype, t),
17841 fold_build1 (NEGATE_EXPR, itype, step));
17842 else
17843 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
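/* This is the usual iteration-count computation
     (n2 - n1 + step + (cond_code == LT_EXPR ? -1 : 1)) / step;
   e.g. for "for (i = 0; i < 10; i += 3)" it yields
   (10 - 0 + 3 - 1) / 3 = 4, matching i = 0, 3, 6, 9.  */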
17844 tree gs = fold_convert (uint32_type_node, t);
17845 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
17846 if (!gimple_seq_empty_p (tmpseq))
17847 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
17849 tree ws;
17850 if (i == 0 && group_size)
17852 ws = fold_convert (uint32_type_node, group_size);
17853 tmpseq = NULL;
17854 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
17855 if (!gimple_seq_empty_p (tmpseq))
17856 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
17858 else
17859 ws = build_zero_cst (uint32_type_node);
17861 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
17862 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
17863 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
17864 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
17865 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
17866 gimple_omp_target_set_clauses (target, c);
17868 pop_gimplify_context (tgt_bind);
17869 delete declmap;
17870 return;
17873 /* Walker function doing all the work for create_target_kernels. */
17875 static tree
17876 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
17877 bool *handled_ops_p,
17878 struct walk_stmt_info *incoming)
17880 *handled_ops_p = false;
17882 gimple *stmt = gsi_stmt (*gsi);
17883 gomp_target *target = dyn_cast <gomp_target *> (stmt);
17884 if (target)
17886 gbind *tgt_bind = (gbind *) incoming->info;
17887 gcc_checking_assert (tgt_bind);
17888 grid_attempt_target_gridification (target, gsi, tgt_bind);
17889 return NULL_TREE;
17891 gbind *bind = dyn_cast <gbind *> (stmt);
17892 if (bind)
17894 *handled_ops_p = true;
17895 struct walk_stmt_info wi;
17896 memset (&wi, 0, sizeof (wi));
17897 wi.info = bind;
17898 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
17899 grid_gridify_all_targets_stmt, NULL, &wi);
17901 return NULL_TREE;
17904 /* Attempt to gridify all target constructs in BODY_P. All such targets will
17905 have their bodies duplicated, with the new copy being put into a
17906 gimple_omp_grid_body statement. All kernel-related constructs within the
17907 grid_body will be marked with phony flags or kernel kinds. Moreover, some
17908 re-structuring is often needed, such as copying pre-bodies before the target
17909 construct so that kernel grid sizes can be computed. */
17911 static void
17912 grid_gridify_all_targets (gimple_seq *body_p)
17914 struct walk_stmt_info wi;
17915 memset (&wi, 0, sizeof (wi));
17916 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
17920 /* Main entry point. */
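/* The pass proceeds as: optionally gridify target constructs when HSA
   code generation is requested, scan the function body for OMP
   constructs and data-sharing clauses, finish the taskreg scans, and
   finally lower everything via lower_omp.  */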
17922 static unsigned int
17923 execute_lower_omp (void)
17925 gimple_seq body;
17926 int i;
17927 omp_context *ctx;
17929 /* This pass always runs, to provide PROP_gimple_lomp.
17930 But often, there is nothing to do. */
17931 if (flag_cilkplus == 0 && flag_openacc == 0 && flag_openmp == 0
17932 && flag_openmp_simd == 0)
17933 return 0;
17935 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
17936 delete_omp_context);
17938 body = gimple_body (current_function_decl);
17940 if (hsa_gen_requested_p ())
17941 grid_gridify_all_targets (&body);
17943 scan_omp (&body, NULL);
17944 gcc_assert (taskreg_nesting_level == 0);
17945 FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
17946 finish_taskreg_scan (ctx);
17947 taskreg_contexts.release ();
17949 if (all_contexts->root)
17951 if (task_shared_vars)
17952 push_gimplify_context ();
17953 lower_omp (&body, NULL);
17954 if (task_shared_vars)
17955 pop_gimplify_context (NULL);
17958 if (all_contexts)
17960 splay_tree_delete (all_contexts);
17961 all_contexts = NULL;
17963 BITMAP_FREE (task_shared_vars);
17964 return 0;
17967 namespace {
17969 const pass_data pass_data_lower_omp =
17971 GIMPLE_PASS, /* type */
17972 "omplower", /* name */
17973 OPTGROUP_NONE, /* optinfo_flags */
17974 TV_NONE, /* tv_id */
17975 PROP_gimple_any, /* properties_required */
17976 PROP_gimple_lomp, /* properties_provided */
17977 0, /* properties_destroyed */
17978 0, /* todo_flags_start */
17979 0, /* todo_flags_finish */
17982 class pass_lower_omp : public gimple_opt_pass
17984 public:
17985 pass_lower_omp (gcc::context *ctxt)
17986 : gimple_opt_pass (pass_data_lower_omp, ctxt)
17989 /* opt_pass methods: */
17990 virtual unsigned int execute (function *) { return execute_lower_omp (); }
17992 }; // class pass_lower_omp
17994 } // anon namespace
17996 gimple_opt_pass *
17997 make_pass_lower_omp (gcc::context *ctxt)
17999 return new pass_lower_omp (ctxt);
18002 /* The following is a utility to diagnose structured block violations.
18003 It is not part of the "omplower" pass, as that's invoked too late. It
18004 should be invoked by the respective front ends after gimplification. */
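/* A minimal example of what gets diagnosed (illustrative only): given

     #pragma omp parallel
     {
       goto fail;
     }
   fail:;

   the goto branches from inside the parallel construct to a label
   outside it, so diagnose_sb_0 reports "invalid branch to/from OpenMP
   structured block" and replaces the branch with a nop.  */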
18006 static splay_tree all_labels;
18008 /* Check for mismatched contexts and generate an error if needed. Return
18009 true if an error is detected. */
18011 static bool
18012 diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
18013 gimple *branch_ctx, gimple *label_ctx)
18015 gcc_checking_assert (!branch_ctx || is_gimple_omp (branch_ctx));
18016 gcc_checking_assert (!label_ctx || is_gimple_omp (label_ctx));
18018 if (label_ctx == branch_ctx)
18019 return false;
18021 const char* kind = NULL;
18023 if (flag_cilkplus)
18025 if ((branch_ctx
18026 && gimple_code (branch_ctx) == GIMPLE_OMP_FOR
18027 && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
18028 || (label_ctx
18029 && gimple_code (label_ctx) == GIMPLE_OMP_FOR
18030 && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
18031 kind = "Cilk Plus";
18033 if (flag_openacc)
18035 if ((branch_ctx && is_gimple_omp_oacc (branch_ctx))
18036 || (label_ctx && is_gimple_omp_oacc (label_ctx)))
18038 gcc_checking_assert (kind == NULL);
18039 kind = "OpenACC";
18042 if (kind == NULL)
18044 gcc_checking_assert (flag_openmp);
18045 kind = "OpenMP";
18049 Previously we kept track of the label's entire context in diagnose_sb_[12]
18050 so we could traverse it and issue a correct "exit" or "enter" error
18051 message upon a structured block violation.
18053 We built the context by building a list with tree_cons'ing, but there is
18054 no easy counterpart in gimple tuples. It seems like far too much work
18055 for issuing exit/enter error messages. If someone really misses the
18056 distinct error message... patches welcome.
18059 #if 0
18060 /* Try to avoid confusing the user by producing an error message
18061 with correct "exit" or "enter" verbiage. We prefer "exit"
18062 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
18063 if (branch_ctx == NULL)
18064 exit_p = false;
18065 else
18067 while (label_ctx)
18069 if (TREE_VALUE (label_ctx) == branch_ctx)
18071 exit_p = false;
18072 break;
18074 label_ctx = TREE_CHAIN (label_ctx);
18078 if (exit_p)
18079 error ("invalid exit from %s structured block", kind);
18080 else
18081 error ("invalid entry to %s structured block", kind);
18082 #endif
18084 /* If it's obvious we have an invalid entry, be specific about the error. */
18085 if (branch_ctx == NULL)
18086 error ("invalid entry to %s structured block", kind);
18087 else
18089 /* Otherwise, be vague and lazy, but efficient. */
18090 error ("invalid branch to/from %s structured block", kind);
18093 gsi_replace (gsi_p, gimple_build_nop (), false);
18094 return true;
18097 /* Pass 1: Create a minimal tree of structured blocks, and record
18098 where each label is found. */
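/* E.g. for a label L inside "#pragma omp parallel", all_labels maps L to
   the GIMPLE_OMP_PARALLEL statement, which acts as its minimal
   context.  */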
18100 static tree
18101 diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
18102 struct walk_stmt_info *wi)
18104 gimple *context = (gimple *) wi->info;
18105 gimple *inner_context;
18106 gimple *stmt = gsi_stmt (*gsi_p);
18108 *handled_ops_p = true;
18110 switch (gimple_code (stmt))
18112 WALK_SUBSTMTS;
18114 case GIMPLE_OMP_PARALLEL:
18115 case GIMPLE_OMP_TASK:
18116 case GIMPLE_OMP_SECTIONS:
18117 case GIMPLE_OMP_SINGLE:
18118 case GIMPLE_OMP_SECTION:
18119 case GIMPLE_OMP_MASTER:
18120 case GIMPLE_OMP_ORDERED:
18121 case GIMPLE_OMP_CRITICAL:
18122 case GIMPLE_OMP_TARGET:
18123 case GIMPLE_OMP_TEAMS:
18124 case GIMPLE_OMP_TASKGROUP:
18125 /* The minimal context here is just the current OMP construct. */
18126 inner_context = stmt;
18127 wi->info = inner_context;
18128 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
18129 wi->info = context;
18130 break;
18132 case GIMPLE_OMP_FOR:
18133 inner_context = stmt;
18134 wi->info = inner_context;
18135 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
18136 walk them. */
18137 walk_gimple_seq (gimple_omp_for_pre_body (stmt),
18138 diagnose_sb_1, NULL, wi);
18139 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
18140 wi->info = context;
18141 break;
18143 case GIMPLE_LABEL:
18144 splay_tree_insert (all_labels,
18145 (splay_tree_key) gimple_label_label (
18146 as_a <glabel *> (stmt)),
18147 (splay_tree_value) context);
18148 break;
18150 default:
18151 break;
18154 return NULL_TREE;
18157 /* Pass 2: Check each branch and see if its context differs from that of
18158 the destination label's context. */
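/* The branches inspected below are the true/false labels of GIMPLE_COND,
   GIMPLE_GOTO destinations, GIMPLE_SWITCH case labels, and GIMPLE_RETURN,
   a return from within any OMP construct being always invalid.  */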
18160 static tree
18161 diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
18162 struct walk_stmt_info *wi)
18164 gimple *context = (gimple *) wi->info;
18165 splay_tree_node n;
18166 gimple *stmt = gsi_stmt (*gsi_p);
18168 *handled_ops_p = true;
18170 switch (gimple_code (stmt))
18172 WALK_SUBSTMTS;
18174 case GIMPLE_OMP_PARALLEL:
18175 case GIMPLE_OMP_TASK:
18176 case GIMPLE_OMP_SECTIONS:
18177 case GIMPLE_OMP_SINGLE:
18178 case GIMPLE_OMP_SECTION:
18179 case GIMPLE_OMP_MASTER:
18180 case GIMPLE_OMP_ORDERED:
18181 case GIMPLE_OMP_CRITICAL:
18182 case GIMPLE_OMP_TARGET:
18183 case GIMPLE_OMP_TEAMS:
18184 case GIMPLE_OMP_TASKGROUP:
18185 wi->info = stmt;
18186 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
18187 wi->info = context;
18188 break;
18190 case GIMPLE_OMP_FOR:
18191 wi->info = stmt;
18192 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
18193 walk them. */
18194 walk_gimple_seq_mod (gimple_omp_for_pre_body_ptr (stmt),
18195 diagnose_sb_2, NULL, wi);
18196 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
18197 wi->info = context;
18198 break;
18200 case GIMPLE_COND:
18202 gcond *cond_stmt = as_a <gcond *> (stmt);
18203 tree lab = gimple_cond_true_label (cond_stmt);
18204 if (lab)
18206 n = splay_tree_lookup (all_labels,
18207 (splay_tree_key) lab);
18208 diagnose_sb_0 (gsi_p, context,
18209 n ? (gimple *) n->value : NULL);
18211 lab = gimple_cond_false_label (cond_stmt);
18212 if (lab)
18214 n = splay_tree_lookup (all_labels,
18215 (splay_tree_key) lab);
18216 diagnose_sb_0 (gsi_p, context,
18217 n ? (gimple *) n->value : NULL);
18220 break;
18222 case GIMPLE_GOTO:
18224 tree lab = gimple_goto_dest (stmt);
18225 if (TREE_CODE (lab) != LABEL_DECL)
18226 break;
18228 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
18229 diagnose_sb_0 (gsi_p, context, n ? (gimple *) n->value : NULL);
18231 break;
18233 case GIMPLE_SWITCH:
18235 gswitch *switch_stmt = as_a <gswitch *> (stmt);
18236 unsigned int i;
18237 for (i = 0; i < gimple_switch_num_labels (switch_stmt); ++i)
18239 tree lab = CASE_LABEL (gimple_switch_label (switch_stmt, i));
18240 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
18241 if (n && diagnose_sb_0 (gsi_p, context, (gimple *) n->value))
18242 break;
18245 break;
18247 case GIMPLE_RETURN:
18248 diagnose_sb_0 (gsi_p, context, NULL);
18249 break;
18251 default:
18252 break;
18255 return NULL_TREE;
18258 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
18259 GIMPLE_* codes. */
18260 bool
18261 make_gimple_omp_edges (basic_block bb, struct omp_region **region,
18262 int *region_idx)
18264 gimple *last = last_stmt (bb);
18265 enum gimple_code code = gimple_code (last);
18266 struct omp_region *cur_region = *region;
18267 bool fallthru = false;
18269 switch (code)
18271 case GIMPLE_OMP_PARALLEL:
18272 case GIMPLE_OMP_TASK:
18273 case GIMPLE_OMP_FOR:
18274 case GIMPLE_OMP_SINGLE:
18275 case GIMPLE_OMP_TEAMS:
18276 case GIMPLE_OMP_MASTER:
18277 case GIMPLE_OMP_TASKGROUP:
18278 case GIMPLE_OMP_CRITICAL:
18279 case GIMPLE_OMP_SECTION:
18280 case GIMPLE_OMP_GRID_BODY:
18281 cur_region = new_omp_region (bb, code, cur_region);
18282 fallthru = true;
18283 break;
18285 case GIMPLE_OMP_ORDERED:
18286 cur_region = new_omp_region (bb, code, cur_region);
18287 fallthru = true;
18288 if (find_omp_clause (gimple_omp_ordered_clauses
18289 (as_a <gomp_ordered *> (last)),
18290 OMP_CLAUSE_DEPEND))
18291 cur_region = cur_region->outer;
18292 break;
18294 case GIMPLE_OMP_TARGET:
18295 cur_region = new_omp_region (bb, code, cur_region);
18296 fallthru = true;
18297 switch (gimple_omp_target_kind (last))
18299 case GF_OMP_TARGET_KIND_REGION:
18300 case GF_OMP_TARGET_KIND_DATA:
18301 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
18302 case GF_OMP_TARGET_KIND_OACC_KERNELS:
18303 case GF_OMP_TARGET_KIND_OACC_DATA:
18304 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
18305 break;
18306 case GF_OMP_TARGET_KIND_UPDATE:
18307 case GF_OMP_TARGET_KIND_ENTER_DATA:
18308 case GF_OMP_TARGET_KIND_EXIT_DATA:
18309 case GF_OMP_TARGET_KIND_OACC_UPDATE:
18310 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
18311 case GF_OMP_TARGET_KIND_OACC_DECLARE:
18312 cur_region = cur_region->outer;
18313 break;
18314 default:
18315 gcc_unreachable ();
18317 break;
18319 case GIMPLE_OMP_SECTIONS:
18320 cur_region = new_omp_region (bb, code, cur_region);
18321 fallthru = true;
18322 break;
18324 case GIMPLE_OMP_SECTIONS_SWITCH:
18325 fallthru = false;
18326 break;
18328 case GIMPLE_OMP_ATOMIC_LOAD:
18329 case GIMPLE_OMP_ATOMIC_STORE:
18330 fallthru = true;
18331 break;
18333 case GIMPLE_OMP_RETURN:
18334 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
18335 somewhere other than the next block. This will be
18336 created later. */
18337 cur_region->exit = bb;
18338 if (cur_region->type == GIMPLE_OMP_TASK)
18339 /* Add an edge corresponding to not scheduling the task
18340 immediately. */
18341 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
18342 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
18343 cur_region = cur_region->outer;
18344 break;
18346 case GIMPLE_OMP_CONTINUE:
18347 cur_region->cont = bb;
18348 switch (cur_region->type)
18350 case GIMPLE_OMP_FOR:
18351 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
18352 succs edges as abnormal to prevent splitting
18353 them. */
18354 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
18355 /* Make the loopback edge. */
18356 make_edge (bb, single_succ (cur_region->entry),
18357 EDGE_ABNORMAL);
18359 /* Create an edge from GIMPLE_OMP_FOR to exit, which
18360 corresponds to the case that the body of the loop
18361 is not executed at all. */
18362 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
18363 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
18364 fallthru = false;
18365 break;
18367 case GIMPLE_OMP_SECTIONS:
18368 /* Wire up the edges into and out of the nested sections. */
18370 basic_block switch_bb = single_succ (cur_region->entry);
18372 struct omp_region *i;
18373 for (i = cur_region->inner; i ; i = i->next)
18375 gcc_assert (i->type == GIMPLE_OMP_SECTION);
18376 make_edge (switch_bb, i->entry, 0);
18377 make_edge (i->exit, bb, EDGE_FALLTHRU);
18380 /* Make the loopback edge to the block with
18381 GIMPLE_OMP_SECTIONS_SWITCH. */
18382 make_edge (bb, switch_bb, 0);
18384 /* Make the edge from the switch to exit. */
18385 make_edge (switch_bb, bb->next_bb, 0);
18386 fallthru = false;
18388 break;
18390 case GIMPLE_OMP_TASK:
18391 fallthru = true;
18392 break;
18394 default:
18395 gcc_unreachable ();
18397 break;
18399 default:
18400 gcc_unreachable ();
18403 if (*region != cur_region)
18405 *region = cur_region;
18406 if (cur_region)
18407 *region_idx = cur_region->entry->index;
18408 else
18409 *region_idx = 0;
18412 return fallthru;
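/* As an illustration of the edges built above (a sketch, not literal dump
   output): for a GIMPLE_OMP_FOR region the GIMPLE_OMP_CONTINUE handling
   creates

     OMP_FOR (entry)  -> body            (marked EDGE_ABNORMAL)
     OMP_FOR (entry)  -> after-CONTINUE  (abnormal; body never executed)
     OMP_CONTINUE     -> body            (abnormal loopback edge)
     OMP_CONTINUE     -> after-CONTINUE  (EDGE_FALLTHRU | EDGE_ABNORMAL)

   and the matching GIMPLE_OMP_RETURN block becomes the region's exit.  */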
18415 static unsigned int
18416 diagnose_omp_structured_block_errors (void)
18418 struct walk_stmt_info wi;
18419 gimple_seq body = gimple_body (current_function_decl);
18421 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
18423 memset (&wi, 0, sizeof (wi));
18424 walk_gimple_seq (body, diagnose_sb_1, NULL, &wi);
18426 memset (&wi, 0, sizeof (wi));
18427 wi.want_locations = true;
18428 walk_gimple_seq_mod (&body, diagnose_sb_2, NULL, &wi);
18430 gimple_set_body (current_function_decl, body);
18432 splay_tree_delete (all_labels);
18433 all_labels = NULL;
18435 return 0;
18438 namespace {
18440 const pass_data pass_data_diagnose_omp_blocks =
18442 GIMPLE_PASS, /* type */
18443 "*diagnose_omp_blocks", /* name */
18444 OPTGROUP_NONE, /* optinfo_flags */
18445 TV_NONE, /* tv_id */
18446 PROP_gimple_any, /* properties_required */
18447 0, /* properties_provided */
18448 0, /* properties_destroyed */
18449 0, /* todo_flags_start */
18450 0, /* todo_flags_finish */
18453 class pass_diagnose_omp_blocks : public gimple_opt_pass
18455 public:
18456 pass_diagnose_omp_blocks (gcc::context *ctxt)
18457 : gimple_opt_pass (pass_data_diagnose_omp_blocks, ctxt)
18460 /* opt_pass methods: */
18461 virtual bool gate (function *)
18463 return flag_cilkplus || flag_openacc || flag_openmp;
18465 virtual unsigned int execute (function *)
18467 return diagnose_omp_structured_block_errors ();
18470 }; // class pass_diagnose_omp_blocks
18472 } // anon namespace
18474 gimple_opt_pass *
18475 make_pass_diagnose_omp_blocks (gcc::context *ctxt)
18477 return new pass_diagnose_omp_blocks (ctxt);
18480 /* SIMD clone supporting code. */
18482 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
18483 of arguments to reserve space for. */
18485 static struct cgraph_simd_clone *
18486 simd_clone_struct_alloc (int nargs)
18488 struct cgraph_simd_clone *clone_info;
18489 size_t len = (sizeof (struct cgraph_simd_clone)
18490 + nargs * sizeof (struct cgraph_simd_clone_arg));
18491 clone_info = (struct cgraph_simd_clone *)
18492 ggc_internal_cleared_alloc (len);
18493 return clone_info;
18496 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
18498 static inline void
18499 simd_clone_struct_copy (struct cgraph_simd_clone *to,
18500 struct cgraph_simd_clone *from)
18502 memcpy (to, from, (sizeof (struct cgraph_simd_clone)
18503 + ((from->nargs - from->inbranch)
18504 * sizeof (struct cgraph_simd_clone_arg))));
18507 /* Return vector of parameter types of function FNDECL. This uses
18508 TYPE_ARG_TYPES if available, otherwise falls back to types of
18509 DECL_ARGUMENTS types. */
18511 vec<tree>
18512 simd_clone_vector_of_formal_parm_types (tree fndecl)
18514 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
18515 return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl));
18516 vec<tree> args = ipa_get_vector_of_formal_parms (fndecl);
18517 unsigned int i;
18518 tree arg;
18519 FOR_EACH_VEC_ELT (args, i, arg)
18520 args[i] = TREE_TYPE (args[i]);
18521 return args;
18524 /* Given a simd function in NODE, extract the simd specific
18525 information from the OMP clauses passed in CLAUSES, and return
18526 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
18527 is set to TRUE if the `inbranch' or `notinbranch' clause is specified,
18528 otherwise set to FALSE. */
18530 static struct cgraph_simd_clone *
18531 simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
18532 bool *inbranch_specified)
18534 vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl);
18535 tree t;
18536 int n;
18537 *inbranch_specified = false;
18539 n = args.length ();
18540 if (n > 0 && args.last () == void_type_node)
18541 n--;
18543 /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
18544 be cloned have a distinctive artificial attribute in addition to "omp
18545 declare simd". */
18546 bool cilk_clone
18547 = (flag_cilkplus
18548 && lookup_attribute ("cilk simd function",
18549 DECL_ATTRIBUTES (node->decl)));
18551 /* Allocate one more than needed just in case this is an in-branch
18552 clone which will require a mask argument. */
18553 struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
18554 clone_info->nargs = n;
18555 clone_info->cilk_elemental = cilk_clone;
18557 if (!clauses)
18559 args.release ();
18560 return clone_info;
18562 clauses = TREE_VALUE (clauses);
18563 if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
18564 return clone_info;
18566 for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
18568 switch (OMP_CLAUSE_CODE (t))
18570 case OMP_CLAUSE_INBRANCH:
18571 clone_info->inbranch = 1;
18572 *inbranch_specified = true;
18573 break;
18574 case OMP_CLAUSE_NOTINBRANCH:
18575 clone_info->inbranch = 0;
18576 *inbranch_specified = true;
18577 break;
18578 case OMP_CLAUSE_SIMDLEN:
18579 clone_info->simdlen
18580 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
18581 break;
18582 case OMP_CLAUSE_LINEAR:
18584 tree decl = OMP_CLAUSE_DECL (t);
18585 tree step = OMP_CLAUSE_LINEAR_STEP (t);
18586 int argno = TREE_INT_CST_LOW (decl);
18587 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
18589 enum cgraph_simd_clone_arg_type arg_type;
18590 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
18591 switch (OMP_CLAUSE_LINEAR_KIND (t))
18593 case OMP_CLAUSE_LINEAR_REF:
18594 arg_type
18595 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP;
18596 break;
18597 case OMP_CLAUSE_LINEAR_UVAL:
18598 arg_type
18599 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP;
18600 break;
18601 case OMP_CLAUSE_LINEAR_VAL:
18602 case OMP_CLAUSE_LINEAR_DEFAULT:
18603 arg_type
18604 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP;
18605 break;
18606 default:
18607 gcc_unreachable ();
18609 else
18610 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
18611 clone_info->args[argno].arg_type = arg_type;
18612 clone_info->args[argno].linear_step = tree_to_shwi (step);
18613 gcc_assert (clone_info->args[argno].linear_step >= 0
18614 && clone_info->args[argno].linear_step < n);
18616 else
18618 if (POINTER_TYPE_P (args[argno]))
18619 step = fold_convert (ssizetype, step);
18620 if (!tree_fits_shwi_p (step))
18622 warning_at (OMP_CLAUSE_LOCATION (t), 0,
18623 "ignoring large linear step");
18624 args.release ();
18625 return NULL;
18627 else if (integer_zerop (step))
18629 warning_at (OMP_CLAUSE_LOCATION (t), 0,
18630 "ignoring zero linear step");
18631 args.release ();
18632 return NULL;
18634 else
18636 enum cgraph_simd_clone_arg_type arg_type;
18637 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
18638 switch (OMP_CLAUSE_LINEAR_KIND (t))
18640 case OMP_CLAUSE_LINEAR_REF:
18641 arg_type
18642 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP;
18643 break;
18644 case OMP_CLAUSE_LINEAR_UVAL:
18645 arg_type
18646 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP;
18647 break;
18648 case OMP_CLAUSE_LINEAR_VAL:
18649 case OMP_CLAUSE_LINEAR_DEFAULT:
18650 arg_type
18651 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP;
18652 break;
18653 default:
18654 gcc_unreachable ();
18656 else
18657 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
18658 clone_info->args[argno].arg_type = arg_type;
18659 clone_info->args[argno].linear_step = tree_to_shwi (step);
18662 break;
18664 case OMP_CLAUSE_UNIFORM:
18666 tree decl = OMP_CLAUSE_DECL (t);
18667 int argno = tree_to_uhwi (decl);
18668 clone_info->args[argno].arg_type
18669 = SIMD_CLONE_ARG_TYPE_UNIFORM;
18670 break;
18672 case OMP_CLAUSE_ALIGNED:
18674 tree decl = OMP_CLAUSE_DECL (t);
18675 int argno = tree_to_uhwi (decl);
18676 clone_info->args[argno].alignment
18677 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
18678 break;
18680 default:
18681 break;
18684 args.release ();
18685 return clone_info;
18688 /* Given a SIMD clone in NODE, calculate the characteristic data
18689 type and return the corresponding type. The characteristic data
18690 type is computed as described in the Intel Vector ABI. */
18692 static tree
18693 simd_clone_compute_base_data_type (struct cgraph_node *node,
18694 struct cgraph_simd_clone *clone_info)
18696 tree type = integer_type_node;
18697 tree fndecl = node->decl;
18699 /* a) For non-void function, the characteristic data type is the
18700 return type. */
18701 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
18702 type = TREE_TYPE (TREE_TYPE (fndecl));
18704 /* b) If the function has any non-uniform, non-linear parameters,
18705 then the characteristic data type is the type of the first
18706 such parameter. */
18707 else
18709 vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl);
18710 for (unsigned int i = 0; i < clone_info->nargs; ++i)
18711 if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
18713 type = map[i];
18714 break;
18716 map.release ();
18719 /* c) If the characteristic data type determined by a) or b) above
18720 is struct, union, or class type which is pass-by-value (except
18721 for the type that maps to the built-in complex data type), the
18722 characteristic data type is int. */
18723 if (RECORD_OR_UNION_TYPE_P (type)
18724 && !aggregate_value_p (type, NULL)
18725 && TREE_CODE (type) != COMPLEX_TYPE)
18726 return integer_type_node;
18728 /* d) If none of the above three classes is applicable, the
18729 characteristic data type is int. */
18731 return type;
18733 /* e) For Intel Xeon Phi native and offload compilation, if the
18734 resulting characteristic data type is 8-bit or 16-bit integer
18735 data type, the characteristic data type is int. */
18736 /* Well, we don't handle Xeon Phi yet. */
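/* Worked example (added for illustration; the declarations are made up):
   for
     #pragma omp declare simd
     float foo (float x, int n);
   rule a) applies and the characteristic data type is float. For
     #pragma omp declare simd uniform(p)
     void bar (float *p, int k);
   the return type is void and k is the first non-uniform, non-linear
   parameter, so rule b) yields int.  */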
18739 static tree
18740 simd_clone_mangle (struct cgraph_node *node,
18741 struct cgraph_simd_clone *clone_info)
18743 char vecsize_mangle = clone_info->vecsize_mangle;
18744 char mask = clone_info->inbranch ? 'M' : 'N';
18745 unsigned int simdlen = clone_info->simdlen;
18746 unsigned int n;
18747 pretty_printer pp;
18749 gcc_assert (vecsize_mangle && simdlen);
18751 pp_string (&pp, "_ZGV");
18752 pp_character (&pp, vecsize_mangle);
18753 pp_character (&pp, mask);
18754 pp_decimal_int (&pp, simdlen);
18756 for (n = 0; n < clone_info->nargs; ++n)
18758 struct cgraph_simd_clone_arg arg = clone_info->args[n];
18760 switch (arg.arg_type)
18762 case SIMD_CLONE_ARG_TYPE_UNIFORM:
18763 pp_character (&pp, 'u');
18764 break;
18765 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
18766 pp_character (&pp, 'l');
18767 goto mangle_linear;
18768 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
18769 pp_character (&pp, 'R');
18770 goto mangle_linear;
18771 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
18772 pp_character (&pp, 'L');
18773 goto mangle_linear;
18774 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
18775 pp_character (&pp, 'U');
18776 goto mangle_linear;
18777 mangle_linear:
18778 gcc_assert (arg.linear_step != 0);
18779 if (arg.linear_step > 1)
18780 pp_unsigned_wide_integer (&pp, arg.linear_step);
18781 else if (arg.linear_step < 0)
18783 pp_character (&pp, 'n');
18784 pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
18785 arg.linear_step));
18787 break;
18788 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
18789 pp_string (&pp, "ls");
18790 pp_unsigned_wide_integer (&pp, arg.linear_step);
18791 break;
18792 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
18793 pp_string (&pp, "Rs");
18794 pp_unsigned_wide_integer (&pp, arg.linear_step);
18795 break;
18796 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
18797 pp_string (&pp, "Ls");
18798 pp_unsigned_wide_integer (&pp, arg.linear_step);
18799 break;
18800 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
18801 pp_string (&pp, "Us");
18802 pp_unsigned_wide_integer (&pp, arg.linear_step);
18803 break;
18804 default:
18805 pp_character (&pp, 'v');
18807 if (arg.alignment)
18809 pp_character (&pp, 'a');
18810 pp_decimal_int (&pp, arg.alignment);
18814 pp_underscore (&pp);
18815 const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
18816 if (*str == '*')
18817 ++str;
18818 pp_string (&pp, str);
18819 str = pp_formatted_text (&pp);
18821 /* If there already is a SIMD clone with the same mangled name, don't
18822 add another one. This can happen e.g. for
18823 #pragma omp declare simd
18824 #pragma omp declare simd simdlen(8)
18825 int foo (int, int);
18826 if the simdlen is assumed to be 8 for the first one, etc. */
18827 for (struct cgraph_node *clone = node->simd_clones; clone;
18828 clone = clone->simdclone->next_clone)
18829 if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
18830 str) == 0)
18831 return NULL_TREE;
18833 return get_identifier (str);
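/* Mangling example (an added sketch based on the Vector Function ABI; the
   declaration is made up): for
     #pragma omp declare simd simdlen(4) uniform(n)
     int foo (int x, int n);
   the masked (inbranch) clone is mangled "_ZGV<isa>M4vu_foo": 'M' for the
   mask, 4 for the simdlen, 'v' for the vector argument X and 'u' for the
   uniform argument N, with <isa> being the target's vecsize_mangle letter
   (e.g. 'b' for x86 SSE).  */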
18836 /* Create a simd clone of OLD_NODE and return it. */
18838 static struct cgraph_node *
18839 simd_clone_create (struct cgraph_node *old_node)
18841 struct cgraph_node *new_node;
18842 if (old_node->definition)
18844 if (!old_node->has_gimple_body_p ())
18845 return NULL;
18846 old_node->get_body ();
18847 new_node = old_node->create_version_clone_with_body (vNULL, NULL, NULL,
18848 false, NULL, NULL,
18849 "simdclone");
18851 else
18853 tree old_decl = old_node->decl;
18854 tree new_decl = copy_node (old_node->decl);
18855 DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone");
18856 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
18857 SET_DECL_RTL (new_decl, NULL);
18858 DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
18859 DECL_STATIC_DESTRUCTOR (new_decl) = 0;
18860 new_node = old_node->create_version_clone (new_decl, vNULL, NULL);
18861 if (old_node->in_other_partition)
18862 new_node->in_other_partition = 1;
18864 if (new_node == NULL)
18865 return new_node;
18867 TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
18869 /* The function cgraph_function_versioning () will force the new
18870 symbol local. Undo this, and inherit external visibility from
18871 the old node. */
18872 new_node->local.local = old_node->local.local;
18873 new_node->externally_visible = old_node->externally_visible;
18875 return new_node;
18878 /* Adjust the return type of the given function to its appropriate
18879 vector counterpart. Returns a simd array to be used throughout the
18880 function as a return value. */
18882 static tree
18883 simd_clone_adjust_return_type (struct cgraph_node *node)
18885 tree fndecl = node->decl;
18886 tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
18887 unsigned int veclen;
18888 tree t;
18890 /* Adjust the function return type. */
18891 if (orig_rettype == void_type_node)
18892 return NULL_TREE;
18893 TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl));
18894 t = TREE_TYPE (TREE_TYPE (fndecl));
18895 if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
18896 veclen = node->simdclone->vecsize_int;
18897 else
18898 veclen = node->simdclone->vecsize_float;
18899 veclen /= GET_MODE_BITSIZE (TYPE_MODE (t));
18900 if (veclen > node->simdclone->simdlen)
18901 veclen = node->simdclone->simdlen;
18902 if (POINTER_TYPE_P (t))
18903 t = pointer_sized_int_node;
18904 if (veclen == node->simdclone->simdlen)
18905 t = build_vector_type (t, node->simdclone->simdlen);
18906 else
18908 t = build_vector_type (t, veclen);
18909 t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
18911 TREE_TYPE (TREE_TYPE (fndecl)) = t;
18912 if (!node->definition)
18913 return NULL_TREE;
18915 t = DECL_RESULT (fndecl);
18916 /* Adjust the DECL_RESULT. */
18917 gcc_assert (TREE_TYPE (t) != void_type_node);
18918 TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
18919 relayout_decl (t);
18921 tree atype = build_array_type_nelts (orig_rettype,
18922 node->simdclone->simdlen);
18923 if (veclen != node->simdclone->simdlen)
18924 return build1 (VIEW_CONVERT_EXPR, atype, t);
18926 /* Set up a SIMD array to use as the return value. */
18927 tree retval = create_tmp_var_raw (atype, "retval");
18928 gimple_add_tmp_var (retval);
18929 return retval;
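/* For instance (an illustrative sketch): with simdlen 8, vecsize_float 256
   and a float return value, veclen is 256/32 == 8 == simdlen, so the clone
   simply returns "vector(8) float" and RETVAL is a fresh float[8] array.
   With vecsize_float 128, veclen is 4, the return type becomes a two
   element array of "vector(4) float", and the adjusted DECL_RESULT is
   accessed through a VIEW_CONVERT_EXPR to float[8].  */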
18932 /* Each vector argument has a corresponding array to be used locally
18933 as part of the eventual loop. Create such temporary array and
18934 return it.
18936 PREFIX is the prefix to be used for the temporary.
18938 TYPE is the inner element type.
18940 SIMDLEN is the number of elements. */
18942 static tree
18943 create_tmp_simd_array (const char *prefix, tree type, int simdlen)
18945 tree atype = build_array_type_nelts (type, simdlen);
18946 tree avar = create_tmp_var_raw (atype, prefix);
18947 gimple_add_tmp_var (avar);
18948 return avar;
18951 /* Modify the function argument types to their corresponding vector
18952 counterparts if appropriate. Also, create one array for each simd
18953 argument to be used locally when using the function arguments as
18954 part of the loop.
18956 NODE is the function whose arguments are to be adjusted.
18958 Returns an adjustment vector that will be filled describing how the
18959 argument types will be adjusted. */
18961 static ipa_parm_adjustment_vec
18962 simd_clone_adjust_argument_types (struct cgraph_node *node)
18964 vec<tree> args;
18965 ipa_parm_adjustment_vec adjustments;
18967 if (node->definition)
18968 args = ipa_get_vector_of_formal_parms (node->decl);
18969 else
18970 args = simd_clone_vector_of_formal_parm_types (node->decl);
18971 adjustments.create (args.length ());
18972 unsigned i, j, veclen;
18973 struct ipa_parm_adjustment adj;
18974 struct cgraph_simd_clone *sc = node->simdclone;
18976 for (i = 0; i < sc->nargs; ++i)
18978 memset (&adj, 0, sizeof (adj));
18979 tree parm = args[i];
18980 tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
18981 adj.base_index = i;
18982 adj.base = parm;
18984 sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
18985 sc->args[i].orig_type = parm_type;
18987 switch (sc->args[i].arg_type)
18989 default:
18990 /* No adjustment necessary for scalar arguments. */
18991 adj.op = IPA_PARM_OP_COPY;
18992 break;
18993 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
18994 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
18995 if (node->definition)
18996 sc->args[i].simd_array
18997 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
18998 TREE_TYPE (parm_type),
18999 sc->simdlen);
19000 adj.op = IPA_PARM_OP_COPY;
19001 break;
19002 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
19003 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
19004 case SIMD_CLONE_ARG_TYPE_VECTOR:
19005 if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
19006 veclen = sc->vecsize_int;
19007 else
19008 veclen = sc->vecsize_float;
19009 veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
19010 if (veclen > sc->simdlen)
19011 veclen = sc->simdlen;
19012 adj.arg_prefix = "simd";
19013 if (POINTER_TYPE_P (parm_type))
19014 adj.type = build_vector_type (pointer_sized_int_node, veclen);
19015 else
19016 adj.type = build_vector_type (parm_type, veclen);
19017 sc->args[i].vector_type = adj.type;
19018 for (j = veclen; j < sc->simdlen; j += veclen)
19020 adjustments.safe_push (adj);
19021 if (j == veclen)
19023 memset (&adj, 0, sizeof (adj));
19024 adj.op = IPA_PARM_OP_NEW;
19025 adj.arg_prefix = "simd";
19026 adj.base_index = i;
19027 adj.type = sc->args[i].vector_type;
19031 if (node->definition)
19032 sc->args[i].simd_array
19033 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
19034 parm_type, sc->simdlen);
19036 adjustments.safe_push (adj);
19039 if (sc->inbranch)
19041 tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
19043 memset (&adj, 0, sizeof (adj));
19044 adj.op = IPA_PARM_OP_NEW;
19045 adj.arg_prefix = "mask";
19047 adj.base_index = i;
19048 if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
19049 veclen = sc->vecsize_int;
19050 else
19051 veclen = sc->vecsize_float;
19052 veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
19053 if (veclen > sc->simdlen)
19054 veclen = sc->simdlen;
19055 if (sc->mask_mode != VOIDmode)
19056 adj.type
19057 = lang_hooks.types.type_for_mode (sc->mask_mode, 1);
19058 else if (POINTER_TYPE_P (base_type))
19059 adj.type = build_vector_type (pointer_sized_int_node, veclen);
19060 else
19061 adj.type = build_vector_type (base_type, veclen);
19062 adjustments.safe_push (adj);
19064 for (j = veclen; j < sc->simdlen; j += veclen)
19065 adjustments.safe_push (adj);
19067 /* We have previously allocated one extra entry for the mask. Use
19068 it and fill it. */
19069 sc->nargs++;
19070 if (sc->mask_mode != VOIDmode)
19071 base_type = boolean_type_node;
19072 if (node->definition)
19074 sc->args[i].orig_arg
19075 = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
19076 if (sc->mask_mode == VOIDmode)
19077 sc->args[i].simd_array
19078 = create_tmp_simd_array ("mask", base_type, sc->simdlen);
19079 else if (veclen < sc->simdlen)
19080 sc->args[i].simd_array
19081 = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen);
19082 else
19083 sc->args[i].simd_array = NULL_TREE;
19085 sc->args[i].orig_type = base_type;
19086 sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
19089 if (node->definition)
19090 ipa_modify_formal_parameters (node->decl, adjustments);
19091 else
19093 tree new_arg_types = NULL_TREE, new_reversed;
19094 bool last_parm_void = false;
19095 if (args.length () > 0 && args.last () == void_type_node)
19096 last_parm_void = true;
19098 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
19099 j = adjustments.length ();
19100 for (i = 0; i < j; i++)
19102 struct ipa_parm_adjustment *adj = &adjustments[i];
19103 tree ptype;
19104 if (adj->op == IPA_PARM_OP_COPY)
19105 ptype = args[adj->base_index];
19106 else
19107 ptype = adj->type;
19108 new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
19110 new_reversed = nreverse (new_arg_types);
19111 if (last_parm_void)
19113 if (new_reversed)
19114 TREE_CHAIN (new_arg_types) = void_list_node;
19115 else
19116 new_reversed = void_list_node;
19119 tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl));
19120 TYPE_ARG_TYPES (new_type) = new_reversed;
19121 TREE_TYPE (node->decl) = new_type;
19123 adjustments.release ();
19125 args.release ();
19126 return adjustments;
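/* Illustrative sketch (same made-up numbers as above): a vector float
   argument with simdlen 8 and veclen 4 is rewritten into two
   "vector(4) float" parameters -- the original adjustment plus one
   IPA_PARM_OP_NEW copy pushed in the loop above -- and, for a definition,
   a local float[8] simd array is created so the body can address the
   individual lanes.  */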
19129 /* Initialize and copy the function arguments in NODE to their
19130 corresponding local simd arrays. Returns a fresh gimple_seq with
19131 the instruction sequence generated. */
19133 static gimple_seq
19134 simd_clone_init_simd_arrays (struct cgraph_node *node,
19135 ipa_parm_adjustment_vec adjustments)
19137 gimple_seq seq = NULL;
19138 unsigned i = 0, j = 0, k;
19140 for (tree arg = DECL_ARGUMENTS (node->decl);
19141 arg;
19142 arg = DECL_CHAIN (arg), i++, j++)
19144 if (adjustments[j].op == IPA_PARM_OP_COPY
19145 || POINTER_TYPE_P (TREE_TYPE (arg)))
19146 continue;
19148 node->simdclone->args[i].vector_arg = arg;
19150 tree array = node->simdclone->args[i].simd_array;
19151 if (node->simdclone->mask_mode != VOIDmode
19152 && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
19154 if (array == NULL_TREE)
19155 continue;
19156 unsigned int l
19157 = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
19158 for (k = 0; k <= l; k++)
19160 if (k)
19162 arg = DECL_CHAIN (arg);
19163 j++;
19165 tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
19166 array, size_int (k), NULL, NULL);
19167 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
19168 gimplify_and_add (t, &seq);
19170 continue;
19172 if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
19174 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
19175 tree ptr = build_fold_addr_expr (array);
19176 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
19177 build_int_cst (ptype, 0));
19178 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
19179 gimplify_and_add (t, &seq);
19181 else
19183 unsigned int simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
19184 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
19185 for (k = 0; k < node->simdclone->simdlen; k += simdlen)
19187 tree ptr = build_fold_addr_expr (array);
19188 int elemsize;
19189 if (k)
19191 arg = DECL_CHAIN (arg);
19192 j++;
19194 elemsize
19195 = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
19196 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
19197 build_int_cst (ptype, k * elemsize));
19198 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
19199 gimplify_and_add (t, &seq);
19203 return seq;
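/* E.g. (a sketch, with x_arr standing for the argument's simd_array): for
   a "vector(4) int" argument X whose array is int x_arr[4], the sequence
   built above contains roughly
     MEM[(int *)&x_arr] = x;
   i.e. one full-width vector store per veclen-sized chunk of the array.  */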
19206 /* Callback info for ipa_simd_modify_stmt_ops below. */
19208 struct modify_stmt_info {
19209 ipa_parm_adjustment_vec adjustments;
19210 gimple *stmt;
19211 /* True if the parent statement was modified by
19212 ipa_simd_modify_stmt_ops. */
19213 bool modified;
19216 /* Callback for walk_gimple_op.
19218 Adjust operands from a given statement as specified in the
19219 adjustments vector in the callback data. */
19221 static tree
19222 ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
19224 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
19225 struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
19226 tree *orig_tp = tp;
19227 if (TREE_CODE (*tp) == ADDR_EXPR)
19228 tp = &TREE_OPERAND (*tp, 0);
19229 struct ipa_parm_adjustment *cand = NULL;
19230 if (TREE_CODE (*tp) == PARM_DECL)
19231 cand = ipa_get_adjustment_candidate (&tp, NULL, info->adjustments, true);
19232 else
19234 if (TYPE_P (*tp))
19235 *walk_subtrees = 0;
19238 tree repl = NULL_TREE;
19239 if (cand)
19240 repl = unshare_expr (cand->new_decl);
19241 else
19243 if (tp != orig_tp)
19245 *walk_subtrees = 0;
19246 bool modified = info->modified;
19247 info->modified = false;
19248 walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset);
19249 if (!info->modified)
19251 info->modified = modified;
19252 return NULL_TREE;
19254 info->modified = modified;
19255 repl = *tp;
19257 else
19258 return NULL_TREE;
19261 if (tp != orig_tp)
19263 repl = build_fold_addr_expr (repl);
19264 gimple *stmt;
19265 if (is_gimple_debug (info->stmt))
19267 tree vexpr = make_node (DEBUG_EXPR_DECL);
19268 stmt = gimple_build_debug_source_bind (vexpr, repl, NULL);
19269 DECL_ARTIFICIAL (vexpr) = 1;
19270 TREE_TYPE (vexpr) = TREE_TYPE (repl);
19271 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (repl));
19272 repl = vexpr;
19274 else
19276 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
19277 repl = gimple_assign_lhs (stmt);
19279 gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
19280 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
19281 *orig_tp = repl;
19283 else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
19285 tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
19286 *tp = vce;
19288 else
19289 *tp = repl;
19291 info->modified = true;
19292 return NULL_TREE;
19295 /* Traverse the function body and perform all modifications as
19296 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
19297 modified such that the replacement/reduction value will now be an
19298 offset into the corresponding simd_array.
19300 This function will replace all function argument uses with their
19301 corresponding simd array elements, and adjust the return values
19302 accordingly. */
19304 static void
19305 ipa_simd_modify_function_body (struct cgraph_node *node,
19306 ipa_parm_adjustment_vec adjustments,
19307 tree retval_array, tree iter)
19309 basic_block bb;
19310 unsigned int i, j, l;
19312 /* Re-use the adjustments array, but this time use it to replace
19313 every function argument use to an offset into the corresponding
19314 simd_array. */
19315 for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
19317 if (!node->simdclone->args[i].vector_arg)
19318 continue;
19320 tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
19321 tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
19322 adjustments[j].new_decl
19323 = build4 (ARRAY_REF,
19324 basetype,
19325 node->simdclone->args[i].simd_array,
19326 iter,
19327 NULL_TREE, NULL_TREE);
19328 if (adjustments[j].op == IPA_PARM_OP_NONE
19329 && TYPE_VECTOR_SUBPARTS (vectype) < node->simdclone->simdlen)
19330 j += node->simdclone->simdlen / TYPE_VECTOR_SUBPARTS (vectype) - 1;
19333 l = adjustments.length ();
19334 for (i = 1; i < num_ssa_names; i++)
19336 tree name = ssa_name (i);
19337 if (name
19338 && SSA_NAME_VAR (name)
19339 && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL)
19341 for (j = 0; j < l; j++)
19342 if (SSA_NAME_VAR (name) == adjustments[j].base
19343 && adjustments[j].new_decl)
19345 tree base_var;
19346 if (adjustments[j].new_ssa_base == NULL_TREE)
19348 base_var
19349 = copy_var_decl (adjustments[j].base,
19350 DECL_NAME (adjustments[j].base),
19351 TREE_TYPE (adjustments[j].base));
19352 adjustments[j].new_ssa_base = base_var;
19354 else
19355 base_var = adjustments[j].new_ssa_base;
19356 if (SSA_NAME_IS_DEFAULT_DEF (name))
19358 bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19359 gimple_stmt_iterator gsi = gsi_after_labels (bb);
19360 tree new_decl = unshare_expr (adjustments[j].new_decl);
19361 set_ssa_default_def (cfun, adjustments[j].base, NULL_TREE);
19362 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
19363 SSA_NAME_IS_DEFAULT_DEF (name) = 0;
19364 gimple *stmt = gimple_build_assign (name, new_decl);
19365 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
19367 else
19368 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
19373 struct modify_stmt_info info;
19374 info.adjustments = adjustments;
19376 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
19378 gimple_stmt_iterator gsi;
19380 gsi = gsi_start_bb (bb);
19381 while (!gsi_end_p (gsi))
19383 gimple *stmt = gsi_stmt (gsi);
19384 info.stmt = stmt;
19385 struct walk_stmt_info wi;
19387 memset (&wi, 0, sizeof (wi));
19388 info.modified = false;
19389 wi.info = &info;
19390 walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
19392 if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
19394 tree retval = gimple_return_retval (return_stmt);
19395 if (!retval)
19397 gsi_remove (&gsi, true);
19398 continue;
19401 /* Replace `return foo' with `retval_array[iter] = foo'. */
19402 tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
19403 retval_array, iter, NULL, NULL);
19404 stmt = gimple_build_assign (ref, retval);
19405 gsi_replace (&gsi, stmt, true);
19406 info.modified = true;
19409 if (info.modified)
19411 update_stmt (stmt);
19412 if (maybe_clean_eh_stmt (stmt))
19413 gimple_purge_dead_eh_edges (gimple_bb (stmt));
19415 gsi_next (&gsi);
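/* In effect (an illustrative sketch, not literal dump output; x_arr again
   stands for the argument's simd_array): inside the clone
     d_3 = x_1(D) * 2.0;   becomes   d_3 = x_arr[iter] * 2.0;
   and
     return foo;           becomes   retval_array[iter] = foo;
   so each iteration of the simd loop reads and writes its own lane.  */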
19420 /* Helper function of simd_clone_adjust. Returns the linear step addend
19421 of the Ith argument. */
19423 static tree
19424 simd_clone_linear_addend (struct cgraph_node *node, unsigned int i,
19425 tree addtype, basic_block entry_bb)
19427 tree ptype = NULL_TREE;
19428 switch (node->simdclone->args[i].arg_type)
19430 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
19431 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
19432 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
19433 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
19434 return build_int_cst (addtype, node->simdclone->args[i].linear_step);
19435 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
19436 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
19437 ptype = TREE_TYPE (node->simdclone->args[i].orig_arg);
19438 break;
19439 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
19440 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
19441 ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg));
19442 break;
19443 default:
19444 gcc_unreachable ();
19447 unsigned int idx = node->simdclone->args[i].linear_step;
19448 tree arg = node->simdclone->args[idx].orig_arg;
19449 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg)));
19450 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
19451 gimple *g;
19452 tree ret;
19453 if (is_gimple_reg (arg))
19454 ret = get_or_create_ssa_default_def (cfun, arg);
19455 else
19457 g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
19458 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19459 ret = gimple_assign_lhs (g);
19461 if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE)
19463 g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))),
19464 build_simple_mem_ref (ret));
19465 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19466 ret = gimple_assign_lhs (g);
19468 if (!useless_type_conversion_p (addtype, TREE_TYPE (ret)))
19470 g = gimple_build_assign (make_ssa_name (addtype), NOP_EXPR, ret);
19471 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19472 ret = gimple_assign_lhs (g);
19474 if (POINTER_TYPE_P (ptype))
19476 tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype));
19477 if (size && TREE_CODE (size) == INTEGER_CST)
19479 g = gimple_build_assign (make_ssa_name (addtype), MULT_EXPR,
19480 ret, fold_convert (addtype, size));
19481 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19482 ret = gimple_assign_lhs (g);
19485 return ret;
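/* Example (a sketch; the declaration is made up): for
     #pragma omp declare simd linear(p:n) uniform(n)
     void baz (double *p, int n);
   P is SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP and linear_step holds the
   index of N, so the addend computed above is N converted to sizetype and
   multiplied by sizeof (double), i.e. each iteration of the clone's loop
   advances P by n * 8 bytes.  */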
19488 /* Adjust the argument types in NODE to their appropriate vector
19489 counterparts. */
19491 static void
19492 simd_clone_adjust (struct cgraph_node *node)
19494 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
19496 targetm.simd_clone.adjust (node);
19498 tree retval = simd_clone_adjust_return_type (node);
19499 ipa_parm_adjustment_vec adjustments
19500 = simd_clone_adjust_argument_types (node);
19502 push_gimplify_context ();
19504 gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
19506 /* Adjust all uses of vector arguments accordingly. Adjust all
19507 return values accordingly. */
19508 tree iter = create_tmp_var (unsigned_type_node, "iter");
19509 tree iter1 = make_ssa_name (iter);
19510 tree iter2 = make_ssa_name (iter);
19511 ipa_simd_modify_function_body (node, adjustments, retval, iter1);
19513 /* Initialize the iteration variable. */
19514 basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19515 basic_block body_bb = split_block_after_labels (entry_bb)->dest;
19516 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
19517 /* Insert the SIMD array and iv initialization at function
19518 entry. */
19519 gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
19521 pop_gimplify_context (NULL);
19523 /* Create a new BB right before the original exit BB, to hold the
19524 iteration increment and the condition/branch. */
19525 basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
19526 basic_block incr_bb = create_empty_bb (orig_exit);
19527 add_bb_to_loop (incr_bb, body_bb->loop_father);
19528 /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty
19529 flag. Set it now to be a FALLTHRU_EDGE. */
19530 gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
19531 EDGE_SUCC (orig_exit, 0)->flags |= EDGE_FALLTHRU;
19532 for (unsigned i = 0;
19533 i < EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); ++i)
19535 edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i);
19536 redirect_edge_succ (e, incr_bb);
19538 edge e = make_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
19539 e->probability = REG_BR_PROB_BASE;
19540 gsi = gsi_last_bb (incr_bb);
19541 gimple *g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
19542 build_int_cst (unsigned_type_node, 1));
19543 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19545 /* Mostly annotate the loop for the vectorizer (the rest is done below). */
19546 struct loop *loop = alloc_loop ();
19547 cfun->has_force_vectorize_loops = true;
19548 loop->safelen = node->simdclone->simdlen;
19549 loop->force_vectorize = true;
19550 loop->header = body_bb;
19552 /* Branch around the body if the mask applies. */
19553 if (node->simdclone->inbranch)
19555 gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
19556 tree mask_array
19557 = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
19558 tree mask;
19559 if (node->simdclone->mask_mode != VOIDmode)
19561 tree shift_cnt;
19562 if (mask_array == NULL_TREE)
19564 tree arg = node->simdclone->args[node->simdclone->nargs
19565 - 1].vector_arg;
19566 mask = get_or_create_ssa_default_def (cfun, arg);
19567 shift_cnt = iter1;
19569 else
19571 tree maskt = TREE_TYPE (mask_array);
19572 int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
19573 c = node->simdclone->simdlen / (c + 1);
19574 int s = exact_log2 (c);
19575 gcc_assert (s > 0);
19576 c--;
19577 tree idx = make_ssa_name (TREE_TYPE (iter1));
19578 g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
19579 build_int_cst (NULL_TREE, s));
19580 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19581 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
19582 tree aref = build4 (ARRAY_REF,
19583 TREE_TYPE (TREE_TYPE (mask_array)),
19584 mask_array, idx, NULL, NULL);
19585 g = gimple_build_assign (mask, aref);
19586 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19587 shift_cnt = make_ssa_name (TREE_TYPE (iter1));
19588 g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
19589 build_int_cst (TREE_TYPE (iter1), c));
19590 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19592 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
19593 RSHIFT_EXPR, mask, shift_cnt);
19594 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19595 mask = gimple_assign_lhs (g);
19596 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
19597 BIT_AND_EXPR, mask,
19598 build_int_cst (TREE_TYPE (mask), 1));
19599 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19600 mask = gimple_assign_lhs (g);
19602 else
19604 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
19605 tree aref = build4 (ARRAY_REF,
19606 TREE_TYPE (TREE_TYPE (mask_array)),
19607 mask_array, iter1, NULL, NULL);
19608 g = gimple_build_assign (mask, aref);
19609 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19610 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
19611 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
19613 aref = build1 (VIEW_CONVERT_EXPR,
19614 build_nonstandard_integer_type (bitsize, 0),
19615 mask);
19616 mask = make_ssa_name (TREE_TYPE (aref));
19617 g = gimple_build_assign (mask, aref);
19618 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19622 g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
19623 NULL, NULL);
19624 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19625 make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
19626 FALLTHRU_EDGE (loop->header)->flags = EDGE_FALSE_VALUE;
19629 /* Generate the condition. */
19630 g = gimple_build_cond (LT_EXPR,
19631 iter2,
19632 build_int_cst (unsigned_type_node,
19633 node->simdclone->simdlen),
19634 NULL, NULL);
19635 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19636 e = split_block (incr_bb, gsi_stmt (gsi));
19637 basic_block latch_bb = e->dest;
19638 basic_block new_exit_bb;
19639 new_exit_bb = split_block_after_labels (latch_bb)->dest;
19640 loop->latch = latch_bb;
19642 redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
19644 make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
19645 /* The successor of incr_bb is already pointing to latch_bb; just
19646 change the flags.
19647 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
19648 FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
19650 gphi *phi = create_phi_node (iter1, body_bb);
19651 edge preheader_edge = find_edge (entry_bb, body_bb);
19652 edge latch_edge = single_succ_edge (latch_bb);
19653 add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
19654 UNKNOWN_LOCATION);
19655 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19657 /* Generate the new return. */
19658 gsi = gsi_last_bb (new_exit_bb);
19659 if (retval
19660 && TREE_CODE (retval) == VIEW_CONVERT_EXPR
19661 && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
19662 retval = TREE_OPERAND (retval, 0);
19663 else if (retval)
19665 retval = build1 (VIEW_CONVERT_EXPR,
19666 TREE_TYPE (TREE_TYPE (node->decl)),
19667 retval);
19668 retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
19669 false, GSI_CONTINUE_LINKING);
19671 g = gimple_build_return (retval);
19672 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19674 /* Handle aligned clauses by replacing default defs of the aligned
19675 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
19676 lhs. Handle linear by adding PHIs. */
19677 for (unsigned i = 0; i < node->simdclone->nargs; i++)
19678 if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
19679 && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg)
19680 || !is_gimple_reg_type
19681 (TREE_TYPE (node->simdclone->args[i].orig_arg))))
19683 tree orig_arg = node->simdclone->args[i].orig_arg;
19684 if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
19685 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
19686 else
19688 iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
19689 gimple_add_tmp_var (iter1);
19691 gsi = gsi_after_labels (entry_bb);
19692 g = gimple_build_assign (iter1, orig_arg);
19693 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19694 gsi = gsi_after_labels (body_bb);
19695 g = gimple_build_assign (orig_arg, iter1);
19696 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19698 else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
19699 && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg)
19700 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
19701 == REFERENCE_TYPE
19702 && TREE_ADDRESSABLE
19703 (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg))))
19705 tree orig_arg = node->simdclone->args[i].orig_arg;
19706 tree def = ssa_default_def (cfun, orig_arg);
19707 if (def && !has_zero_uses (def))
19709 iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
19710 gimple_add_tmp_var (iter1);
19711 gsi = gsi_after_labels (entry_bb);
19712 g = gimple_build_assign (iter1, build_simple_mem_ref (def));
19713 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19714 gsi = gsi_after_labels (body_bb);
19715 g = gimple_build_assign (build_simple_mem_ref (def), iter1);
19716 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19719 else if (node->simdclone->args[i].alignment
19720 && node->simdclone->args[i].arg_type
19721 == SIMD_CLONE_ARG_TYPE_UNIFORM
19722 && (node->simdclone->args[i].alignment
19723 & (node->simdclone->args[i].alignment - 1)) == 0
19724 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
19725 == POINTER_TYPE)
19727 unsigned int alignment = node->simdclone->args[i].alignment;
19728 tree orig_arg = node->simdclone->args[i].orig_arg;
19729 tree def = ssa_default_def (cfun, orig_arg);
19730 if (def && !has_zero_uses (def))
19732 tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
19733 gimple_seq seq = NULL;
19734 bool need_cvt = false;
19735 gcall *call
19736 = gimple_build_call (fn, 2, def, size_int (alignment));
19737 g = call;
19738 if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
19739 ptr_type_node))
19740 need_cvt = true;
19741 tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg);
19742 gimple_call_set_lhs (g, t);
19743 gimple_seq_add_stmt_without_update (&seq, g);
19744 if (need_cvt)
19746 t = make_ssa_name (orig_arg);
19747 g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (g));
19748 gimple_seq_add_stmt_without_update (&seq, g);
19750 gsi_insert_seq_on_edge_immediate
19751 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
19753 entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19754 int freq = compute_call_stmt_bb_frequency (current_function_decl,
19755 entry_bb);
19756 node->create_edge (cgraph_node::get_create (fn),
19757 call, entry_bb->count, freq);
19759 imm_use_iterator iter;
19760 use_operand_p use_p;
19761 gimple *use_stmt;
19762 tree repl = gimple_get_lhs (g);
19763 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19764 if (is_gimple_debug (use_stmt) || use_stmt == call)
19765 continue;
19766 else
19767 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19768 SET_USE (use_p, repl);
19771 else if ((node->simdclone->args[i].arg_type
19772 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
19773 || (node->simdclone->args[i].arg_type
19774 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
19775 || (node->simdclone->args[i].arg_type
19776 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
19777 || (node->simdclone->args[i].arg_type
19778 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP))
19780 tree orig_arg = node->simdclone->args[i].orig_arg;
19781 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19782 || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
19783 tree def = NULL_TREE;
19784 if (TREE_ADDRESSABLE (orig_arg))
19786 def = make_ssa_name (TREE_TYPE (orig_arg));
19787 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
19788 iter2 = make_ssa_name (TREE_TYPE (orig_arg));
19789 gsi = gsi_after_labels (entry_bb);
19790 g = gimple_build_assign (def, orig_arg);
19791 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19793 else
19795 def = ssa_default_def (cfun, orig_arg);
19796 if (!def || has_zero_uses (def))
19797 def = NULL_TREE;
19798 else
19800 iter1 = make_ssa_name (orig_arg);
19801 iter2 = make_ssa_name (orig_arg);
19804 if (def)
19806 phi = create_phi_node (iter1, body_bb);
19807 add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
19808 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19809 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19810 ? PLUS_EXPR : POINTER_PLUS_EXPR;
19811 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19812 ? TREE_TYPE (orig_arg) : sizetype;
19813 tree addcst = simd_clone_linear_addend (node, i, addtype,
19814 entry_bb);
19815 gsi = gsi_last_bb (incr_bb);
19816 g = gimple_build_assign (iter2, code, iter1, addcst);
19817 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19819 imm_use_iterator iter;
19820 use_operand_p use_p;
19821 gimple *use_stmt;
19822 if (TREE_ADDRESSABLE (orig_arg))
19824 gsi = gsi_after_labels (body_bb);
19825 g = gimple_build_assign (orig_arg, iter1);
19826 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19828 else
19829 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19830 if (use_stmt == phi)
19831 continue;
19832 else
19833 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19834 SET_USE (use_p, iter1);
19837 else if (node->simdclone->args[i].arg_type
19838 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
19839 || (node->simdclone->args[i].arg_type
19840 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP))
19842 tree orig_arg = node->simdclone->args[i].orig_arg;
19843 tree def = ssa_default_def (cfun, orig_arg);
19844 gcc_assert (!TREE_ADDRESSABLE (orig_arg)
19845 && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE);
19846 if (def && !has_zero_uses (def))
19848 tree rtype = TREE_TYPE (TREE_TYPE (orig_arg));
19849 iter1 = make_ssa_name (orig_arg);
19850 iter2 = make_ssa_name (orig_arg);
19851 tree iter3 = make_ssa_name (rtype);
19852 tree iter4 = make_ssa_name (rtype);
19853 tree iter5 = make_ssa_name (rtype);
19854 gsi = gsi_after_labels (entry_bb);
19855 gimple *load
19856 = gimple_build_assign (iter3, build_simple_mem_ref (def));
19857 gsi_insert_before (&gsi, load, GSI_NEW_STMT);
19859 tree array = node->simdclone->args[i].simd_array;
19860 TREE_ADDRESSABLE (array) = 1;
19861 tree ptr = build_fold_addr_expr (array);
19862 phi = create_phi_node (iter1, body_bb);
19863 add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION);
19864 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19865 g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1,
19866 TYPE_SIZE_UNIT (TREE_TYPE (iter3)));
19867 gsi = gsi_last_bb (incr_bb);
19868 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19870 phi = create_phi_node (iter4, body_bb);
19871 add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION);
19872 add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION);
19873 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
19874 ? PLUS_EXPR : POINTER_PLUS_EXPR;
19875 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
19876 ? TREE_TYPE (iter3) : sizetype;
19877 tree addcst = simd_clone_linear_addend (node, i, addtype,
19878 entry_bb);
19879 g = gimple_build_assign (iter5, code, iter4, addcst);
19880 gsi = gsi_last_bb (incr_bb);
19881 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19883 g = gimple_build_assign (build_simple_mem_ref (iter1), iter4);
19884 gsi = gsi_after_labels (body_bb);
19885 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19887 imm_use_iterator iter;
19888 use_operand_p use_p;
19889 gimple *use_stmt;
19890 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19891 if (use_stmt == load)
19892 continue;
19893 else
19894 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19895 SET_USE (use_p, iter1);
19897 if (!TYPE_READONLY (rtype))
19899 tree v = make_ssa_name (rtype);
19900 tree aref = build4 (ARRAY_REF, rtype, array,
19901 size_zero_node, NULL_TREE,
19902 NULL_TREE);
19903 gsi = gsi_after_labels (new_exit_bb);
19904 g = gimple_build_assign (v, aref);
19905 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19906 g = gimple_build_assign (build_simple_mem_ref (def), v);
19907 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19912 calculate_dominance_info (CDI_DOMINATORS);
19913 add_loop (loop, loop->header->loop_father);
19914 update_ssa (TODO_update_ssa);
19916 pop_cfun ();
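/* Conceptually (an added sketch, not literal output), for
     #pragma omp declare simd simdlen(4)
     int foo (int x) { return x + 1; }
   the clone built above has the shape
     foo.simdclone (vector(4) int x)
     {
       int x_arr[4];                      // filled from X on entry
       int retval[4];
       for (iter = 0; iter < 4; iter++)   // safelen/force_vectorize loop
         retval[iter] = x_arr[iter] + 1;
       return VIEW_CONVERT (retval);      // viewed as vector(4) int
     }
   which the vectorizer later collapses back into straight-line vector
   code.  */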
19919 /* If the function in NODE is tagged as an elemental SIMD function,
19920 create the appropriate SIMD clones. */
19922 static void
19923 expand_simd_clones (struct cgraph_node *node)
19925 tree attr = lookup_attribute ("omp declare simd",
19926 DECL_ATTRIBUTES (node->decl));
19927 if (attr == NULL_TREE
19928 || node->global.inlined_to
19929 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
19930 return;
19932 /* Ignore
19933 #pragma omp declare simd
19934 extern int foo ();
19935 in C, where we don't know the argument types at all. */
19936 if (!node->definition
19937 && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
19938 return;
19940 /* Call this before creating clone_info, as it might ggc_collect. */
19941 if (node->definition && node->has_gimple_body_p ())
19942 node->get_body ();
19946 /* Start with parsing the "omp declare simd" attribute(s). */
19947 bool inbranch_clause_specified;
19948 struct cgraph_simd_clone *clone_info
19949 = simd_clone_clauses_extract (node, TREE_VALUE (attr),
19950 &inbranch_clause_specified);
19951 if (clone_info == NULL)
19952 continue;
19954 int orig_simdlen = clone_info->simdlen;
19955 tree base_type = simd_clone_compute_base_data_type (node, clone_info);
19956 /* The target can return 0 (no simd clones should be created),
19957 1 (just one ISA of simd clones should be created), or a higher
19958 count of ISA variants. In that case, clone_info is initialized
19959 for the first ISA variant. */
19960 int count
19961 = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
19962 base_type, 0);
19963 if (count == 0)
19964 continue;
19966 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
19967 also create one inbranch and one !inbranch clone of it. */
19968 for (int i = 0; i < count * 2; i++)
19970 struct cgraph_simd_clone *clone = clone_info;
19971 if (inbranch_clause_specified && (i & 1) != 0)
19972 continue;
19974 if (i != 0)
19976 clone = simd_clone_struct_alloc (clone_info->nargs
19977 + ((i & 1) != 0));
19978 simd_clone_struct_copy (clone, clone_info);
19979 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
19980 and simd_clone_adjust_argument_types did to the first
19981 clone's info. */
19982 clone->nargs -= clone_info->inbranch;
19983 clone->simdlen = orig_simdlen;
19984 /* And call the target hook again to get the right ISA. */
19985 targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
19986 base_type,
19987 i / 2);
19988 if ((i & 1) != 0)
19989 clone->inbranch = 1;
19992 /* simd_clone_mangle might fail if such a clone has been created
19993 already. */
19994 tree id = simd_clone_mangle (node, clone);
19995 if (id == NULL_TREE)
19996 continue;
19998 /* Only when we are sure we want to create the clone do we actually
19999 clone the function (for definitions) or create another
20000 extern FUNCTION_DECL (for prototypes without definitions). */
20001 struct cgraph_node *n = simd_clone_create (node);
20002 if (n == NULL)
20003 continue;
20005 n->simdclone = clone;
20006 clone->origin = node;
20007 clone->next_clone = NULL;
20008 if (node->simd_clones == NULL)
20010 clone->prev_clone = n;
20011 node->simd_clones = n;
20013 else
20015 clone->prev_clone = node->simd_clones->simdclone->prev_clone;
20016 clone->prev_clone->simdclone->next_clone = n;
20017 node->simd_clones->simdclone->prev_clone = n;
20019 symtab->change_decl_assembler_name (n->decl, id);
20020 /* And finally adjust the return type, parameters and for
20021 definitions also function body. */
20022 if (node->definition)
20023 simd_clone_adjust (n);
20024 else
20026 simd_clone_adjust_return_type (n);
20027 simd_clone_adjust_argument_types (n);
20031 while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
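/* Editorial example (assuming the usual x86_64 vector-function ABI
   mangling; exact names and counts are target-dependent):
       #pragma omp declare simd notinbranch
       int foo (int x);
   typically produces one clone per ISA variant reported by
   targetm.simd_clone.compute_vecsize_and_simdlen, e.g. _ZGVbN4_foo,
   _ZGVcN4_foo and _ZGVdN8_foo, where the letter encodes the ISA,
   'N'/'M' encodes !inbranch/inbranch, and the number is the simdlen.  */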
20034 /* Entry point for IPA simd clone creation pass. */
20036 static unsigned int
20037 ipa_omp_simd_clone (void)
20039 struct cgraph_node *node;
20040 FOR_EACH_FUNCTION (node)
20041 expand_simd_clones (node);
20042 return 0;
20045 namespace {
20047 const pass_data pass_data_omp_simd_clone =
20049 SIMPLE_IPA_PASS, /* type */
20050 "simdclone", /* name */
20051 OPTGROUP_NONE, /* optinfo_flags */
20052 TV_NONE, /* tv_id */
20053 ( PROP_ssa | PROP_cfg ), /* properties_required */
20054 0, /* properties_provided */
20055 0, /* properties_destroyed */
20056 0, /* todo_flags_start */
20057 0, /* todo_flags_finish */
20060 class pass_omp_simd_clone : public simple_ipa_opt_pass
20062 public:
20063 pass_omp_simd_clone(gcc::context *ctxt)
20064 : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
20067 /* opt_pass methods: */
20068 virtual bool gate (function *);
20069 virtual unsigned int execute (function *) { return ipa_omp_simd_clone (); }
20072 bool
20073 pass_omp_simd_clone::gate (function *)
20075 return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
20078 } // anon namespace
20080 simple_ipa_opt_pass *
20081 make_pass_omp_simd_clone (gcc::context *ctxt)
20083 return new pass_omp_simd_clone (ctxt);
20086 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
20087 adds their addresses and sizes to constructor-vector V_CTOR. */
20088 static void
20089 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
20090 vec<constructor_elt, va_gc> *v_ctor)
20092 unsigned len = vec_safe_length (v_decls);
20093 for (unsigned i = 0; i < len; i++)
20095 tree it = (*v_decls)[i];
20096 bool is_var = TREE_CODE (it) == VAR_DECL;
20097 bool is_link_var
20098 = is_var
20099 #ifdef ACCEL_COMPILER
20100 && DECL_HAS_VALUE_EXPR_P (it)
20101 #endif
20102 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
20104 tree size = NULL_TREE;
20105 if (is_var)
20106 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
20108 tree addr;
20109 if (!is_link_var)
20110 addr = build_fold_addr_expr (it);
20111 else
20113 #ifdef ACCEL_COMPILER
20114 /* For "omp declare target link" vars add address of the pointer to
20115 the target table, instead of address of the var. */
20116 tree value_expr = DECL_VALUE_EXPR (it);
20117 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
20118 varpool_node::finalize_decl (link_ptr_decl);
20119 addr = build_fold_addr_expr (link_ptr_decl);
20120 #else
20121 addr = build_fold_addr_expr (it);
20122 #endif
20124 /* Most significant bit of the size marks "omp declare target link"
20125 vars in host and target tables. */
20126 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
20127 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
20128 * BITS_PER_UNIT - 1);
20129 size = wide_int_to_tree (const_ptr_type_node, isize);
20132 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
20133 if (is_var)
20134 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
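/* Editorial worked example: on a 64-bit target,
   int_size_in_bytes (const_ptr_type_node) * BITS_PER_UNIT - 1 == 63,
   so a 4-byte "omp declare target link" variable is recorded with
   size 4 | (1ULL << 63) == 0x8000000000000004; the consumer strips the
   top bit to recover the real size and uses it as the link-var flag.  */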
20138 /* Create new symbols containing (address, size) pairs for global variables
20139 marked with the "omp declare target" attribute, as well as addresses for
20140 the functions that are outlined offloading regions. */
20141 void
20142 omp_finish_file (void)
20144 unsigned num_funcs = vec_safe_length (offload_funcs);
20145 unsigned num_vars = vec_safe_length (offload_vars);
20147 if (num_funcs == 0 && num_vars == 0)
20148 return;
20150 if (targetm_common.have_named_sections)
20152 vec<constructor_elt, va_gc> *v_f, *v_v;
20153 vec_alloc (v_f, num_funcs);
20154 vec_alloc (v_v, num_vars * 2);
20156 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
20157 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
20159 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
20160 num_vars * 2);
20161 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
20162 num_funcs);
20163 TYPE_ALIGN (vars_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
20164 TYPE_ALIGN (funcs_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
20165 tree ctor_v = build_constructor (vars_decl_type, v_v);
20166 tree ctor_f = build_constructor (funcs_decl_type, v_f);
20167 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
20168 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
20169 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
20170 get_identifier (".offload_func_table"),
20171 funcs_decl_type);
20172 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
20173 get_identifier (".offload_var_table"),
20174 vars_decl_type);
20175 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
20176 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
20177 otherwise a joint table in a binary will contain padding between
20178 tables from multiple object files. */
20179 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
20180 DECL_ALIGN (funcs_decl) = TYPE_ALIGN (funcs_decl_type);
20181 DECL_ALIGN (vars_decl) = TYPE_ALIGN (vars_decl_type);
20182 DECL_INITIAL (funcs_decl) = ctor_f;
20183 DECL_INITIAL (vars_decl) = ctor_v;
20184 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
20185 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
20187 varpool_node::finalize_decl (vars_decl);
20188 varpool_node::finalize_decl (funcs_decl);
20190 else
20192 for (unsigned i = 0; i < num_funcs; i++)
20194 tree it = (*offload_funcs)[i];
20195 targetm.record_offload_symbol (it);
20197 for (unsigned i = 0; i < num_vars; i++)
20199 tree it = (*offload_vars)[i];
20200 targetm.record_offload_symbol (it);
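/* Editorial sketch of the named-section layout produced above, for two
   offloaded functions and one ordinary target variable:
       .offload_func_table: { &fn1, &fn2 }
       .offload_var_table:  { &var1, sizeof (var1) }
   The linker concatenates these sections across objects, which is why
   the tables must not be aligned beyond
   TYPE_ALIGN (pointer_sized_int_node): extra alignment would insert
   padding between the per-object fragments of the joint table.  */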
20205 /* Find the number of threads (POS = false), or thread number (POS =
20206 true) for an OpenACC region partitioned as MASK. Setup code
20207 required for the calculation is added to SEQ. */
20209 static tree
20210 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
20212 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
20213 unsigned ix;
20215 /* Start at gang level, and examine relevant dimension indices. */
20216 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
20217 if (GOMP_DIM_MASK (ix) & mask)
20219 tree arg = build_int_cst (unsigned_type_node, ix);
20221 if (res)
20223 /* We had an outer index, so scale that by the size of
20224 this dimension. */
20225 tree n = create_tmp_var (integer_type_node);
20226 gimple *call
20227 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
20229 gimple_call_set_lhs (call, n);
20230 gimple_seq_add_stmt (seq, call);
20231 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
20233 if (pos)
20235 /* Determine index in this dimension. */
20236 tree id = create_tmp_var (integer_type_node);
20237 gimple *call = gimple_build_call_internal
20238 (IFN_GOACC_DIM_POS, 1, arg);
20240 gimple_call_set_lhs (call, id);
20241 gimple_seq_add_stmt (seq, call);
20242 if (res)
20243 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
20244 else
20245 res = id;
20249 if (res == NULL_TREE)
20250 res = integer_zero_node;
20252 return res;
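/* Editorial worked example: for
   MASK == GOMP_DIM_MASK (GOMP_DIM_GANG) | GOMP_DIM_MASK (GOMP_DIM_VECTOR),
   the sequence built above computes
       POS == false: GOACC_DIM_SIZE (gang) * GOACC_DIM_SIZE (vector)
       POS == true:  GOACC_DIM_POS (gang) * GOACC_DIM_SIZE (vector)
                     + GOACC_DIM_POS (vector)
   i.e. each outer index is scaled by the sizes of the inner dimensions,
   yielding a linearized thread count or thread number.  */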
20255 /* Transform IFN_GOACC_LOOP calls to actual code. See
20256 expand_oacc_for for where these are generated. At the vector
20257 level, we stride loops, such that each member of a warp will
20258 operate on adjacent iterations. At the worker and gang level,
20259 each gang/warp executes a set of contiguous iterations. Chunking
20260 can override this such that each iteration engine executes a
20261 contiguous chunk, and then strides to the next chunk. */
20263 static void
20264 oacc_xform_loop (gcall *call)
20266 gimple_stmt_iterator gsi = gsi_for_stmt (call);
20267 enum ifn_goacc_loop_kind code
20268 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
20269 tree dir = gimple_call_arg (call, 1);
20270 tree range = gimple_call_arg (call, 2);
20271 tree step = gimple_call_arg (call, 3);
20272 tree chunk_size = NULL_TREE;
20273 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
20274 tree lhs = gimple_call_lhs (call);
20275 tree type = TREE_TYPE (lhs);
20276 tree diff_type = TREE_TYPE (range);
20277 tree r = NULL_TREE;
20278 gimple_seq seq = NULL;
20279 bool chunking = false, striding = true;
20280 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
20281 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
20283 #ifdef ACCEL_COMPILER
20284 chunk_size = gimple_call_arg (call, 4);
20285 if (integer_minus_onep (chunk_size) /* Force static allocation. */
20286 || integer_zerop (chunk_size)) /* Default (also static). */
20288 /* If we're at the gang level, we want each to execute a
20289 contiguous run of iterations. Otherwise we want each element
20290 to stride. */
20291 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
20292 chunking = false;
20294 else
20296 /* Chunk of size 1 is striding. */
20297 striding = integer_onep (chunk_size);
20298 chunking = !striding;
20300 #endif
20302 /* striding=true, chunking=true
20303 -> invalid.
20304 striding=true, chunking=false
20305 -> chunks=1
20306 striding=false,chunking=true
20307 -> chunks=ceil (range/(chunksize*threads*step))
20308 striding=false,chunking=false
20309 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
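/* Editorial worked example of the CHUNKS computation below: with
   range == 100, step == 1, dir == 1, chunk_size == 4 and 8 threads in
   the compute volume,
       per == 8 * 4 * 1 == 32
       chunk_max == (100 - 1 + 32) / 32 == 4 == ceil (100 / 32),
   so four chunks cover the iteration space.  */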
20310 push_gimplify_context (true);
20312 switch (code)
20314 default: gcc_unreachable ();
20316 case IFN_GOACC_LOOP_CHUNKS:
20317 if (!chunking)
20318 r = build_int_cst (type, 1);
20319 else
20321 /* chunk_max
20322 = (range - dir) / (chunks * step * num_threads) + dir */
20323 tree per = oacc_thread_numbers (false, mask, &seq);
20324 per = fold_convert (type, per);
20325 chunk_size = fold_convert (type, chunk_size);
20326 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
20327 per = fold_build2 (MULT_EXPR, type, per, step);
20328 r = build2 (MINUS_EXPR, type, range, dir);
20329 r = build2 (PLUS_EXPR, type, r, per);
20330 r = build2 (TRUNC_DIV_EXPR, type, r, per);
20332 break;
20334 case IFN_GOACC_LOOP_STEP:
20336 /* If striding, step by the entire compute volume, otherwise
20337 step by the inner volume. */
20338 unsigned volume = striding ? mask : inner_mask;
20340 r = oacc_thread_numbers (false, volume, &seq);
20341 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
20343 break;
20345 case IFN_GOACC_LOOP_OFFSET:
20346 if (striding)
20348 r = oacc_thread_numbers (true, mask, &seq);
20349 r = fold_convert (diff_type, r);
20351 else
20353 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
20354 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
20355 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
20356 inner_size, outer_size);
20358 volume = fold_convert (diff_type, volume);
20359 if (chunking)
20360 chunk_size = fold_convert (diff_type, chunk_size);
20361 else
20363 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
20365 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
20366 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
20367 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
20370 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
20371 fold_convert (diff_type, inner_size));
20372 r = oacc_thread_numbers (true, outer_mask, &seq);
20373 r = fold_convert (diff_type, r);
20374 r = build2 (MULT_EXPR, diff_type, r, span);
20376 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
20377 inner = fold_convert (diff_type, inner);
20378 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
20380 if (chunking)
20382 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
20383 tree per
20384 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
20385 per = build2 (MULT_EXPR, diff_type, per, chunk);
20387 r = build2 (PLUS_EXPR, diff_type, r, per);
20390 r = fold_build2 (MULT_EXPR, diff_type, r, step);
20391 if (type != diff_type)
20392 r = fold_convert (type, r);
20393 break;
20395 case IFN_GOACC_LOOP_BOUND:
20396 if (striding)
20397 r = range;
20398 else
20400 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
20401 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
20402 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
20403 inner_size, outer_size);
20405 volume = fold_convert (diff_type, volume);
20406 if (chunking)
20407 chunk_size = fold_convert (diff_type, chunk_size);
20408 else
20410 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
20412 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
20413 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
20414 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
20417 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
20418 fold_convert (diff_type, inner_size));
20420 r = fold_build2 (MULT_EXPR, diff_type, span, step);
20422 tree offset = gimple_call_arg (call, 6);
20423 r = build2 (PLUS_EXPR, diff_type, r,
20424 fold_convert (diff_type, offset));
20425 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
20426 diff_type, r, range);
20428 if (diff_type != type)
20429 r = fold_convert (type, r);
20430 break;
20433 gimplify_assign (lhs, r, &seq);
20435 pop_gimplify_context (NULL);
20437 gsi_replace_with_seq (&gsi, seq, true);
20440 /* Default partitioned and minimum partitioned dimensions. */
20442 static int oacc_default_dims[GOMP_DIM_MAX];
20443 static int oacc_min_dims[GOMP_DIM_MAX];
20445 /* Parse the default dimension parameter. This is a set of
20446 :-separated optional compute dimensions. Each specified dimension
20447 is a positive integer. When device type support is added, it is
20448 planned to be a comma-separated list of such compute dimensions,
20449 with all but the first prefixed by the colon-terminated device
20450 type. */
20452 static void
20453 oacc_parse_default_dims (const char *dims)
20455 int ix;
20457 for (ix = GOMP_DIM_MAX; ix--;)
20459 oacc_default_dims[ix] = -1;
20460 oacc_min_dims[ix] = 1;
20463 #ifndef ACCEL_COMPILER
20464 /* Cannot be overridden on the host. */
20465 dims = NULL;
20466 #endif
20467 if (dims)
20469 const char *pos = dims;
20471 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
20473 if (ix)
20475 if (*pos != ':')
20476 goto malformed;
20477 pos++;
20480 if (*pos != ':')
20482 long val;
20483 const char *eptr;
20485 errno = 0;
20486 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
20487 if (errno || val <= 0 || (int) val != val)
20488 goto malformed;
20489 pos = eptr;
20490 oacc_default_dims[ix] = (int) val;
20493 if (*pos)
20495 malformed:
20496 error_at (UNKNOWN_LOCATION,
20497 "-fopenacc-dim operand is malformed at '%s'", pos);
20501 /* Allow the backend to validate the dimensions. */
20502 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
20503 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
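/* Editorial examples of the accepted syntax (effective only when
   compiling with ACCEL_COMPILER defined; the host ignores the string):
       -fopenacc-dim=32:4:128   gang=32, worker=4, vector=128
       -fopenacc-dim=::64       only the vector dimension
   An empty position leaves that dimension at -1, i.e. to be chosen by
   targetm.goacc.validate_dims.  */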
20506 /* Validate and update the dimensions for offloaded FN. ATTRS is the
20507 raw attribute. DIMS is an array of dimensions, which is filled in.
20508 LEVEL is the partitioning level of a routine, or -1 for an offload
20509 region itself. USED is the mask of partitioned execution in the
20510 function. */
20512 static void
20513 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
20515 tree purpose[GOMP_DIM_MAX];
20516 unsigned ix;
20517 tree pos = TREE_VALUE (attrs);
20518 bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
20520 /* Make sure the attribute creator attached the dimension
20521 information. */
20522 gcc_assert (pos);
20524 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
20526 purpose[ix] = TREE_PURPOSE (pos);
20527 tree val = TREE_VALUE (pos);
20528 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
20529 pos = TREE_CHAIN (pos);
20532 bool changed = targetm.goacc.validate_dims (fn, dims, level);
20534 /* Default anything left to 1 or a partitioned default. */
20535 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
20536 if (dims[ix] < 0)
20538 /* The OpenACC spec says 'If the [num_gangs] clause is not
20539 specified, an implementation-defined default will be used;
20540 the default may depend on the code within the construct.'
20541 (2.5.6). Thus an implementation is free to choose
20542 a non-unity default for a parallel region that doesn't have
20543 any gang-partitioned loops. However, it appears that there
20544 is a sufficient body of user code that expects non-gang
20545 partitioned regions to not execute in gang-redundant mode.
20546 So we (a) don't warn about the non-portability and (b) pick
20547 the minimum permissible dimension size when there is no
20548 partitioned execution. Otherwise we pick the global
20549 default for the dimension, which the user can control. The
20550 same wording and logic applies to num_workers and
20551 vector_length, however the worker- or vector- single
20552 execution doesn't have the same impact as gang-redundant
20553 execution. (If the minimum gang-level partitioning is not 1,
20554 the target is probably too confusing.) */
20555 dims[ix] = (used & GOMP_DIM_MASK (ix)
20556 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
20557 changed = true;
20560 if (changed)
20562 /* Replace the attribute with new values. */
20563 pos = NULL_TREE;
20564 for (ix = GOMP_DIM_MAX; ix--;)
20566 pos = tree_cons (purpose[ix],
20567 build_int_cst (integer_type_node, dims[ix]),
20568 pos);
20569 if (is_kernel)
20570 TREE_PUBLIC (pos) = 1;
20572 replace_oacc_fn_attrib (fn, pos);
20576 /* Create an empty OpenACC loop structure at LOC. */
20578 static oacc_loop *
20579 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
20581 oacc_loop *loop = XCNEW (oacc_loop);
20583 loop->parent = parent;
20584 loop->child = loop->sibling = NULL;
20586 if (parent)
20588 loop->sibling = parent->child;
20589 parent->child = loop;
20592 loop->loc = loc;
20593 loop->marker = NULL;
20594 memset (loop->heads, 0, sizeof (loop->heads));
20595 memset (loop->tails, 0, sizeof (loop->tails));
20596 loop->routine = NULL_TREE;
20598 loop->mask = loop->flags = 0;
20599 loop->ifns = 0;
20600 loop->chunk_size = 0;
20601 loop->head_end = NULL;
20603 return loop;
20606 /* Create an outermost, dummy OpenACC loop for offloaded function
20607 DECL. */
20609 static oacc_loop *
20610 new_oacc_loop_outer (tree decl)
20612 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
20615 /* Start a new OpenACC loop structure beginning at head marker HEAD.
20616 Link into PARENT loop. Return the new loop. */
20618 static oacc_loop *
20619 new_oacc_loop (oacc_loop *parent, gcall *marker)
20621 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
20623 loop->marker = marker;
20625 /* TODO: This is where device_type flattening would occur for the loop
20626 flags. */
20628 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
20630 tree chunk_size = integer_zero_node;
20631 if (loop->flags & OLF_GANG_STATIC)
20632 chunk_size = gimple_call_arg (marker, 4);
20633 loop->chunk_size = chunk_size;
20635 return loop;
20638 /* Create a dummy loop encompassing a call to an OpenACC routine.
20639 Extract the routine's partitioning requirements. */
20641 static void
20642 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
20644 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
20645 int level = oacc_fn_attrib_level (attrs);
20647 gcc_assert (level >= 0);
20649 loop->marker = call;
20650 loop->routine = decl;
20651 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
20652 ^ (GOMP_DIM_MASK (level) - 1));
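/* Editorial worked example: for a routine at LEVEL == GOMP_DIM_WORKER
   (== 1) the mask computed above is
       (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) ^ (GOMP_DIM_MASK (1) - 1)
       == 0b111 ^ 0b001 == 0b110,
   i.e. worker and vector parallelism are reserved for use inside the
   routine, so enclosing loops may not partition across them.  */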
20655 /* Finish off the current OpenACC loop ending at tail marker TAIL.
20656 Return the parent loop. */
20658 static oacc_loop *
20659 finish_oacc_loop (oacc_loop *loop)
20661 /* If the loop has been collapsed, don't partition it. */
20662 if (!loop->ifns)
20663 loop->mask = loop->flags = 0;
20664 return loop->parent;
20667 /* Free all OpenACC loop structures within LOOP (inclusive). */
20669 static void
20670 free_oacc_loop (oacc_loop *loop)
20672 if (loop->sibling)
20673 free_oacc_loop (loop->sibling);
20674 if (loop->child)
20675 free_oacc_loop (loop->child);
20677 free (loop);
20680 /* Dump out the OpenACC loop head or tail beginning at FROM. */
20682 static void
20683 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
20684 const char *title, int level)
20686 enum ifn_unique_kind kind
20687 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
20689 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
20690 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
20692 gimple *stmt = gsi_stmt (gsi);
20694 if (is_gimple_call (stmt)
20695 && gimple_call_internal_p (stmt)
20696 && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
20698 enum ifn_unique_kind k
20699 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
20700 (gimple_call_arg (stmt, 0)));
20702 if (k == kind && stmt != from)
20703 break;
20705 print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
20707 gsi_next (&gsi);
20708 while (gsi_end_p (gsi))
20709 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
20713 /* Dump OpenACC loops LOOP, its siblings and its children. */
20715 static void
20716 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
20718 int ix;
20720 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
20721 loop->flags, loop->mask,
20722 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
20724 if (loop->marker)
20725 print_gimple_stmt (file, loop->marker, depth * 2, 0);
20727 if (loop->routine)
20728 fprintf (file, "%*sRoutine %s:%u:%s\n",
20729 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
20730 DECL_SOURCE_LINE (loop->routine),
20731 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
20733 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
20734 if (loop->heads[ix])
20735 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
20736 for (ix = GOMP_DIM_MAX; ix--;)
20737 if (loop->tails[ix])
20738 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
20740 if (loop->child)
20741 dump_oacc_loop (file, loop->child, depth + 1);
20742 if (loop->sibling)
20743 dump_oacc_loop (file, loop->sibling, depth);
20746 void debug_oacc_loop (oacc_loop *);
20748 /* Dump loops to stderr. */
20750 DEBUG_FUNCTION void
20751 debug_oacc_loop (oacc_loop *loop)
20753 dump_oacc_loop (stderr, loop, 0);
20756 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
20757 structures as we go. By construction these loops are properly
20758 nested. */
20760 static void
20761 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
20763 int marker = 0;
20764 int remaining = 0;
20766 if (bb->flags & BB_VISITED)
20767 return;
20769 follow:
20770 bb->flags |= BB_VISITED;
20772 /* Scan for loop markers. */
20773 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
20774 gsi_next (&gsi))
20776 gimple *stmt = gsi_stmt (gsi);
20778 if (!is_gimple_call (stmt))
20779 continue;
20781 gcall *call = as_a <gcall *> (stmt);
20783 /* If this is a routine, make a dummy loop for it. */
20784 if (tree decl = gimple_call_fndecl (call))
20785 if (tree attrs = get_oacc_fn_attrib (decl))
20787 gcc_assert (!marker);
20788 new_oacc_loop_routine (loop, call, decl, attrs);
20791 if (!gimple_call_internal_p (call))
20792 continue;
20794 switch (gimple_call_internal_fn (call))
20796 default:
20797 break;
20799 case IFN_GOACC_LOOP:
20800 /* Count the goacc loop abstraction fns, to determine if the
20801 loop was collapsed already. */
20802 loop->ifns++;
20803 break;
20805 case IFN_UNIQUE:
20806 enum ifn_unique_kind kind
20807 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
20808 (gimple_call_arg (call, 0)));
20809 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
20810 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
20812 if (gimple_call_num_args (call) == 2)
20814 gcc_assert (marker && !remaining);
20815 marker = 0;
20816 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
20817 loop = finish_oacc_loop (loop);
20818 else
20819 loop->head_end = call;
20821 else
20823 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
20825 if (!marker)
20827 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
20828 loop = new_oacc_loop (loop, call);
20829 remaining = count;
20831 gcc_assert (count == remaining);
20832 if (remaining)
20834 remaining--;
20835 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
20836 loop->heads[marker] = call;
20837 else
20838 loop->tails[remaining] = call;
20840 marker++;
20845 if (remaining || marker)
20847 bb = single_succ (bb);
20848 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
20849 goto follow;
20852 /* Walk successor blocks. */
20853 edge e;
20854 edge_iterator ei;
20856 FOR_EACH_EDGE (e, ei, bb->succs)
20857 oacc_loop_discover_walk (loop, e->dest);
20860 /* LOOP is the first sibling. Reverse the order in place and return
20861 the new first sibling. Recurse to child loops. */
20863 static oacc_loop *
20864 oacc_loop_sibling_nreverse (oacc_loop *loop)
20866 oacc_loop *last = NULL;
20869 if (loop->child)
20870 loop->child = oacc_loop_sibling_nreverse (loop->child);
20872 oacc_loop *next = loop->sibling;
20873 loop->sibling = last;
20874 last = loop;
20875 loop = next;
20877 while (loop);
20879 return last;
20882 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
20883 the current function. */
20885 static oacc_loop *
20886 oacc_loop_discovery ()
20888 basic_block bb;
20890 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
20891 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
20893 /* The siblings were constructed in reverse order, reverse them so
20894 that diagnostics come out in an unsurprising order. */
20895 top = oacc_loop_sibling_nreverse (top);
20897 /* Reset the visited flags. */
20898 FOR_ALL_BB_FN (bb, cfun)
20899 bb->flags &= ~BB_VISITED;
20901 return top;
20904 /* Transform the abstract internal function markers starting at FROM
20905 to be for partitioning level LEVEL. Stop when we meet another HEAD
20906 or TAIL marker. */
20908 static void
20909 oacc_loop_xform_head_tail (gcall *from, int level)
20911 enum ifn_unique_kind kind
20912 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
20913 tree replacement = build_int_cst (unsigned_type_node, level);
20915 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
20917 gimple *stmt = gsi_stmt (gsi);
20919 if (is_gimple_call (stmt)
20920 && gimple_call_internal_p (stmt)
20921 && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
20923 enum ifn_unique_kind k
20924 = ((enum ifn_unique_kind)
20925 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
20927 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
20928 *gimple_call_arg_ptr (stmt, 2) = replacement;
20929 else if (k == kind && stmt != from)
20930 break;
20932 else if (is_gimple_call (stmt)
20933 && gimple_call_internal_p (stmt)
20934 && gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION)
20935 *gimple_call_arg_ptr (stmt, 3) = replacement;
20937 gsi_next (&gsi);
20938 while (gsi_end_p (gsi))
20939 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
20943 /* Transform the IFN_GOACC_LOOP internal functions by providing the
20944 determined partitioning mask and chunking argument. END_MARKER
20945 points at the end IFN_HEAD_TAIL call introducing the loop. IFNS
20946 is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is
20947 the replacement partitioning mask and CHUNK_ARG is the replacement
20948 chunking arg. */
20950 static void
20951 oacc_loop_xform_loop (gcall *end_marker, unsigned ifns,
20952 tree mask_arg, tree chunk_arg)
20954 gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
20956 gcc_checking_assert (ifns);
20957 for (;;)
20959 for (; !gsi_end_p (gsi); gsi_next (&gsi))
20961 gimple *stmt = gsi_stmt (gsi);
20963 if (!is_gimple_call (stmt))
20964 continue;
20966 gcall *call = as_a <gcall *> (stmt);
20968 if (!gimple_call_internal_p (call))
20969 continue;
20971 if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
20972 continue;
20974 *gimple_call_arg_ptr (call, 5) = mask_arg;
20975 *gimple_call_arg_ptr (call, 4) = chunk_arg;
20976 ifns--;
20977 if (!ifns)
20978 return;
20981 /* The LOOP_BOUND ifn could be in the single successor
20982 block. */
20983 basic_block bb = single_succ (gsi_bb (gsi));
20984 gsi = gsi_start_bb (bb);
20988 /* Process the discovered OpenACC loops, setting the correct
20989 partitioning level etc. */
20991 static void
20992 oacc_loop_process (oacc_loop *loop)
20994 if (loop->child)
20995 oacc_loop_process (loop->child);
20997 if (loop->mask && !loop->routine)
20999 int ix;
21000 unsigned mask = loop->mask;
21001 unsigned dim = GOMP_DIM_GANG;
21002 tree mask_arg = build_int_cst (unsigned_type_node, mask);
21003 tree chunk_arg = loop->chunk_size;
21005 oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg);
21007 for (ix = 0; ix != GOMP_DIM_MAX && loop->heads[ix]; ix++)
21009 gcc_assert (mask);
21011 while (!(GOMP_DIM_MASK (dim) & mask))
21012 dim++;
21014 oacc_loop_xform_head_tail (loop->heads[ix], dim);
21015 oacc_loop_xform_head_tail (loop->tails[ix], dim);
21017 mask ^= GOMP_DIM_MASK (dim);
21021 if (loop->sibling)
21022 oacc_loop_process (loop->sibling);
21025 /* Walk the OpenACC loop hierarchy checking and assigning the
21026 programmer-specified partitionings. OUTER_MASK is the partitioning
21027 this loop is contained within. Return mask of partitioning
21028 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
21029 bit. */
21031 static unsigned
21032 oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
21034 unsigned this_mask = loop->mask;
21035 unsigned mask_all = 0;
21036 bool noisy = true;
21038 #ifdef ACCEL_COMPILER
21039 /* When device_type is supported, we want the device compiler to be
21040 noisy, if the loop parameters are device_type-specific. */
21041 noisy = false;
21042 #endif
21044 if (!loop->routine)
21046 bool auto_par = (loop->flags & OLF_AUTO) != 0;
21047 bool seq_par = (loop->flags & OLF_SEQ) != 0;
21049 this_mask = ((loop->flags >> OLF_DIM_BASE)
21050 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
21052 if ((this_mask != 0) + auto_par + seq_par > 1)
21054 if (noisy)
21055 error_at (loop->loc,
21056 seq_par
21057 ? "%<seq%> overrides other OpenACC loop specifiers"
21058 : "%<auto%> conflicts with other OpenACC loop specifiers");
21059 auto_par = false;
21060 loop->flags &= ~OLF_AUTO;
21061 if (seq_par)
21063 loop->flags &=
21064 ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
21065 this_mask = 0;
21068 if (auto_par && (loop->flags & OLF_INDEPENDENT))
21069 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
21072 if (this_mask & outer_mask)
21074 const oacc_loop *outer;
21075 for (outer = loop->parent; outer; outer = outer->parent)
21076 if (outer->mask & this_mask)
21077 break;
21079 if (noisy)
21081 if (outer)
21083 error_at (loop->loc,
21084 "%s uses same OpenACC parallelism as containing loop",
21085 loop->routine ? "routine call" : "inner loop");
21086 inform (outer->loc, "containing loop here");
21088 else
21089 error_at (loop->loc,
21090 "%s uses OpenACC parallelism disallowed by containing routine",
21091 loop->routine ? "routine call" : "loop");
21093 if (loop->routine)
21094 inform (DECL_SOURCE_LOCATION (loop->routine),
21095 "routine %qD declared here", loop->routine);
21097 this_mask &= ~outer_mask;
21099 else
21101 unsigned outermost = this_mask & -this_mask;
21103 if (outermost && outermost <= outer_mask)
21105 if (noisy)
21107 error_at (loop->loc,
21108 "incorrectly nested OpenACC loop parallelism");
21110 const oacc_loop *outer;
21111 for (outer = loop->parent;
21112 outer->flags && outer->flags < outermost;
21113 outer = outer->parent)
21114 continue;
21115 inform (outer->loc, "containing loop here");
21118 this_mask &= ~outermost;
21122 loop->mask = this_mask;
21123 mask_all |= this_mask;
21125 if (loop->child)
21126 mask_all |= oacc_loop_fixed_partitions (loop->child,
21127 outer_mask | this_mask);
21129 if (loop->sibling)
21130 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
21132 return mask_all;
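/* Editorial worked example: a loop marked "gang vector" has
   this_mask == 0b101.  If it sits inside a worker-partitioned loop
   (outer_mask == 0b010), its outermost level is gang
   (outermost == 0b001), and 0b001 <= 0b010 means gang would nest
   inside worker; that is diagnosed as incorrectly nested parallelism
   and the gang bit is stripped with this_mask &= ~outermost.  */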
21135 /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
21136 OUTER_MASK is the partitioning this loop is contained within.
21137 Return the cumulative partitioning used by this loop, siblings and
21138 children. */
21140 static unsigned
21141 oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
21143 unsigned inner_mask = 0;
21144 bool noisy = true;
21146 #ifdef ACCEL_COMPILER
21147 /* When device_type is supported, we want the device compiler to be
21148 noisy, if the loop parameters are device_type-specific. */
21149 noisy = false;
21150 #endif
21152 if (loop->child)
21153 inner_mask |= oacc_loop_auto_partitions (loop->child,
21154 outer_mask | loop->mask);
21156 if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
21158 unsigned this_mask = 0;
21160 /* Determine the outermost partitioning used within this loop. */
21161 this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
21162 this_mask = (this_mask & -this_mask);
21164 /* Pick the partitioning just inside that one. */
21165 this_mask >>= 1;
21167 /* And avoid picking one used by an outer loop. */
21168 this_mask &= ~outer_mask;
21170 if (!this_mask && noisy)
21171 warning_at (loop->loc, 0,
21172 "insufficient partitioning available to parallelize loop");
21174 if (dump_file)
21175 fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
21176 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
21177 this_mask);
21179 loop->mask = this_mask;
21181 inner_mask |= loop->mask;
21183 if (loop->sibling)
21184 inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
21186 return inner_mask;
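/* Editorial worked example: suppose an auto loop's children already use
   vector partitioning, so inner_mask == 0b100 on entry to the OLF_AUTO
   block above.  Then
       this_mask = 0b100 | GOMP_DIM_MASK (GOMP_DIM_MAX)  == 0b1100
       this_mask &= -this_mask                           == 0b0100
       this_mask >>= 1                                   == 0b0010
   i.e. the loop is assigned worker partitioning, one level outside the
   outermost partitioning already used within it (unless worker is taken
   by an outer loop, in which case the mask becomes 0 and a warning is
   emitted).  */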
21189 /* Walk the OpenACC loop hierarchy to check and assign partitioning
21190 axes. Return mask of partitioning. */
21192 static unsigned
21193 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
21195 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
21197 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
21199 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
21200 mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
21202 return mask_all;
21205 /* Default fork/join early expander. Delete the function calls if
21206 there is no RTL expander. */
21208 bool
21209 default_goacc_fork_join (gcall *ARG_UNUSED (call),
21210 const int *ARG_UNUSED (dims), bool is_fork)
21212 if (is_fork)
21213 return targetm.have_oacc_fork ();
21214 else
21215 return targetm.have_oacc_join ();
21218 /* Default goacc.reduction early expander.
21220 LHS-opt = IFN_GOACC_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
21221 If RES_PTR is not integer-zerop:
21222 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
21223 TEARDOWN - emit '*RES_PTR = VAR'
21224 If LHS is not NULL
21225 emit 'LHS = VAR' */
21227 void
21228 default_goacc_reduction (gcall *call)
21230 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
21231 gimple_stmt_iterator gsi = gsi_for_stmt (call);
21232 tree lhs = gimple_call_lhs (call);
21233 tree var = gimple_call_arg (call, 2);
21234 gimple_seq seq = NULL;
21236 if (code == IFN_GOACC_REDUCTION_SETUP
21237 || code == IFN_GOACC_REDUCTION_TEARDOWN)
21239 /* Setup and Teardown need to copy from/to the receiver object,
21240 if there is one. */
21241 tree ref_to_res = gimple_call_arg (call, 1);
21243 if (!integer_zerop (ref_to_res))
21245 tree dst = build_simple_mem_ref (ref_to_res);
21246 tree src = var;
21248 if (code == IFN_GOACC_REDUCTION_SETUP)
21250 src = dst;
21251 dst = lhs;
21252 lhs = NULL;
21254 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
21258 /* Copy VAR to LHS, if there is an LHS. */
21259 if (lhs)
21260 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
21262 gsi_replace_with_seq (&gsi, seq, true);
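/* Editorial example of the default expansion above: with a live
   receiver, SETUP emits "LHS = *RES_PTR" (and suppresses the trailing
   copy), TEARDOWN emits "*RES_PTR = VAR" followed by "LHS = VAR" when
   an LHS exists, and the remaining kinds (e.g. INIT, FINI) reduce to
   the plain "LHS = VAR" copy.  */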
21265 /* Main entry point for oacc transformations which run on the device
21266 compiler after LTO, so we know what the target device is at this
21267 point (including the host fallback). */
21269 static unsigned int
21270 execute_oacc_device_lower ()
21272 tree attrs = get_oacc_fn_attrib (current_function_decl);
21274 if (!attrs)
21275 /* Not an offloaded function. */
21276 return 0;
21278 /* Parse the default dim argument exactly once; the flag is then pointed at itself as an "already parsed" sentinel. */
21279 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
21281 oacc_parse_default_dims (flag_openacc_dims);
21282 flag_openacc_dims = (char *)&flag_openacc_dims;
21285 /* Discover, partition and process the loops. */
21286 oacc_loop *loops = oacc_loop_discovery ();
21287 int fn_level = oacc_fn_attrib_level (attrs);
21289 if (dump_file)
21290 fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
21291 ? "Function is kernels offload\n"
21292 : fn_level < 0 ? "Function is parallel offload\n"
21293 : "Function is routine level %d\n", fn_level);
21295 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
21296 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
21297 int dims[GOMP_DIM_MAX];
21299 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
21301 if (dump_file)
21303 const char *comma = "Compute dimensions [";
21304 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
21305 fprintf (dump_file, "%s%d", comma, dims[ix]);
21306 fprintf (dump_file, "]\n");
21309 oacc_loop_process (loops);
21310 if (dump_file)
21312 fprintf (dump_file, "OpenACC loops\n");
21313 dump_oacc_loop (dump_file, loops, 0);
21314 fprintf (dump_file, "\n");
21317 /* Offloaded targets may introduce new basic blocks, which require
21318 dominance information to update SSA. */
21319 calculate_dominance_info (CDI_DOMINATORS);
21321 /* Now lower internal loop functions to target-specific code
21322 sequences. */
21323 basic_block bb;
21324 FOR_ALL_BB_FN (bb, cfun)
21325 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
21327 gimple *stmt = gsi_stmt (gsi);
21328 if (!is_gimple_call (stmt))
21330 gsi_next (&gsi);
21331 continue;
21334 gcall *call = as_a <gcall *> (stmt);
21335 if (!gimple_call_internal_p (call))
21337 gsi_next (&gsi);
21338 continue;
21341 /* Rewind to allow rescan. */
21342 gsi_prev (&gsi);
21343 bool rescan = false, remove = false;
21344 enum internal_fn ifn_code = gimple_call_internal_fn (call);
21346 switch (ifn_code)
21348 default: break;
21350 case IFN_GOACC_LOOP:
21351 oacc_xform_loop (call);
21352 rescan = true;
21353 break;
21355 case IFN_GOACC_REDUCTION:
21356 /* Mark the function for SSA renaming. */
21357 mark_virtual_operands_for_renaming (cfun);
21359 /* If the level is -1, this ended up being an unused
21360 axis. Handle as a default. */
21361 if (integer_minus_onep (gimple_call_arg (call, 3)))
21362 default_goacc_reduction (call);
21363 else
21364 targetm.goacc.reduction (call);
21365 rescan = true;
21366 break;
21368 case IFN_UNIQUE:
21370 enum ifn_unique_kind kind
21371 = ((enum ifn_unique_kind)
21372 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
21374 switch (kind)
21376 default:
21377 gcc_unreachable ();
21379 case IFN_UNIQUE_OACC_FORK:
21380 case IFN_UNIQUE_OACC_JOIN:
21381 if (integer_minus_onep (gimple_call_arg (call, 2)))
21382 remove = true;
21383 else if (!targetm.goacc.fork_join
21384 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
21385 remove = true;
21386 break;
21388 case IFN_UNIQUE_OACC_HEAD_MARK:
21389 case IFN_UNIQUE_OACC_TAIL_MARK:
21390 remove = true;
21391 break;
21393 break;
21397 if (gsi_end_p (gsi))
21398 /* We rewound past the beginning of the BB. */
21399 gsi = gsi_start_bb (bb);
21400 else
21401 /* Undo the rewind. */
21402 gsi_next (&gsi);
21404 if (remove)
21406 if (gimple_vdef (call))
21407 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
21408 if (gimple_call_lhs (call))
21410 /* Propagate the data dependency var. */
21411 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
21412 gimple_call_arg (call, 1));
21413 gsi_replace (&gsi, ass, false);
21415 else
21416 gsi_remove (&gsi, true);
21418 else if (!rescan)
21419 /* If not rescanning, advance over the call. */
21420 gsi_next (&gsi);
21423 free_oacc_loop (loops);
21425 return 0;
21428 /* Default launch dimension validator. Force everything to 1. A
21429 backend that wants to provide larger dimensions must override this
21430 hook. */
21432 bool
21433 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
21434 int ARG_UNUSED (fn_level))
21436 bool changed = false;
21438 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
21440 if (dims[ix] != 1)
21442 dims[ix] = 1;
21443 changed = true;
21447 return changed;
21450 /* Default dimension bound is unknown on accelerator and 1 on host. */
21452 int
21453 default_goacc_dim_limit (int ARG_UNUSED (axis))
21455 #ifdef ACCEL_COMPILER
21456 return 0;
21457 #else
21458 return 1;
21459 #endif
21462 namespace {
21464 const pass_data pass_data_oacc_device_lower =
21466 GIMPLE_PASS, /* type */
21467 "oaccdevlow", /* name */
21468 OPTGROUP_NONE, /* optinfo_flags */
21469 TV_NONE, /* tv_id */
21470 PROP_cfg, /* properties_required */
21471 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
21472 0, /* properties_destroyed */
21473 0, /* todo_flags_start */
21474 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
21477 class pass_oacc_device_lower : public gimple_opt_pass
21479 public:
21480 pass_oacc_device_lower (gcc::context *ctxt)
21481 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
21484 /* opt_pass methods: */
21485 virtual unsigned int execute (function *)
21487 bool gate = flag_openacc != 0;
21489 if (!gate)
21490 return 0;
21492 return execute_oacc_device_lower ();
21495 }; // class pass_oacc_device_lower
21497 } // anon namespace
21499 gimple_opt_pass *
21500 make_pass_oacc_device_lower (gcc::context *ctxt)
21502 return new pass_oacc_device_lower (ctxt);
21505 /* "omp declare target link" handling pass. */
21507 namespace {
21509 const pass_data pass_data_omp_target_link =
21511 GIMPLE_PASS, /* type */
21512 "omptargetlink", /* name */
21513 OPTGROUP_NONE, /* optinfo_flags */
21514 TV_NONE, /* tv_id */
21515 PROP_ssa, /* properties_required */
21516 0, /* properties_provided */
21517 0, /* properties_destroyed */
21518 0, /* todo_flags_start */
21519 TODO_update_ssa, /* todo_flags_finish */
21522 class pass_omp_target_link : public gimple_opt_pass
21524 public:
21525 pass_omp_target_link (gcc::context *ctxt)
21526 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
21529 /* opt_pass methods: */
21530 virtual bool gate (function *fun)
21532 #ifdef ACCEL_COMPILER
21533 tree attrs = DECL_ATTRIBUTES (fun->decl);
21534 return lookup_attribute ("omp declare target", attrs)
21535 || lookup_attribute ("omp target entrypoint", attrs);
21536 #else
21537 (void) fun;
21538 return false;
21539 #endif
21542 virtual unsigned execute (function *);
21545 /* Callback for walk_gimple_stmt used to scan for link var operands. */
21547 static tree
21548 find_link_var_op (tree *tp, int *walk_subtrees, void *)
21550 tree t = *tp;
21552 if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t)
21553 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
21555 *walk_subtrees = 0;
21556 return t;
21559 return NULL_TREE;
21562 unsigned
21563 pass_omp_target_link::execute (function *fun)
21565 basic_block bb;
21566 FOR_EACH_BB_FN (bb, fun)
21568 gimple_stmt_iterator gsi;
21569 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21570 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
21571 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
21574 return 0;
21577 } // anon namespace
21579 gimple_opt_pass *
21580 make_pass_omp_target_link (gcc::context *ctxt)
21582 return new pass_omp_target_link (ctxt);
21585 #include "gt-omp-low.h"