contrib/gcc-8.0/gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp), and so forth.
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
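/* An illustrative sketch of the structure described above: for

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   the region tree contains a GIMPLE_OMP_PARALLEL region whose INNER
   field points to a GIMPLE_OMP_FOR region; sibling constructs at the
   same nesting level are chained through NEXT, and OUTER points back to
   the enclosing region.  */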
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180 return false;
181 if (fd.iter_type != long_integer_type_node)
182 return false;
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
188 see through this. */
189 if (!is_gimple_min_invariant (fd.loop.n1)
190 || !is_gimple_min_invariant (fd.loop.n2)
191 || !is_gimple_min_invariant (fd.loop.step)
192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193 return false;
195 return true;
198 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on whether the
199 simd modifier is present (SIMD_SCHEDULE). */
201 static tree
202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 if (!simd_schedule)
205 return chunk_size;
207 poly_uint64 vf = omp_max_vf ();
208 if (known_eq (vf, 1U))
209 return chunk_size;
211 tree type = TREE_TYPE (chunk_size);
212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 build_int_cst (type, vf - 1));
214 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 build_int_cst (type, -vf));
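/* An illustrative worked example of the rounding above: assuming
   omp_max_vf () returns 8, a chunk size of 13 becomes
   (13 + 8 - 1) & -8 = 16, i.e. the chunk size is rounded up to a
   multiple of the vectorization factor; with a VF of 1 the chunk size
   is returned unchanged.  */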
218 /* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
220 expanded. */
222 static vec<tree, va_gc> *
223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 tree t;
226 location_t loc = gimple_location (ws_stmt);
227 vec<tree, va_gc> *ws_args;
229 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 struct omp_for_data fd;
232 tree n1, n2;
234 omp_extract_for_data (for_stmt, &fd, NULL);
235 n1 = fd.loop.n1;
236 n2 = fd.loop.n2;
238 if (gimple_omp_for_combined_into_p (for_stmt))
240 tree innerc
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 OMP_CLAUSE__LOOPTEMP_);
243 gcc_assert (innerc);
244 n1 = OMP_CLAUSE_DECL (innerc);
245 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 OMP_CLAUSE__LOOPTEMP_);
247 gcc_assert (innerc);
248 n2 = OMP_CLAUSE_DECL (innerc);
251 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253 t = fold_convert_loc (loc, long_integer_type_node, n1);
254 ws_args->quick_push (t);
256 t = fold_convert_loc (loc, long_integer_type_node, n2);
257 ws_args->quick_push (t);
259 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260 ws_args->quick_push (t);
262 if (fd.chunk_size)
264 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 ws_args->quick_push (t);
269 return ws_args;
271 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 /* The number of sections equals the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement's block, minus the one going
275 to the exit of the sections region. */
276 basic_block bb = single_succ (gimple_bb (ws_stmt));
277 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278 vec_alloc (ws_args, 1);
279 ws_args->quick_push (t);
280 return ws_args;
283 gcc_unreachable ();
286 /* Discover whether REGION is a combined parallel+workshare region. */
288 static void
289 determine_parallel_type (struct omp_region *region)
291 basic_block par_entry_bb, par_exit_bb;
292 basic_block ws_entry_bb, ws_exit_bb;
294 if (region == NULL || region->inner == NULL
295 || region->exit == NULL || region->inner->exit == NULL
296 || region->inner->cont == NULL)
297 return;
299 /* We only support parallel+for and parallel+sections. */
300 if (region->type != GIMPLE_OMP_PARALLEL
301 || (region->inner->type != GIMPLE_OMP_FOR
302 && region->inner->type != GIMPLE_OMP_SECTIONS))
303 return;
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb = region->entry;
308 par_exit_bb = region->exit;
309 ws_entry_bb = region->inner->entry;
310 ws_exit_bb = region->inner->exit;
312 if (single_succ (par_entry_bb) == ws_entry_bb
313 && single_succ (ws_exit_bb) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 || (last_and_only_stmt (ws_entry_bb)
317 && last_and_only_stmt (par_exit_bb))))
319 gimple *par_stmt = last_stmt (par_entry_bb);
320 gimple *ws_stmt = last_stmt (ws_entry_bb);
322 if (region->inner->type == GIMPLE_OMP_FOR)
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses = gimple_omp_for_clauses (ws_stmt);
334 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 if (c == NULL
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 == OMP_CLAUSE_SCHEDULE_STATIC)
338 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 region->is_combined_parallel = false;
341 region->inner->is_combined_parallel = false;
342 return;
346 region->is_combined_parallel = true;
347 region->inner->is_combined_parallel = true;
348 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
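/* An illustrative sketch of the effect of the checks above: a loop like

     #pragma omp parallel for schedule (dynamic, 16)
     for (i = 0; i < n; i++) ...

   can be emitted as a single combined parallel+workshare library call
   (see expand_parallel_call below), whereas

     #pragma omp parallel for schedule (static)
     for (i = 0; i < n; i++) ...

   keeps the two constructs separate: static loops are open coded and
   ordered loops would still need the extra synchronization, so nothing
   would be gained by combining them.  */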
352 /* Debugging dumps for parallel regions. */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
357 /* Dump the parallel region tree rooted at REGION. */
359 void
360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 gimple_code_name[region->type]);
365 if (region->inner)
366 dump_omp_region (file, region->inner, indent + 4);
368 if (region->cont)
370 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 region->cont->index);
374 if (region->exit)
375 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 region->exit->index);
377 else
378 fprintf (file, "%*s[no exit marker]\n", indent, "");
380 if (region->next)
381 dump_omp_region (file, region->next, indent);
384 DEBUG_FUNCTION void
385 debug_omp_region (struct omp_region *region)
387 dump_omp_region (stderr, region, 0);
390 DEBUG_FUNCTION void
391 debug_all_omp_regions (void)
393 dump_omp_region (stderr, root_omp_region, 0);
396 /* Create a new omp region of type TYPE with entry block BB, nested inside region PARENT. */
398 static struct omp_region *
399 new_omp_region (basic_block bb, enum gimple_code type,
400 struct omp_region *parent)
402 struct omp_region *region = XCNEW (struct omp_region);
404 region->outer = parent;
405 region->entry = bb;
406 region->type = type;
408 if (parent)
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region->next = parent->inner;
413 parent->inner = region;
415 else
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region->next = root_omp_region;
420 root_omp_region = region;
423 return region;
426 /* Release the memory associated with the region tree rooted at REGION. */
428 static void
429 free_omp_region_1 (struct omp_region *region)
431 struct omp_region *i, *n;
433 for (i = region->inner; i ; i = n)
435 n = i->next;
436 free_omp_region_1 (i);
439 free (region);
442 /* Release the memory for the entire omp region tree. */
444 void
445 omp_free_regions (void)
447 struct omp_region *r, *n;
448 for (r = root_omp_region; r ; r = n)
450 n = r->next;
451 free_omp_region_1 (r);
453 root_omp_region = NULL;
456 /* A convenience function to build an empty GIMPLE_COND with just the
457 condition. */
459 static gcond *
460 gimple_build_cond_empty (tree cond)
462 enum tree_code pred_code;
463 tree lhs, rhs;
465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
469 /* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
472 static bool
473 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 bool indirect = false;
476 for (region = region->outer; region; region = region->outer)
478 if (region->type == GIMPLE_OMP_PARALLEL)
479 indirect = true;
480 else if (region->type == GIMPLE_OMP_TARGET)
482 gomp_target *tgt_stmt
483 = as_a <gomp_target *> (last_stmt (region->entry));
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 OMP_CLAUSE__GRIDDIM_))
487 return indirect;
488 else
489 return true;
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl)))
495 return true;
497 return false;
500 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
501 Add CHILD_FNDECL to decl chain of the supercontext of the block
502 ENTRY_BLOCK - this is the block which originally contained the
503 code from which CHILD_FNDECL was created.
505 Together, these actions ensure that the debug info for the outlined
506 function will be emitted with the correct lexical scope. */
508 static void
509 adjust_context_and_scope (struct omp_region *region, tree entry_block,
510 tree child_fndecl)
512 tree parent_fndecl = NULL_TREE;
513 gimple *entry_stmt;
514 /* OMP expansion expands inner regions before outer ones, so if we
515 have e.g. an explicit task region nested in a parallel region, then
516 when expanding the task region current_function_decl will be the
517 original source function, but we actually want to use the child
518 function of the parallel as the context. */
519 for (region = region->outer;
520 region && parent_fndecl == NULL_TREE; region = region->outer)
521 switch (region->type)
523 case GIMPLE_OMP_PARALLEL:
524 case GIMPLE_OMP_TASK:
525 entry_stmt = last_stmt (region->entry);
526 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
527 break;
528 case GIMPLE_OMP_TARGET:
529 entry_stmt = last_stmt (region->entry);
530 parent_fndecl
531 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
532 break;
533 default:
534 break;
537 if (parent_fndecl == NULL_TREE)
538 parent_fndecl = current_function_decl;
539 DECL_CONTEXT (child_fndecl) = parent_fndecl;
541 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
543 tree b = BLOCK_SUPERCONTEXT (entry_block);
544 if (TREE_CODE (b) == BLOCK)
546 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
547 BLOCK_VARS (b) = child_fndecl;
552 /* Build the function call to GOMP_parallel (or one of its combined
553 variants) to actually generate the parallel operation. REGION is the
554 parallel region being expanded. BB is the block into which the code
555 is inserted. WS_ARGS will be set if this is a call to a combined
556 parallel+workshare construct; it contains the list of additional
557 arguments needed by the workshare construct. */
559 static void
560 expand_parallel_call (struct omp_region *region, basic_block bb,
561 gomp_parallel *entry_stmt,
562 vec<tree, va_gc> *ws_args)
564 tree t, t1, t2, val, cond, c, clauses, flags;
565 gimple_stmt_iterator gsi;
566 gimple *stmt;
567 enum built_in_function start_ix;
568 int start_ix2;
569 location_t clause_loc;
570 vec<tree, va_gc> *args;
572 clauses = gimple_omp_parallel_clauses (entry_stmt);
574 /* Determine what flavor of GOMP_parallel we will be
575 emitting. */
576 start_ix = BUILT_IN_GOMP_PARALLEL;
577 if (is_combined_parallel (region))
579 switch (region->inner->type)
581 case GIMPLE_OMP_FOR:
582 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
583 switch (region->inner->sched_kind)
585 case OMP_CLAUSE_SCHEDULE_RUNTIME:
586 start_ix2 = 3;
587 break;
588 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
589 case OMP_CLAUSE_SCHEDULE_GUIDED:
590 if (region->inner->sched_modifiers
591 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
593 start_ix2 = 3 + region->inner->sched_kind;
594 break;
596 /* FALLTHRU */
597 default:
598 start_ix2 = region->inner->sched_kind;
599 break;
601 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
602 start_ix = (enum built_in_function) start_ix2;
603 break;
604 case GIMPLE_OMP_SECTIONS:
605 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
606 break;
607 default:
608 gcc_unreachable ();
612 /* By default, the value of NUM_THREADS is zero (selected at run time)
613 and there is no conditional. */
614 cond = NULL_TREE;
615 val = build_int_cst (unsigned_type_node, 0);
616 flags = build_int_cst (unsigned_type_node, 0);
618 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
619 if (c)
620 cond = OMP_CLAUSE_IF_EXPR (c);
622 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
623 if (c)
625 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
626 clause_loc = OMP_CLAUSE_LOCATION (c);
628 else
629 clause_loc = gimple_location (entry_stmt);
631 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
632 if (c)
633 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
635 /* Ensure 'val' is of the correct type. */
636 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
638 /* If we found the clause 'if (cond)', build either
639 (cond != 0) or (cond ? val : 1u). */
640 if (cond)
642 cond = gimple_boolify (cond);
644 if (integer_zerop (val))
645 val = fold_build2_loc (clause_loc,
646 EQ_EXPR, unsigned_type_node, cond,
647 build_int_cst (TREE_TYPE (cond), 0));
648 else
650 basic_block cond_bb, then_bb, else_bb;
651 edge e, e_then, e_else;
652 tree tmp_then, tmp_else, tmp_join, tmp_var;
654 tmp_var = create_tmp_var (TREE_TYPE (val));
655 if (gimple_in_ssa_p (cfun))
657 tmp_then = make_ssa_name (tmp_var);
658 tmp_else = make_ssa_name (tmp_var);
659 tmp_join = make_ssa_name (tmp_var);
661 else
663 tmp_then = tmp_var;
664 tmp_else = tmp_var;
665 tmp_join = tmp_var;
668 e = split_block_after_labels (bb);
669 cond_bb = e->src;
670 bb = e->dest;
671 remove_edge (e);
673 then_bb = create_empty_bb (cond_bb);
674 else_bb = create_empty_bb (then_bb);
675 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
676 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
678 stmt = gimple_build_cond_empty (cond);
679 gsi = gsi_start_bb (cond_bb);
680 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
682 gsi = gsi_start_bb (then_bb);
683 expand_omp_build_assign (&gsi, tmp_then, val, true);
685 gsi = gsi_start_bb (else_bb);
686 expand_omp_build_assign (&gsi, tmp_else,
687 build_int_cst (unsigned_type_node, 1),
688 true);
690 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
691 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
692 add_bb_to_loop (then_bb, cond_bb->loop_father);
693 add_bb_to_loop (else_bb, cond_bb->loop_father);
694 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
695 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
697 if (gimple_in_ssa_p (cfun))
699 gphi *phi = create_phi_node (tmp_join, bb);
700 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
701 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
704 val = tmp_join;
707 gsi = gsi_start_bb (bb);
708 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
709 false, GSI_CONTINUE_LINKING);
712 gsi = gsi_last_nondebug_bb (bb);
713 t = gimple_omp_parallel_data_arg (entry_stmt);
714 if (t == NULL)
715 t1 = null_pointer_node;
716 else
717 t1 = build_fold_addr_expr (t);
718 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
719 t2 = build_fold_addr_expr (child_fndecl);
721 vec_alloc (args, 4 + vec_safe_length (ws_args));
722 args->quick_push (t2);
723 args->quick_push (t1);
724 args->quick_push (val);
725 if (ws_args)
726 args->splice (*ws_args);
727 args->quick_push (flags);
729 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
730 builtin_decl_explicit (start_ix), args);
732 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
733 false, GSI_CONTINUE_LINKING);
735 if (hsa_gen_requested_p ()
736 && parallel_needs_hsa_kernel_p (region))
738 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
739 hsa_register_kernel (child_cnode);
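/* To summarize the argument vector assembled above (argument names here
   are illustrative, not the libgomp prototypes): the emitted call has
   the shape

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   and for a combined parallel+workshare region the WS_ARGS collected by
   get_ws_args_for are spliced in just before FLAGS, e.g. roughly

     GOMP_parallel_loop_dynamic (child_fn, &.omp_data_o, num_threads,
                                 n1, n2, step, chunk, flags);  */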
743 /* Build the function call to GOMP_task to actually
744 generate the task operation. BB is the block into which the code is inserted. */
746 static void
747 expand_task_call (struct omp_region *region, basic_block bb,
748 gomp_task *entry_stmt)
750 tree t1, t2, t3;
751 gimple_stmt_iterator gsi;
752 location_t loc = gimple_location (entry_stmt);
754 tree clauses = gimple_omp_task_clauses (entry_stmt);
756 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
757 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
758 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
759 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
760 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
761 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
763 unsigned int iflags
764 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
765 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
766 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
768 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
769 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
770 tree num_tasks = NULL_TREE;
771 bool ull = false;
772 if (taskloop_p)
774 gimple *g = last_stmt (region->outer->entry);
775 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
776 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
777 struct omp_for_data fd;
778 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
779 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
780 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
781 OMP_CLAUSE__LOOPTEMP_);
782 startvar = OMP_CLAUSE_DECL (startvar);
783 endvar = OMP_CLAUSE_DECL (endvar);
784 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
785 if (fd.loop.cond_code == LT_EXPR)
786 iflags |= GOMP_TASK_FLAG_UP;
787 tree tclauses = gimple_omp_for_clauses (g);
788 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
789 if (num_tasks)
790 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
791 else
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
794 if (num_tasks)
796 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
797 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
799 else
800 num_tasks = integer_zero_node;
802 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
803 if (ifc == NULL_TREE)
804 iflags |= GOMP_TASK_FLAG_IF;
805 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
806 iflags |= GOMP_TASK_FLAG_NOGROUP;
807 ull = fd.iter_type == long_long_unsigned_type_node;
809 else if (priority)
810 iflags |= GOMP_TASK_FLAG_PRIORITY;
812 tree flags = build_int_cst (unsigned_type_node, iflags);
814 tree cond = boolean_true_node;
815 if (ifc)
817 if (taskloop_p)
819 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
821 build_int_cst (unsigned_type_node,
822 GOMP_TASK_FLAG_IF),
823 build_int_cst (unsigned_type_node, 0));
824 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
825 flags, t);
827 else
828 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
831 if (finalc)
833 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
834 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
835 build_int_cst (unsigned_type_node,
836 GOMP_TASK_FLAG_FINAL),
837 build_int_cst (unsigned_type_node, 0));
838 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
840 if (depend)
841 depend = OMP_CLAUSE_DECL (depend);
842 else
843 depend = build_int_cst (ptr_type_node, 0);
844 if (priority)
845 priority = fold_convert (integer_type_node,
846 OMP_CLAUSE_PRIORITY_EXPR (priority));
847 else
848 priority = integer_zero_node;
850 gsi = gsi_last_nondebug_bb (bb);
851 tree t = gimple_omp_task_data_arg (entry_stmt);
852 if (t == NULL)
853 t2 = null_pointer_node;
854 else
855 t2 = build_fold_addr_expr_loc (loc, t);
856 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
857 t = gimple_omp_task_copy_fn (entry_stmt);
858 if (t == NULL)
859 t3 = null_pointer_node;
860 else
861 t3 = build_fold_addr_expr_loc (loc, t);
863 if (taskloop_p)
864 t = build_call_expr (ull
865 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
866 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
867 11, t1, t2, t3,
868 gimple_omp_task_arg_size (entry_stmt),
869 gimple_omp_task_arg_align (entry_stmt), flags,
870 num_tasks, priority, startvar, endvar, step);
871 else
872 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
873 9, t1, t2, t3,
874 gimple_omp_task_arg_size (entry_stmt),
875 gimple_omp_task_arg_align (entry_stmt), cond, flags,
876 depend, priority);
878 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
879 false, GSI_CONTINUE_LINKING);
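/* To summarize the calls built above (a reading of the build_call_expr
   arguments, not the libgomp prototypes): a plain task becomes roughly

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                if_cond, flags, depend, priority);

   while a taskloop becomes GOMP_taskloop (or GOMP_taskloop_ull for
   unsigned long long iteration variables) with flags, num_tasks,
   priority, start, end and step following ARG_ALIGN instead of the
   if/depend arguments.  */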
882 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
884 static tree
885 vec2chain (vec<tree, va_gc> *v)
887 tree chain = NULL_TREE, t;
888 unsigned ix;
890 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
892 DECL_CHAIN (t) = chain;
893 chain = t;
896 return chain;
899 /* Remove barriers in REGION->EXIT's block. Note that this is only
900 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
901 is an implicit barrier, any barrier that a workshare inside the
902 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
903 can now be removed. */
905 static void
906 remove_exit_barrier (struct omp_region *region)
908 gimple_stmt_iterator gsi;
909 basic_block exit_bb;
910 edge_iterator ei;
911 edge e;
912 gimple *stmt;
913 int any_addressable_vars = -1;
915 exit_bb = region->exit;
917 /* If the parallel region doesn't return, we don't have REGION->EXIT
918 block at all. */
919 if (! exit_bb)
920 return;
922 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
923 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
924 statements that can appear in between are extremely limited -- no
925 memory operations at all. Here, we allow nothing at all, so the
926 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
927 gsi = gsi_last_nondebug_bb (exit_bb);
928 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
929 gsi_prev_nondebug (&gsi);
930 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
931 return;
933 FOR_EACH_EDGE (e, ei, exit_bb->preds)
935 gsi = gsi_last_nondebug_bb (e->src);
936 if (gsi_end_p (gsi))
937 continue;
938 stmt = gsi_stmt (gsi);
939 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
940 && !gimple_omp_return_nowait_p (stmt))
942 /* OpenMP 3.0 tasks unfortunately prevent this optimization
943 in many cases. If there could be tasks queued, the barrier
944 might be needed to let the tasks run before some local
945 variable of the parallel that the task uses as shared
946 runs out of scope. The task can be spawned either
947 from within the current function (this would be easy to check)
948 or from some function it calls that gets passed the address
949 of such a variable. */
950 if (any_addressable_vars < 0)
952 gomp_parallel *parallel_stmt
953 = as_a <gomp_parallel *> (last_stmt (region->entry));
954 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
955 tree local_decls, block, decl;
956 unsigned ix;
958 any_addressable_vars = 0;
959 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
960 if (TREE_ADDRESSABLE (decl))
962 any_addressable_vars = 1;
963 break;
965 for (block = gimple_block (stmt);
966 !any_addressable_vars
967 && block
968 && TREE_CODE (block) == BLOCK;
969 block = BLOCK_SUPERCONTEXT (block))
971 for (local_decls = BLOCK_VARS (block);
972 local_decls;
973 local_decls = DECL_CHAIN (local_decls))
974 if (TREE_ADDRESSABLE (local_decls))
976 any_addressable_vars = 1;
977 break;
979 if (block == gimple_block (parallel_stmt))
980 break;
983 if (!any_addressable_vars)
984 gimple_omp_return_set_nowait (stmt);
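/* An illustrative example of what remove_exit_barrier above does: in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   the barrier implied at the end of the worksharing loop is redundant,
   because the end of the enclosing parallel region is itself a barrier;
   the workshare's GIMPLE_OMP_RETURN is therefore marked nowait -- unless
   queued tasks might still reference addressable locals of the parallel
   body, in which case the barrier is kept.  */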
989 static void
990 remove_exit_barriers (struct omp_region *region)
992 if (region->type == GIMPLE_OMP_PARALLEL)
993 remove_exit_barrier (region);
995 if (region->inner)
997 region = region->inner;
998 remove_exit_barriers (region);
999 while (region->next)
1001 region = region->next;
1002 remove_exit_barriers (region);
1007 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1008 calls. These can't be declared as const functions, but
1009 within one parallel body they are constant, so they can be
1010 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1011 which are declared const. Similarly for a task body, except
1012 that in an untied task omp_get_thread_num () can change at any task
1013 scheduling point. */
1015 static void
1016 optimize_omp_library_calls (gimple *entry_stmt)
1018 basic_block bb;
1019 gimple_stmt_iterator gsi;
1020 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1021 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1022 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1023 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1024 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1025 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1026 OMP_CLAUSE_UNTIED) != NULL);
1028 FOR_EACH_BB_FN (bb, cfun)
1029 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1031 gimple *call = gsi_stmt (gsi);
1032 tree decl;
1034 if (is_gimple_call (call)
1035 && (decl = gimple_call_fndecl (call))
1036 && DECL_EXTERNAL (decl)
1037 && TREE_PUBLIC (decl)
1038 && DECL_INITIAL (decl) == NULL)
1040 tree built_in;
1042 if (DECL_NAME (decl) == thr_num_id)
1044 /* In #pragma omp task untied omp_get_thread_num () can change
1045 during the execution of the task region. */
1046 if (untied_task)
1047 continue;
1048 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1050 else if (DECL_NAME (decl) == num_thr_id)
1051 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1052 else
1053 continue;
1055 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1056 || gimple_call_num_args (call) != 0)
1057 continue;
1059 if (flag_exceptions && !TREE_NOTHROW (decl))
1060 continue;
1062 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1063 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1064 TREE_TYPE (TREE_TYPE (built_in))))
1065 continue;
1067 gimple_call_set_fndecl (call, built_in);
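/* An illustrative example of the substitution above: inside an outlined
   parallel body a call such as

     tid = omp_get_thread_num ();

   is redirected to the const builtin __builtin_omp_get_thread_num (),
   allowing later passes to CSE repeated queries within the body.  In an
   untied task the executing thread can change at task scheduling points,
   so the thread-number substitution is skipped there.  */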
1072 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1073 regimplified. */
1075 static tree
1076 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1078 tree t = *tp;
1080 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1081 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1082 return t;
1084 if (TREE_CODE (t) == ADDR_EXPR)
1085 recompute_tree_invariant_for_addr_expr (t);
1087 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1088 return NULL_TREE;
1091 /* Insert a TO = FROM assignment before or after *GSI_P, as selected by AFTER. */
1093 static void
1094 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1095 bool after)
1097 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1098 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1099 !after, after ? GSI_CONTINUE_LINKING
1100 : GSI_SAME_STMT);
1101 gimple *stmt = gimple_build_assign (to, from);
1102 if (after)
1103 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1104 else
1105 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1106 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1107 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1109 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1110 gimple_regimplify_operands (stmt, &gsi);
1114 /* Expand the OpenMP parallel or task directive starting at REGION. */
1116 static void
1117 expand_omp_taskreg (struct omp_region *region)
1119 basic_block entry_bb, exit_bb, new_bb;
1120 struct function *child_cfun;
1121 tree child_fn, block, t;
1122 gimple_stmt_iterator gsi;
1123 gimple *entry_stmt, *stmt;
1124 edge e;
1125 vec<tree, va_gc> *ws_args;
1127 entry_stmt = last_stmt (region->entry);
1128 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1129 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1131 entry_bb = region->entry;
1132 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1133 exit_bb = region->cont;
1134 else
1135 exit_bb = region->exit;
1137 if (is_combined_parallel (region))
1138 ws_args = region->ws_args;
1139 else
1140 ws_args = NULL;
1142 if (child_cfun->cfg)
1144 /* Due to inlining, it may happen that we have already outlined
1145 the region, in which case all we need to do is make the
1146 sub-graph unreachable and emit the parallel call. */
1147 edge entry_succ_e, exit_succ_e;
1149 entry_succ_e = single_succ_edge (entry_bb);
1151 gsi = gsi_last_nondebug_bb (entry_bb);
1152 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154 gsi_remove (&gsi, true);
1156 new_bb = entry_bb;
1157 if (exit_bb)
1159 exit_succ_e = single_succ_edge (exit_bb);
1160 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1162 remove_edge_and_dominated_blocks (entry_succ_e);
1164 else
1166 unsigned srcidx, dstidx, num;
1168 /* If the parallel region needs data sent from the parent
1169 function, then the very first statement (except possible
1170 tree profile counter updates) of the parallel body
1171 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1172 &.OMP_DATA_O is passed as an argument to the child function,
1173 we need to replace it with the argument as seen by the child
1174 function.
1176 In most cases, this will end up being the identity assignment
1177 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1178 a function call that has been inlined, the original PARM_DECL
1179 .OMP_DATA_I may have been converted into a different local
1180 variable, in which case we need to keep the assignment. */
1181 if (gimple_omp_taskreg_data_arg (entry_stmt))
1183 basic_block entry_succ_bb
1184 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185 : FALLTHRU_EDGE (entry_bb)->dest;
1186 tree arg;
1187 gimple *parcopy_stmt = NULL;
1189 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1191 gimple *stmt;
1193 gcc_assert (!gsi_end_p (gsi));
1194 stmt = gsi_stmt (gsi);
1195 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196 continue;
1198 if (gimple_num_ops (stmt) == 2)
1200 tree arg = gimple_assign_rhs1 (stmt);
1202 /* We ignore the subcode because we're
1203 effectively doing a STRIP_NOPS. */
1205 if (TREE_CODE (arg) == ADDR_EXPR
1206 && TREE_OPERAND (arg, 0)
1207 == gimple_omp_taskreg_data_arg (entry_stmt))
1209 parcopy_stmt = stmt;
1210 break;
1215 gcc_assert (parcopy_stmt != NULL);
1216 arg = DECL_ARGUMENTS (child_fn);
1218 if (!gimple_in_ssa_p (cfun))
1220 if (gimple_assign_lhs (parcopy_stmt) == arg)
1221 gsi_remove (&gsi, true);
1222 else
1224 /* ?? Is setting the subcode really necessary ?? */
1225 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1229 else
1231 tree lhs = gimple_assign_lhs (parcopy_stmt);
1232 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233 /* We'd like to set the rhs to the default def in the child_fn,
1234 but it's too early to create ssa names in the child_fn.
1235 Instead, we set the rhs to the parm. In
1236 move_sese_region_to_fn, we introduce a default def for the
1237 parm, map the parm to its default def, and once we encounter
1238 this stmt, replace the parm with the default def. */
1239 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240 update_stmt (parcopy_stmt);
1244 /* Declare local variables needed in CHILD_CFUN. */
1245 block = DECL_INITIAL (child_fn);
1246 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247 /* The gimplifier could record temporaries in parallel/task block
1248 rather than in containing function's local_decls chain,
1249 which would mean cgraph missed finalizing them. Do it now. */
1250 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252 varpool_node::finalize_decl (t);
1253 DECL_SAVED_TREE (child_fn) = NULL;
1254 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1255 gimple_set_body (child_fn, NULL);
1256 TREE_USED (block) = 1;
1258 /* Reset DECL_CONTEXT on function arguments. */
1259 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260 DECL_CONTEXT (t) = child_fn;
1262 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 so that it can be moved to the child function. */
1264 gsi = gsi_last_nondebug_bb (entry_bb);
1265 stmt = gsi_stmt (gsi);
1266 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268 e = split_block (entry_bb, stmt);
1269 gsi_remove (&gsi, true);
1270 entry_bb = e->dest;
1271 edge e2 = NULL;
1272 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274 else
1276 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277 gcc_assert (e2->dest == region->exit);
1278 remove_edge (BRANCH_EDGE (entry_bb));
1279 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280 gsi = gsi_last_nondebug_bb (region->exit);
1281 gcc_assert (!gsi_end_p (gsi)
1282 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283 gsi_remove (&gsi, true);
1286 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1287 if (exit_bb)
1289 gsi = gsi_last_nondebug_bb (exit_bb);
1290 gcc_assert (!gsi_end_p (gsi)
1291 && (gimple_code (gsi_stmt (gsi))
1292 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293 stmt = gimple_build_return (NULL);
1294 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295 gsi_remove (&gsi, true);
1298 /* Move the parallel region into CHILD_CFUN. */
1300 if (gimple_in_ssa_p (cfun))
1302 init_tree_ssa (child_cfun);
1303 init_ssa_operands (child_cfun);
1304 child_cfun->gimple_df->in_ssa_p = true;
1305 block = NULL_TREE;
1307 else
1308 block = gimple_block (entry_stmt);
1310 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1311 if (exit_bb)
1312 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1313 if (e2)
1315 basic_block dest_bb = e2->dest;
1316 if (!exit_bb)
1317 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1318 remove_edge (e2);
1319 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1321 /* When the OMP expansion process cannot guarantee an up-to-date
1322 loop tree, arrange for the child function to fix up loops. */
1323 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1324 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1326 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1327 num = vec_safe_length (child_cfun->local_decls);
1328 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1330 t = (*child_cfun->local_decls)[srcidx];
1331 if (DECL_CONTEXT (t) == cfun->decl)
1332 continue;
1333 if (srcidx != dstidx)
1334 (*child_cfun->local_decls)[dstidx] = t;
1335 dstidx++;
1337 if (dstidx != num)
1338 vec_safe_truncate (child_cfun->local_decls, dstidx);
1340 /* Inform the callgraph about the new function. */
1341 child_cfun->curr_properties = cfun->curr_properties;
1342 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1343 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1344 cgraph_node *node = cgraph_node::get_create (child_fn);
1345 node->parallelized_function = 1;
1346 cgraph_node::add_new_function (child_fn, true);
1348 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1349 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1351 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1352 fixed in a following pass. */
1353 push_cfun (child_cfun);
1354 if (need_asm)
1355 assign_assembler_name_if_needed (child_fn);
1357 if (optimize)
1358 optimize_omp_library_calls (entry_stmt);
1359 update_max_bb_count ();
1360 cgraph_edge::rebuild_edges ();
1362 /* Some EH regions might become dead, see PR34608. If
1363 pass_cleanup_cfg isn't the first pass to happen with the
1364 new child, these dead EH edges might cause problems.
1365 Clean them up now. */
1366 if (flag_exceptions)
1368 basic_block bb;
1369 bool changed = false;
1371 FOR_EACH_BB_FN (bb, cfun)
1372 changed |= gimple_purge_dead_eh_edges (bb);
1373 if (changed)
1374 cleanup_tree_cfg ();
1376 if (gimple_in_ssa_p (cfun))
1377 update_ssa (TODO_update_ssa);
1378 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1379 verify_loop_structure ();
1380 pop_cfun ();
1382 if (dump_file && !gimple_in_ssa_p (cfun))
1384 omp_any_child_fn_dumped = true;
1385 dump_function_header (dump_file, child_fn, dump_flags);
1386 dump_function_to_file (child_fn, dump_file, dump_flags);
1390 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1392 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1393 expand_parallel_call (region, new_bb,
1394 as_a <gomp_parallel *> (entry_stmt), ws_args);
1395 else
1396 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1397 if (gimple_in_ssa_p (cfun))
1398 update_ssa (TODO_update_ssa_only_virtuals);
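/* An illustrative sketch of the overall effect of expand_omp_taskreg,
   with hypothetical names: after this function a construct such as

     #pragma omp parallel shared (a)
       a[i] = f (i);

   has had its body moved into a child function (e.g. foo._omp_fn.0,
   taking a struct .omp_data_s * argument through which shared variables
   are communicated), and the original site now contains the call to
   GOMP_parallel emitted by expand_parallel_call.  */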
1401 /* Information about members of an OpenACC collapsed loop nest. */
1403 struct oacc_collapse
1405 tree base; /* Base value. */
1406 tree iters; /* Number of steps. */
1407 tree step; /* Step size. */
1408 tree tile; /* Tile increment (if tiled). */
1409 tree outer; /* Tile iterator var. */
1412 /* Helper for expand_oacc_for. Determine collapsed loop information.
1413 Fill in COUNTS array. Emit any initialization code before GSI.
1414 Return the calculated outer loop bound of BOUND_TYPE. */
1416 static tree
1417 expand_oacc_collapse_init (const struct omp_for_data *fd,
1418 gimple_stmt_iterator *gsi,
1419 oacc_collapse *counts, tree bound_type,
1420 location_t loc)
1422 tree tiling = fd->tiling;
1423 tree total = build_int_cst (bound_type, 1);
1424 int ix;
1426 gcc_assert (integer_onep (fd->loop.step));
1427 gcc_assert (integer_zerop (fd->loop.n1));
1429 /* When tiling, the first operand of the tile clause applies to the
1430 innermost loop, and we work outwards from there. Seems
1431 backwards, but whatever. */
1432 for (ix = fd->collapse; ix--;)
1434 const omp_for_data_loop *loop = &fd->loops[ix];
1436 tree iter_type = TREE_TYPE (loop->v);
1437 tree diff_type = iter_type;
1438 tree plus_type = iter_type;
1440 gcc_assert (loop->cond_code == fd->loop.cond_code);
1442 if (POINTER_TYPE_P (iter_type))
1443 plus_type = sizetype;
1444 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1445 diff_type = signed_type_for (diff_type);
1446 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1447 diff_type = integer_type_node;
1449 if (tiling)
1451 tree num = build_int_cst (integer_type_node, fd->collapse);
1452 tree loop_no = build_int_cst (integer_type_node, ix);
1453 tree tile = TREE_VALUE (tiling);
1454 gcall *call
1455 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1456 /* gwv-outer=*/integer_zero_node,
1457 /* gwv-inner=*/integer_zero_node);
1459 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1460 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1461 gimple_call_set_lhs (call, counts[ix].tile);
1462 gimple_set_location (call, loc);
1463 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1465 tiling = TREE_CHAIN (tiling);
1467 else
1469 counts[ix].tile = NULL;
1470 counts[ix].outer = loop->v;
1473 tree b = loop->n1;
1474 tree e = loop->n2;
1475 tree s = loop->step;
1476 bool up = loop->cond_code == LT_EXPR;
1477 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1478 bool negating;
1479 tree expr;
1481 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1482 true, GSI_SAME_STMT);
1483 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1484 true, GSI_SAME_STMT);
1486 /* Convert the step, avoiding possible unsigned->signed overflow. */
1487 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1488 if (negating)
1489 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1490 s = fold_convert (diff_type, s);
1491 if (negating)
1492 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1493 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1494 true, GSI_SAME_STMT);
1496 /* Determine the range, avoiding possible unsigned->signed overflow. */
1497 negating = !up && TYPE_UNSIGNED (iter_type);
1498 expr = fold_build2 (MINUS_EXPR, plus_type,
1499 fold_convert (plus_type, negating ? b : e),
1500 fold_convert (plus_type, negating ? e : b));
1501 expr = fold_convert (diff_type, expr);
1502 if (negating)
1503 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1504 tree range = force_gimple_operand_gsi
1505 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1507 /* Determine number of iterations. */
1508 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1509 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1510 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1512 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1513 true, GSI_SAME_STMT);
1515 counts[ix].base = b;
1516 counts[ix].iters = iters;
1517 counts[ix].step = s;
1519 total = fold_build2 (MULT_EXPR, bound_type, total,
1520 fold_convert (bound_type, iters));
1523 return total;
1526 /* Emit initializers for collapsed loop members. INNER is true if
1527 this is for the element loop of a TILE. IVAR is the outer
1528 loop iteration variable, from which collapsed loop iteration values
1529 are calculated. The COUNTS array has been initialized by
1530 expand_oacc_collapse_init. */
1532 static void
1533 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1534 gimple_stmt_iterator *gsi,
1535 const oacc_collapse *counts, tree ivar)
1537 tree ivar_type = TREE_TYPE (ivar);
1539 /* The most rapidly changing iteration variable is the innermost
1540 one. */
1541 for (int ix = fd->collapse; ix--;)
1543 const omp_for_data_loop *loop = &fd->loops[ix];
1544 const oacc_collapse *collapse = &counts[ix];
1545 tree v = inner ? loop->v : collapse->outer;
1546 tree iter_type = TREE_TYPE (v);
1547 tree diff_type = TREE_TYPE (collapse->step);
1548 tree plus_type = iter_type;
1549 enum tree_code plus_code = PLUS_EXPR;
1550 tree expr;
1552 if (POINTER_TYPE_P (iter_type))
1554 plus_code = POINTER_PLUS_EXPR;
1555 plus_type = sizetype;
1558 expr = ivar;
1559 if (ix)
1561 tree mod = fold_convert (ivar_type, collapse->iters);
1562 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1563 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1564 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1565 true, GSI_SAME_STMT);
1568 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1569 collapse->step);
1570 expr = fold_build2 (plus_code, iter_type,
1571 inner ? collapse->outer : collapse->base,
1572 fold_convert (plus_type, expr));
1573 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1574 true, GSI_SAME_STMT);
1575 gassign *ass = gimple_build_assign (v, expr);
1576 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1580 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1581 of the combined collapse > 1 loop constructs, generate code like:
1582 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1583 if (cond3 is <)
1584 adj = STEP3 - 1;
1585 else
1586 adj = STEP3 + 1;
1587 count3 = (adj + N32 - N31) / STEP3;
1588 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1589 if (cond2 is <)
1590 adj = STEP2 - 1;
1591 else
1592 adj = STEP2 + 1;
1593 count2 = (adj + N22 - N21) / STEP2;
1594 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1595 if (cond1 is <)
1596 adj = STEP1 - 1;
1597 else
1598 adj = STEP1 + 1;
1599 count1 = (adj + N12 - N11) / STEP1;
1600 count = count1 * count2 * count3;
1601 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1602 count = 0;
1603 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1604 of the combined loop constructs, just initialize COUNTS array
1605 from the _looptemp_ clauses. */
1607 /* NOTE: It *could* be better to moosh all of the BBs together,
1608 creating one larger BB with all the computation and the unexpected
1609 jump at the end. I.e.
1611 bool zero3, zero2, zero1, zero;
1613 zero3 = N32 c3 N31;
1614 count3 = (N32 - N31) /[cl] STEP3;
1615 zero2 = N22 c2 N21;
1616 count2 = (N22 - N21) /[cl] STEP2;
1617 zero1 = N12 c1 N11;
1618 count1 = (N12 - N11) /[cl] STEP1;
1619 zero = zero3 || zero2 || zero1;
1620 count = count1 * count2 * count3;
1621 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1623 After all, we expect the zero=false, and thus we expect to have to
1624 evaluate all of the comparison expressions, so short-circuiting
1625 oughtn't be a win. Since the condition isn't protecting a
1626 denominator, we're not concerned about divide-by-zero, so we can
1627 fully evaluate count even if a numerator turned out to be wrong.
1629 It seems like putting this all together would create much better
1630 scheduling opportunities, and less pressure on the chip's branch
1631 predictor. */
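/* An illustrative worked example of the count computation above: for a
   collapse(2) nest

     for (i = 0; i < 10; i += 3)    (cond1 is <, so adj = 3 - 1)
       for (j = 8; j > 0; j -= 2)   (cond2 is >, so adj = -2 + 1)

   count1 = (2 + 10 - 0) / 3 = 4, count2 = (-1 + 0 - 8) / -2 = 4 (with
   truncating division), and count = count1 * count2 = 16 logical
   iterations.  */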
1633 static void
1634 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1635 basic_block &entry_bb, tree *counts,
1636 basic_block &zero_iter1_bb, int &first_zero_iter1,
1637 basic_block &zero_iter2_bb, int &first_zero_iter2,
1638 basic_block &l2_dom_bb)
1640 tree t, type = TREE_TYPE (fd->loop.v);
1641 edge e, ne;
1642 int i;
1644 /* Collapsed loops need work for expansion into SSA form. */
1645 gcc_assert (!gimple_in_ssa_p (cfun));
1647 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1648 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1650 gcc_assert (fd->ordered == 0);
1651 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1652 isn't supposed to be handled, as the inner loop doesn't
1653 use it. */
1654 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1655 OMP_CLAUSE__LOOPTEMP_);
1656 gcc_assert (innerc);
1657 for (i = 0; i < fd->collapse; i++)
1659 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1660 OMP_CLAUSE__LOOPTEMP_);
1661 gcc_assert (innerc);
1662 if (i)
1663 counts[i] = OMP_CLAUSE_DECL (innerc);
1664 else
1665 counts[0] = NULL_TREE;
1667 return;
1670 for (i = fd->collapse; i < fd->ordered; i++)
1672 tree itype = TREE_TYPE (fd->loops[i].v);
1673 counts[i] = NULL_TREE;
1674 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1675 fold_convert (itype, fd->loops[i].n1),
1676 fold_convert (itype, fd->loops[i].n2));
1677 if (t && integer_zerop (t))
1679 for (i = fd->collapse; i < fd->ordered; i++)
1680 counts[i] = build_int_cst (type, 0);
1681 break;
1684 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1686 tree itype = TREE_TYPE (fd->loops[i].v);
1688 if (i >= fd->collapse && counts[i])
1689 continue;
1690 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1691 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1692 fold_convert (itype, fd->loops[i].n1),
1693 fold_convert (itype, fd->loops[i].n2)))
1694 == NULL_TREE || !integer_onep (t)))
1696 gcond *cond_stmt;
1697 tree n1, n2;
1698 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1699 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1700 true, GSI_SAME_STMT);
1701 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1702 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1705 NULL_TREE, NULL_TREE);
1706 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1707 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1708 expand_omp_regimplify_p, NULL, NULL)
1709 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1710 expand_omp_regimplify_p, NULL, NULL))
1712 *gsi = gsi_for_stmt (cond_stmt);
1713 gimple_regimplify_operands (cond_stmt, gsi);
1715 e = split_block (entry_bb, cond_stmt);
1716 basic_block &zero_iter_bb
1717 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1718 int &first_zero_iter
1719 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1720 if (zero_iter_bb == NULL)
1722 gassign *assign_stmt;
1723 first_zero_iter = i;
1724 zero_iter_bb = create_empty_bb (entry_bb);
1725 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1726 *gsi = gsi_after_labels (zero_iter_bb);
1727 if (i < fd->collapse)
1728 assign_stmt = gimple_build_assign (fd->loop.n2,
1729 build_zero_cst (type));
1730 else
1732 counts[i] = create_tmp_reg (type, ".count");
1733 assign_stmt
1734 = gimple_build_assign (counts[i], build_zero_cst (type));
1736 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1737 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1738 entry_bb);
1740 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1741 ne->probability = profile_probability::very_unlikely ();
1742 e->flags = EDGE_TRUE_VALUE;
1743 e->probability = ne->probability.invert ();
1744 if (l2_dom_bb == NULL)
1745 l2_dom_bb = entry_bb;
1746 entry_bb = e->dest;
1747 *gsi = gsi_last_nondebug_bb (entry_bb);
1750 if (POINTER_TYPE_P (itype))
1751 itype = signed_type_for (itype);
1752 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1753 ? -1 : 1));
1754 t = fold_build2 (PLUS_EXPR, itype,
1755 fold_convert (itype, fd->loops[i].step), t);
1756 t = fold_build2 (PLUS_EXPR, itype, t,
1757 fold_convert (itype, fd->loops[i].n2));
1758 t = fold_build2 (MINUS_EXPR, itype, t,
1759 fold_convert (itype, fd->loops[i].n1));
1760 /* ?? We could probably use CEIL_DIV_EXPR instead of
1761 TRUNC_DIV_EXPR plus adjusting by hand. Unless we can't
1762 generate the same code in the end because generically we
1763 don't know that the values involved must be negative for
1764 GT?? */
1765 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1766 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1767 fold_build1 (NEGATE_EXPR, itype, t),
1768 fold_build1 (NEGATE_EXPR, itype,
1769 fold_convert (itype,
1770 fd->loops[i].step)));
1771 else
1772 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1773 fold_convert (itype, fd->loops[i].step));
1774 t = fold_convert (type, t);
1775 if (TREE_CODE (t) == INTEGER_CST)
1776 counts[i] = t;
1777 else
1779 if (i < fd->collapse || i != first_zero_iter2)
1780 counts[i] = create_tmp_reg (type, ".count");
1781 expand_omp_build_assign (gsi, counts[i], t);
1783 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1785 if (i == 0)
1786 t = counts[0];
1787 else
1788 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1789 expand_omp_build_assign (gsi, fd->loop.n2, t);
1794 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1795 T = V;
1796 V3 = N31 + (T % count3) * STEP3;
1797 T = T / count3;
1798 V2 = N21 + (T % count2) * STEP2;
1799 T = T / count2;
1800 V1 = N11 + T * STEP1;
1801 if this loop doesn't have an inner loop construct combined with it.
1802 If it does have an inner loop construct combined with it and the
1803 iteration count isn't known constant, store values from counts array
1804 into its _looptemp_ temporaries instead. */
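/* An illustrative worked example of the code pattern above: assuming a
   collapse(3) nest with count3 = 4 and count2 = 5, logical iteration
   T = 13 maps back to V3 = N31 + (13 % 4) * STEP3, then T = 13 / 4 = 3;
   V2 = N21 + (3 % 5) * STEP2, then T = 3 / 5 = 0; and V1 = N11 + 0 * STEP1,
   i.e. inner index 1, middle index 3, outer index 0.  */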
1806 static void
1807 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1808 tree *counts, gimple *inner_stmt, tree startvar)
1810 int i;
1811 if (gimple_omp_for_combined_p (fd->for_stmt))
1813 /* If fd->loop.n2 is constant, then no propagation of the counts
1814 is needed, they are constant. */
1815 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1816 return;
1818 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1819 ? gimple_omp_taskreg_clauses (inner_stmt)
1820 : gimple_omp_for_clauses (inner_stmt);
1821 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1822 isn't supposed to be handled, as the inner loop doesn't
1823 use it. */
1824 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1825 gcc_assert (innerc);
1826 for (i = 0; i < fd->collapse; i++)
1828 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1829 OMP_CLAUSE__LOOPTEMP_);
1830 gcc_assert (innerc);
1831 if (i)
1833 tree tem = OMP_CLAUSE_DECL (innerc);
1834 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1835 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1836 false, GSI_CONTINUE_LINKING);
1837 gassign *stmt = gimple_build_assign (tem, t);
1838 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1841 return;
1844 tree type = TREE_TYPE (fd->loop.v);
1845 tree tem = create_tmp_reg (type, ".tem");
1846 gassign *stmt = gimple_build_assign (tem, startvar);
1847 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1849 for (i = fd->collapse - 1; i >= 0; i--)
1851 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1852 itype = vtype;
1853 if (POINTER_TYPE_P (vtype))
1854 itype = signed_type_for (vtype);
1855 if (i != 0)
1856 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1857 else
1858 t = tem;
1859 t = fold_convert (itype, t);
1860 t = fold_build2 (MULT_EXPR, itype, t,
1861 fold_convert (itype, fd->loops[i].step));
1862 if (POINTER_TYPE_P (vtype))
1863 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1864 else
1865 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1866 t = force_gimple_operand_gsi (gsi, t,
1867 DECL_P (fd->loops[i].v)
1868 && TREE_ADDRESSABLE (fd->loops[i].v),
1869 NULL_TREE, false,
1870 GSI_CONTINUE_LINKING);
1871 stmt = gimple_build_assign (fd->loops[i].v, t);
1872 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1873 if (i != 0)
1875 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1876 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1877 false, GSI_CONTINUE_LINKING);
1878 stmt = gimple_build_assign (tem, t);
1879 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1884 /* Helper function for expand_omp_for_*. Generate code like:
1885 L10:
1886 V3 += STEP3;
1887 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1888 L11:
1889 V3 = N31;
1890 V2 += STEP2;
1891 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1892 L12:
1893 V2 = N21;
1894 V1 += STEP1;
1895 goto BODY_BB; */
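/* Expressed as plain C rather than the exact GIMPLE emitted (a rough
   sketch only), for a collapse(2) nest the generated update behaves like
   an odometer increment:

     v2 += step2;
     if (!(v2 cond2 n22))
       {
         v2 = n21;
         v1 += step1;
       }
     goto body;  */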
1897 static basic_block
1898 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1899 basic_block body_bb)
1901 basic_block last_bb, bb, collapse_bb = NULL;
1902 int i;
1903 gimple_stmt_iterator gsi;
1904 edge e;
1905 tree t;
1906 gimple *stmt;
1908 last_bb = cont_bb;
1909 for (i = fd->collapse - 1; i >= 0; i--)
1911 tree vtype = TREE_TYPE (fd->loops[i].v);
1913 bb = create_empty_bb (last_bb);
1914 add_bb_to_loop (bb, last_bb->loop_father);
1915 gsi = gsi_start_bb (bb);
1917 if (i < fd->collapse - 1)
1919 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1920 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1922 t = fd->loops[i + 1].n1;
1923 t = force_gimple_operand_gsi (&gsi, t,
1924 DECL_P (fd->loops[i + 1].v)
1925 && TREE_ADDRESSABLE (fd->loops[i
1926 + 1].v),
1927 NULL_TREE, false,
1928 GSI_CONTINUE_LINKING);
1929 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1930 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1932 else
1933 collapse_bb = bb;
1935 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1937 if (POINTER_TYPE_P (vtype))
1938 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1939 else
1940 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1941 t = force_gimple_operand_gsi (&gsi, t,
1942 DECL_P (fd->loops[i].v)
1943 && TREE_ADDRESSABLE (fd->loops[i].v),
1944 NULL_TREE, false, GSI_CONTINUE_LINKING);
1945 stmt = gimple_build_assign (fd->loops[i].v, t);
1946 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1948 if (i > 0)
1950 t = fd->loops[i].n2;
1951 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1952 false, GSI_CONTINUE_LINKING);
1953 tree v = fd->loops[i].v;
1954 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1955 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1956 false, GSI_CONTINUE_LINKING);
1957 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1958 stmt = gimple_build_cond_empty (t);
1959 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1960 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1961 expand_omp_regimplify_p, NULL, NULL)
1962 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1963 expand_omp_regimplify_p, NULL, NULL))
1964 gimple_regimplify_operands (stmt, &gsi);
1965 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1966 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1968 else
1969 make_edge (bb, body_bb, EDGE_FALLTHRU);
1970 last_bb = bb;
1973 return collapse_bb;
1976 /* Expand #pragma omp ordered depend(source). */
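/* The directive is replaced by a single runtime call, roughly
     GOMP_doacross_post (&counts_array)
   (GOMP_doacross_ull_post for the unsigned long long iterator type), where
   counts_array stands for the counts[fd->ordered] temporary holding the
   current iteration vector; see expand_omp_ordered_source_sink below.  */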
1978 static void
1979 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1980 tree *counts, location_t loc)
1982 enum built_in_function source_ix
1983 = fd->iter_type == long_integer_type_node
1984 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1985 gimple *g
1986 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1987 build_fold_addr_expr (counts[fd->ordered]));
1988 gimple_set_location (g, loc);
1989 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1992 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
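/* For instance, inside a loop nest marked
     #pragma omp for ordered(2)
   a directive such as
     #pragma omp ordered depend (sink: i - 1, j)
   becomes, roughly, a guarded runtime call
     if (<i - 1, j is a valid iteration>)
       GOMP_doacross_wait (<iteration number for i - 1>, <iteration number for j>);
   where the guard is the COND tree built below and the call arguments are
   the per-dimension logical iteration numbers pushed into ARGS.  */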
1994 static void
1995 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1996 tree *counts, tree c, location_t loc)
1998 auto_vec<tree, 10> args;
1999 enum built_in_function sink_ix
2000 = fd->iter_type == long_integer_type_node
2001 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2002 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2003 int i;
2004 gimple_stmt_iterator gsi2 = *gsi;
2005 bool warned_step = false;
2007 for (i = 0; i < fd->ordered; i++)
2009 tree step = NULL_TREE;
2010 off = TREE_PURPOSE (deps);
2011 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2013 step = TREE_OPERAND (off, 1);
2014 off = TREE_OPERAND (off, 0);
2016 if (!integer_zerop (off))
2018 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2019 || fd->loops[i].cond_code == GT_EXPR);
2020 bool forward = fd->loops[i].cond_code == LT_EXPR;
2021 if (step)
2023 /* Non-simple Fortran DO loops.  If step is variable,
2024 we don't know even the direction at compile time, so we can't
2025 warn. */
2026 if (TREE_CODE (step) != INTEGER_CST)
2027 break;
2028 forward = tree_int_cst_sgn (step) != -1;
2030 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2031 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2032 "lexically later iteration");
2033 break;
2035 deps = TREE_CHAIN (deps);
2037 /* If all offsets corresponding to the collapsed loops are zero,
2038 this depend clause can be ignored. FIXME: but there is still a
2039 flush needed. We need to emit one __sync_synchronize () for it
2040 though (perhaps conditionally)? Solve this together with the
2041 conservative dependence folding optimization.
2042 if (i >= fd->collapse)
2043 return; */
2045 deps = OMP_CLAUSE_DECL (c);
2046 gsi_prev (&gsi2);
2047 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2048 edge e2 = split_block_after_labels (e1->dest);
2050 gsi2 = gsi_after_labels (e1->dest);
2051 *gsi = gsi_last_bb (e1->src);
2052 for (i = 0; i < fd->ordered; i++)
2054 tree itype = TREE_TYPE (fd->loops[i].v);
2055 tree step = NULL_TREE;
2056 tree orig_off = NULL_TREE;
2057 if (POINTER_TYPE_P (itype))
2058 itype = sizetype;
2059 if (i)
2060 deps = TREE_CHAIN (deps);
2061 off = TREE_PURPOSE (deps);
2062 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2064 step = TREE_OPERAND (off, 1);
2065 off = TREE_OPERAND (off, 0);
2066 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2067 && integer_onep (fd->loops[i].step)
2068 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2070 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2071 if (step)
2073 off = fold_convert_loc (loc, itype, off);
2074 orig_off = off;
2075 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2078 if (integer_zerop (off))
2079 t = boolean_true_node;
2080 else
2082 tree a;
2083 tree co = fold_convert_loc (loc, itype, off);
2084 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2086 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2088 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2089 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2090 co);
2092 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2094 fd->loops[i].v, co);
2095 else
2096 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097 fd->loops[i].v, co);
2098 if (step)
2100 tree t1, t2;
2101 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2102 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2103 fd->loops[i].n1);
2104 else
2105 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2106 fd->loops[i].n2);
2107 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2108 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109 fd->loops[i].n2);
2110 else
2111 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2112 fd->loops[i].n1);
2113 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2114 step, build_int_cst (TREE_TYPE (step), 0));
2115 if (TREE_CODE (step) != INTEGER_CST)
2117 t1 = unshare_expr (t1);
2118 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2119 false, GSI_CONTINUE_LINKING);
2120 t2 = unshare_expr (t2);
2121 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2122 false, GSI_CONTINUE_LINKING);
2124 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2125 t, t2, t1);
2127 else if (fd->loops[i].cond_code == LT_EXPR)
2129 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2130 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2131 fd->loops[i].n1);
2132 else
2133 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2134 fd->loops[i].n2);
2136 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2137 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2138 fd->loops[i].n2);
2139 else
2140 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2141 fd->loops[i].n1);
2143 if (cond)
2144 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2145 else
2146 cond = t;
2148 off = fold_convert_loc (loc, itype, off);
2150 if (step
2151 || (fd->loops[i].cond_code == LT_EXPR
2152 ? !integer_onep (fd->loops[i].step)
2153 : !integer_minus_onep (fd->loops[i].step)))
2155 if (step == NULL_TREE
2156 && TYPE_UNSIGNED (itype)
2157 && fd->loops[i].cond_code == GT_EXPR)
2158 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2159 fold_build1_loc (loc, NEGATE_EXPR, itype,
2160 s));
2161 else
2162 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2163 orig_off ? orig_off : off, s);
2164 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2165 build_int_cst (itype, 0));
2166 if (integer_zerop (t) && !warned_step)
2168 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2169 "in the iteration space");
2170 warned_step = true;
2172 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2173 cond, t);
2176 if (i <= fd->collapse - 1 && fd->collapse > 1)
2177 t = fd->loop.v;
2178 else if (counts[i])
2179 t = counts[i];
2180 else
2182 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2183 fd->loops[i].v, fd->loops[i].n1);
2184 t = fold_convert_loc (loc, fd->iter_type, t);
2186 if (step)
2187 /* We have divided off by step already earlier. */;
2188 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2189 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2190 fold_build1_loc (loc, NEGATE_EXPR, itype,
2191 s));
2192 else
2193 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2194 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2195 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2196 off = fold_convert_loc (loc, fd->iter_type, off);
2197 if (i <= fd->collapse - 1 && fd->collapse > 1)
2199 if (i)
2200 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2201 off);
2202 if (i < fd->collapse - 1)
2204 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2205 counts[i]);
2206 continue;
2209 off = unshare_expr (off);
2210 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2211 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2212 true, GSI_SAME_STMT);
2213 args.safe_push (t);
2215 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2216 gimple_set_location (g, loc);
2217 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2219 cond = unshare_expr (cond);
2220 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2221 GSI_CONTINUE_LINKING);
2222 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2223 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2224 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2225 e1->probability = e3->probability.invert ();
2226 e1->flags = EDGE_TRUE_VALUE;
2227 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2229 *gsi = gsi_after_labels (e2->dest);
2232 /* Expand all #pragma omp ordered depend(source) and
2233 #pragma omp ordered depend(sink:...) constructs in the current
2234 #pragma omp for ordered(n) region. */
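/* A typical region handled here looks like (illustrative code only,
   assumed to be inside a parallel region):

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         {
           #pragma omp ordered depend (sink: i - 1, j)
           a[i][j] = f (a[i - 1][j]);
           #pragma omp ordered depend (source)
         }

   Each depend(source) posts the current (i, j) pair and each
   depend(sink: ...) waits for the referenced iteration, using the counter
   array set up below.  */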
2236 static void
2237 expand_omp_ordered_source_sink (struct omp_region *region,
2238 struct omp_for_data *fd, tree *counts,
2239 basic_block cont_bb)
2241 struct omp_region *inner;
2242 int i;
2243 for (i = fd->collapse - 1; i < fd->ordered; i++)
2244 if (i == fd->collapse - 1 && fd->collapse > 1)
2245 counts[i] = NULL_TREE;
2246 else if (i >= fd->collapse && !cont_bb)
2247 counts[i] = build_zero_cst (fd->iter_type);
2248 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2249 && integer_onep (fd->loops[i].step))
2250 counts[i] = NULL_TREE;
2251 else
2252 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2253 tree atype
2254 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2255 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2256 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2258 for (inner = region->inner; inner; inner = inner->next)
2259 if (inner->type == GIMPLE_OMP_ORDERED)
2261 gomp_ordered *ord_stmt = inner->ord_stmt;
2262 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2263 location_t loc = gimple_location (ord_stmt);
2264 tree c;
2265 for (c = gimple_omp_ordered_clauses (ord_stmt);
2266 c; c = OMP_CLAUSE_CHAIN (c))
2267 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2268 break;
2269 if (c)
2270 expand_omp_ordered_source (&gsi, fd, counts, loc);
2271 for (c = gimple_omp_ordered_clauses (ord_stmt);
2272 c; c = OMP_CLAUSE_CHAIN (c))
2273 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2274 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2275 gsi_remove (&gsi, true);
2279 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2280 collapsed. */
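/* E.g. for ordered(2) without a collapse clause only the outermost loop is
   workshared; this function re-creates the second loop around the body and
   keeps the corresponding element of the .orditera array (and, where one is
   created, the .orditer counter) up to date on every iteration.  A rough
   summary of the block-splitting code below.  */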
2282 static basic_block
2283 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2284 basic_block cont_bb, basic_block body_bb,
2285 bool ordered_lastprivate)
2287 if (fd->ordered == fd->collapse)
2288 return cont_bb;
2290 if (!cont_bb)
2292 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293 for (int i = fd->collapse; i < fd->ordered; i++)
2295 tree type = TREE_TYPE (fd->loops[i].v);
2296 tree n1 = fold_convert (type, fd->loops[i].n1);
2297 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2298 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299 size_int (i - fd->collapse + 1),
2300 NULL_TREE, NULL_TREE);
2301 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2303 return NULL;
2306 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2308 tree t, type = TREE_TYPE (fd->loops[i].v);
2309 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2310 expand_omp_build_assign (&gsi, fd->loops[i].v,
2311 fold_convert (type, fd->loops[i].n1));
2312 if (counts[i])
2313 expand_omp_build_assign (&gsi, counts[i],
2314 build_zero_cst (fd->iter_type));
2315 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2316 size_int (i - fd->collapse + 1),
2317 NULL_TREE, NULL_TREE);
2318 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2319 if (!gsi_end_p (gsi))
2320 gsi_prev (&gsi);
2321 else
2322 gsi = gsi_last_bb (body_bb);
2323 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2324 basic_block new_body = e1->dest;
2325 if (body_bb == cont_bb)
2326 cont_bb = new_body;
2327 edge e2 = NULL;
2328 basic_block new_header;
2329 if (EDGE_COUNT (cont_bb->preds) > 0)
2331 gsi = gsi_last_bb (cont_bb);
2332 if (POINTER_TYPE_P (type))
2333 t = fold_build_pointer_plus (fd->loops[i].v,
2334 fold_convert (sizetype,
2335 fd->loops[i].step));
2336 else
2337 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2338 fold_convert (type, fd->loops[i].step));
2339 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2340 if (counts[i])
2342 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2343 build_int_cst (fd->iter_type, 1));
2344 expand_omp_build_assign (&gsi, counts[i], t);
2345 t = counts[i];
2347 else
2349 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2350 fd->loops[i].v, fd->loops[i].n1);
2351 t = fold_convert (fd->iter_type, t);
2352 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2353 true, GSI_SAME_STMT);
2355 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2356 size_int (i - fd->collapse + 1),
2357 NULL_TREE, NULL_TREE);
2358 expand_omp_build_assign (&gsi, aref, t);
2359 gsi_prev (&gsi);
2360 e2 = split_block (cont_bb, gsi_stmt (gsi));
2361 new_header = e2->dest;
2363 else
2364 new_header = cont_bb;
2365 gsi = gsi_after_labels (new_header);
2366 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2367 true, GSI_SAME_STMT);
2368 tree n2
2369 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2370 true, NULL_TREE, true, GSI_SAME_STMT);
2371 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2372 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2373 edge e3 = split_block (new_header, gsi_stmt (gsi));
2374 cont_bb = e3->dest;
2375 remove_edge (e1);
2376 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2377 e3->flags = EDGE_FALSE_VALUE;
2378 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2379 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2380 e1->probability = e3->probability.invert ();
2382 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2383 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2385 if (e2)
2387 struct loop *loop = alloc_loop ();
2388 loop->header = new_header;
2389 loop->latch = e2->src;
2390 add_loop (loop, body_bb->loop_father);
2394 /* If there are any lastprivate clauses and it is possible some loops
2395 might have zero iterations, ensure all the decls are initialized,
2396 otherwise we could crash evaluating C++ class iterators with lastprivate
2397 clauses. */
2398 bool need_inits = false;
2399 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2400 if (need_inits)
2402 tree type = TREE_TYPE (fd->loops[i].v);
2403 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2404 expand_omp_build_assign (&gsi, fd->loops[i].v,
2405 fold_convert (type, fd->loops[i].n1));
2407 else
2409 tree type = TREE_TYPE (fd->loops[i].v);
2410 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2411 boolean_type_node,
2412 fold_convert (type, fd->loops[i].n1),
2413 fold_convert (type, fd->loops[i].n2));
2414 if (!integer_onep (this_cond))
2415 need_inits = true;
2418 return cont_bb;
2421 /* A subroutine of expand_omp_for. Generate code for a parallel
2422 loop with any schedule. Given parameters:
2424 for (V = N1; V cond N2; V += STEP) BODY;
2426 where COND is "<" or ">", we generate pseudocode
2428 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2429 if (more) goto L0; else goto L3;
2431 V = istart0;
2432 iend = iend0;
2434 BODY;
2435 V += STEP;
2436 if (V cond iend) goto L1; else goto L2;
2438 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2441 If this is a combined omp parallel loop, instead of the call to
2442 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2443 If this is gimple_omp_for_combined_p loop, then instead of assigning
2444 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2445 inner GIMPLE_OMP_FOR and V += STEP; and
2446 if (V cond iend) goto L1; else goto L2; are removed.
2448 For collapsed loops, given parameters:
2449 collapse(3)
2450 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2451 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2452 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2453 BODY;
2455 we generate pseudocode
2457 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2458 if (cond3 is <)
2459 adj = STEP3 - 1;
2460 else
2461 adj = STEP3 + 1;
2462 count3 = (adj + N32 - N31) / STEP3;
2463 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2464 if (cond2 is <)
2465 adj = STEP2 - 1;
2466 else
2467 adj = STEP2 + 1;
2468 count2 = (adj + N22 - N21) / STEP2;
2469 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2470 if (cond1 is <)
2471 adj = STEP1 - 1;
2472 else
2473 adj = STEP1 + 1;
2474 count1 = (adj + N12 - N11) / STEP1;
2475 count = count1 * count2 * count3;
2476 goto Z1;
2478 count = 0;
2480 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2481 if (more) goto L0; else goto L3;
2483 V = istart0;
2484 T = V;
2485 V3 = N31 + (T % count3) * STEP3;
2486 T = T / count3;
2487 V2 = N21 + (T % count2) * STEP2;
2488 T = T / count2;
2489 V1 = N11 + T * STEP1;
2490 iend = iend0;
2492 BODY;
2493 V += 1;
2494 if (V < iend) goto L10; else goto L2;
2495 L10:
2496 V3 += STEP3;
2497 if (V3 cond3 N32) goto L1; else goto L11;
2498 L11:
2499 V3 = N31;
2500 V2 += STEP2;
2501 if (V2 cond2 N22) goto L1; else goto L12;
2502 L12:
2503 V2 = N21;
2504 V1 += STEP1;
2505 goto L1;
2507 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
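/* Concretely, START_FN/NEXT_FN are schedule-specific libgomp entry points
   chosen by the caller; e.g. for schedule(dynamic) with the long iterator
   type they correspond to interfaces roughly of the form

     bool GOMP_loop_dynamic_start (long start, long end, long incr,
                                   long chunk_size, long *istart, long *iend);
     bool GOMP_loop_dynamic_next (long *istart, long *iend);

   with GOMP_loop_ull_* variants used when fd->iter_type is unsigned long
   long.  */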
2512 static void
2513 expand_omp_for_generic (struct omp_region *region,
2514 struct omp_for_data *fd,
2515 enum built_in_function start_fn,
2516 enum built_in_function next_fn,
2517 gimple *inner_stmt)
2519 tree type, istart0, iend0, iend;
2520 tree t, vmain, vback, bias = NULL_TREE;
2521 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2522 basic_block l2_bb = NULL, l3_bb = NULL;
2523 gimple_stmt_iterator gsi;
2524 gassign *assign_stmt;
2525 bool in_combined_parallel = is_combined_parallel (region);
2526 bool broken_loop = region->cont == NULL;
2527 edge e, ne;
2528 tree *counts = NULL;
2529 int i;
2530 bool ordered_lastprivate = false;
2532 gcc_assert (!broken_loop || !in_combined_parallel);
2533 gcc_assert (fd->iter_type == long_integer_type_node
2534 || !in_combined_parallel);
2536 entry_bb = region->entry;
2537 cont_bb = region->cont;
2538 collapse_bb = NULL;
2539 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2540 gcc_assert (broken_loop
2541 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2542 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2543 l1_bb = single_succ (l0_bb);
2544 if (!broken_loop)
2546 l2_bb = create_empty_bb (cont_bb);
2547 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2548 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2549 == l1_bb));
2550 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2552 else
2553 l2_bb = NULL;
2554 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2555 exit_bb = region->exit;
2557 gsi = gsi_last_nondebug_bb (entry_bb);
2559 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2560 if (fd->ordered
2561 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2562 OMP_CLAUSE_LASTPRIVATE))
2563 ordered_lastprivate = true;
2564 if (fd->collapse > 1 || fd->ordered)
2566 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2567 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2569 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2570 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2571 zero_iter1_bb, first_zero_iter1,
2572 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2574 if (zero_iter1_bb)
2576 /* Some counts[i] vars might be uninitialized if
2577 some loop has zero iterations. But the body shouldn't
2578 be executed in that case, so just avoid uninit warnings. */
2579 for (i = first_zero_iter1;
2580 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2581 if (SSA_VAR_P (counts[i]))
2582 TREE_NO_WARNING (counts[i]) = 1;
2583 gsi_prev (&gsi);
2584 e = split_block (entry_bb, gsi_stmt (gsi));
2585 entry_bb = e->dest;
2586 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2587 gsi = gsi_last_nondebug_bb (entry_bb);
2588 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2589 get_immediate_dominator (CDI_DOMINATORS,
2590 zero_iter1_bb));
2592 if (zero_iter2_bb)
2594 /* Some counts[i] vars might be uninitialized if
2595 some loop has zero iterations. But the body shouldn't
2596 be executed in that case, so just avoid uninit warnings. */
2597 for (i = first_zero_iter2; i < fd->ordered; i++)
2598 if (SSA_VAR_P (counts[i]))
2599 TREE_NO_WARNING (counts[i]) = 1;
2600 if (zero_iter1_bb)
2601 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2602 else
2604 gsi_prev (&gsi);
2605 e = split_block (entry_bb, gsi_stmt (gsi));
2606 entry_bb = e->dest;
2607 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2608 gsi = gsi_last_nondebug_bb (entry_bb);
2609 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2610 get_immediate_dominator
2611 (CDI_DOMINATORS, zero_iter2_bb));
2614 if (fd->collapse == 1)
2616 counts[0] = fd->loop.n2;
2617 fd->loop = fd->loops[0];
2621 type = TREE_TYPE (fd->loop.v);
2622 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2623 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2624 TREE_ADDRESSABLE (istart0) = 1;
2625 TREE_ADDRESSABLE (iend0) = 1;
2627 /* See if we need to bias by LLONG_MIN. */
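  /* That is: when the loop variable is a signed 64-bit type but libgomp
     iterates over unsigned long long, adding TYPE_MIN_VALUE (LLONG_MIN) to
     both bounds shifts the whole range into unsigned space while keeping
     the iteration order under unsigned comparison; the bias is subtracted
     again below when assigning back to the loop variable.  */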
2628 if (fd->iter_type == long_long_unsigned_type_node
2629 && TREE_CODE (type) == INTEGER_TYPE
2630 && !TYPE_UNSIGNED (type)
2631 && fd->ordered == 0)
2633 tree n1, n2;
2635 if (fd->loop.cond_code == LT_EXPR)
2637 n1 = fd->loop.n1;
2638 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2640 else
2642 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2643 n2 = fd->loop.n1;
2645 if (TREE_CODE (n1) != INTEGER_CST
2646 || TREE_CODE (n2) != INTEGER_CST
2647 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2648 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2651 gimple_stmt_iterator gsif = gsi;
2652 gsi_prev (&gsif);
2654 tree arr = NULL_TREE;
2655 if (in_combined_parallel)
2657 gcc_assert (fd->ordered == 0);
2658 /* In a combined parallel loop, emit a call to
2659 GOMP_loop_foo_next. */
2660 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2661 build_fold_addr_expr (istart0),
2662 build_fold_addr_expr (iend0));
2664 else
2666 tree t0, t1, t2, t3, t4;
2667 /* If this is not a combined parallel loop, emit a call to
2668 GOMP_loop_foo_start in ENTRY_BB. */
2669 t4 = build_fold_addr_expr (iend0);
2670 t3 = build_fold_addr_expr (istart0);
2671 if (fd->ordered)
2673 t0 = build_int_cst (unsigned_type_node,
2674 fd->ordered - fd->collapse + 1);
2675 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2676 fd->ordered
2677 - fd->collapse + 1),
2678 ".omp_counts");
2679 DECL_NAMELESS (arr) = 1;
2680 TREE_ADDRESSABLE (arr) = 1;
2681 TREE_STATIC (arr) = 1;
2682 vec<constructor_elt, va_gc> *v;
2683 vec_alloc (v, fd->ordered - fd->collapse + 1);
2684 int idx;
2686 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2688 tree c;
2689 if (idx == 0 && fd->collapse > 1)
2690 c = fd->loop.n2;
2691 else
2692 c = counts[idx + fd->collapse - 1];
2693 tree purpose = size_int (idx);
2694 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2695 if (TREE_CODE (c) != INTEGER_CST)
2696 TREE_STATIC (arr) = 0;
2699 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2700 if (!TREE_STATIC (arr))
2701 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2702 void_type_node, arr),
2703 true, NULL_TREE, true, GSI_SAME_STMT);
2704 t1 = build_fold_addr_expr (arr);
2705 t2 = NULL_TREE;
2707 else
2709 t2 = fold_convert (fd->iter_type, fd->loop.step);
2710 t1 = fd->loop.n2;
2711 t0 = fd->loop.n1;
2712 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2714 tree innerc
2715 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2716 OMP_CLAUSE__LOOPTEMP_);
2717 gcc_assert (innerc);
2718 t0 = OMP_CLAUSE_DECL (innerc);
2719 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2720 OMP_CLAUSE__LOOPTEMP_);
2721 gcc_assert (innerc);
2722 t1 = OMP_CLAUSE_DECL (innerc);
2724 if (POINTER_TYPE_P (TREE_TYPE (t0))
2725 && TYPE_PRECISION (TREE_TYPE (t0))
2726 != TYPE_PRECISION (fd->iter_type))
2728 /* Avoid casting pointers to integer of a different size. */
2729 tree itype = signed_type_for (type);
2730 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2731 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2733 else
2735 t1 = fold_convert (fd->iter_type, t1);
2736 t0 = fold_convert (fd->iter_type, t0);
2738 if (bias)
2740 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2741 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2744 if (fd->iter_type == long_integer_type_node || fd->ordered)
2746 if (fd->chunk_size)
2748 t = fold_convert (fd->iter_type, fd->chunk_size);
2749 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2750 if (fd->ordered)
2751 t = build_call_expr (builtin_decl_explicit (start_fn),
2752 5, t0, t1, t, t3, t4);
2753 else
2754 t = build_call_expr (builtin_decl_explicit (start_fn),
2755 6, t0, t1, t2, t, t3, t4);
2757 else if (fd->ordered)
2758 t = build_call_expr (builtin_decl_explicit (start_fn),
2759 4, t0, t1, t3, t4);
2760 else
2761 t = build_call_expr (builtin_decl_explicit (start_fn),
2762 5, t0, t1, t2, t3, t4);
2764 else
2766 tree t5;
2767 tree c_bool_type;
2768 tree bfn_decl;
2770 /* The GOMP_loop_ull_*start functions have an additional boolean
2771 argument, true for < loops and false for > loops.
2772 In Fortran, the C bool type can be different from
2773 boolean_type_node. */
2774 bfn_decl = builtin_decl_explicit (start_fn);
2775 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2776 t5 = build_int_cst (c_bool_type,
2777 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2778 if (fd->chunk_size)
2780 tree bfn_decl = builtin_decl_explicit (start_fn);
2781 t = fold_convert (fd->iter_type, fd->chunk_size);
2782 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2783 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2785 else
2786 t = build_call_expr (builtin_decl_explicit (start_fn),
2787 6, t5, t0, t1, t2, t3, t4);
2790 if (TREE_TYPE (t) != boolean_type_node)
2791 t = fold_build2 (NE_EXPR, boolean_type_node,
2792 t, build_int_cst (TREE_TYPE (t), 0));
2793 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2794 true, GSI_SAME_STMT);
2795 if (arr && !TREE_STATIC (arr))
2797 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2798 TREE_THIS_VOLATILE (clobber) = 1;
2799 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2800 GSI_SAME_STMT);
2802 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2804 /* Remove the GIMPLE_OMP_FOR statement. */
2805 gsi_remove (&gsi, true);
2807 if (gsi_end_p (gsif))
2808 gsif = gsi_after_labels (gsi_bb (gsif));
2809 gsi_next (&gsif);
2811 /* Iteration setup for sequential loop goes in L0_BB. */
2812 tree startvar = fd->loop.v;
2813 tree endvar = NULL_TREE;
2815 if (gimple_omp_for_combined_p (fd->for_stmt))
2817 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2818 && gimple_omp_for_kind (inner_stmt)
2819 == GF_OMP_FOR_KIND_SIMD);
2820 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2821 OMP_CLAUSE__LOOPTEMP_);
2822 gcc_assert (innerc);
2823 startvar = OMP_CLAUSE_DECL (innerc);
2824 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2825 OMP_CLAUSE__LOOPTEMP_);
2826 gcc_assert (innerc);
2827 endvar = OMP_CLAUSE_DECL (innerc);
2830 gsi = gsi_start_bb (l0_bb);
2831 t = istart0;
2832 if (fd->ordered && fd->collapse == 1)
2833 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2834 fold_convert (fd->iter_type, fd->loop.step));
2835 else if (bias)
2836 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2837 if (fd->ordered && fd->collapse == 1)
2839 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2840 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2841 fd->loop.n1, fold_convert (sizetype, t));
2842 else
2844 t = fold_convert (TREE_TYPE (startvar), t);
2845 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2846 fd->loop.n1, t);
2849 else
2851 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2852 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2853 t = fold_convert (TREE_TYPE (startvar), t);
2855 t = force_gimple_operand_gsi (&gsi, t,
2856 DECL_P (startvar)
2857 && TREE_ADDRESSABLE (startvar),
2858 NULL_TREE, false, GSI_CONTINUE_LINKING);
2859 assign_stmt = gimple_build_assign (startvar, t);
2860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2862 t = iend0;
2863 if (fd->ordered && fd->collapse == 1)
2864 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2865 fold_convert (fd->iter_type, fd->loop.step));
2866 else if (bias)
2867 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2868 if (fd->ordered && fd->collapse == 1)
2870 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2871 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2872 fd->loop.n1, fold_convert (sizetype, t));
2873 else
2875 t = fold_convert (TREE_TYPE (startvar), t);
2876 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2877 fd->loop.n1, t);
2880 else
2882 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2883 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2884 t = fold_convert (TREE_TYPE (startvar), t);
2886 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2887 false, GSI_CONTINUE_LINKING);
2888 if (endvar)
2890 assign_stmt = gimple_build_assign (endvar, iend);
2891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2892 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2893 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2894 else
2895 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2896 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2898 /* Handle linear clause adjustments. */
2899 tree itercnt = NULL_TREE;
2900 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2901 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2902 c; c = OMP_CLAUSE_CHAIN (c))
2903 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2904 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2906 tree d = OMP_CLAUSE_DECL (c);
2907 bool is_ref = omp_is_reference (d);
2908 tree t = d, a, dest;
2909 if (is_ref)
2910 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2911 tree type = TREE_TYPE (t);
2912 if (POINTER_TYPE_P (type))
2913 type = sizetype;
2914 dest = unshare_expr (t);
2915 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2916 expand_omp_build_assign (&gsif, v, t);
2917 if (itercnt == NULL_TREE)
2919 itercnt = startvar;
2920 tree n1 = fd->loop.n1;
2921 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2923 itercnt
2924 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2925 itercnt);
2926 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2928 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2929 itercnt, n1);
2930 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2931 itercnt, fd->loop.step);
2932 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2933 NULL_TREE, false,
2934 GSI_CONTINUE_LINKING);
2936 a = fold_build2 (MULT_EXPR, type,
2937 fold_convert (type, itercnt),
2938 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2939 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2940 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2942 false, GSI_CONTINUE_LINKING);
2943 assign_stmt = gimple_build_assign (dest, t);
2944 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2946 if (fd->collapse > 1)
2947 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2949 if (fd->ordered)
2951 /* Until now, the counts array contained the number of iterations (or
2952 a variable holding it) for the ith loop. From now on, we need
2953 those counts only for the collapsed loops, and only for the 2nd
2954 till the last collapsed one. Move them one element earlier;
2955 we'll use counts[fd->collapse - 1] for the first source/sink
2956 iteration counter and so on, and counts[fd->ordered]
2957 as the array holding the current counter values for
2958 depend(source). */
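/* E.g. for collapse(2) ordered(3), after the move counts[0] is the inner
   collapsed loop's iteration count, counts[1] and counts[2] are the
   source/sink iteration counters and counts[3] is the .orditera array;
   a sketch of the layout described above.  */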
2959 if (fd->collapse > 1)
2960 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2961 if (broken_loop)
2963 int i;
2964 for (i = fd->collapse; i < fd->ordered; i++)
2966 tree type = TREE_TYPE (fd->loops[i].v);
2967 tree this_cond
2968 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2969 fold_convert (type, fd->loops[i].n1),
2970 fold_convert (type, fd->loops[i].n2));
2971 if (!integer_onep (this_cond))
2972 break;
2974 if (i < fd->ordered)
2976 cont_bb
2977 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2978 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2979 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2980 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2981 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2982 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2983 make_edge (cont_bb, l1_bb, 0);
2984 l2_bb = create_empty_bb (cont_bb);
2985 broken_loop = false;
2988 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2989 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2990 ordered_lastprivate);
2991 if (counts[fd->collapse - 1])
2993 gcc_assert (fd->collapse == 1);
2994 gsi = gsi_last_bb (l0_bb);
2995 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2996 istart0, true);
2997 gsi = gsi_last_bb (cont_bb);
2998 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2999 build_int_cst (fd->iter_type, 1));
3000 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3001 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3002 size_zero_node, NULL_TREE, NULL_TREE);
3003 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3004 t = counts[fd->collapse - 1];
3006 else if (fd->collapse > 1)
3007 t = fd->loop.v;
3008 else
3010 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3011 fd->loops[0].v, fd->loops[0].n1);
3012 t = fold_convert (fd->iter_type, t);
3014 gsi = gsi_last_bb (l0_bb);
3015 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3016 size_zero_node, NULL_TREE, NULL_TREE);
3017 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3018 false, GSI_CONTINUE_LINKING);
3019 expand_omp_build_assign (&gsi, aref, t, true);
3022 if (!broken_loop)
3024 /* Code to control the increment and predicate for the sequential
3025 loop goes in the CONT_BB. */
3026 gsi = gsi_last_nondebug_bb (cont_bb);
3027 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3028 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3029 vmain = gimple_omp_continue_control_use (cont_stmt);
3030 vback = gimple_omp_continue_control_def (cont_stmt);
3032 if (!gimple_omp_for_combined_p (fd->for_stmt))
3034 if (POINTER_TYPE_P (type))
3035 t = fold_build_pointer_plus (vmain, fd->loop.step);
3036 else
3037 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3038 t = force_gimple_operand_gsi (&gsi, t,
3039 DECL_P (vback)
3040 && TREE_ADDRESSABLE (vback),
3041 NULL_TREE, true, GSI_SAME_STMT);
3042 assign_stmt = gimple_build_assign (vback, t);
3043 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3045 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3047 tree tem;
3048 if (fd->collapse > 1)
3049 tem = fd->loop.v;
3050 else
3052 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3053 fd->loops[0].v, fd->loops[0].n1);
3054 tem = fold_convert (fd->iter_type, tem);
3056 tree aref = build4 (ARRAY_REF, fd->iter_type,
3057 counts[fd->ordered], size_zero_node,
3058 NULL_TREE, NULL_TREE);
3059 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3060 true, GSI_SAME_STMT);
3061 expand_omp_build_assign (&gsi, aref, tem);
3064 t = build2 (fd->loop.cond_code, boolean_type_node,
3065 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3066 iend);
3067 gcond *cond_stmt = gimple_build_cond_empty (t);
3068 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3071 /* Remove GIMPLE_OMP_CONTINUE. */
3072 gsi_remove (&gsi, true);
3074 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3075 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3077 /* Emit code to get the next parallel iteration in L2_BB. */
3078 gsi = gsi_start_bb (l2_bb);
3080 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3081 build_fold_addr_expr (istart0),
3082 build_fold_addr_expr (iend0));
3083 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3084 false, GSI_CONTINUE_LINKING);
3085 if (TREE_TYPE (t) != boolean_type_node)
3086 t = fold_build2 (NE_EXPR, boolean_type_node,
3087 t, build_int_cst (TREE_TYPE (t), 0));
3088 gcond *cond_stmt = gimple_build_cond_empty (t);
3089 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3092 /* Add the loop cleanup function. */
3093 gsi = gsi_last_nondebug_bb (exit_bb);
3094 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3095 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3096 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3097 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3098 else
3099 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3100 gcall *call_stmt = gimple_build_call (t, 0);
3101 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3102 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3103 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3104 if (fd->ordered)
3106 tree arr = counts[fd->ordered];
3107 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3108 TREE_THIS_VOLATILE (clobber) = 1;
3109 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3110 GSI_SAME_STMT);
3112 gsi_remove (&gsi, true);
3114 /* Connect the new blocks. */
3115 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3116 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3118 if (!broken_loop)
3120 gimple_seq phis;
3122 e = find_edge (cont_bb, l3_bb);
3123 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3125 phis = phi_nodes (l3_bb);
3126 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3128 gimple *phi = gsi_stmt (gsi);
3129 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3130 PHI_ARG_DEF_FROM_EDGE (phi, e));
3132 remove_edge (e);
3134 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3135 e = find_edge (cont_bb, l1_bb);
3136 if (e == NULL)
3138 e = BRANCH_EDGE (cont_bb);
3139 gcc_assert (single_succ (e->dest) == l1_bb);
3141 if (gimple_omp_for_combined_p (fd->for_stmt))
3143 remove_edge (e);
3144 e = NULL;
3146 else if (fd->collapse > 1)
3148 remove_edge (e);
3149 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3151 else
3152 e->flags = EDGE_TRUE_VALUE;
3153 if (e)
3155 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3156 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3158 else
3160 e = find_edge (cont_bb, l2_bb);
3161 e->flags = EDGE_FALLTHRU;
3163 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3165 if (gimple_in_ssa_p (cfun))
3167 /* Add phis to the outer loop that connect to the phis in the inner,
3168 original loop, and move the loop entry value of the inner phi to
3169 the loop entry value of the outer phi. */
3170 gphi_iterator psi;
3171 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3173 source_location locus;
3174 gphi *nphi;
3175 gphi *exit_phi = psi.phi ();
3177 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3178 continue;
3180 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3183 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184 edge latch_to_l1 = find_edge (latch, l1_bb);
3185 gphi *inner_phi
3186 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3188 tree t = gimple_phi_result (exit_phi);
3189 tree new_res = copy_ssa_name (t, NULL);
3190 nphi = create_phi_node (new_res, l0_bb);
3192 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196 add_phi_arg (nphi, t, entry_to_l0, locus);
3198 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3201 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3205 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206 recompute_dominator (CDI_DOMINATORS, l2_bb));
3207 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208 recompute_dominator (CDI_DOMINATORS, l3_bb));
3209 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210 recompute_dominator (CDI_DOMINATORS, l0_bb));
3211 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212 recompute_dominator (CDI_DOMINATORS, l1_bb));
3214 /* We enter expand_omp_for_generic with a loop. This original loop may
3215 have its own loop struct, or it may be part of an outer loop struct
3216 (which may be the fake loop). */
3217 struct loop *outer_loop = entry_bb->loop_father;
3218 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3220 add_bb_to_loop (l2_bb, outer_loop);
3222 /* We've added a new loop around the original loop. Allocate the
3223 corresponding loop struct. */
3224 struct loop *new_loop = alloc_loop ();
3225 new_loop->header = l0_bb;
3226 new_loop->latch = l2_bb;
3227 add_loop (new_loop, outer_loop);
3229 /* Allocate a loop structure for the original loop unless we already
3230 had one. */
3231 if (!orig_loop_has_loop_struct
3232 && !gimple_omp_for_combined_p (fd->for_stmt))
3234 struct loop *orig_loop = alloc_loop ();
3235 orig_loop->header = l1_bb;
3236 /* The loop may have multiple latches. */
3237 add_loop (orig_loop, new_loop);
3242 /* A subroutine of expand_omp_for. Generate code for a parallel
3243 loop with static schedule and no specified chunk size. Given
3244 parameters:
3246 for (V = N1; V cond N2; V += STEP) BODY;
3248 where COND is "<" or ">", we generate pseudocode
3250 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3251 if (cond is <)
3252 adj = STEP - 1;
3253 else
3254 adj = STEP + 1;
3255 if ((__typeof (V)) -1 > 0 && cond is >)
3256 n = -(adj + N2 - N1) / -STEP;
3257 else
3258 n = (adj + N2 - N1) / STEP;
3259 q = n / nthreads;
3260 tt = n % nthreads;
3261 if (threadid < tt) goto L3; else goto L4;
3263 tt = 0;
3264 q = q + 1;
3266 s0 = q * threadid + tt;
3267 e0 = s0 + q;
3268 V = s0 * STEP + N1;
3269 if (s0 >= e0) goto L2; else goto L0;
3271 e = e0 * STEP + N1;
3273 BODY;
3274 V += STEP;
3275 if (V cond e) goto L1;
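/* With purely illustrative numbers: n = 10 iterations over nthreads = 4
   gives q = 2 and tt = 2, so threads 0 and 1 (threadid < tt) each take
   q + 1 = 3 iterations and threads 2 and 3 take 2, i.e. the half-open
   ranges [0,3), [3,6), [6,8) and [8,10).  */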
3279 static void
3280 expand_omp_for_static_nochunk (struct omp_region *region,
3281 struct omp_for_data *fd,
3282 gimple *inner_stmt)
3284 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285 tree type, itype, vmain, vback;
3286 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287 basic_block body_bb, cont_bb, collapse_bb = NULL;
3288 basic_block fin_bb;
3289 gimple_stmt_iterator gsi;
3290 edge ep;
3291 bool broken_loop = region->cont == NULL;
3292 tree *counts = NULL;
3293 tree n1, n2, step;
3295 itype = type = TREE_TYPE (fd->loop.v);
3296 if (POINTER_TYPE_P (type))
3297 itype = signed_type_for (type);
3299 entry_bb = region->entry;
3300 cont_bb = region->cont;
3301 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303 gcc_assert (broken_loop
3304 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306 body_bb = single_succ (seq_start_bb);
3307 if (!broken_loop)
3309 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3313 exit_bb = region->exit;
3315 /* Iteration space partitioning goes in ENTRY_BB. */
3316 gsi = gsi_last_nondebug_bb (entry_bb);
3317 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3319 if (fd->collapse > 1)
3321 int first_zero_iter = -1, dummy = -1;
3322 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3324 counts = XALLOCAVEC (tree, fd->collapse);
3325 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326 fin_bb, first_zero_iter,
3327 dummy_bb, dummy, l2_dom_bb);
3328 t = NULL_TREE;
3330 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331 t = integer_one_node;
3332 else
3333 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334 fold_convert (type, fd->loop.n1),
3335 fold_convert (type, fd->loop.n2));
3336 if (fd->collapse == 1
3337 && TYPE_UNSIGNED (type)
3338 && (t == NULL_TREE || !integer_onep (t)))
3340 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342 true, GSI_SAME_STMT);
3343 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345 true, GSI_SAME_STMT);
3346 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347 NULL_TREE, NULL_TREE);
3348 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350 expand_omp_regimplify_p, NULL, NULL)
3351 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352 expand_omp_regimplify_p, NULL, NULL))
3354 gsi = gsi_for_stmt (cond_stmt);
3355 gimple_regimplify_operands (cond_stmt, &gsi);
3357 ep = split_block (entry_bb, cond_stmt);
3358 ep->flags = EDGE_TRUE_VALUE;
3359 entry_bb = ep->dest;
3360 ep->probability = profile_probability::very_likely ();
3361 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362 ep->probability = profile_probability::very_unlikely ();
3363 if (gimple_in_ssa_p (cfun))
3365 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367 !gsi_end_p (gpi); gsi_next (&gpi))
3369 gphi *phi = gpi.phi ();
3370 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371 ep, UNKNOWN_LOCATION);
3374 gsi = gsi_last_bb (entry_bb);
3377 switch (gimple_omp_for_kind (fd->for_stmt))
3379 case GF_OMP_FOR_KIND_FOR:
3380 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382 break;
3383 case GF_OMP_FOR_KIND_DISTRIBUTE:
3384 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386 break;
3387 default:
3388 gcc_unreachable ();
3390 nthreads = build_call_expr (nthreads, 0);
3391 nthreads = fold_convert (itype, nthreads);
3392 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393 true, GSI_SAME_STMT);
3394 threadid = build_call_expr (threadid, 0);
3395 threadid = fold_convert (itype, threadid);
3396 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397 true, GSI_SAME_STMT);
3399 n1 = fd->loop.n1;
3400 n2 = fd->loop.n2;
3401 step = fd->loop.step;
3402 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3404 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405 OMP_CLAUSE__LOOPTEMP_);
3406 gcc_assert (innerc);
3407 n1 = OMP_CLAUSE_DECL (innerc);
3408 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409 OMP_CLAUSE__LOOPTEMP_);
3410 gcc_assert (innerc);
3411 n2 = OMP_CLAUSE_DECL (innerc);
3413 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414 true, NULL_TREE, true, GSI_SAME_STMT);
3415 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416 true, NULL_TREE, true, GSI_SAME_STMT);
3417 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418 true, NULL_TREE, true, GSI_SAME_STMT);
3420 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421 t = fold_build2 (PLUS_EXPR, itype, step, t);
3422 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426 fold_build1 (NEGATE_EXPR, itype, t),
3427 fold_build1 (NEGATE_EXPR, itype, step));
3428 else
3429 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430 t = fold_convert (itype, t);
3431 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3433 q = create_tmp_reg (itype, "q");
3434 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3438 tt = create_tmp_reg (itype, "tt");
3439 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3443 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444 gcond *cond_stmt = gimple_build_cond_empty (t);
3445 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3447 second_bb = split_block (entry_bb, cond_stmt)->dest;
3448 gsi = gsi_last_nondebug_bb (second_bb);
3449 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3451 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452 GSI_SAME_STMT);
3453 gassign *assign_stmt
3454 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3457 third_bb = split_block (second_bb, assign_stmt)->dest;
3458 gsi = gsi_last_nondebug_bb (third_bb);
3459 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3461 t = build2 (MULT_EXPR, itype, q, threadid);
3462 t = build2 (PLUS_EXPR, itype, t, tt);
3463 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3465 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3468 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3471 /* Remove the GIMPLE_OMP_FOR statement. */
3472 gsi_remove (&gsi, true);
3474 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3475 gsi = gsi_start_bb (seq_start_bb);
3477 tree startvar = fd->loop.v;
3478 tree endvar = NULL_TREE;
3480 if (gimple_omp_for_combined_p (fd->for_stmt))
3482 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483 ? gimple_omp_parallel_clauses (inner_stmt)
3484 : gimple_omp_for_clauses (inner_stmt);
3485 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486 gcc_assert (innerc);
3487 startvar = OMP_CLAUSE_DECL (innerc);
3488 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489 OMP_CLAUSE__LOOPTEMP_);
3490 gcc_assert (innerc);
3491 endvar = OMP_CLAUSE_DECL (innerc);
3492 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3495 int i;
3496 for (i = 1; i < fd->collapse; i++)
3498 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499 OMP_CLAUSE__LOOPTEMP_);
3500 gcc_assert (innerc);
3502 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503 OMP_CLAUSE__LOOPTEMP_);
3504 if (innerc)
3506 /* If needed (distribute parallel for with lastprivate),
3507 propagate down the total number of iterations. */
3508 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509 fd->loop.n2);
3510 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511 GSI_CONTINUE_LINKING);
3512 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3517 t = fold_convert (itype, s0);
3518 t = fold_build2 (MULT_EXPR, itype, t, step);
3519 if (POINTER_TYPE_P (type))
3520 t = fold_build_pointer_plus (n1, t);
3521 else
3522 t = fold_build2 (PLUS_EXPR, type, t, n1);
3523 t = fold_convert (TREE_TYPE (startvar), t);
3524 t = force_gimple_operand_gsi (&gsi, t,
3525 DECL_P (startvar)
3526 && TREE_ADDRESSABLE (startvar),
3527 NULL_TREE, false, GSI_CONTINUE_LINKING);
3528 assign_stmt = gimple_build_assign (startvar, t);
3529 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3531 t = fold_convert (itype, e0);
3532 t = fold_build2 (MULT_EXPR, itype, t, step);
3533 if (POINTER_TYPE_P (type))
3534 t = fold_build_pointer_plus (n1, t);
3535 else
3536 t = fold_build2 (PLUS_EXPR, type, t, n1);
3537 t = fold_convert (TREE_TYPE (startvar), t);
3538 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539 false, GSI_CONTINUE_LINKING);
3540 if (endvar)
3542 assign_stmt = gimple_build_assign (endvar, e);
3543 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545 assign_stmt = gimple_build_assign (fd->loop.v, e);
3546 else
3547 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3550 /* Handle linear clause adjustments. */
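/* E.g. for a clause "linear (x:3)" each thread must enter its slice with
   x advanced by 3 * s0, i.e. by the linear step times the number of
   logical iterations that precede the thread's first one; that is what
   the loop below computes (the clause and numbers here are illustrative
   only).  */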
3551 tree itercnt = NULL_TREE;
3552 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554 c; c = OMP_CLAUSE_CHAIN (c))
3555 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3558 tree d = OMP_CLAUSE_DECL (c);
3559 bool is_ref = omp_is_reference (d);
3560 tree t = d, a, dest;
3561 if (is_ref)
3562 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563 if (itercnt == NULL_TREE)
3565 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3567 itercnt = fold_build2 (MINUS_EXPR, itype,
3568 fold_convert (itype, n1),
3569 fold_convert (itype, fd->loop.n1));
3570 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573 NULL_TREE, false,
3574 GSI_CONTINUE_LINKING);
3576 else
3577 itercnt = s0;
3579 tree type = TREE_TYPE (t);
3580 if (POINTER_TYPE_P (type))
3581 type = sizetype;
3582 a = fold_build2 (MULT_EXPR, type,
3583 fold_convert (type, itercnt),
3584 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585 dest = unshare_expr (t);
3586 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589 false, GSI_CONTINUE_LINKING);
3590 assign_stmt = gimple_build_assign (dest, t);
3591 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3593 if (fd->collapse > 1)
3594 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3596 if (!broken_loop)
3598 /* The code controlling the sequential loop replaces the
3599 GIMPLE_OMP_CONTINUE. */
3600 gsi = gsi_last_nondebug_bb (cont_bb);
3601 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603 vmain = gimple_omp_continue_control_use (cont_stmt);
3604 vback = gimple_omp_continue_control_def (cont_stmt);
3606 if (!gimple_omp_for_combined_p (fd->for_stmt))
3608 if (POINTER_TYPE_P (type))
3609 t = fold_build_pointer_plus (vmain, step);
3610 else
3611 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612 t = force_gimple_operand_gsi (&gsi, t,
3613 DECL_P (vback)
3614 && TREE_ADDRESSABLE (vback),
3615 NULL_TREE, true, GSI_SAME_STMT);
3616 assign_stmt = gimple_build_assign (vback, t);
3617 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3619 t = build2 (fd->loop.cond_code, boolean_type_node,
3620 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621 ? t : vback, e);
3622 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3625 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3626 gsi_remove (&gsi, true);
3628 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3632 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3633 gsi = gsi_last_nondebug_bb (exit_bb);
3634 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3636 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3639 gsi_remove (&gsi, true);
3641 /* Connect all the blocks. */
3642 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3644 ep = find_edge (entry_bb, second_bb);
3645 ep->flags = EDGE_TRUE_VALUE;
3646 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3647 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3650 if (!broken_loop)
3652 ep = find_edge (cont_bb, body_bb);
3653 if (ep == NULL)
3655 ep = BRANCH_EDGE (cont_bb);
3656 gcc_assert (single_succ (ep->dest) == body_bb);
3658 if (gimple_omp_for_combined_p (fd->for_stmt))
3660 remove_edge (ep);
3661 ep = NULL;
3663 else if (fd->collapse > 1)
3665 remove_edge (ep);
3666 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3668 else
3669 ep->flags = EDGE_TRUE_VALUE;
3670 find_edge (cont_bb, fin_bb)->flags
3671 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3674 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3678 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679 recompute_dominator (CDI_DOMINATORS, body_bb));
3680 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681 recompute_dominator (CDI_DOMINATORS, fin_bb));
3683 struct loop *loop = body_bb->loop_father;
3684 if (loop != entry_bb->loop_father)
3686 gcc_assert (broken_loop || loop->header == body_bb);
3687 gcc_assert (broken_loop
3688 || loop->latch == region->cont
3689 || single_pred (loop->latch) == region->cont);
3690 return;
3693 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3695 loop = alloc_loop ();
3696 loop->header = body_bb;
3697 if (collapse_bb == NULL)
3698 loop->latch = cont_bb;
3699 add_loop (loop, body_bb->loop_father);
3703 /* Return phi in E->DEST with ARG on edge E. */
3705 static gphi *
3706 find_phi_with_arg_on_edge (tree arg, edge e)
3708 basic_block bb = e->dest;
3710 for (gphi_iterator gpi = gsi_start_phis (bb);
3711 !gsi_end_p (gpi);
3712 gsi_next (&gpi))
3714 gphi *phi = gpi.phi ();
3715 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716 return phi;
3719 return NULL;
3722 /* A subroutine of expand_omp_for. Generate code for a parallel
3723 loop with static schedule and a specified chunk size. Given
3724 parameters:
3726 for (V = N1; V cond N2; V += STEP) BODY;
3728 where COND is "<" or ">", we generate pseudocode
3730 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3731 if (cond is <)
3732 adj = STEP - 1;
3733 else
3734 adj = STEP + 1;
3735 if ((__typeof (V)) -1 > 0 && cond is >)
3736 n = -(adj + N2 - N1) / -STEP;
3737 else
3738 n = (adj + N2 - N1) / STEP;
3739 trip = 0;
3740 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3741 here so that V is defined
3742 if the loop is not entered
3743 L0:
3744 s0 = (trip * nthreads + threadid) * CHUNK;
3745 e0 = min (s0 + CHUNK, n);
3746 if (s0 < n) goto L1; else goto L4;
3747 L1:
3748 V = s0 * STEP + N1;
3749 e = e0 * STEP + N1;
3750 L2:
3751 BODY;
3752 V += STEP;
3753 if (V cond e) goto L2; else goto L3;
3754 L3:
3755 trip += 1;
3756 goto L0;
3757 L4:
3758 */
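/* Worked example (illustrative only): with nthreads = 4, threadid = 1,
   CHUNK = 5 and n = 37 logical iterations, thread 1 executes

     trip 0: s0 = (0 * 4 + 1) * 5 = 5,  e0 = min (10, 37) = 10  -> [5, 10)
     trip 1: s0 = (1 * 4 + 1) * 5 = 25, e0 = min (30, 37) = 30  -> [25, 30)
     trip 2: s0 = (2 * 4 + 1) * 5 = 45, 45 >= 37                -> done

   i.e. chunks of CHUNK logical iterations are dealt out round-robin, and
   each logical iteration i is mapped to V = i * STEP + N1.  */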
3760 static void
3761 expand_omp_for_static_chunk (struct omp_region *region,
3762 struct omp_for_data *fd, gimple *inner_stmt)
3764 tree n, s0, e0, e, t;
3765 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766 tree type, itype, vmain, vback, vextra;
3767 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769 gimple_stmt_iterator gsi;
3770 edge se;
3771 bool broken_loop = region->cont == NULL;
3772 tree *counts = NULL;
3773 tree n1, n2, step;
3775 itype = type = TREE_TYPE (fd->loop.v);
3776 if (POINTER_TYPE_P (type))
3777 itype = signed_type_for (type);
3779 entry_bb = region->entry;
3780 se = split_block (entry_bb, last_stmt (entry_bb));
3781 entry_bb = se->src;
3782 iter_part_bb = se->dest;
3783 cont_bb = region->cont;
3784 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786 gcc_assert (broken_loop
3787 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789 body_bb = single_succ (seq_start_bb);
3790 if (!broken_loop)
3792 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3797 exit_bb = region->exit;
3799 /* Trip and adjustment setup goes in ENTRY_BB. */
3800 gsi = gsi_last_nondebug_bb (entry_bb);
3801 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3803 if (fd->collapse > 1)
3805 int first_zero_iter = -1, dummy = -1;
3806 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3808 counts = XALLOCAVEC (tree, fd->collapse);
3809 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810 fin_bb, first_zero_iter,
3811 dummy_bb, dummy, l2_dom_bb);
3812 t = NULL_TREE;
3814 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815 t = integer_one_node;
3816 else
3817 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818 fold_convert (type, fd->loop.n1),
3819 fold_convert (type, fd->loop.n2));
3820 if (fd->collapse == 1
3821 && TYPE_UNSIGNED (type)
3822 && (t == NULL_TREE || !integer_onep (t)))
3824 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826 true, GSI_SAME_STMT);
3827 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829 true, GSI_SAME_STMT);
3830 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831 NULL_TREE, NULL_TREE);
3832 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834 expand_omp_regimplify_p, NULL, NULL)
3835 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836 expand_omp_regimplify_p, NULL, NULL))
3838 gsi = gsi_for_stmt (cond_stmt);
3839 gimple_regimplify_operands (cond_stmt, &gsi);
3841 se = split_block (entry_bb, cond_stmt);
3842 se->flags = EDGE_TRUE_VALUE;
3843 entry_bb = se->dest;
3844 se->probability = profile_probability::very_likely ();
3845 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846 se->probability = profile_probability::very_unlikely ();
3847 if (gimple_in_ssa_p (cfun))
3849 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851 !gsi_end_p (gpi); gsi_next (&gpi))
3853 gphi *phi = gpi.phi ();
3854 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855 se, UNKNOWN_LOCATION);
3858 gsi = gsi_last_bb (entry_bb);
3861 switch (gimple_omp_for_kind (fd->for_stmt))
3863 case GF_OMP_FOR_KIND_FOR:
3864 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866 break;
3867 case GF_OMP_FOR_KIND_DISTRIBUTE:
3868 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870 break;
3871 default:
3872 gcc_unreachable ();
3874 nthreads = build_call_expr (nthreads, 0);
3875 nthreads = fold_convert (itype, nthreads);
3876 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877 true, GSI_SAME_STMT);
3878 threadid = build_call_expr (threadid, 0);
3879 threadid = fold_convert (itype, threadid);
3880 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881 true, GSI_SAME_STMT);
3883 n1 = fd->loop.n1;
3884 n2 = fd->loop.n2;
3885 step = fd->loop.step;
3886 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3888 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889 OMP_CLAUSE__LOOPTEMP_);
3890 gcc_assert (innerc);
3891 n1 = OMP_CLAUSE_DECL (innerc);
3892 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893 OMP_CLAUSE__LOOPTEMP_);
3894 gcc_assert (innerc);
3895 n2 = OMP_CLAUSE_DECL (innerc);
3897 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898 true, NULL_TREE, true, GSI_SAME_STMT);
3899 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900 true, NULL_TREE, true, GSI_SAME_STMT);
3901 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902 true, NULL_TREE, true, GSI_SAME_STMT);
3903 tree chunk_size = fold_convert (itype, fd->chunk_size);
3904 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905 chunk_size
3906 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907 GSI_SAME_STMT);
3909 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910 t = fold_build2 (PLUS_EXPR, itype, step, t);
3911 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915 fold_build1 (NEGATE_EXPR, itype, t),
3916 fold_build1 (NEGATE_EXPR, itype, step));
3917 else
3918 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919 t = fold_convert (itype, t);
3920 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 true, GSI_SAME_STMT);
3923 trip_var = create_tmp_reg (itype, ".trip");
3924 if (gimple_in_ssa_p (cfun))
3926 trip_init = make_ssa_name (trip_var);
3927 trip_main = make_ssa_name (trip_var);
3928 trip_back = make_ssa_name (trip_var);
3930 else
3932 trip_init = trip_var;
3933 trip_main = trip_var;
3934 trip_back = trip_var;
3937 gassign *assign_stmt
3938 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
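/* Compute VEXTRA = threadid * CHUNK * STEP + N1, the "extra definition
   of V" from the pseudocode above; in SSA form it supplies the value of
   fd->loop.v on the edge from ENTRY_BB into ITER_PART_BB (see the phi
   fixup below) for the case where this thread never executes a chunk.  */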
3941 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942 t = fold_build2 (MULT_EXPR, itype, t, step);
3943 if (POINTER_TYPE_P (type))
3944 t = fold_build_pointer_plus (n1, t);
3945 else
3946 t = fold_build2 (PLUS_EXPR, type, t, n1);
3947 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 true, GSI_SAME_STMT);
3950 /* Remove the GIMPLE_OMP_FOR. */
3951 gsi_remove (&gsi, true);
3953 gimple_stmt_iterator gsif = gsi;
3955 /* Iteration space partitioning goes in ITER_PART_BB. */
3956 gsi = gsi_last_bb (iter_part_bb);
3958 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 false, GSI_CONTINUE_LINKING);
3964 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965 t = fold_build2 (MIN_EXPR, itype, t, n);
3966 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967 false, GSI_CONTINUE_LINKING);
3969 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3972 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3973 gsi = gsi_start_bb (seq_start_bb);
3975 tree startvar = fd->loop.v;
3976 tree endvar = NULL_TREE;
3978 if (gimple_omp_for_combined_p (fd->for_stmt))
3980 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981 ? gimple_omp_parallel_clauses (inner_stmt)
3982 : gimple_omp_for_clauses (inner_stmt);
3983 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984 gcc_assert (innerc);
3985 startvar = OMP_CLAUSE_DECL (innerc);
3986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987 OMP_CLAUSE__LOOPTEMP_);
3988 gcc_assert (innerc);
3989 endvar = OMP_CLAUSE_DECL (innerc);
3990 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3993 int i;
3994 for (i = 1; i < fd->collapse; i++)
3996 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997 OMP_CLAUSE__LOOPTEMP_);
3998 gcc_assert (innerc);
4000 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001 OMP_CLAUSE__LOOPTEMP_);
4002 if (innerc)
4004 /* If needed (distribute parallel for with lastprivate),
4005 propagate down the total number of iterations. */
4006 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007 fd->loop.n2);
4008 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009 GSI_CONTINUE_LINKING);
4010 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4016 t = fold_convert (itype, s0);
4017 t = fold_build2 (MULT_EXPR, itype, t, step);
4018 if (POINTER_TYPE_P (type))
4019 t = fold_build_pointer_plus (n1, t);
4020 else
4021 t = fold_build2 (PLUS_EXPR, type, t, n1);
4022 t = fold_convert (TREE_TYPE (startvar), t);
4023 t = force_gimple_operand_gsi (&gsi, t,
4024 DECL_P (startvar)
4025 && TREE_ADDRESSABLE (startvar),
4026 NULL_TREE, false, GSI_CONTINUE_LINKING);
4027 assign_stmt = gimple_build_assign (startvar, t);
4028 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4030 t = fold_convert (itype, e0);
4031 t = fold_build2 (MULT_EXPR, itype, t, step);
4032 if (POINTER_TYPE_P (type))
4033 t = fold_build_pointer_plus (n1, t);
4034 else
4035 t = fold_build2 (PLUS_EXPR, type, t, n1);
4036 t = fold_convert (TREE_TYPE (startvar), t);
4037 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038 false, GSI_CONTINUE_LINKING);
4039 if (endvar)
4041 assign_stmt = gimple_build_assign (endvar, e);
4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044 assign_stmt = gimple_build_assign (fd->loop.v, e);
4045 else
4046 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4049 /* Handle linear clause adjustments. */
4050 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053 c; c = OMP_CLAUSE_CHAIN (c))
4054 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4057 tree d = OMP_CLAUSE_DECL (c);
4058 bool is_ref = omp_is_reference (d);
4059 tree t = d, a, dest;
4060 if (is_ref)
4061 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062 tree type = TREE_TYPE (t);
4063 if (POINTER_TYPE_P (type))
4064 type = sizetype;
4065 dest = unshare_expr (t);
4066 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067 expand_omp_build_assign (&gsif, v, t);
4068 if (itercnt == NULL_TREE)
4070 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4072 itercntbias
4073 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074 fold_convert (itype, fd->loop.n1));
4075 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076 itercntbias, step);
4077 itercntbias
4078 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079 NULL_TREE, true,
4080 GSI_SAME_STMT);
4081 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083 NULL_TREE, false,
4084 GSI_CONTINUE_LINKING);
4086 else
4087 itercnt = s0;
4089 a = fold_build2 (MULT_EXPR, type,
4090 fold_convert (type, itercnt),
4091 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095 false, GSI_CONTINUE_LINKING);
4096 assign_stmt = gimple_build_assign (dest, t);
4097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4099 if (fd->collapse > 1)
4100 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4102 if (!broken_loop)
4104 /* The code controlling the sequential loop goes in CONT_BB,
4105 replacing the GIMPLE_OMP_CONTINUE. */
4106 gsi = gsi_last_nondebug_bb (cont_bb);
4107 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108 vmain = gimple_omp_continue_control_use (cont_stmt);
4109 vback = gimple_omp_continue_control_def (cont_stmt);
4111 if (!gimple_omp_for_combined_p (fd->for_stmt))
4113 if (POINTER_TYPE_P (type))
4114 t = fold_build_pointer_plus (vmain, step);
4115 else
4116 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4117 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119 true, GSI_SAME_STMT);
4120 assign_stmt = gimple_build_assign (vback, t);
4121 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
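/* With a chunk size of 1 each chunk contains exactly one logical
   iteration, so the sequential loop can never take its back edge; build
   a constant-false condition (0 == 1) instead of the usual V cond E
   test.  */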
4123 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124 t = build2 (EQ_EXPR, boolean_type_node,
4125 build_int_cst (itype, 0),
4126 build_int_cst (itype, 1));
4127 else
4128 t = build2 (fd->loop.cond_code, boolean_type_node,
4129 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130 ? t : vback, e);
4131 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4134 /* Remove GIMPLE_OMP_CONTINUE. */
4135 gsi_remove (&gsi, true);
4137 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4140 /* Trip update code goes into TRIP_UPDATE_BB. */
4141 gsi = gsi_start_bb (trip_update_bb);
4143 t = build_int_cst (itype, 1);
4144 t = build2 (PLUS_EXPR, itype, trip_main, t);
4145 assign_stmt = gimple_build_assign (trip_back, t);
4146 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4149 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4150 gsi = gsi_last_nondebug_bb (exit_bb);
4151 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4153 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4156 gsi_remove (&gsi, true);
4158 /* Connect the new blocks. */
4159 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4162 if (!broken_loop)
4164 se = find_edge (cont_bb, body_bb);
4165 if (se == NULL)
4167 se = BRANCH_EDGE (cont_bb);
4168 gcc_assert (single_succ (se->dest) == body_bb);
4170 if (gimple_omp_for_combined_p (fd->for_stmt))
4172 remove_edge (se);
4173 se = NULL;
4175 else if (fd->collapse > 1)
4177 remove_edge (se);
4178 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4180 else
4181 se->flags = EDGE_TRUE_VALUE;
4182 find_edge (cont_bb, trip_update_bb)->flags
4183 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4185 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186 iter_part_bb);
4189 if (gimple_in_ssa_p (cfun))
4191 gphi_iterator psi;
4192 gphi *phi;
4193 edge re, ene;
4194 edge_var_map *vm;
4195 size_t i;
4197 gcc_assert (fd->collapse == 1 && !broken_loop);
4199 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200 remove arguments of the phi nodes in fin_bb. We need to create
4201 appropriate phi nodes in iter_part_bb instead. */
4202 se = find_edge (iter_part_bb, fin_bb);
4203 re = single_succ_edge (trip_update_bb);
4204 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205 ene = single_succ_edge (entry_bb);
4207 psi = gsi_start_phis (fin_bb);
4208 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209 gsi_next (&psi), ++i)
4211 gphi *nphi;
4212 source_location locus;
4214 phi = psi.phi ();
4215 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4216 redirect_edge_var_map_def (vm), 0))
4217 continue;
4219 t = gimple_phi_result (phi);
4220 gcc_assert (t == redirect_edge_var_map_result (vm));
4222 if (!single_pred_p (fin_bb))
4223 t = copy_ssa_name (t, phi);
4225 nphi = create_phi_node (t, iter_part_bb);
4227 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4228 locus = gimple_phi_arg_location_from_edge (phi, se);
4230 /* A special case -- fd->loop.v is not yet computed in
4231 iter_part_bb; we need to use vextra instead. */
4232 if (t == fd->loop.v)
4233 t = vextra;
4234 add_phi_arg (nphi, t, ene, locus);
4235 locus = redirect_edge_var_map_location (vm);
4236 tree back_arg = redirect_edge_var_map_def (vm);
4237 add_phi_arg (nphi, back_arg, re, locus);
4238 edge ce = find_edge (cont_bb, body_bb);
4239 if (ce == NULL)
4241 ce = BRANCH_EDGE (cont_bb);
4242 gcc_assert (single_succ (ce->dest) == body_bb);
4243 ce = single_succ_edge (ce->dest);
4245 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4246 gcc_assert (inner_loop_phi != NULL);
4247 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4248 find_edge (seq_start_bb, body_bb), locus);
4250 if (!single_pred_p (fin_bb))
4251 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4253 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4254 redirect_edge_var_map_clear (re);
4255 if (single_pred_p (fin_bb))
4256 while (1)
4258 psi = gsi_start_phis (fin_bb);
4259 if (gsi_end_p (psi))
4260 break;
4261 remove_phi_node (&psi, false);
4264 /* Make phi node for trip. */
4265 phi = create_phi_node (trip_main, iter_part_bb);
4266 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4267 UNKNOWN_LOCATION);
4268 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4269 UNKNOWN_LOCATION);
4272 if (!broken_loop)
4273 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4274 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4275 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4276 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4277 recompute_dominator (CDI_DOMINATORS, fin_bb));
4278 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4279 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4280 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4281 recompute_dominator (CDI_DOMINATORS, body_bb));
4283 if (!broken_loop)
4285 struct loop *loop = body_bb->loop_father;
4286 struct loop *trip_loop = alloc_loop ();
4287 trip_loop->header = iter_part_bb;
4288 trip_loop->latch = trip_update_bb;
4289 add_loop (trip_loop, iter_part_bb->loop_father);
4291 if (loop != entry_bb->loop_father)
4293 gcc_assert (loop->header == body_bb);
4294 gcc_assert (loop->latch == region->cont
4295 || single_pred (loop->latch) == region->cont);
4296 trip_loop->inner = loop;
4297 return;
4300 if (!gimple_omp_for_combined_p (fd->for_stmt))
4302 loop = alloc_loop ();
4303 loop->header = body_bb;
4304 if (collapse_bb == NULL)
4305 loop->latch = cont_bb;
4306 add_loop (loop, trip_loop);
4311 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4312 loop. Given parameters:
4314 for (V = N1; V cond N2; V += STEP) BODY;
4316 where COND is "<" or ">", we generate pseudocode
4318 V = N1;
4319 goto L1;
4320 L0:
4321 BODY;
4322 V += STEP;
4323 L1:
4324 if (V cond N2) goto L0; else goto L2;
4325 L2:
4327 For collapsed loops, given parameters:
4328 collapse(3)
4329 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4330 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4331 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4332 BODY;
4334 we generate pseudocode
4336 if (cond3 is <)
4337 adj = STEP3 - 1;
4338 else
4339 adj = STEP3 + 1;
4340 count3 = (adj + N32 - N31) / STEP3;
4341 if (cond2 is <)
4342 adj = STEP2 - 1;
4343 else
4344 adj = STEP2 + 1;
4345 count2 = (adj + N22 - N21) / STEP2;
4346 if (cond1 is <)
4347 adj = STEP1 - 1;
4348 else
4349 adj = STEP1 + 1;
4350 count1 = (adj + N12 - N11) / STEP1;
4351 count = count1 * count2 * count3;
4352 V = 0;
4353 V1 = N11;
4354 V2 = N21;
4355 V3 = N31;
4356 goto L1;
4357 L0:
4358 BODY;
4359 V += 1;
4360 V3 += STEP3;
4361 V2 += (V3 cond3 N32) ? 0 : STEP2;
4362 V3 = (V3 cond3 N32) ? V3 : N31;
4363 V1 += (V2 cond2 N22) ? 0 : STEP1;
4364 V2 = (V2 cond2 N22) ? V2 : N21;
4365 L1:
4366 if (V < count) goto L0; else goto L2;
4367 L2:
4368 */
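/* Illustrative sketch only (not emitted by the pass): assuming each
   inner loop runs at least one iteration and using explicit branches
   instead of the COND_EXPR updates above, the collapsed increment step
   behaves like an odometer carry:

     V += 1;
     V3 += STEP3;
     if (!(V3 cond3 N32))       // innermost loop wrapped around
       {
         V3 = N31;
         V2 += STEP2;
         if (!(V2 cond2 N22))   // middle loop wrapped around
           {
             V2 = N21;
             V1 += STEP1;
           }
       }

   so V simply counts linearized iterations from 0 to count - 1 while
   V1/V2/V3 track the original induction variables.  */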
4371 static void
4372 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4374 tree type, t;
4375 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4376 gimple_stmt_iterator gsi;
4377 gimple *stmt;
4378 gcond *cond_stmt;
4379 bool broken_loop = region->cont == NULL;
4380 edge e, ne;
4381 tree *counts = NULL;
4382 int i;
4383 int safelen_int = INT_MAX;
4384 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4385 OMP_CLAUSE_SAFELEN);
4386 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4387 OMP_CLAUSE__SIMDUID_);
4388 tree n1, n2;
4390 if (safelen)
4392 poly_uint64 val;
4393 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4394 if (!poly_int_tree_p (safelen, &val))
4395 safelen_int = 0;
4396 else
4397 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4398 if (safelen_int == 1)
4399 safelen_int = 0;
4401 type = TREE_TYPE (fd->loop.v);
4402 entry_bb = region->entry;
4403 cont_bb = region->cont;
4404 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4405 gcc_assert (broken_loop
4406 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4407 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4408 if (!broken_loop)
4410 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4411 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4412 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4413 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4415 else
4417 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4418 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4419 l2_bb = single_succ (l1_bb);
4421 exit_bb = region->exit;
4422 l2_dom_bb = NULL;
4424 gsi = gsi_last_nondebug_bb (entry_bb);
4426 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4427 /* Not needed in SSA form right now. */
4428 gcc_assert (!gimple_in_ssa_p (cfun));
4429 if (fd->collapse > 1)
4431 int first_zero_iter = -1, dummy = -1;
4432 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4434 counts = XALLOCAVEC (tree, fd->collapse);
4435 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4436 zero_iter_bb, first_zero_iter,
4437 dummy_bb, dummy, l2_dom_bb);
4439 if (l2_dom_bb == NULL)
4440 l2_dom_bb = l1_bb;
4442 n1 = fd->loop.n1;
4443 n2 = fd->loop.n2;
4444 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4446 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4447 OMP_CLAUSE__LOOPTEMP_);
4448 gcc_assert (innerc);
4449 n1 = OMP_CLAUSE_DECL (innerc);
4450 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4451 OMP_CLAUSE__LOOPTEMP_);
4452 gcc_assert (innerc);
4453 n2 = OMP_CLAUSE_DECL (innerc);
4455 tree step = fd->loop.step;
4457 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4458 OMP_CLAUSE__SIMT_);
4459 if (is_simt)
4461 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4462 is_simt = safelen_int > 1;
4464 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
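/* For SIMT targets the block below offsets the starting value N1 by
   simt_lane * STEP and (further down) scales STEP by the capped per-lane
   vector width, so lane K executes iterations K, K + VF, K + 2 * VF, ...
   of the original loop.  */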
4465 if (is_simt)
4467 simt_lane = create_tmp_var (unsigned_type_node);
4468 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4469 gimple_call_set_lhs (g, simt_lane);
4470 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4471 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4472 fold_convert (TREE_TYPE (step), simt_lane));
4473 n1 = fold_convert (type, n1);
4474 if (POINTER_TYPE_P (type))
4475 n1 = fold_build_pointer_plus (n1, offset);
4476 else
4477 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4479 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4480 if (fd->collapse > 1)
4481 simt_maxlane = build_one_cst (unsigned_type_node);
4482 else if (safelen_int < omp_max_simt_vf ())
4483 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4484 tree vf
4485 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4486 unsigned_type_node, 0);
4487 if (simt_maxlane)
4488 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4489 vf = fold_convert (TREE_TYPE (step), vf);
4490 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4493 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4494 if (fd->collapse > 1)
4496 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4498 gsi_prev (&gsi);
4499 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4500 gsi_next (&gsi);
4502 else
4503 for (i = 0; i < fd->collapse; i++)
4505 tree itype = TREE_TYPE (fd->loops[i].v);
4506 if (POINTER_TYPE_P (itype))
4507 itype = signed_type_for (itype);
4508 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4509 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4513 /* Remove the GIMPLE_OMP_FOR statement. */
4514 gsi_remove (&gsi, true);
4516 if (!broken_loop)
4518 /* Code to control the increment goes in the CONT_BB. */
4519 gsi = gsi_last_nondebug_bb (cont_bb);
4520 stmt = gsi_stmt (gsi);
4521 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4523 if (POINTER_TYPE_P (type))
4524 t = fold_build_pointer_plus (fd->loop.v, step);
4525 else
4526 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4527 expand_omp_build_assign (&gsi, fd->loop.v, t);
4529 if (fd->collapse > 1)
4531 i = fd->collapse - 1;
4532 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4534 t = fold_convert (sizetype, fd->loops[i].step);
4535 t = fold_build_pointer_plus (fd->loops[i].v, t);
4537 else
4539 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4540 fd->loops[i].step);
4541 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4542 fd->loops[i].v, t);
4544 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4546 for (i = fd->collapse - 1; i > 0; i--)
4548 tree itype = TREE_TYPE (fd->loops[i].v);
4549 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4550 if (POINTER_TYPE_P (itype2))
4551 itype2 = signed_type_for (itype2);
4552 t = fold_convert (itype2, fd->loops[i - 1].step);
4553 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4554 GSI_SAME_STMT);
4555 t = build3 (COND_EXPR, itype2,
4556 build2 (fd->loops[i].cond_code, boolean_type_node,
4557 fd->loops[i].v,
4558 fold_convert (itype, fd->loops[i].n2)),
4559 build_int_cst (itype2, 0), t);
4560 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4561 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4562 else
4563 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4564 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4566 t = fold_convert (itype, fd->loops[i].n1);
4567 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4568 GSI_SAME_STMT);
4569 t = build3 (COND_EXPR, itype,
4570 build2 (fd->loops[i].cond_code, boolean_type_node,
4571 fd->loops[i].v,
4572 fold_convert (itype, fd->loops[i].n2)),
4573 fd->loops[i].v, t);
4574 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4578 /* Remove GIMPLE_OMP_CONTINUE. */
4579 gsi_remove (&gsi, true);
4582 /* Emit the condition in L1_BB. */
4583 gsi = gsi_start_bb (l1_bb);
4585 t = fold_convert (type, n2);
4586 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4587 false, GSI_CONTINUE_LINKING);
4588 tree v = fd->loop.v;
4589 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4590 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4591 false, GSI_CONTINUE_LINKING);
4592 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4593 cond_stmt = gimple_build_cond_empty (t);
4594 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4595 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4596 NULL, NULL)
4597 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4598 NULL, NULL))
4600 gsi = gsi_for_stmt (cond_stmt);
4601 gimple_regimplify_operands (cond_stmt, &gsi);
4604 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4605 if (is_simt)
4607 gsi = gsi_start_bb (l2_bb);
4608 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4609 if (POINTER_TYPE_P (type))
4610 t = fold_build_pointer_plus (fd->loop.v, step);
4611 else
4612 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4613 expand_omp_build_assign (&gsi, fd->loop.v, t);
4616 /* Remove GIMPLE_OMP_RETURN. */
4617 gsi = gsi_last_nondebug_bb (exit_bb);
4618 gsi_remove (&gsi, true);
4620 /* Connect the new blocks. */
4621 remove_edge (FALLTHRU_EDGE (entry_bb));
4623 if (!broken_loop)
4625 remove_edge (BRANCH_EDGE (entry_bb));
4626 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4628 e = BRANCH_EDGE (l1_bb);
4629 ne = FALLTHRU_EDGE (l1_bb);
4630 e->flags = EDGE_TRUE_VALUE;
4632 else
4634 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4636 ne = single_succ_edge (l1_bb);
4637 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4640 ne->flags = EDGE_FALSE_VALUE;
4641 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4642 ne->probability = e->probability.invert ();
4644 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4645 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4647 if (simt_maxlane)
4649 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4650 NULL_TREE, NULL_TREE);
4651 gsi = gsi_last_bb (entry_bb);
4652 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4653 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4654 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4655 FALLTHRU_EDGE (entry_bb)->probability
4656 = profile_probability::guessed_always ().apply_scale (7, 8);
4657 BRANCH_EDGE (entry_bb)->probability
4658 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4659 l2_dom_bb = entry_bb;
4661 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4663 if (!broken_loop)
4665 struct loop *loop = alloc_loop ();
4666 loop->header = l1_bb;
4667 loop->latch = cont_bb;
4668 add_loop (loop, l1_bb->loop_father);
4669 loop->safelen = safelen_int;
4670 if (simduid)
4672 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4673 cfun->has_simduid_loops = true;
4675 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4676 the loop. */
4677 if ((flag_tree_loop_vectorize
4678 || !global_options_set.x_flag_tree_loop_vectorize)
4679 && flag_tree_loop_optimize
4680 && loop->safelen > 1)
4682 loop->force_vectorize = true;
4683 cfun->has_force_vectorize_loops = true;
4686 else if (simduid)
4687 cfun->has_simduid_loops = true;
4690 /* Taskloop construct is represented after gimplification with
4691 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4692 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4693 which should just compute all the needed loop temporaries
4694 for GIMPLE_OMP_TASK. */
4696 static void
4697 expand_omp_taskloop_for_outer (struct omp_region *region,
4698 struct omp_for_data *fd,
4699 gimple *inner_stmt)
4701 tree type, bias = NULL_TREE;
4702 basic_block entry_bb, cont_bb, exit_bb;
4703 gimple_stmt_iterator gsi;
4704 gassign *assign_stmt;
4705 tree *counts = NULL;
4706 int i;
4708 gcc_assert (inner_stmt);
4709 gcc_assert (region->cont);
4710 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4711 && gimple_omp_task_taskloop_p (inner_stmt));
4712 type = TREE_TYPE (fd->loop.v);
4714 /* See if we need to bias by LLONG_MIN. */
4715 if (fd->iter_type == long_long_unsigned_type_node
4716 && TREE_CODE (type) == INTEGER_TYPE
4717 && !TYPE_UNSIGNED (type))
4719 tree n1, n2;
4721 if (fd->loop.cond_code == LT_EXPR)
4723 n1 = fd->loop.n1;
4724 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4726 else
4728 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4729 n2 = fd->loop.n1;
4731 if (TREE_CODE (n1) != INTEGER_CST
4732 || TREE_CODE (n2) != INTEGER_CST
4733 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4734 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
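/* E.g. for a signed long long iteration variable the bias is
   (unsigned long long) LLONG_MIN: adding it to both bounds maps the
   signed range monotonically into the unsigned space expected by the
   GOMP_taskloop_ull interface.  The matching adjustment is made in
   expand_omp_taskloop_for_inner below.  */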
4737 entry_bb = region->entry;
4738 cont_bb = region->cont;
4739 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4740 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4741 exit_bb = region->exit;
4743 gsi = gsi_last_nondebug_bb (entry_bb);
4744 gimple *for_stmt = gsi_stmt (gsi);
4745 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4746 if (fd->collapse > 1)
4748 int first_zero_iter = -1, dummy = -1;
4749 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4751 counts = XALLOCAVEC (tree, fd->collapse);
4752 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4753 zero_iter_bb, first_zero_iter,
4754 dummy_bb, dummy, l2_dom_bb);
4756 if (zero_iter_bb)
4758 /* Some counts[i] vars might be uninitialized if
4759 some loop has zero iterations. But the body shouldn't
4760 be executed in that case, so just avoid uninit warnings. */
4761 for (i = first_zero_iter; i < fd->collapse; i++)
4762 if (SSA_VAR_P (counts[i]))
4763 TREE_NO_WARNING (counts[i]) = 1;
4764 gsi_prev (&gsi);
4765 edge e = split_block (entry_bb, gsi_stmt (gsi));
4766 entry_bb = e->dest;
4767 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4768 gsi = gsi_last_bb (entry_bb);
4769 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4770 get_immediate_dominator (CDI_DOMINATORS,
4771 zero_iter_bb));
4775 tree t0, t1;
4776 t1 = fd->loop.n2;
4777 t0 = fd->loop.n1;
4778 if (POINTER_TYPE_P (TREE_TYPE (t0))
4779 && TYPE_PRECISION (TREE_TYPE (t0))
4780 != TYPE_PRECISION (fd->iter_type))
4782 /* Avoid casting pointers to an integer of a different size. */
4783 tree itype = signed_type_for (type);
4784 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4785 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4787 else
4789 t1 = fold_convert (fd->iter_type, t1);
4790 t0 = fold_convert (fd->iter_type, t0);
4792 if (bias)
4794 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4795 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4798 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4799 OMP_CLAUSE__LOOPTEMP_);
4800 gcc_assert (innerc);
4801 tree startvar = OMP_CLAUSE_DECL (innerc);
4802 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4803 gcc_assert (innerc);
4804 tree endvar = OMP_CLAUSE_DECL (innerc);
4805 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4807 gcc_assert (innerc);
4808 for (i = 1; i < fd->collapse; i++)
4810 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4811 OMP_CLAUSE__LOOPTEMP_);
4812 gcc_assert (innerc);
4814 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4815 OMP_CLAUSE__LOOPTEMP_);
4816 if (innerc)
4818 /* If needed (inner taskloop has lastprivate clause), propagate
4819 down the total number of iterations. */
4820 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4821 NULL_TREE, false,
4822 GSI_CONTINUE_LINKING);
4823 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4824 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4828 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4829 GSI_CONTINUE_LINKING);
4830 assign_stmt = gimple_build_assign (startvar, t0);
4831 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4833 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4834 GSI_CONTINUE_LINKING);
4835 assign_stmt = gimple_build_assign (endvar, t1);
4836 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4837 if (fd->collapse > 1)
4838 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4840 /* Remove the GIMPLE_OMP_FOR statement. */
4841 gsi = gsi_for_stmt (for_stmt);
4842 gsi_remove (&gsi, true);
4844 gsi = gsi_last_nondebug_bb (cont_bb);
4845 gsi_remove (&gsi, true);
4847 gsi = gsi_last_nondebug_bb (exit_bb);
4848 gsi_remove (&gsi, true);
4850 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4851 remove_edge (BRANCH_EDGE (entry_bb));
4852 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4853 remove_edge (BRANCH_EDGE (cont_bb));
4854 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4855 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4856 recompute_dominator (CDI_DOMINATORS, region->entry));
4859 /* Taskloop construct is represented after gimplification with
4860 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4861 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4862 GOMP_taskloop{,_ull} function arranges for each task to be given just
4863 a single range of iterations. */
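/* Illustrative sketch only: once GOMP_taskloop{,_ull} has handed a task
   its range via the two _looptemp_ values, the expansion below amounts to

     for (V = task_start; V cond task_end; V += STEP) BODY;

   inside that task (task_start and task_end are illustrative names for
   the two _looptemp_ decls).  */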
4865 static void
4866 expand_omp_taskloop_for_inner (struct omp_region *region,
4867 struct omp_for_data *fd,
4868 gimple *inner_stmt)
4870 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4871 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4872 basic_block fin_bb;
4873 gimple_stmt_iterator gsi;
4874 edge ep;
4875 bool broken_loop = region->cont == NULL;
4876 tree *counts = NULL;
4877 tree n1, n2, step;
4879 itype = type = TREE_TYPE (fd->loop.v);
4880 if (POINTER_TYPE_P (type))
4881 itype = signed_type_for (type);
4883 /* See if we need to bias by LLONG_MIN. */
4884 if (fd->iter_type == long_long_unsigned_type_node
4885 && TREE_CODE (type) == INTEGER_TYPE
4886 && !TYPE_UNSIGNED (type))
4888 tree n1, n2;
4890 if (fd->loop.cond_code == LT_EXPR)
4892 n1 = fd->loop.n1;
4893 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4895 else
4897 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4898 n2 = fd->loop.n1;
4900 if (TREE_CODE (n1) != INTEGER_CST
4901 || TREE_CODE (n2) != INTEGER_CST
4902 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4903 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4906 entry_bb = region->entry;
4907 cont_bb = region->cont;
4908 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4909 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4910 gcc_assert (broken_loop
4911 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4912 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4913 if (!broken_loop)
4915 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4916 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4918 exit_bb = region->exit;
4920 /* Iteration space partitioning goes in ENTRY_BB. */
4921 gsi = gsi_last_nondebug_bb (entry_bb);
4922 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4924 if (fd->collapse > 1)
4926 int first_zero_iter = -1, dummy = -1;
4927 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4929 counts = XALLOCAVEC (tree, fd->collapse);
4930 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4931 fin_bb, first_zero_iter,
4932 dummy_bb, dummy, l2_dom_bb);
4933 t = NULL_TREE;
4935 else
4936 t = integer_one_node;
4938 step = fd->loop.step;
4939 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4940 OMP_CLAUSE__LOOPTEMP_);
4941 gcc_assert (innerc);
4942 n1 = OMP_CLAUSE_DECL (innerc);
4943 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4944 gcc_assert (innerc);
4945 n2 = OMP_CLAUSE_DECL (innerc);
4946 if (bias)
4948 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4949 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4951 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4952 true, NULL_TREE, true, GSI_SAME_STMT);
4953 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4954 true, NULL_TREE, true, GSI_SAME_STMT);
4955 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4956 true, NULL_TREE, true, GSI_SAME_STMT);
4958 tree startvar = fd->loop.v;
4959 tree endvar = NULL_TREE;
4961 if (gimple_omp_for_combined_p (fd->for_stmt))
4963 tree clauses = gimple_omp_for_clauses (inner_stmt);
4964 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4965 gcc_assert (innerc);
4966 startvar = OMP_CLAUSE_DECL (innerc);
4967 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4968 OMP_CLAUSE__LOOPTEMP_);
4969 gcc_assert (innerc);
4970 endvar = OMP_CLAUSE_DECL (innerc);
4972 t = fold_convert (TREE_TYPE (startvar), n1);
4973 t = force_gimple_operand_gsi (&gsi, t,
4974 DECL_P (startvar)
4975 && TREE_ADDRESSABLE (startvar),
4976 NULL_TREE, false, GSI_CONTINUE_LINKING);
4977 gimple *assign_stmt = gimple_build_assign (startvar, t);
4978 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4980 t = fold_convert (TREE_TYPE (startvar), n2);
4981 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4982 false, GSI_CONTINUE_LINKING);
4983 if (endvar)
4985 assign_stmt = gimple_build_assign (endvar, e);
4986 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4987 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4988 assign_stmt = gimple_build_assign (fd->loop.v, e);
4989 else
4990 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4991 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4993 if (fd->collapse > 1)
4994 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4996 if (!broken_loop)
4998 /* The code controlling the sequential loop replaces the
4999 GIMPLE_OMP_CONTINUE. */
5000 gsi = gsi_last_nondebug_bb (cont_bb);
5001 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5002 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5003 vmain = gimple_omp_continue_control_use (cont_stmt);
5004 vback = gimple_omp_continue_control_def (cont_stmt);
5006 if (!gimple_omp_for_combined_p (fd->for_stmt))
5008 if (POINTER_TYPE_P (type))
5009 t = fold_build_pointer_plus (vmain, step);
5010 else
5011 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5012 t = force_gimple_operand_gsi (&gsi, t,
5013 DECL_P (vback)
5014 && TREE_ADDRESSABLE (vback),
5015 NULL_TREE, true, GSI_SAME_STMT);
5016 assign_stmt = gimple_build_assign (vback, t);
5017 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5019 t = build2 (fd->loop.cond_code, boolean_type_node,
5020 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5021 ? t : vback, e);
5022 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5025 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5026 gsi_remove (&gsi, true);
5028 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5029 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5032 /* Remove the GIMPLE_OMP_FOR statement. */
5033 gsi = gsi_for_stmt (fd->for_stmt);
5034 gsi_remove (&gsi, true);
5036 /* Remove the GIMPLE_OMP_RETURN statement. */
5037 gsi = gsi_last_nondebug_bb (exit_bb);
5038 gsi_remove (&gsi, true);
5040 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5041 if (!broken_loop)
5042 remove_edge (BRANCH_EDGE (entry_bb));
5043 else
5045 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5046 region->outer->cont = NULL;
5049 /* Connect all the blocks. */
5050 if (!broken_loop)
5052 ep = find_edge (cont_bb, body_bb);
5053 if (gimple_omp_for_combined_p (fd->for_stmt))
5055 remove_edge (ep);
5056 ep = NULL;
5058 else if (fd->collapse > 1)
5060 remove_edge (ep);
5061 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5063 else
5064 ep->flags = EDGE_TRUE_VALUE;
5065 find_edge (cont_bb, fin_bb)->flags
5066 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5069 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5070 recompute_dominator (CDI_DOMINATORS, body_bb));
5071 if (!broken_loop)
5072 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5073 recompute_dominator (CDI_DOMINATORS, fin_bb));
5075 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5077 struct loop *loop = alloc_loop ();
5078 loop->header = body_bb;
5079 if (collapse_bb == NULL)
5080 loop->latch = cont_bb;
5081 add_loop (loop, body_bb->loop_father);
5085 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5086 partitioned loop. The lowering here is abstracted, in that the
5087 loop parameters are passed through internal functions, which are
5088 further lowered by oacc_device_lower, once we get to the target
5089 compiler. The loop is of the form:
5091 for (V = B; V LTGT E; V += S) {BODY}
5093 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5094 (constant 0 for no chunking) and we will have a GWV partitioning
5095 mask, specifying dimensions over which the loop is to be
5096 partitioned (see note below). We generate code that looks like
5097 (this ignores tiling):
5099 <entry_bb> [incoming FALL->body, BRANCH->exit]
5100 typedef signedintify (typeof (V)) T; // underlying signed integral type
5101 T range = E - B;
5102 T chunk_no = 0;
5103 T DIR = LTGT == '<' ? +1 : -1;
5104 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5105 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5107 <head_bb> [created by splitting end of entry_bb]
5108 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5109 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5110 if (!(offset LTGT bound)) goto bottom_bb;
5112 <body_bb> [incoming]
5113 V = B + offset;
5114 {BODY}
5116 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5117 offset += step;
5118 if (offset LTGT bound) goto body_bb; [*]
5120 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5121 chunk_no++;
5122 if (chunk_no < chunk_max) goto head_bb;
5124 <exit_bb> [incoming]
5125 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5127 [*] Needed if V live at end of loop. */
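/* Illustrative sketch only (not emitted literally): assuming chunking is
   in effect, treating the IFN_GOACC_LOOP internal calls as opaque
   helpers and writing LTGT for the loop comparison as above, the block
   structure corresponds to

     chunk_no = 0;
     do                                  // head_bb .. bottom_bb
       {
         offset = OFFSET (..., chunk_no);
         bound = BOUND (..., offset);
         while (offset LTGT bound)       // body_bb .. cont_bb
           {
             V = B + offset;
             BODY;
             offset += step;
           }
         chunk_no++;
       }
     while (chunk_no < chunk_max);

   with the actual partitioning decided later by oacc_device_lower.  */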
5129 static void
5130 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5132 tree v = fd->loop.v;
5133 enum tree_code cond_code = fd->loop.cond_code;
5134 enum tree_code plus_code = PLUS_EXPR;
5136 tree chunk_size = integer_minus_one_node;
5137 tree gwv = integer_zero_node;
5138 tree iter_type = TREE_TYPE (v);
5139 tree diff_type = iter_type;
5140 tree plus_type = iter_type;
5141 struct oacc_collapse *counts = NULL;
5143 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5144 == GF_OMP_FOR_KIND_OACC_LOOP);
5145 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5146 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5148 if (POINTER_TYPE_P (iter_type))
5150 plus_code = POINTER_PLUS_EXPR;
5151 plus_type = sizetype;
5153 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5154 diff_type = signed_type_for (diff_type);
5155 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5156 diff_type = integer_type_node;
5158 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5159 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5160 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5161 basic_block bottom_bb = NULL;
5163 /* entry_bb has two successors; the branch edge is to the exit
5164 block, fallthrough edge to body. */
5165 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5166 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5168 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5169 body_bb, or to a block whose only successor is the body_bb. Its
5170 fallthrough successor is the final block (same as the branch
5171 successor of the entry_bb). */
5172 if (cont_bb)
5174 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5175 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5177 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5178 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5180 else
5181 gcc_assert (!gimple_in_ssa_p (cfun));
5183 /* The exit block only has entry_bb and cont_bb as predecessors. */
5184 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5186 tree chunk_no;
5187 tree chunk_max = NULL_TREE;
5188 tree bound, offset;
5189 tree step = create_tmp_var (diff_type, ".step");
5190 bool up = cond_code == LT_EXPR;
5191 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5192 bool chunking = !gimple_in_ssa_p (cfun);
5193 bool negating;
5195 /* Tiling vars. */
5196 tree tile_size = NULL_TREE;
5197 tree element_s = NULL_TREE;
5198 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5199 basic_block elem_body_bb = NULL;
5200 basic_block elem_cont_bb = NULL;
5202 /* SSA instances. */
5203 tree offset_incr = NULL_TREE;
5204 tree offset_init = NULL_TREE;
5206 gimple_stmt_iterator gsi;
5207 gassign *ass;
5208 gcall *call;
5209 gimple *stmt;
5210 tree expr;
5211 location_t loc;
5212 edge split, be, fte;
5214 /* Split the end of entry_bb to create head_bb. */
5215 split = split_block (entry_bb, last_stmt (entry_bb));
5216 basic_block head_bb = split->dest;
5217 entry_bb = split->src;
5219 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5220 gsi = gsi_last_nondebug_bb (entry_bb);
5221 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5222 loc = gimple_location (for_stmt);
5224 if (gimple_in_ssa_p (cfun))
5226 offset_init = gimple_omp_for_index (for_stmt, 0);
5227 gcc_assert (integer_zerop (fd->loop.n1));
5228 /* The SSA parallelizer does gang parallelism. */
5229 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5232 if (fd->collapse > 1 || fd->tiling)
5234 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5235 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5236 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5237 TREE_TYPE (fd->loop.n2), loc);
5239 if (SSA_VAR_P (fd->loop.n2))
5241 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5242 true, GSI_SAME_STMT);
5243 ass = gimple_build_assign (fd->loop.n2, total);
5244 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5248 tree b = fd->loop.n1;
5249 tree e = fd->loop.n2;
5250 tree s = fd->loop.step;
5252 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5253 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5255 /* Convert the step, avoiding possible unsigned->signed overflow. */
5256 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5257 if (negating)
5258 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5259 s = fold_convert (diff_type, s);
5260 if (negating)
5261 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5262 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
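  /* For example, with a 32-bit unsigned step holding 0xFFFFFFFC (i.e. -4 in
     a downward-counting loop), negating first yields 4, which converts to
     the signed DIFF_TYPE without overflow; the second negation then
     restores -4.  */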
5264 if (!chunking)
5265 chunk_size = integer_zero_node;
5266 expr = fold_convert (diff_type, chunk_size);
5267 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5268 NULL_TREE, true, GSI_SAME_STMT);
5270 if (fd->tiling)
5272 /* Determine the tile size and element step,
5273 modify the outer loop step size. */
5274 tile_size = create_tmp_var (diff_type, ".tile_size");
5275 expr = build_int_cst (diff_type, 1);
5276 for (int ix = 0; ix < fd->collapse; ix++)
5277 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5278 expr = force_gimple_operand_gsi (&gsi, expr, true,
5279 NULL_TREE, true, GSI_SAME_STMT);
5280 ass = gimple_build_assign (tile_size, expr);
5281 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5283 element_s = create_tmp_var (diff_type, ".element_s");
5284 ass = gimple_build_assign (element_s, s);
5285 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5287 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5288 s = force_gimple_operand_gsi (&gsi, expr, true,
5289 NULL_TREE, true, GSI_SAME_STMT);
5292 /* Determine the range, avoiding possible unsigned->signed overflow. */
5293 negating = !up && TYPE_UNSIGNED (iter_type);
5294 expr = fold_build2 (MINUS_EXPR, plus_type,
5295 fold_convert (plus_type, negating ? b : e),
5296 fold_convert (plus_type, negating ? e : b));
5297 expr = fold_convert (diff_type, expr);
5298 if (negating)
5299 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5300 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5301 NULL_TREE, true, GSI_SAME_STMT);
5303 chunk_no = build_int_cst (diff_type, 0);
5304 if (chunking)
5306 gcc_assert (!gimple_in_ssa_p (cfun));
5308 expr = chunk_no;
5309 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5310 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5312 ass = gimple_build_assign (chunk_no, expr);
5313 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5315 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5316 build_int_cst (integer_type_node,
5317 IFN_GOACC_LOOP_CHUNKS),
5318 dir, range, s, chunk_size, gwv);
5319 gimple_call_set_lhs (call, chunk_max);
5320 gimple_set_location (call, loc);
5321 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5323 else
5324 chunk_size = chunk_no;
5326 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5327 build_int_cst (integer_type_node,
5328 IFN_GOACC_LOOP_STEP),
5329 dir, range, s, chunk_size, gwv);
5330 gimple_call_set_lhs (call, step);
5331 gimple_set_location (call, loc);
5332 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5334 /* Remove the GIMPLE_OMP_FOR. */
5335 gsi_remove (&gsi, true);
5337 /* Fixup edges from head_bb. */
5338 be = BRANCH_EDGE (head_bb);
5339 fte = FALLTHRU_EDGE (head_bb);
5340 be->flags |= EDGE_FALSE_VALUE;
5341 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
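  /* After this, the branch edge (loop body not entered) is the FALSE edge
     towards the exit path, and the former fallthrough edge is the TRUE
     edge into the loop body.  */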
5343 basic_block body_bb = fte->dest;
5345 if (gimple_in_ssa_p (cfun))
5347 gsi = gsi_last_nondebug_bb (cont_bb);
5348 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5350 offset = gimple_omp_continue_control_use (cont_stmt);
5351 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5353 else
5355 offset = create_tmp_var (diff_type, ".offset");
5356 offset_init = offset_incr = offset;
5358 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5360 /* Loop offset & bound go into head_bb. */
5361 gsi = gsi_start_bb (head_bb);
5363 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5364 build_int_cst (integer_type_node,
5365 IFN_GOACC_LOOP_OFFSET),
5366 dir, range, s,
5367 chunk_size, gwv, chunk_no);
5368 gimple_call_set_lhs (call, offset_init);
5369 gimple_set_location (call, loc);
5370 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5372 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5373 build_int_cst (integer_type_node,
5374 IFN_GOACC_LOOP_BOUND),
5375 dir, range, s,
5376 chunk_size, gwv, offset_init);
5377 gimple_call_set_lhs (call, bound);
5378 gimple_set_location (call, loc);
5379 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5381 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5382 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5383 GSI_CONTINUE_LINKING);
5385 /* V assignment goes into body_bb. */
5386 if (!gimple_in_ssa_p (cfun))
5388 gsi = gsi_start_bb (body_bb);
5390 expr = build2 (plus_code, iter_type, b,
5391 fold_convert (plus_type, offset));
5392 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5393 true, GSI_SAME_STMT);
5394 ass = gimple_build_assign (v, expr);
5395 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5397 if (fd->collapse > 1 || fd->tiling)
5398 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5400 if (fd->tiling)
5402 /* Determine the range of the element loop -- usually simply
5403 the tile_size, but could be smaller if the final
5404 iteration of the outer loop is a partial tile. */
5405 tree e_range = create_tmp_var (diff_type, ".e_range");
5407 expr = build2 (MIN_EXPR, diff_type,
5408 build2 (MINUS_EXPR, diff_type, bound, offset),
5409 build2 (MULT_EXPR, diff_type, tile_size,
5410 element_s));
5411 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5412 true, GSI_SAME_STMT);
5413 ass = gimple_build_assign (e_range, expr);
5414 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5416 /* Determine bound, offset & step of inner loop. */
5417 e_bound = create_tmp_var (diff_type, ".e_bound");
5418 e_offset = create_tmp_var (diff_type, ".e_offset");
5419 e_step = create_tmp_var (diff_type, ".e_step");
5421 /* Mark these as element loops. */
5422 tree t, e_gwv = integer_minus_one_node;
5423 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5425 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5426 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5427 element_s, chunk, e_gwv, chunk);
5428 gimple_call_set_lhs (call, e_offset);
5429 gimple_set_location (call, loc);
5430 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5432 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5433 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5434 element_s, chunk, e_gwv, e_offset);
5435 gimple_call_set_lhs (call, e_bound);
5436 gimple_set_location (call, loc);
5437 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5439 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5440 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5441 element_s, chunk, e_gwv);
5442 gimple_call_set_lhs (call, e_step);
5443 gimple_set_location (call, loc);
5444 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5446 /* Add test and split block. */
5447 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5448 stmt = gimple_build_cond_empty (expr);
5449 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5450 split = split_block (body_bb, stmt);
5451 elem_body_bb = split->dest;
5452 if (cont_bb == body_bb)
5453 cont_bb = elem_body_bb;
5454 body_bb = split->src;
5456 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5458 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5459 if (cont_bb == NULL)
5461 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5462 e->probability = profile_probability::even ();
5463 split->probability = profile_probability::even ();
5466 /* Initialize the user's loop vars. */
5467 gsi = gsi_start_bb (elem_body_bb);
5468 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5472 /* Loop increment goes into cont_bb. If this is not a loop, we
5473 will have spawned threads as if it was, and each one will
5474 execute one iteration. The specification is not explicit about
5475 whether such constructs are ill-formed or not, and they can
5476 occur, especially when noreturn routines are involved. */
5477 if (cont_bb)
5479 gsi = gsi_last_nondebug_bb (cont_bb);
5480 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5481 loc = gimple_location (cont_stmt);
5483 if (fd->tiling)
5485 /* Insert element loop increment and test. */
5486 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5487 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5488 true, GSI_SAME_STMT);
5489 ass = gimple_build_assign (e_offset, expr);
5490 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5491 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5493 stmt = gimple_build_cond_empty (expr);
5494 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5495 split = split_block (cont_bb, stmt);
5496 elem_cont_bb = split->src;
5497 cont_bb = split->dest;
5499 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5500 split->probability = profile_probability::unlikely ().guessed ();
5501 edge latch_edge
5502 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5503 latch_edge->probability = profile_probability::likely ().guessed ();
5505 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5506 skip_edge->probability = profile_probability::unlikely ().guessed ();
5507 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5508 loop_entry_edge->probability
5509 = profile_probability::likely ().guessed ();
5511 gsi = gsi_for_stmt (cont_stmt);
5514 /* Increment offset. */
5515 if (gimple_in_ssa_p (cfun))
5516 expr = build2 (plus_code, iter_type, offset,
5517 fold_convert (plus_type, step));
5518 else
5519 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5520 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5521 true, GSI_SAME_STMT);
5522 ass = gimple_build_assign (offset_incr, expr);
5523 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5524 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5525 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5527 /* Remove the GIMPLE_OMP_CONTINUE. */
5528 gsi_remove (&gsi, true);
5530 /* Fixup edges from cont_bb. */
5531 be = BRANCH_EDGE (cont_bb);
5532 fte = FALLTHRU_EDGE (cont_bb);
5533 be->flags |= EDGE_TRUE_VALUE;
5534 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5536 if (chunking)
5538 /* Split the beginning of exit_bb to make bottom_bb. We
5539 need to insert a nop at the start, because splitting is
5540 after a stmt, not before. */
5541 gsi = gsi_start_bb (exit_bb);
5542 stmt = gimple_build_nop ();
5543 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5544 split = split_block (exit_bb, stmt);
5545 bottom_bb = split->src;
5546 exit_bb = split->dest;
5547 gsi = gsi_last_bb (bottom_bb);
5549 /* Chunk increment and test goes into bottom_bb. */
5550 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5551 build_int_cst (diff_type, 1));
5552 ass = gimple_build_assign (chunk_no, expr);
5553 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5555 /* Chunk test at end of bottom_bb. */
5556 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5557 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5558 GSI_CONTINUE_LINKING);
5560 /* Fixup edges from bottom_bb. */
5561 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5562 split->probability = profile_probability::unlikely ().guessed ();
5563 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5564 latch_edge->probability = profile_probability::likely ().guessed ();
5568 gsi = gsi_last_nondebug_bb (exit_bb);
5569 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5570 loc = gimple_location (gsi_stmt (gsi));
5572 if (!gimple_in_ssa_p (cfun))
5574 /* Insert the final value of V, in case it is live. This is the
5575 value for the only thread that survives past the join. */
5576 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5577 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5578 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5579 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5580 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5581 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5582 true, GSI_SAME_STMT);
5583 ass = gimple_build_assign (v, expr);
5584 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
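  /* As a worked example: for b == 0, e == 10, s == 3 counting up (dir == 1),
     range is 10 and the expression is ((10 - 1) + 3) / 3 * 3 == 12, so V is
     left at 12, matching what "for (v = 0; v < 10; v += 3)" produces.  */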
5587 /* Remove the OMP_RETURN. */
5588 gsi_remove (&gsi, true);
5590 if (cont_bb)
5592 /* We now have one, two or three nested loops. Update the loop
5593 structures. */
5594 struct loop *parent = entry_bb->loop_father;
5595 struct loop *body = body_bb->loop_father;
5597 if (chunking)
5599 struct loop *chunk_loop = alloc_loop ();
5600 chunk_loop->header = head_bb;
5601 chunk_loop->latch = bottom_bb;
5602 add_loop (chunk_loop, parent);
5603 parent = chunk_loop;
5605 else if (parent != body)
5607 gcc_assert (body->header == body_bb);
5608 gcc_assert (body->latch == cont_bb
5609 || single_pred (body->latch) == cont_bb);
5610 parent = NULL;
5613 if (parent)
5615 struct loop *body_loop = alloc_loop ();
5616 body_loop->header = body_bb;
5617 body_loop->latch = cont_bb;
5618 add_loop (body_loop, parent);
5620 if (fd->tiling)
5622 /* Insert tiling's element loop. */
5623 struct loop *inner_loop = alloc_loop ();
5624 inner_loop->header = elem_body_bb;
5625 inner_loop->latch = elem_cont_bb;
5626 add_loop (inner_loop, body_loop);
5632 /* Expand the OMP loop defined by REGION. */
5634 static void
5635 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5637 struct omp_for_data fd;
5638 struct omp_for_data_loop *loops;
5640 loops
5641 = (struct omp_for_data_loop *)
5642 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5643 * sizeof (struct omp_for_data_loop));
5644 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5645 &fd, loops);
5646 region->sched_kind = fd.sched_kind;
5647 region->sched_modifiers = fd.sched_modifiers;
5649 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5650 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5651 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5652 if (region->cont)
5654 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5655 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5656 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5658 else
5659 /* If there isn't a continue then this is a degenerate case where
5660 the introduction of abnormal edges during lowering will prevent
5661 original loops from being detected. Fix that up. */
5662 loops_state_set (LOOPS_NEED_FIXUP);
5664 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5665 expand_omp_simd (region, &fd);
5666 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5668 gcc_assert (!inner_stmt);
5669 expand_oacc_for (region, &fd);
5671 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5673 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5674 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5675 else
5676 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5678 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5679 && !fd.have_ordered)
5681 if (fd.chunk_size == NULL)
5682 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5683 else
5684 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5686 else
5688 int fn_index, start_ix, next_ix;
5690 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5691 == GF_OMP_FOR_KIND_FOR);
5692 if (fd.chunk_size == NULL
5693 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5694 fd.chunk_size = integer_zero_node;
5695 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5696 switch (fd.sched_kind)
5698 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5699 fn_index = 3;
5700 break;
5701 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5702 case OMP_CLAUSE_SCHEDULE_GUIDED:
5703 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5704 && !fd.ordered
5705 && !fd.have_ordered)
5707 fn_index = 3 + fd.sched_kind;
5708 break;
5710 /* FALLTHRU */
5711 default:
5712 fn_index = fd.sched_kind;
5713 break;
5715 if (!fd.ordered)
5716 fn_index += fd.have_ordered * 6;
5717 if (fd.ordered)
5718 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5719 else
5720 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5721 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5722 if (fd.iter_type == long_long_unsigned_type_node)
5724 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5725 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5726 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5727 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5729 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5730 (enum built_in_function) next_ix, inner_stmt);
5733 if (gimple_in_ssa_p (cfun))
5734 update_ssa (TODO_update_ssa_only_virtuals);
5737 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5739 v = GOMP_sections_start (n);
5741 switch (v)
5743 case 0:
5744 goto L2;
5745 case 1:
5746 section 1;
5747 goto L1;
5748 case 2:
5750 case n:
5752 default:
5753 abort ();
5756 v = GOMP_sections_next ();
5757 goto L0;
5759 reduction;
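
     As a concrete sketch, a two-section construct

	#pragma omp sections
	{
	  #pragma omp section
	    foo ();
	  #pragma omp section
	    bar ();
	}

     fits the scheme above with n == 2: case 1 runs foo (), case 2 runs
     bar (), and the loop keeps calling GOMP_sections_next () until it
     returns 0.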
5761 If this is a combined parallel sections, replace the call to
5762 GOMP_sections_start with call to GOMP_sections_next. */
5764 static void
5765 expand_omp_sections (struct omp_region *region)
5767 tree t, u, vin = NULL, vmain, vnext, l2;
5768 unsigned len;
5769 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5770 gimple_stmt_iterator si, switch_si;
5771 gomp_sections *sections_stmt;
5772 gimple *stmt;
5773 gomp_continue *cont;
5774 edge_iterator ei;
5775 edge e;
5776 struct omp_region *inner;
5777 unsigned i, casei;
5778 bool exit_reachable = region->cont != NULL;
5780 gcc_assert (region->exit != NULL);
5781 entry_bb = region->entry;
5782 l0_bb = single_succ (entry_bb);
5783 l1_bb = region->cont;
5784 l2_bb = region->exit;
5785 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5786 l2 = gimple_block_label (l2_bb);
5787 else
5789 /* This can happen if there are reductions. */
5790 len = EDGE_COUNT (l0_bb->succs);
5791 gcc_assert (len > 0);
5792 e = EDGE_SUCC (l0_bb, len - 1);
5793 si = gsi_last_nondebug_bb (e->dest);
5794 l2 = NULL_TREE;
5795 if (gsi_end_p (si)
5796 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5797 l2 = gimple_block_label (e->dest);
5798 else
5799 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5801 si = gsi_last_nondebug_bb (e->dest);
5802 if (gsi_end_p (si)
5803 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5805 l2 = gimple_block_label (e->dest);
5806 break;
5810 if (exit_reachable)
5811 default_bb = create_empty_bb (l1_bb->prev_bb);
5812 else
5813 default_bb = create_empty_bb (l0_bb);
5815 /* We will build a switch() with enough cases for all the
5816 GIMPLE_OMP_SECTION regions, a '0' case for when there is no more work
5817 and a default case to abort if something goes wrong. */
5818 len = EDGE_COUNT (l0_bb->succs);
5820 /* Use vec::quick_push on label_vec throughout, since we know the size
5821 in advance. */
5822 auto_vec<tree> label_vec (len);
5824 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5825 GIMPLE_OMP_SECTIONS statement. */
5826 si = gsi_last_nondebug_bb (entry_bb);
5827 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5828 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5829 vin = gimple_omp_sections_control (sections_stmt);
5830 if (!is_combined_parallel (region))
5832 /* If we are not inside a combined parallel+sections region,
5833 call GOMP_sections_start. */
5834 t = build_int_cst (unsigned_type_node, len - 1);
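  /* LEN - 1 is the number of sections: L0_BB has one successor per
     GIMPLE_OMP_SECTION region plus the edge taken for case 0 when no
     work remains.  */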
5835 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5836 stmt = gimple_build_call (u, 1, t);
5838 else
5840 /* Otherwise, call GOMP_sections_next. */
5841 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5842 stmt = gimple_build_call (u, 0);
5844 gimple_call_set_lhs (stmt, vin);
5845 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5846 gsi_remove (&si, true);
5848 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5849 L0_BB. */
5850 switch_si = gsi_last_nondebug_bb (l0_bb);
5851 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5852 if (exit_reachable)
5854 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5855 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5856 vmain = gimple_omp_continue_control_use (cont);
5857 vnext = gimple_omp_continue_control_def (cont);
5859 else
5861 vmain = vin;
5862 vnext = NULL_TREE;
5865 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5866 label_vec.quick_push (t);
5867 i = 1;
5869 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5870 for (inner = region->inner, casei = 1;
5871 inner;
5872 inner = inner->next, i++, casei++)
5874 basic_block s_entry_bb, s_exit_bb;
5876 /* Skip optional reduction region. */
5877 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5879 --i;
5880 --casei;
5881 continue;
5884 s_entry_bb = inner->entry;
5885 s_exit_bb = inner->exit;
5887 t = gimple_block_label (s_entry_bb);
5888 u = build_int_cst (unsigned_type_node, casei);
5889 u = build_case_label (u, NULL, t);
5890 label_vec.quick_push (u);
5892 si = gsi_last_nondebug_bb (s_entry_bb);
5893 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5894 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5895 gsi_remove (&si, true);
5896 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5898 if (s_exit_bb == NULL)
5899 continue;
5901 si = gsi_last_nondebug_bb (s_exit_bb);
5902 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5903 gsi_remove (&si, true);
5905 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5908 /* Error handling code goes in DEFAULT_BB. */
5909 t = gimple_block_label (default_bb);
5910 u = build_case_label (NULL, NULL, t);
5911 make_edge (l0_bb, default_bb, 0);
5912 add_bb_to_loop (default_bb, current_loops->tree_root);
5914 stmt = gimple_build_switch (vmain, u, label_vec);
5915 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5916 gsi_remove (&switch_si, true);
5918 si = gsi_start_bb (default_bb);
5919 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5920 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5922 if (exit_reachable)
5924 tree bfn_decl;
5926 /* Code to get the next section goes in L1_BB. */
5927 si = gsi_last_nondebug_bb (l1_bb);
5928 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5930 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5931 stmt = gimple_build_call (bfn_decl, 0);
5932 gimple_call_set_lhs (stmt, vnext);
5933 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5934 gsi_remove (&si, true);
5936 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5939 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5940 si = gsi_last_nondebug_bb (l2_bb);
5941 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5942 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5943 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5944 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5945 else
5946 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5947 stmt = gimple_build_call (t, 0);
5948 if (gimple_omp_return_lhs (gsi_stmt (si)))
5949 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5950 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5951 gsi_remove (&si, true);
5953 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5956 /* Expand code for an OpenMP single directive. We've already expanded
5957 much of the code, here we simply place the GOMP_barrier call. */
5959 static void
5960 expand_omp_single (struct omp_region *region)
5962 basic_block entry_bb, exit_bb;
5963 gimple_stmt_iterator si;
5965 entry_bb = region->entry;
5966 exit_bb = region->exit;
5968 si = gsi_last_nondebug_bb (entry_bb);
5969 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5970 gsi_remove (&si, true);
5971 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5973 si = gsi_last_nondebug_bb (exit_bb);
5974 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5976 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5977 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5979 gsi_remove (&si, true);
5980 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5983 /* Generic expansion for OpenMP synchronization directives: master,
5984 ordered and critical. All we need to do here is remove the entry
5985 and exit markers for REGION. */
5987 static void
5988 expand_omp_synch (struct omp_region *region)
5990 basic_block entry_bb, exit_bb;
5991 gimple_stmt_iterator si;
5993 entry_bb = region->entry;
5994 exit_bb = region->exit;
5996 si = gsi_last_nondebug_bb (entry_bb);
5997 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5998 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5999 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6000 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6001 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6002 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6003 gsi_remove (&si, true);
6004 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6006 if (exit_bb)
6008 si = gsi_last_nondebug_bb (exit_bb);
6009 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6010 gsi_remove (&si, true);
6011 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6015 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6016 operation as a normal volatile load. */
6018 static bool
6019 expand_omp_atomic_load (basic_block load_bb, tree addr,
6020 tree loaded_val, int index)
6022 enum built_in_function tmpbase;
6023 gimple_stmt_iterator gsi;
6024 basic_block store_bb;
6025 location_t loc;
6026 gimple *stmt;
6027 tree decl, call, type, itype;
6029 gsi = gsi_last_nondebug_bb (load_bb);
6030 stmt = gsi_stmt (gsi);
6031 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6032 loc = gimple_location (stmt);
6034 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6035 is smaller than word size, then expand_atomic_load assumes that the load
6036 is atomic. We could avoid the builtin entirely in this case. */
6038 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
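  /* For example, INDEX == 2 (a 4-byte type) selects BUILT_IN_ATOMIC_LOAD_4.  */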
6039 decl = builtin_decl_explicit (tmpbase);
6040 if (decl == NULL_TREE)
6041 return false;
6043 type = TREE_TYPE (loaded_val);
6044 itype = TREE_TYPE (TREE_TYPE (decl));
6046 call = build_call_expr_loc (loc, decl, 2, addr,
6047 build_int_cst (NULL,
6048 gimple_omp_atomic_seq_cst_p (stmt)
6049 ? MEMMODEL_SEQ_CST
6050 : MEMMODEL_RELAXED));
6051 if (!useless_type_conversion_p (type, itype))
6052 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6053 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6055 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6056 gsi_remove (&gsi, true);
6058 store_bb = single_succ (load_bb);
6059 gsi = gsi_last_nondebug_bb (store_bb);
6060 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6061 gsi_remove (&gsi, true);
6063 if (gimple_in_ssa_p (cfun))
6064 update_ssa (TODO_update_ssa_no_phi);
6066 return true;
6069 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6070 operation as a normal volatile store. */
6072 static bool
6073 expand_omp_atomic_store (basic_block load_bb, tree addr,
6074 tree loaded_val, tree stored_val, int index)
6076 enum built_in_function tmpbase;
6077 gimple_stmt_iterator gsi;
6078 basic_block store_bb = single_succ (load_bb);
6079 location_t loc;
6080 gimple *stmt;
6081 tree decl, call, type, itype;
6082 machine_mode imode;
6083 bool exchange;
6085 gsi = gsi_last_nondebug_bb (load_bb);
6086 stmt = gsi_stmt (gsi);
6087 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6089 /* If the load value is needed, then this isn't a store but an exchange. */
6090 exchange = gimple_omp_atomic_need_value_p (stmt);
6092 gsi = gsi_last_nondebug_bb (store_bb);
6093 stmt = gsi_stmt (gsi);
6094 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6095 loc = gimple_location (stmt);
6097 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6098 is smaller than word size, then expand_atomic_store assumes that the store
6099 is atomic. We could avoid the builtin entirely in this case. */
6101 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6102 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
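  /* For example, INDEX == 3 (an 8-byte type) selects BUILT_IN_ATOMIC_STORE_8,
     or BUILT_IN_ATOMIC_EXCHANGE_8 when the loaded value is needed.  */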
6103 decl = builtin_decl_explicit (tmpbase);
6104 if (decl == NULL_TREE)
6105 return false;
6107 type = TREE_TYPE (stored_val);
6109 /* Dig out the type of the function's second argument. */
6110 itype = TREE_TYPE (decl);
6111 itype = TYPE_ARG_TYPES (itype);
6112 itype = TREE_CHAIN (itype);
6113 itype = TREE_VALUE (itype);
6114 imode = TYPE_MODE (itype);
6116 if (exchange && !can_atomic_exchange_p (imode, true))
6117 return false;
6119 if (!useless_type_conversion_p (itype, type))
6120 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6121 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6122 build_int_cst (NULL,
6123 gimple_omp_atomic_seq_cst_p (stmt)
6124 ? MEMMODEL_SEQ_CST
6125 : MEMMODEL_RELAXED));
6126 if (exchange)
6128 if (!useless_type_conversion_p (type, itype))
6129 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6130 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6133 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6134 gsi_remove (&gsi, true);
6136 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6137 gsi = gsi_last_nondebug_bb (load_bb);
6138 gsi_remove (&gsi, true);
6140 if (gimple_in_ssa_p (cfun))
6141 update_ssa (TODO_update_ssa_no_phi);
6143 return true;
6146 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6147 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6148 size of the data type, and thus usable to find the index of the builtin
6149 decl. Returns false if the expression is not of the proper form. */
6151 static bool
6152 expand_omp_atomic_fetch_op (basic_block load_bb,
6153 tree addr, tree loaded_val,
6154 tree stored_val, int index)
6156 enum built_in_function oldbase, newbase, tmpbase;
6157 tree decl, itype, call;
6158 tree lhs, rhs;
6159 basic_block store_bb = single_succ (load_bb);
6160 gimple_stmt_iterator gsi;
6161 gimple *stmt;
6162 location_t loc;
6163 enum tree_code code;
6164 bool need_old, need_new;
6165 machine_mode imode;
6166 bool seq_cst;
6168 /* We expect to find the following sequences:
6170 load_bb:
6171 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6173 store_bb:
6174 val = tmp OP something; (or: something OP tmp)
6175 GIMPLE_OMP_STORE (val)
6177 ???FIXME: Allow a more flexible sequence.
6178 Perhaps use data flow to pick the statements.
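
     As a sketch, for

	#pragma omp atomic
	  x += v;

     load_bb contains GIMPLE_OMP_ATOMIC_LOAD (tmp, &x) and store_bb contains
     val = tmp + v followed by GIMPLE_OMP_ATOMIC_STORE (val); assuming a
     4-byte type, this is replaced below by something like
     __atomic_fetch_add_4 (&x, v, MEMMODEL_RELAXED).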
6182 gsi = gsi_after_labels (store_bb);
6183 stmt = gsi_stmt (gsi);
6184 if (is_gimple_debug (stmt))
6186 gsi_next_nondebug (&gsi);
6187 if (gsi_end_p (gsi))
6188 return false;
6189 stmt = gsi_stmt (gsi);
6191 loc = gimple_location (stmt);
6192 if (!is_gimple_assign (stmt))
6193 return false;
6194 gsi_next_nondebug (&gsi);
6195 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6196 return false;
6197 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6198 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6199 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6200 gcc_checking_assert (!need_old || !need_new);
6202 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6203 return false;
6205 /* Check for one of the supported fetch-op operations. */
6206 code = gimple_assign_rhs_code (stmt);
6207 switch (code)
6209 case PLUS_EXPR:
6210 case POINTER_PLUS_EXPR:
6211 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6212 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6213 break;
6214 case MINUS_EXPR:
6215 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6216 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6217 break;
6218 case BIT_AND_EXPR:
6219 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6220 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6221 break;
6222 case BIT_IOR_EXPR:
6223 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6224 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6225 break;
6226 case BIT_XOR_EXPR:
6227 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6228 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6229 break;
6230 default:
6231 return false;
6234 /* Make sure the expression is of the proper form. */
6235 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6236 rhs = gimple_assign_rhs2 (stmt);
6237 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6238 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6239 rhs = gimple_assign_rhs1 (stmt);
6240 else
6241 return false;
6243 tmpbase = ((enum built_in_function)
6244 ((need_new ? newbase : oldbase) + index + 1));
6245 decl = builtin_decl_explicit (tmpbase);
6246 if (decl == NULL_TREE)
6247 return false;
6248 itype = TREE_TYPE (TREE_TYPE (decl));
6249 imode = TYPE_MODE (itype);
6251 /* We could test all of the various optabs involved, but the fact of the
6252 matter is that (with the exception of i486 vs i586 and xadd) all targets
6253 that support any atomic operation optab also implement compare-and-swap.
6254 Let optabs.c take care of expanding any compare-and-swap loop. */
6255 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6256 return false;
6258 gsi = gsi_last_nondebug_bb (load_bb);
6259 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6261 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6262 It only requires that the operation happen atomically. Thus we can
6263 use the RELAXED memory model. */
6264 call = build_call_expr_loc (loc, decl, 3, addr,
6265 fold_convert_loc (loc, itype, rhs),
6266 build_int_cst (NULL,
6267 seq_cst ? MEMMODEL_SEQ_CST
6268 : MEMMODEL_RELAXED));
6270 if (need_old || need_new)
6272 lhs = need_old ? loaded_val : stored_val;
6273 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6274 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6276 else
6277 call = fold_convert_loc (loc, void_type_node, call);
6278 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6279 gsi_remove (&gsi, true);
6281 gsi = gsi_last_nondebug_bb (store_bb);
6282 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6283 gsi_remove (&gsi, true);
6284 gsi = gsi_last_nondebug_bb (store_bb);
6285 stmt = gsi_stmt (gsi);
6286 gsi_remove (&gsi, true);
6288 if (gimple_in_ssa_p (cfun))
6290 release_defs (stmt);
6291 update_ssa (TODO_update_ssa_no_phi);
6294 return true;
6297 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6299 oldval = *addr;
6300 repeat:
6301 newval = rhs; // with oldval replacing *addr in rhs
6302 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6303 if (oldval != newval)
6304 goto repeat;
6306 INDEX is log2 of the size of the data type, and thus usable to find the
6307 index of the builtin decl. */
6309 static bool
6310 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6311 tree addr, tree loaded_val, tree stored_val,
6312 int index)
6314 tree loadedi, storedi, initial, new_storedi, old_vali;
6315 tree type, itype, cmpxchg, iaddr, atype;
6316 gimple_stmt_iterator si;
6317 basic_block loop_header = single_succ (load_bb);
6318 gimple *phi, *stmt;
6319 edge e;
6320 enum built_in_function fncode;
6322 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6323 order to use the RELAXED memory model effectively. */
6324 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6325 + index + 1);
6326 cmpxchg = builtin_decl_explicit (fncode);
6327 if (cmpxchg == NULL_TREE)
6328 return false;
6329 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6330 atype = type;
6331 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6333 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6334 || !can_atomic_load_p (TYPE_MODE (itype)))
6335 return false;
6337 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6338 si = gsi_last_nondebug_bb (load_bb);
6339 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6341 /* For floating-point values, we'll need to view-convert them to integers
6342 so that we can perform the atomic compare and swap. Simplify the
6343 following code by always setting up the "i"ntegral variables. */
6344 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6346 tree iaddr_val;
6348 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6349 true));
6350 atype = itype;
6351 iaddr_val
6352 = force_gimple_operand_gsi (&si,
6353 fold_convert (TREE_TYPE (iaddr), addr),
6354 false, NULL_TREE, true, GSI_SAME_STMT);
6355 stmt = gimple_build_assign (iaddr, iaddr_val);
6356 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6357 loadedi = create_tmp_var (itype);
6358 if (gimple_in_ssa_p (cfun))
6359 loadedi = make_ssa_name (loadedi);
6361 else
6363 iaddr = addr;
6364 loadedi = loaded_val;
6367 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6368 tree loaddecl = builtin_decl_explicit (fncode);
6369 if (loaddecl)
6370 initial
6371 = fold_convert (atype,
6372 build_call_expr (loaddecl, 2, iaddr,
6373 build_int_cst (NULL_TREE,
6374 MEMMODEL_RELAXED)));
6375 else
6377 tree off
6378 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6379 true), 0);
6380 initial = build2 (MEM_REF, atype, iaddr, off);
6383 initial
6384 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6385 GSI_SAME_STMT);
6387 /* Move the value to the LOADEDI temporary. */
6388 if (gimple_in_ssa_p (cfun))
6390 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6391 phi = create_phi_node (loadedi, loop_header);
6392 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6393 initial);
6395 else
6396 gsi_insert_before (&si,
6397 gimple_build_assign (loadedi, initial),
6398 GSI_SAME_STMT);
6399 if (loadedi != loaded_val)
6401 gimple_stmt_iterator gsi2;
6402 tree x;
6404 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6405 gsi2 = gsi_start_bb (loop_header);
6406 if (gimple_in_ssa_p (cfun))
6408 gassign *stmt;
6409 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6410 true, GSI_SAME_STMT);
6411 stmt = gimple_build_assign (loaded_val, x);
6412 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6414 else
6416 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6417 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6418 true, GSI_SAME_STMT);
6421 gsi_remove (&si, true);
6423 si = gsi_last_nondebug_bb (store_bb);
6424 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6426 if (iaddr == addr)
6427 storedi = stored_val;
6428 else
6429 storedi
6430 = force_gimple_operand_gsi (&si,
6431 build1 (VIEW_CONVERT_EXPR, itype,
6432 stored_val), true, NULL_TREE, true,
6433 GSI_SAME_STMT);
6435 /* Build the compare&swap statement. */
6436 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6437 new_storedi = force_gimple_operand_gsi (&si,
6438 fold_convert (TREE_TYPE (loadedi),
6439 new_storedi),
6440 true, NULL_TREE,
6441 true, GSI_SAME_STMT);
6443 if (gimple_in_ssa_p (cfun))
6444 old_vali = loadedi;
6445 else
6447 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6448 stmt = gimple_build_assign (old_vali, loadedi);
6449 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6451 stmt = gimple_build_assign (loadedi, new_storedi);
6452 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6455 /* Note that we always perform the comparison as an integer, even for
6456 floating point. This allows the atomic operation to properly
6457 succeed even with NaNs and -0.0. */
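  /* For instance, if *ADDR holds a NaN, a floating-point OLDVAL != OLDVAL
     test would be true on every pass and the loop would never terminate;
     comparing the integer images instead exits as soon as the
     compare-and-swap succeeds.  */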
6458 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6459 stmt = gimple_build_cond_empty (ne);
6460 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6462 /* Update cfg. */
6463 e = single_succ_edge (store_bb);
6464 e->flags &= ~EDGE_FALLTHRU;
6465 e->flags |= EDGE_FALSE_VALUE;
6466 /* Expect no looping. */
6467 e->probability = profile_probability::guessed_always ();
6469 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6470 e->probability = profile_probability::guessed_never ();
6472 /* Copy the new value to loadedi (we already did that before the condition
6473 if we are not in SSA). */
6474 if (gimple_in_ssa_p (cfun))
6476 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6477 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6480 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6481 gsi_remove (&si, true);
6483 struct loop *loop = alloc_loop ();
6484 loop->header = loop_header;
6485 loop->latch = store_bb;
6486 add_loop (loop, loop_header->loop_father);
6488 if (gimple_in_ssa_p (cfun))
6489 update_ssa (TODO_update_ssa_no_phi);
6491 return true;
6494 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6496 GOMP_atomic_start ();
6497 *addr = rhs;
6498 GOMP_atomic_end ();
6500 The result is not globally atomic, but works so long as all parallel
6501 references are within #pragma omp atomic directives. According to
6502 responses received from omp@openmp.org, this appears to be within spec,
6503 which makes sense, since that's how several other compilers handle
6504 this situation as well.
6505 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6506 expanding. STORED_VAL is the operand of the matching
6507 GIMPLE_OMP_ATOMIC_STORE.
6509 We replace
6510 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6511 loaded_val = *addr;
6513 and replace
6514 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6515 *addr = stored_val;
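
   Roughly, the emitted sequence is therefore

     GOMP_atomic_start ();
     loaded_val = *addr;
     ... statements computing stored_val ...
     *addr = stored_val;
     GOMP_atomic_end ();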
6518 static bool
6519 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6520 tree addr, tree loaded_val, tree stored_val)
6522 gimple_stmt_iterator si;
6523 gassign *stmt;
6524 tree t;
6526 si = gsi_last_nondebug_bb (load_bb);
6527 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6529 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6530 t = build_call_expr (t, 0);
6531 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6533 tree mem = build_simple_mem_ref (addr);
6534 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6535 TREE_OPERAND (mem, 1)
6536 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6537 true),
6538 TREE_OPERAND (mem, 1));
6539 stmt = gimple_build_assign (loaded_val, mem);
6540 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6541 gsi_remove (&si, true);
6543 si = gsi_last_nondebug_bb (store_bb);
6544 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6546 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6547 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6549 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6550 t = build_call_expr (t, 0);
6551 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6552 gsi_remove (&si, true);
6554 if (gimple_in_ssa_p (cfun))
6555 update_ssa (TODO_update_ssa_no_phi);
6556 return true;
6559 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6560 using expand_omp_atomic_fetch_op. If it failed, we try to
6561 call expand_omp_atomic_pipeline, and if it fails too, the
6562 ultimate fallback is wrapping the operation in a mutex
6563 (expand_omp_atomic_mutex). REGION is the atomic region built
6564 by build_omp_regions_1(). */
6566 static void
6567 expand_omp_atomic (struct omp_region *region)
6569 basic_block load_bb = region->entry, store_bb = region->exit;
6570 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6571 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6572 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6573 tree addr = gimple_omp_atomic_load_rhs (load);
6574 tree stored_val = gimple_omp_atomic_store_val (store);
6575 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6576 HOST_WIDE_INT index;
6578 /* Make sure the type is one of the supported sizes. */
6579 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6580 index = exact_log2 (index);
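  /* For instance, a 4-byte int yields INDEX == 2; the supported sizes are
     1, 2, 4, 8 and 16 bytes (INDEX 0 through 4).  */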
6581 if (index >= 0 && index <= 4)
6583 unsigned int align = TYPE_ALIGN_UNIT (type);
6585 /* __sync builtins require strict data alignment. */
6586 if (exact_log2 (align) >= index)
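  /* E.g. a 4-byte type (INDEX == 2) must be aligned to at least 4 bytes.  */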
6588 /* Atomic load. */
6589 scalar_mode smode;
6590 if (loaded_val == stored_val
6591 && (is_int_mode (TYPE_MODE (type), &smode)
6592 || is_float_mode (TYPE_MODE (type), &smode))
6593 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6594 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6595 return;
6597 /* Atomic store. */
6598 if ((is_int_mode (TYPE_MODE (type), &smode)
6599 || is_float_mode (TYPE_MODE (type), &smode))
6600 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6601 && store_bb == single_succ (load_bb)
6602 && first_stmt (store_bb) == store
6603 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6604 stored_val, index))
6605 return;
6607 /* When possible, use specialized atomic update functions. */
6608 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6609 && store_bb == single_succ (load_bb)
6610 && expand_omp_atomic_fetch_op (load_bb, addr,
6611 loaded_val, stored_val, index))
6612 return;
6614 /* If we don't have specialized __sync builtins, try and implement
6615 as a compare and swap loop. */
6616 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6617 loaded_val, stored_val, index))
6618 return;
6622 /* The ultimate fallback is wrapping the operation in a mutex. */
6623 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6626 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6627 at REGION_EXIT. */
6629 static void
6630 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6631 basic_block region_exit)
6633 struct loop *outer = region_entry->loop_father;
6634 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6636 /* Don't parallelize the kernels region if it contains more than one outer
6637 loop. */
6638 unsigned int nr_outer_loops = 0;
6639 struct loop *single_outer = NULL;
6640 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6642 gcc_assert (loop_outer (loop) == outer);
6644 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6645 continue;
6647 if (region_exit != NULL
6648 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6649 continue;
6651 nr_outer_loops++;
6652 single_outer = loop;
6654 if (nr_outer_loops != 1)
6655 return;
6657 for (struct loop *loop = single_outer->inner;
6658 loop != NULL;
6659 loop = loop->inner)
6660 if (loop->next)
6661 return;
6663 /* Mark the loops in the region. */
6664 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6665 loop->in_oacc_kernels_region = true;
6668 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6670 struct GTY(()) grid_launch_attributes_trees
6672 tree kernel_dim_array_type;
6673 tree kernel_lattrs_dimnum_decl;
6674 tree kernel_lattrs_grid_decl;
6675 tree kernel_lattrs_group_decl;
6676 tree kernel_launch_attributes_type;
6679 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6681 /* Create types used to pass kernel launch attributes to target. */
6683 static void
6684 grid_create_kernel_launch_attr_types (void)
6686 if (grid_attr_trees)
6687 return;
6688 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6690 tree dim_arr_index_type
6691 = build_index_type (build_int_cst (integer_type_node, 2));
6692 grid_attr_trees->kernel_dim_array_type
6693 = build_array_type (uint32_type_node, dim_arr_index_type);
6695 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6696 grid_attr_trees->kernel_lattrs_dimnum_decl
6697 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6698 uint32_type_node);
6699 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6701 grid_attr_trees->kernel_lattrs_grid_decl
6702 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6703 grid_attr_trees->kernel_dim_array_type);
6704 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6705 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6706 grid_attr_trees->kernel_lattrs_group_decl
6707 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6708 grid_attr_trees->kernel_dim_array_type);
6709 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6710 = grid_attr_trees->kernel_lattrs_grid_decl;
6711 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6712 "__gomp_kernel_launch_attributes",
6713 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
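  /* The record built above corresponds roughly to

	struct __gomp_kernel_launch_attributes
	{
	  uint32_t ndim;
	  uint32_t grid_size[3];
	  uint32_t group_size[3];
	};
  */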
6716 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6717 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6718 of type uint32_type_node. */
6720 static void
6721 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6722 tree fld_decl, int index, tree value)
6724 tree ref = build4 (ARRAY_REF, uint32_type_node,
6725 build3 (COMPONENT_REF,
6726 grid_attr_trees->kernel_dim_array_type,
6727 range_var, fld_decl, NULL_TREE),
6728 build_int_cst (integer_type_node, index),
6729 NULL_TREE, NULL_TREE);
6730 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6733 /* Return a tree representation of a pointer to a structure with grid and
6734 work-group size information. Statements filling that information will be
6735 inserted before GSI; TGT_STMT is the target statement which has the
6736 necessary information in it. */
6738 static tree
6739 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6740 gomp_target *tgt_stmt)
6742 grid_create_kernel_launch_attr_types ();
6743 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6744 "__kernel_launch_attrs");
6746 unsigned max_dim = 0;
6747 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6748 clause;
6749 clause = OMP_CLAUSE_CHAIN (clause))
6751 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6752 continue;
6754 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6755 max_dim = MAX (dim, max_dim);
6757 grid_insert_store_range_dim (gsi, lattrs,
6758 grid_attr_trees->kernel_lattrs_grid_decl,
6759 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6760 grid_insert_store_range_dim (gsi, lattrs,
6761 grid_attr_trees->kernel_lattrs_group_decl,
6762 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6765 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6766 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6767 gcc_checking_assert (max_dim <= 2);
6768 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6769 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6770 GSI_SAME_STMT);
6771 TREE_ADDRESSABLE (lattrs) = 1;
6772 return build_fold_addr_expr (lattrs);
6775 /* Build target argument identifier from the DEVICE identifier, value
6776 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6778 static tree
6779 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6781 tree t = build_int_cst (integer_type_node, device);
6782 if (subseqent_param)
6783 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6784 build_int_cst (integer_type_node,
6785 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6786 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6787 build_int_cst (integer_type_node, id));
6788 return t;
6791 /* Like above but return it in a type that can be directly stored as an element
6792 of the argument array. */
6794 static tree
6795 get_target_argument_identifier (int device, bool subseqent_param, int id)
6797 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6798 return fold_convert (ptr_type_node, t);
6801 /* Return a target argument consisting of DEVICE identifier, value identifier
6802 ID, and the actual VALUE. */
6804 static tree
6805 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6806 tree value)
6808 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6809 fold_convert (integer_type_node, value),
6810 build_int_cst (unsigned_type_node,
6811 GOMP_TARGET_ARG_VALUE_SHIFT));
6812 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6813 get_target_argument_identifier_1 (device, false, id));
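  /* E.g. a num_teams value of 4 for GOMP_TARGET_ARG_DEVICE_ALL becomes
     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
     | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS,
     carried in a single pointer-sized element.  */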
6814 t = fold_convert (ptr_type_node, t);
6815 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6818 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6819 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6820 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6821 arguments. */
6823 static void
6824 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6825 int id, tree value, vec <tree> *args)
6827 if (tree_fits_shwi_p (value)
6828 && tree_to_shwi (value) > -(1 << 15)
6829 && tree_to_shwi (value) < (1 << 15))
6830 args->quick_push (get_target_argument_value (gsi, device, id, value));
6831 else
6833 args->quick_push (get_target_argument_identifier (device, true, id));
6834 value = fold_convert (ptr_type_node, value);
6835 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6836 GSI_SAME_STMT);
6837 args->quick_push (value);
6841 /* Create an array of arguments that is then passed to GOMP_target. */
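/* A rough sketch of the array laid out below (each element pointer-sized):
   the num_teams argument, the thread_limit argument, optionally an HSA
   attributes identifier followed by a pointer to the kernel launch
   attributes (only when _griddim_ clauses are present), and a terminating
   NULL pointer.  */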
6843 static tree
6844 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6846 auto_vec <tree, 6> args;
6847 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6848 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6849 if (c)
6850 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6851 else
6852 t = integer_minus_one_node;
6853 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6854 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6856 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6857 if (c)
6858 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6859 else
6860 t = integer_minus_one_node;
6861 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6862 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6863 &args);
6865 /* Add HSA-specific grid sizes, if available. */
6866 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6867 OMP_CLAUSE__GRIDDIM_))
6869 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6870 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6871 args.quick_push (t);
6872 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6875 /* Produce more, perhaps device specific, arguments here. */
6877 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6878 args.length () + 1),
6879 ".omp_target_args");
6880 for (unsigned i = 0; i < args.length (); i++)
6882 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6883 build_int_cst (integer_type_node, i),
6884 NULL_TREE, NULL_TREE);
6885 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6886 GSI_SAME_STMT);
6888 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6889 build_int_cst (integer_type_node, args.length ()),
6890 NULL_TREE, NULL_TREE);
6891 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6892 GSI_SAME_STMT);
6893 TREE_ADDRESSABLE (argarray) = 1;
6894 return build_fold_addr_expr (argarray);
6897 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6899 static void
6900 expand_omp_target (struct omp_region *region)
6902 basic_block entry_bb, exit_bb, new_bb;
6903 struct function *child_cfun;
6904 tree child_fn, block, t;
6905 gimple_stmt_iterator gsi;
6906 gomp_target *entry_stmt;
6907 gimple *stmt;
6908 edge e;
6909 bool offloaded, data_region;
6911 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6912 new_bb = region->entry;
6914 offloaded = is_gimple_omp_offloaded (entry_stmt);
6915 switch (gimple_omp_target_kind (entry_stmt))
6917 case GF_OMP_TARGET_KIND_REGION:
6918 case GF_OMP_TARGET_KIND_UPDATE:
6919 case GF_OMP_TARGET_KIND_ENTER_DATA:
6920 case GF_OMP_TARGET_KIND_EXIT_DATA:
6921 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6922 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6923 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6924 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6925 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6926 data_region = false;
6927 break;
6928 case GF_OMP_TARGET_KIND_DATA:
6929 case GF_OMP_TARGET_KIND_OACC_DATA:
6930 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6931 data_region = true;
6932 break;
6933 default:
6934 gcc_unreachable ();
6937 child_fn = NULL_TREE;
6938 child_cfun = NULL;
6939 if (offloaded)
6941 child_fn = gimple_omp_target_child_fn (entry_stmt);
6942 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6945 /* Supported by expand_omp_taskreg, but not here. */
6946 if (child_cfun != NULL)
6947 gcc_checking_assert (!child_cfun->cfg);
6948 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6950 entry_bb = region->entry;
6951 exit_bb = region->exit;
6953 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6955 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6957 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6958 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6959 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6960 DECL_ATTRIBUTES (child_fn)
6961 = tree_cons (get_identifier ("oacc kernels"),
6962 NULL_TREE, DECL_ATTRIBUTES (child_fn));
6965 if (offloaded)
6967 unsigned srcidx, dstidx, num;
6969 /* If the offloading region needs data sent from the parent
6970 function, then the very first statement (except possible
6971 tree profile counter updates) of the offloading body
6972 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6973 &.OMP_DATA_O is passed as an argument to the child function,
6974 we need to replace it with the argument as seen by the child
6975 function.
6977 In most cases, this will end up being the identity assignment
6978 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6979 a function call that has been inlined, the original PARM_DECL
6980 .OMP_DATA_I may have been converted into a different local
6981 variable, in which case we need to keep the assignment. */
6982 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6983 if (data_arg)
6985 basic_block entry_succ_bb = single_succ (entry_bb);
6986 gimple_stmt_iterator gsi;
6987 tree arg;
6988 gimple *tgtcopy_stmt = NULL;
6989 tree sender = TREE_VEC_ELT (data_arg, 0);
6991 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6993 gcc_assert (!gsi_end_p (gsi));
6994 stmt = gsi_stmt (gsi);
6995 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6996 continue;
6998 if (gimple_num_ops (stmt) == 2)
7000 tree arg = gimple_assign_rhs1 (stmt);
7002 /* We're ignoring the subcode because we're
7003 effectively doing a STRIP_NOPS. */
7005 if (TREE_CODE (arg) == ADDR_EXPR
7006 && TREE_OPERAND (arg, 0) == sender)
7008 tgtcopy_stmt = stmt;
7009 break;
7014 gcc_assert (tgtcopy_stmt != NULL);
7015 arg = DECL_ARGUMENTS (child_fn);
7017 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7018 gsi_remove (&gsi, true);
7021 /* Declare local variables needed in CHILD_CFUN. */
7022 block = DECL_INITIAL (child_fn);
7023 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7024 /* The gimplifier could record temporaries in the offloading block
7025 rather than in the containing function's local_decls chain,
7026 which would mean cgraph missed finalizing them. Do it now. */
7027 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7028 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7029 varpool_node::finalize_decl (t);
7030 DECL_SAVED_TREE (child_fn) = NULL;
7031 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7032 gimple_set_body (child_fn, NULL);
7033 TREE_USED (block) = 1;
7035 /* Reset DECL_CONTEXT on function arguments. */
7036 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7037 DECL_CONTEXT (t) = child_fn;
7039 /* Split ENTRY_BB at GIMPLE_*,
7040 so that it can be moved to the child function. */
7041 gsi = gsi_last_nondebug_bb (entry_bb);
7042 stmt = gsi_stmt (gsi);
7043 gcc_assert (stmt
7044 && gimple_code (stmt) == gimple_code (entry_stmt));
7045 e = split_block (entry_bb, stmt);
7046 gsi_remove (&gsi, true);
7047 entry_bb = e->dest;
7048 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7050 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7051 if (exit_bb)
7053 gsi = gsi_last_nondebug_bb (exit_bb);
7054 gcc_assert (!gsi_end_p (gsi)
7055 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7056 stmt = gimple_build_return (NULL);
7057 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7058 gsi_remove (&gsi, true);
7061 /* Move the offloading region into CHILD_CFUN. */
7063 block = gimple_block (entry_stmt);
7065 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7066 if (exit_bb)
7067 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7068 /* When the OMP expansion process cannot guarantee an up-to-date
7069 loop tree, arrange for the child function to fix up its loops. */
7070 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7071 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7073 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7074 num = vec_safe_length (child_cfun->local_decls);
7075 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7077 t = (*child_cfun->local_decls)[srcidx];
7078 if (DECL_CONTEXT (t) == cfun->decl)
7079 continue;
7080 if (srcidx != dstidx)
7081 (*child_cfun->local_decls)[dstidx] = t;
7082 dstidx++;
7084 if (dstidx != num)
7085 vec_safe_truncate (child_cfun->local_decls, dstidx);
7087 /* Inform the callgraph about the new function. */
7088 child_cfun->curr_properties = cfun->curr_properties;
7089 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7090 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7091 cgraph_node *node = cgraph_node::get_create (child_fn);
7092 node->parallelized_function = 1;
7093 cgraph_node::add_new_function (child_fn, true);
7095 /* Add the new function to the offload table. */
7096 if (ENABLE_OFFLOADING)
7098 if (in_lto_p)
7099 DECL_PRESERVE_P (child_fn) = 1;
7100 vec_safe_push (offload_funcs, child_fn);
7103 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7104 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7106 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7107 fixed in a following pass. */
7108 push_cfun (child_cfun);
7109 if (need_asm)
7110 assign_assembler_name_if_needed (child_fn);
7111 cgraph_edge::rebuild_edges ();
7113 /* Some EH regions might become dead, see PR34608. If
7114 pass_cleanup_cfg isn't the first pass to happen with the
7115 new child, these dead EH edges might cause problems.
7116 Clean them up now. */
7117 if (flag_exceptions)
7119 basic_block bb;
7120 bool changed = false;
7122 FOR_EACH_BB_FN (bb, cfun)
7123 changed |= gimple_purge_dead_eh_edges (bb);
7124 if (changed)
7125 cleanup_tree_cfg ();
7127 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7128 verify_loop_structure ();
7129 pop_cfun ();
7131 if (dump_file && !gimple_in_ssa_p (cfun))
7133 omp_any_child_fn_dumped = true;
7134 dump_function_header (dump_file, child_fn, dump_flags);
7135 dump_function_to_file (child_fn, dump_file, dump_flags);
7138 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7141 /* Emit a library call to launch the offloading region, or do data
7142 transfers. */
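/* Roughly, for BUILT_IN_GOMP_TARGET the argument vector assembled below is

     device, &child_fn, mapnum, hostaddrs, sizes, kinds, flags, depend,
     target_args

   the other builtins selected below receive the appropriate subset, plus,
   for the OpenACC ones, the launch (dimension/async/wait) arguments.  */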
7143 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7144 enum built_in_function start_ix;
7145 location_t clause_loc;
7146 unsigned int flags_i = 0;
7148 switch (gimple_omp_target_kind (entry_stmt))
7150 case GF_OMP_TARGET_KIND_REGION:
7151 start_ix = BUILT_IN_GOMP_TARGET;
7152 break;
7153 case GF_OMP_TARGET_KIND_DATA:
7154 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7155 break;
7156 case GF_OMP_TARGET_KIND_UPDATE:
7157 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7158 break;
7159 case GF_OMP_TARGET_KIND_ENTER_DATA:
7160 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7161 break;
7162 case GF_OMP_TARGET_KIND_EXIT_DATA:
7163 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7164 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7165 break;
7166 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7167 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7168 start_ix = BUILT_IN_GOACC_PARALLEL;
7169 break;
7170 case GF_OMP_TARGET_KIND_OACC_DATA:
7171 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7172 start_ix = BUILT_IN_GOACC_DATA_START;
7173 break;
7174 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7175 start_ix = BUILT_IN_GOACC_UPDATE;
7176 break;
7177 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7178 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7179 break;
7180 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7181 start_ix = BUILT_IN_GOACC_DECLARE;
7182 break;
7183 default:
7184 gcc_unreachable ();
7187 clauses = gimple_omp_target_clauses (entry_stmt);
7189 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7190 library choose) and there is no conditional. */
7191 cond = NULL_TREE;
7192 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7194 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7195 if (c)
7196 cond = OMP_CLAUSE_IF_EXPR (c);
7198 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7199 if (c)
7201 /* Even if we pass it to all library function calls, it is currently only
7202 defined/used for the OpenMP target ones. */
7203 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7204 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7205 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7206 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7208 device = OMP_CLAUSE_DEVICE_ID (c);
7209 clause_loc = OMP_CLAUSE_LOCATION (c);
7211 else
7212 clause_loc = gimple_location (entry_stmt);
7214 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7215 if (c)
7216 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7218 /* Ensure 'device' is of the correct type. */
7219 device = fold_convert_loc (clause_loc, integer_type_node, device);
7221 /* If we found the clause 'if (cond)', build
7222 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
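/* This is done by building a small diamond in the CFG, roughly

     cond_bb:  if (cond) goto then_bb; else goto else_bb;
     then_bb:  tmp_var = device;
     else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
     new_bb:   <the library call below then uses tmp_var>

   with the edges and dominators updated accordingly.  */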
7223 if (cond)
7225 cond = gimple_boolify (cond);
7227 basic_block cond_bb, then_bb, else_bb;
7228 edge e;
7229 tree tmp_var;
7231 tmp_var = create_tmp_var (TREE_TYPE (device));
7232 if (offloaded)
7233 e = split_block_after_labels (new_bb);
7234 else
7236 gsi = gsi_last_nondebug_bb (new_bb);
7237 gsi_prev (&gsi);
7238 e = split_block (new_bb, gsi_stmt (gsi));
7240 cond_bb = e->src;
7241 new_bb = e->dest;
7242 remove_edge (e);
7244 then_bb = create_empty_bb (cond_bb);
7245 else_bb = create_empty_bb (then_bb);
7246 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7247 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7249 stmt = gimple_build_cond_empty (cond);
7250 gsi = gsi_last_bb (cond_bb);
7251 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7253 gsi = gsi_start_bb (then_bb);
7254 stmt = gimple_build_assign (tmp_var, device);
7255 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7257 gsi = gsi_start_bb (else_bb);
7258 stmt = gimple_build_assign (tmp_var,
7259 build_int_cst (integer_type_node,
7260 GOMP_DEVICE_HOST_FALLBACK));
7261 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7263 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7264 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7265 add_bb_to_loop (then_bb, cond_bb->loop_father);
7266 add_bb_to_loop (else_bb, cond_bb->loop_father);
7267 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7268 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7270 device = tmp_var;
7271 gsi = gsi_last_nondebug_bb (new_bb);
7273 else
7275 gsi = gsi_last_nondebug_bb (new_bb);
7276 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7277 true, GSI_SAME_STMT);
7280 t = gimple_omp_target_data_arg (entry_stmt);
7281 if (t == NULL)
7283 t1 = size_zero_node;
7284 t2 = build_zero_cst (ptr_type_node);
7285 t3 = t2;
7286 t4 = t2;
7288 else
7290 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7291 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7292 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7293 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7294 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7297 gimple *g;
7298 bool tagging = false;
7299 /* The maximum number of arguments used by any start_ix, not counting varargs. */
7300 auto_vec<tree, 11> args;
7301 args.quick_push (device);
7302 if (offloaded)
7303 args.quick_push (build_fold_addr_expr (child_fn));
7304 args.quick_push (t1);
7305 args.quick_push (t2);
7306 args.quick_push (t3);
7307 args.quick_push (t4);
7308 switch (start_ix)
7310 case BUILT_IN_GOACC_DATA_START:
7311 case BUILT_IN_GOACC_DECLARE:
7312 case BUILT_IN_GOMP_TARGET_DATA:
7313 break;
7314 case BUILT_IN_GOMP_TARGET:
7315 case BUILT_IN_GOMP_TARGET_UPDATE:
7316 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7317 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7318 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7319 if (c)
7320 depend = OMP_CLAUSE_DECL (c);
7321 else
7322 depend = build_int_cst (ptr_type_node, 0);
7323 args.quick_push (depend);
7324 if (start_ix == BUILT_IN_GOMP_TARGET)
7325 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7326 break;
7327 case BUILT_IN_GOACC_PARALLEL:
7328 oacc_set_fn_attrib (child_fn, clauses, &args);
7329 tagging = true;
7330 /* FALLTHRU */
7331 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7332 case BUILT_IN_GOACC_UPDATE:
7334 tree t_async = NULL_TREE;
7336 /* If present, use the value specified by the respective
7337 clause, making sure that it is of the correct type. */
7338 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7339 if (c)
7340 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7341 integer_type_node,
7342 OMP_CLAUSE_ASYNC_EXPR (c));
7343 else if (!tagging)
7344 /* Default values for t_async. */
7345 t_async = fold_convert_loc (gimple_location (entry_stmt),
7346 integer_type_node,
7347 build_int_cst (integer_type_node,
7348 GOMP_ASYNC_SYNC));
7349 if (tagging && t_async)
7351 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7353 if (TREE_CODE (t_async) == INTEGER_CST)
7355 /* See if we can pack the async arg into the tag's
7356 operand. */
7357 i_async = TREE_INT_CST_LOW (t_async);
7358 if (i_async < GOMP_LAUNCH_OP_MAX)
7359 t_async = NULL_TREE;
7360 else
7361 i_async = GOMP_LAUNCH_OP_MAX;
7363 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7364 i_async));
7366 if (t_async)
7367 args.safe_push (t_async);
7369 /* Save the argument index, and ... */
7370 unsigned t_wait_idx = args.length ();
7371 unsigned num_waits = 0;
7372 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7373 if (!tagging || c)
7374 /* ... push a placeholder. */
7375 args.safe_push (integer_zero_node);
7377 for (; c; c = OMP_CLAUSE_CHAIN (c))
7378 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7380 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7381 integer_type_node,
7382 OMP_CLAUSE_WAIT_EXPR (c)));
7383 num_waits++;
7386 if (!tagging || num_waits)
7388 tree len;
7390 /* Now that we know the number, update the placeholder. */
7391 if (tagging)
7392 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7393 else
7394 len = build_int_cst (integer_type_node, num_waits);
7395 len = fold_convert_loc (gimple_location (entry_stmt),
7396 unsigned_type_node, len);
7397 args[t_wait_idx] = len;
7400 break;
7401 default:
7402 gcc_unreachable ();
7404 if (tagging)
7405 /* Push terminal marker - zero. */
7406 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7408 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7409 gimple_set_location (g, gimple_location (entry_stmt));
7410 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7411 if (!offloaded)
7413 g = gsi_stmt (gsi);
7414 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7415 gsi_remove (&gsi, true);
7417 if (data_region && region->exit)
7419 gsi = gsi_last_nondebug_bb (region->exit);
7420 g = gsi_stmt (gsi);
7421 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7422 gsi_remove (&gsi, true);
7426 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7427 the iteration variable derived from the thread number. INTRA_GROUP means this
7428 is an expansion of a loop iterating over work-items within a separate
7429 iteration over groups. */
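/* In effect, for each collapsed dimension DIM the loop control is replaced
   by a single assignment, roughly

     fd.loops[DIM].v = N1 + <HSA work-item/work-group id for DIM> * STEP;

   so that only the loop body remains, executed once per thread.  */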
7431 static void
7432 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7434 gimple_stmt_iterator gsi;
7435 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7436 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7437 == GF_OMP_FOR_KIND_GRID_LOOP);
7438 size_t collapse = gimple_omp_for_collapse (for_stmt);
7439 struct omp_for_data_loop *loops
7440 = XALLOCAVEC (struct omp_for_data_loop,
7441 gimple_omp_for_collapse (for_stmt));
7442 struct omp_for_data fd;
7444 remove_edge (BRANCH_EDGE (kfor->entry));
7445 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7447 gcc_assert (kfor->cont);
7448 omp_extract_for_data (for_stmt, &fd, loops);
7450 gsi = gsi_start_bb (body_bb);
7452 for (size_t dim = 0; dim < collapse; dim++)
7454 tree type, itype;
7455 itype = type = TREE_TYPE (fd.loops[dim].v);
7456 if (POINTER_TYPE_P (type))
7457 itype = signed_type_for (type);
7459 tree n1 = fd.loops[dim].n1;
7460 tree step = fd.loops[dim].step;
7461 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7462 true, NULL_TREE, true, GSI_SAME_STMT);
7463 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7464 true, NULL_TREE, true, GSI_SAME_STMT);
7465 tree threadid;
7466 if (gimple_omp_for_grid_group_iter (for_stmt))
7468 gcc_checking_assert (!intra_group);
7469 threadid = build_call_expr (builtin_decl_explicit
7470 (BUILT_IN_HSA_WORKGROUPID), 1,
7471 build_int_cstu (unsigned_type_node, dim));
7473 else if (intra_group)
7474 threadid = build_call_expr (builtin_decl_explicit
7475 (BUILT_IN_HSA_WORKITEMID), 1,
7476 build_int_cstu (unsigned_type_node, dim));
7477 else
7478 threadid = build_call_expr (builtin_decl_explicit
7479 (BUILT_IN_HSA_WORKITEMABSID), 1,
7480 build_int_cstu (unsigned_type_node, dim));
7481 threadid = fold_convert (itype, threadid);
7482 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7483 true, GSI_SAME_STMT);
7485 tree startvar = fd.loops[dim].v;
7486 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7487 if (POINTER_TYPE_P (type))
7488 t = fold_build_pointer_plus (n1, t);
7489 else
7490 t = fold_build2 (PLUS_EXPR, type, t, n1);
7491 t = fold_convert (type, t);
7492 t = force_gimple_operand_gsi (&gsi, t,
7493 DECL_P (startvar)
7494 && TREE_ADDRESSABLE (startvar),
7495 NULL_TREE, true, GSI_SAME_STMT);
7496 gassign *assign_stmt = gimple_build_assign (startvar, t);
7497 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7499 /* Remove the omp for statement. */
7500 gsi = gsi_last_nondebug_bb (kfor->entry);
7501 gsi_remove (&gsi, true);
7503 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7504 gsi = gsi_last_nondebug_bb (kfor->cont);
7505 gcc_assert (!gsi_end_p (gsi)
7506 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7507 gsi_remove (&gsi, true);
7509 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7510 gsi = gsi_last_nondebug_bb (kfor->exit);
7511 gcc_assert (!gsi_end_p (gsi)
7512 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7513 if (intra_group)
7514 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7515 gsi_remove (&gsi, true);
7517 /* Fixup the much simpler CFG. */
7518 remove_edge (find_edge (kfor->cont, body_bb));
7520 if (kfor->cont != body_bb)
7521 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7522 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7525 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7526 argument_decls. */
7528 struct grid_arg_decl_map
7530 tree old_arg;
7531 tree new_arg;
7534 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7535 pertaining to the kernel function. */
7537 static tree
7538 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7540 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7541 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7542 tree t = *tp;
7544 if (t == adm->old_arg)
7545 *tp = adm->new_arg;
7546 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7547 return NULL_TREE;
7550 /* If the TARGET region contains a kernel body for loop, remove its region from the
7551 TARGET and expand it in HSA gridified kernel fashion. */
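/* In short: the GIMPLE_OMP_GRID_BODY region found inside TARGET is moved
   into a fresh "kernel" clone of the target child function, its gridified
   GIMPLE_OMP_FOR loops are flattened by grid_expand_omp_for_loop, and the
   resulting function is registered with the HSA backend.  */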
7553 static void
7554 grid_expand_target_grid_body (struct omp_region *target)
7556 if (!hsa_gen_requested_p ())
7557 return;
7559 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7560 struct omp_region **pp;
7562 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7563 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7564 break;
7566 struct omp_region *gpukernel = *pp;
7568 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7569 if (!gpukernel)
7571 /* HSA cannot handle OACC stuff. */
7572 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7573 return;
7574 gcc_checking_assert (orig_child_fndecl);
7575 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7576 OMP_CLAUSE__GRIDDIM_));
7577 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7579 hsa_register_kernel (n);
7580 return;
7583 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7584 OMP_CLAUSE__GRIDDIM_));
7585 tree inside_block
7586 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7587 *pp = gpukernel->next;
7588 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7589 if ((*pp)->type == GIMPLE_OMP_FOR)
7590 break;
7592 struct omp_region *kfor = *pp;
7593 gcc_assert (kfor);
7594 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7595 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7596 *pp = kfor->next;
7597 if (kfor->inner)
7599 if (gimple_omp_for_grid_group_iter (for_stmt))
7601 struct omp_region **next_pp;
7602 for (pp = &kfor->inner; *pp; pp = next_pp)
7604 next_pp = &(*pp)->next;
7605 if ((*pp)->type != GIMPLE_OMP_FOR)
7606 continue;
7607 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7608 gcc_assert (gimple_omp_for_kind (inner)
7609 == GF_OMP_FOR_KIND_GRID_LOOP);
7610 grid_expand_omp_for_loop (*pp, true);
7611 *pp = (*pp)->next;
7612 next_pp = pp;
7615 expand_omp (kfor->inner);
7617 if (gpukernel->inner)
7618 expand_omp (gpukernel->inner);
7620 tree kern_fndecl = copy_node (orig_child_fndecl);
7621 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7622 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7623 tree tgtblock = gimple_block (tgt_stmt);
7624 tree fniniblock = make_node (BLOCK);
7625 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7626 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7627 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7628 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7629 DECL_INITIAL (kern_fndecl) = fniniblock;
7630 push_struct_function (kern_fndecl);
7631 cfun->function_end_locus = gimple_location (tgt_stmt);
7632 init_tree_ssa (cfun);
7633 pop_cfun ();
7635 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7636 gcc_assert (!DECL_CHAIN (old_parm_decl));
7637 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7638 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7639 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7640 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7641 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7642 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7643 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7644 kern_cfun->curr_properties = cfun->curr_properties;
7646 grid_expand_omp_for_loop (kfor, false);
7648 /* Remove the omp for statement. */
7649 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7650 gsi_remove (&gsi, true);
7651 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7652 return. */
7653 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7654 gcc_assert (!gsi_end_p (gsi)
7655 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7656 gimple *ret_stmt = gimple_build_return (NULL);
7657 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7658 gsi_remove (&gsi, true);
7660 /* Statements in the first BB in the target construct have been produced by
7661 target lowering and must be copied inside the GPUKERNEL, with the two
7662 exceptions of the first OMP statement and the OMP_DATA assignment
7663 statement. */
7664 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7665 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7666 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7667 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7668 !gsi_end_p (tsi); gsi_next (&tsi))
7670 gimple *stmt = gsi_stmt (tsi);
7671 if (is_gimple_omp (stmt))
7672 break;
7673 if (sender
7674 && is_gimple_assign (stmt)
7675 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7676 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7677 continue;
7678 gimple *copy = gimple_copy (stmt);
7679 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7680 gimple_set_block (copy, fniniblock);
7683 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7684 gpukernel->exit, inside_block);
7686 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7687 kcn->mark_force_output ();
7688 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7690 hsa_register_kernel (kcn, orig_child);
7692 cgraph_node::add_new_function (kern_fndecl, true);
7693 push_cfun (kern_cfun);
7694 cgraph_edge::rebuild_edges ();
7696 /* Re-map any mention of the PARM_DECL of the original function to the
7697 PARM_DECL of the new one.
7699 TODO: It would be great if lowering produced references into the GPU
7700 kernel decl straight away and we did not have to do this. */
7701 struct grid_arg_decl_map adm;
7702 adm.old_arg = old_parm_decl;
7703 adm.new_arg = new_parm_decl;
7704 basic_block bb;
7705 FOR_EACH_BB_FN (bb, kern_cfun)
7707 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7709 gimple *stmt = gsi_stmt (gsi);
7710 struct walk_stmt_info wi;
7711 memset (&wi, 0, sizeof (wi));
7712 wi.info = &adm;
7713 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7716 pop_cfun ();
7718 return;
7721 /* Expand the parallel region tree rooted at REGION. Expansion
7722 proceeds in depth-first order. Innermost regions are expanded
7723 first. This way, parallel regions that require a new function to
7724 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7725 internal dependencies in their body. */
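/* The loop below walks REGION and its siblings via region->next,
   expanding region->inner first and then dispatching on region->type.  */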
7727 static void
7728 expand_omp (struct omp_region *region)
7730 omp_any_child_fn_dumped = false;
7731 while (region)
7733 location_t saved_location;
7734 gimple *inner_stmt = NULL;
7736 /* First, determine whether this is a combined parallel+workshare
7737 region. */
7738 if (region->type == GIMPLE_OMP_PARALLEL)
7739 determine_parallel_type (region);
7740 else if (region->type == GIMPLE_OMP_TARGET)
7741 grid_expand_target_grid_body (region);
7743 if (region->type == GIMPLE_OMP_FOR
7744 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7745 inner_stmt = last_stmt (region->inner->entry);
7747 if (region->inner)
7748 expand_omp (region->inner);
7750 saved_location = input_location;
7751 if (gimple_has_location (last_stmt (region->entry)))
7752 input_location = gimple_location (last_stmt (region->entry));
7754 switch (region->type)
7756 case GIMPLE_OMP_PARALLEL:
7757 case GIMPLE_OMP_TASK:
7758 expand_omp_taskreg (region);
7759 break;
7761 case GIMPLE_OMP_FOR:
7762 expand_omp_for (region, inner_stmt);
7763 break;
7765 case GIMPLE_OMP_SECTIONS:
7766 expand_omp_sections (region);
7767 break;
7769 case GIMPLE_OMP_SECTION:
7770 /* Individual omp sections are handled together with their
7771 parent GIMPLE_OMP_SECTIONS region. */
7772 break;
7774 case GIMPLE_OMP_SINGLE:
7775 expand_omp_single (region);
7776 break;
7778 case GIMPLE_OMP_ORDERED:
7780 gomp_ordered *ord_stmt
7781 = as_a <gomp_ordered *> (last_stmt (region->entry));
7782 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7783 OMP_CLAUSE_DEPEND))
7785 /* We'll expand these when expanding the corresponding
7786 worksharing region with an ordered(n) clause. */
7787 gcc_assert (region->outer
7788 && region->outer->type == GIMPLE_OMP_FOR);
7789 region->ord_stmt = ord_stmt;
7790 break;
7793 /* FALLTHRU */
7794 case GIMPLE_OMP_MASTER:
7795 case GIMPLE_OMP_TASKGROUP:
7796 case GIMPLE_OMP_CRITICAL:
7797 case GIMPLE_OMP_TEAMS:
7798 expand_omp_synch (region);
7799 break;
7801 case GIMPLE_OMP_ATOMIC_LOAD:
7802 expand_omp_atomic (region);
7803 break;
7805 case GIMPLE_OMP_TARGET:
7806 expand_omp_target (region);
7807 break;
7809 default:
7810 gcc_unreachable ();
7813 input_location = saved_location;
7814 region = region->next;
7816 if (omp_any_child_fn_dumped)
7818 if (dump_file)
7819 dump_function_header (dump_file, current_function_decl, dump_flags);
7820 omp_any_child_fn_dumped = false;
7824 /* Helper for build_omp_regions. Scan the dominator tree starting at
7825 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7826 true, the function ends once a single tree is built (otherwise, a whole
7827 forest of OMP constructs may be built). */
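/* GIMPLE_OMP_RETURN and GIMPLE_OMP_ATOMIC_STORE close the current region,
   GIMPLE_OMP_CONTINUE records the continue block, and most other OMP
   statements open a new region; stand-alone directives (target
   enter/exit/update data, ordered with depend) do not start one.  */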
7829 static void
7830 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7831 bool single_tree)
7833 gimple_stmt_iterator gsi;
7834 gimple *stmt;
7835 basic_block son;
7837 gsi = gsi_last_nondebug_bb (bb);
7838 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7840 struct omp_region *region;
7841 enum gimple_code code;
7843 stmt = gsi_stmt (gsi);
7844 code = gimple_code (stmt);
7845 if (code == GIMPLE_OMP_RETURN)
7847 /* STMT is the return point out of region PARENT. Mark it
7848 as the exit point and make PARENT the immediately
7849 enclosing region. */
7850 gcc_assert (parent);
7851 region = parent;
7852 region->exit = bb;
7853 parent = parent->outer;
7855 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7857 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7858 GIMPLE_OMP_RETURN, but matches with
7859 GIMPLE_OMP_ATOMIC_LOAD. */
7860 gcc_assert (parent);
7861 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7862 region = parent;
7863 region->exit = bb;
7864 parent = parent->outer;
7866 else if (code == GIMPLE_OMP_CONTINUE)
7868 gcc_assert (parent);
7869 parent->cont = bb;
7871 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7873 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7874 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7876 else
7878 region = new_omp_region (bb, code, parent);
7879 /* Otherwise... */
7880 if (code == GIMPLE_OMP_TARGET)
7882 switch (gimple_omp_target_kind (stmt))
7884 case GF_OMP_TARGET_KIND_REGION:
7885 case GF_OMP_TARGET_KIND_DATA:
7886 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7887 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7888 case GF_OMP_TARGET_KIND_OACC_DATA:
7889 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7890 break;
7891 case GF_OMP_TARGET_KIND_UPDATE:
7892 case GF_OMP_TARGET_KIND_ENTER_DATA:
7893 case GF_OMP_TARGET_KIND_EXIT_DATA:
7894 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7895 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7896 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7897 /* ..., other than for those stand-alone directives... */
7898 region = NULL;
7899 break;
7900 default:
7901 gcc_unreachable ();
7904 else if (code == GIMPLE_OMP_ORDERED
7905 && omp_find_clause (gimple_omp_ordered_clauses
7906 (as_a <gomp_ordered *> (stmt)),
7907 OMP_CLAUSE_DEPEND))
7908 /* #pragma omp ordered depend is also just a stand-alone
7909 directive. */
7910 region = NULL;
7911 /* ..., this directive becomes the parent for a new region. */
7912 if (region)
7913 parent = region;
7917 if (single_tree && !parent)
7918 return;
7920 for (son = first_dom_son (CDI_DOMINATORS, bb);
7921 son;
7922 son = next_dom_son (CDI_DOMINATORS, son))
7923 build_omp_regions_1 (son, parent, single_tree);
7926 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7927 root_omp_region. */
7929 static void
7930 build_omp_regions_root (basic_block root)
7932 gcc_assert (root_omp_region == NULL);
7933 build_omp_regions_1 (root, NULL, true);
7934 gcc_assert (root_omp_region != NULL);
7937 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7939 void
7940 omp_expand_local (basic_block head)
7942 build_omp_regions_root (head);
7943 if (dump_file && (dump_flags & TDF_DETAILS))
7945 fprintf (dump_file, "\nOMP region tree\n\n");
7946 dump_omp_region (dump_file, root_omp_region, 0);
7947 fprintf (dump_file, "\n");
7950 remove_exit_barriers (root_omp_region);
7951 expand_omp (root_omp_region);
7953 omp_free_regions ();
7956 /* Scan the CFG and build a tree of OMP regions, storing it in
7957 root_omp_region. */
7959 static void
7960 build_omp_regions (void)
7962 gcc_assert (root_omp_region == NULL);
7963 calculate_dominance_info (CDI_DOMINATORS);
7964 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7967 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7969 static unsigned int
7970 execute_expand_omp (void)
7972 build_omp_regions ();
7974 if (!root_omp_region)
7975 return 0;
7977 if (dump_file)
7979 fprintf (dump_file, "\nOMP region tree\n\n");
7980 dump_omp_region (dump_file, root_omp_region, 0);
7981 fprintf (dump_file, "\n");
7984 remove_exit_barriers (root_omp_region);
7986 expand_omp (root_omp_region);
7988 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7989 verify_loop_structure ();
7990 cleanup_tree_cfg ();
7992 omp_free_regions ();
7994 return 0;
7997 /* OMP expansion -- the default pass, run before creation of SSA form. */
7999 namespace {
8001 const pass_data pass_data_expand_omp =
8003 GIMPLE_PASS, /* type */
8004 "ompexp", /* name */
8005 OPTGROUP_OMP, /* optinfo_flags */
8006 TV_NONE, /* tv_id */
8007 PROP_gimple_any, /* properties_required */
8008 PROP_gimple_eomp, /* properties_provided */
8009 0, /* properties_destroyed */
8010 0, /* todo_flags_start */
8011 0, /* todo_flags_finish */
8014 class pass_expand_omp : public gimple_opt_pass
8016 public:
8017 pass_expand_omp (gcc::context *ctxt)
8018 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8021 /* opt_pass methods: */
8022 virtual unsigned int execute (function *)
8024 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8025 || flag_openmp_simd != 0)
8026 && !seen_error ());
8028 /* This pass always runs, to provide PROP_gimple_eomp.
8029 But often, there is nothing to do. */
8030 if (!gate)
8031 return 0;
8033 return execute_expand_omp ();
8036 }; // class pass_expand_omp
8038 } // anon namespace
8040 gimple_opt_pass *
8041 make_pass_expand_omp (gcc::context *ctxt)
8043 return new pass_expand_omp (ctxt);
8046 namespace {
8048 const pass_data pass_data_expand_omp_ssa =
8050 GIMPLE_PASS, /* type */
8051 "ompexpssa", /* name */
8052 OPTGROUP_OMP, /* optinfo_flags */
8053 TV_NONE, /* tv_id */
8054 PROP_cfg | PROP_ssa, /* properties_required */
8055 PROP_gimple_eomp, /* properties_provided */
8056 0, /* properties_destroyed */
8057 0, /* todo_flags_start */
8058 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8061 class pass_expand_omp_ssa : public gimple_opt_pass
8063 public:
8064 pass_expand_omp_ssa (gcc::context *ctxt)
8065 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8068 /* opt_pass methods: */
8069 virtual bool gate (function *fun)
8071 return !(fun->curr_properties & PROP_gimple_eomp);
8073 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8074 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8076 }; // class pass_expand_omp_ssa
8078 } // anon namespace
8080 gimple_opt_pass *
8081 make_pass_expand_omp_ssa (gcc::context *ctxt)
8083 return new pass_expand_omp_ssa (ctxt);
8086 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8087 GIMPLE_* codes. */
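/* Returns true if the caller should add the usual fallthru edge from BB
   to the next block.  */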
8089 bool
8090 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8091 int *region_idx)
8093 gimple *last = last_stmt (bb);
8094 enum gimple_code code = gimple_code (last);
8095 struct omp_region *cur_region = *region;
8096 bool fallthru = false;
8098 switch (code)
8100 case GIMPLE_OMP_PARALLEL:
8101 case GIMPLE_OMP_TASK:
8102 case GIMPLE_OMP_FOR:
8103 case GIMPLE_OMP_SINGLE:
8104 case GIMPLE_OMP_TEAMS:
8105 case GIMPLE_OMP_MASTER:
8106 case GIMPLE_OMP_TASKGROUP:
8107 case GIMPLE_OMP_CRITICAL:
8108 case GIMPLE_OMP_SECTION:
8109 case GIMPLE_OMP_GRID_BODY:
8110 cur_region = new_omp_region (bb, code, cur_region);
8111 fallthru = true;
8112 break;
8114 case GIMPLE_OMP_ORDERED:
8115 cur_region = new_omp_region (bb, code, cur_region);
8116 fallthru = true;
8117 if (omp_find_clause (gimple_omp_ordered_clauses
8118 (as_a <gomp_ordered *> (last)),
8119 OMP_CLAUSE_DEPEND))
8120 cur_region = cur_region->outer;
8121 break;
8123 case GIMPLE_OMP_TARGET:
8124 cur_region = new_omp_region (bb, code, cur_region);
8125 fallthru = true;
8126 switch (gimple_omp_target_kind (last))
8128 case GF_OMP_TARGET_KIND_REGION:
8129 case GF_OMP_TARGET_KIND_DATA:
8130 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8131 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8132 case GF_OMP_TARGET_KIND_OACC_DATA:
8133 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8134 break;
8135 case GF_OMP_TARGET_KIND_UPDATE:
8136 case GF_OMP_TARGET_KIND_ENTER_DATA:
8137 case GF_OMP_TARGET_KIND_EXIT_DATA:
8138 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8139 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8140 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8141 cur_region = cur_region->outer;
8142 break;
8143 default:
8144 gcc_unreachable ();
8146 break;
8148 case GIMPLE_OMP_SECTIONS:
8149 cur_region = new_omp_region (bb, code, cur_region);
8150 fallthru = true;
8151 break;
8153 case GIMPLE_OMP_SECTIONS_SWITCH:
8154 fallthru = false;
8155 break;
8157 case GIMPLE_OMP_ATOMIC_LOAD:
8158 case GIMPLE_OMP_ATOMIC_STORE:
8159 fallthru = true;
8160 break;
8162 case GIMPLE_OMP_RETURN:
8163 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8164 somewhere other than the next block. This will be
8165 created later. */
8166 cur_region->exit = bb;
8167 if (cur_region->type == GIMPLE_OMP_TASK)
8168 /* Add an edge corresponding to not scheduling the task
8169 immediately. */
8170 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8171 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8172 cur_region = cur_region->outer;
8173 break;
8175 case GIMPLE_OMP_CONTINUE:
8176 cur_region->cont = bb;
8177 switch (cur_region->type)
8179 case GIMPLE_OMP_FOR:
8180 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8181 succs edges as abnormal to prevent splitting
8182 them. */
8183 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8184 /* Make the loopback edge. */
8185 make_edge (bb, single_succ (cur_region->entry),
8186 EDGE_ABNORMAL);
8188 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8189 corresponds to the case that the body of the loop
8190 is not executed at all. */
8191 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8192 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8193 fallthru = false;
8194 break;
8196 case GIMPLE_OMP_SECTIONS:
8197 /* Wire up the edges into and out of the nested sections. */
8199 basic_block switch_bb = single_succ (cur_region->entry);
8201 struct omp_region *i;
8202 for (i = cur_region->inner; i ; i = i->next)
8204 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8205 make_edge (switch_bb, i->entry, 0);
8206 make_edge (i->exit, bb, EDGE_FALLTHRU);
8209 /* Make the loopback edge to the block with
8210 GIMPLE_OMP_SECTIONS_SWITCH. */
8211 make_edge (bb, switch_bb, 0);
8213 /* Make the edge from the switch to exit. */
8214 make_edge (switch_bb, bb->next_bb, 0);
8215 fallthru = false;
8217 break;
8219 case GIMPLE_OMP_TASK:
8220 fallthru = true;
8221 break;
8223 default:
8224 gcc_unreachable ();
8226 break;
8228 default:
8229 gcc_unreachable ();
8232 if (*region != cur_region)
8234 *region = cur_region;
8235 if (cur_region)
8236 *region_idx = cur_region->entry->index;
8237 else
8238 *region_idx = 0;
8241 return fallthru;
8244 #include "gt-omp-expand.h"