gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "debug.h"
61 #include "stringpool.h"
62 #include "attribs.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
106 a depend clause. */
107 gomp_ordered *ord_stmt;
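/* Illustrative sketch (added for exposition, not part of the original
   source): for a nest such as

     #pragma omp parallel
       #pragma omp for
         ...

   the parallel directive produces a GIMPLE_OMP_PARALLEL region whose
   INNER field points at the GIMPLE_OMP_FOR region; sibling directives
   at the same nesting level are chained through NEXT, and toplevel
   regions hang off root_omp_region.  */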
110 static struct omp_region *root_omp_region;
111 static bool omp_any_child_fn_dumped;
113 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
114 bool = false);
115 static gphi *find_phi_with_arg_on_edge (tree, edge);
116 static void expand_omp (struct omp_region *region);
118 /* Return true if REGION is a combined parallel+workshare region. */
120 static inline bool
121 is_combined_parallel (struct omp_region *region)
123 return region->is_combined_parallel;
126 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
127 is the immediate dominator of PAR_ENTRY_BB, return true if there
128 are no data dependencies that would prevent expanding the parallel
129 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
131 When expanding a combined parallel+workshare region, the call to
132 the child function may need additional arguments in the case of
133 GIMPLE_OMP_FOR regions. In some cases, these arguments are
134 computed out of variables passed in from the parent to the child
135 via 'struct .omp_data_s'. For instance:
137 #pragma omp parallel for schedule (guided, i * 4)
138 for (j ...)
140 Is lowered into:
142 # BLOCK 2 (PAR_ENTRY_BB)
143 .omp_data_o.i = i;
144 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
146 # BLOCK 3 (WS_ENTRY_BB)
147 .omp_data_i = &.omp_data_o;
148 D.1667 = .omp_data_i->i;
149 D.1598 = D.1667 * 4;
150 #pragma omp for schedule (guided, D.1598)
152 When we outline the parallel region, the call to the child function
153 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
154 that value is computed *after* the call site. So, in principle we
155 cannot do the transformation.
157 To see whether the code in WS_ENTRY_BB blocks the combined
158 parallel+workshare call, we collect all the variables used in the
159 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
160 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 call.
163 FIXME. If we had the SSA form built at this point, we could merely
164 hoist the code in block 3 into block 2 and be done with it. But at
165 this point we don't have dataflow information and though we could
166 hack something up here, it is really not worth the aggravation. */
168 static bool
169 workshare_safe_to_combine_p (basic_block ws_entry_bb)
171 struct omp_for_data fd;
172 gimple *ws_stmt = last_stmt (ws_entry_bb);
174 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
175 return true;
177 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule)
207 return chunk_size;
209 int vf = omp_max_vf ();
210 if (vf == 1)
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
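/* Worked example (illustrative assumption): if omp_max_vf () returns 8
   and the requested chunk size is 13, the adjusted value is
   (13 + 7) & -8 == 16, i.e. the chunk size is rounded up to the next
   multiple of the vectorization factor.  */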
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 if (single_succ (par_entry_bb) == ws_entry_bb
315 && single_succ (ws_exit_bb) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
318 || (last_and_only_stmt (ws_entry_bb)
319 && last_and_only_stmt (par_exit_bb))))
321 gimple *par_stmt = last_stmt (par_entry_bb);
322 gimple *ws_stmt = last_stmt (ws_entry_bb);
324 if (region->inner->type == GIMPLE_OMP_FOR)
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses = gimple_omp_for_clauses (ws_stmt);
336 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
337 if (c == NULL
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
339 == OMP_CLAUSE_SCHEDULE_STATIC)
340 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
342 region->is_combined_parallel = false;
343 region->inner->is_combined_parallel = false;
344 return;
348 region->is_combined_parallel = true;
349 region->inner->is_combined_parallel = true;
350 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
354 /* Debugging dumps for parallel regions. */
355 void dump_omp_region (FILE *, struct omp_region *, int);
356 void debug_omp_region (struct omp_region *);
357 void debug_all_omp_regions (void);
359 /* Dump the parallel region tree rooted at REGION. */
361 void
362 dump_omp_region (FILE *file, struct omp_region *region, int indent)
364 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
365 gimple_code_name[region->type]);
367 if (region->inner)
368 dump_omp_region (file, region->inner, indent + 4);
370 if (region->cont)
372 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
373 region->cont->index);
376 if (region->exit)
377 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
378 region->exit->index);
379 else
380 fprintf (file, "%*s[no exit marker]\n", indent, "");
382 if (region->next)
383 dump_omp_region (file, region->next, indent);
386 DEBUG_FUNCTION void
387 debug_omp_region (struct omp_region *region)
389 dump_omp_region (stderr, region, 0);
392 DEBUG_FUNCTION void
393 debug_all_omp_regions (void)
395 dump_omp_region (stderr, root_omp_region, 0);
398 /* Create a new parallel region starting at STMT inside region PARENT. */
400 static struct omp_region *
401 new_omp_region (basic_block bb, enum gimple_code type,
402 struct omp_region *parent)
404 struct omp_region *region = XCNEW (struct omp_region);
406 region->outer = parent;
407 region->entry = bb;
408 region->type = type;
410 if (parent)
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region->next = parent->inner;
415 parent->inner = region;
417 else
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region->next = root_omp_region;
422 root_omp_region = region;
425 return region;
428 /* Release the memory associated with the region tree rooted at REGION. */
430 static void
431 free_omp_region_1 (struct omp_region *region)
433 struct omp_region *i, *n;
435 for (i = region->inner; i ; i = n)
437 n = i->next;
438 free_omp_region_1 (i);
441 free (region);
444 /* Release the memory for the entire omp region tree. */
446 void
447 omp_free_regions (void)
449 struct omp_region *r, *n;
450 for (r = root_omp_region; r ; r = n)
452 n = r->next;
453 free_omp_region_1 (r);
455 root_omp_region = NULL;
458 /* A convenience function to build an empty GIMPLE_COND with just the
459 condition. */
461 static gcond *
462 gimple_build_cond_empty (tree cond)
464 enum tree_code pred_code;
465 tree lhs, rhs;
467 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
468 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
471 /* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
474 static bool
475 parallel_needs_hsa_kernel_p (struct omp_region *region)
477 bool indirect = false;
478 for (region = region->outer; region; region = region->outer)
480 if (region->type == GIMPLE_OMP_PARALLEL)
481 indirect = true;
482 else if (region->type == GIMPLE_OMP_TARGET)
484 gomp_target *tgt_stmt
485 = as_a <gomp_target *> (last_stmt (region->entry));
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
488 OMP_CLAUSE__GRIDDIM_))
489 return indirect;
490 else
491 return true;
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl)))
497 return true;
499 return false;
502 /* Build the function calls to GOMP_parallel_start etc to actually
503 generate the parallel operation. REGION is the parallel region
504 being expanded. BB is the block where to insert the code. WS_ARGS
505 will be set if this is a call to a combined parallel+workshare
506 construct, it contains the list of additional arguments needed by
507 the workshare construct. */
509 static void
510 expand_parallel_call (struct omp_region *region, basic_block bb,
511 gomp_parallel *entry_stmt,
512 vec<tree, va_gc> *ws_args)
514 tree t, t1, t2, val, cond, c, clauses, flags;
515 gimple_stmt_iterator gsi;
516 gimple *stmt;
517 enum built_in_function start_ix;
518 int start_ix2;
519 location_t clause_loc;
520 vec<tree, va_gc> *args;
522 clauses = gimple_omp_parallel_clauses (entry_stmt);
524 /* Determine what flavor of GOMP_parallel we will be
525 emitting. */
526 start_ix = BUILT_IN_GOMP_PARALLEL;
527 if (is_combined_parallel (region))
529 switch (region->inner->type)
531 case GIMPLE_OMP_FOR:
532 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
533 switch (region->inner->sched_kind)
535 case OMP_CLAUSE_SCHEDULE_RUNTIME:
536 start_ix2 = 3;
537 break;
538 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
539 case OMP_CLAUSE_SCHEDULE_GUIDED:
540 if (region->inner->sched_modifiers
541 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
543 start_ix2 = 3 + region->inner->sched_kind;
544 break;
546 /* FALLTHRU */
547 default:
548 start_ix2 = region->inner->sched_kind;
549 break;
551 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
552 start_ix = (enum built_in_function) start_ix2;
553 break;
554 case GIMPLE_OMP_SECTIONS:
555 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
556 break;
557 default:
558 gcc_unreachable ();
562 /* By default, the value of NUM_THREADS is zero (selected at run time)
563 and there is no conditional. */
564 cond = NULL_TREE;
565 val = build_int_cst (unsigned_type_node, 0);
566 flags = build_int_cst (unsigned_type_node, 0);
568 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
569 if (c)
570 cond = OMP_CLAUSE_IF_EXPR (c);
572 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
573 if (c)
575 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
576 clause_loc = OMP_CLAUSE_LOCATION (c);
578 else
579 clause_loc = gimple_location (entry_stmt);
581 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
582 if (c)
583 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
585 /* Ensure 'val' is of the correct type. */
586 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
588 /* If we found the clause 'if (cond)', build either
589 (cond != 0) or (cond ? val : 1u). */
590 if (cond)
592 cond = gimple_boolify (cond);
594 if (integer_zerop (val))
595 val = fold_build2_loc (clause_loc,
596 EQ_EXPR, unsigned_type_node, cond,
597 build_int_cst (TREE_TYPE (cond), 0));
598 else
600 basic_block cond_bb, then_bb, else_bb;
601 edge e, e_then, e_else;
602 tree tmp_then, tmp_else, tmp_join, tmp_var;
604 tmp_var = create_tmp_var (TREE_TYPE (val));
605 if (gimple_in_ssa_p (cfun))
607 tmp_then = make_ssa_name (tmp_var);
608 tmp_else = make_ssa_name (tmp_var);
609 tmp_join = make_ssa_name (tmp_var);
611 else
613 tmp_then = tmp_var;
614 tmp_else = tmp_var;
615 tmp_join = tmp_var;
618 e = split_block_after_labels (bb);
619 cond_bb = e->src;
620 bb = e->dest;
621 remove_edge (e);
623 then_bb = create_empty_bb (cond_bb);
624 else_bb = create_empty_bb (then_bb);
625 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
626 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
628 stmt = gimple_build_cond_empty (cond);
629 gsi = gsi_start_bb (cond_bb);
630 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
632 gsi = gsi_start_bb (then_bb);
633 expand_omp_build_assign (&gsi, tmp_then, val, true);
635 gsi = gsi_start_bb (else_bb);
636 expand_omp_build_assign (&gsi, tmp_else,
637 build_int_cst (unsigned_type_node, 1),
638 true);
640 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
641 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
642 add_bb_to_loop (then_bb, cond_bb->loop_father);
643 add_bb_to_loop (else_bb, cond_bb->loop_father);
644 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
645 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
647 if (gimple_in_ssa_p (cfun))
649 gphi *phi = create_phi_node (tmp_join, bb);
650 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
651 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
654 val = tmp_join;
657 gsi = gsi_start_bb (bb);
658 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
659 false, GSI_CONTINUE_LINKING);
662 gsi = gsi_last_bb (bb);
663 t = gimple_omp_parallel_data_arg (entry_stmt);
664 if (t == NULL)
665 t1 = null_pointer_node;
666 else
667 t1 = build_fold_addr_expr (t);
668 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
669 t2 = build_fold_addr_expr (child_fndecl);
671 vec_alloc (args, 4 + vec_safe_length (ws_args));
672 args->quick_push (t2);
673 args->quick_push (t1);
674 args->quick_push (val);
675 if (ws_args)
676 args->splice (*ws_args);
677 args->quick_push (flags);
679 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
680 builtin_decl_explicit (start_ix), args);
682 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
683 false, GSI_CONTINUE_LINKING);
685 if (hsa_gen_requested_p ()
686 && parallel_needs_hsa_kernel_p (region))
688 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
689 hsa_register_kernel (child_cnode);
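/* Illustrative sketch of the emitted call (hedged; names are for
   exposition only):

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   For a combined parallel+for with, e.g., a dynamic schedule, the
   ws_args collected by get_ws_args_for are spliced in before FLAGS:

     GOMP_parallel_loop_dynamic (child_fn, &.omp_data_o, num_threads,
				 n1, n2, step, chunk, flags);  */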
693 /* Insert a function call whose name is FUNC_NAME with the information from
694 ENTRY_STMT into the basic_block BB. */
696 static void
697 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
698 vec <tree, va_gc> *ws_args)
700 tree t, t1, t2;
701 gimple_stmt_iterator gsi;
702 vec <tree, va_gc> *args;
704 gcc_assert (vec_safe_length (ws_args) == 2);
705 tree func_name = (*ws_args)[0];
706 tree grain = (*ws_args)[1];
708 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
709 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
710 gcc_assert (count != NULL_TREE);
711 count = OMP_CLAUSE_OPERAND (count, 0);
713 gsi = gsi_last_bb (bb);
714 t = gimple_omp_parallel_data_arg (entry_stmt);
715 if (t == NULL)
716 t1 = null_pointer_node;
717 else
718 t1 = build_fold_addr_expr (t);
719 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
721 vec_alloc (args, 4);
722 args->quick_push (t2);
723 args->quick_push (t1);
724 args->quick_push (count);
725 args->quick_push (grain);
726 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
728 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
729 GSI_CONTINUE_LINKING);
732 /* Build the function call to GOMP_task to actually
733 generate the task operation. BB is the block where to insert the code. */
735 static void
736 expand_task_call (struct omp_region *region, basic_block bb,
737 gomp_task *entry_stmt)
739 tree t1, t2, t3;
740 gimple_stmt_iterator gsi;
741 location_t loc = gimple_location (entry_stmt);
743 tree clauses = gimple_omp_task_clauses (entry_stmt);
745 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
746 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
747 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
748 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
749 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
750 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
752 unsigned int iflags
753 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
754 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
755 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
757 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
758 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
759 tree num_tasks = NULL_TREE;
760 bool ull = false;
761 if (taskloop_p)
763 gimple *g = last_stmt (region->outer->entry);
764 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
765 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
766 struct omp_for_data fd;
767 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
768 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
769 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
770 OMP_CLAUSE__LOOPTEMP_);
771 startvar = OMP_CLAUSE_DECL (startvar);
772 endvar = OMP_CLAUSE_DECL (endvar);
773 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
774 if (fd.loop.cond_code == LT_EXPR)
775 iflags |= GOMP_TASK_FLAG_UP;
776 tree tclauses = gimple_omp_for_clauses (g);
777 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
778 if (num_tasks)
779 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
780 else
782 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
783 if (num_tasks)
785 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
786 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
788 else
789 num_tasks = integer_zero_node;
791 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
792 if (ifc == NULL_TREE)
793 iflags |= GOMP_TASK_FLAG_IF;
794 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
795 iflags |= GOMP_TASK_FLAG_NOGROUP;
796 ull = fd.iter_type == long_long_unsigned_type_node;
798 else if (priority)
799 iflags |= GOMP_TASK_FLAG_PRIORITY;
801 tree flags = build_int_cst (unsigned_type_node, iflags);
803 tree cond = boolean_true_node;
804 if (ifc)
806 if (taskloop_p)
808 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
809 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
810 build_int_cst (unsigned_type_node,
811 GOMP_TASK_FLAG_IF),
812 build_int_cst (unsigned_type_node, 0));
813 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
814 flags, t);
816 else
817 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820 if (finalc)
822 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
823 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
824 build_int_cst (unsigned_type_node,
825 GOMP_TASK_FLAG_FINAL),
826 build_int_cst (unsigned_type_node, 0));
827 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
829 if (depend)
830 depend = OMP_CLAUSE_DECL (depend);
831 else
832 depend = build_int_cst (ptr_type_node, 0);
833 if (priority)
834 priority = fold_convert (integer_type_node,
835 OMP_CLAUSE_PRIORITY_EXPR (priority));
836 else
837 priority = integer_zero_node;
839 gsi = gsi_last_bb (bb);
840 tree t = gimple_omp_task_data_arg (entry_stmt);
841 if (t == NULL)
842 t2 = null_pointer_node;
843 else
844 t2 = build_fold_addr_expr_loc (loc, t);
845 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
846 t = gimple_omp_task_copy_fn (entry_stmt);
847 if (t == NULL)
848 t3 = null_pointer_node;
849 else
850 t3 = build_fold_addr_expr_loc (loc, t);
852 if (taskloop_p)
853 t = build_call_expr (ull
854 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
855 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
856 11, t1, t2, t3,
857 gimple_omp_task_arg_size (entry_stmt),
858 gimple_omp_task_arg_align (entry_stmt), flags,
859 num_tasks, priority, startvar, endvar, step);
860 else
861 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
862 9, t1, t2, t3,
863 gimple_omp_task_arg_size (entry_stmt),
864 gimple_omp_task_arg_align (entry_stmt), cond, flags,
865 depend, priority);
867 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
868 false, GSI_CONTINUE_LINKING);
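/* Illustrative sketch (hedged): the non-taskloop branch above emits a
   call of the form

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
		if_cond, flags, depend, priority);

   where DEPEND degrades to a null pointer and PRIORITY to zero when
   the corresponding clauses are absent.  */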
871 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
873 static tree
874 vec2chain (vec<tree, va_gc> *v)
876 tree chain = NULL_TREE, t;
877 unsigned ix;
879 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
881 DECL_CHAIN (t) = chain;
882 chain = t;
885 return chain;
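/* Illustrative example (added): given V containing {a, b, c}, the
   reverse walk above produces the chain a -> b -> c, i.e. the first
   element of the vector ends up at the head of the DECL_CHAIN list.  */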
888 /* Remove barriers in REGION->EXIT's block. Note that this is only
889 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
890 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
891 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
892 removed. */
894 static void
895 remove_exit_barrier (struct omp_region *region)
897 gimple_stmt_iterator gsi;
898 basic_block exit_bb;
899 edge_iterator ei;
900 edge e;
901 gimple *stmt;
902 int any_addressable_vars = -1;
904 exit_bb = region->exit;
906 /* If the parallel region doesn't return, we don't have REGION->EXIT
907 block at all. */
908 if (! exit_bb)
909 return;
911 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
912 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
913 statements that can appear in between are extremely limited -- no
914 memory operations at all. Here, we allow nothing at all, so the
915 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
916 gsi = gsi_last_bb (exit_bb);
917 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
918 gsi_prev (&gsi);
919 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
920 return;
922 FOR_EACH_EDGE (e, ei, exit_bb->preds)
924 gsi = gsi_last_bb (e->src);
925 if (gsi_end_p (gsi))
926 continue;
927 stmt = gsi_stmt (gsi);
928 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
929 && !gimple_omp_return_nowait_p (stmt))
931 /* OpenMP 3.0 tasks unfortunately prevent this optimization
932 in many cases. If there could be tasks queued, the barrier
933 might be needed to let the tasks run before some local
934 variable of the parallel that the task uses as shared
935 runs out of scope. The task can be spawned either
936 from within the current function (this would be easy to check)
937 or from some function it calls and gets passed an address
938 of such a variable. */
939 if (any_addressable_vars < 0)
941 gomp_parallel *parallel_stmt
942 = as_a <gomp_parallel *> (last_stmt (region->entry));
943 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
944 tree local_decls, block, decl;
945 unsigned ix;
947 any_addressable_vars = 0;
948 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
949 if (TREE_ADDRESSABLE (decl))
951 any_addressable_vars = 1;
952 break;
954 for (block = gimple_block (stmt);
955 !any_addressable_vars
956 && block
957 && TREE_CODE (block) == BLOCK;
958 block = BLOCK_SUPERCONTEXT (block))
960 for (local_decls = BLOCK_VARS (block);
961 local_decls;
962 local_decls = DECL_CHAIN (local_decls))
963 if (TREE_ADDRESSABLE (local_decls))
965 any_addressable_vars = 1;
966 break;
968 if (block == gimple_block (parallel_stmt))
969 break;
972 if (!any_addressable_vars)
973 gimple_omp_return_set_nowait (stmt);
978 static void
979 remove_exit_barriers (struct omp_region *region)
981 if (region->type == GIMPLE_OMP_PARALLEL)
982 remove_exit_barrier (region);
984 if (region->inner)
986 region = region->inner;
987 remove_exit_barriers (region);
988 while (region->next)
990 region = region->next;
991 remove_exit_barriers (region);
996 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
997 calls. These can't be declared as const functions, but
998 within one parallel body they are constant, so they can be
999 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1000 which are declared const. Similarly for the task body, except
1001 that in an untied task omp_get_thread_num () can change at any task
1002 scheduling point. */
1004 static void
1005 optimize_omp_library_calls (gimple *entry_stmt)
1007 basic_block bb;
1008 gimple_stmt_iterator gsi;
1009 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1010 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1011 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1012 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1013 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1014 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1015 OMP_CLAUSE_UNTIED) != NULL);
1017 FOR_EACH_BB_FN (bb, cfun)
1018 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1020 gimple *call = gsi_stmt (gsi);
1021 tree decl;
1023 if (is_gimple_call (call)
1024 && (decl = gimple_call_fndecl (call))
1025 && DECL_EXTERNAL (decl)
1026 && TREE_PUBLIC (decl)
1027 && DECL_INITIAL (decl) == NULL)
1029 tree built_in;
1031 if (DECL_NAME (decl) == thr_num_id)
1033 /* In #pragma omp task untied omp_get_thread_num () can change
1034 during the execution of the task region. */
1035 if (untied_task)
1036 continue;
1037 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1039 else if (DECL_NAME (decl) == num_thr_id)
1040 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1041 else
1042 continue;
1044 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1045 || gimple_call_num_args (call) != 0)
1046 continue;
1048 if (flag_exceptions && !TREE_NOTHROW (decl))
1049 continue;
1051 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1052 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1053 TREE_TYPE (TREE_TYPE (built_in))))
1054 continue;
1056 gimple_call_set_fndecl (call, built_in);
1061 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1062 regimplified. */
1064 static tree
1065 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1067 tree t = *tp;
1069 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1070 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1071 return t;
1073 if (TREE_CODE (t) == ADDR_EXPR)
1074 recompute_tree_invariant_for_addr_expr (t);
1076 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1077 return NULL_TREE;
1080 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1082 static void
1083 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1084 bool after)
1086 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1087 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1088 !after, after ? GSI_CONTINUE_LINKING
1089 : GSI_SAME_STMT);
1090 gimple *stmt = gimple_build_assign (to, from);
1091 if (after)
1092 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1093 else
1094 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1095 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1096 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1098 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1099 gimple_regimplify_operands (stmt, &gsi);
1103 /* Expand the OpenMP parallel or task directive starting at REGION. */
1105 static void
1106 expand_omp_taskreg (struct omp_region *region)
1108 basic_block entry_bb, exit_bb, new_bb;
1109 struct function *child_cfun;
1110 tree child_fn, block, t;
1111 gimple_stmt_iterator gsi;
1112 gimple *entry_stmt, *stmt;
1113 edge e;
1114 vec<tree, va_gc> *ws_args;
1116 entry_stmt = last_stmt (region->entry);
1117 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1118 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1120 entry_bb = region->entry;
1121 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1122 exit_bb = region->cont;
1123 else
1124 exit_bb = region->exit;
1126 bool is_cilk_for
1127 = (flag_cilkplus
1128 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1129 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1130 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1132 if (is_cilk_for)
1133 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1134 and the inner statement contains the name of the built-in function
1135 and grain. */
1136 ws_args = region->inner->ws_args;
1137 else if (is_combined_parallel (region))
1138 ws_args = region->ws_args;
1139 else
1140 ws_args = NULL;
1142 if (child_cfun->cfg)
1144 /* Due to inlining, it may happen that we have already outlined
1145 the region, in which case all we need to do is make the
1146 sub-graph unreachable and emit the parallel call. */
1147 edge entry_succ_e, exit_succ_e;
1149 entry_succ_e = single_succ_edge (entry_bb);
1151 gsi = gsi_last_bb (entry_bb);
1152 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154 gsi_remove (&gsi, true);
1156 new_bb = entry_bb;
1157 if (exit_bb)
1159 exit_succ_e = single_succ_edge (exit_bb);
1160 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1162 remove_edge_and_dominated_blocks (entry_succ_e);
1164 else
1166 unsigned srcidx, dstidx, num;
1168 /* If the parallel region needs data sent from the parent
1169 function, then the very first statement (except possible
1170 tree profile counter updates) of the parallel body
1171 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1172 &.OMP_DATA_O is passed as an argument to the child function,
1173 we need to replace it with the argument as seen by the child
1174 function.
1176 In most cases, this will end up being the identity assignment
1177 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1178 a function call that has been inlined, the original PARM_DECL
1179 .OMP_DATA_I may have been converted into a different local
1180 variable, in which case we need to keep the assignment. */
1181 if (gimple_omp_taskreg_data_arg (entry_stmt))
1183 basic_block entry_succ_bb
1184 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185 : FALLTHRU_EDGE (entry_bb)->dest;
1186 tree arg;
1187 gimple *parcopy_stmt = NULL;
1189 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1191 gimple *stmt;
1193 gcc_assert (!gsi_end_p (gsi));
1194 stmt = gsi_stmt (gsi);
1195 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196 continue;
1198 if (gimple_num_ops (stmt) == 2)
1200 tree arg = gimple_assign_rhs1 (stmt);
1202 /* We ignore the subcode because we're
1203 effectively doing a STRIP_NOPS. */
1205 if (TREE_CODE (arg) == ADDR_EXPR
1206 && TREE_OPERAND (arg, 0)
1207 == gimple_omp_taskreg_data_arg (entry_stmt))
1209 parcopy_stmt = stmt;
1210 break;
1215 gcc_assert (parcopy_stmt != NULL);
1216 arg = DECL_ARGUMENTS (child_fn);
1218 if (!gimple_in_ssa_p (cfun))
1220 if (gimple_assign_lhs (parcopy_stmt) == arg)
1221 gsi_remove (&gsi, true);
1222 else
1224 /* ?? Is setting the subcode really necessary ?? */
1225 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1229 else
1231 tree lhs = gimple_assign_lhs (parcopy_stmt);
1232 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233 /* We'd like to set the rhs to the default def in the child_fn,
1234 but it's too early to create ssa names in the child_fn.
1235 Instead, we set the rhs to the parm. In
1236 move_sese_region_to_fn, we introduce a default def for the
1237 parm, map the parm to its default def, and once we encounter
1238 this stmt, replace the parm with the default def. */
1239 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240 update_stmt (parcopy_stmt);
1244 /* Declare local variables needed in CHILD_CFUN. */
1245 block = DECL_INITIAL (child_fn);
1246 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247 /* The gimplifier could record temporaries in parallel/task block
1248 rather than in containing function's local_decls chain,
1249 which would mean cgraph missed finalizing them. Do it now. */
1250 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252 varpool_node::finalize_decl (t);
1253 DECL_SAVED_TREE (child_fn) = NULL;
1254 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1255 gimple_set_body (child_fn, NULL);
1256 TREE_USED (block) = 1;
1258 /* Reset DECL_CONTEXT on function arguments. */
1259 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260 DECL_CONTEXT (t) = child_fn;
1262 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 so that it can be moved to the child function. */
1264 gsi = gsi_last_bb (entry_bb);
1265 stmt = gsi_stmt (gsi);
1266 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268 e = split_block (entry_bb, stmt);
1269 gsi_remove (&gsi, true);
1270 entry_bb = e->dest;
1271 edge e2 = NULL;
1272 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274 else
1276 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277 gcc_assert (e2->dest == region->exit);
1278 remove_edge (BRANCH_EDGE (entry_bb));
1279 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280 gsi = gsi_last_bb (region->exit);
1281 gcc_assert (!gsi_end_p (gsi)
1282 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283 gsi_remove (&gsi, true);
1286 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1287 if (exit_bb)
1289 gsi = gsi_last_bb (exit_bb);
1290 gcc_assert (!gsi_end_p (gsi)
1291 && (gimple_code (gsi_stmt (gsi))
1292 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293 stmt = gimple_build_return (NULL);
1294 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295 gsi_remove (&gsi, true);
1298 /* Move the parallel region into CHILD_CFUN. */
1300 if (gimple_in_ssa_p (cfun))
1302 init_tree_ssa (child_cfun);
1303 init_ssa_operands (child_cfun);
1304 child_cfun->gimple_df->in_ssa_p = true;
1305 block = NULL_TREE;
1307 else
1308 block = gimple_block (entry_stmt);
1310 /* Make sure to generate early debug for the function before
1311 outlining anything. */
1312 if (! gimple_in_ssa_p (cfun))
1313 (*debug_hooks->early_global_decl) (cfun->decl);
1315 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1316 if (exit_bb)
1317 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1318 if (e2)
1320 basic_block dest_bb = e2->dest;
1321 if (!exit_bb)
1322 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1323 remove_edge (e2);
1324 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1326 /* When the OMP expansion process cannot guarantee an up-to-date
1327 loop tree arrange for the child function to fixup loops. */
1328 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1329 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1331 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1332 num = vec_safe_length (child_cfun->local_decls);
1333 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1335 t = (*child_cfun->local_decls)[srcidx];
1336 if (DECL_CONTEXT (t) == cfun->decl)
1337 continue;
1338 if (srcidx != dstidx)
1339 (*child_cfun->local_decls)[dstidx] = t;
1340 dstidx++;
1342 if (dstidx != num)
1343 vec_safe_truncate (child_cfun->local_decls, dstidx);
1345 /* Inform the callgraph about the new function. */
1346 child_cfun->curr_properties = cfun->curr_properties;
1347 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1348 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1349 cgraph_node *node = cgraph_node::get_create (child_fn);
1350 node->parallelized_function = 1;
1351 cgraph_node::add_new_function (child_fn, true);
1353 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1354 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1356 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1357 fixed in a following pass. */
1358 push_cfun (child_cfun);
1359 if (need_asm)
1360 assign_assembler_name_if_needed (child_fn);
1362 if (optimize)
1363 optimize_omp_library_calls (entry_stmt);
1364 cgraph_edge::rebuild_edges ();
1366 /* Some EH regions might become dead, see PR34608. If
1367 pass_cleanup_cfg isn't the first pass to happen with the
1368 new child, these dead EH edges might cause problems.
1369 Clean them up now. */
1370 if (flag_exceptions)
1372 basic_block bb;
1373 bool changed = false;
1375 FOR_EACH_BB_FN (bb, cfun)
1376 changed |= gimple_purge_dead_eh_edges (bb);
1377 if (changed)
1378 cleanup_tree_cfg ();
1380 if (gimple_in_ssa_p (cfun))
1381 update_ssa (TODO_update_ssa);
1382 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1383 verify_loop_structure ();
1384 pop_cfun ();
1386 if (dump_file && !gimple_in_ssa_p (cfun))
1388 omp_any_child_fn_dumped = true;
1389 dump_function_header (dump_file, child_fn, dump_flags);
1390 dump_function_to_file (child_fn, dump_file, dump_flags);
1394 /* Emit a library call to launch the children threads. */
1395 if (is_cilk_for)
1396 expand_cilk_for_call (new_bb,
1397 as_a <gomp_parallel *> (entry_stmt), ws_args);
1398 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1399 expand_parallel_call (region, new_bb,
1400 as_a <gomp_parallel *> (entry_stmt), ws_args);
1401 else
1402 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1403 if (gimple_in_ssa_p (cfun))
1404 update_ssa (TODO_update_ssa_only_virtuals);
1407 /* Information about members of an OpenACC collapsed loop nest. */
1409 struct oacc_collapse
1411 tree base; /* Base value. */
1412 tree iters; /* Number of steps. */
1413 tree step; /* Step size. */
1414 tree tile; /* Tile increment (if tiled). */
1415 tree outer; /* Tile iterator var. */
1418 /* Helper for expand_oacc_for. Determine collapsed loop information.
1419 Fill in COUNTS array. Emit any initialization code before GSI.
1420 Return the calculated outer loop bound of BOUND_TYPE. */
1422 static tree
1423 expand_oacc_collapse_init (const struct omp_for_data *fd,
1424 gimple_stmt_iterator *gsi,
1425 oacc_collapse *counts, tree bound_type,
1426 location_t loc)
1428 tree tiling = fd->tiling;
1429 tree total = build_int_cst (bound_type, 1);
1430 int ix;
1432 gcc_assert (integer_onep (fd->loop.step));
1433 gcc_assert (integer_zerop (fd->loop.n1));
1435 /* When tiling, the first operand of the tile clause applies to the
1436 innermost loop, and we work outwards from there. Seems
1437 backwards, but whatever. */
1438 for (ix = fd->collapse; ix--;)
1440 const omp_for_data_loop *loop = &fd->loops[ix];
1442 tree iter_type = TREE_TYPE (loop->v);
1443 tree diff_type = iter_type;
1444 tree plus_type = iter_type;
1446 gcc_assert (loop->cond_code == fd->loop.cond_code);
1448 if (POINTER_TYPE_P (iter_type))
1449 plus_type = sizetype;
1450 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1451 diff_type = signed_type_for (diff_type);
1453 if (tiling)
1455 tree num = build_int_cst (integer_type_node, fd->collapse);
1456 tree loop_no = build_int_cst (integer_type_node, ix);
1457 tree tile = TREE_VALUE (tiling);
1458 gcall *call
1459 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1460 /* gwv-outer=*/integer_zero_node,
1461 /* gwv-inner=*/integer_zero_node);
1463 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1464 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1465 gimple_call_set_lhs (call, counts[ix].tile);
1466 gimple_set_location (call, loc);
1467 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1469 tiling = TREE_CHAIN (tiling);
1471 else
1473 counts[ix].tile = NULL;
1474 counts[ix].outer = loop->v;
1477 tree b = loop->n1;
1478 tree e = loop->n2;
1479 tree s = loop->step;
1480 bool up = loop->cond_code == LT_EXPR;
1481 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1482 bool negating;
1483 tree expr;
1485 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1486 true, GSI_SAME_STMT);
1487 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1488 true, GSI_SAME_STMT);
1490 /* Convert the step, avoiding possible unsigned->signed overflow. */
1491 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1492 if (negating)
1493 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1494 s = fold_convert (diff_type, s);
1495 if (negating)
1496 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1497 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1498 true, GSI_SAME_STMT);
1500 /* Determine the range, avoiding possible unsigned->signed overflow. */
1501 negating = !up && TYPE_UNSIGNED (iter_type);
1502 expr = fold_build2 (MINUS_EXPR, plus_type,
1503 fold_convert (plus_type, negating ? b : e),
1504 fold_convert (plus_type, negating ? e : b));
1505 expr = fold_convert (diff_type, expr);
1506 if (negating)
1507 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1508 tree range = force_gimple_operand_gsi
1509 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1511 /* Determine number of iterations. */
1512 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1513 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1514 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1516 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1517 true, GSI_SAME_STMT);
1519 counts[ix].base = b;
1520 counts[ix].iters = iters;
1521 counts[ix].step = s;
1523 total = fold_build2 (MULT_EXPR, bound_type, total,
1524 fold_convert (bound_type, iters));
1527 return total;
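/* Worked example (illustrative assumption): for a member loop
   "for (i = 0; i < 10; i += 3)" with cond_code LT_EXPR, range = 10
   and iters = (10 - 1 + 3) / 3 = 4, matching the executed values
   i = 0, 3, 6, 9; TOTAL is the product of the per-loop ITERS.  */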
1530 /* Emit initializers for collapsed loop members. INNER is true if
1531 this is for the element loop of a TILE. IVAR is the outer
1532 loop iteration variable, from which collapsed loop iteration values
1533 are calculated. COUNTS array has been initialized by
1534 expand_oacc_collapse_init. */
1536 static void
1537 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1538 gimple_stmt_iterator *gsi,
1539 const oacc_collapse *counts, tree ivar)
1541 tree ivar_type = TREE_TYPE (ivar);
1543 /* The most rapidly changing iteration variable is the innermost
1544 one. */
1545 for (int ix = fd->collapse; ix--;)
1547 const omp_for_data_loop *loop = &fd->loops[ix];
1548 const oacc_collapse *collapse = &counts[ix];
1549 tree v = inner ? loop->v : collapse->outer;
1550 tree iter_type = TREE_TYPE (v);
1551 tree diff_type = TREE_TYPE (collapse->step);
1552 tree plus_type = iter_type;
1553 enum tree_code plus_code = PLUS_EXPR;
1554 tree expr;
1556 if (POINTER_TYPE_P (iter_type))
1558 plus_code = POINTER_PLUS_EXPR;
1559 plus_type = sizetype;
1562 expr = ivar;
1563 if (ix)
1565 tree mod = fold_convert (ivar_type, collapse->iters);
1566 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1567 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1568 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1569 true, GSI_SAME_STMT);
1572 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1573 collapse->step);
1574 expr = fold_build2 (plus_code, iter_type,
1575 inner ? collapse->outer : collapse->base,
1576 fold_convert (plus_type, expr));
1577 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1578 true, GSI_SAME_STMT);
1579 gassign *ass = gimple_build_assign (v, expr);
1580 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1584 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1585 of the combined collapse > 1 loop constructs, generate code like:
1586 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1587 if (cond3 is <)
1588 adj = STEP3 - 1;
1589 else
1590 adj = STEP3 + 1;
1591 count3 = (adj + N32 - N31) / STEP3;
1592 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1593 if (cond2 is <)
1594 adj = STEP2 - 1;
1595 else
1596 adj = STEP2 + 1;
1597 count2 = (adj + N22 - N21) / STEP2;
1598 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1599 if (cond1 is <)
1600 adj = STEP1 - 1;
1601 else
1602 adj = STEP1 + 1;
1603 count1 = (adj + N12 - N11) / STEP1;
1604 count = count1 * count2 * count3;
1605 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1606 count = 0;
1607 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1608 of the combined loop constructs, just initialize COUNTS array
1609 from the _looptemp_ clauses. */
1611 /* NOTE: It *could* be better to moosh all of the BBs together,
1612 creating one larger BB with all the computation and the unexpected
1613 jump at the end. I.e.
1615 bool zero3, zero2, zero1, zero;
1617 zero3 = N32 c3 N31;
1618 count3 = (N32 - N31) /[cl] STEP3;
1619 zero2 = N22 c2 N21;
1620 count2 = (N22 - N21) /[cl] STEP2;
1621 zero1 = N12 c1 N11;
1622 count1 = (N12 - N11) /[cl] STEP1;
1623 zero = zero3 || zero2 || zero1;
1624 count = count1 * count2 * count3;
1625 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1627 After all, we expect the zero=false, and thus we expect to have to
1628 evaluate all of the comparison expressions, so short-circuiting
1629 oughtn't be a win. Since the condition isn't protecting a
1630 denominator, we're not concerned about divide-by-zero, so we can
1631 fully evaluate count even if a numerator turned out to be wrong.
1633 It seems like putting this all together would create much better
1634 scheduling opportunities, and less pressure on the chip's branch
1635 predictor. */
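/* Worked example (illustrative assumption): for an innermost member
   loop "for (k = 5; k < 17; k += 4)" with cond1 '<', adj = STEP1 - 1 = 3
   and count1 = (3 + 17 - 5) / 4 = 3, matching the executed values
   k = 5, 9, 13; COUNT is then the product of the per-loop counts.  */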
1637 static void
1638 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1639 basic_block &entry_bb, tree *counts,
1640 basic_block &zero_iter1_bb, int &first_zero_iter1,
1641 basic_block &zero_iter2_bb, int &first_zero_iter2,
1642 basic_block &l2_dom_bb)
1644 tree t, type = TREE_TYPE (fd->loop.v);
1645 edge e, ne;
1646 int i;
1648 /* Collapsed loops need work for expansion into SSA form. */
1649 gcc_assert (!gimple_in_ssa_p (cfun));
1651 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1652 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1654 gcc_assert (fd->ordered == 0);
1655 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1656 isn't supposed to be handled, as the inner loop doesn't
1657 use it. */
1658 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1659 OMP_CLAUSE__LOOPTEMP_);
1660 gcc_assert (innerc);
1661 for (i = 0; i < fd->collapse; i++)
1663 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1664 OMP_CLAUSE__LOOPTEMP_);
1665 gcc_assert (innerc);
1666 if (i)
1667 counts[i] = OMP_CLAUSE_DECL (innerc);
1668 else
1669 counts[0] = NULL_TREE;
1671 return;
1674 for (i = fd->collapse; i < fd->ordered; i++)
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1677 counts[i] = NULL_TREE;
1678 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1679 fold_convert (itype, fd->loops[i].n1),
1680 fold_convert (itype, fd->loops[i].n2));
1681 if (t && integer_zerop (t))
1683 for (i = fd->collapse; i < fd->ordered; i++)
1684 counts[i] = build_int_cst (type, 0);
1685 break;
1688 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1690 tree itype = TREE_TYPE (fd->loops[i].v);
1692 if (i >= fd->collapse && counts[i])
1693 continue;
1694 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1695 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1696 fold_convert (itype, fd->loops[i].n1),
1697 fold_convert (itype, fd->loops[i].n2)))
1698 == NULL_TREE || !integer_onep (t)))
1700 gcond *cond_stmt;
1701 tree n1, n2;
1702 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1703 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1706 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1707 true, GSI_SAME_STMT);
1708 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1709 NULL_TREE, NULL_TREE);
1710 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1711 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1712 expand_omp_regimplify_p, NULL, NULL)
1713 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1714 expand_omp_regimplify_p, NULL, NULL))
1716 *gsi = gsi_for_stmt (cond_stmt);
1717 gimple_regimplify_operands (cond_stmt, gsi);
1719 e = split_block (entry_bb, cond_stmt);
1720 basic_block &zero_iter_bb
1721 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1722 int &first_zero_iter
1723 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1724 if (zero_iter_bb == NULL)
1726 gassign *assign_stmt;
1727 first_zero_iter = i;
1728 zero_iter_bb = create_empty_bb (entry_bb);
1729 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1730 *gsi = gsi_after_labels (zero_iter_bb);
1731 if (i < fd->collapse)
1732 assign_stmt = gimple_build_assign (fd->loop.n2,
1733 build_zero_cst (type));
1734 else
1736 counts[i] = create_tmp_reg (type, ".count");
1737 assign_stmt
1738 = gimple_build_assign (counts[i], build_zero_cst (type));
1740 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1741 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1742 entry_bb);
1744 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1745 ne->probability = profile_probability::very_unlikely ();
1746 e->flags = EDGE_TRUE_VALUE;
1747 e->probability = ne->probability.invert ();
1748 if (l2_dom_bb == NULL)
1749 l2_dom_bb = entry_bb;
1750 entry_bb = e->dest;
1751 *gsi = gsi_last_bb (entry_bb);
1754 if (POINTER_TYPE_P (itype))
1755 itype = signed_type_for (itype);
1756 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1757 ? -1 : 1));
1758 t = fold_build2 (PLUS_EXPR, itype,
1759 fold_convert (itype, fd->loops[i].step), t);
1760 t = fold_build2 (PLUS_EXPR, itype, t,
1761 fold_convert (itype, fd->loops[i].n2));
1762 t = fold_build2 (MINUS_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].n1));
1764 /* ?? We could probably use CEIL_DIV_EXPR instead of
1765 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1766 generate the same code in the end because generically we
1767 don't know that the values involved must be negative for
1768 GT?? */
1769 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1770 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1771 fold_build1 (NEGATE_EXPR, itype, t),
1772 fold_build1 (NEGATE_EXPR, itype,
1773 fold_convert (itype,
1774 fd->loops[i].step)));
1775 else
1776 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1777 fold_convert (itype, fd->loops[i].step));
1778 t = fold_convert (type, t);
1779 if (TREE_CODE (t) == INTEGER_CST)
1780 counts[i] = t;
1781 else
1783 if (i < fd->collapse || i != first_zero_iter2)
1784 counts[i] = create_tmp_reg (type, ".count");
1785 expand_omp_build_assign (gsi, counts[i], t);
1787 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1789 if (i == 0)
1790 t = counts[0];
1791 else
1792 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1793 expand_omp_build_assign (gsi, fd->loop.n2, t);
1798 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1799 T = V;
1800 V3 = N31 + (T % count3) * STEP3;
1801 T = T / count3;
1802 V2 = N21 + (T % count2) * STEP2;
1803 T = T / count2;
1804 V1 = N11 + T * STEP1;
1805 if this loop doesn't have an inner loop construct combined with it.
1806 If it does have an inner loop construct combined with it and the
1807 iteration count isn't known constant, store values from counts array
1808 into its _looptemp_ temporaries instead. */
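/* Worked example (illustrative assumption): with count3 = 4 and
   count2 = 5, logical iteration T = 17 decomposes as
     V3 = N31 + (17 % 4) * STEP3 = N31 + 1 * STEP3,  T = 17 / 4 = 4,
     V2 = N21 + (4 % 5) * STEP2 = N21 + 4 * STEP2,   T = 4 / 5 = 0,
     V1 = N11 + 0 * STEP1.  */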
1810 static void
1811 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1812 tree *counts, gimple *inner_stmt, tree startvar)
1814 int i;
1815 if (gimple_omp_for_combined_p (fd->for_stmt))
1817 /* If fd->loop.n2 is constant, then no propagation of the counts
1818 is needed, they are constant. */
1819 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1820 return;
1822 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1823 ? gimple_omp_taskreg_clauses (inner_stmt)
1824 : gimple_omp_for_clauses (inner_stmt);
1825 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1826 isn't supposed to be handled, as the inner loop doesn't
1827 use it. */
1828 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1829 gcc_assert (innerc);
1830 for (i = 0; i < fd->collapse; i++)
1832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1833 OMP_CLAUSE__LOOPTEMP_);
1834 gcc_assert (innerc);
1835 if (i)
1837 tree tem = OMP_CLAUSE_DECL (innerc);
1838 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 gassign *stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1845 return;
1848 tree type = TREE_TYPE (fd->loop.v);
1849 tree tem = create_tmp_reg (type, ".tem");
1850 gassign *stmt = gimple_build_assign (tem, startvar);
1851 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1853 for (i = fd->collapse - 1; i >= 0; i--)
1855 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1856 itype = vtype;
1857 if (POINTER_TYPE_P (vtype))
1858 itype = signed_type_for (vtype);
1859 if (i != 0)
1860 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1861 else
1862 t = tem;
1863 t = fold_convert (itype, t);
1864 t = fold_build2 (MULT_EXPR, itype, t,
1865 fold_convert (itype, fd->loops[i].step));
1866 if (POINTER_TYPE_P (vtype))
1867 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1868 else
1869 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1870 t = force_gimple_operand_gsi (gsi, t,
1871 DECL_P (fd->loops[i].v)
1872 && TREE_ADDRESSABLE (fd->loops[i].v),
1873 NULL_TREE, false,
1874 GSI_CONTINUE_LINKING);
1875 stmt = gimple_build_assign (fd->loops[i].v, t);
1876 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1877 if (i != 0)
1879 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1880 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1881 false, GSI_CONTINUE_LINKING);
1882 stmt = gimple_build_assign (tem, t);
1883 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1888 /* Helper function for expand_omp_for_*. Generate code like:
1889 L10:
1890 V3 += STEP3;
1891 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1892 L11:
1893 V3 = N31;
1894 V2 += STEP2;
1895 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1896 L12:
1897 V2 = N21;
1898 V1 += STEP1;
1899 goto BODY_BB; */
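/* Note: the chain built below tests the loops innermost first; every
   edge back to BODY_BB is given a guessed probability of 7/8 and the
   fall-through to the next outer level 1/8, matching a nest in which
   the inner loops iterate several times per outer iteration. */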
1901 static basic_block
1902 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1903 basic_block body_bb)
1905 basic_block last_bb, bb, collapse_bb = NULL;
1906 int i;
1907 gimple_stmt_iterator gsi;
1908 edge e;
1909 tree t;
1910 gimple *stmt;
1912 last_bb = cont_bb;
1913 for (i = fd->collapse - 1; i >= 0; i--)
1915 tree vtype = TREE_TYPE (fd->loops[i].v);
1917 bb = create_empty_bb (last_bb);
1918 add_bb_to_loop (bb, last_bb->loop_father);
1919 gsi = gsi_start_bb (bb);
1921 if (i < fd->collapse - 1)
1923 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1924 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1926 t = fd->loops[i + 1].n1;
1927 t = force_gimple_operand_gsi (&gsi, t,
1928 DECL_P (fd->loops[i + 1].v)
1929 && TREE_ADDRESSABLE (fd->loops[i
1930 + 1].v),
1931 NULL_TREE, false,
1932 GSI_CONTINUE_LINKING);
1933 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1934 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1936 else
1937 collapse_bb = bb;
1939 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1941 if (POINTER_TYPE_P (vtype))
1942 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1943 else
1944 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1945 t = force_gimple_operand_gsi (&gsi, t,
1946 DECL_P (fd->loops[i].v)
1947 && TREE_ADDRESSABLE (fd->loops[i].v),
1948 NULL_TREE, false, GSI_CONTINUE_LINKING);
1949 stmt = gimple_build_assign (fd->loops[i].v, t);
1950 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1952 if (i > 0)
1954 t = fd->loops[i].n2;
1955 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1956 false, GSI_CONTINUE_LINKING);
1957 tree v = fd->loops[i].v;
1958 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1959 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1960 false, GSI_CONTINUE_LINKING);
1961 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1962 stmt = gimple_build_cond_empty (t);
1963 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1964 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1965 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1967 else
1968 make_edge (bb, body_bb, EDGE_FALLTHRU);
1969 last_bb = bb;
1972 return collapse_bb;
1975 /* Expand #pragma omp ordered depend(source). */
1977 static void
1978 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1979 tree *counts, location_t loc)
1981 enum built_in_function source_ix
1982 = fd->iter_type == long_integer_type_node
1983 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1984 gimple *g
1985 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1986 build_fold_addr_expr (counts[fd->ordered]));
1987 gimple_set_location (g, loc);
1988 gsi_insert_before (gsi, g, GSI_SAME_STMT);
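/* For example (roughly): inside #pragma omp for ordered(2), a
   #pragma omp ordered depend(source) becomes a single call
     GOMP_doacross_post (&.orditera);
   (or the _ull_ variant for unsigned long long iterators), passing the
   address of the .orditera array of current iteration counters. */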
1991 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1993 static void
1994 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1995 tree *counts, tree c, location_t loc)
1997 auto_vec<tree, 10> args;
1998 enum built_in_function sink_ix
1999 = fd->iter_type == long_integer_type_node
2000 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2001 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2002 int i;
2003 gimple_stmt_iterator gsi2 = *gsi;
2004 bool warned_step = false;
2006 for (i = 0; i < fd->ordered; i++)
2008 tree step = NULL_TREE;
2009 off = TREE_PURPOSE (deps);
2010 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2012 step = TREE_OPERAND (off, 1);
2013 off = TREE_OPERAND (off, 0);
2015 if (!integer_zerop (off))
2017 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2018 || fd->loops[i].cond_code == GT_EXPR);
2019 bool forward = fd->loops[i].cond_code == LT_EXPR;
2020 if (step)
2022 /* Non-simple Fortran DO loops. If step is variable,
2023 we don't know even the direction at compile time, so we
2024 can't warn. */
2025 if (TREE_CODE (step) != INTEGER_CST)
2026 break;
2027 forward = tree_int_cst_sgn (step) != -1;
2029 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2030 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2031 "lexically later iteration");
2032 break;
2034 deps = TREE_CHAIN (deps);
2036 /* If all offsets corresponding to the collapsed loops are zero,
2037 this depend clause can be ignored. FIXME: but there is still a
2038 flush needed. We need to emit one __sync_synchronize () for it
2039 though (perhaps conditionally)? Solve this together with the
2040 conservative dependence folding optimization.
2041 if (i >= fd->collapse)
2042 return; */
2044 deps = OMP_CLAUSE_DECL (c);
2045 gsi_prev (&gsi2);
2046 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2047 edge e2 = split_block_after_labels (e1->dest);
2049 gsi2 = gsi_after_labels (e1->dest);
2050 *gsi = gsi_last_bb (e1->src);
2051 for (i = 0; i < fd->ordered; i++)
2053 tree itype = TREE_TYPE (fd->loops[i].v);
2054 tree step = NULL_TREE;
2055 tree orig_off = NULL_TREE;
2056 if (POINTER_TYPE_P (itype))
2057 itype = sizetype;
2058 if (i)
2059 deps = TREE_CHAIN (deps);
2060 off = TREE_PURPOSE (deps);
2061 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2063 step = TREE_OPERAND (off, 1);
2064 off = TREE_OPERAND (off, 0);
2065 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2066 && integer_onep (fd->loops[i].step)
2067 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2069 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2070 if (step)
2072 off = fold_convert_loc (loc, itype, off);
2073 orig_off = off;
2074 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2077 if (integer_zerop (off))
2078 t = boolean_true_node;
2079 else
2081 tree a;
2082 tree co = fold_convert_loc (loc, itype, off);
2083 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2085 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2086 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2087 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2088 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2089 co);
2091 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2092 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2093 fd->loops[i].v, co);
2094 else
2095 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2096 fd->loops[i].v, co);
2097 if (step)
2099 tree t1, t2;
2100 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2101 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2102 fd->loops[i].n1);
2103 else
2104 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2105 fd->loops[i].n2);
2106 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2107 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2108 fd->loops[i].n2);
2109 else
2110 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2111 fd->loops[i].n1);
2112 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2113 step, build_int_cst (TREE_TYPE (step), 0));
2114 if (TREE_CODE (step) != INTEGER_CST)
2116 t1 = unshare_expr (t1);
2117 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2118 false, GSI_CONTINUE_LINKING);
2119 t2 = unshare_expr (t2);
2120 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2121 false, GSI_CONTINUE_LINKING);
2123 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2124 t, t2, t1);
2126 else if (fd->loops[i].cond_code == LT_EXPR)
2128 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2129 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2130 fd->loops[i].n1);
2131 else
2132 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2133 fd->loops[i].n2);
2135 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2136 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2137 fd->loops[i].n2);
2138 else
2139 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2140 fd->loops[i].n1);
2142 if (cond)
2143 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2144 else
2145 cond = t;
2147 off = fold_convert_loc (loc, itype, off);
2149 if (step
2150 || (fd->loops[i].cond_code == LT_EXPR
2151 ? !integer_onep (fd->loops[i].step)
2152 : !integer_minus_onep (fd->loops[i].step)))
2154 if (step == NULL_TREE
2155 && TYPE_UNSIGNED (itype)
2156 && fd->loops[i].cond_code == GT_EXPR)
2157 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2158 fold_build1_loc (loc, NEGATE_EXPR, itype,
2159 s));
2160 else
2161 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2162 orig_off ? orig_off : off, s);
2163 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2164 build_int_cst (itype, 0));
2165 if (integer_zerop (t) && !warned_step)
2167 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2168 "in the iteration space");
2169 warned_step = true;
2171 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2172 cond, t);
2175 if (i <= fd->collapse - 1 && fd->collapse > 1)
2176 t = fd->loop.v;
2177 else if (counts[i])
2178 t = counts[i];
2179 else
2181 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2182 fd->loops[i].v, fd->loops[i].n1);
2183 t = fold_convert_loc (loc, fd->iter_type, t);
2185 if (step)
2186 /* We have divided off by step already earlier. */;
2187 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2188 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2189 fold_build1_loc (loc, NEGATE_EXPR, itype,
2190 s));
2191 else
2192 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2193 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2194 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2195 off = fold_convert_loc (loc, fd->iter_type, off);
2196 if (i <= fd->collapse - 1 && fd->collapse > 1)
2198 if (i)
2199 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2200 off);
2201 if (i < fd->collapse - 1)
2203 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2204 counts[i]);
2205 continue;
2208 off = unshare_expr (off);
2209 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2210 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2211 true, GSI_SAME_STMT);
2212 args.safe_push (t);
2214 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2215 gimple_set_location (g, loc);
2216 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2218 cond = unshare_expr (cond);
2219 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2220 GSI_CONTINUE_LINKING);
2221 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2222 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2223 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2224 e1->probability = e3->probability.invert ();
2225 e1->flags = EDGE_TRUE_VALUE;
2226 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2228 *gsi = gsi_after_labels (e2->dest);
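/* Illustrative sketch (assuming a single non-collapsed < loop with
   unit step): for #pragma omp ordered depend(sink: V - 1) the code
   above expands to roughly
     if (V - 1 >= N1)
       GOMP_doacross_wait ((V - N1) - 1);
   i.e. the wait is skipped when the sink iteration lies outside the
   iteration space, and the offset is translated into the 0-based
   iteration numbering used by the runtime; non-unit steps are
   additionally divided by the step. */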
2231 /* Expand all #pragma omp ordered depend(source) and
2232 #pragma omp ordered depend(sink:...) constructs in the current
2233 #pragma omp for ordered(n) region. */
2235 static void
2236 expand_omp_ordered_source_sink (struct omp_region *region,
2237 struct omp_for_data *fd, tree *counts,
2238 basic_block cont_bb)
2240 struct omp_region *inner;
2241 int i;
2242 for (i = fd->collapse - 1; i < fd->ordered; i++)
2243 if (i == fd->collapse - 1 && fd->collapse > 1)
2244 counts[i] = NULL_TREE;
2245 else if (i >= fd->collapse && !cont_bb)
2246 counts[i] = build_zero_cst (fd->iter_type);
2247 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2248 && integer_onep (fd->loops[i].step))
2249 counts[i] = NULL_TREE;
2250 else
2251 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2252 tree atype
2253 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2254 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2255 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2257 for (inner = region->inner; inner; inner = inner->next)
2258 if (inner->type == GIMPLE_OMP_ORDERED)
2260 gomp_ordered *ord_stmt = inner->ord_stmt;
2261 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2262 location_t loc = gimple_location (ord_stmt);
2263 tree c;
2264 for (c = gimple_omp_ordered_clauses (ord_stmt);
2265 c; c = OMP_CLAUSE_CHAIN (c))
2266 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2267 break;
2268 if (c)
2269 expand_omp_ordered_source (&gsi, fd, counts, loc);
2270 for (c = gimple_omp_ordered_clauses (ord_stmt);
2271 c; c = OMP_CLAUSE_CHAIN (c))
2272 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2273 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2274 gsi_remove (&gsi, true);
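/* Note on the counts layout built above (roughly): counts[fd->collapse
   - 1] .. counts[fd->ordered - 1] are the per-loop iteration counters,
   NULL_TREE when the loop's own iteration variable can be used
   directly and .orditer temporaries otherwise, while counts[fd->ordered]
   is the .orditera array of fd->ordered - fd->collapse + 1 elements
   holding the current iteration numbers passed to the doacross
   runtime calls. */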
2278 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2279 collapsed. */
2281 static basic_block
2282 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2283 basic_block cont_bb, basic_block body_bb,
2284 bool ordered_lastprivate)
2286 if (fd->ordered == fd->collapse)
2287 return cont_bb;
2289 if (!cont_bb)
2291 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2292 for (int i = fd->collapse; i < fd->ordered; i++)
2294 tree type = TREE_TYPE (fd->loops[i].v);
2295 tree n1 = fold_convert (type, fd->loops[i].n1);
2296 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2297 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2298 size_int (i - fd->collapse + 1),
2299 NULL_TREE, NULL_TREE);
2300 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302 return NULL;
2305 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2307 tree t, type = TREE_TYPE (fd->loops[i].v);
2308 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2309 expand_omp_build_assign (&gsi, fd->loops[i].v,
2310 fold_convert (type, fd->loops[i].n1));
2311 if (counts[i])
2312 expand_omp_build_assign (&gsi, counts[i],
2313 build_zero_cst (fd->iter_type));
2314 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2315 size_int (i - fd->collapse + 1),
2316 NULL_TREE, NULL_TREE);
2317 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2318 if (!gsi_end_p (gsi))
2319 gsi_prev (&gsi);
2320 else
2321 gsi = gsi_last_bb (body_bb);
2322 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2323 basic_block new_body = e1->dest;
2324 if (body_bb == cont_bb)
2325 cont_bb = new_body;
2326 edge e2 = NULL;
2327 basic_block new_header;
2328 if (EDGE_COUNT (cont_bb->preds) > 0)
2330 gsi = gsi_last_bb (cont_bb);
2331 if (POINTER_TYPE_P (type))
2332 t = fold_build_pointer_plus (fd->loops[i].v,
2333 fold_convert (sizetype,
2334 fd->loops[i].step));
2335 else
2336 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2337 fold_convert (type, fd->loops[i].step));
2338 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2339 if (counts[i])
2341 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2342 build_int_cst (fd->iter_type, 1));
2343 expand_omp_build_assign (&gsi, counts[i], t);
2344 t = counts[i];
2346 else
2348 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2349 fd->loops[i].v, fd->loops[i].n1);
2350 t = fold_convert (fd->iter_type, t);
2351 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2354 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2355 size_int (i - fd->collapse + 1),
2356 NULL_TREE, NULL_TREE);
2357 expand_omp_build_assign (&gsi, aref, t);
2358 gsi_prev (&gsi);
2359 e2 = split_block (cont_bb, gsi_stmt (gsi));
2360 new_header = e2->dest;
2362 else
2363 new_header = cont_bb;
2364 gsi = gsi_after_labels (new_header);
2365 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2366 true, GSI_SAME_STMT);
2367 tree n2
2368 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2369 true, NULL_TREE, true, GSI_SAME_STMT);
2370 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2371 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2372 edge e3 = split_block (new_header, gsi_stmt (gsi));
2373 cont_bb = e3->dest;
2374 remove_edge (e1);
2375 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2376 e3->flags = EDGE_FALSE_VALUE;
2377 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2378 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2379 e1->probability = e3->probability.invert ();
2381 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2382 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2384 if (e2)
2386 struct loop *loop = alloc_loop ();
2387 loop->header = new_header;
2388 loop->latch = e2->src;
2389 add_loop (loop, body_bb->loop_father);
2393 /* If there are any lastprivate clauses and it is possible some loops
2394 might have zero iterations, ensure all the decls are initialized;
2395 otherwise we could crash evaluating C++ class iterators with lastprivate
2396 clauses. */
2397 bool need_inits = false;
2398 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2399 if (need_inits)
2401 tree type = TREE_TYPE (fd->loops[i].v);
2402 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2403 expand_omp_build_assign (&gsi, fd->loops[i].v,
2404 fold_convert (type, fd->loops[i].n1));
2406 else
2408 tree type = TREE_TYPE (fd->loops[i].v);
2409 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2410 boolean_type_node,
2411 fold_convert (type, fd->loops[i].n1),
2412 fold_convert (type, fd->loops[i].n2));
2413 if (!integer_onep (this_cond))
2414 need_inits = true;
2417 return cont_bb;
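/* E.g. (roughly) for ordered(2) collapse(1) with a continue block, the
   body is wrapped in one new loop over fd->loops[1]: a header testing
   V2 cond2 N22, and a latch split off cont_bb doing V2 += STEP2 and
   storing the updated iteration number into .orditera[1], with the
   stay-in-loop edge given probability 7/8. */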
2420 /* A subroutine of expand_omp_for. Generate code for a parallel
2421 loop with any schedule. Given parameters:
2423 for (V = N1; V cond N2; V += STEP) BODY;
2425 where COND is "<" or ">", we generate pseudocode
2427 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2428 if (more) goto L0; else goto L3;
2430 V = istart0;
2431 iend = iend0;
2433 BODY;
2434 V += STEP;
2435 if (V cond iend) goto L1; else goto L2;
2437 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2440 If this is a combined omp parallel loop, instead of the call to
2441 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2442 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2443 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2444 inner GIMPLE_OMP_FOR, and V += STEP; and
2445 if (V cond iend) goto L1; else goto L2; are removed.
2447 For collapsed loops, given parameters:
2448 collapse(3)
2449 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2450 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2451 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2452 BODY;
2454 we generate pseudocode
2456 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2457 if (cond3 is <)
2458 adj = STEP3 - 1;
2459 else
2460 adj = STEP3 + 1;
2461 count3 = (adj + N32 - N31) / STEP3;
2462 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2463 if (cond2 is <)
2464 adj = STEP2 - 1;
2465 else
2466 adj = STEP2 + 1;
2467 count2 = (adj + N22 - N21) / STEP2;
2468 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2469 if (cond1 is <)
2470 adj = STEP1 - 1;
2471 else
2472 adj = STEP1 + 1;
2473 count1 = (adj + N12 - N11) / STEP1;
2474 count = count1 * count2 * count3;
2475 goto Z1;
2477 count = 0;
2479 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2480 if (more) goto L0; else goto L3;
2482 V = istart0;
2483 T = V;
2484 V3 = N31 + (T % count3) * STEP3;
2485 T = T / count3;
2486 V2 = N21 + (T % count2) * STEP2;
2487 T = T / count2;
2488 V1 = N11 + T * STEP1;
2489 iend = iend0;
2491 BODY;
2492 V += 1;
2493 if (V < iend) goto L10; else goto L2;
2494 L10:
2495 V3 += STEP3;
2496 if (V3 cond3 N32) goto L1; else goto L11;
2497 L11:
2498 V3 = N31;
2499 V2 += STEP2;
2500 if (V2 cond2 N22) goto L1; else goto L12;
2501 L12:
2502 V2 = N21;
2503 V1 += STEP1;
2504 goto L1;
2506 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2511 static void
2512 expand_omp_for_generic (struct omp_region *region,
2513 struct omp_for_data *fd,
2514 enum built_in_function start_fn,
2515 enum built_in_function next_fn,
2516 gimple *inner_stmt)
2518 tree type, istart0, iend0, iend;
2519 tree t, vmain, vback, bias = NULL_TREE;
2520 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2521 basic_block l2_bb = NULL, l3_bb = NULL;
2522 gimple_stmt_iterator gsi;
2523 gassign *assign_stmt;
2524 bool in_combined_parallel = is_combined_parallel (region);
2525 bool broken_loop = region->cont == NULL;
2526 edge e, ne;
2527 tree *counts = NULL;
2528 int i;
2529 bool ordered_lastprivate = false;
2531 gcc_assert (!broken_loop || !in_combined_parallel);
2532 gcc_assert (fd->iter_type == long_integer_type_node
2533 || !in_combined_parallel);
2535 entry_bb = region->entry;
2536 cont_bb = region->cont;
2537 collapse_bb = NULL;
2538 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2539 gcc_assert (broken_loop
2540 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2541 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2542 l1_bb = single_succ (l0_bb);
2543 if (!broken_loop)
2545 l2_bb = create_empty_bb (cont_bb);
2546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2547 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2548 == l1_bb));
2549 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2551 else
2552 l2_bb = NULL;
2553 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2554 exit_bb = region->exit;
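/* Block roles in the expansion below: ENTRY_BB ends with the
   GIMPLE_OMP_FOR and receives the GOMP_loop_*_start call, L0_BB sets
   up V and iend from istart0/iend0 for the current chunk, L1_BB is the
   loop body, CONT_BB increments V and tests it against iend, L2_BB
   calls GOMP_loop_*_next to grab the next chunk, L3_BB is the code
   after the loop, and EXIT_BB ends with the GIMPLE_OMP_RETURN that
   becomes the GOMP_loop_end* call. */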
2556 gsi = gsi_last_bb (entry_bb);
2558 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2559 if (fd->ordered
2560 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2561 OMP_CLAUSE_LASTPRIVATE))
2562 ordered_lastprivate = true;
2563 if (fd->collapse > 1 || fd->ordered)
2565 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2566 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2568 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2569 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2570 zero_iter1_bb, first_zero_iter1,
2571 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2573 if (zero_iter1_bb)
2575 /* Some counts[i] vars might be uninitialized if
2576 some loop has zero iterations. But the body shouldn't
2577 be executed in that case, so just avoid uninit warnings. */
2578 for (i = first_zero_iter1;
2579 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2580 if (SSA_VAR_P (counts[i]))
2581 TREE_NO_WARNING (counts[i]) = 1;
2582 gsi_prev (&gsi);
2583 e = split_block (entry_bb, gsi_stmt (gsi));
2584 entry_bb = e->dest;
2585 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2586 gsi = gsi_last_bb (entry_bb);
2587 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2588 get_immediate_dominator (CDI_DOMINATORS,
2589 zero_iter1_bb));
2591 if (zero_iter2_bb)
2593 /* Some counts[i] vars might be uninitialized if
2594 some loop has zero iterations. But the body shouldn't
2595 be executed in that case, so just avoid uninit warnings. */
2596 for (i = first_zero_iter2; i < fd->ordered; i++)
2597 if (SSA_VAR_P (counts[i]))
2598 TREE_NO_WARNING (counts[i]) = 1;
2599 if (zero_iter1_bb)
2600 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2601 else
2603 gsi_prev (&gsi);
2604 e = split_block (entry_bb, gsi_stmt (gsi));
2605 entry_bb = e->dest;
2606 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2607 gsi = gsi_last_bb (entry_bb);
2608 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2609 get_immediate_dominator
2610 (CDI_DOMINATORS, zero_iter2_bb));
2613 if (fd->collapse == 1)
2615 counts[0] = fd->loop.n2;
2616 fd->loop = fd->loops[0];
2620 type = TREE_TYPE (fd->loop.v);
2621 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2622 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2623 TREE_ADDRESSABLE (istart0) = 1;
2624 TREE_ADDRESSABLE (iend0) = 1;
2626 /* See if we need to bias by LLONG_MIN. */
2627 if (fd->iter_type == long_long_unsigned_type_node
2628 && TREE_CODE (type) == INTEGER_TYPE
2629 && !TYPE_UNSIGNED (type)
2630 && fd->ordered == 0)
2632 tree n1, n2;
2634 if (fd->loop.cond_code == LT_EXPR)
2636 n1 = fd->loop.n1;
2637 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2639 else
2641 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2642 n2 = fd->loop.n1;
2644 if (TREE_CODE (n1) != INTEGER_CST
2645 || TREE_CODE (n2) != INTEGER_CST
2646 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2647 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
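/* Note: the runtime's unsigned long long API cannot represent negative
   bounds directly, so when the loop variable is signed and the bounds
   may be negative, N1/N2 are biased by the type's most negative value
   before the library call, and the bias is subtracted again when
   istart0 and iend0 are turned back into V and iend below. This keeps
   the mapping monotonic for both < and > loops. */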
2650 gimple_stmt_iterator gsif = gsi;
2651 gsi_prev (&gsif);
2653 tree arr = NULL_TREE;
2654 if (in_combined_parallel)
2656 gcc_assert (fd->ordered == 0);
2657 /* In a combined parallel loop, emit a call to
2658 GOMP_loop_foo_next. */
2659 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2660 build_fold_addr_expr (istart0),
2661 build_fold_addr_expr (iend0));
2663 else
2665 tree t0, t1, t2, t3, t4;
2666 /* If this is not a combined parallel loop, emit a call to
2667 GOMP_loop_foo_start in ENTRY_BB. */
2668 t4 = build_fold_addr_expr (iend0);
2669 t3 = build_fold_addr_expr (istart0);
2670 if (fd->ordered)
2672 t0 = build_int_cst (unsigned_type_node,
2673 fd->ordered - fd->collapse + 1);
2674 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2675 fd->ordered
2676 - fd->collapse + 1),
2677 ".omp_counts");
2678 DECL_NAMELESS (arr) = 1;
2679 TREE_ADDRESSABLE (arr) = 1;
2680 TREE_STATIC (arr) = 1;
2681 vec<constructor_elt, va_gc> *v;
2682 vec_alloc (v, fd->ordered - fd->collapse + 1);
2683 int idx;
2685 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2687 tree c;
2688 if (idx == 0 && fd->collapse > 1)
2689 c = fd->loop.n2;
2690 else
2691 c = counts[idx + fd->collapse - 1];
2692 tree purpose = size_int (idx);
2693 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2694 if (TREE_CODE (c) != INTEGER_CST)
2695 TREE_STATIC (arr) = 0;
2698 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2699 if (!TREE_STATIC (arr))
2700 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2701 void_type_node, arr),
2702 true, NULL_TREE, true, GSI_SAME_STMT);
2703 t1 = build_fold_addr_expr (arr);
2704 t2 = NULL_TREE;
2706 else
2708 t2 = fold_convert (fd->iter_type, fd->loop.step);
2709 t1 = fd->loop.n2;
2710 t0 = fd->loop.n1;
2711 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2713 tree innerc
2714 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2715 OMP_CLAUSE__LOOPTEMP_);
2716 gcc_assert (innerc);
2717 t0 = OMP_CLAUSE_DECL (innerc);
2718 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2719 OMP_CLAUSE__LOOPTEMP_);
2720 gcc_assert (innerc);
2721 t1 = OMP_CLAUSE_DECL (innerc);
2723 if (POINTER_TYPE_P (TREE_TYPE (t0))
2724 && TYPE_PRECISION (TREE_TYPE (t0))
2725 != TYPE_PRECISION (fd->iter_type))
2727 /* Avoid casting pointers to integer of a different size. */
2728 tree itype = signed_type_for (type);
2729 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2730 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2732 else
2734 t1 = fold_convert (fd->iter_type, t1);
2735 t0 = fold_convert (fd->iter_type, t0);
2737 if (bias)
2739 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2740 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2743 if (fd->iter_type == long_integer_type_node || fd->ordered)
2745 if (fd->chunk_size)
2747 t = fold_convert (fd->iter_type, fd->chunk_size);
2748 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2749 if (fd->ordered)
2750 t = build_call_expr (builtin_decl_explicit (start_fn),
2751 5, t0, t1, t, t3, t4);
2752 else
2753 t = build_call_expr (builtin_decl_explicit (start_fn),
2754 6, t0, t1, t2, t, t3, t4);
2756 else if (fd->ordered)
2757 t = build_call_expr (builtin_decl_explicit (start_fn),
2758 4, t0, t1, t3, t4);
2759 else
2760 t = build_call_expr (builtin_decl_explicit (start_fn),
2761 5, t0, t1, t2, t3, t4);
2763 else
2765 tree t5;
2766 tree c_bool_type;
2767 tree bfn_decl;
2769 /* The GOMP_loop_ull_*start functions have an additional boolean
2770 argument, true for < loops and false for > loops.
2771 In Fortran, the C bool type can be different from
2772 boolean_type_node. */
2773 bfn_decl = builtin_decl_explicit (start_fn);
2774 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2775 t5 = build_int_cst (c_bool_type,
2776 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2777 if (fd->chunk_size)
2779 tree bfn_decl = builtin_decl_explicit (start_fn);
2780 t = fold_convert (fd->iter_type, fd->chunk_size);
2781 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2782 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2784 else
2785 t = build_call_expr (builtin_decl_explicit (start_fn),
2786 6, t5, t0, t1, t2, t3, t4);
2789 if (TREE_TYPE (t) != boolean_type_node)
2790 t = fold_build2 (NE_EXPR, boolean_type_node,
2791 t, build_int_cst (TREE_TYPE (t), 0));
2792 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2793 true, GSI_SAME_STMT);
2794 if (arr && !TREE_STATIC (arr))
2796 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2797 TREE_THIS_VOLATILE (clobber) = 1;
2798 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2799 GSI_SAME_STMT);
2801 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2803 /* Remove the GIMPLE_OMP_FOR statement. */
2804 gsi_remove (&gsi, true);
2806 if (gsi_end_p (gsif))
2807 gsif = gsi_after_labels (gsi_bb (gsif));
2808 gsi_next (&gsif);
2810 /* Iteration setup for sequential loop goes in L0_BB. */
2811 tree startvar = fd->loop.v;
2812 tree endvar = NULL_TREE;
2814 if (gimple_omp_for_combined_p (fd->for_stmt))
2816 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2817 && gimple_omp_for_kind (inner_stmt)
2818 == GF_OMP_FOR_KIND_SIMD);
2819 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2820 OMP_CLAUSE__LOOPTEMP_);
2821 gcc_assert (innerc);
2822 startvar = OMP_CLAUSE_DECL (innerc);
2823 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2824 OMP_CLAUSE__LOOPTEMP_);
2825 gcc_assert (innerc);
2826 endvar = OMP_CLAUSE_DECL (innerc);
2829 gsi = gsi_start_bb (l0_bb);
2830 t = istart0;
2831 if (fd->ordered && fd->collapse == 1)
2832 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2833 fold_convert (fd->iter_type, fd->loop.step));
2834 else if (bias)
2835 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2836 if (fd->ordered && fd->collapse == 1)
2838 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2839 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2840 fd->loop.n1, fold_convert (sizetype, t));
2841 else
2843 t = fold_convert (TREE_TYPE (startvar), t);
2844 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2845 fd->loop.n1, t);
2848 else
2850 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2851 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2852 t = fold_convert (TREE_TYPE (startvar), t);
2854 t = force_gimple_operand_gsi (&gsi, t,
2855 DECL_P (startvar)
2856 && TREE_ADDRESSABLE (startvar),
2857 NULL_TREE, false, GSI_CONTINUE_LINKING);
2858 assign_stmt = gimple_build_assign (startvar, t);
2859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861 t = iend0;
2862 if (fd->ordered && fd->collapse == 1)
2863 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2864 fold_convert (fd->iter_type, fd->loop.step));
2865 else if (bias)
2866 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2867 if (fd->ordered && fd->collapse == 1)
2869 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2870 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2871 fd->loop.n1, fold_convert (sizetype, t));
2872 else
2874 t = fold_convert (TREE_TYPE (startvar), t);
2875 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2876 fd->loop.n1, t);
2879 else
2881 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2882 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2883 t = fold_convert (TREE_TYPE (startvar), t);
2885 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2886 false, GSI_CONTINUE_LINKING);
2887 if (endvar)
2889 assign_stmt = gimple_build_assign (endvar, iend);
2890 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2891 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2892 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2893 else
2894 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2895 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2897 /* Handle linear clause adjustments. */
2898 tree itercnt = NULL_TREE;
2899 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2900 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2901 c; c = OMP_CLAUSE_CHAIN (c))
2902 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2903 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2905 tree d = OMP_CLAUSE_DECL (c);
2906 bool is_ref = omp_is_reference (d);
2907 tree t = d, a, dest;
2908 if (is_ref)
2909 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2910 tree type = TREE_TYPE (t);
2911 if (POINTER_TYPE_P (type))
2912 type = sizetype;
2913 dest = unshare_expr (t);
2914 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2915 expand_omp_build_assign (&gsif, v, t);
2916 if (itercnt == NULL_TREE)
2918 itercnt = startvar;
2919 tree n1 = fd->loop.n1;
2920 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2922 itercnt
2923 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2924 itercnt);
2925 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2927 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2928 itercnt, n1);
2929 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2930 itercnt, fd->loop.step);
2931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2932 NULL_TREE, false,
2933 GSI_CONTINUE_LINKING);
2935 a = fold_build2 (MULT_EXPR, type,
2936 fold_convert (type, itercnt),
2937 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2938 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2939 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2940 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2941 false, GSI_CONTINUE_LINKING);
2942 assign_stmt = gimple_build_assign (dest, t);
2943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2945 if (fd->collapse > 1)
2946 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2948 if (fd->ordered)
2950 /* Until now, the counts array contained the number of iterations
2951 (or a variable containing it) for each loop. From now on, we need
2952 those counts only for the collapsed loops, and only from the 2nd
2953 collapsed loop to the last one. Move them one element earlier;
2954 we'll use counts[fd->collapse - 1] for the first source/sink
2955 iteration counter and so on, and counts[fd->ordered]
2956 as the array holding the current counter values for
2957 depend(source). */
2958 if (fd->collapse > 1)
2959 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2960 if (broken_loop)
2962 int i;
2963 for (i = fd->collapse; i < fd->ordered; i++)
2965 tree type = TREE_TYPE (fd->loops[i].v);
2966 tree this_cond
2967 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2968 fold_convert (type, fd->loops[i].n1),
2969 fold_convert (type, fd->loops[i].n2));
2970 if (!integer_onep (this_cond))
2971 break;
2973 if (i < fd->ordered)
2975 cont_bb
2976 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2977 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2978 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2979 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2980 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2981 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2982 make_edge (cont_bb, l1_bb, 0);
2983 l2_bb = create_empty_bb (cont_bb);
2984 broken_loop = false;
2987 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2988 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2989 ordered_lastprivate);
2990 if (counts[fd->collapse - 1])
2992 gcc_assert (fd->collapse == 1);
2993 gsi = gsi_last_bb (l0_bb);
2994 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2995 istart0, true);
2996 gsi = gsi_last_bb (cont_bb);
2997 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2998 build_int_cst (fd->iter_type, 1));
2999 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3003 t = counts[fd->collapse - 1];
3005 else if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3013 gsi = gsi_last_bb (l0_bb);
3014 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3015 size_zero_node, NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 false, GSI_CONTINUE_LINKING);
3018 expand_omp_build_assign (&gsi, aref, t, true);
3021 if (!broken_loop)
3023 /* Code to control the increment and predicate for the sequential
3024 loop goes in the CONT_BB. */
3025 gsi = gsi_last_bb (cont_bb);
3026 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3027 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3028 vmain = gimple_omp_continue_control_use (cont_stmt);
3029 vback = gimple_omp_continue_control_def (cont_stmt);
3031 if (!gimple_omp_for_combined_p (fd->for_stmt))
3033 if (POINTER_TYPE_P (type))
3034 t = fold_build_pointer_plus (vmain, fd->loop.step);
3035 else
3036 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3037 t = force_gimple_operand_gsi (&gsi, t,
3038 DECL_P (vback)
3039 && TREE_ADDRESSABLE (vback),
3040 NULL_TREE, true, GSI_SAME_STMT);
3041 assign_stmt = gimple_build_assign (vback, t);
3042 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3044 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3046 if (fd->collapse > 1)
3047 t = fd->loop.v;
3048 else
3050 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3051 fd->loops[0].v, fd->loops[0].n1);
3052 t = fold_convert (fd->iter_type, t);
3054 tree aref = build4 (ARRAY_REF, fd->iter_type,
3055 counts[fd->ordered], size_zero_node,
3056 NULL_TREE, NULL_TREE);
3057 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3058 true, GSI_SAME_STMT);
3059 expand_omp_build_assign (&gsi, aref, t);
3062 t = build2 (fd->loop.cond_code, boolean_type_node,
3063 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3064 iend);
3065 gcond *cond_stmt = gimple_build_cond_empty (t);
3066 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3069 /* Remove GIMPLE_OMP_CONTINUE. */
3070 gsi_remove (&gsi, true);
3072 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3073 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3075 /* Emit code to get the next parallel iteration in L2_BB. */
3076 gsi = gsi_start_bb (l2_bb);
3078 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3079 build_fold_addr_expr (istart0),
3080 build_fold_addr_expr (iend0));
3081 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3082 false, GSI_CONTINUE_LINKING);
3083 if (TREE_TYPE (t) != boolean_type_node)
3084 t = fold_build2 (NE_EXPR, boolean_type_node,
3085 t, build_int_cst (TREE_TYPE (t), 0));
3086 gcond *cond_stmt = gimple_build_cond_empty (t);
3087 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3090 /* Add the loop cleanup function. */
3091 gsi = gsi_last_bb (exit_bb);
3092 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3093 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3094 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3095 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3096 else
3097 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3098 gcall *call_stmt = gimple_build_call (t, 0);
3099 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3100 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3101 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3102 if (fd->ordered)
3104 tree arr = counts[fd->ordered];
3105 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3106 TREE_THIS_VOLATILE (clobber) = 1;
3107 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3108 GSI_SAME_STMT);
3110 gsi_remove (&gsi, true);
3112 /* Connect the new blocks. */
3113 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3114 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3116 if (!broken_loop)
3118 gimple_seq phis;
3120 e = find_edge (cont_bb, l3_bb);
3121 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3123 phis = phi_nodes (l3_bb);
3124 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3126 gimple *phi = gsi_stmt (gsi);
3127 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3128 PHI_ARG_DEF_FROM_EDGE (phi, e));
3130 remove_edge (e);
3132 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3133 e = find_edge (cont_bb, l1_bb);
3134 if (e == NULL)
3136 e = BRANCH_EDGE (cont_bb);
3137 gcc_assert (single_succ (e->dest) == l1_bb);
3139 if (gimple_omp_for_combined_p (fd->for_stmt))
3141 remove_edge (e);
3142 e = NULL;
3144 else if (fd->collapse > 1)
3146 remove_edge (e);
3147 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3149 else
3150 e->flags = EDGE_TRUE_VALUE;
3151 if (e)
3153 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3154 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3156 else
3158 e = find_edge (cont_bb, l2_bb);
3159 e->flags = EDGE_FALLTHRU;
3161 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3163 if (gimple_in_ssa_p (cfun))
3165 /* Add phis to the outer loop that connect to the phis in the inner,
3166 original loop, and move the loop entry value of the inner phi to
3167 the loop entry value of the outer phi. */
3168 gphi_iterator psi;
3169 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3171 source_location locus;
3172 gphi *nphi;
3173 gphi *exit_phi = psi.phi ();
3175 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3176 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3178 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3179 edge latch_to_l1 = find_edge (latch, l1_bb);
3180 gphi *inner_phi
3181 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3183 tree t = gimple_phi_result (exit_phi);
3184 tree new_res = copy_ssa_name (t, NULL);
3185 nphi = create_phi_node (new_res, l0_bb);
3187 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3188 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3189 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3190 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3191 add_phi_arg (nphi, t, entry_to_l0, locus);
3193 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3194 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3196 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3200 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3201 recompute_dominator (CDI_DOMINATORS, l2_bb));
3202 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3203 recompute_dominator (CDI_DOMINATORS, l3_bb));
3204 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3205 recompute_dominator (CDI_DOMINATORS, l0_bb));
3206 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3207 recompute_dominator (CDI_DOMINATORS, l1_bb));
3209 /* We enter expand_omp_for_generic with a loop. This original loop may
3210 have its own loop struct, or it may be part of an outer loop struct
3211 (which may be the fake loop). */
3212 struct loop *outer_loop = entry_bb->loop_father;
3213 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3215 add_bb_to_loop (l2_bb, outer_loop);
3217 /* We've added a new loop around the original loop. Allocate the
3218 corresponding loop struct. */
3219 struct loop *new_loop = alloc_loop ();
3220 new_loop->header = l0_bb;
3221 new_loop->latch = l2_bb;
3222 add_loop (new_loop, outer_loop);
3224 /* Allocate a loop structure for the original loop unless we already
3225 had one. */
3226 if (!orig_loop_has_loop_struct
3227 && !gimple_omp_for_combined_p (fd->for_stmt))
3229 struct loop *orig_loop = alloc_loop ();
3230 orig_loop->header = l1_bb;
3231 /* The loop may have multiple latches. */
3232 add_loop (orig_loop, new_loop);
3237 /* A subroutine of expand_omp_for. Generate code for a parallel
3238 loop with static schedule and no specified chunk size. Given
3239 parameters:
3241 for (V = N1; V cond N2; V += STEP) BODY;
3243 where COND is "<" or ">", we generate pseudocode
3245 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3246 if (cond is <)
3247 adj = STEP - 1;
3248 else
3249 adj = STEP + 1;
3250 if ((__typeof (V)) -1 > 0 && cond is >)
3251 n = -(adj + N2 - N1) / -STEP;
3252 else
3253 n = (adj + N2 - N1) / STEP;
3254 q = n / nthreads;
3255 tt = n % nthreads;
3256 if (threadid < tt) goto L3; else goto L4;
3258 tt = 0;
3259 q = q + 1;
3261 s0 = q * threadid + tt;
3262 e0 = s0 + q;
3263 V = s0 * STEP + N1;
3264 if (s0 >= e0) goto L2; else goto L0;
3266 e = e0 * STEP + N1;
3268 BODY;
3269 V += STEP;
3270 if (V cond e) goto L1;
3274 static void
3275 expand_omp_for_static_nochunk (struct omp_region *region,
3276 struct omp_for_data *fd,
3277 gimple *inner_stmt)
3279 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3280 tree type, itype, vmain, vback;
3281 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3282 basic_block body_bb, cont_bb, collapse_bb = NULL;
3283 basic_block fin_bb;
3284 gimple_stmt_iterator gsi;
3285 edge ep;
3286 bool broken_loop = region->cont == NULL;
3287 tree *counts = NULL;
3288 tree n1, n2, step;
3290 itype = type = TREE_TYPE (fd->loop.v);
3291 if (POINTER_TYPE_P (type))
3292 itype = signed_type_for (type);
3294 entry_bb = region->entry;
3295 cont_bb = region->cont;
3296 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3297 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3298 gcc_assert (broken_loop
3299 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3300 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3301 body_bb = single_succ (seq_start_bb);
3302 if (!broken_loop)
3304 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3305 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3306 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3308 exit_bb = region->exit;
3310 /* Iteration space partitioning goes in ENTRY_BB. */
3311 gsi = gsi_last_bb (entry_bb);
3312 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3314 if (fd->collapse > 1)
3316 int first_zero_iter = -1, dummy = -1;
3317 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3319 counts = XALLOCAVEC (tree, fd->collapse);
3320 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3321 fin_bb, first_zero_iter,
3322 dummy_bb, dummy, l2_dom_bb);
3323 t = NULL_TREE;
3325 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3326 t = integer_one_node;
3327 else
3328 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3329 fold_convert (type, fd->loop.n1),
3330 fold_convert (type, fd->loop.n2));
3331 if (fd->collapse == 1
3332 && TYPE_UNSIGNED (type)
3333 && (t == NULL_TREE || !integer_onep (t)))
3335 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3336 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3337 true, GSI_SAME_STMT);
3338 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3339 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3340 true, GSI_SAME_STMT);
3341 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3342 NULL_TREE, NULL_TREE);
3343 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3344 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3345 expand_omp_regimplify_p, NULL, NULL)
3346 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3347 expand_omp_regimplify_p, NULL, NULL))
3349 gsi = gsi_for_stmt (cond_stmt);
3350 gimple_regimplify_operands (cond_stmt, &gsi);
3352 ep = split_block (entry_bb, cond_stmt);
3353 ep->flags = EDGE_TRUE_VALUE;
3354 entry_bb = ep->dest;
3355 ep->probability = profile_probability::very_likely ();
3356 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3357 ep->probability = profile_probability::very_unlikely ();
3358 if (gimple_in_ssa_p (cfun))
3360 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3361 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3362 !gsi_end_p (gpi); gsi_next (&gpi))
3364 gphi *phi = gpi.phi ();
3365 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3366 ep, UNKNOWN_LOCATION);
3369 gsi = gsi_last_bb (entry_bb);
3372 switch (gimple_omp_for_kind (fd->for_stmt))
3374 case GF_OMP_FOR_KIND_FOR:
3375 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3376 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3377 break;
3378 case GF_OMP_FOR_KIND_DISTRIBUTE:
3379 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3380 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3381 break;
3382 default:
3383 gcc_unreachable ();
3385 nthreads = build_call_expr (nthreads, 0);
3386 nthreads = fold_convert (itype, nthreads);
3387 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3388 true, GSI_SAME_STMT);
3389 threadid = build_call_expr (threadid, 0);
3390 threadid = fold_convert (itype, threadid);
3391 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3392 true, GSI_SAME_STMT);
3394 n1 = fd->loop.n1;
3395 n2 = fd->loop.n2;
3396 step = fd->loop.step;
3397 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3399 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3400 OMP_CLAUSE__LOOPTEMP_);
3401 gcc_assert (innerc);
3402 n1 = OMP_CLAUSE_DECL (innerc);
3403 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3404 OMP_CLAUSE__LOOPTEMP_);
3405 gcc_assert (innerc);
3406 n2 = OMP_CLAUSE_DECL (innerc);
3408 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3409 true, NULL_TREE, true, GSI_SAME_STMT);
3410 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3411 true, NULL_TREE, true, GSI_SAME_STMT);
3412 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3413 true, NULL_TREE, true, GSI_SAME_STMT);
3415 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3416 t = fold_build2 (PLUS_EXPR, itype, step, t);
3417 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3418 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3419 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3420 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3421 fold_build1 (NEGATE_EXPR, itype, t),
3422 fold_build1 (NEGATE_EXPR, itype, step));
3423 else
3424 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3425 t = fold_convert (itype, t);
3426 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3428 q = create_tmp_reg (itype, "q");
3429 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3430 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3431 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3433 tt = create_tmp_reg (itype, "tt");
3434 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3435 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3438 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3439 gcond *cond_stmt = gimple_build_cond_empty (t);
3440 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3442 second_bb = split_block (entry_bb, cond_stmt)->dest;
3443 gsi = gsi_last_bb (second_bb);
3444 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3446 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3447 GSI_SAME_STMT);
3448 gassign *assign_stmt
3449 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3450 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3452 third_bb = split_block (second_bb, assign_stmt)->dest;
3453 gsi = gsi_last_bb (third_bb);
3454 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3456 t = build2 (MULT_EXPR, itype, q, threadid);
3457 t = build2 (PLUS_EXPR, itype, t, tt);
3458 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3460 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3461 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3463 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3464 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
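/* Worked example (illustrative): with n = 10 iterations and
   nthreads = 4, q = 2 and tt = 2, so threads 0 and 1 take q + 1 = 3
   iterations each ([0,3) and [3,6)) while threads 2 and 3 take 2 each
   ([6,8) and [8,10)); s0 = q * threadid + tt and e0 = s0 + q produce
   exactly these ranges. */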
3466 /* Remove the GIMPLE_OMP_FOR statement. */
3467 gsi_remove (&gsi, true);
3469 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3470 gsi = gsi_start_bb (seq_start_bb);
3472 tree startvar = fd->loop.v;
3473 tree endvar = NULL_TREE;
3475 if (gimple_omp_for_combined_p (fd->for_stmt))
3477 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3478 ? gimple_omp_parallel_clauses (inner_stmt)
3479 : gimple_omp_for_clauses (inner_stmt);
3480 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3481 gcc_assert (innerc);
3482 startvar = OMP_CLAUSE_DECL (innerc);
3483 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3484 OMP_CLAUSE__LOOPTEMP_);
3485 gcc_assert (innerc);
3486 endvar = OMP_CLAUSE_DECL (innerc);
3487 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3488 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3490 int i;
3491 for (i = 1; i < fd->collapse; i++)
3493 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3494 OMP_CLAUSE__LOOPTEMP_);
3495 gcc_assert (innerc);
3497 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3498 OMP_CLAUSE__LOOPTEMP_);
3499 if (innerc)
3501 /* If needed (distribute parallel for with lastprivate),
3502 propagate down the total number of iterations. */
3503 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3504 fd->loop.n2);
3505 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3506 GSI_CONTINUE_LINKING);
3507 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3512 t = fold_convert (itype, s0);
3513 t = fold_build2 (MULT_EXPR, itype, t, step);
3514 if (POINTER_TYPE_P (type))
3515 t = fold_build_pointer_plus (n1, t);
3516 else
3517 t = fold_build2 (PLUS_EXPR, type, t, n1);
3518 t = fold_convert (TREE_TYPE (startvar), t);
3519 t = force_gimple_operand_gsi (&gsi, t,
3520 DECL_P (startvar)
3521 && TREE_ADDRESSABLE (startvar),
3522 NULL_TREE, false, GSI_CONTINUE_LINKING);
3523 assign_stmt = gimple_build_assign (startvar, t);
3524 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3526 t = fold_convert (itype, e0);
3527 t = fold_build2 (MULT_EXPR, itype, t, step);
3528 if (POINTER_TYPE_P (type))
3529 t = fold_build_pointer_plus (n1, t);
3530 else
3531 t = fold_build2 (PLUS_EXPR, type, t, n1);
3532 t = fold_convert (TREE_TYPE (startvar), t);
3533 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 false, GSI_CONTINUE_LINKING);
3535 if (endvar)
3537 assign_stmt = gimple_build_assign (endvar, e);
3538 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3539 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3540 assign_stmt = gimple_build_assign (fd->loop.v, e);
3541 else
3542 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3543 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3545 /* Handle linear clause adjustments. */
3546 tree itercnt = NULL_TREE;
3547 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3548 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3549 c; c = OMP_CLAUSE_CHAIN (c))
3550 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3551 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3553 tree d = OMP_CLAUSE_DECL (c);
3554 bool is_ref = omp_is_reference (d);
3555 tree t = d, a, dest;
3556 if (is_ref)
3557 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3558 if (itercnt == NULL_TREE)
3560 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3562 itercnt = fold_build2 (MINUS_EXPR, itype,
3563 fold_convert (itype, n1),
3564 fold_convert (itype, fd->loop.n1));
3565 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3566 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3567 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3568 NULL_TREE, false,
3569 GSI_CONTINUE_LINKING);
3571 else
3572 itercnt = s0;
3574 tree type = TREE_TYPE (t);
3575 if (POINTER_TYPE_P (type))
3576 type = sizetype;
3577 a = fold_build2 (MULT_EXPR, type,
3578 fold_convert (type, itercnt),
3579 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3580 dest = unshare_expr (t);
3581 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3582 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3583 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3584 false, GSI_CONTINUE_LINKING);
3585 assign_stmt = gimple_build_assign (dest, t);
3586 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3588 if (fd->collapse > 1)
3589 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3591 if (!broken_loop)
3593 /* The code controlling the sequential loop replaces the
3594 GIMPLE_OMP_CONTINUE. */
3595 gsi = gsi_last_bb (cont_bb);
3596 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3597 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3598 vmain = gimple_omp_continue_control_use (cont_stmt);
3599 vback = gimple_omp_continue_control_def (cont_stmt);
3601 if (!gimple_omp_for_combined_p (fd->for_stmt))
3603 if (POINTER_TYPE_P (type))
3604 t = fold_build_pointer_plus (vmain, step);
3605 else
3606 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3607 t = force_gimple_operand_gsi (&gsi, t,
3608 DECL_P (vback)
3609 && TREE_ADDRESSABLE (vback),
3610 NULL_TREE, true, GSI_SAME_STMT);
3611 assign_stmt = gimple_build_assign (vback, t);
3612 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3614 t = build2 (fd->loop.cond_code, boolean_type_node,
3615 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3616 ? t : vback, e);
3617 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3620 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3621 gsi_remove (&gsi, true);
3623 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3624 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3627 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3628 gsi = gsi_last_bb (exit_bb);
3629 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3631 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3632 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3634 gsi_remove (&gsi, true);
3636 /* Connect all the blocks. */
3637 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3638 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3639 ep = find_edge (entry_bb, second_bb);
3640 ep->flags = EDGE_TRUE_VALUE;
3641 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3642 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3643 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3645 if (!broken_loop)
3647 ep = find_edge (cont_bb, body_bb);
3648 if (ep == NULL)
3650 ep = BRANCH_EDGE (cont_bb);
3651 gcc_assert (single_succ (ep->dest) == body_bb);
3653 if (gimple_omp_for_combined_p (fd->for_stmt))
3655 remove_edge (ep);
3656 ep = NULL;
3658 else if (fd->collapse > 1)
3660 remove_edge (ep);
3661 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3663 else
3664 ep->flags = EDGE_TRUE_VALUE;
3665 find_edge (cont_bb, fin_bb)->flags
3666 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3669 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3671 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3673 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3674 recompute_dominator (CDI_DOMINATORS, body_bb));
3675 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3676 recompute_dominator (CDI_DOMINATORS, fin_bb));
3678 struct loop *loop = body_bb->loop_father;
3679 if (loop != entry_bb->loop_father)
3681 gcc_assert (broken_loop || loop->header == body_bb);
3682 gcc_assert (broken_loop
3683 || loop->latch == region->cont
3684 || single_pred (loop->latch) == region->cont);
3685 return;
3688 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3690 loop = alloc_loop ();
3691 loop->header = body_bb;
3692 if (collapse_bb == NULL)
3693 loop->latch = cont_bb;
3694 add_loop (loop, body_bb->loop_father);
3698 /* Return the phi in E->DEST whose argument on edge E is ARG, or NULL. */
3700 static gphi *
3701 find_phi_with_arg_on_edge (tree arg, edge e)
3703 basic_block bb = e->dest;
3705 for (gphi_iterator gpi = gsi_start_phis (bb);
3706 !gsi_end_p (gpi);
3707 gsi_next (&gpi))
3709 gphi *phi = gpi.phi ();
3710 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3711 return phi;
3714 return NULL;
3717 /* A subroutine of expand_omp_for. Generate code for a parallel
3718 loop with static schedule and a specified chunk size. Given
3719 parameters:
3721 for (V = N1; V cond N2; V += STEP) BODY;
3723 where COND is "<" or ">", we generate pseudocode
3725 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3726 if (cond is <)
3727 adj = STEP - 1;
3728 else
3729 adj = STEP + 1;
3730 if ((__typeof (V)) -1 > 0 && cond is >)
3731 n = -(adj + N2 - N1) / -STEP;
3732 else
3733 n = (adj + N2 - N1) / STEP;
3734 trip = 0;
3735 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3736 here so that V is defined
3737 if the loop is not entered
3739 s0 = (trip * nthreads + threadid) * CHUNK;
3740 e0 = min (s0 + CHUNK, n);
3741 if (s0 < n) goto L1; else goto L4;
3743 V = s0 * STEP + N1;
3744 e = e0 * STEP + N1;
3746 BODY;
3747 V += STEP;
3748 if (V cond e) goto L2; else goto L3;
3750 trip += 1;
3751 goto L0;
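/* As a hypothetical illustration (example not taken from this source), a
   worksharing loop such as

     #pragma omp for schedule(static, 4)
     for (i = 0; i < n; i++)
       a[i] = a[i] + 1;

   maps onto the pseudocode above with V == i, N1 == 0, N2 == n, STEP == 1
   and CHUNK == 4; for a plain worksharing loop nthreads and threadid are
   the values of omp_get_num_threads () and omp_get_thread_num (), while a
   distribute loop uses the team count and team number instead.  */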
3755 static void
3756 expand_omp_for_static_chunk (struct omp_region *region,
3757 struct omp_for_data *fd, gimple *inner_stmt)
3759 tree n, s0, e0, e, t;
3760 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3761 tree type, itype, vmain, vback, vextra;
3762 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3763 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3764 gimple_stmt_iterator gsi;
3765 edge se;
3766 bool broken_loop = region->cont == NULL;
3767 tree *counts = NULL;
3768 tree n1, n2, step;
3770 itype = type = TREE_TYPE (fd->loop.v);
3771 if (POINTER_TYPE_P (type))
3772 itype = signed_type_for (type);
3774 entry_bb = region->entry;
3775 se = split_block (entry_bb, last_stmt (entry_bb));
3776 entry_bb = se->src;
3777 iter_part_bb = se->dest;
3778 cont_bb = region->cont;
3779 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3780 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3781 gcc_assert (broken_loop
3782 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3783 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3784 body_bb = single_succ (seq_start_bb);
3785 if (!broken_loop)
3787 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3788 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3789 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3790 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3792 exit_bb = region->exit;
3794 /* Trip and adjustment setup goes in ENTRY_BB. */
3795 gsi = gsi_last_bb (entry_bb);
3796 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3798 if (fd->collapse > 1)
3800 int first_zero_iter = -1, dummy = -1;
3801 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3803 counts = XALLOCAVEC (tree, fd->collapse);
3804 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3805 fin_bb, first_zero_iter,
3806 dummy_bb, dummy, l2_dom_bb);
3807 t = NULL_TREE;
3809 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3810 t = integer_one_node;
3811 else
3812 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3813 fold_convert (type, fd->loop.n1),
3814 fold_convert (type, fd->loop.n2));
3815 if (fd->collapse == 1
3816 && TYPE_UNSIGNED (type)
3817 && (t == NULL_TREE || !integer_onep (t)))
3819 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3820 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3821 true, GSI_SAME_STMT);
3822 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3823 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3824 true, GSI_SAME_STMT);
3825 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3826 NULL_TREE, NULL_TREE);
3827 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3828 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3829 expand_omp_regimplify_p, NULL, NULL)
3830 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3831 expand_omp_regimplify_p, NULL, NULL))
3833 gsi = gsi_for_stmt (cond_stmt);
3834 gimple_regimplify_operands (cond_stmt, &gsi);
3836 se = split_block (entry_bb, cond_stmt);
3837 se->flags = EDGE_TRUE_VALUE;
3838 entry_bb = se->dest;
3839 se->probability = profile_probability::very_likely ();
3840 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3841 se->probability = profile_probability::very_unlikely ();
3842 if (gimple_in_ssa_p (cfun))
3844 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3845 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3846 !gsi_end_p (gpi); gsi_next (&gpi))
3848 gphi *phi = gpi.phi ();
3849 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3850 se, UNKNOWN_LOCATION);
3853 gsi = gsi_last_bb (entry_bb);
3856 switch (gimple_omp_for_kind (fd->for_stmt))
3858 case GF_OMP_FOR_KIND_FOR:
3859 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3860 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3861 break;
3862 case GF_OMP_FOR_KIND_DISTRIBUTE:
3863 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3864 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3865 break;
3866 default:
3867 gcc_unreachable ();
3869 nthreads = build_call_expr (nthreads, 0);
3870 nthreads = fold_convert (itype, nthreads);
3871 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3872 true, GSI_SAME_STMT);
3873 threadid = build_call_expr (threadid, 0);
3874 threadid = fold_convert (itype, threadid);
3875 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3876 true, GSI_SAME_STMT);
3878 n1 = fd->loop.n1;
3879 n2 = fd->loop.n2;
3880 step = fd->loop.step;
3881 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3883 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3884 OMP_CLAUSE__LOOPTEMP_);
3885 gcc_assert (innerc);
3886 n1 = OMP_CLAUSE_DECL (innerc);
3887 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3888 OMP_CLAUSE__LOOPTEMP_);
3889 gcc_assert (innerc);
3890 n2 = OMP_CLAUSE_DECL (innerc);
3892 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3893 true, NULL_TREE, true, GSI_SAME_STMT);
3894 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3895 true, NULL_TREE, true, GSI_SAME_STMT);
3896 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3897 true, NULL_TREE, true, GSI_SAME_STMT);
3898 tree chunk_size = fold_convert (itype, fd->chunk_size);
3899 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3900 chunk_size
3901 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3902 GSI_SAME_STMT);
3904 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3905 t = fold_build2 (PLUS_EXPR, itype, step, t);
3906 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3907 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3908 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3909 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3910 fold_build1 (NEGATE_EXPR, itype, t),
3911 fold_build1 (NEGATE_EXPR, itype, step));
3912 else
3913 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3914 t = fold_convert (itype, t);
3915 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 true, GSI_SAME_STMT);
3918 trip_var = create_tmp_reg (itype, ".trip");
3919 if (gimple_in_ssa_p (cfun))
3921 trip_init = make_ssa_name (trip_var);
3922 trip_main = make_ssa_name (trip_var);
3923 trip_back = make_ssa_name (trip_var);
3925 else
3927 trip_init = trip_var;
3928 trip_main = trip_var;
3929 trip_back = trip_var;
3932 gassign *assign_stmt
3933 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3934 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3936 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3937 t = fold_build2 (MULT_EXPR, itype, t, step);
3938 if (POINTER_TYPE_P (type))
3939 t = fold_build_pointer_plus (n1, t);
3940 else
3941 t = fold_build2 (PLUS_EXPR, type, t, n1);
3942 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3943 true, GSI_SAME_STMT);
3945 /* Remove the GIMPLE_OMP_FOR. */
3946 gsi_remove (&gsi, true);
3948 gimple_stmt_iterator gsif = gsi;
3950 /* Iteration space partitioning goes in ITER_PART_BB. */
3951 gsi = gsi_last_bb (iter_part_bb);
3953 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3954 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3955 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3956 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3957 false, GSI_CONTINUE_LINKING);
3959 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3960 t = fold_build2 (MIN_EXPR, itype, t, n);
3961 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 false, GSI_CONTINUE_LINKING);
3964 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3965 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3967 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3968 gsi = gsi_start_bb (seq_start_bb);
3970 tree startvar = fd->loop.v;
3971 tree endvar = NULL_TREE;
3973 if (gimple_omp_for_combined_p (fd->for_stmt))
3975 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3976 ? gimple_omp_parallel_clauses (inner_stmt)
3977 : gimple_omp_for_clauses (inner_stmt);
3978 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3979 gcc_assert (innerc);
3980 startvar = OMP_CLAUSE_DECL (innerc);
3981 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 endvar = OMP_CLAUSE_DECL (innerc);
3985 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3986 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3988 int i;
3989 for (i = 1; i < fd->collapse; i++)
3991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3992 OMP_CLAUSE__LOOPTEMP_);
3993 gcc_assert (innerc);
3995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3996 OMP_CLAUSE__LOOPTEMP_);
3997 if (innerc)
3999 /* If needed (distribute parallel for with lastprivate),
4000 propagate down the total number of iterations. */
4001 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4002 fd->loop.n2);
4003 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4004 GSI_CONTINUE_LINKING);
4005 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4006 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4011 t = fold_convert (itype, s0);
4012 t = fold_build2 (MULT_EXPR, itype, t, step);
4013 if (POINTER_TYPE_P (type))
4014 t = fold_build_pointer_plus (n1, t);
4015 else
4016 t = fold_build2 (PLUS_EXPR, type, t, n1);
4017 t = fold_convert (TREE_TYPE (startvar), t);
4018 t = force_gimple_operand_gsi (&gsi, t,
4019 DECL_P (startvar)
4020 && TREE_ADDRESSABLE (startvar),
4021 NULL_TREE, false, GSI_CONTINUE_LINKING);
4022 assign_stmt = gimple_build_assign (startvar, t);
4023 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4025 t = fold_convert (itype, e0);
4026 t = fold_build2 (MULT_EXPR, itype, t, step);
4027 if (POINTER_TYPE_P (type))
4028 t = fold_build_pointer_plus (n1, t);
4029 else
4030 t = fold_build2 (PLUS_EXPR, type, t, n1);
4031 t = fold_convert (TREE_TYPE (startvar), t);
4032 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4033 false, GSI_CONTINUE_LINKING);
4034 if (endvar)
4036 assign_stmt = gimple_build_assign (endvar, e);
4037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4038 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4039 assign_stmt = gimple_build_assign (fd->loop.v, e);
4040 else
4041 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4044 /* Handle linear clause adjustments. */
4045 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4046 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4047 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4048 c; c = OMP_CLAUSE_CHAIN (c))
4049 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4050 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4052 tree d = OMP_CLAUSE_DECL (c);
4053 bool is_ref = omp_is_reference (d);
4054 tree t = d, a, dest;
4055 if (is_ref)
4056 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4057 tree type = TREE_TYPE (t);
4058 if (POINTER_TYPE_P (type))
4059 type = sizetype;
4060 dest = unshare_expr (t);
4061 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4062 expand_omp_build_assign (&gsif, v, t);
4063 if (itercnt == NULL_TREE)
4065 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4067 itercntbias
4068 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4069 fold_convert (itype, fd->loop.n1));
4070 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4071 itercntbias, step);
4072 itercntbias
4073 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4074 NULL_TREE, true,
4075 GSI_SAME_STMT);
4076 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4077 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4078 NULL_TREE, false,
4079 GSI_CONTINUE_LINKING);
4081 else
4082 itercnt = s0;
4084 a = fold_build2 (MULT_EXPR, type,
4085 fold_convert (type, itercnt),
4086 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4087 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4088 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4089 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4090 false, GSI_CONTINUE_LINKING);
4091 assign_stmt = gimple_build_assign (dest, t);
4092 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4094 if (fd->collapse > 1)
4095 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4097 if (!broken_loop)
4099 /* The code controlling the sequential loop goes in CONT_BB,
4100 replacing the GIMPLE_OMP_CONTINUE. */
4101 gsi = gsi_last_bb (cont_bb);
4102 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4103 vmain = gimple_omp_continue_control_use (cont_stmt);
4104 vback = gimple_omp_continue_control_def (cont_stmt);
4106 if (!gimple_omp_for_combined_p (fd->for_stmt))
4108 if (POINTER_TYPE_P (type))
4109 t = fold_build_pointer_plus (vmain, step);
4110 else
4111 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4112 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4113 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4114 true, GSI_SAME_STMT);
4115 assign_stmt = gimple_build_assign (vback, t);
4116 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4118 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4119 t = build2 (EQ_EXPR, boolean_type_node,
4120 build_int_cst (itype, 0),
4121 build_int_cst (itype, 1));
4122 else
4123 t = build2 (fd->loop.cond_code, boolean_type_node,
4124 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4125 ? t : vback, e);
4126 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4129 /* Remove GIMPLE_OMP_CONTINUE. */
4130 gsi_remove (&gsi, true);
4132 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4133 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4135 /* Trip update code goes into TRIP_UPDATE_BB. */
4136 gsi = gsi_start_bb (trip_update_bb);
4138 t = build_int_cst (itype, 1);
4139 t = build2 (PLUS_EXPR, itype, trip_main, t);
4140 assign_stmt = gimple_build_assign (trip_back, t);
4141 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4144 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4145 gsi = gsi_last_bb (exit_bb);
4146 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4148 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4149 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4151 gsi_remove (&gsi, true);
4153 /* Connect the new blocks. */
4154 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4155 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4157 if (!broken_loop)
4159 se = find_edge (cont_bb, body_bb);
4160 if (se == NULL)
4162 se = BRANCH_EDGE (cont_bb);
4163 gcc_assert (single_succ (se->dest) == body_bb);
4165 if (gimple_omp_for_combined_p (fd->for_stmt))
4167 remove_edge (se);
4168 se = NULL;
4170 else if (fd->collapse > 1)
4172 remove_edge (se);
4173 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4175 else
4176 se->flags = EDGE_TRUE_VALUE;
4177 find_edge (cont_bb, trip_update_bb)->flags
4178 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4180 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4181 iter_part_bb);
4184 if (gimple_in_ssa_p (cfun))
4186 gphi_iterator psi;
4187 gphi *phi;
4188 edge re, ene;
4189 edge_var_map *vm;
4190 size_t i;
4192 gcc_assert (fd->collapse == 1 && !broken_loop);
4194 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4195 remove arguments of the phi nodes in fin_bb. We need to create
4196 appropriate phi nodes in iter_part_bb instead. */
4197 se = find_edge (iter_part_bb, fin_bb);
4198 re = single_succ_edge (trip_update_bb);
4199 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4200 ene = single_succ_edge (entry_bb);
4202 psi = gsi_start_phis (fin_bb);
4203 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4204 gsi_next (&psi), ++i)
4206 gphi *nphi;
4207 source_location locus;
4209 phi = psi.phi ();
4210 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4211 redirect_edge_var_map_def (vm), 0))
4212 continue;
4214 t = gimple_phi_result (phi);
4215 gcc_assert (t == redirect_edge_var_map_result (vm));
4217 if (!single_pred_p (fin_bb))
4218 t = copy_ssa_name (t, phi);
4220 nphi = create_phi_node (t, iter_part_bb);
4222 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4223 locus = gimple_phi_arg_location_from_edge (phi, se);
4225 /* A special case -- fd->loop.v is not yet computed in
4226 iter_part_bb; we need to use vextra instead. */
4227 if (t == fd->loop.v)
4228 t = vextra;
4229 add_phi_arg (nphi, t, ene, locus);
4230 locus = redirect_edge_var_map_location (vm);
4231 tree back_arg = redirect_edge_var_map_def (vm);
4232 add_phi_arg (nphi, back_arg, re, locus);
4233 edge ce = find_edge (cont_bb, body_bb);
4234 if (ce == NULL)
4236 ce = BRANCH_EDGE (cont_bb);
4237 gcc_assert (single_succ (ce->dest) == body_bb);
4238 ce = single_succ_edge (ce->dest);
4240 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4241 gcc_assert (inner_loop_phi != NULL);
4242 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4243 find_edge (seq_start_bb, body_bb), locus);
4245 if (!single_pred_p (fin_bb))
4246 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4248 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4249 redirect_edge_var_map_clear (re);
4250 if (single_pred_p (fin_bb))
4251 while (1)
4253 psi = gsi_start_phis (fin_bb);
4254 if (gsi_end_p (psi))
4255 break;
4256 remove_phi_node (&psi, false);
4259 /* Make phi node for trip. */
4260 phi = create_phi_node (trip_main, iter_part_bb);
4261 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4262 UNKNOWN_LOCATION);
4263 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4264 UNKNOWN_LOCATION);
4267 if (!broken_loop)
4268 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4269 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4270 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4271 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4272 recompute_dominator (CDI_DOMINATORS, fin_bb));
4273 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4274 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4275 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4276 recompute_dominator (CDI_DOMINATORS, body_bb));
4278 if (!broken_loop)
4280 struct loop *loop = body_bb->loop_father;
4281 struct loop *trip_loop = alloc_loop ();
4282 trip_loop->header = iter_part_bb;
4283 trip_loop->latch = trip_update_bb;
4284 add_loop (trip_loop, iter_part_bb->loop_father);
4286 if (loop != entry_bb->loop_father)
4288 gcc_assert (loop->header == body_bb);
4289 gcc_assert (loop->latch == region->cont
4290 || single_pred (loop->latch) == region->cont);
4291 trip_loop->inner = loop;
4292 return;
4295 if (!gimple_omp_for_combined_p (fd->for_stmt))
4297 loop = alloc_loop ();
4298 loop->header = body_bb;
4299 if (collapse_bb == NULL)
4300 loop->latch = cont_bb;
4301 add_loop (loop, trip_loop);
4306 /* A subroutine of expand_omp_for. Generate code for a _Cilk_for loop.
4307 Given parameters:
4308 for (V = N1; V cond N2; V += STEP) BODY;
4310 where COND is "<" or ">" or "!=", we generate pseudocode
4312 for (ind_var = low; ind_var < high; ind_var++)
4314 V = n1 + (ind_var * STEP)
4316 <BODY>
4319 In the above pseudocode, low and high are function parameters of the
4320 child function. In the function below, we insert a temporary
4321 variable that makes calls to two OMP functions that will not be
4322 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4323 with _Cilk_for). These functions are replaced with low and high
4324 by the function that handles taskreg. */
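/* As a hypothetical example (not taken from this source), a loop written as

     _Cilk_for (int i = 0; i < n; i++)
       a[i] = b[i];

   is outlined into a child function that receives __low and __high
   parameters; the runtime entry point (__libcilkrts_cilk_for_32 or
   __libcilkrts_cilk_for_64, selected below from the precision of the
   induction variable) calls that child with the sub-ranges it has chosen,
   and the code generated here turns each [__low, __high) range back into
   values of V.  */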
4327 static void
4328 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4330 bool broken_loop = region->cont == NULL;
4331 basic_block entry_bb = region->entry;
4332 basic_block cont_bb = region->cont;
4334 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4335 gcc_assert (broken_loop
4336 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4337 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4338 basic_block l1_bb, l2_bb;
4340 if (!broken_loop)
4342 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4343 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4344 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4345 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4347 else
4349 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4350 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4351 l2_bb = single_succ (l1_bb);
4353 basic_block exit_bb = region->exit;
4354 basic_block l2_dom_bb = NULL;
4356 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4358 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4359 statements used to pass information to expand_omp_taskreg.
4360 low_val and high_val will be replaced by the __low and __high
4361 parameters from the child function.
4363 The call_exprs part is a place-holder; it is mainly used
4364 to let the top-level part identify that this is
4365 where low and high should be put (reasoning given in the header
4366 comment). */
4368 gomp_parallel *par_stmt
4369 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4370 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4371 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4372 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4374 if (id_equal (DECL_NAME (t), "__high"))
4375 high_val = t;
4376 else if (id_equal (DECL_NAME (t), "__low"))
4377 low_val = t;
4379 gcc_assert (low_val && high_val);
4381 tree type = TREE_TYPE (low_val);
4382 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4383 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4385 /* Not needed in SSA form right now. */
4386 gcc_assert (!gimple_in_ssa_p (cfun));
4387 if (l2_dom_bb == NULL)
4388 l2_dom_bb = l1_bb;
4390 tree n1 = low_val;
4391 tree n2 = high_val;
4393 gimple *stmt = gimple_build_assign (ind_var, n1);
4395 /* Replace the GIMPLE_OMP_FOR statement. */
4396 gsi_replace (&gsi, stmt, true);
4398 if (!broken_loop)
4400 /* Code to control the increment goes in the CONT_BB. */
4401 gsi = gsi_last_bb (cont_bb);
4402 stmt = gsi_stmt (gsi);
4403 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4404 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4405 build_one_cst (type));
4407 /* Replace GIMPLE_OMP_CONTINUE. */
4408 gsi_replace (&gsi, stmt, true);
4411 /* Emit the condition in L1_BB. */
4412 gsi = gsi_after_labels (l1_bb);
4413 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4414 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4415 fd->loop.step);
4416 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4417 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4418 fd->loop.n1, fold_convert (sizetype, t));
4419 else
4420 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4421 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4422 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4423 expand_omp_build_assign (&gsi, fd->loop.v, t);
4425 /* The condition is always '<' since the runtime will fill in the low
4426 and high values. */
4427 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4428 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4430 /* Remove GIMPLE_OMP_RETURN. */
4431 gsi = gsi_last_bb (exit_bb);
4432 gsi_remove (&gsi, true);
4434 /* Connect the new blocks. */
4435 remove_edge (FALLTHRU_EDGE (entry_bb));
4437 edge e, ne;
4438 if (!broken_loop)
4440 remove_edge (BRANCH_EDGE (entry_bb));
4441 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4443 e = BRANCH_EDGE (l1_bb);
4444 ne = FALLTHRU_EDGE (l1_bb);
4445 e->flags = EDGE_TRUE_VALUE;
4447 else
4449 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4451 ne = single_succ_edge (l1_bb);
4452 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4455 ne->flags = EDGE_FALSE_VALUE;
4456 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4457 ne->probability = e->probability.invert ();
4459 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4460 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4461 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4463 if (!broken_loop)
4465 struct loop *loop = alloc_loop ();
4466 loop->header = l1_bb;
4467 loop->latch = cont_bb;
4468 add_loop (loop, l1_bb->loop_father);
4469 loop->safelen = INT_MAX;
4472 /* Pick the correct library function based on the precision of the
4473 induction variable type. */
4474 tree lib_fun = NULL_TREE;
4475 if (TYPE_PRECISION (type) == 32)
4476 lib_fun = cilk_for_32_fndecl;
4477 else if (TYPE_PRECISION (type) == 64)
4478 lib_fun = cilk_for_64_fndecl;
4479 else
4480 gcc_unreachable ();
4482 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4484 /* WS_ARGS contains the library function flavor to call:
4485 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4486 user-defined grain value. If the user does not define one, then zero
4487 is passed in by the parser. */
4488 vec_alloc (region->ws_args, 2);
4489 region->ws_args->quick_push (lib_fun);
4490 region->ws_args->quick_push (fd->chunk_size);
4493 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4494 loop. Given parameters:
4496 for (V = N1; V cond N2; V += STEP) BODY;
4498 where COND is "<" or ">", we generate pseudocode
4500 V = N1;
4501 goto L1;
4503 BODY;
4504 V += STEP;
4506 if (V cond N2) goto L0; else goto L2;
4509 For collapsed loops, given parameters:
4510 collapse(3)
4511 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4512 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4513 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4514 BODY;
4516 we generate pseudocode
4518 if (cond3 is <)
4519 adj = STEP3 - 1;
4520 else
4521 adj = STEP3 + 1;
4522 count3 = (adj + N32 - N31) / STEP3;
4523 if (cond2 is <)
4524 adj = STEP2 - 1;
4525 else
4526 adj = STEP2 + 1;
4527 count2 = (adj + N22 - N21) / STEP2;
4528 if (cond1 is <)
4529 adj = STEP1 - 1;
4530 else
4531 adj = STEP1 + 1;
4532 count1 = (adj + N12 - N11) / STEP1;
4533 count = count1 * count2 * count3;
4534 V = 0;
4535 V1 = N11;
4536 V2 = N21;
4537 V3 = N31;
4538 goto L1;
4540 BODY;
4541 V += 1;
4542 V3 += STEP3;
4543 V2 += (V3 cond3 N32) ? 0 : STEP2;
4544 V3 = (V3 cond3 N32) ? V3 : N31;
4545 V1 += (V2 cond2 N22) ? 0 : STEP1;
4546 V2 = (V2 cond2 N22) ? V2 : N21;
4548 if (V < count) goto L0; else goto L2;
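/* As a hypothetical illustration (example not taken from this source), a
   loop such as

     #pragma omp simd safelen(8)
     for (i = 0; i < n; i++)
       a[i] = b[i] * c[i];

   follows the first pseudocode form above; the safelen(8) value is
   recorded in loop->safelen below, telling the vectorizer that up to 8
   iterations may safely be executed concurrently.  */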
4553 static void
4554 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4556 tree type, t;
4557 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4558 gimple_stmt_iterator gsi;
4559 gimple *stmt;
4560 gcond *cond_stmt;
4561 bool broken_loop = region->cont == NULL;
4562 edge e, ne;
4563 tree *counts = NULL;
4564 int i;
4565 int safelen_int = INT_MAX;
4566 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4567 OMP_CLAUSE_SAFELEN);
4568 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4569 OMP_CLAUSE__SIMDUID_);
4570 tree n1, n2;
4572 if (safelen)
4574 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4575 if (TREE_CODE (safelen) != INTEGER_CST)
4576 safelen_int = 0;
4577 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4578 safelen_int = tree_to_uhwi (safelen);
4579 if (safelen_int == 1)
4580 safelen_int = 0;
4582 type = TREE_TYPE (fd->loop.v);
4583 entry_bb = region->entry;
4584 cont_bb = region->cont;
4585 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4586 gcc_assert (broken_loop
4587 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4588 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4589 if (!broken_loop)
4591 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4592 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4593 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4594 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4596 else
4598 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4599 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4600 l2_bb = single_succ (l1_bb);
4602 exit_bb = region->exit;
4603 l2_dom_bb = NULL;
4605 gsi = gsi_last_bb (entry_bb);
4607 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4608 /* Not needed in SSA form right now. */
4609 gcc_assert (!gimple_in_ssa_p (cfun));
4610 if (fd->collapse > 1)
4612 int first_zero_iter = -1, dummy = -1;
4613 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4615 counts = XALLOCAVEC (tree, fd->collapse);
4616 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4617 zero_iter_bb, first_zero_iter,
4618 dummy_bb, dummy, l2_dom_bb);
4620 if (l2_dom_bb == NULL)
4621 l2_dom_bb = l1_bb;
4623 n1 = fd->loop.n1;
4624 n2 = fd->loop.n2;
4625 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4627 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4628 OMP_CLAUSE__LOOPTEMP_);
4629 gcc_assert (innerc);
4630 n1 = OMP_CLAUSE_DECL (innerc);
4631 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4632 OMP_CLAUSE__LOOPTEMP_);
4633 gcc_assert (innerc);
4634 n2 = OMP_CLAUSE_DECL (innerc);
4636 tree step = fd->loop.step;
4638 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4639 OMP_CLAUSE__SIMT_);
4640 if (is_simt)
4642 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4643 is_simt = safelen_int > 1;
4645 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4646 if (is_simt)
4648 simt_lane = create_tmp_var (unsigned_type_node);
4649 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4650 gimple_call_set_lhs (g, simt_lane);
4651 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4652 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4653 fold_convert (TREE_TYPE (step), simt_lane));
4654 n1 = fold_convert (type, n1);
4655 if (POINTER_TYPE_P (type))
4656 n1 = fold_build_pointer_plus (n1, offset);
4657 else
4658 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4660 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4661 if (fd->collapse > 1)
4662 simt_maxlane = build_one_cst (unsigned_type_node);
4663 else if (safelen_int < omp_max_simt_vf ())
4664 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4665 tree vf
4666 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4667 unsigned_type_node, 0);
4668 if (simt_maxlane)
4669 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4670 vf = fold_convert (TREE_TYPE (step), vf);
4671 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4674 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4675 if (fd->collapse > 1)
4677 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4679 gsi_prev (&gsi);
4680 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4681 gsi_next (&gsi);
4683 else
4684 for (i = 0; i < fd->collapse; i++)
4686 tree itype = TREE_TYPE (fd->loops[i].v);
4687 if (POINTER_TYPE_P (itype))
4688 itype = signed_type_for (itype);
4689 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4690 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4694 /* Remove the GIMPLE_OMP_FOR statement. */
4695 gsi_remove (&gsi, true);
4697 if (!broken_loop)
4699 /* Code to control the increment goes in the CONT_BB. */
4700 gsi = gsi_last_bb (cont_bb);
4701 stmt = gsi_stmt (gsi);
4702 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4704 if (POINTER_TYPE_P (type))
4705 t = fold_build_pointer_plus (fd->loop.v, step);
4706 else
4707 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4708 expand_omp_build_assign (&gsi, fd->loop.v, t);
4710 if (fd->collapse > 1)
4712 i = fd->collapse - 1;
4713 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4715 t = fold_convert (sizetype, fd->loops[i].step);
4716 t = fold_build_pointer_plus (fd->loops[i].v, t);
4718 else
4720 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4721 fd->loops[i].step);
4722 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4723 fd->loops[i].v, t);
4725 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4727 for (i = fd->collapse - 1; i > 0; i--)
4729 tree itype = TREE_TYPE (fd->loops[i].v);
4730 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4731 if (POINTER_TYPE_P (itype2))
4732 itype2 = signed_type_for (itype2);
4733 t = build3 (COND_EXPR, itype2,
4734 build2 (fd->loops[i].cond_code, boolean_type_node,
4735 fd->loops[i].v,
4736 fold_convert (itype, fd->loops[i].n2)),
4737 build_int_cst (itype2, 0),
4738 fold_convert (itype2, fd->loops[i - 1].step));
4739 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4740 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4741 else
4742 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4743 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4745 t = build3 (COND_EXPR, itype,
4746 build2 (fd->loops[i].cond_code, boolean_type_node,
4747 fd->loops[i].v,
4748 fold_convert (itype, fd->loops[i].n2)),
4749 fd->loops[i].v,
4750 fold_convert (itype, fd->loops[i].n1));
4751 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4755 /* Remove GIMPLE_OMP_CONTINUE. */
4756 gsi_remove (&gsi, true);
4759 /* Emit the condition in L1_BB. */
4760 gsi = gsi_start_bb (l1_bb);
4762 t = fold_convert (type, n2);
4763 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4764 false, GSI_CONTINUE_LINKING);
4765 tree v = fd->loop.v;
4766 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4767 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4768 false, GSI_CONTINUE_LINKING);
4769 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4770 cond_stmt = gimple_build_cond_empty (t);
4771 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4772 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4773 NULL, NULL)
4774 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4775 NULL, NULL))
4777 gsi = gsi_for_stmt (cond_stmt);
4778 gimple_regimplify_operands (cond_stmt, &gsi);
4781 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4782 if (is_simt)
4784 gsi = gsi_start_bb (l2_bb);
4785 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4786 if (POINTER_TYPE_P (type))
4787 t = fold_build_pointer_plus (fd->loop.v, step);
4788 else
4789 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4790 expand_omp_build_assign (&gsi, fd->loop.v, t);
4793 /* Remove GIMPLE_OMP_RETURN. */
4794 gsi = gsi_last_bb (exit_bb);
4795 gsi_remove (&gsi, true);
4797 /* Connect the new blocks. */
4798 remove_edge (FALLTHRU_EDGE (entry_bb));
4800 if (!broken_loop)
4802 remove_edge (BRANCH_EDGE (entry_bb));
4803 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4805 e = BRANCH_EDGE (l1_bb);
4806 ne = FALLTHRU_EDGE (l1_bb);
4807 e->flags = EDGE_TRUE_VALUE;
4809 else
4811 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4813 ne = single_succ_edge (l1_bb);
4814 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4817 ne->flags = EDGE_FALSE_VALUE;
4818 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4819 ne->probability = e->probability.invert ();
4821 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4822 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4824 if (simt_maxlane)
4826 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4827 NULL_TREE, NULL_TREE);
4828 gsi = gsi_last_bb (entry_bb);
4829 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4830 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4831 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4832 FALLTHRU_EDGE (entry_bb)->probability
4833 = profile_probability::guessed_always ().apply_scale (7, 8);
4834 BRANCH_EDGE (entry_bb)->probability
4835 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4836 l2_dom_bb = entry_bb;
4838 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4840 if (!broken_loop)
4842 struct loop *loop = alloc_loop ();
4843 loop->header = l1_bb;
4844 loop->latch = cont_bb;
4845 add_loop (loop, l1_bb->loop_father);
4846 loop->safelen = safelen_int;
4847 if (simduid)
4849 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4850 cfun->has_simduid_loops = true;
4852 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4853 the loop. */
4854 if ((flag_tree_loop_vectorize
4855 || !global_options_set.x_flag_tree_loop_vectorize)
4856 && flag_tree_loop_optimize
4857 && loop->safelen > 1)
4859 loop->force_vectorize = true;
4860 cfun->has_force_vectorize_loops = true;
4863 else if (simduid)
4864 cfun->has_simduid_loops = true;
4867 /* The taskloop construct is represented after gimplification as
4868 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4869 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4870 which should just compute all the needed loop temporaries
4871 for GIMPLE_OMP_TASK. */
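/* As a hypothetical example (not taken from this source), for

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       f (i);

   the outer GIMPLE_OMP_FOR expanded here only materializes the start and
   end values (and, for collapsed loops, the total iteration count) into
   _LOOPTEMP_ temporaries, which the GIMPLE_OMP_TASK then hands to the
   GOMP_taskloop{,_ull} runtime entry point.  */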
4873 static void
4874 expand_omp_taskloop_for_outer (struct omp_region *region,
4875 struct omp_for_data *fd,
4876 gimple *inner_stmt)
4878 tree type, bias = NULL_TREE;
4879 basic_block entry_bb, cont_bb, exit_bb;
4880 gimple_stmt_iterator gsi;
4881 gassign *assign_stmt;
4882 tree *counts = NULL;
4883 int i;
4885 gcc_assert (inner_stmt);
4886 gcc_assert (region->cont);
4887 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4888 && gimple_omp_task_taskloop_p (inner_stmt));
4889 type = TREE_TYPE (fd->loop.v);
4891 /* See if we need to bias by LLONG_MIN. */
4892 if (fd->iter_type == long_long_unsigned_type_node
4893 && TREE_CODE (type) == INTEGER_TYPE
4894 && !TYPE_UNSIGNED (type))
4896 tree n1, n2;
4898 if (fd->loop.cond_code == LT_EXPR)
4900 n1 = fd->loop.n1;
4901 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4903 else
4905 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4906 n2 = fd->loop.n1;
4908 if (TREE_CODE (n1) != INTEGER_CST
4909 || TREE_CODE (n2) != INTEGER_CST
4910 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4911 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4914 entry_bb = region->entry;
4915 cont_bb = region->cont;
4916 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4917 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4918 exit_bb = region->exit;
4920 gsi = gsi_last_bb (entry_bb);
4921 gimple *for_stmt = gsi_stmt (gsi);
4922 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4923 if (fd->collapse > 1)
4925 int first_zero_iter = -1, dummy = -1;
4926 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4928 counts = XALLOCAVEC (tree, fd->collapse);
4929 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4930 zero_iter_bb, first_zero_iter,
4931 dummy_bb, dummy, l2_dom_bb);
4933 if (zero_iter_bb)
4935 /* Some counts[i] vars might be uninitialized if
4936 some loop has zero iterations. But the body shouldn't
4937 be executed in that case, so just avoid uninit warnings. */
4938 for (i = first_zero_iter; i < fd->collapse; i++)
4939 if (SSA_VAR_P (counts[i]))
4940 TREE_NO_WARNING (counts[i]) = 1;
4941 gsi_prev (&gsi);
4942 edge e = split_block (entry_bb, gsi_stmt (gsi));
4943 entry_bb = e->dest;
4944 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4945 gsi = gsi_last_bb (entry_bb);
4946 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4947 get_immediate_dominator (CDI_DOMINATORS,
4948 zero_iter_bb));
4952 tree t0, t1;
4953 t1 = fd->loop.n2;
4954 t0 = fd->loop.n1;
4955 if (POINTER_TYPE_P (TREE_TYPE (t0))
4956 && TYPE_PRECISION (TREE_TYPE (t0))
4957 != TYPE_PRECISION (fd->iter_type))
4959 /* Avoid casting pointers to an integer type of a different size. */
4960 tree itype = signed_type_for (type);
4961 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4962 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4964 else
4966 t1 = fold_convert (fd->iter_type, t1);
4967 t0 = fold_convert (fd->iter_type, t0);
4969 if (bias)
4971 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4972 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4975 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4976 OMP_CLAUSE__LOOPTEMP_);
4977 gcc_assert (innerc);
4978 tree startvar = OMP_CLAUSE_DECL (innerc);
4979 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4980 gcc_assert (innerc);
4981 tree endvar = OMP_CLAUSE_DECL (innerc);
4982 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4984 gcc_assert (innerc);
4985 for (i = 1; i < fd->collapse; i++)
4987 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4988 OMP_CLAUSE__LOOPTEMP_);
4989 gcc_assert (innerc);
4991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 OMP_CLAUSE__LOOPTEMP_);
4993 if (innerc)
4995 /* If needed (inner taskloop has lastprivate clause), propagate
4996 down the total number of iterations. */
4997 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4998 NULL_TREE, false,
4999 GSI_CONTINUE_LINKING);
5000 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5001 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5005 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5006 GSI_CONTINUE_LINKING);
5007 assign_stmt = gimple_build_assign (startvar, t0);
5008 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5010 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5011 GSI_CONTINUE_LINKING);
5012 assign_stmt = gimple_build_assign (endvar, t1);
5013 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5014 if (fd->collapse > 1)
5015 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5017 /* Remove the GIMPLE_OMP_FOR statement. */
5018 gsi = gsi_for_stmt (for_stmt);
5019 gsi_remove (&gsi, true);
5021 gsi = gsi_last_bb (cont_bb);
5022 gsi_remove (&gsi, true);
5024 gsi = gsi_last_bb (exit_bb);
5025 gsi_remove (&gsi, true);
5027 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5028 remove_edge (BRANCH_EDGE (entry_bb));
5029 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5030 remove_edge (BRANCH_EDGE (cont_bb));
5031 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5032 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5033 recompute_dominator (CDI_DOMINATORS, region->entry));
5036 /* The taskloop construct is represented after gimplification as
5037 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5038 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5039 The GOMP_taskloop{,_ull} function arranges for each task to be given
5040 just a single range of iterations. */
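/* In other words (an illustrative sketch, assuming the runtime behaves as
   described above), each task receives its own [start, end) pair through
   the _LOOPTEMP_ clauses read below, and the loop expanded here is simply

     for (V = start; V cond end; V += STEP) BODY;

   with no further partitioning inside the task.  */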
5042 static void
5043 expand_omp_taskloop_for_inner (struct omp_region *region,
5044 struct omp_for_data *fd,
5045 gimple *inner_stmt)
5047 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5048 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5049 basic_block fin_bb;
5050 gimple_stmt_iterator gsi;
5051 edge ep;
5052 bool broken_loop = region->cont == NULL;
5053 tree *counts = NULL;
5054 tree n1, n2, step;
5056 itype = type = TREE_TYPE (fd->loop.v);
5057 if (POINTER_TYPE_P (type))
5058 itype = signed_type_for (type);
5060 /* See if we need to bias by LLONG_MIN. */
5061 if (fd->iter_type == long_long_unsigned_type_node
5062 && TREE_CODE (type) == INTEGER_TYPE
5063 && !TYPE_UNSIGNED (type))
5065 tree n1, n2;
5067 if (fd->loop.cond_code == LT_EXPR)
5069 n1 = fd->loop.n1;
5070 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5072 else
5074 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5075 n2 = fd->loop.n1;
5077 if (TREE_CODE (n1) != INTEGER_CST
5078 || TREE_CODE (n2) != INTEGER_CST
5079 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5080 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5083 entry_bb = region->entry;
5084 cont_bb = region->cont;
5085 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5086 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5087 gcc_assert (broken_loop
5088 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5089 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5090 if (!broken_loop)
5092 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5093 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5095 exit_bb = region->exit;
5097 /* Iteration space partitioning goes in ENTRY_BB. */
5098 gsi = gsi_last_bb (entry_bb);
5099 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5101 if (fd->collapse > 1)
5103 int first_zero_iter = -1, dummy = -1;
5104 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5106 counts = XALLOCAVEC (tree, fd->collapse);
5107 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5108 fin_bb, first_zero_iter,
5109 dummy_bb, dummy, l2_dom_bb);
5110 t = NULL_TREE;
5112 else
5113 t = integer_one_node;
5115 step = fd->loop.step;
5116 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5117 OMP_CLAUSE__LOOPTEMP_);
5118 gcc_assert (innerc);
5119 n1 = OMP_CLAUSE_DECL (innerc);
5120 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5121 gcc_assert (innerc);
5122 n2 = OMP_CLAUSE_DECL (innerc);
5123 if (bias)
5125 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5126 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5128 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5129 true, NULL_TREE, true, GSI_SAME_STMT);
5130 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5131 true, NULL_TREE, true, GSI_SAME_STMT);
5132 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5133 true, NULL_TREE, true, GSI_SAME_STMT);
5135 tree startvar = fd->loop.v;
5136 tree endvar = NULL_TREE;
5138 if (gimple_omp_for_combined_p (fd->for_stmt))
5140 tree clauses = gimple_omp_for_clauses (inner_stmt);
5141 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5142 gcc_assert (innerc);
5143 startvar = OMP_CLAUSE_DECL (innerc);
5144 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5145 OMP_CLAUSE__LOOPTEMP_);
5146 gcc_assert (innerc);
5147 endvar = OMP_CLAUSE_DECL (innerc);
5149 t = fold_convert (TREE_TYPE (startvar), n1);
5150 t = force_gimple_operand_gsi (&gsi, t,
5151 DECL_P (startvar)
5152 && TREE_ADDRESSABLE (startvar),
5153 NULL_TREE, false, GSI_CONTINUE_LINKING);
5154 gimple *assign_stmt = gimple_build_assign (startvar, t);
5155 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5157 t = fold_convert (TREE_TYPE (startvar), n2);
5158 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5159 false, GSI_CONTINUE_LINKING);
5160 if (endvar)
5162 assign_stmt = gimple_build_assign (endvar, e);
5163 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5164 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5165 assign_stmt = gimple_build_assign (fd->loop.v, e);
5166 else
5167 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5168 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5170 if (fd->collapse > 1)
5171 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5173 if (!broken_loop)
5175 /* The code controlling the sequential loop replaces the
5176 GIMPLE_OMP_CONTINUE. */
5177 gsi = gsi_last_bb (cont_bb);
5178 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5179 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5180 vmain = gimple_omp_continue_control_use (cont_stmt);
5181 vback = gimple_omp_continue_control_def (cont_stmt);
5183 if (!gimple_omp_for_combined_p (fd->for_stmt))
5185 if (POINTER_TYPE_P (type))
5186 t = fold_build_pointer_plus (vmain, step);
5187 else
5188 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5189 t = force_gimple_operand_gsi (&gsi, t,
5190 DECL_P (vback)
5191 && TREE_ADDRESSABLE (vback),
5192 NULL_TREE, true, GSI_SAME_STMT);
5193 assign_stmt = gimple_build_assign (vback, t);
5194 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5196 t = build2 (fd->loop.cond_code, boolean_type_node,
5197 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5198 ? t : vback, e);
5199 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5202 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5203 gsi_remove (&gsi, true);
5205 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5206 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5209 /* Remove the GIMPLE_OMP_FOR statement. */
5210 gsi = gsi_for_stmt (fd->for_stmt);
5211 gsi_remove (&gsi, true);
5213 /* Remove the GIMPLE_OMP_RETURN statement. */
5214 gsi = gsi_last_bb (exit_bb);
5215 gsi_remove (&gsi, true);
5217 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5218 if (!broken_loop)
5219 remove_edge (BRANCH_EDGE (entry_bb));
5220 else
5222 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5223 region->outer->cont = NULL;
5226 /* Connect all the blocks. */
5227 if (!broken_loop)
5229 ep = find_edge (cont_bb, body_bb);
5230 if (gimple_omp_for_combined_p (fd->for_stmt))
5232 remove_edge (ep);
5233 ep = NULL;
5235 else if (fd->collapse > 1)
5237 remove_edge (ep);
5238 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5240 else
5241 ep->flags = EDGE_TRUE_VALUE;
5242 find_edge (cont_bb, fin_bb)->flags
5243 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5246 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5247 recompute_dominator (CDI_DOMINATORS, body_bb));
5248 if (!broken_loop)
5249 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5250 recompute_dominator (CDI_DOMINATORS, fin_bb));
5252 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5254 struct loop *loop = alloc_loop ();
5255 loop->header = body_bb;
5256 if (collapse_bb == NULL)
5257 loop->latch = cont_bb;
5258 add_loop (loop, body_bb->loop_father);
5262 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5263 partitioned loop. The lowering here is abstracted, in that the
5264 loop parameters are passed through internal functions, which are
5265 further lowered by oacc_device_lower, once we get to the target
5266 compiler. The loop is of the form:
5268 for (V = B; V LTGT E; V += S) {BODY}
5270 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5271 (constant 0 for no chunking) and we will have a GWV partitioning
5272 mask, specifying dimensions over which the loop is to be
5273 partitioned (see note below). We generate code that looks like
5274 (this ignores tiling):
5276 <entry_bb> [incoming FALL->body, BRANCH->exit]
5277 typedef signedintify (typeof (V)) T; // underlying signed integral type
5278 T range = E - B;
5279 T chunk_no = 0;
5280 T DIR = LTGT == '<' ? +1 : -1;
5281 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5282 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5284 <head_bb> [created by splitting end of entry_bb]
5285 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5286 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5287 if (!(offset LTGT bound)) goto bottom_bb;
5289 <body_bb> [incoming]
5290 V = B + offset;
5291 {BODY}
5293 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5294 offset += step;
5295 if (offset LTGT bound) goto body_bb; [*]
5297 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5298 chunk_no++;
5299 if (chunk_no < chunk_max) goto head_bb;
5301 <exit_bb> [incoming]
5302 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5304 [*] Needed if V live at end of loop. */
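
/* As a rough illustration (an assumed example, not taken from this file):
   a gang-partitioned loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++) a[i] = b[i];

   reaches this routine with B = 0, E = n, S = 1, LTGT = '<', CHUNK_SIZE = 0
   and GWV naming the gang dimension. The IFN_GOACC_LOOP_* calls emitted
   below only record those parameters; how each gang's OFFSET/BOUND slice of
   [0, n) is actually computed is decided later, by oacc_device_lower in the
   target compiler. */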
5306 static void
5307 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5309 tree v = fd->loop.v;
5310 enum tree_code cond_code = fd->loop.cond_code;
5311 enum tree_code plus_code = PLUS_EXPR;
5313 tree chunk_size = integer_minus_one_node;
5314 tree gwv = integer_zero_node;
5315 tree iter_type = TREE_TYPE (v);
5316 tree diff_type = iter_type;
5317 tree plus_type = iter_type;
5318 struct oacc_collapse *counts = NULL;
5320 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5321 == GF_OMP_FOR_KIND_OACC_LOOP);
5322 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5323 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5325 if (POINTER_TYPE_P (iter_type))
5327 plus_code = POINTER_PLUS_EXPR;
5328 plus_type = sizetype;
5330 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5331 diff_type = signed_type_for (diff_type);
5332 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5333 diff_type = integer_type_node;
5335 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5336 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5337 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5338 basic_block bottom_bb = NULL;
5340 /* entry_bb has two successors; the branch edge is to the exit
5341 block, fallthrough edge to body. */
5342 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5343 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5345 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5346 body_bb, or to a block whose only successor is the body_bb. Its
5347 fallthrough successor is the final block (same as the branch
5348 successor of the entry_bb). */
5349 if (cont_bb)
5351 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5352 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5354 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5355 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5357 else
5358 gcc_assert (!gimple_in_ssa_p (cfun));
5360 /* The exit block only has entry_bb and cont_bb as predecessors. */
5361 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5363 tree chunk_no;
5364 tree chunk_max = NULL_TREE;
5365 tree bound, offset;
5366 tree step = create_tmp_var (diff_type, ".step");
5367 bool up = cond_code == LT_EXPR;
5368 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5369 bool chunking = !gimple_in_ssa_p (cfun);
5370 bool negating;
5372 /* Tiling vars. */
5373 tree tile_size = NULL_TREE;
5374 tree element_s = NULL_TREE;
5375 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5376 basic_block elem_body_bb = NULL;
5377 basic_block elem_cont_bb = NULL;
5379 /* SSA instances. */
5380 tree offset_incr = NULL_TREE;
5381 tree offset_init = NULL_TREE;
5383 gimple_stmt_iterator gsi;
5384 gassign *ass;
5385 gcall *call;
5386 gimple *stmt;
5387 tree expr;
5388 location_t loc;
5389 edge split, be, fte;
5391 /* Split the end of entry_bb to create head_bb. */
5392 split = split_block (entry_bb, last_stmt (entry_bb));
5393 basic_block head_bb = split->dest;
5394 entry_bb = split->src;
5396 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5397 gsi = gsi_last_bb (entry_bb);
5398 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5399 loc = gimple_location (for_stmt);
5401 if (gimple_in_ssa_p (cfun))
5403 offset_init = gimple_omp_for_index (for_stmt, 0);
5404 gcc_assert (integer_zerop (fd->loop.n1));
5405 /* The SSA parallelizer does gang parallelism. */
5406 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5409 if (fd->collapse > 1 || fd->tiling)
5411 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5412 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5413 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5414 TREE_TYPE (fd->loop.n2), loc);
5416 if (SSA_VAR_P (fd->loop.n2))
5418 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5419 true, GSI_SAME_STMT);
5420 ass = gimple_build_assign (fd->loop.n2, total);
5421 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5425 tree b = fd->loop.n1;
5426 tree e = fd->loop.n2;
5427 tree s = fd->loop.step;
5429 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5430 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5432 /* Convert the step, avoiding possible unsigned->signed overflow. */
5433 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5434 if (negating)
5435 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5436 s = fold_convert (diff_type, s);
5437 if (negating)
5438 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5439 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5441 if (!chunking)
5442 chunk_size = integer_zero_node;
5443 expr = fold_convert (diff_type, chunk_size);
5444 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5445 NULL_TREE, true, GSI_SAME_STMT);
5447 if (fd->tiling)
5449 /* Determine the tile size and element step,
5450 modify the outer loop step size. */
5451 tile_size = create_tmp_var (diff_type, ".tile_size");
5452 expr = build_int_cst (diff_type, 1);
5453 for (int ix = 0; ix < fd->collapse; ix++)
5454 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5455 expr = force_gimple_operand_gsi (&gsi, expr, true,
5456 NULL_TREE, true, GSI_SAME_STMT);
5457 ass = gimple_build_assign (tile_size, expr);
5458 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5460 element_s = create_tmp_var (diff_type, ".element_s");
5461 ass = gimple_build_assign (element_s, s);
5462 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5464 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5465 s = force_gimple_operand_gsi (&gsi, expr, true,
5466 NULL_TREE, true, GSI_SAME_STMT);
5469 /* Determine the range, avoiding possible unsigned->signed overflow. */
5470 negating = !up && TYPE_UNSIGNED (iter_type);
5471 expr = fold_build2 (MINUS_EXPR, plus_type,
5472 fold_convert (plus_type, negating ? b : e),
5473 fold_convert (plus_type, negating ? e : b));
5474 expr = fold_convert (diff_type, expr);
5475 if (negating)
5476 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5477 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5478 NULL_TREE, true, GSI_SAME_STMT);
5480 chunk_no = build_int_cst (diff_type, 0);
5481 if (chunking)
5483 gcc_assert (!gimple_in_ssa_p (cfun));
5485 expr = chunk_no;
5486 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5487 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5489 ass = gimple_build_assign (chunk_no, expr);
5490 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5492 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5493 build_int_cst (integer_type_node,
5494 IFN_GOACC_LOOP_CHUNKS),
5495 dir, range, s, chunk_size, gwv);
5496 gimple_call_set_lhs (call, chunk_max);
5497 gimple_set_location (call, loc);
5498 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5500 else
5501 chunk_size = chunk_no;
5503 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5504 build_int_cst (integer_type_node,
5505 IFN_GOACC_LOOP_STEP),
5506 dir, range, s, chunk_size, gwv);
5507 gimple_call_set_lhs (call, step);
5508 gimple_set_location (call, loc);
5509 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5511 /* Remove the GIMPLE_OMP_FOR. */
5512 gsi_remove (&gsi, true);
5514 /* Fixup edges from head_bb. */
5515 be = BRANCH_EDGE (head_bb);
5516 fte = FALLTHRU_EDGE (head_bb);
5517 be->flags |= EDGE_FALSE_VALUE;
5518 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5520 basic_block body_bb = fte->dest;
5522 if (gimple_in_ssa_p (cfun))
5524 gsi = gsi_last_bb (cont_bb);
5525 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5527 offset = gimple_omp_continue_control_use (cont_stmt);
5528 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5530 else
5532 offset = create_tmp_var (diff_type, ".offset");
5533 offset_init = offset_incr = offset;
5535 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5537 /* Loop offset & bound go into head_bb. */
5538 gsi = gsi_start_bb (head_bb);
5540 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5541 build_int_cst (integer_type_node,
5542 IFN_GOACC_LOOP_OFFSET),
5543 dir, range, s,
5544 chunk_size, gwv, chunk_no);
5545 gimple_call_set_lhs (call, offset_init);
5546 gimple_set_location (call, loc);
5547 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5549 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5550 build_int_cst (integer_type_node,
5551 IFN_GOACC_LOOP_BOUND),
5552 dir, range, s,
5553 chunk_size, gwv, offset_init);
5554 gimple_call_set_lhs (call, bound);
5555 gimple_set_location (call, loc);
5556 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5558 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5559 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5560 GSI_CONTINUE_LINKING);
5562 /* V assignment goes into body_bb. */
5563 if (!gimple_in_ssa_p (cfun))
5565 gsi = gsi_start_bb (body_bb);
5567 expr = build2 (plus_code, iter_type, b,
5568 fold_convert (plus_type, offset));
5569 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5570 true, GSI_SAME_STMT);
5571 ass = gimple_build_assign (v, expr);
5572 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5574 if (fd->collapse > 1 || fd->tiling)
5575 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5577 if (fd->tiling)
5579 /* Determine the range of the element loop -- usually simply
5580 the tile_size, but could be smaller if the final
5581 iteration of the outer loop is a partial tile. */
5582 tree e_range = create_tmp_var (diff_type, ".e_range");
5584 expr = build2 (MIN_EXPR, diff_type,
5585 build2 (MINUS_EXPR, diff_type, bound, offset),
5586 build2 (MULT_EXPR, diff_type, tile_size,
5587 element_s));
5588 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5589 true, GSI_SAME_STMT);
5590 ass = gimple_build_assign (e_range, expr);
5591 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5593 /* Determine bound, offset & step of inner loop. */
5594 e_bound = create_tmp_var (diff_type, ".e_bound");
5595 e_offset = create_tmp_var (diff_type, ".e_offset");
5596 e_step = create_tmp_var (diff_type, ".e_step");
5598 /* Mark these as element loops. */
5599 tree t, e_gwv = integer_minus_one_node;
5600 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5602 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5603 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5604 element_s, chunk, e_gwv, chunk);
5605 gimple_call_set_lhs (call, e_offset);
5606 gimple_set_location (call, loc);
5607 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5609 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5610 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5611 element_s, chunk, e_gwv, e_offset);
5612 gimple_call_set_lhs (call, e_bound);
5613 gimple_set_location (call, loc);
5614 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5616 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5617 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5618 element_s, chunk, e_gwv);
5619 gimple_call_set_lhs (call, e_step);
5620 gimple_set_location (call, loc);
5621 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5623 /* Add test and split block. */
5624 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5625 stmt = gimple_build_cond_empty (expr);
5626 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5627 split = split_block (body_bb, stmt);
5628 elem_body_bb = split->dest;
5629 if (cont_bb == body_bb)
5630 cont_bb = elem_body_bb;
5631 body_bb = split->src;
5633 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5635 /* Initialize the user's loop vars. */
5636 gsi = gsi_start_bb (elem_body_bb);
5637 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5641 /* Loop increment goes into cont_bb. If this is not a loop, we
5642 will have spawned threads as if it were, and each one will
5643 execute one iteration. The specification is not explicit about
5644 whether such constructs are ill-formed or not, and they can
5645 occur, especially when noreturn routines are involved. */
5646 if (cont_bb)
5648 gsi = gsi_last_bb (cont_bb);
5649 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5650 loc = gimple_location (cont_stmt);
5652 if (fd->tiling)
5654 /* Insert element loop increment and test. */
5655 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5656 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5657 true, GSI_SAME_STMT);
5658 ass = gimple_build_assign (e_offset, expr);
5659 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5660 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5662 stmt = gimple_build_cond_empty (expr);
5663 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5664 split = split_block (cont_bb, stmt);
5665 elem_cont_bb = split->src;
5666 cont_bb = split->dest;
5668 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5669 split->probability = profile_probability::unlikely ().guessed ();
5670 edge latch_edge
5671 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5672 latch_edge->probability = profile_probability::likely ().guessed ();
5674 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5675 skip_edge->probability = profile_probability::unlikely ().guessed ();
5676 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5677 loop_entry_edge->probability
5678 = profile_probability::likely ().guessed ();
5680 gsi = gsi_for_stmt (cont_stmt);
5683 /* Increment offset. */
5684 if (gimple_in_ssa_p (cfun))
5685 expr = build2 (plus_code, iter_type, offset,
5686 fold_convert (plus_type, step));
5687 else
5688 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5689 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5690 true, GSI_SAME_STMT);
5691 ass = gimple_build_assign (offset_incr, expr);
5692 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5693 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5694 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5696 /* Remove the GIMPLE_OMP_CONTINUE. */
5697 gsi_remove (&gsi, true);
5699 /* Fixup edges from cont_bb. */
5700 be = BRANCH_EDGE (cont_bb);
5701 fte = FALLTHRU_EDGE (cont_bb);
5702 be->flags |= EDGE_TRUE_VALUE;
5703 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5705 if (chunking)
5707 /* Split the beginning of exit_bb to make bottom_bb. We
5708 need to insert a nop at the start, because splitting is
5709 after a stmt, not before. */
5710 gsi = gsi_start_bb (exit_bb);
5711 stmt = gimple_build_nop ();
5712 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5713 split = split_block (exit_bb, stmt);
5714 bottom_bb = split->src;
5715 exit_bb = split->dest;
5716 gsi = gsi_last_bb (bottom_bb);
5718 /* Chunk increment and test goes into bottom_bb. */
5719 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5720 build_int_cst (diff_type, 1));
5721 ass = gimple_build_assign (chunk_no, expr);
5722 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5724 /* Chunk test at end of bottom_bb. */
5725 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5726 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5727 GSI_CONTINUE_LINKING);
5729 /* Fixup edges from bottom_bb. */
5730 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5731 split->probability = profile_probability::unlikely ().guessed ();
5732 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5733 latch_edge->probability = profile_probability::likely ().guessed ();
5737 gsi = gsi_last_bb (exit_bb);
5738 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5739 loc = gimple_location (gsi_stmt (gsi));
5741 if (!gimple_in_ssa_p (cfun))
5743 /* Insert the final value of V, in case it is live. This is the
5744 value for the only thread that survives past the join. */
5745 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5746 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5747 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5748 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5749 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5750 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5751 true, GSI_SAME_STMT);
5752 ass = gimple_build_assign (v, expr);
5753 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5756 /* Remove the OMP_RETURN. */
5757 gsi_remove (&gsi, true);
5759 if (cont_bb)
5761 /* We now have one, two or three nested loops. Update the loop
5762 structures. */
5763 struct loop *parent = entry_bb->loop_father;
5764 struct loop *body = body_bb->loop_father;
5766 if (chunking)
5768 struct loop *chunk_loop = alloc_loop ();
5769 chunk_loop->header = head_bb;
5770 chunk_loop->latch = bottom_bb;
5771 add_loop (chunk_loop, parent);
5772 parent = chunk_loop;
5774 else if (parent != body)
5776 gcc_assert (body->header == body_bb);
5777 gcc_assert (body->latch == cont_bb
5778 || single_pred (body->latch) == cont_bb);
5779 parent = NULL;
5782 if (parent)
5784 struct loop *body_loop = alloc_loop ();
5785 body_loop->header = body_bb;
5786 body_loop->latch = cont_bb;
5787 add_loop (body_loop, parent);
5789 if (fd->tiling)
5791 /* Insert tiling's element loop. */
5792 struct loop *inner_loop = alloc_loop ();
5793 inner_loop->header = elem_body_bb;
5794 inner_loop->latch = elem_cont_bb;
5795 add_loop (inner_loop, body_loop);
5801 /* Expand the OMP loop defined by REGION. */
5803 static void
5804 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5806 struct omp_for_data fd;
5807 struct omp_for_data_loop *loops;
5809 loops
5810 = (struct omp_for_data_loop *)
5811 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5812 * sizeof (struct omp_for_data_loop));
5813 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5814 &fd, loops);
5815 region->sched_kind = fd.sched_kind;
5816 region->sched_modifiers = fd.sched_modifiers;
5818 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5819 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5820 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5821 if (region->cont)
5823 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5824 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5825 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5827 else
5828 /* If there isn't a continue then this is a degenerate case where
5829 the introduction of abnormal edges during lowering will prevent
5830 original loops from being detected. Fix that up. */
5831 loops_state_set (LOOPS_NEED_FIXUP);
5833 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5834 expand_omp_simd (region, &fd);
5835 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5836 expand_cilk_for (region, &fd);
5837 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5839 gcc_assert (!inner_stmt);
5840 expand_oacc_for (region, &fd);
5842 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5844 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5845 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5846 else
5847 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5849 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5850 && !fd.have_ordered)
5852 if (fd.chunk_size == NULL)
5853 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5854 else
5855 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5857 else
5859 int fn_index, start_ix, next_ix;
5861 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5862 == GF_OMP_FOR_KIND_FOR);
5863 if (fd.chunk_size == NULL
5864 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5865 fd.chunk_size = integer_zero_node;
5866 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5867 switch (fd.sched_kind)
5869 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5870 fn_index = 3;
5871 break;
5872 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5873 case OMP_CLAUSE_SCHEDULE_GUIDED:
5874 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5875 && !fd.ordered
5876 && !fd.have_ordered)
5878 fn_index = 3 + fd.sched_kind;
5879 break;
5881 /* FALLTHRU */
5882 default:
5883 fn_index = fd.sched_kind;
5884 break;
5886 if (!fd.ordered)
5887 fn_index += fd.have_ordered * 6;
5888 if (fd.ordered)
5889 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5890 else
5891 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5892 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5893 if (fd.iter_type == long_long_unsigned_type_node)
5895 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5896 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5897 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5898 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5900 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5901 (enum built_in_function) next_ix, inner_stmt);
5904 if (gimple_in_ssa_p (cfun))
5905 update_ssa (TODO_update_ssa_only_virtuals);
5908 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5910 v = GOMP_sections_start (n);
5911 L0:
5912 switch (v)
5913 {
5914 case 0:
5915 goto L2;
5916 case 1:
5917 section 1;
5918 goto L1;
5919 case 2:
5920 ...
5921 case n:
5922 ...
5923 default:
5924 abort ();
5925 }
5926 L1:
5927 v = GOMP_sections_next ();
5928 goto L0;
5929 L2:
5930 reduction;
5932 If this is a combined parallel+sections region, replace the call to
5933 GOMP_sections_start with a call to GOMP_sections_next. */
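
/* For instance (an assumed example, not taken from this file), a directive
   with two sections

     #pragma omp sections
     {
       #pragma omp section
         A ();
       #pragma omp section
         B ();
     }

   is expanded into roughly

     v = GOMP_sections_start (2);
     L0: switch (v)
           {
           case 0: goto L2;
           case 1: A (); goto L1;
           case 2: B (); goto L1;
           default: __builtin_trap ();
           }
     L1: v = GOMP_sections_next (); goto L0;
     L2: ;

   where the default case is generated below with __builtin_trap rather than
   the abort shown in the pseudo code above. */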
5935 static void
5936 expand_omp_sections (struct omp_region *region)
5938 tree t, u, vin = NULL, vmain, vnext, l2;
5939 unsigned len;
5940 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5941 gimple_stmt_iterator si, switch_si;
5942 gomp_sections *sections_stmt;
5943 gimple *stmt;
5944 gomp_continue *cont;
5945 edge_iterator ei;
5946 edge e;
5947 struct omp_region *inner;
5948 unsigned i, casei;
5949 bool exit_reachable = region->cont != NULL;
5951 gcc_assert (region->exit != NULL);
5952 entry_bb = region->entry;
5953 l0_bb = single_succ (entry_bb);
5954 l1_bb = region->cont;
5955 l2_bb = region->exit;
5956 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5957 l2 = gimple_block_label (l2_bb);
5958 else
5960 /* This can happen if there are reductions. */
5961 len = EDGE_COUNT (l0_bb->succs);
5962 gcc_assert (len > 0);
5963 e = EDGE_SUCC (l0_bb, len - 1);
5964 si = gsi_last_bb (e->dest);
5965 l2 = NULL_TREE;
5966 if (gsi_end_p (si)
5967 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5968 l2 = gimple_block_label (e->dest);
5969 else
5970 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5972 si = gsi_last_bb (e->dest);
5973 if (gsi_end_p (si)
5974 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5976 l2 = gimple_block_label (e->dest);
5977 break;
5981 if (exit_reachable)
5982 default_bb = create_empty_bb (l1_bb->prev_bb);
5983 else
5984 default_bb = create_empty_bb (l0_bb);
5986 /* We will build a switch() with enough cases for all the
5987 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5988 and a default case to abort if something goes wrong. */
5989 len = EDGE_COUNT (l0_bb->succs);
5991 /* Use vec::quick_push on label_vec throughout, since we know the size
5992 in advance. */
5993 auto_vec<tree> label_vec (len);
5995 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5996 GIMPLE_OMP_SECTIONS statement. */
5997 si = gsi_last_bb (entry_bb);
5998 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5999 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6000 vin = gimple_omp_sections_control (sections_stmt);
6001 if (!is_combined_parallel (region))
6003 /* If we are not inside a combined parallel+sections region,
6004 call GOMP_sections_start. */
6005 t = build_int_cst (unsigned_type_node, len - 1);
6006 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6007 stmt = gimple_build_call (u, 1, t);
6009 else
6011 /* Otherwise, call GOMP_sections_next. */
6012 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6013 stmt = gimple_build_call (u, 0);
6015 gimple_call_set_lhs (stmt, vin);
6016 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6017 gsi_remove (&si, true);
6019 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6020 L0_BB. */
6021 switch_si = gsi_last_bb (l0_bb);
6022 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6023 if (exit_reachable)
6025 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6026 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6027 vmain = gimple_omp_continue_control_use (cont);
6028 vnext = gimple_omp_continue_control_def (cont);
6030 else
6032 vmain = vin;
6033 vnext = NULL_TREE;
6036 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6037 label_vec.quick_push (t);
6038 i = 1;
6040 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6041 for (inner = region->inner, casei = 1;
6042 inner;
6043 inner = inner->next, i++, casei++)
6045 basic_block s_entry_bb, s_exit_bb;
6047 /* Skip optional reduction region. */
6048 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6050 --i;
6051 --casei;
6052 continue;
6055 s_entry_bb = inner->entry;
6056 s_exit_bb = inner->exit;
6058 t = gimple_block_label (s_entry_bb);
6059 u = build_int_cst (unsigned_type_node, casei);
6060 u = build_case_label (u, NULL, t);
6061 label_vec.quick_push (u);
6063 si = gsi_last_bb (s_entry_bb);
6064 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6065 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6066 gsi_remove (&si, true);
6067 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6069 if (s_exit_bb == NULL)
6070 continue;
6072 si = gsi_last_bb (s_exit_bb);
6073 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6074 gsi_remove (&si, true);
6076 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6079 /* Error handling code goes in DEFAULT_BB. */
6080 t = gimple_block_label (default_bb);
6081 u = build_case_label (NULL, NULL, t);
6082 make_edge (l0_bb, default_bb, 0);
6083 add_bb_to_loop (default_bb, current_loops->tree_root);
6085 stmt = gimple_build_switch (vmain, u, label_vec);
6086 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6087 gsi_remove (&switch_si, true);
6089 si = gsi_start_bb (default_bb);
6090 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6091 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6093 if (exit_reachable)
6095 tree bfn_decl;
6097 /* Code to get the next section goes in L1_BB. */
6098 si = gsi_last_bb (l1_bb);
6099 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6101 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6102 stmt = gimple_build_call (bfn_decl, 0);
6103 gimple_call_set_lhs (stmt, vnext);
6104 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6105 gsi_remove (&si, true);
6107 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6110 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6111 si = gsi_last_bb (l2_bb);
6112 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6113 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6114 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6115 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6116 else
6117 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6118 stmt = gimple_build_call (t, 0);
6119 if (gimple_omp_return_lhs (gsi_stmt (si)))
6120 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6121 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6122 gsi_remove (&si, true);
6124 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6127 /* Expand code for an OpenMP single directive. We've already expanded
6128 much of the code; here we simply place the GOMP_barrier call. */
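
/* For example (an assumed sketch): for

     #pragma omp single
       foo ();

   the GOMP_single_start test guarding the call was generated earlier,
   during lowering; the only code added here is the implicit barrier at the
   region exit, roughly GOMP_barrier (), which is omitted when a nowait
   clause is present. */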
6130 static void
6131 expand_omp_single (struct omp_region *region)
6133 basic_block entry_bb, exit_bb;
6134 gimple_stmt_iterator si;
6136 entry_bb = region->entry;
6137 exit_bb = region->exit;
6139 si = gsi_last_bb (entry_bb);
6140 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6141 gsi_remove (&si, true);
6142 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6144 si = gsi_last_bb (exit_bb);
6145 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6147 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6148 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6150 gsi_remove (&si, true);
6151 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6154 /* Generic expansion for OpenMP synchronization directives: master,
6155 ordered and critical. All we need to do here is remove the entry
6156 and exit markers for REGION. */
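
/* E.g. (an assumed sketch) for #pragma omp critical the GOMP_critical_start
   and GOMP_critical_end calls were already emitted during lowering; what is
   left here is only deleting the GIMPLE_OMP_CRITICAL and GIMPLE_OMP_RETURN
   markers and turning their outgoing edges into plain fallthrough edges. */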
6158 static void
6159 expand_omp_synch (struct omp_region *region)
6161 basic_block entry_bb, exit_bb;
6162 gimple_stmt_iterator si;
6164 entry_bb = region->entry;
6165 exit_bb = region->exit;
6167 si = gsi_last_bb (entry_bb);
6168 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6169 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6170 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6171 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6172 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6173 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6174 gsi_remove (&si, true);
6175 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6177 if (exit_bb)
6179 si = gsi_last_bb (exit_bb);
6180 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6181 gsi_remove (&si, true);
6182 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6186 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6187 operation as a normal volatile load. */
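
/* For example (an assumed sketch), with a 4-byte int X

     #pragma omp atomic read
       v = x;

   becomes roughly v = __atomic_load_4 (&x, MEMMODEL_RELAXED), or
   MEMMODEL_SEQ_CST when the seq_cst clause is present. */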
6189 static bool
6190 expand_omp_atomic_load (basic_block load_bb, tree addr,
6191 tree loaded_val, int index)
6193 enum built_in_function tmpbase;
6194 gimple_stmt_iterator gsi;
6195 basic_block store_bb;
6196 location_t loc;
6197 gimple *stmt;
6198 tree decl, call, type, itype;
6200 gsi = gsi_last_bb (load_bb);
6201 stmt = gsi_stmt (gsi);
6202 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6203 loc = gimple_location (stmt);
6205 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6206 is smaller than word size, then expand_atomic_load assumes that the load
6207 is atomic. We could avoid the builtin entirely in this case. */
6209 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6210 decl = builtin_decl_explicit (tmpbase);
6211 if (decl == NULL_TREE)
6212 return false;
6214 type = TREE_TYPE (loaded_val);
6215 itype = TREE_TYPE (TREE_TYPE (decl));
6217 call = build_call_expr_loc (loc, decl, 2, addr,
6218 build_int_cst (NULL,
6219 gimple_omp_atomic_seq_cst_p (stmt)
6220 ? MEMMODEL_SEQ_CST
6221 : MEMMODEL_RELAXED));
6222 if (!useless_type_conversion_p (type, itype))
6223 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6224 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6226 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6227 gsi_remove (&gsi, true);
6229 store_bb = single_succ (load_bb);
6230 gsi = gsi_last_bb (store_bb);
6231 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6232 gsi_remove (&gsi, true);
6234 if (gimple_in_ssa_p (cfun))
6235 update_ssa (TODO_update_ssa_no_phi);
6237 return true;
6240 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6241 operation as a normal volatile store. */
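
/* For example (an assumed sketch), with a 4-byte int X

     #pragma omp atomic write
       x = expr;

   becomes roughly __atomic_store_4 (&x, expr, MEMMODEL_RELAXED); if the old
   value is also needed (an atomic capture), __atomic_exchange_4 is used
   instead, provided the target supports it. */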
6243 static bool
6244 expand_omp_atomic_store (basic_block load_bb, tree addr,
6245 tree loaded_val, tree stored_val, int index)
6247 enum built_in_function tmpbase;
6248 gimple_stmt_iterator gsi;
6249 basic_block store_bb = single_succ (load_bb);
6250 location_t loc;
6251 gimple *stmt;
6252 tree decl, call, type, itype;
6253 machine_mode imode;
6254 bool exchange;
6256 gsi = gsi_last_bb (load_bb);
6257 stmt = gsi_stmt (gsi);
6258 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6260 /* If the load value is needed, then this isn't a store but an exchange. */
6261 exchange = gimple_omp_atomic_need_value_p (stmt);
6263 gsi = gsi_last_bb (store_bb);
6264 stmt = gsi_stmt (gsi);
6265 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6266 loc = gimple_location (stmt);
6268 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6269 is smaller than word size, then expand_atomic_store assumes that the store
6270 is atomic. We could avoid the builtin entirely in this case. */
6272 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6273 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6274 decl = builtin_decl_explicit (tmpbase);
6275 if (decl == NULL_TREE)
6276 return false;
6278 type = TREE_TYPE (stored_val);
6280 /* Dig out the type of the function's second argument. */
6281 itype = TREE_TYPE (decl);
6282 itype = TYPE_ARG_TYPES (itype);
6283 itype = TREE_CHAIN (itype);
6284 itype = TREE_VALUE (itype);
6285 imode = TYPE_MODE (itype);
6287 if (exchange && !can_atomic_exchange_p (imode, true))
6288 return false;
6290 if (!useless_type_conversion_p (itype, type))
6291 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6292 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6293 build_int_cst (NULL,
6294 gimple_omp_atomic_seq_cst_p (stmt)
6295 ? MEMMODEL_SEQ_CST
6296 : MEMMODEL_RELAXED));
6297 if (exchange)
6299 if (!useless_type_conversion_p (type, itype))
6300 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6301 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6304 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6305 gsi_remove (&gsi, true);
6307 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6308 gsi = gsi_last_bb (load_bb);
6309 gsi_remove (&gsi, true);
6311 if (gimple_in_ssa_p (cfun))
6312 update_ssa (TODO_update_ssa_no_phi);
6314 return true;
6317 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6318 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6319 size of the data type, and thus usable to find the index of the builtin
6320 decl. Returns false if the expression is not of the proper form. */
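
/* For example (an assumed sketch), with a 4-byte int X

     #pragma omp atomic
       x = x + 1;

   becomes roughly __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED), with the
   result discarded; a capture of the updated value would select the
   __atomic_add_fetch_4 variant instead. */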
6322 static bool
6323 expand_omp_atomic_fetch_op (basic_block load_bb,
6324 tree addr, tree loaded_val,
6325 tree stored_val, int index)
6327 enum built_in_function oldbase, newbase, tmpbase;
6328 tree decl, itype, call;
6329 tree lhs, rhs;
6330 basic_block store_bb = single_succ (load_bb);
6331 gimple_stmt_iterator gsi;
6332 gimple *stmt;
6333 location_t loc;
6334 enum tree_code code;
6335 bool need_old, need_new;
6336 machine_mode imode;
6337 bool seq_cst;
6339 /* We expect to find the following sequences:
6341 load_bb:
6342 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6344 store_bb:
6345 val = tmp OP something; (or: something OP tmp)
6346 GIMPLE_OMP_STORE (val)
6348 ???FIXME: Allow a more flexible sequence.
6349 Perhaps use data flow to pick the statements.
6353 gsi = gsi_after_labels (store_bb);
6354 stmt = gsi_stmt (gsi);
6355 loc = gimple_location (stmt);
6356 if (!is_gimple_assign (stmt))
6357 return false;
6358 gsi_next (&gsi);
6359 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6360 return false;
6361 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6362 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6363 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6364 gcc_checking_assert (!need_old || !need_new);
6366 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6367 return false;
6369 /* Check for one of the supported fetch-op operations. */
6370 code = gimple_assign_rhs_code (stmt);
6371 switch (code)
6373 case PLUS_EXPR:
6374 case POINTER_PLUS_EXPR:
6375 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6376 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6377 break;
6378 case MINUS_EXPR:
6379 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6380 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6381 break;
6382 case BIT_AND_EXPR:
6383 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6384 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6385 break;
6386 case BIT_IOR_EXPR:
6387 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6388 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6389 break;
6390 case BIT_XOR_EXPR:
6391 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6392 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6393 break;
6394 default:
6395 return false;
6398 /* Make sure the expression is of the proper form. */
6399 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6400 rhs = gimple_assign_rhs2 (stmt);
6401 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6402 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6403 rhs = gimple_assign_rhs1 (stmt);
6404 else
6405 return false;
6407 tmpbase = ((enum built_in_function)
6408 ((need_new ? newbase : oldbase) + index + 1));
6409 decl = builtin_decl_explicit (tmpbase);
6410 if (decl == NULL_TREE)
6411 return false;
6412 itype = TREE_TYPE (TREE_TYPE (decl));
6413 imode = TYPE_MODE (itype);
6415 /* We could test all of the various optabs involved, but the fact of the
6416 matter is that (with the exception of i486 vs i586 and xadd) all targets
6417 that support any atomic operation optab also implement compare-and-swap.
6418 Let optabs.c take care of expanding any compare-and-swap loop. */
6419 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6420 return false;
6422 gsi = gsi_last_bb (load_bb);
6423 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6425 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6426 It only requires that the operation happen atomically. Thus we can
6427 use the RELAXED memory model. */
6428 call = build_call_expr_loc (loc, decl, 3, addr,
6429 fold_convert_loc (loc, itype, rhs),
6430 build_int_cst (NULL,
6431 seq_cst ? MEMMODEL_SEQ_CST
6432 : MEMMODEL_RELAXED));
6434 if (need_old || need_new)
6436 lhs = need_old ? loaded_val : stored_val;
6437 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6438 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6440 else
6441 call = fold_convert_loc (loc, void_type_node, call);
6442 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6443 gsi_remove (&gsi, true);
6445 gsi = gsi_last_bb (store_bb);
6446 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6447 gsi_remove (&gsi, true);
6448 gsi = gsi_last_bb (store_bb);
6449 stmt = gsi_stmt (gsi);
6450 gsi_remove (&gsi, true);
6452 if (gimple_in_ssa_p (cfun))
6454 release_defs (stmt);
6455 update_ssa (TODO_update_ssa_no_phi);
6458 return true;
6461 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6463 oldval = *addr;
6464 repeat:
6465 newval = rhs; // with oldval replacing *addr in rhs
6466 actual = __sync_val_compare_and_swap (addr, oldval, newval);
6467 if (actual != oldval)
6468 { oldval = actual; goto repeat; }
6470 INDEX is log2 of the size of the data type, and thus usable to find the
6471 index of the builtin decl. */
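
/* For example (an assumed sketch): for

     double d;
     #pragma omp atomic
       d = d * 2.0;

   there is no fetch-and-multiply builtin, so the value is view-converted to
   a same-sized integer and the update is retried in the loop above with
   __sync_val_compare_and_swap_8 until the swap succeeds. */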
6473 static bool
6474 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6475 tree addr, tree loaded_val, tree stored_val,
6476 int index)
6478 tree loadedi, storedi, initial, new_storedi, old_vali;
6479 tree type, itype, cmpxchg, iaddr;
6480 gimple_stmt_iterator si;
6481 basic_block loop_header = single_succ (load_bb);
6482 gimple *phi, *stmt;
6483 edge e;
6484 enum built_in_function fncode;
6486 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6487 order to use the RELAXED memory model effectively. */
6488 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6489 + index + 1);
6490 cmpxchg = builtin_decl_explicit (fncode);
6491 if (cmpxchg == NULL_TREE)
6492 return false;
6493 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6494 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6496 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6497 || !can_atomic_load_p (TYPE_MODE (itype)))
6498 return false;
6500 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6501 si = gsi_last_bb (load_bb);
6502 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6504 /* For floating-point values, we'll need to view-convert them to integers
6505 so that we can perform the atomic compare and swap. Simplify the
6506 following code by always setting up the "i"ntegral variables. */
6507 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6509 tree iaddr_val;
6511 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6512 true));
6513 iaddr_val
6514 = force_gimple_operand_gsi (&si,
6515 fold_convert (TREE_TYPE (iaddr), addr),
6516 false, NULL_TREE, true, GSI_SAME_STMT);
6517 stmt = gimple_build_assign (iaddr, iaddr_val);
6518 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6519 loadedi = create_tmp_var (itype);
6520 if (gimple_in_ssa_p (cfun))
6521 loadedi = make_ssa_name (loadedi);
6523 else
6525 iaddr = addr;
6526 loadedi = loaded_val;
6529 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6530 tree loaddecl = builtin_decl_explicit (fncode);
6531 if (loaddecl)
6532 initial
6533 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6534 build_call_expr (loaddecl, 2, iaddr,
6535 build_int_cst (NULL_TREE,
6536 MEMMODEL_RELAXED)));
6537 else
6538 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6539 build_int_cst (TREE_TYPE (iaddr), 0));
6541 initial
6542 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6543 GSI_SAME_STMT);
6545 /* Move the value to the LOADEDI temporary. */
6546 if (gimple_in_ssa_p (cfun))
6548 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6549 phi = create_phi_node (loadedi, loop_header);
6550 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6551 initial);
6553 else
6554 gsi_insert_before (&si,
6555 gimple_build_assign (loadedi, initial),
6556 GSI_SAME_STMT);
6557 if (loadedi != loaded_val)
6559 gimple_stmt_iterator gsi2;
6560 tree x;
6562 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6563 gsi2 = gsi_start_bb (loop_header);
6564 if (gimple_in_ssa_p (cfun))
6566 gassign *stmt;
6567 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6568 true, GSI_SAME_STMT);
6569 stmt = gimple_build_assign (loaded_val, x);
6570 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6572 else
6574 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6575 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6576 true, GSI_SAME_STMT);
6579 gsi_remove (&si, true);
6581 si = gsi_last_bb (store_bb);
6582 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6584 if (iaddr == addr)
6585 storedi = stored_val;
6586 else
6587 storedi
6588 = force_gimple_operand_gsi (&si,
6589 build1 (VIEW_CONVERT_EXPR, itype,
6590 stored_val), true, NULL_TREE, true,
6591 GSI_SAME_STMT);
6593 /* Build the compare&swap statement. */
6594 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6595 new_storedi = force_gimple_operand_gsi (&si,
6596 fold_convert (TREE_TYPE (loadedi),
6597 new_storedi),
6598 true, NULL_TREE,
6599 true, GSI_SAME_STMT);
6601 if (gimple_in_ssa_p (cfun))
6602 old_vali = loadedi;
6603 else
6605 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6606 stmt = gimple_build_assign (old_vali, loadedi);
6607 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6609 stmt = gimple_build_assign (loadedi, new_storedi);
6610 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6613 /* Note that we always perform the comparison as an integer, even for
6614 floating point. This allows the atomic operation to properly
6615 succeed even with NaNs and -0.0. */
6616 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6617 stmt = gimple_build_cond_empty (ne);
6618 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6620 /* Update cfg. */
6621 e = single_succ_edge (store_bb);
6622 e->flags &= ~EDGE_FALLTHRU;
6623 e->flags |= EDGE_FALSE_VALUE;
6624 /* Expect no looping. */
6625 e->probability = profile_probability::guessed_always ();
6627 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6628 e->probability = profile_probability::guessed_never ();
6630 /* Copy the new value to loadedi (we already did that before the condition
6631 if we are not in SSA). */
6632 if (gimple_in_ssa_p (cfun))
6634 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6635 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6638 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6639 gsi_remove (&si, true);
6641 struct loop *loop = alloc_loop ();
6642 loop->header = loop_header;
6643 loop->latch = store_bb;
6644 add_loop (loop, loop_header->loop_father);
6646 if (gimple_in_ssa_p (cfun))
6647 update_ssa (TODO_update_ssa_no_phi);
6649 return true;
6652 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6654 GOMP_atomic_start ();
6655 *addr = rhs;
6656 GOMP_atomic_end ();
6658 The result is not globally atomic, but works so long as all parallel
6659 references are within #pragma omp atomic directives. According to
6660 responses received from omp@openmp.org, this appears to be within spec,
6661 which makes sense, since that is how several other compilers handle
6662 this situation as well.
6663 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6664 expanding. STORED_VAL is the operand of the matching
6665 GIMPLE_OMP_ATOMIC_STORE.
6667 We replace
6668 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6669 loaded_val = *addr;
6671 and replace
6672 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6673 *addr = stored_val;
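
/* For example (an assumed sketch): an atomic update of a type for which no
   suitably sized compare-and-swap builtin is available ends up as

     GOMP_atomic_start ();
     *addr = *addr OP x;
     GOMP_atomic_end ();  */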
6676 static bool
6677 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6678 tree addr, tree loaded_val, tree stored_val)
6680 gimple_stmt_iterator si;
6681 gassign *stmt;
6682 tree t;
6684 si = gsi_last_bb (load_bb);
6685 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6687 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6688 t = build_call_expr (t, 0);
6689 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6691 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6692 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6693 gsi_remove (&si, true);
6695 si = gsi_last_bb (store_bb);
6696 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6698 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6699 stored_val);
6700 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6702 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6703 t = build_call_expr (t, 0);
6704 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6705 gsi_remove (&si, true);
6707 if (gimple_in_ssa_p (cfun))
6708 update_ssa (TODO_update_ssa_no_phi);
6709 return true;
6712 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6713 using expand_omp_atomic_fetch_op. If that fails, we try to
6714 call expand_omp_atomic_pipeline, and if that fails too, the
6715 ultimate fallback is wrapping the operation in a mutex
6716 (expand_omp_atomic_mutex). REGION is the atomic region built
6717 by build_omp_regions_1(). */
6719 static void
6720 expand_omp_atomic (struct omp_region *region)
6722 basic_block load_bb = region->entry, store_bb = region->exit;
6723 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6724 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6725 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6726 tree addr = gimple_omp_atomic_load_rhs (load);
6727 tree stored_val = gimple_omp_atomic_store_val (store);
6728 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6729 HOST_WIDE_INT index;
6731 /* Make sure the type is one of the supported sizes. */
6732 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6733 index = exact_log2 (index);
6734 if (index >= 0 && index <= 4)
6736 unsigned int align = TYPE_ALIGN_UNIT (type);
6738 /* __sync builtins require strict data alignment. */
6739 if (exact_log2 (align) >= index)
6741 /* Atomic load. */
6742 if (loaded_val == stored_val
6743 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6744 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6745 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6746 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6747 return;
6749 /* Atomic store. */
6750 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6751 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6752 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6753 && store_bb == single_succ (load_bb)
6754 && first_stmt (store_bb) == store
6755 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6756 stored_val, index))
6757 return;
6759 /* When possible, use specialized atomic update functions. */
6760 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6761 && store_bb == single_succ (load_bb)
6762 && expand_omp_atomic_fetch_op (load_bb, addr,
6763 loaded_val, stored_val, index))
6764 return;
6766 /* If we don't have specialized __sync builtins, try and implement
6767 as a compare and swap loop. */
6768 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6769 loaded_val, stored_val, index))
6770 return;
6774 /* The ultimate fallback is wrapping the operation in a mutex. */
6775 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6778 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6779 at REGION_EXIT. */
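
/* For instance (an assumed sketch), a kernels region containing a single
   loop nest

     #pragma acc kernels
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++) ...

   gets both loops marked; a region with two sibling loops at any level is
   left unmarked and is therefore not considered for parallelization. */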
6781 static void
6782 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6783 basic_block region_exit)
6785 struct loop *outer = region_entry->loop_father;
6786 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6788 /* Don't parallelize the kernels region if it contains more than one outer
6789 loop. */
6790 unsigned int nr_outer_loops = 0;
6791 struct loop *single_outer = NULL;
6792 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6794 gcc_assert (loop_outer (loop) == outer);
6796 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6797 continue;
6799 if (region_exit != NULL
6800 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6801 continue;
6803 nr_outer_loops++;
6804 single_outer = loop;
6806 if (nr_outer_loops != 1)
6807 return;
6809 for (struct loop *loop = single_outer->inner;
6810 loop != NULL;
6811 loop = loop->inner)
6812 if (loop->next)
6813 return;
6815 /* Mark the loops in the region. */
6816 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6817 loop->in_oacc_kernels_region = true;
6820 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6822 struct GTY(()) grid_launch_attributes_trees
6824 tree kernel_dim_array_type;
6825 tree kernel_lattrs_dimnum_decl;
6826 tree kernel_lattrs_grid_decl;
6827 tree kernel_lattrs_group_decl;
6828 tree kernel_launch_attributes_type;
6831 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6833 /* Create types used to pass kernel launch attributes to target. */
6835 static void
6836 grid_create_kernel_launch_attr_types (void)
6838 if (grid_attr_trees)
6839 return;
6840 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6842 tree dim_arr_index_type
6843 = build_index_type (build_int_cst (integer_type_node, 2));
6844 grid_attr_trees->kernel_dim_array_type
6845 = build_array_type (uint32_type_node, dim_arr_index_type);
6847 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6848 grid_attr_trees->kernel_lattrs_dimnum_decl
6849 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6850 uint32_type_node);
6851 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6853 grid_attr_trees->kernel_lattrs_grid_decl
6854 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6855 grid_attr_trees->kernel_dim_array_type);
6856 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6857 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6858 grid_attr_trees->kernel_lattrs_group_decl
6859 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6860 grid_attr_trees->kernel_dim_array_type);
6861 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6862 = grid_attr_trees->kernel_lattrs_grid_decl;
6863 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6864 "__gomp_kernel_launch_attributes",
6865 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
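
/* The record built above corresponds to (an illustrative rendering, assuming
   uint32_t stands in for uint32_type_node):

     struct __gomp_kernel_launch_attributes {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };  */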
6868 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6869 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6870 of type uint32_type_node. */
6872 static void
6873 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6874 tree fld_decl, int index, tree value)
6876 tree ref = build4 (ARRAY_REF, uint32_type_node,
6877 build3 (COMPONENT_REF,
6878 grid_attr_trees->kernel_dim_array_type,
6879 range_var, fld_decl, NULL_TREE),
6880 build_int_cst (integer_type_node, index),
6881 NULL_TREE, NULL_TREE);
6882 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6885 /* Return a tree representation of a pointer to a structure with grid and
6886 work-group size information. Statements filling that information will be
6887 inserted before GSI; TGT_STMT is the target statement which has the
6888 necessary information in it. */
6890 static tree
6891 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6892 gomp_target *tgt_stmt)
6894 grid_create_kernel_launch_attr_types ();
6895 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6896 "__kernel_launch_attrs");
6898 unsigned max_dim = 0;
6899 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6900 clause;
6901 clause = OMP_CLAUSE_CHAIN (clause))
6903 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6904 continue;
6906 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6907 max_dim = MAX (dim, max_dim);
6909 grid_insert_store_range_dim (gsi, lattrs,
6910 grid_attr_trees->kernel_lattrs_grid_decl,
6911 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6912 grid_insert_store_range_dim (gsi, lattrs,
6913 grid_attr_trees->kernel_lattrs_group_decl,
6914 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6917 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6918 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6919 gcc_checking_assert (max_dim <= 2);
6920 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6921 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6922 GSI_SAME_STMT);
6923 TREE_ADDRESSABLE (lattrs) = 1;
6924 return build_fold_addr_expr (lattrs);
6927 /* Build target argument identifier from the DEVICE identifier, value
6928 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6930 static tree
6931 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6933 tree t = build_int_cst (integer_type_node, device);
6934 if (subseqent_param)
6935 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6936 build_int_cst (integer_type_node,
6937 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6938 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6939 build_int_cst (integer_type_node, id));
6940 return t;
6943 /* Like above but return it in a type that can be directly stored as an element
6944 of the argument array. */
6946 static tree
6947 get_target_argument_identifier (int device, bool subseqent_param, int id)
6949 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6950 return fold_convert (ptr_type_node, t);
6953 /* Return a target argument consisting of DEVICE identifier, value identifier
6954 ID, and the actual VALUE. */
6956 static tree
6957 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6958 tree value)
6960 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6961 fold_convert (integer_type_node, value),
6962 build_int_cst (unsigned_type_node,
6963 GOMP_TARGET_ARG_VALUE_SHIFT));
6964 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6965 get_target_argument_identifier_1 (device, false, id));
6966 t = fold_convert (ptr_type_node, t);
6967 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
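
/* Worked example (illustration only): with the encoding used above, a small
   compile-time VALUE ends up in a single pointer-sized word as

       arg = (value << GOMP_TARGET_ARG_VALUE_SHIFT) | device | id;

   e.g. a num_teams expression for all devices becomes
   (num_teams << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_DEVICE_ALL
   | GOMP_TARGET_ARG_NUM_TEAMS, converted to ptr_type_node.  The runtime is
   expected to decode it by reversing the same shift and masks.  */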
6970 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6971 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6972 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6973 arguments. */
6975 static void
6976 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6977 int id, tree value, vec <tree> *args)
6979 if (tree_fits_shwi_p (value)
6980 && tree_to_shwi (value) > -(1 << 15)
6981 && tree_to_shwi (value) < (1 << 15))
6982 args->quick_push (get_target_argument_value (gsi, device, id, value));
6983 else
6985 args->quick_push (get_target_argument_identifier (device, true, id));
6986 value = fold_convert (ptr_type_node, value);
6987 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6988 GSI_SAME_STMT);
6989 args->quick_push (value);
6993 /* Create an array of arguments that is then passed to GOMP_target. */
6995 static tree
6996 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6998 auto_vec <tree, 6> args;
6999 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7000 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7001 if (c)
7002 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7003 else
7004 t = integer_minus_one_node;
7005 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7006 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7008 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7009 if (c)
7010 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7011 else
7012 t = integer_minus_one_node;
7013 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7014 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7015 &args);
7017 /* Add HSA-specific grid sizes, if available. */
7018 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7019 OMP_CLAUSE__GRIDDIM_))
7021 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7022 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7023 args.quick_push (t);
7024 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7027 /* Produce more, perhaps device specific, arguments here. */
7029 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7030 args.length () + 1),
7031 ".omp_target_args");
7032 for (unsigned i = 0; i < args.length (); i++)
7034 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7035 build_int_cst (integer_type_node, i),
7036 NULL_TREE, NULL_TREE);
7037 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7038 GSI_SAME_STMT);
7040 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7041 build_int_cst (integer_type_node, args.length ()),
7042 NULL_TREE, NULL_TREE);
7043 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7044 GSI_SAME_STMT);
7045 TREE_ADDRESSABLE (argarray) = 1;
7046 return build_fold_addr_expr (argarray);
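
/* For exposition only: for a typical '#pragma omp target teams' region the
   array built here looks roughly like

       .omp_target_args[] = {
         PACK (GOMP_TARGET_ARG_DEVICE_ALL, GOMP_TARGET_ARG_NUM_TEAMS, teams),
         PACK (GOMP_TARGET_ARG_DEVICE_ALL, GOMP_TARGET_ARG_THREAD_LIMIT, limit),
         hsa_attrs_identifier, &__kernel_launch_attrs,   <- only with _griddim_
         NULL                                            <- terminator
       };

   where PACK stands for the value encoding sketched above and the names are
   purely illustrative; the exact contents depend on the clauses present.  */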
7049 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7051 static void
7052 expand_omp_target (struct omp_region *region)
7054 basic_block entry_bb, exit_bb, new_bb;
7055 struct function *child_cfun;
7056 tree child_fn, block, t;
7057 gimple_stmt_iterator gsi;
7058 gomp_target *entry_stmt;
7059 gimple *stmt;
7060 edge e;
7061 bool offloaded, data_region;
7063 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7064 new_bb = region->entry;
7066 offloaded = is_gimple_omp_offloaded (entry_stmt);
7067 switch (gimple_omp_target_kind (entry_stmt))
7069 case GF_OMP_TARGET_KIND_REGION:
7070 case GF_OMP_TARGET_KIND_UPDATE:
7071 case GF_OMP_TARGET_KIND_ENTER_DATA:
7072 case GF_OMP_TARGET_KIND_EXIT_DATA:
7073 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7074 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7075 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7076 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7077 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7078 data_region = false;
7079 break;
7080 case GF_OMP_TARGET_KIND_DATA:
7081 case GF_OMP_TARGET_KIND_OACC_DATA:
7082 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7083 data_region = true;
7084 break;
7085 default:
7086 gcc_unreachable ();
7089 child_fn = NULL_TREE;
7090 child_cfun = NULL;
7091 if (offloaded)
7093 child_fn = gimple_omp_target_child_fn (entry_stmt);
7094 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7097 /* Supported by expand_omp_taskreg, but not here. */
7098 if (child_cfun != NULL)
7099 gcc_checking_assert (!child_cfun->cfg);
7100 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7102 entry_bb = region->entry;
7103 exit_bb = region->exit;
7105 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7107 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7109 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7110 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7111 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7112 DECL_ATTRIBUTES (child_fn)
7113 = tree_cons (get_identifier ("oacc kernels"),
7114 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7117 if (offloaded)
7119 unsigned srcidx, dstidx, num;
7121 /* If the offloading region needs data sent from the parent
7122 function, then the very first statement (except possible
7123 tree profile counter updates) of the offloading body
7124 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7125 &.OMP_DATA_O is passed as an argument to the child function,
7126 we need to replace it with the argument as seen by the child
7127 function.
7129 In most cases, this will end up being the identity assignment
7130 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7131 a function call that has been inlined, the original PARM_DECL
7132 .OMP_DATA_I may have been converted into a different local
7133 variable. In which case, we need to keep the assignment. */
7134 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7135 if (data_arg)
7137 basic_block entry_succ_bb = single_succ (entry_bb);
7138 gimple_stmt_iterator gsi;
7139 tree arg;
7140 gimple *tgtcopy_stmt = NULL;
7141 tree sender = TREE_VEC_ELT (data_arg, 0);
7143 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7145 gcc_assert (!gsi_end_p (gsi));
7146 stmt = gsi_stmt (gsi);
7147 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7148 continue;
7150 if (gimple_num_ops (stmt) == 2)
7152 tree arg = gimple_assign_rhs1 (stmt);
7154 /* We're ignoring the subcode because we're
7155 effectively doing a STRIP_NOPS. */
7157 if (TREE_CODE (arg) == ADDR_EXPR
7158 && TREE_OPERAND (arg, 0) == sender)
7160 tgtcopy_stmt = stmt;
7161 break;
7166 gcc_assert (tgtcopy_stmt != NULL);
7167 arg = DECL_ARGUMENTS (child_fn);
7169 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7170 gsi_remove (&gsi, true);
7173 /* Declare local variables needed in CHILD_CFUN. */
7174 block = DECL_INITIAL (child_fn);
7175 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7176 /* The gimplifier could record temporaries in the offloading block
7177 rather than in the containing function's local_decls chain,
7178 which would mean cgraph missed finalizing them. Do it now. */
7179 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7180 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7181 varpool_node::finalize_decl (t);
7182 DECL_SAVED_TREE (child_fn) = NULL;
7183 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7184 gimple_set_body (child_fn, NULL);
7185 TREE_USED (block) = 1;
7187 /* Reset DECL_CONTEXT on function arguments. */
7188 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7189 DECL_CONTEXT (t) = child_fn;
7191 /* Split ENTRY_BB at GIMPLE_*,
7192 so that it can be moved to the child function. */
7193 gsi = gsi_last_bb (entry_bb);
7194 stmt = gsi_stmt (gsi);
7195 gcc_assert (stmt
7196 && gimple_code (stmt) == gimple_code (entry_stmt));
7197 e = split_block (entry_bb, stmt);
7198 gsi_remove (&gsi, true);
7199 entry_bb = e->dest;
7200 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7202 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7203 if (exit_bb)
7205 gsi = gsi_last_bb (exit_bb);
7206 gcc_assert (!gsi_end_p (gsi)
7207 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7208 stmt = gimple_build_return (NULL);
7209 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7210 gsi_remove (&gsi, true);
7213 /* Make sure to generate early debug for the function before
7214 outlining anything. */
7215 if (! gimple_in_ssa_p (cfun))
7216 (*debug_hooks->early_global_decl) (cfun->decl);
7218 /* Move the offloading region into CHILD_CFUN. */
7220 block = gimple_block (entry_stmt);
7222 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7223 if (exit_bb)
7224 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7225 /* When the OMP expansion process cannot guarantee an up-to-date
7226 loop tree, arrange for the child function to fix up its loops. */
7227 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7228 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7230 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7231 num = vec_safe_length (child_cfun->local_decls);
7232 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7234 t = (*child_cfun->local_decls)[srcidx];
7235 if (DECL_CONTEXT (t) == cfun->decl)
7236 continue;
7237 if (srcidx != dstidx)
7238 (*child_cfun->local_decls)[dstidx] = t;
7239 dstidx++;
7241 if (dstidx != num)
7242 vec_safe_truncate (child_cfun->local_decls, dstidx);
7244 /* Inform the callgraph about the new function. */
7245 child_cfun->curr_properties = cfun->curr_properties;
7246 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7247 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7248 cgraph_node *node = cgraph_node::get_create (child_fn);
7249 node->parallelized_function = 1;
7250 cgraph_node::add_new_function (child_fn, true);
7252 /* Add the new function to the offload table. */
7253 if (ENABLE_OFFLOADING)
7254 vec_safe_push (offload_funcs, child_fn);
7256 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7257 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7259 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7260 fixed in a following pass. */
7261 push_cfun (child_cfun);
7262 if (need_asm)
7263 assign_assembler_name_if_needed (child_fn);
7264 cgraph_edge::rebuild_edges ();
7266 /* Some EH regions might become dead, see PR34608. If
7267 pass_cleanup_cfg isn't the first pass to happen with the
7268 new child, these dead EH edges might cause problems.
7269 Clean them up now. */
7270 if (flag_exceptions)
7272 basic_block bb;
7273 bool changed = false;
7275 FOR_EACH_BB_FN (bb, cfun)
7276 changed |= gimple_purge_dead_eh_edges (bb);
7277 if (changed)
7278 cleanup_tree_cfg ();
7280 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7281 verify_loop_structure ();
7282 pop_cfun ();
7284 if (dump_file && !gimple_in_ssa_p (cfun))
7286 omp_any_child_fn_dumped = true;
7287 dump_function_header (dump_file, child_fn, dump_flags);
7288 dump_function_to_file (child_fn, dump_file, dump_flags);
7292 /* Emit a library call to launch the offloading region, or do data
7293 transfers. */
7294 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7295 enum built_in_function start_ix;
7296 location_t clause_loc;
7297 unsigned int flags_i = 0;
7299 switch (gimple_omp_target_kind (entry_stmt))
7301 case GF_OMP_TARGET_KIND_REGION:
7302 start_ix = BUILT_IN_GOMP_TARGET;
7303 break;
7304 case GF_OMP_TARGET_KIND_DATA:
7305 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7306 break;
7307 case GF_OMP_TARGET_KIND_UPDATE:
7308 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7309 break;
7310 case GF_OMP_TARGET_KIND_ENTER_DATA:
7311 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7312 break;
7313 case GF_OMP_TARGET_KIND_EXIT_DATA:
7314 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7315 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7316 break;
7317 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7318 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7319 start_ix = BUILT_IN_GOACC_PARALLEL;
7320 break;
7321 case GF_OMP_TARGET_KIND_OACC_DATA:
7322 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7323 start_ix = BUILT_IN_GOACC_DATA_START;
7324 break;
7325 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7326 start_ix = BUILT_IN_GOACC_UPDATE;
7327 break;
7328 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7329 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7330 break;
7331 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7332 start_ix = BUILT_IN_GOACC_DECLARE;
7333 break;
7334 default:
7335 gcc_unreachable ();
7338 clauses = gimple_omp_target_clauses (entry_stmt);
7340 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7341 library choose) and there is no conditional. */
7342 cond = NULL_TREE;
7343 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7345 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7346 if (c)
7347 cond = OMP_CLAUSE_IF_EXPR (c);
7349 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7350 if (c)
7352 /* Even if we pass it to all library function calls, it is currently only
7353 defined/used for the OpenMP target ones. */
7354 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7355 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7356 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7357 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7359 device = OMP_CLAUSE_DEVICE_ID (c);
7360 clause_loc = OMP_CLAUSE_LOCATION (c);
7362 else
7363 clause_loc = gimple_location (entry_stmt);
7365 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7366 if (c)
7367 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7369 /* Ensure 'device' is of the correct type. */
7370 device = fold_convert_loc (clause_loc, integer_type_node, device);
7372 /* If we found the clause 'if (cond)', build
7373 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7374 if (cond)
7376 cond = gimple_boolify (cond);
7378 basic_block cond_bb, then_bb, else_bb;
7379 edge e;
7380 tree tmp_var;
7382 tmp_var = create_tmp_var (TREE_TYPE (device));
7383 if (offloaded)
7384 e = split_block_after_labels (new_bb);
7385 else
7387 gsi = gsi_last_bb (new_bb);
7388 gsi_prev (&gsi);
7389 e = split_block (new_bb, gsi_stmt (gsi));
7391 cond_bb = e->src;
7392 new_bb = e->dest;
7393 remove_edge (e);
7395 then_bb = create_empty_bb (cond_bb);
7396 else_bb = create_empty_bb (then_bb);
7397 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7398 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7400 stmt = gimple_build_cond_empty (cond);
7401 gsi = gsi_last_bb (cond_bb);
7402 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7404 gsi = gsi_start_bb (then_bb);
7405 stmt = gimple_build_assign (tmp_var, device);
7406 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7408 gsi = gsi_start_bb (else_bb);
7409 stmt = gimple_build_assign (tmp_var,
7410 build_int_cst (integer_type_node,
7411 GOMP_DEVICE_HOST_FALLBACK));
7412 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7414 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7415 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7416 add_bb_to_loop (then_bb, cond_bb->loop_father);
7417 add_bb_to_loop (else_bb, cond_bb->loop_father);
7418 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7419 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7421 device = tmp_var;
7422 gsi = gsi_last_bb (new_bb);
7424 else
7426 gsi = gsi_last_bb (new_bb);
7427 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7428 true, GSI_SAME_STMT);
7431 t = gimple_omp_target_data_arg (entry_stmt);
7432 if (t == NULL)
7434 t1 = size_zero_node;
7435 t2 = build_zero_cst (ptr_type_node);
7436 t3 = t2;
7437 t4 = t2;
7439 else
7441 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7442 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7443 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7444 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7445 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
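
/* Reading the above: when a data argument is present, T1 is the number of map
   entries (the maximum index of the sizes array plus one), T2 is the address
   of the host-address vector, T3 the address of the sizes vector and T4 the
   address of the map-kinds vector; these become the mapnum/hostaddrs/sizes/
   kinds arguments of the libgomp entry point selected below.  */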
7448 gimple *g;
7449 bool tagging = false;
7450 /* The maximum number of arguments used by any start_ix, without varargs. */
7451 auto_vec<tree, 11> args;
7452 args.quick_push (device);
7453 if (offloaded)
7454 args.quick_push (build_fold_addr_expr (child_fn));
7455 args.quick_push (t1);
7456 args.quick_push (t2);
7457 args.quick_push (t3);
7458 args.quick_push (t4);
7459 switch (start_ix)
7461 case BUILT_IN_GOACC_DATA_START:
7462 case BUILT_IN_GOACC_DECLARE:
7463 case BUILT_IN_GOMP_TARGET_DATA:
7464 break;
7465 case BUILT_IN_GOMP_TARGET:
7466 case BUILT_IN_GOMP_TARGET_UPDATE:
7467 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7468 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7469 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7470 if (c)
7471 depend = OMP_CLAUSE_DECL (c);
7472 else
7473 depend = build_int_cst (ptr_type_node, 0);
7474 args.quick_push (depend);
7475 if (start_ix == BUILT_IN_GOMP_TARGET)
7476 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7477 break;
7478 case BUILT_IN_GOACC_PARALLEL:
7479 oacc_set_fn_attrib (child_fn, clauses, &args);
7480 tagging = true;
7481 /* FALLTHRU */
7482 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7483 case BUILT_IN_GOACC_UPDATE:
7485 tree t_async = NULL_TREE;
7487 /* If present, use the value specified by the respective
7488 clause, making sure that it is of the correct type. */
7489 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7490 if (c)
7491 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7492 integer_type_node,
7493 OMP_CLAUSE_ASYNC_EXPR (c));
7494 else if (!tagging)
7495 /* Default values for t_async. */
7496 t_async = fold_convert_loc (gimple_location (entry_stmt),
7497 integer_type_node,
7498 build_int_cst (integer_type_node,
7499 GOMP_ASYNC_SYNC));
7500 if (tagging && t_async)
7502 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7504 if (TREE_CODE (t_async) == INTEGER_CST)
7506 /* See if we can pack the async arg into the tag's
7507 operand. */
7508 i_async = TREE_INT_CST_LOW (t_async);
7509 if (i_async < GOMP_LAUNCH_OP_MAX)
7510 t_async = NULL_TREE;
7511 else
7512 i_async = GOMP_LAUNCH_OP_MAX;
7514 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7515 i_async));
7517 if (t_async)
7518 args.safe_push (t_async);
7520 /* Save the argument index, and ... */
7521 unsigned t_wait_idx = args.length ();
7522 unsigned num_waits = 0;
7523 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7524 if (!tagging || c)
7525 /* ... push a placeholder. */
7526 args.safe_push (integer_zero_node);
7528 for (; c; c = OMP_CLAUSE_CHAIN (c))
7529 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7531 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7532 integer_type_node,
7533 OMP_CLAUSE_WAIT_EXPR (c)));
7534 num_waits++;
7537 if (!tagging || num_waits)
7539 tree len;
7541 /* Now that we know the number, update the placeholder. */
7542 if (tagging)
7543 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7544 else
7545 len = build_int_cst (integer_type_node, num_waits);
7546 len = fold_convert_loc (gimple_location (entry_stmt),
7547 unsigned_type_node, len);
7548 args[t_wait_idx] = len;
7551 break;
7552 default:
7553 gcc_unreachable ();
7555 if (tagging)
7556 /* Push terminal marker - zero. */
7557 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7559 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7560 gimple_set_location (g, gimple_location (entry_stmt));
7561 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
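
/* Sketch of the emitted call for a plain '#pragma omp target' region,
   assuming BUILT_IN_GOMP_TARGET expands to libgomp's GOMP_target_ext entry
   point (see libgomp for the authoritative prototype):

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, target_args);

   The OpenACC variants instead call the BUILT_IN_GOACC_* entry points, with
   async/wait information passed through the tag arguments pushed above.  */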
7562 if (!offloaded)
7564 g = gsi_stmt (gsi);
7565 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7566 gsi_remove (&gsi, true);
7568 if (data_region && region->exit)
7570 gsi = gsi_last_bb (region->exit);
7571 g = gsi_stmt (gsi);
7572 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7573 gsi_remove (&gsi, true);
7577 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7578 iteration variable derived from the thread number. INTRA_GROUP means this
7579 is an expansion of a loop iterating over work-items within a separate
7580 iteration over groups. */
7582 static void
7583 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7585 gimple_stmt_iterator gsi;
7586 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7587 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7588 == GF_OMP_FOR_KIND_GRID_LOOP);
7589 size_t collapse = gimple_omp_for_collapse (for_stmt);
7590 struct omp_for_data_loop *loops
7591 = XALLOCAVEC (struct omp_for_data_loop,
7592 gimple_omp_for_collapse (for_stmt));
7593 struct omp_for_data fd;
7595 remove_edge (BRANCH_EDGE (kfor->entry));
7596 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7598 gcc_assert (kfor->cont);
7599 omp_extract_for_data (for_stmt, &fd, loops);
7601 gsi = gsi_start_bb (body_bb);
7603 for (size_t dim = 0; dim < collapse; dim++)
7605 tree type, itype;
7606 itype = type = TREE_TYPE (fd.loops[dim].v);
7607 if (POINTER_TYPE_P (type))
7608 itype = signed_type_for (type);
7610 tree n1 = fd.loops[dim].n1;
7611 tree step = fd.loops[dim].step;
7612 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7613 true, NULL_TREE, true, GSI_SAME_STMT);
7614 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7615 true, NULL_TREE, true, GSI_SAME_STMT);
7616 tree threadid;
7617 if (gimple_omp_for_grid_group_iter (for_stmt))
7619 gcc_checking_assert (!intra_group);
7620 threadid = build_call_expr (builtin_decl_explicit
7621 (BUILT_IN_HSA_WORKGROUPID), 1,
7622 build_int_cstu (unsigned_type_node, dim));
7624 else if (intra_group)
7625 threadid = build_call_expr (builtin_decl_explicit
7626 (BUILT_IN_HSA_WORKITEMID), 1,
7627 build_int_cstu (unsigned_type_node, dim));
7628 else
7629 threadid = build_call_expr (builtin_decl_explicit
7630 (BUILT_IN_HSA_WORKITEMABSID), 1,
7631 build_int_cstu (unsigned_type_node, dim));
7632 threadid = fold_convert (itype, threadid);
7633 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7634 true, GSI_SAME_STMT);
7636 tree startvar = fd.loops[dim].v;
7637 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7638 if (POINTER_TYPE_P (type))
7639 t = fold_build_pointer_plus (n1, t);
7640 else
7641 t = fold_build2 (PLUS_EXPR, type, t, n1);
7642 t = fold_convert (type, t);
7643 t = force_gimple_operand_gsi (&gsi, t,
7644 DECL_P (startvar)
7645 && TREE_ADDRESSABLE (startvar),
7646 NULL_TREE, true, GSI_SAME_STMT);
7647 gassign *assign_stmt = gimple_build_assign (startvar, t);
7648 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
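
/* In effect, for every collapsed dimension the loop above replaces the
   original induction variable computation with a value derived directly from
   the HSA thread id, roughly (illustration only):

       v = n1 + (TYPE) id (dim) * step;

   where id () is one of the BUILT_IN_HSA_WORKGROUPID, BUILT_IN_HSA_WORKITEMID
   or BUILT_IN_HSA_WORKITEMABSID builtins chosen above.  */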
7650 /* Remove the omp for statement. */
7651 gsi = gsi_last_bb (kfor->entry);
7652 gsi_remove (&gsi, true);
7654 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7655 gsi = gsi_last_bb (kfor->cont);
7656 gcc_assert (!gsi_end_p (gsi)
7657 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7658 gsi_remove (&gsi, true);
7660 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7661 gsi = gsi_last_bb (kfor->exit);
7662 gcc_assert (!gsi_end_p (gsi)
7663 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7664 if (intra_group)
7665 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7666 gsi_remove (&gsi, true);
7668 /* Fixup the much simpler CFG. */
7669 remove_edge (find_edge (kfor->cont, body_bb));
7671 if (kfor->cont != body_bb)
7672 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7673 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7676 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7677 argument_decls. */
7679 struct grid_arg_decl_map
7681 tree old_arg;
7682 tree new_arg;
7685 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7686 pertaining to the kernel function. */
7688 static tree
7689 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7691 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7692 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7693 tree t = *tp;
7695 if (t == adm->old_arg)
7696 *tp = adm->new_arg;
7697 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7698 return NULL_TREE;
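
/* Typical use, as in grid_expand_target_grid_body below: clear a
   walk_stmt_info, point its info field at a grid_arg_decl_map describing the
   old and new PARM_DECL, and call
   walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi) on every
   statement of the new kernel function.  */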
7701 /* If the TARGET region contains a gridified kernel body for loop, remove its
7702 region from the TARGET and expand it in HSA gridified kernel fashion. */
7704 static void
7705 grid_expand_target_grid_body (struct omp_region *target)
7707 if (!hsa_gen_requested_p ())
7708 return;
7710 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7711 struct omp_region **pp;
7713 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7714 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7715 break;
7717 struct omp_region *gpukernel = *pp;
7719 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7720 if (!gpukernel)
7722 /* HSA cannot handle OACC stuff. */
7723 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7724 return;
7725 gcc_checking_assert (orig_child_fndecl);
7726 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7727 OMP_CLAUSE__GRIDDIM_));
7728 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7730 hsa_register_kernel (n);
7731 return;
7734 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7735 OMP_CLAUSE__GRIDDIM_));
7736 tree inside_block
7737 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7738 *pp = gpukernel->next;
7739 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7740 if ((*pp)->type == GIMPLE_OMP_FOR)
7741 break;
7743 struct omp_region *kfor = *pp;
7744 gcc_assert (kfor);
7745 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7746 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7747 *pp = kfor->next;
7748 if (kfor->inner)
7750 if (gimple_omp_for_grid_group_iter (for_stmt))
7752 struct omp_region **next_pp;
7753 for (pp = &kfor->inner; *pp; pp = next_pp)
7755 next_pp = &(*pp)->next;
7756 if ((*pp)->type != GIMPLE_OMP_FOR)
7757 continue;
7758 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7759 gcc_assert (gimple_omp_for_kind (inner)
7760 == GF_OMP_FOR_KIND_GRID_LOOP);
7761 grid_expand_omp_for_loop (*pp, true);
7762 *pp = (*pp)->next;
7763 next_pp = pp;
7766 expand_omp (kfor->inner);
7768 if (gpukernel->inner)
7769 expand_omp (gpukernel->inner);
7771 tree kern_fndecl = copy_node (orig_child_fndecl);
7772 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7773 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7774 tree tgtblock = gimple_block (tgt_stmt);
7775 tree fniniblock = make_node (BLOCK);
7776 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7777 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7778 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7779 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7780 DECL_INITIAL (kern_fndecl) = fniniblock;
7781 push_struct_function (kern_fndecl);
7782 cfun->function_end_locus = gimple_location (tgt_stmt);
7783 init_tree_ssa (cfun);
7784 pop_cfun ();
7786 /* Make sure to generate early debug for the function before
7787 outlining anything. */
7788 if (! gimple_in_ssa_p (cfun))
7789 (*debug_hooks->early_global_decl) (cfun->decl);
7791 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7792 gcc_assert (!DECL_CHAIN (old_parm_decl));
7793 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7794 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7795 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7796 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7797 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7798 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7799 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7800 kern_cfun->curr_properties = cfun->curr_properties;
7802 grid_expand_omp_for_loop (kfor, false);
7804 /* Remove the omp for statement. */
7805 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7806 gsi_remove (&gsi, true);
7807 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7808 return. */
7809 gsi = gsi_last_bb (gpukernel->exit);
7810 gcc_assert (!gsi_end_p (gsi)
7811 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7812 gimple *ret_stmt = gimple_build_return (NULL);
7813 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7814 gsi_remove (&gsi, true);
7816 /* Statements in the first BB in the target construct have been produced by
7817 target lowering and must be copied inside the GPUKERNEL, with the two
7818 exceptions of the first OMP statement and the OMP_DATA assignment
7819 statement. */
7820 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7821 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7822 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7823 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7824 !gsi_end_p (tsi); gsi_next (&tsi))
7826 gimple *stmt = gsi_stmt (tsi);
7827 if (is_gimple_omp (stmt))
7828 break;
7829 if (sender
7830 && is_gimple_assign (stmt)
7831 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7832 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7833 continue;
7834 gimple *copy = gimple_copy (stmt);
7835 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7836 gimple_set_block (copy, fniniblock);
7839 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7840 gpukernel->exit, inside_block);
7842 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7843 kcn->mark_force_output ();
7844 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7846 hsa_register_kernel (kcn, orig_child);
7848 cgraph_node::add_new_function (kern_fndecl, true);
7849 push_cfun (kern_cfun);
7850 cgraph_edge::rebuild_edges ();
7852 /* Re-map any mention of the PARM_DECL of the original function to the
7853 PARM_DECL of the new one.
7855 TODO: It would be great if lowering produced references into the GPU
7856 kernel decl straight away and we did not have to do this. */
7857 struct grid_arg_decl_map adm;
7858 adm.old_arg = old_parm_decl;
7859 adm.new_arg = new_parm_decl;
7860 basic_block bb;
7861 FOR_EACH_BB_FN (bb, kern_cfun)
7863 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7865 gimple *stmt = gsi_stmt (gsi);
7866 struct walk_stmt_info wi;
7867 memset (&wi, 0, sizeof (wi));
7868 wi.info = &adm;
7869 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7872 pop_cfun ();
7874 return;
7877 /* Expand the parallel region tree rooted at REGION. Expansion
7878 proceeds in depth-first order. Innermost regions are expanded
7879 first. This way, parallel regions that require a new function to
7880 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7881 internal dependencies in their body. */
7883 static void
7884 expand_omp (struct omp_region *region)
7886 omp_any_child_fn_dumped = false;
7887 while (region)
7889 location_t saved_location;
7890 gimple *inner_stmt = NULL;
7892 /* First, determine whether this is a combined parallel+workshare
7893 region. */
7894 if (region->type == GIMPLE_OMP_PARALLEL)
7895 determine_parallel_type (region);
7896 else if (region->type == GIMPLE_OMP_TARGET)
7897 grid_expand_target_grid_body (region);
7899 if (region->type == GIMPLE_OMP_FOR
7900 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7901 inner_stmt = last_stmt (region->inner->entry);
7903 if (region->inner)
7904 expand_omp (region->inner);
7906 saved_location = input_location;
7907 if (gimple_has_location (last_stmt (region->entry)))
7908 input_location = gimple_location (last_stmt (region->entry));
7910 switch (region->type)
7912 case GIMPLE_OMP_PARALLEL:
7913 case GIMPLE_OMP_TASK:
7914 expand_omp_taskreg (region);
7915 break;
7917 case GIMPLE_OMP_FOR:
7918 expand_omp_for (region, inner_stmt);
7919 break;
7921 case GIMPLE_OMP_SECTIONS:
7922 expand_omp_sections (region);
7923 break;
7925 case GIMPLE_OMP_SECTION:
7926 /* Individual omp sections are handled together with their
7927 parent GIMPLE_OMP_SECTIONS region. */
7928 break;
7930 case GIMPLE_OMP_SINGLE:
7931 expand_omp_single (region);
7932 break;
7934 case GIMPLE_OMP_ORDERED:
7936 gomp_ordered *ord_stmt
7937 = as_a <gomp_ordered *> (last_stmt (region->entry));
7938 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7939 OMP_CLAUSE_DEPEND))
7941 /* We'll expand these when expanding the corresponding
7942 worksharing region with an ordered(n) clause. */
7943 gcc_assert (region->outer
7944 && region->outer->type == GIMPLE_OMP_FOR);
7945 region->ord_stmt = ord_stmt;
7946 break;
7949 /* FALLTHRU */
7950 case GIMPLE_OMP_MASTER:
7951 case GIMPLE_OMP_TASKGROUP:
7952 case GIMPLE_OMP_CRITICAL:
7953 case GIMPLE_OMP_TEAMS:
7954 expand_omp_synch (region);
7955 break;
7957 case GIMPLE_OMP_ATOMIC_LOAD:
7958 expand_omp_atomic (region);
7959 break;
7961 case GIMPLE_OMP_TARGET:
7962 expand_omp_target (region);
7963 break;
7965 default:
7966 gcc_unreachable ();
7969 input_location = saved_location;
7970 region = region->next;
7972 if (omp_any_child_fn_dumped)
7974 if (dump_file)
7975 dump_function_header (dump_file, current_function_decl, dump_flags);
7976 omp_any_child_fn_dumped = false;
7980 /* Helper for build_omp_regions. Scan the dominator tree starting at
7981 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7982 true, the function ends once a single tree is built (otherwise, a whole
7983 forest of OMP constructs may be built). */
7985 static void
7986 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7987 bool single_tree)
7989 gimple_stmt_iterator gsi;
7990 gimple *stmt;
7991 basic_block son;
7993 gsi = gsi_last_bb (bb);
7994 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7996 struct omp_region *region;
7997 enum gimple_code code;
7999 stmt = gsi_stmt (gsi);
8000 code = gimple_code (stmt);
8001 if (code == GIMPLE_OMP_RETURN)
8003 /* STMT is the return point out of region PARENT. Mark it
8004 as the exit point and make PARENT the immediately
8005 enclosing region. */
8006 gcc_assert (parent);
8007 region = parent;
8008 region->exit = bb;
8009 parent = parent->outer;
8011 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8013 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8014 GIMPLE_OMP_RETURN, but matches with
8015 GIMPLE_OMP_ATOMIC_LOAD. */
8016 gcc_assert (parent);
8017 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8018 region = parent;
8019 region->exit = bb;
8020 parent = parent->outer;
8022 else if (code == GIMPLE_OMP_CONTINUE)
8024 gcc_assert (parent);
8025 parent->cont = bb;
8027 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8029 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8030 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8032 else
8034 region = new_omp_region (bb, code, parent);
8035 /* Otherwise... */
8036 if (code == GIMPLE_OMP_TARGET)
8038 switch (gimple_omp_target_kind (stmt))
8040 case GF_OMP_TARGET_KIND_REGION:
8041 case GF_OMP_TARGET_KIND_DATA:
8042 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8043 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8044 case GF_OMP_TARGET_KIND_OACC_DATA:
8045 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8046 break;
8047 case GF_OMP_TARGET_KIND_UPDATE:
8048 case GF_OMP_TARGET_KIND_ENTER_DATA:
8049 case GF_OMP_TARGET_KIND_EXIT_DATA:
8050 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8051 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8052 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8053 /* ..., other than for those stand-alone directives... */
8054 region = NULL;
8055 break;
8056 default:
8057 gcc_unreachable ();
8060 else if (code == GIMPLE_OMP_ORDERED
8061 && omp_find_clause (gimple_omp_ordered_clauses
8062 (as_a <gomp_ordered *> (stmt)),
8063 OMP_CLAUSE_DEPEND))
8064 /* #pragma omp ordered depend is also just a stand-alone
8065 directive. */
8066 region = NULL;
8067 /* ..., this directive becomes the parent for a new region. */
8068 if (region)
8069 parent = region;
8073 if (single_tree && !parent)
8074 return;
8076 for (son = first_dom_son (CDI_DOMINATORS, bb);
8077 son;
8078 son = next_dom_son (CDI_DOMINATORS, son))
8079 build_omp_regions_1 (son, parent, single_tree);
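
/* Illustration (not part of the original source): for a nest such as

       #pragma omp parallel
       #pragma omp for
       for (i = 0; i < n; i++) ...

   the walk above creates a GIMPLE_OMP_PARALLEL region whose inner chain holds
   the GIMPLE_OMP_FOR region; regions at the same nesting depth are linked
   through their next pointers, and a region's exit and cont blocks are filled
   in when the matching GIMPLE_OMP_RETURN or GIMPLE_OMP_CONTINUE is seen.  */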
8082 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8083 root_omp_region. */
8085 static void
8086 build_omp_regions_root (basic_block root)
8088 gcc_assert (root_omp_region == NULL);
8089 build_omp_regions_1 (root, NULL, true);
8090 gcc_assert (root_omp_region != NULL);
8093 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
8095 void
8096 omp_expand_local (basic_block head)
8098 build_omp_regions_root (head);
8099 if (dump_file && (dump_flags & TDF_DETAILS))
8101 fprintf (dump_file, "\nOMP region tree\n\n");
8102 dump_omp_region (dump_file, root_omp_region, 0);
8103 fprintf (dump_file, "\n");
8106 remove_exit_barriers (root_omp_region);
8107 expand_omp (root_omp_region);
8109 omp_free_regions ();
8112 /* Scan the CFG and build a tree of OMP regions, storing the root in
8113 root_omp_region. */
8115 static void
8116 build_omp_regions (void)
8118 gcc_assert (root_omp_region == NULL);
8119 calculate_dominance_info (CDI_DOMINATORS);
8120 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8123 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8125 static unsigned int
8126 execute_expand_omp (void)
8128 build_omp_regions ();
8130 if (!root_omp_region)
8131 return 0;
8133 if (dump_file)
8135 fprintf (dump_file, "\nOMP region tree\n\n");
8136 dump_omp_region (dump_file, root_omp_region, 0);
8137 fprintf (dump_file, "\n");
8140 remove_exit_barriers (root_omp_region);
8142 expand_omp (root_omp_region);
8144 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8145 verify_loop_structure ();
8146 cleanup_tree_cfg ();
8148 omp_free_regions ();
8150 return 0;
8153 /* OMP expansion -- the default pass, run before creation of SSA form. */
8155 namespace {
8157 const pass_data pass_data_expand_omp =
8159 GIMPLE_PASS, /* type */
8160 "ompexp", /* name */
8161 OPTGROUP_OMP, /* optinfo_flags */
8162 TV_NONE, /* tv_id */
8163 PROP_gimple_any, /* properties_required */
8164 PROP_gimple_eomp, /* properties_provided */
8165 0, /* properties_destroyed */
8166 0, /* todo_flags_start */
8167 0, /* todo_flags_finish */
8170 class pass_expand_omp : public gimple_opt_pass
8172 public:
8173 pass_expand_omp (gcc::context *ctxt)
8174 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8177 /* opt_pass methods: */
8178 virtual unsigned int execute (function *)
8180 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8181 || flag_openmp_simd != 0)
8182 && !seen_error ());
8184 /* This pass always runs, to provide PROP_gimple_eomp.
8185 But often, there is nothing to do. */
8186 if (!gate)
8187 return 0;
8189 return execute_expand_omp ();
8192 }; // class pass_expand_omp
8194 } // anon namespace
8196 gimple_opt_pass *
8197 make_pass_expand_omp (gcc::context *ctxt)
8199 return new pass_expand_omp (ctxt);
8202 namespace {
8204 const pass_data pass_data_expand_omp_ssa =
8206 GIMPLE_PASS, /* type */
8207 "ompexpssa", /* name */
8208 OPTGROUP_OMP, /* optinfo_flags */
8209 TV_NONE, /* tv_id */
8210 PROP_cfg | PROP_ssa, /* properties_required */
8211 PROP_gimple_eomp, /* properties_provided */
8212 0, /* properties_destroyed */
8213 0, /* todo_flags_start */
8214 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8217 class pass_expand_omp_ssa : public gimple_opt_pass
8219 public:
8220 pass_expand_omp_ssa (gcc::context *ctxt)
8221 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8224 /* opt_pass methods: */
8225 virtual bool gate (function *fun)
8227 return !(fun->curr_properties & PROP_gimple_eomp);
8229 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8230 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8232 }; // class pass_expand_omp_ssa
8234 } // anon namespace
8236 gimple_opt_pass *
8237 make_pass_expand_omp_ssa (gcc::context *ctxt)
8239 return new pass_expand_omp_ssa (ctxt);
8242 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8243 GIMPLE_* codes. */
8245 bool
8246 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8247 int *region_idx)
8249 gimple *last = last_stmt (bb);
8250 enum gimple_code code = gimple_code (last);
8251 struct omp_region *cur_region = *region;
8252 bool fallthru = false;
8254 switch (code)
8256 case GIMPLE_OMP_PARALLEL:
8257 case GIMPLE_OMP_TASK:
8258 case GIMPLE_OMP_FOR:
8259 case GIMPLE_OMP_SINGLE:
8260 case GIMPLE_OMP_TEAMS:
8261 case GIMPLE_OMP_MASTER:
8262 case GIMPLE_OMP_TASKGROUP:
8263 case GIMPLE_OMP_CRITICAL:
8264 case GIMPLE_OMP_SECTION:
8265 case GIMPLE_OMP_GRID_BODY:
8266 cur_region = new_omp_region (bb, code, cur_region);
8267 fallthru = true;
8268 break;
8270 case GIMPLE_OMP_ORDERED:
8271 cur_region = new_omp_region (bb, code, cur_region);
8272 fallthru = true;
8273 if (omp_find_clause (gimple_omp_ordered_clauses
8274 (as_a <gomp_ordered *> (last)),
8275 OMP_CLAUSE_DEPEND))
8276 cur_region = cur_region->outer;
8277 break;
8279 case GIMPLE_OMP_TARGET:
8280 cur_region = new_omp_region (bb, code, cur_region);
8281 fallthru = true;
8282 switch (gimple_omp_target_kind (last))
8284 case GF_OMP_TARGET_KIND_REGION:
8285 case GF_OMP_TARGET_KIND_DATA:
8286 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8287 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8288 case GF_OMP_TARGET_KIND_OACC_DATA:
8289 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8290 break;
8291 case GF_OMP_TARGET_KIND_UPDATE:
8292 case GF_OMP_TARGET_KIND_ENTER_DATA:
8293 case GF_OMP_TARGET_KIND_EXIT_DATA:
8294 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8295 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8296 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8297 cur_region = cur_region->outer;
8298 break;
8299 default:
8300 gcc_unreachable ();
8302 break;
8304 case GIMPLE_OMP_SECTIONS:
8305 cur_region = new_omp_region (bb, code, cur_region);
8306 fallthru = true;
8307 break;
8309 case GIMPLE_OMP_SECTIONS_SWITCH:
8310 fallthru = false;
8311 break;
8313 case GIMPLE_OMP_ATOMIC_LOAD:
8314 case GIMPLE_OMP_ATOMIC_STORE:
8315 fallthru = true;
8316 break;
8318 case GIMPLE_OMP_RETURN:
8319 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8320 somewhere other than the next block. This will be
8321 created later. */
8322 cur_region->exit = bb;
8323 if (cur_region->type == GIMPLE_OMP_TASK)
8324 /* Add an edge corresponding to not scheduling the task
8325 immediately. */
8326 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8327 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8328 cur_region = cur_region->outer;
8329 break;
8331 case GIMPLE_OMP_CONTINUE:
8332 cur_region->cont = bb;
8333 switch (cur_region->type)
8335 case GIMPLE_OMP_FOR:
8336 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8337 successor edges as abnormal to prevent splitting
8338 them. */
8339 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8340 /* Make the loopback edge. */
8341 make_edge (bb, single_succ (cur_region->entry),
8342 EDGE_ABNORMAL);
8344 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8345 corresponds to the case that the body of the loop
8346 is not executed at all. */
8347 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8348 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8349 fallthru = false;
8350 break;
8352 case GIMPLE_OMP_SECTIONS:
8353 /* Wire up the edges into and out of the nested sections. */
8355 basic_block switch_bb = single_succ (cur_region->entry);
8357 struct omp_region *i;
8358 for (i = cur_region->inner; i ; i = i->next)
8360 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8361 make_edge (switch_bb, i->entry, 0);
8362 make_edge (i->exit, bb, EDGE_FALLTHRU);
8365 /* Make the loopback edge to the block with
8366 GIMPLE_OMP_SECTIONS_SWITCH. */
8367 make_edge (bb, switch_bb, 0);
8369 /* Make the edge from the switch to exit. */
8370 make_edge (switch_bb, bb->next_bb, 0);
8371 fallthru = false;
8373 break;
8375 case GIMPLE_OMP_TASK:
8376 fallthru = true;
8377 break;
8379 default:
8380 gcc_unreachable ();
8382 break;
8384 default:
8385 gcc_unreachable ();
8388 if (*region != cur_region)
8390 *region = cur_region;
8391 if (cur_region)
8392 *region_idx = cur_region->entry->index;
8393 else
8394 *region_idx = 0;
8397 return fallthru;
8400 #include "gt-omp-expand.h"