/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "cilk.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "debug.h"
#include "stringpool.h"
#include "attribs.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;
static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */
static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
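  /* Round the chunk size up to a multiple of the vectorization factor,
     i.e. chunk_size = (chunk_size + vf - 1) & -vf.  The mask arithmetic
     assumes VF is a power of two; e.g. a requested chunk of 7 with
     VF == 4 becomes 8.  */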
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}
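      /* The values pushed below become the extra arguments that
	 expand_parallel_call splices into the combined
	 GOMP_parallel_loop_* call: start, end and step (and, if present,
	 the chunk size), all converted to long.  */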
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 if (single_succ (par_entry_bb) == ws_entry_bb
315 && single_succ (ws_exit_bb) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
318 || (last_and_only_stmt (ws_entry_bb)
319 && last_and_only_stmt (par_exit_bb))))
321 gimple *par_stmt = last_stmt (par_entry_bb);
322 gimple *ws_stmt = last_stmt (ws_entry_bb);
324 if (region->inner->type == GIMPLE_OMP_FOR)
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses = gimple_omp_for_clauses (ws_stmt);
336 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
337 if (c == NULL
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
339 == OMP_CLAUSE_SCHEDULE_STATIC)
340 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
342 region->is_combined_parallel = false;
343 region->inner->is_combined_parallel = false;
344 return;
348 region->is_combined_parallel = true;
349 region->inner->is_combined_parallel = true;
350 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
354 /* Debugging dumps for parallel regions. */
355 void dump_omp_region (FILE *, struct omp_region *, int);
356 void debug_omp_region (struct omp_region *);
357 void debug_all_omp_regions (void);
359 /* Dump the parallel region tree rooted at REGION. */
361 void
362 dump_omp_region (FILE *file, struct omp_region *region, int indent)
364 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
365 gimple_code_name[region->type]);
367 if (region->inner)
368 dump_omp_region (file, region->inner, indent + 4);
370 if (region->cont)
372 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
373 region->cont->index);
376 if (region->exit)
377 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
378 region->exit->index);
379 else
380 fprintf (file, "%*s[no exit marker]\n", indent, "");
382 if (region->next)
383 dump_omp_region (file, region->next, indent);
386 DEBUG_FUNCTION void
387 debug_omp_region (struct omp_region *region)
389 dump_omp_region (stderr, region, 0);
392 DEBUG_FUNCTION void
393 debug_all_omp_regions (void)
395 dump_omp_region (stderr, root_omp_region, 0);
398 /* Create a new parallel region starting at STMT inside region PARENT. */
400 static struct omp_region *
401 new_omp_region (basic_block bb, enum gimple_code type,
402 struct omp_region *parent)
404 struct omp_region *region = XCNEW (struct omp_region);
406 region->outer = parent;
407 region->entry = bb;
408 region->type = type;
410 if (parent)
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region->next = parent->inner;
415 parent->inner = region;
417 else
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region->next = root_omp_region;
422 root_omp_region = region;
425 return region;
428 /* Release the memory associated with the region tree rooted at REGION. */
430 static void
431 free_omp_region_1 (struct omp_region *region)
433 struct omp_region *i, *n;
435 for (i = region->inner; i ; i = n)
437 n = i->next;
438 free_omp_region_1 (i);
441 free (region);
444 /* Release the memory for the entire omp region tree. */
446 void
447 omp_free_regions (void)
449 struct omp_region *r, *n;
450 for (r = root_omp_region; r ; r = n)
452 n = r->next;
453 free_omp_region_1 (r);
455 root_omp_region = NULL;
458 /* A convenience function to build an empty GIMPLE_COND with just the
459 condition. */
461 static gcond *
462 gimple_build_cond_empty (tree cond)
464 enum tree_code pred_code;
465 tree lhs, rhs;
467 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
468 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
471 /* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
474 static bool
475 parallel_needs_hsa_kernel_p (struct omp_region *region)
477 bool indirect = false;
478 for (region = region->outer; region; region = region->outer)
480 if (region->type == GIMPLE_OMP_PARALLEL)
481 indirect = true;
482 else if (region->type == GIMPLE_OMP_TARGET)
484 gomp_target *tgt_stmt
485 = as_a <gomp_target *> (last_stmt (region->entry));
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
488 OMP_CLAUSE__GRIDDIM_))
489 return indirect;
490 else
491 return true;
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl)))
497 return true;
499 return false;
/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */
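/* For instance (an illustrative sketch, not a verbatim dump), a plain

	#pragma omp parallel num_threads (4) if (c)

   with no combined workshare arguments ends up as a call along the lines of

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o, c ? 4 : 1, 0);  */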
509 static void
510 expand_parallel_call (struct omp_region *region, basic_block bb,
511 gomp_parallel *entry_stmt,
512 vec<tree, va_gc> *ws_args)
514 tree t, t1, t2, val, cond, c, clauses, flags;
515 gimple_stmt_iterator gsi;
516 gimple *stmt;
517 enum built_in_function start_ix;
518 int start_ix2;
519 location_t clause_loc;
520 vec<tree, va_gc> *args;
522 clauses = gimple_omp_parallel_clauses (entry_stmt);
524 /* Determine what flavor of GOMP_parallel we will be
525 emitting. */
526 start_ix = BUILT_IN_GOMP_PARALLEL;
527 if (is_combined_parallel (region))
529 switch (region->inner->type)
531 case GIMPLE_OMP_FOR:
532 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
533 switch (region->inner->sched_kind)
535 case OMP_CLAUSE_SCHEDULE_RUNTIME:
536 start_ix2 = 3;
537 break;
538 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
539 case OMP_CLAUSE_SCHEDULE_GUIDED:
540 if (region->inner->sched_modifiers
541 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
543 start_ix2 = 3 + region->inner->sched_kind;
544 break;
546 /* FALLTHRU */
547 default:
548 start_ix2 = region->inner->sched_kind;
549 break;
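	  /* START_IX2 is used as an offset from
	     BUILT_IN_GOMP_PARALLEL_LOOP_STATIC: 0 static, 1 dynamic,
	     2 guided, 3 runtime, and 4/5 the nonmonotonic dynamic/guided
	     variants, matching the order in which the
	     GOMP_parallel_loop_* builtins are declared.  */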
551 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
552 start_ix = (enum built_in_function) start_ix2;
553 break;
554 case GIMPLE_OMP_SECTIONS:
555 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
556 break;
557 default:
558 gcc_unreachable ();
562 /* By default, the value of NUM_THREADS is zero (selected at run time)
563 and there is no conditional. */
564 cond = NULL_TREE;
565 val = build_int_cst (unsigned_type_node, 0);
566 flags = build_int_cst (unsigned_type_node, 0);
568 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
569 if (c)
570 cond = OMP_CLAUSE_IF_EXPR (c);
572 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
573 if (c)
575 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
576 clause_loc = OMP_CLAUSE_LOCATION (c);
578 else
579 clause_loc = gimple_location (entry_stmt);
581 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
582 if (c)
583 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
585 /* Ensure 'val' is of the correct type. */
586 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
588 /* If we found the clause 'if (cond)', build either
589 (cond != 0) or (cond ? val : 1u). */
590 if (cond)
592 cond = gimple_boolify (cond);
594 if (integer_zerop (val))
595 val = fold_build2_loc (clause_loc,
596 EQ_EXPR, unsigned_type_node, cond,
597 build_int_cst (TREE_TYPE (cond), 0));
598 else
600 basic_block cond_bb, then_bb, else_bb;
601 edge e, e_then, e_else;
602 tree tmp_then, tmp_else, tmp_join, tmp_var;
604 tmp_var = create_tmp_var (TREE_TYPE (val));
605 if (gimple_in_ssa_p (cfun))
607 tmp_then = make_ssa_name (tmp_var);
608 tmp_else = make_ssa_name (tmp_var);
609 tmp_join = make_ssa_name (tmp_var);
611 else
613 tmp_then = tmp_var;
614 tmp_else = tmp_var;
615 tmp_join = tmp_var;
618 e = split_block_after_labels (bb);
619 cond_bb = e->src;
620 bb = e->dest;
621 remove_edge (e);
623 then_bb = create_empty_bb (cond_bb);
624 else_bb = create_empty_bb (then_bb);
625 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
626 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
628 stmt = gimple_build_cond_empty (cond);
629 gsi = gsi_start_bb (cond_bb);
630 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
632 gsi = gsi_start_bb (then_bb);
633 expand_omp_build_assign (&gsi, tmp_then, val, true);
635 gsi = gsi_start_bb (else_bb);
636 expand_omp_build_assign (&gsi, tmp_else,
637 build_int_cst (unsigned_type_node, 1),
638 true);
640 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
641 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
642 add_bb_to_loop (then_bb, cond_bb->loop_father);
643 add_bb_to_loop (else_bb, cond_bb->loop_father);
644 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
645 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
647 if (gimple_in_ssa_p (cfun))
649 gphi *phi = create_phi_node (tmp_join, bb);
650 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
651 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
654 val = tmp_join;
657 gsi = gsi_start_bb (bb);
658 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
659 false, GSI_CONTINUE_LINKING);
662 gsi = gsi_last_bb (bb);
663 t = gimple_omp_parallel_data_arg (entry_stmt);
664 if (t == NULL)
665 t1 = null_pointer_node;
666 else
667 t1 = build_fold_addr_expr (t);
668 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
669 t2 = build_fold_addr_expr (child_fndecl);
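  /* Assemble the argument list for GOMP_parallel*: the outlined child
     function, its data block (or a null pointer), the number of threads,
     any extra combined-workshare arguments, and finally the flags word.  */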
671 vec_alloc (args, 4 + vec_safe_length (ws_args));
672 args->quick_push (t2);
673 args->quick_push (t1);
674 args->quick_push (val);
675 if (ws_args)
676 args->splice (*ws_args);
677 args->quick_push (flags);
679 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
680 builtin_decl_explicit (start_ix), args);
682 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
683 false, GSI_CONTINUE_LINKING);
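  /* If HSA code generation was requested and this parallel region is to
     become an HSA kernel, let the HSA machinery know about the outlined
     child function.  */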
685 if (hsa_gen_requested_p ()
686 && parallel_needs_hsa_kernel_p (region))
688 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
689 hsa_register_kernel (child_cnode);
693 /* Insert a function call whose name is FUNC_NAME with the information from
694 ENTRY_STMT into the basic_block BB. */
696 static void
697 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
698 vec <tree, va_gc> *ws_args)
700 tree t, t1, t2;
701 gimple_stmt_iterator gsi;
702 vec <tree, va_gc> *args;
704 gcc_assert (vec_safe_length (ws_args) == 2);
705 tree func_name = (*ws_args)[0];
706 tree grain = (*ws_args)[1];
708 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
709 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
710 gcc_assert (count != NULL_TREE);
711 count = OMP_CLAUSE_OPERAND (count, 0);
713 gsi = gsi_last_bb (bb);
714 t = gimple_omp_parallel_data_arg (entry_stmt);
715 if (t == NULL)
716 t1 = null_pointer_node;
717 else
718 t1 = build_fold_addr_expr (t);
719 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
721 vec_alloc (args, 4);
722 args->quick_push (t2);
723 args->quick_push (t1);
724 args->quick_push (count);
725 args->quick_push (grain);
726 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
728 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
729 GSI_CONTINUE_LINKING);
732 /* Build the function call to GOMP_task to actually
733 generate the task operation. BB is the block where to insert the code. */
735 static void
736 expand_task_call (struct omp_region *region, basic_block bb,
737 gomp_task *entry_stmt)
739 tree t1, t2, t3;
740 gimple_stmt_iterator gsi;
741 location_t loc = gimple_location (entry_stmt);
743 tree clauses = gimple_omp_task_clauses (entry_stmt);
745 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
746 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
747 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
748 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
749 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
750 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
752 unsigned int iflags
753 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
754 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
755 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
757 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
758 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
759 tree num_tasks = NULL_TREE;
760 bool ull = false;
761 if (taskloop_p)
763 gimple *g = last_stmt (region->outer->entry);
764 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
765 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
766 struct omp_for_data fd;
767 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
768 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
769 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
770 OMP_CLAUSE__LOOPTEMP_);
771 startvar = OMP_CLAUSE_DECL (startvar);
772 endvar = OMP_CLAUSE_DECL (endvar);
773 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
774 if (fd.loop.cond_code == LT_EXPR)
775 iflags |= GOMP_TASK_FLAG_UP;
776 tree tclauses = gimple_omp_for_clauses (g);
777 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
778 if (num_tasks)
779 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
780 else
782 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
783 if (num_tasks)
785 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
786 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
788 else
789 num_tasks = integer_zero_node;
791 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
792 if (ifc == NULL_TREE)
793 iflags |= GOMP_TASK_FLAG_IF;
794 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
795 iflags |= GOMP_TASK_FLAG_NOGROUP;
796 ull = fd.iter_type == long_long_unsigned_type_node;
798 else if (priority)
799 iflags |= GOMP_TASK_FLAG_PRIORITY;
801 tree flags = build_int_cst (unsigned_type_node, iflags);
803 tree cond = boolean_true_node;
804 if (ifc)
806 if (taskloop_p)
808 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
809 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
810 build_int_cst (unsigned_type_node,
811 GOMP_TASK_FLAG_IF),
812 build_int_cst (unsigned_type_node, 0));
813 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
814 flags, t);
816 else
817 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820 if (finalc)
822 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
823 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
824 build_int_cst (unsigned_type_node,
825 GOMP_TASK_FLAG_FINAL),
826 build_int_cst (unsigned_type_node, 0));
827 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
829 if (depend)
830 depend = OMP_CLAUSE_DECL (depend);
831 else
832 depend = build_int_cst (ptr_type_node, 0);
833 if (priority)
834 priority = fold_convert (integer_type_node,
835 OMP_CLAUSE_PRIORITY_EXPR (priority));
836 else
837 priority = integer_zero_node;
839 gsi = gsi_last_bb (bb);
840 tree t = gimple_omp_task_data_arg (entry_stmt);
841 if (t == NULL)
842 t2 = null_pointer_node;
843 else
844 t2 = build_fold_addr_expr_loc (loc, t);
845 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
846 t = gimple_omp_task_copy_fn (entry_stmt);
847 if (t == NULL)
848 t3 = null_pointer_node;
849 else
850 t3 = build_fold_addr_expr_loc (loc, t);
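  /* Emit the runtime call: GOMP_taskloop{,_ull} (fn, data, copy_fn,
     arg_size, arg_align, flags, num_tasks, priority, start, end, step)
     for taskloops, otherwise GOMP_task (fn, data, copy_fn, arg_size,
     arg_align, if_cond, flags, depend, priority).  */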
852 if (taskloop_p)
853 t = build_call_expr (ull
854 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
855 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
856 11, t1, t2, t3,
857 gimple_omp_task_arg_size (entry_stmt),
858 gimple_omp_task_arg_align (entry_stmt), flags,
859 num_tasks, priority, startvar, endvar, step);
860 else
861 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
862 9, t1, t2, t3,
863 gimple_omp_task_arg_size (entry_stmt),
864 gimple_omp_task_arg_align (entry_stmt), cond, flags,
865 depend, priority);
867 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
868 false, GSI_CONTINUE_LINKING);
871 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
873 static tree
874 vec2chain (vec<tree, va_gc> *v)
876 tree chain = NULL_TREE, t;
877 unsigned ix;
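  /* Walk the vector backwards and prepend each decl, so the resulting
     DECL_CHAIN list preserves the original order of V.  */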
879 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
881 DECL_CHAIN (t) = chain;
882 chain = t;
885 return chain;
888 /* Remove barriers in REGION->EXIT's block. Note that this is only
889 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
890 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
891 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
892 removed. */
894 static void
895 remove_exit_barrier (struct omp_region *region)
897 gimple_stmt_iterator gsi;
898 basic_block exit_bb;
899 edge_iterator ei;
900 edge e;
901 gimple *stmt;
902 int any_addressable_vars = -1;
904 exit_bb = region->exit;
906 /* If the parallel region doesn't return, we don't have REGION->EXIT
907 block at all. */
908 if (! exit_bb)
909 return;
911 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
912 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
913 statements that can appear in between are extremely limited -- no
914 memory operations at all. Here, we allow nothing at all, so the
915 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
916 gsi = gsi_last_bb (exit_bb);
917 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
918 gsi_prev (&gsi);
919 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
920 return;
922 FOR_EACH_EDGE (e, ei, exit_bb->preds)
924 gsi = gsi_last_bb (e->src);
925 if (gsi_end_p (gsi))
926 continue;
927 stmt = gsi_stmt (gsi);
928 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
929 && !gimple_omp_return_nowait_p (stmt))
931 /* OpenMP 3.0 tasks unfortunately prevent this optimization
932 in many cases. If there could be tasks queued, the barrier
933 might be needed to let the tasks run before some local
934 variable of the parallel that the task uses as shared
935 runs out of scope. The task can be spawned either
936 from within current function (this would be easy to check)
937 or from some function it calls and gets passed an address
938 of such a variable. */
939 if (any_addressable_vars < 0)
941 gomp_parallel *parallel_stmt
942 = as_a <gomp_parallel *> (last_stmt (region->entry));
943 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
944 tree local_decls, block, decl;
945 unsigned ix;
947 any_addressable_vars = 0;
948 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
949 if (TREE_ADDRESSABLE (decl))
951 any_addressable_vars = 1;
952 break;
954 for (block = gimple_block (stmt);
955 !any_addressable_vars
956 && block
957 && TREE_CODE (block) == BLOCK;
958 block = BLOCK_SUPERCONTEXT (block))
960 for (local_decls = BLOCK_VARS (block);
961 local_decls;
962 local_decls = DECL_CHAIN (local_decls))
963 if (TREE_ADDRESSABLE (local_decls))
965 any_addressable_vars = 1;
966 break;
968 if (block == gimple_block (parallel_stmt))
969 break;
972 if (!any_addressable_vars)
973 gimple_omp_return_set_nowait (stmt);
978 static void
979 remove_exit_barriers (struct omp_region *region)
981 if (region->type == GIMPLE_OMP_PARALLEL)
982 remove_exit_barrier (region);
984 if (region->inner)
986 region = region->inner;
987 remove_exit_barriers (region);
988 while (region->next)
990 region = region->next;
991 remove_exit_barriers (region);
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for a task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */
1004 static void
1005 optimize_omp_library_calls (gimple *entry_stmt)
1007 basic_block bb;
1008 gimple_stmt_iterator gsi;
1009 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1010 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1011 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1012 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1013 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1014 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1015 OMP_CLAUSE_UNTIED) != NULL);
1017 FOR_EACH_BB_FN (bb, cfun)
1018 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1020 gimple *call = gsi_stmt (gsi);
1021 tree decl;
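	/* Only consider direct calls to public external declarations that
	   have no body in this translation unit; those are the library
	   entry points rather than some local definition of the same
	   name.  */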
1023 if (is_gimple_call (call)
1024 && (decl = gimple_call_fndecl (call))
1025 && DECL_EXTERNAL (decl)
1026 && TREE_PUBLIC (decl)
1027 && DECL_INITIAL (decl) == NULL)
1029 tree built_in;
1031 if (DECL_NAME (decl) == thr_num_id)
1033 /* In #pragma omp task untied omp_get_thread_num () can change
1034 during the execution of the task region. */
1035 if (untied_task)
1036 continue;
1037 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1039 else if (DECL_NAME (decl) == num_thr_id)
1040 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1041 else
1042 continue;
1044 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1045 || gimple_call_num_args (call) != 0)
1046 continue;
1048 if (flag_exceptions && !TREE_NOTHROW (decl))
1049 continue;
1051 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1052 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1053 TREE_TYPE (TREE_TYPE (built_in))))
1054 continue;
1056 gimple_call_set_fndecl (call, built_in);
1061 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1062 regimplified. */
1064 static tree
1065 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1067 tree t = *tp;
1069 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1070 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1071 return t;
1073 if (TREE_CODE (t) == ADDR_EXPR)
1074 recompute_tree_invariant_for_addr_expr (t);
1076 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1077 return NULL_TREE;
1080 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1082 static void
1083 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1084 bool after)
1086 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1087 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1088 !after, after ? GSI_CONTINUE_LINKING
1089 : GSI_SAME_STMT);
1090 gimple *stmt = gimple_build_assign (to, from);
1091 if (after)
1092 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1093 else
1094 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1095 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1096 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1098 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1099 gimple_regimplify_operands (stmt, &gsi);
1103 /* Expand the OpenMP parallel or task directive starting at REGION. */
1105 static void
1106 expand_omp_taskreg (struct omp_region *region)
1108 basic_block entry_bb, exit_bb, new_bb;
1109 struct function *child_cfun;
1110 tree child_fn, block, t;
1111 gimple_stmt_iterator gsi;
1112 gimple *entry_stmt, *stmt;
1113 edge e;
1114 vec<tree, va_gc> *ws_args;
1116 entry_stmt = last_stmt (region->entry);
1117 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1118 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1120 entry_bb = region->entry;
1121 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1122 exit_bb = region->cont;
1123 else
1124 exit_bb = region->exit;
1126 bool is_cilk_for
1127 = (flag_cilkplus
1128 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1129 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1130 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1132 if (is_cilk_for)
1133 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1134 and the inner statement contains the name of the built-in function
1135 and grain. */
1136 ws_args = region->inner->ws_args;
1137 else if (is_combined_parallel (region))
1138 ws_args = region->ws_args;
1139 else
1140 ws_args = NULL;
1142 if (child_cfun->cfg)
1144 /* Due to inlining, it may happen that we have already outlined
1145 the region, in which case all we need to do is make the
1146 sub-graph unreachable and emit the parallel call. */
1147 edge entry_succ_e, exit_succ_e;
1149 entry_succ_e = single_succ_edge (entry_bb);
1151 gsi = gsi_last_bb (entry_bb);
1152 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154 gsi_remove (&gsi, true);
1156 new_bb = entry_bb;
1157 if (exit_bb)
1159 exit_succ_e = single_succ_edge (exit_bb);
1160 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1162 remove_edge_and_dominated_blocks (entry_succ_e);
1164 else
1166 unsigned srcidx, dstidx, num;
      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
1181 if (gimple_omp_taskreg_data_arg (entry_stmt))
1183 basic_block entry_succ_bb
1184 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185 : FALLTHRU_EDGE (entry_bb)->dest;
1186 tree arg;
1187 gimple *parcopy_stmt = NULL;
1189 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1191 gimple *stmt;
1193 gcc_assert (!gsi_end_p (gsi));
1194 stmt = gsi_stmt (gsi);
1195 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196 continue;
1198 if (gimple_num_ops (stmt) == 2)
1200 tree arg = gimple_assign_rhs1 (stmt);
		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */
1205 if (TREE_CODE (arg) == ADDR_EXPR
1206 && TREE_OPERAND (arg, 0)
1207 == gimple_omp_taskreg_data_arg (entry_stmt))
1209 parcopy_stmt = stmt;
1210 break;
1215 gcc_assert (parcopy_stmt != NULL);
1216 arg = DECL_ARGUMENTS (child_fn);
1218 if (!gimple_in_ssa_p (cfun))
1220 if (gimple_assign_lhs (parcopy_stmt) == arg)
1221 gsi_remove (&gsi, true);
1222 else
1224 /* ?? Is setting the subcode really necessary ?? */
1225 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1229 else
1231 tree lhs = gimple_assign_lhs (parcopy_stmt);
1232 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233 /* We'd like to set the rhs to the default def in the child_fn,
1234 but it's too early to create ssa names in the child_fn.
1235 Instead, we set the rhs to the parm. In
1236 move_sese_region_to_fn, we introduce a default def for the
1237 parm, map the parm to it's default def, and once we encounter
1238 this stmt, replace the parm with the default def. */
1239 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240 update_stmt (parcopy_stmt);
1244 /* Declare local variables needed in CHILD_CFUN. */
1245 block = DECL_INITIAL (child_fn);
1246 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247 /* The gimplifier could record temporaries in parallel/task block
1248 rather than in containing function's local_decls chain,
1249 which would mean cgraph missed finalizing them. Do it now. */
1250 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252 varpool_node::finalize_decl (t);
1253 DECL_SAVED_TREE (child_fn) = NULL;
1254 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1255 gimple_set_body (child_fn, NULL);
1256 TREE_USED (block) = 1;
1258 /* Reset DECL_CONTEXT on function arguments. */
1259 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260 DECL_CONTEXT (t) = child_fn;
1262 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 so that it can be moved to the child function. */
1264 gsi = gsi_last_bb (entry_bb);
1265 stmt = gsi_stmt (gsi);
1266 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268 e = split_block (entry_bb, stmt);
1269 gsi_remove (&gsi, true);
1270 entry_bb = e->dest;
1271 edge e2 = NULL;
1272 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274 else
1276 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277 gcc_assert (e2->dest == region->exit);
1278 remove_edge (BRANCH_EDGE (entry_bb));
1279 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280 gsi = gsi_last_bb (region->exit);
1281 gcc_assert (!gsi_end_p (gsi)
1282 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283 gsi_remove (&gsi, true);
1286 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1287 if (exit_bb)
1289 gsi = gsi_last_bb (exit_bb);
1290 gcc_assert (!gsi_end_p (gsi)
1291 && (gimple_code (gsi_stmt (gsi))
1292 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293 stmt = gimple_build_return (NULL);
1294 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295 gsi_remove (&gsi, true);
1298 /* Move the parallel region into CHILD_CFUN. */
1300 if (gimple_in_ssa_p (cfun))
1302 init_tree_ssa (child_cfun);
1303 init_ssa_operands (child_cfun);
1304 child_cfun->gimple_df->in_ssa_p = true;
1305 block = NULL_TREE;
1307 else
1308 block = gimple_block (entry_stmt);
1310 /* Make sure to generate early debug for the function before
1311 outlining anything. */
1312 if (! gimple_in_ssa_p (cfun))
1313 (*debug_hooks->early_global_decl) (cfun->decl);
1315 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1316 if (exit_bb)
1317 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1318 if (e2)
1320 basic_block dest_bb = e2->dest;
1321 if (!exit_bb)
1322 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1323 remove_edge (e2);
1324 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1326 /* When the OMP expansion process cannot guarantee an up-to-date
1327 loop tree arrange for the child function to fixup loops. */
1328 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1329 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1331 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1332 num = vec_safe_length (child_cfun->local_decls);
1333 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1335 t = (*child_cfun->local_decls)[srcidx];
1336 if (DECL_CONTEXT (t) == cfun->decl)
1337 continue;
1338 if (srcidx != dstidx)
1339 (*child_cfun->local_decls)[dstidx] = t;
1340 dstidx++;
1342 if (dstidx != num)
1343 vec_safe_truncate (child_cfun->local_decls, dstidx);
1345 /* Inform the callgraph about the new function. */
1346 child_cfun->curr_properties = cfun->curr_properties;
1347 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1348 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1349 cgraph_node *node = cgraph_node::get_create (child_fn);
1350 node->parallelized_function = 1;
1351 cgraph_node::add_new_function (child_fn, true);
1353 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1354 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1356 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1357 fixed in a following pass. */
1358 push_cfun (child_cfun);
1359 if (need_asm)
1360 assign_assembler_name_if_needed (child_fn);
1362 if (optimize)
1363 optimize_omp_library_calls (entry_stmt);
1364 cgraph_edge::rebuild_edges ();
1366 /* Some EH regions might become dead, see PR34608. If
1367 pass_cleanup_cfg isn't the first pass to happen with the
1368 new child, these dead EH edges might cause problems.
1369 Clean them up now. */
1370 if (flag_exceptions)
1372 basic_block bb;
1373 bool changed = false;
1375 FOR_EACH_BB_FN (bb, cfun)
1376 changed |= gimple_purge_dead_eh_edges (bb);
1377 if (changed)
1378 cleanup_tree_cfg ();
1380 if (gimple_in_ssa_p (cfun))
1381 update_ssa (TODO_update_ssa);
1382 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1383 verify_loop_structure ();
1384 pop_cfun ();
1386 if (dump_file && !gimple_in_ssa_p (cfun))
1388 omp_any_child_fn_dumped = true;
1389 dump_function_header (dump_file, child_fn, dump_flags);
1390 dump_function_to_file (child_fn, dump_file, dump_flags);
1394 /* Emit a library call to launch the children threads. */
1395 if (is_cilk_for)
1396 expand_cilk_for_call (new_bb,
1397 as_a <gomp_parallel *> (entry_stmt), ws_args);
1398 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1399 expand_parallel_call (region, new_bb,
1400 as_a <gomp_parallel *> (entry_stmt), ws_args);
1401 else
1402 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1403 if (gimple_in_ssa_p (cfun))
1404 update_ssa (TODO_update_ssa_only_virtuals);
1407 /* Information about members of an OpenACC collapsed loop nest. */
1409 struct oacc_collapse
1411 tree base; /* Base value. */
1412 tree iters; /* Number of steps. */
1413 tree step; /* Step size. */
1414 tree tile; /* Tile increment (if tiled). */
1415 tree outer; /* Tile iterator var. */
1418 /* Helper for expand_oacc_for. Determine collapsed loop information.
1419 Fill in COUNTS array. Emit any initialization code before GSI.
1420 Return the calculated outer loop bound of BOUND_TYPE. */
1422 static tree
1423 expand_oacc_collapse_init (const struct omp_for_data *fd,
1424 gimple_stmt_iterator *gsi,
1425 oacc_collapse *counts, tree bound_type,
1426 location_t loc)
1428 tree tiling = fd->tiling;
1429 tree total = build_int_cst (bound_type, 1);
1430 int ix;
1432 gcc_assert (integer_onep (fd->loop.step));
1433 gcc_assert (integer_zerop (fd->loop.n1));
1435 /* When tiling, the first operand of the tile clause applies to the
1436 innermost loop, and we work outwards from there. Seems
1437 backwards, but whatever. */
1438 for (ix = fd->collapse; ix--;)
1440 const omp_for_data_loop *loop = &fd->loops[ix];
1442 tree iter_type = TREE_TYPE (loop->v);
1443 tree diff_type = iter_type;
1444 tree plus_type = iter_type;
1446 gcc_assert (loop->cond_code == fd->loop.cond_code);
1448 if (POINTER_TYPE_P (iter_type))
1449 plus_type = sizetype;
1450 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1451 diff_type = signed_type_for (diff_type);
1453 if (tiling)
1455 tree num = build_int_cst (integer_type_node, fd->collapse);
1456 tree loop_no = build_int_cst (integer_type_node, ix);
1457 tree tile = TREE_VALUE (tiling);
1458 gcall *call
1459 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1460 /* gwv-outer=*/integer_zero_node,
1461 /* gwv-inner=*/integer_zero_node);
1463 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1464 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1465 gimple_call_set_lhs (call, counts[ix].tile);
1466 gimple_set_location (call, loc);
1467 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1469 tiling = TREE_CHAIN (tiling);
1471 else
1473 counts[ix].tile = NULL;
1474 counts[ix].outer = loop->v;
1477 tree b = loop->n1;
1478 tree e = loop->n2;
1479 tree s = loop->step;
1480 bool up = loop->cond_code == LT_EXPR;
1481 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1482 bool negating;
1483 tree expr;
1485 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1486 true, GSI_SAME_STMT);
1487 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1488 true, GSI_SAME_STMT);
1490 /* Convert the step, avoiding possible unsigned->signed overflow. */
1491 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1492 if (negating)
1493 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1494 s = fold_convert (diff_type, s);
1495 if (negating)
1496 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1497 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1498 true, GSI_SAME_STMT);
1500 /* Determine the range, avoiding possible unsigned->signed overflow. */
1501 negating = !up && TYPE_UNSIGNED (iter_type);
1502 expr = fold_build2 (MINUS_EXPR, plus_type,
1503 fold_convert (plus_type, negating ? b : e),
1504 fold_convert (plus_type, negating ? e : b));
1505 expr = fold_convert (diff_type, expr);
1506 if (negating)
1507 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1508 tree range = force_gimple_operand_gsi
1509 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1511 /* Determine number of iterations. */
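      /* That is, iters = (range - dir + step) / step: the division of
	 RANGE by STEP, rounded up in the direction of iteration.  */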
1512 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1513 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1514 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1516 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1517 true, GSI_SAME_STMT);
1519 counts[ix].base = b;
1520 counts[ix].iters = iters;
1521 counts[ix].step = s;
1523 total = fold_build2 (MULT_EXPR, bound_type, total,
1524 fold_convert (bound_type, iters));
1527 return total;
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
1536 static void
1537 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1538 gimple_stmt_iterator *gsi,
1539 const oacc_collapse *counts, tree ivar)
1541 tree ivar_type = TREE_TYPE (ivar);
1543 /* The most rapidly changing iteration variable is the innermost
1544 one. */
1545 for (int ix = fd->collapse; ix--;)
1547 const omp_for_data_loop *loop = &fd->loops[ix];
1548 const oacc_collapse *collapse = &counts[ix];
1549 tree v = inner ? loop->v : collapse->outer;
1550 tree iter_type = TREE_TYPE (v);
1551 tree diff_type = TREE_TYPE (collapse->step);
1552 tree plus_type = iter_type;
1553 enum tree_code plus_code = PLUS_EXPR;
1554 tree expr;
1556 if (POINTER_TYPE_P (iter_type))
1558 plus_code = POINTER_PLUS_EXPR;
1559 plus_type = sizetype;
1562 expr = ivar;
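      /* For all but the outermost loop, split IVAR: this loop's
	 contribution is IVAR % iters (left in EXPR), while IVAR / iters
	 carries the combined index of the enclosing collapsed loops.  */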
1563 if (ix)
1565 tree mod = fold_convert (ivar_type, collapse->iters);
1566 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1567 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1568 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1569 true, GSI_SAME_STMT);
1572 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1573 collapse->step);
1574 expr = fold_build2 (plus_code, iter_type,
1575 inner ? collapse->outer : collapse->base,
1576 fold_convert (plus_type, expr));
1577 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1578 true, GSI_SAME_STMT);
1579 gassign *ass = gimple_build_assign (v, expr);
1580 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1584 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1585 of the combined collapse > 1 loop constructs, generate code like:
1586 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1587 if (cond3 is <)
1588 adj = STEP3 - 1;
1589 else
1590 adj = STEP3 + 1;
1591 count3 = (adj + N32 - N31) / STEP3;
1592 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1593 if (cond2 is <)
1594 adj = STEP2 - 1;
1595 else
1596 adj = STEP2 + 1;
1597 count2 = (adj + N22 - N21) / STEP2;
1598 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1599 if (cond1 is <)
1600 adj = STEP1 - 1;
1601 else
1602 adj = STEP1 + 1;
1603 count1 = (adj + N12 - N11) / STEP1;
1604 count = count1 * count2 * count3;
1605 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1606 count = 0;
1607 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1608 of the combined loop constructs, just initialize COUNTS array
1609 from the _looptemp_ clauses. */
1611 /* NOTE: It *could* be better to moosh all of the BBs together,
1612 creating one larger BB with all the computation and the unexpected
1613 jump at the end. I.e.
1615 bool zero3, zero2, zero1, zero;
1617 zero3 = N32 c3 N31;
1618 count3 = (N32 - N31) /[cl] STEP3;
1619 zero2 = N22 c2 N21;
1620 count2 = (N22 - N21) /[cl] STEP2;
1621 zero1 = N12 c1 N11;
1622 count1 = (N12 - N11) /[cl] STEP1;
1623 zero = zero3 || zero2 || zero1;
1624 count = count1 * count2 * count3;
1625 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1627 After all, we expect the zero=false, and thus we expect to have to
1628 evaluate all of the comparison expressions, so short-circuiting
1629 oughtn't be a win. Since the condition isn't protecting a
1630 denominator, we're not concerned about divide-by-zero, so we can
1631 fully evaluate count even if a numerator turned out to be wrong.
1633 It seems like putting this all together would create much better
1634 scheduling opportunities, and less pressure on the chip's branch
1635 predictor. */
1637 static void
1638 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1639 basic_block &entry_bb, tree *counts,
1640 basic_block &zero_iter1_bb, int &first_zero_iter1,
1641 basic_block &zero_iter2_bb, int &first_zero_iter2,
1642 basic_block &l2_dom_bb)
1644 tree t, type = TREE_TYPE (fd->loop.v);
1645 edge e, ne;
1646 int i;
1648 /* Collapsed loops need work for expansion into SSA form. */
1649 gcc_assert (!gimple_in_ssa_p (cfun));
1651 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1652 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1654 gcc_assert (fd->ordered == 0);
1655 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1656 isn't supposed to be handled, as the inner loop doesn't
1657 use it. */
1658 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1659 OMP_CLAUSE__LOOPTEMP_);
1660 gcc_assert (innerc);
1661 for (i = 0; i < fd->collapse; i++)
1663 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1664 OMP_CLAUSE__LOOPTEMP_);
1665 gcc_assert (innerc);
1666 if (i)
1667 counts[i] = OMP_CLAUSE_DECL (innerc);
1668 else
1669 counts[0] = NULL_TREE;
1671 return;
1674 for (i = fd->collapse; i < fd->ordered; i++)
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1677 counts[i] = NULL_TREE;
1678 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1679 fold_convert (itype, fd->loops[i].n1),
1680 fold_convert (itype, fd->loops[i].n2));
1681 if (t && integer_zerop (t))
1683 for (i = fd->collapse; i < fd->ordered; i++)
1684 counts[i] = build_int_cst (type, 0);
1685 break;
1688 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1690 tree itype = TREE_TYPE (fd->loops[i].v);
1692 if (i >= fd->collapse && counts[i])
1693 continue;
1694 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1695 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1696 fold_convert (itype, fd->loops[i].n1),
1697 fold_convert (itype, fd->loops[i].n2)))
1698 == NULL_TREE || !integer_onep (t)))
1700 gcond *cond_stmt;
1701 tree n1, n2;
1702 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1703 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1706 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1707 true, GSI_SAME_STMT);
1708 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1709 NULL_TREE, NULL_TREE);
1710 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1711 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1712 expand_omp_regimplify_p, NULL, NULL)
1713 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1714 expand_omp_regimplify_p, NULL, NULL))
1716 *gsi = gsi_for_stmt (cond_stmt);
1717 gimple_regimplify_operands (cond_stmt, gsi);
1719 e = split_block (entry_bb, cond_stmt);
1720 basic_block &zero_iter_bb
1721 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1722 int &first_zero_iter
1723 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1724 if (zero_iter_bb == NULL)
1726 gassign *assign_stmt;
1727 first_zero_iter = i;
1728 zero_iter_bb = create_empty_bb (entry_bb);
1729 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1730 *gsi = gsi_after_labels (zero_iter_bb);
1731 if (i < fd->collapse)
1732 assign_stmt = gimple_build_assign (fd->loop.n2,
1733 build_zero_cst (type));
1734 else
1736 counts[i] = create_tmp_reg (type, ".count");
1737 assign_stmt
1738 = gimple_build_assign (counts[i], build_zero_cst (type));
1740 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1741 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1742 entry_bb);
1744 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1745 ne->probability = profile_probability::very_unlikely ();
1746 e->flags = EDGE_TRUE_VALUE;
1747 e->probability = ne->probability.invert ();
1748 if (l2_dom_bb == NULL)
1749 l2_dom_bb = entry_bb;
1750 entry_bb = e->dest;
1751 *gsi = gsi_last_bb (entry_bb);
1754 if (POINTER_TYPE_P (itype))
1755 itype = signed_type_for (itype);
1756 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1757 ? -1 : 1));
1758 t = fold_build2 (PLUS_EXPR, itype,
1759 fold_convert (itype, fd->loops[i].step), t);
1760 t = fold_build2 (PLUS_EXPR, itype, t,
1761 fold_convert (itype, fd->loops[i].n2));
1762 t = fold_build2 (MINUS_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].n1));
1764 /* ?? We could probably use CEIL_DIV_EXPR instead of
1765 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1766 generate the same code in the end because generically we
1767 don't know that the values involved must be negative for
1768 GT?? */
1769 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1770 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1771 fold_build1 (NEGATE_EXPR, itype, t),
1772 fold_build1 (NEGATE_EXPR, itype,
1773 fold_convert (itype,
1774 fd->loops[i].step)));
1775 else
1776 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1777 fold_convert (itype, fd->loops[i].step));
1778 t = fold_convert (type, t);
1779 if (TREE_CODE (t) == INTEGER_CST)
1780 counts[i] = t;
1781 else
1783 if (i < fd->collapse || i != first_zero_iter2)
1784 counts[i] = create_tmp_reg (type, ".count");
1785 expand_omp_build_assign (gsi, counts[i], t);
1787 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1789 if (i == 0)
1790 t = counts[0];
1791 else
1792 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1793 expand_omp_build_assign (gsi, fd->loop.n2, t);
1798 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1799 T = V;
1800 V3 = N31 + (T % count3) * STEP3;
1801 T = T / count3;
1802 V2 = N21 + (T % count2) * STEP2;
1803 T = T / count2;
1804 V1 = N11 + T * STEP1;
1805 if this loop doesn't have an inner loop construct combined with it.
1806 If it does have an inner loop construct combined with it and the
1807 iteration count isn't known constant, store values from counts array
1808 into its _looptemp_ temporaries instead. */
1810 static void
1811 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1812 tree *counts, gimple *inner_stmt, tree startvar)
1814 int i;
1815 if (gimple_omp_for_combined_p (fd->for_stmt))
1817 /* If fd->loop.n2 is constant, then no propagation of the counts
1818 is needed; they are constant. */
1819 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1820 return;
1822 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1823 ? gimple_omp_taskreg_clauses (inner_stmt)
1824 : gimple_omp_for_clauses (inner_stmt);
1825 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1826 isn't supposed to be handled, as the inner loop doesn't
1827 use it. */
1828 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1829 gcc_assert (innerc);
1830 for (i = 0; i < fd->collapse; i++)
1832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1833 OMP_CLAUSE__LOOPTEMP_);
1834 gcc_assert (innerc);
1835 if (i)
1837 tree tem = OMP_CLAUSE_DECL (innerc);
1838 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 gassign *stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1845 return;
1848 tree type = TREE_TYPE (fd->loop.v);
1849 tree tem = create_tmp_reg (type, ".tem");
1850 gassign *stmt = gimple_build_assign (tem, startvar);
1851 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1853 for (i = fd->collapse - 1; i >= 0; i--)
1855 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1856 itype = vtype;
1857 if (POINTER_TYPE_P (vtype))
1858 itype = signed_type_for (vtype);
1859 if (i != 0)
1860 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1861 else
1862 t = tem;
1863 t = fold_convert (itype, t);
1864 t = fold_build2 (MULT_EXPR, itype, t,
1865 fold_convert (itype, fd->loops[i].step));
1866 if (POINTER_TYPE_P (vtype))
1867 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1868 else
1869 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1870 t = force_gimple_operand_gsi (gsi, t,
1871 DECL_P (fd->loops[i].v)
1872 && TREE_ADDRESSABLE (fd->loops[i].v),
1873 NULL_TREE, false,
1874 GSI_CONTINUE_LINKING);
1875 stmt = gimple_build_assign (fd->loops[i].v, t);
1876 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1877 if (i != 0)
1879 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1880 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1881 false, GSI_CONTINUE_LINKING);
1882 stmt = gimple_build_assign (tem, t);
1883 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1888 /* Helper function for expand_omp_for_*. Generate code like:
1889 L10:
1890 V3 += STEP3;
1891 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1892 L11:
1893 V3 = N31;
1894 V2 += STEP2;
1895 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1896 L12:
1897 V2 = N21;
1898 V1 += STEP1;
1899 goto BODY_BB; */
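/* A minimal sketch of the same control flow, with hypothetical arrays
   v[], n1[], n2[], step[] standing in for the fd->loops[i] fields
   (illustration only, not the code this function emits):
     for (i = collapse - 1; i > 0; i--)
       {
         v[i] += step[i];
         if (v[i] cond[i] n2[i])
           goto body_bb;
         v[i] = n1[i];
       }
     v[0] += step[0];
     goto body_bb;
   i.e. an odometer-style increment: bump the innermost induction
   variable, and on wrap-around reset it to its lower bound and carry
   the increment to the next outer loop. */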
1901 static basic_block
1902 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1903 basic_block body_bb)
1905 basic_block last_bb, bb, collapse_bb = NULL;
1906 int i;
1907 gimple_stmt_iterator gsi;
1908 edge e;
1909 tree t;
1910 gimple *stmt;
1912 last_bb = cont_bb;
1913 for (i = fd->collapse - 1; i >= 0; i--)
1915 tree vtype = TREE_TYPE (fd->loops[i].v);
1917 bb = create_empty_bb (last_bb);
1918 add_bb_to_loop (bb, last_bb->loop_father);
1919 gsi = gsi_start_bb (bb);
1921 if (i < fd->collapse - 1)
1923 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1924 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1926 t = fd->loops[i + 1].n1;
1927 t = force_gimple_operand_gsi (&gsi, t,
1928 DECL_P (fd->loops[i + 1].v)
1929 && TREE_ADDRESSABLE (fd->loops[i
1930 + 1].v),
1931 NULL_TREE, false,
1932 GSI_CONTINUE_LINKING);
1933 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1934 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1936 else
1937 collapse_bb = bb;
1939 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1941 if (POINTER_TYPE_P (vtype))
1942 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1943 else
1944 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1945 t = force_gimple_operand_gsi (&gsi, t,
1946 DECL_P (fd->loops[i].v)
1947 && TREE_ADDRESSABLE (fd->loops[i].v),
1948 NULL_TREE, false, GSI_CONTINUE_LINKING);
1949 stmt = gimple_build_assign (fd->loops[i].v, t);
1950 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1952 if (i > 0)
1954 t = fd->loops[i].n2;
1955 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1956 false, GSI_CONTINUE_LINKING);
1957 tree v = fd->loops[i].v;
1958 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1959 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1960 false, GSI_CONTINUE_LINKING);
1961 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1962 stmt = gimple_build_cond_empty (t);
1963 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1964 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1965 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1967 else
1968 make_edge (bb, body_bb, EDGE_FALLTHRU);
1969 last_bb = bb;
1972 return collapse_bb;
1975 /* Expand #pragma omp ordered depend(source). */
1977 static void
1978 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1979 tree *counts, location_t loc)
1981 enum built_in_function source_ix
1982 = fd->iter_type == long_integer_type_node
1983 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1984 gimple *g
1985 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1986 build_fold_addr_expr (counts[fd->ordered]));
1987 gimple_set_location (g, loc);
1988 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1991 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1993 static void
1994 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1995 tree *counts, tree c, location_t loc)
1997 auto_vec<tree, 10> args;
1998 enum built_in_function sink_ix
1999 = fd->iter_type == long_integer_type_node
2000 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2001 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2002 int i;
2003 gimple_stmt_iterator gsi2 = *gsi;
2004 bool warned_step = false;
2006 for (i = 0; i < fd->ordered; i++)
2008 tree step = NULL_TREE;
2009 off = TREE_PURPOSE (deps);
2010 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2012 step = TREE_OPERAND (off, 1);
2013 off = TREE_OPERAND (off, 0);
2015 if (!integer_zerop (off))
2017 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2018 || fd->loops[i].cond_code == GT_EXPR);
2019 bool forward = fd->loops[i].cond_code == LT_EXPR;
2020 if (step)
2022 /* Non-simple Fortran DO loops. If the step is variable,
2023 we don't even know the direction at compile time, so we
2024 can't warn. */
2025 if (TREE_CODE (step) != INTEGER_CST)
2026 break;
2027 forward = tree_int_cst_sgn (step) != -1;
2029 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2030 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2031 "lexically later iteration");
2032 break;
2034 deps = TREE_CHAIN (deps);
2036 /* If all offsets corresponding to the collapsed loops are zero,
2037 this depend clause can be ignored. FIXME: but there is still a
2038 flush needed. We need to emit one __sync_synchronize () for it
2039 though (perhaps conditionally)? Solve this together with the
2040 conservative dependence folding optimization.
2041 if (i >= fd->collapse)
2042 return; */
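/* Illustration (hypothetical source): in a forward loop such as
   "for (i = 0; i < n; i++)" with ordered(1), "depend(sink: i-1)" waits
   on an earlier iteration and is accepted silently, whereas
   "depend(sink: i+1)" names a lexically later iteration, so
   forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE is true for the first
   non-zero offset and the warning above is emitted. */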
2044 deps = OMP_CLAUSE_DECL (c);
2045 gsi_prev (&gsi2);
2046 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2047 edge e2 = split_block_after_labels (e1->dest);
2049 gsi2 = gsi_after_labels (e1->dest);
2050 *gsi = gsi_last_bb (e1->src);
2051 for (i = 0; i < fd->ordered; i++)
2053 tree itype = TREE_TYPE (fd->loops[i].v);
2054 tree step = NULL_TREE;
2055 tree orig_off = NULL_TREE;
2056 if (POINTER_TYPE_P (itype))
2057 itype = sizetype;
2058 if (i)
2059 deps = TREE_CHAIN (deps);
2060 off = TREE_PURPOSE (deps);
2061 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2063 step = TREE_OPERAND (off, 1);
2064 off = TREE_OPERAND (off, 0);
2065 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2066 && integer_onep (fd->loops[i].step)
2067 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2069 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2070 if (step)
2072 off = fold_convert_loc (loc, itype, off);
2073 orig_off = off;
2074 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2077 if (integer_zerop (off))
2078 t = boolean_true_node;
2079 else
2081 tree a;
2082 tree co = fold_convert_loc (loc, itype, off);
2083 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2085 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2086 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2087 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2088 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2089 co);
2091 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2092 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2093 fd->loops[i].v, co);
2094 else
2095 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2096 fd->loops[i].v, co);
2097 if (step)
2099 tree t1, t2;
2100 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2101 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2102 fd->loops[i].n1);
2103 else
2104 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2105 fd->loops[i].n2);
2106 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2107 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2108 fd->loops[i].n2);
2109 else
2110 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2111 fd->loops[i].n1);
2112 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2113 step, build_int_cst (TREE_TYPE (step), 0));
2114 if (TREE_CODE (step) != INTEGER_CST)
2116 t1 = unshare_expr (t1);
2117 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2118 false, GSI_CONTINUE_LINKING);
2119 t2 = unshare_expr (t2);
2120 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2121 false, GSI_CONTINUE_LINKING);
2123 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2124 t, t2, t1);
2126 else if (fd->loops[i].cond_code == LT_EXPR)
2128 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2129 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2130 fd->loops[i].n1);
2131 else
2132 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2133 fd->loops[i].n2);
2135 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2136 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2137 fd->loops[i].n2);
2138 else
2139 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2140 fd->loops[i].n1);
2142 if (cond)
2143 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2144 else
2145 cond = t;
2147 off = fold_convert_loc (loc, itype, off);
2149 if (step
2150 || (fd->loops[i].cond_code == LT_EXPR
2151 ? !integer_onep (fd->loops[i].step)
2152 : !integer_minus_onep (fd->loops[i].step)))
2154 if (step == NULL_TREE
2155 && TYPE_UNSIGNED (itype)
2156 && fd->loops[i].cond_code == GT_EXPR)
2157 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2158 fold_build1_loc (loc, NEGATE_EXPR, itype,
2159 s));
2160 else
2161 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2162 orig_off ? orig_off : off, s);
2163 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2164 build_int_cst (itype, 0));
2165 if (integer_zerop (t) && !warned_step)
2167 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2168 "in the iteration space");
2169 warned_step = true;
2171 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2172 cond, t);
2175 if (i <= fd->collapse - 1 && fd->collapse > 1)
2176 t = fd->loop.v;
2177 else if (counts[i])
2178 t = counts[i];
2179 else
2181 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2182 fd->loops[i].v, fd->loops[i].n1);
2183 t = fold_convert_loc (loc, fd->iter_type, t);
2185 if (step)
2186 /* We have divided off by step already earlier. */;
2187 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2188 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2189 fold_build1_loc (loc, NEGATE_EXPR, itype,
2190 s));
2191 else
2192 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2193 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2194 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2195 off = fold_convert_loc (loc, fd->iter_type, off);
2196 if (i <= fd->collapse - 1 && fd->collapse > 1)
2198 if (i)
2199 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2200 off);
2201 if (i < fd->collapse - 1)
2203 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2204 counts[i]);
2205 continue;
2208 off = unshare_expr (off);
2209 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2210 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2211 true, GSI_SAME_STMT);
2212 args.safe_push (t);
2214 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2215 gimple_set_location (g, loc);
2216 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2218 cond = unshare_expr (cond);
2219 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2220 GSI_CONTINUE_LINKING);
2221 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2222 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2223 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2224 e1->probability = e3->probability.invert ();
2225 e1->flags = EDGE_TRUE_VALUE;
2226 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2228 *gsi = gsi_after_labels (e2->dest);
2231 /* Expand all #pragma omp ordered depend(source) and
2232 #pragma omp ordered depend(sink:...) constructs in the current
2233 #pragma omp for ordered(n) region. */
2235 static void
2236 expand_omp_ordered_source_sink (struct omp_region *region,
2237 struct omp_for_data *fd, tree *counts,
2238 basic_block cont_bb)
2240 struct omp_region *inner;
2241 int i;
2242 for (i = fd->collapse - 1; i < fd->ordered; i++)
2243 if (i == fd->collapse - 1 && fd->collapse > 1)
2244 counts[i] = NULL_TREE;
2245 else if (i >= fd->collapse && !cont_bb)
2246 counts[i] = build_zero_cst (fd->iter_type);
2247 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2248 && integer_onep (fd->loops[i].step))
2249 counts[i] = NULL_TREE;
2250 else
2251 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2252 tree atype
2253 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2254 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2255 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2257 for (inner = region->inner; inner; inner = inner->next)
2258 if (inner->type == GIMPLE_OMP_ORDERED)
2260 gomp_ordered *ord_stmt = inner->ord_stmt;
2261 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2262 location_t loc = gimple_location (ord_stmt);
2263 tree c;
2264 for (c = gimple_omp_ordered_clauses (ord_stmt);
2265 c; c = OMP_CLAUSE_CHAIN (c))
2266 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2267 break;
2268 if (c)
2269 expand_omp_ordered_source (&gsi, fd, counts, loc);
2270 for (c = gimple_omp_ordered_clauses (ord_stmt);
2271 c; c = OMP_CLAUSE_CHAIN (c))
2272 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2273 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2274 gsi_remove (&gsi, true);
2278 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2279 collapsed. */
2281 static basic_block
2282 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2283 basic_block cont_bb, basic_block body_bb,
2284 bool ordered_lastprivate)
2286 if (fd->ordered == fd->collapse)
2287 return cont_bb;
2289 if (!cont_bb)
2291 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2292 for (int i = fd->collapse; i < fd->ordered; i++)
2294 tree type = TREE_TYPE (fd->loops[i].v);
2295 tree n1 = fold_convert (type, fd->loops[i].n1);
2296 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2297 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2298 size_int (i - fd->collapse + 1),
2299 NULL_TREE, NULL_TREE);
2300 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302 return NULL;
2305 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2307 tree t, type = TREE_TYPE (fd->loops[i].v);
2308 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2309 expand_omp_build_assign (&gsi, fd->loops[i].v,
2310 fold_convert (type, fd->loops[i].n1));
2311 if (counts[i])
2312 expand_omp_build_assign (&gsi, counts[i],
2313 build_zero_cst (fd->iter_type));
2314 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2315 size_int (i - fd->collapse + 1),
2316 NULL_TREE, NULL_TREE);
2317 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2318 if (!gsi_end_p (gsi))
2319 gsi_prev (&gsi);
2320 else
2321 gsi = gsi_last_bb (body_bb);
2322 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2323 basic_block new_body = e1->dest;
2324 if (body_bb == cont_bb)
2325 cont_bb = new_body;
2326 edge e2 = NULL;
2327 basic_block new_header;
2328 if (EDGE_COUNT (cont_bb->preds) > 0)
2330 gsi = gsi_last_bb (cont_bb);
2331 if (POINTER_TYPE_P (type))
2332 t = fold_build_pointer_plus (fd->loops[i].v,
2333 fold_convert (sizetype,
2334 fd->loops[i].step));
2335 else
2336 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2337 fold_convert (type, fd->loops[i].step));
2338 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2339 if (counts[i])
2341 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2342 build_int_cst (fd->iter_type, 1));
2343 expand_omp_build_assign (&gsi, counts[i], t);
2344 t = counts[i];
2346 else
2348 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2349 fd->loops[i].v, fd->loops[i].n1);
2350 t = fold_convert (fd->iter_type, t);
2351 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2354 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2355 size_int (i - fd->collapse + 1),
2356 NULL_TREE, NULL_TREE);
2357 expand_omp_build_assign (&gsi, aref, t);
2358 gsi_prev (&gsi);
2359 e2 = split_block (cont_bb, gsi_stmt (gsi));
2360 new_header = e2->dest;
2362 else
2363 new_header = cont_bb;
2364 gsi = gsi_after_labels (new_header);
2365 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2366 true, GSI_SAME_STMT);
2367 tree n2
2368 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2369 true, NULL_TREE, true, GSI_SAME_STMT);
2370 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2371 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2372 edge e3 = split_block (new_header, gsi_stmt (gsi));
2373 cont_bb = e3->dest;
2374 remove_edge (e1);
2375 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2376 e3->flags = EDGE_FALSE_VALUE;
2377 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2378 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2379 e1->probability = e3->probability.invert ();
2381 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2382 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2384 if (e2)
2386 struct loop *loop = alloc_loop ();
2387 loop->header = new_header;
2388 loop->latch = e2->src;
2389 add_loop (loop, body_bb->loop_father);
2393 /* If there are any lastprivate clauses and it is possible some loops
2394 might have zero iterations, ensure all the decls are initialized;
2395 otherwise we could crash evaluating C++ class iterators with lastprivate
2396 clauses. */
2397 bool need_inits = false;
2398 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2399 if (need_inits)
2401 tree type = TREE_TYPE (fd->loops[i].v);
2402 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2403 expand_omp_build_assign (&gsi, fd->loops[i].v,
2404 fold_convert (type, fd->loops[i].n1));
2406 else
2408 tree type = TREE_TYPE (fd->loops[i].v);
2409 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2410 boolean_type_node,
2411 fold_convert (type, fd->loops[i].n1),
2412 fold_convert (type, fd->loops[i].n2));
2413 if (!integer_onep (this_cond))
2414 need_inits = true;
2417 return cont_bb;
2420 /* A subroutine of expand_omp_for. Generate code for a parallel
2421 loop with any schedule. Given parameters:
2423 for (V = N1; V cond N2; V += STEP) BODY;
2425 where COND is "<" or ">", we generate pseudocode
2427 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2428 if (more) goto L0; else goto L3;
2430 V = istart0;
2431 iend = iend0;
2433 BODY;
2434 V += STEP;
2435 if (V cond iend) goto L1; else goto L2;
2437 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2440 If this is a combined omp parallel loop, instead of the call to
2441 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2442 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2443 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2444 inner GIMPLE_OMP_FOR and V += STEP; and
2445 if (V cond iend) goto L1; else goto L2; are removed.
2447 For collapsed loops, given parameters:
2448 collapse(3)
2449 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2450 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2451 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2452 BODY;
2454 we generate pseudocode
2456 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2457 if (cond3 is <)
2458 adj = STEP3 - 1;
2459 else
2460 adj = STEP3 + 1;
2461 count3 = (adj + N32 - N31) / STEP3;
2462 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2463 if (cond2 is <)
2464 adj = STEP2 - 1;
2465 else
2466 adj = STEP2 + 1;
2467 count2 = (adj + N22 - N21) / STEP2;
2468 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2469 if (cond1 is <)
2470 adj = STEP1 - 1;
2471 else
2472 adj = STEP1 + 1;
2473 count1 = (adj + N12 - N11) / STEP1;
2474 count = count1 * count2 * count3;
2475 goto Z1;
2477 count = 0;
2479 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2480 if (more) goto L0; else goto L3;
2482 V = istart0;
2483 T = V;
2484 V3 = N31 + (T % count3) * STEP3;
2485 T = T / count3;
2486 V2 = N21 + (T % count2) * STEP2;
2487 T = T / count2;
2488 V1 = N11 + T * STEP1;
2489 iend = iend0;
2491 BODY;
2492 V += 1;
2493 if (V < iend) goto L10; else goto L2;
2494 L10:
2495 V3 += STEP3;
2496 if (V3 cond3 N32) goto L1; else goto L11;
2497 L11:
2498 V3 = N31;
2499 V2 += STEP2;
2500 if (V2 cond2 N22) goto L1; else goto L12;
2501 L12:
2502 V2 = N21;
2503 V1 += STEP1;
2504 goto L1;
2506 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
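/* Worked example of the count computation above (hypothetical numbers):
   for "for (V = 0; V < 10; V += 3)" the cond is <, so adj = STEP - 1 = 2
   and count = (adj + N2 - N1) / STEP = (2 + 10 - 0) / 3 = 4, matching the
   iterations V = 0, 3, 6, 9. For a descending loop such as
   "for (V = 10; V > 0; V -= 3)", adj = STEP + 1 = -2 and
   count = (-2 + 0 - 10) / -3 = 4 as well (V = 10, 7, 4, 1). */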
2511 static void
2512 expand_omp_for_generic (struct omp_region *region,
2513 struct omp_for_data *fd,
2514 enum built_in_function start_fn,
2515 enum built_in_function next_fn,
2516 gimple *inner_stmt)
2518 tree type, istart0, iend0, iend;
2519 tree t, vmain, vback, bias = NULL_TREE;
2520 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2521 basic_block l2_bb = NULL, l3_bb = NULL;
2522 gimple_stmt_iterator gsi;
2523 gassign *assign_stmt;
2524 bool in_combined_parallel = is_combined_parallel (region);
2525 bool broken_loop = region->cont == NULL;
2526 edge e, ne;
2527 tree *counts = NULL;
2528 int i;
2529 bool ordered_lastprivate = false;
2531 gcc_assert (!broken_loop || !in_combined_parallel);
2532 gcc_assert (fd->iter_type == long_integer_type_node
2533 || !in_combined_parallel);
2535 entry_bb = region->entry;
2536 cont_bb = region->cont;
2537 collapse_bb = NULL;
2538 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2539 gcc_assert (broken_loop
2540 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2541 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2542 l1_bb = single_succ (l0_bb);
2543 if (!broken_loop)
2545 l2_bb = create_empty_bb (cont_bb);
2546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2547 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2548 == l1_bb));
2549 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2551 else
2552 l2_bb = NULL;
2553 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2554 exit_bb = region->exit;
2556 gsi = gsi_last_bb (entry_bb);
2558 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2559 if (fd->ordered
2560 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2561 OMP_CLAUSE_LASTPRIVATE))
2562 ordered_lastprivate = true;
2563 if (fd->collapse > 1 || fd->ordered)
2565 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2566 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2568 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2569 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2570 zero_iter1_bb, first_zero_iter1,
2571 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2573 if (zero_iter1_bb)
2575 /* Some counts[i] vars might be uninitialized if
2576 some loop has zero iterations. But the body shouldn't
2577 be executed in that case, so just avoid uninit warnings. */
2578 for (i = first_zero_iter1;
2579 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2580 if (SSA_VAR_P (counts[i]))
2581 TREE_NO_WARNING (counts[i]) = 1;
2582 gsi_prev (&gsi);
2583 e = split_block (entry_bb, gsi_stmt (gsi));
2584 entry_bb = e->dest;
2585 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2586 gsi = gsi_last_bb (entry_bb);
2587 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2588 get_immediate_dominator (CDI_DOMINATORS,
2589 zero_iter1_bb));
2591 if (zero_iter2_bb)
2593 /* Some counts[i] vars might be uninitialized if
2594 some loop has zero iterations. But the body shouldn't
2595 be executed in that case, so just avoid uninit warnings. */
2596 for (i = first_zero_iter2; i < fd->ordered; i++)
2597 if (SSA_VAR_P (counts[i]))
2598 TREE_NO_WARNING (counts[i]) = 1;
2599 if (zero_iter1_bb)
2600 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2601 else
2603 gsi_prev (&gsi);
2604 e = split_block (entry_bb, gsi_stmt (gsi));
2605 entry_bb = e->dest;
2606 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2607 gsi = gsi_last_bb (entry_bb);
2608 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2609 get_immediate_dominator
2610 (CDI_DOMINATORS, zero_iter2_bb));
2613 if (fd->collapse == 1)
2615 counts[0] = fd->loop.n2;
2616 fd->loop = fd->loops[0];
2620 type = TREE_TYPE (fd->loop.v);
2621 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2622 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2623 TREE_ADDRESSABLE (istart0) = 1;
2624 TREE_ADDRESSABLE (iend0) = 1;
2626 /* See if we need to bias by LLONG_MIN. */
2627 if (fd->iter_type == long_long_unsigned_type_node
2628 && TREE_CODE (type) == INTEGER_TYPE
2629 && !TYPE_UNSIGNED (type)
2630 && fd->ordered == 0)
2632 tree n1, n2;
2634 if (fd->loop.cond_code == LT_EXPR)
2636 n1 = fd->loop.n1;
2637 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2639 else
2641 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2642 n2 = fd->loop.n1;
2644 if (TREE_CODE (n1) != INTEGER_CST
2645 || TREE_CODE (n2) != INTEGER_CST
2646 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2647 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
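/* Descriptive note (summarising the intent, not changing behaviour):
   the runtime iterates in fd->iter_type, here unsigned long long, while
   V is signed. If the range may straddle zero, e.g. a hypothetical loop
   from -5 to 5, the value -5 reinterpreted as unsigned would compare as
   larger than 5. Adding bias = TYPE_MIN_VALUE (type), i.e. 2^63 once
   reinterpreted as unsigned, maps the signed range monotonically into
   the unsigned domain; the bias is added to N1/N2 before the start-fn
   call below and subtracted again when istart0/iend0 are converted back
   to V's type. */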
2650 gimple_stmt_iterator gsif = gsi;
2651 gsi_prev (&gsif);
2653 tree arr = NULL_TREE;
2654 if (in_combined_parallel)
2656 gcc_assert (fd->ordered == 0);
2657 /* In a combined parallel loop, emit a call to
2658 GOMP_loop_foo_next. */
2659 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2660 build_fold_addr_expr (istart0),
2661 build_fold_addr_expr (iend0));
2663 else
2665 tree t0, t1, t2, t3, t4;
2666 /* If this is not a combined parallel loop, emit a call to
2667 GOMP_loop_foo_start in ENTRY_BB. */
2668 t4 = build_fold_addr_expr (iend0);
2669 t3 = build_fold_addr_expr (istart0);
2670 if (fd->ordered)
2672 t0 = build_int_cst (unsigned_type_node,
2673 fd->ordered - fd->collapse + 1);
2674 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2675 fd->ordered
2676 - fd->collapse + 1),
2677 ".omp_counts");
2678 DECL_NAMELESS (arr) = 1;
2679 TREE_ADDRESSABLE (arr) = 1;
2680 TREE_STATIC (arr) = 1;
2681 vec<constructor_elt, va_gc> *v;
2682 vec_alloc (v, fd->ordered - fd->collapse + 1);
2683 int idx;
2685 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2687 tree c;
2688 if (idx == 0 && fd->collapse > 1)
2689 c = fd->loop.n2;
2690 else
2691 c = counts[idx + fd->collapse - 1];
2692 tree purpose = size_int (idx);
2693 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2694 if (TREE_CODE (c) != INTEGER_CST)
2695 TREE_STATIC (arr) = 0;
2698 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2699 if (!TREE_STATIC (arr))
2700 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2701 void_type_node, arr),
2702 true, NULL_TREE, true, GSI_SAME_STMT);
2703 t1 = build_fold_addr_expr (arr);
2704 t2 = NULL_TREE;
2706 else
2708 t2 = fold_convert (fd->iter_type, fd->loop.step);
2709 t1 = fd->loop.n2;
2710 t0 = fd->loop.n1;
2711 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2713 tree innerc
2714 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2715 OMP_CLAUSE__LOOPTEMP_);
2716 gcc_assert (innerc);
2717 t0 = OMP_CLAUSE_DECL (innerc);
2718 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2719 OMP_CLAUSE__LOOPTEMP_);
2720 gcc_assert (innerc);
2721 t1 = OMP_CLAUSE_DECL (innerc);
2723 if (POINTER_TYPE_P (TREE_TYPE (t0))
2724 && TYPE_PRECISION (TREE_TYPE (t0))
2725 != TYPE_PRECISION (fd->iter_type))
2727 /* Avoid casting pointers to integer of a different size. */
2728 tree itype = signed_type_for (type);
2729 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2730 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2732 else
2734 t1 = fold_convert (fd->iter_type, t1);
2735 t0 = fold_convert (fd->iter_type, t0);
2737 if (bias)
2739 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2740 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2743 if (fd->iter_type == long_integer_type_node || fd->ordered)
2745 if (fd->chunk_size)
2747 t = fold_convert (fd->iter_type, fd->chunk_size);
2748 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2749 if (fd->ordered)
2750 t = build_call_expr (builtin_decl_explicit (start_fn),
2751 5, t0, t1, t, t3, t4);
2752 else
2753 t = build_call_expr (builtin_decl_explicit (start_fn),
2754 6, t0, t1, t2, t, t3, t4);
2756 else if (fd->ordered)
2757 t = build_call_expr (builtin_decl_explicit (start_fn),
2758 4, t0, t1, t3, t4);
2759 else
2760 t = build_call_expr (builtin_decl_explicit (start_fn),
2761 5, t0, t1, t2, t3, t4);
2763 else
2765 tree t5;
2766 tree c_bool_type;
2767 tree bfn_decl;
2769 /* The GOMP_loop_ull_*start functions have an additional boolean
2770 argument, true for < loops and false for > loops.
2771 In Fortran, the C bool type can be different from
2772 boolean_type_node. */
2773 bfn_decl = builtin_decl_explicit (start_fn);
2774 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2775 t5 = build_int_cst (c_bool_type,
2776 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2777 if (fd->chunk_size)
2779 tree bfn_decl = builtin_decl_explicit (start_fn);
2780 t = fold_convert (fd->iter_type, fd->chunk_size);
2781 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2782 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2784 else
2785 t = build_call_expr (builtin_decl_explicit (start_fn),
2786 6, t5, t0, t1, t2, t3, t4);
2789 if (TREE_TYPE (t) != boolean_type_node)
2790 t = fold_build2 (NE_EXPR, boolean_type_node,
2791 t, build_int_cst (TREE_TYPE (t), 0));
2792 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2793 true, GSI_SAME_STMT);
2794 if (arr && !TREE_STATIC (arr))
2796 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2797 TREE_THIS_VOLATILE (clobber) = 1;
2798 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2799 GSI_SAME_STMT);
2801 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2803 /* Remove the GIMPLE_OMP_FOR statement. */
2804 gsi_remove (&gsi, true);
2806 if (gsi_end_p (gsif))
2807 gsif = gsi_after_labels (gsi_bb (gsif));
2808 gsi_next (&gsif);
2810 /* Iteration setup for sequential loop goes in L0_BB. */
2811 tree startvar = fd->loop.v;
2812 tree endvar = NULL_TREE;
2814 if (gimple_omp_for_combined_p (fd->for_stmt))
2816 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2817 && gimple_omp_for_kind (inner_stmt)
2818 == GF_OMP_FOR_KIND_SIMD);
2819 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2820 OMP_CLAUSE__LOOPTEMP_);
2821 gcc_assert (innerc);
2822 startvar = OMP_CLAUSE_DECL (innerc);
2823 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2824 OMP_CLAUSE__LOOPTEMP_);
2825 gcc_assert (innerc);
2826 endvar = OMP_CLAUSE_DECL (innerc);
2829 gsi = gsi_start_bb (l0_bb);
2830 t = istart0;
2831 if (fd->ordered && fd->collapse == 1)
2832 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2833 fold_convert (fd->iter_type, fd->loop.step));
2834 else if (bias)
2835 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2836 if (fd->ordered && fd->collapse == 1)
2838 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2839 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2840 fd->loop.n1, fold_convert (sizetype, t));
2841 else
2843 t = fold_convert (TREE_TYPE (startvar), t);
2844 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2845 fd->loop.n1, t);
2848 else
2850 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2851 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2852 t = fold_convert (TREE_TYPE (startvar), t);
2854 t = force_gimple_operand_gsi (&gsi, t,
2855 DECL_P (startvar)
2856 && TREE_ADDRESSABLE (startvar),
2857 NULL_TREE, false, GSI_CONTINUE_LINKING);
2858 assign_stmt = gimple_build_assign (startvar, t);
2859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861 t = iend0;
2862 if (fd->ordered && fd->collapse == 1)
2863 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2864 fold_convert (fd->iter_type, fd->loop.step));
2865 else if (bias)
2866 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2867 if (fd->ordered && fd->collapse == 1)
2869 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2870 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2871 fd->loop.n1, fold_convert (sizetype, t));
2872 else
2874 t = fold_convert (TREE_TYPE (startvar), t);
2875 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2876 fd->loop.n1, t);
2879 else
2881 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2882 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2883 t = fold_convert (TREE_TYPE (startvar), t);
2885 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2886 false, GSI_CONTINUE_LINKING);
2887 if (endvar)
2889 assign_stmt = gimple_build_assign (endvar, iend);
2890 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2891 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2892 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2893 else
2894 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2895 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2897 /* Handle linear clause adjustments. */
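/* Descriptive note: for a hypothetical "#pragma omp for linear(x:2)",
   the loop below snapshots the pre-loop value of x into a temporary,
   computes how many logical iterations precede this thread's first one
   (itercnt = (startvar - N1) / STEP), and re-initializes the privatized
   x to snapshot + itercnt * 2, so each thread starts from the value x
   would have reached had the earlier iterations run sequentially. */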
2898 tree itercnt = NULL_TREE;
2899 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2900 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2901 c; c = OMP_CLAUSE_CHAIN (c))
2902 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2903 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2905 tree d = OMP_CLAUSE_DECL (c);
2906 bool is_ref = omp_is_reference (d);
2907 tree t = d, a, dest;
2908 if (is_ref)
2909 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2910 tree type = TREE_TYPE (t);
2911 if (POINTER_TYPE_P (type))
2912 type = sizetype;
2913 dest = unshare_expr (t);
2914 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2915 expand_omp_build_assign (&gsif, v, t);
2916 if (itercnt == NULL_TREE)
2918 itercnt = startvar;
2919 tree n1 = fd->loop.n1;
2920 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2922 itercnt
2923 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2924 itercnt);
2925 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2927 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2928 itercnt, n1);
2929 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2930 itercnt, fd->loop.step);
2931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2932 NULL_TREE, false,
2933 GSI_CONTINUE_LINKING);
2935 a = fold_build2 (MULT_EXPR, type,
2936 fold_convert (type, itercnt),
2937 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2938 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2939 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2940 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2941 false, GSI_CONTINUE_LINKING);
2942 assign_stmt = gimple_build_assign (dest, t);
2943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2945 if (fd->collapse > 1)
2946 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2948 if (fd->ordered)
2950 /* Until now, the counts array contained the number of iterations
2951 (or a variable holding it) for the ith loop. From now on, we need
2952 those counts only for the collapsed loops, and only from the 2nd
2953 to the last collapsed one. Move them one element earlier; we'll
2954 use counts[fd->collapse - 1] for the first source/sink
2955 iteration counter and so on, and counts[fd->ordered]
2956 as the array holding the current counter values for
2957 depend(source). */
2958 if (fd->collapse > 1)
2959 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2960 if (broken_loop)
2962 int i;
2963 for (i = fd->collapse; i < fd->ordered; i++)
2965 tree type = TREE_TYPE (fd->loops[i].v);
2966 tree this_cond
2967 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2968 fold_convert (type, fd->loops[i].n1),
2969 fold_convert (type, fd->loops[i].n2));
2970 if (!integer_onep (this_cond))
2971 break;
2973 if (i < fd->ordered)
2975 cont_bb
2976 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2977 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2978 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2979 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2980 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2981 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2982 make_edge (cont_bb, l1_bb, 0);
2983 l2_bb = create_empty_bb (cont_bb);
2984 broken_loop = false;
2987 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2988 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2989 ordered_lastprivate);
2990 if (counts[fd->collapse - 1])
2992 gcc_assert (fd->collapse == 1);
2993 gsi = gsi_last_bb (l0_bb);
2994 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2995 istart0, true);
2996 gsi = gsi_last_bb (cont_bb);
2997 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2998 build_int_cst (fd->iter_type, 1));
2999 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3003 t = counts[fd->collapse - 1];
3005 else if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3013 gsi = gsi_last_bb (l0_bb);
3014 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3015 size_zero_node, NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 false, GSI_CONTINUE_LINKING);
3018 expand_omp_build_assign (&gsi, aref, t, true);
3021 if (!broken_loop)
3023 /* Code to control the increment and predicate for the sequential
3024 loop goes in the CONT_BB. */
3025 gsi = gsi_last_bb (cont_bb);
3026 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3027 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3028 vmain = gimple_omp_continue_control_use (cont_stmt);
3029 vback = gimple_omp_continue_control_def (cont_stmt);
3031 if (!gimple_omp_for_combined_p (fd->for_stmt))
3033 if (POINTER_TYPE_P (type))
3034 t = fold_build_pointer_plus (vmain, fd->loop.step);
3035 else
3036 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3037 t = force_gimple_operand_gsi (&gsi, t,
3038 DECL_P (vback)
3039 && TREE_ADDRESSABLE (vback),
3040 NULL_TREE, true, GSI_SAME_STMT);
3041 assign_stmt = gimple_build_assign (vback, t);
3042 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3044 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3046 if (fd->collapse > 1)
3047 t = fd->loop.v;
3048 else
3050 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3051 fd->loops[0].v, fd->loops[0].n1);
3052 t = fold_convert (fd->iter_type, t);
3054 tree aref = build4 (ARRAY_REF, fd->iter_type,
3055 counts[fd->ordered], size_zero_node,
3056 NULL_TREE, NULL_TREE);
3057 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3058 true, GSI_SAME_STMT);
3059 expand_omp_build_assign (&gsi, aref, t);
3062 t = build2 (fd->loop.cond_code, boolean_type_node,
3063 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3064 iend);
3065 gcond *cond_stmt = gimple_build_cond_empty (t);
3066 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3069 /* Remove GIMPLE_OMP_CONTINUE. */
3070 gsi_remove (&gsi, true);
3072 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3073 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3075 /* Emit code to get the next parallel iteration in L2_BB. */
3076 gsi = gsi_start_bb (l2_bb);
3078 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3079 build_fold_addr_expr (istart0),
3080 build_fold_addr_expr (iend0));
3081 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3082 false, GSI_CONTINUE_LINKING);
3083 if (TREE_TYPE (t) != boolean_type_node)
3084 t = fold_build2 (NE_EXPR, boolean_type_node,
3085 t, build_int_cst (TREE_TYPE (t), 0));
3086 gcond *cond_stmt = gimple_build_cond_empty (t);
3087 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3090 /* Add the loop cleanup function. */
3091 gsi = gsi_last_bb (exit_bb);
3092 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3093 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3094 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3095 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3096 else
3097 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3098 gcall *call_stmt = gimple_build_call (t, 0);
3099 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3100 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3101 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3102 if (fd->ordered)
3104 tree arr = counts[fd->ordered];
3105 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3106 TREE_THIS_VOLATILE (clobber) = 1;
3107 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3108 GSI_SAME_STMT);
3110 gsi_remove (&gsi, true);
3112 /* Connect the new blocks. */
3113 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3114 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3116 if (!broken_loop)
3118 gimple_seq phis;
3120 e = find_edge (cont_bb, l3_bb);
3121 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3123 phis = phi_nodes (l3_bb);
3124 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3126 gimple *phi = gsi_stmt (gsi);
3127 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3128 PHI_ARG_DEF_FROM_EDGE (phi, e));
3130 remove_edge (e);
3132 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3133 e = find_edge (cont_bb, l1_bb);
3134 if (e == NULL)
3136 e = BRANCH_EDGE (cont_bb);
3137 gcc_assert (single_succ (e->dest) == l1_bb);
3139 if (gimple_omp_for_combined_p (fd->for_stmt))
3141 remove_edge (e);
3142 e = NULL;
3144 else if (fd->collapse > 1)
3146 remove_edge (e);
3147 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3149 else
3150 e->flags = EDGE_TRUE_VALUE;
3151 if (e)
3153 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3154 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3156 else
3158 e = find_edge (cont_bb, l2_bb);
3159 e->flags = EDGE_FALLTHRU;
3161 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3163 if (gimple_in_ssa_p (cfun))
3165 /* Add phis to the outer loop that connect to the phis in the inner,
3166 original loop, and move the loop entry value of the inner phi to
3167 the loop entry value of the outer phi. */
3168 gphi_iterator psi;
3169 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3171 source_location locus;
3172 gphi *nphi;
3173 gphi *exit_phi = psi.phi ();
3175 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3176 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3178 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3179 edge latch_to_l1 = find_edge (latch, l1_bb);
3180 gphi *inner_phi
3181 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3183 tree t = gimple_phi_result (exit_phi);
3184 tree new_res = copy_ssa_name (t, NULL);
3185 nphi = create_phi_node (new_res, l0_bb);
3187 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3188 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3189 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3190 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3191 add_phi_arg (nphi, t, entry_to_l0, locus);
3193 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3194 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3196 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3200 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3201 recompute_dominator (CDI_DOMINATORS, l2_bb));
3202 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3203 recompute_dominator (CDI_DOMINATORS, l3_bb));
3204 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3205 recompute_dominator (CDI_DOMINATORS, l0_bb));
3206 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3207 recompute_dominator (CDI_DOMINATORS, l1_bb));
3209 /* We enter expand_omp_for_generic with a loop. This original loop may
3210 have its own loop struct, or it may be part of an outer loop struct
3211 (which may be the fake loop). */
3212 struct loop *outer_loop = entry_bb->loop_father;
3213 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3215 add_bb_to_loop (l2_bb, outer_loop);
3217 /* We've added a new loop around the original loop. Allocate the
3218 corresponding loop struct. */
3219 struct loop *new_loop = alloc_loop ();
3220 new_loop->header = l0_bb;
3221 new_loop->latch = l2_bb;
3222 add_loop (new_loop, outer_loop);
3224 /* Allocate a loop structure for the original loop unless we already
3225 had one. */
3226 if (!orig_loop_has_loop_struct
3227 && !gimple_omp_for_combined_p (fd->for_stmt))
3229 struct loop *orig_loop = alloc_loop ();
3230 orig_loop->header = l1_bb;
3231 /* The loop may have multiple latches. */
3232 add_loop (orig_loop, new_loop);
3237 /* A subroutine of expand_omp_for. Generate code for a parallel
3238 loop with static schedule and no specified chunk size. Given
3239 parameters:
3241 for (V = N1; V cond N2; V += STEP) BODY;
3243 where COND is "<" or ">", we generate pseudocode
3245 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3246 if (cond is <)
3247 adj = STEP - 1;
3248 else
3249 adj = STEP + 1;
3250 if ((__typeof (V)) -1 > 0 && cond is >)
3251 n = -(adj + N2 - N1) / -STEP;
3252 else
3253 n = (adj + N2 - N1) / STEP;
3254 q = n / nthreads;
3255 tt = n % nthreads;
3256 if (threadid < tt) goto L3; else goto L4;
3258 tt = 0;
3259 q = q + 1;
3261 s0 = q * threadid + tt;
3262 e0 = s0 + q;
3263 V = s0 * STEP + N1;
3264 if (s0 >= e0) goto L2; else goto L0;
3266 e = e0 * STEP + N1;
3268 BODY;
3269 V += STEP;
3270 if (V cond e) goto L1;
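/* Worked example of the partitioning above (hypothetical numbers): with
   n = 10 iterations and nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2.
   Threads 0 and 1 (threadid < tt) take q + 1 = 3 iterations each, threads
   2 and 3 take 2, giving the contiguous chunks [0,3), [3,6), [6,8) and
   [8,10); for thread 2, for instance, s0 = 2*2+2 = 6 and e0 = 6+2 = 8. */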
3274 static void
3275 expand_omp_for_static_nochunk (struct omp_region *region,
3276 struct omp_for_data *fd,
3277 gimple *inner_stmt)
3279 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3280 tree type, itype, vmain, vback;
3281 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3282 basic_block body_bb, cont_bb, collapse_bb = NULL;
3283 basic_block fin_bb;
3284 gimple_stmt_iterator gsi;
3285 edge ep;
3286 bool broken_loop = region->cont == NULL;
3287 tree *counts = NULL;
3288 tree n1, n2, step;
3290 itype = type = TREE_TYPE (fd->loop.v);
3291 if (POINTER_TYPE_P (type))
3292 itype = signed_type_for (type);
3294 entry_bb = region->entry;
3295 cont_bb = region->cont;
3296 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3297 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3298 gcc_assert (broken_loop
3299 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3300 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3301 body_bb = single_succ (seq_start_bb);
3302 if (!broken_loop)
3304 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3305 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3306 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3308 exit_bb = region->exit;
3310 /* Iteration space partitioning goes in ENTRY_BB. */
3311 gsi = gsi_last_bb (entry_bb);
3312 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3314 if (fd->collapse > 1)
3316 int first_zero_iter = -1, dummy = -1;
3317 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3319 counts = XALLOCAVEC (tree, fd->collapse);
3320 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3321 fin_bb, first_zero_iter,
3322 dummy_bb, dummy, l2_dom_bb);
3323 t = NULL_TREE;
3325 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3326 t = integer_one_node;
3327 else
3328 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3329 fold_convert (type, fd->loop.n1),
3330 fold_convert (type, fd->loop.n2));
3331 if (fd->collapse == 1
3332 && TYPE_UNSIGNED (type)
3333 && (t == NULL_TREE || !integer_onep (t)))
3335 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3336 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3337 true, GSI_SAME_STMT);
3338 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3339 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3340 true, GSI_SAME_STMT);
3341 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3342 NULL_TREE, NULL_TREE);
3343 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3344 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3345 expand_omp_regimplify_p, NULL, NULL)
3346 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3347 expand_omp_regimplify_p, NULL, NULL))
3349 gsi = gsi_for_stmt (cond_stmt);
3350 gimple_regimplify_operands (cond_stmt, &gsi);
3352 ep = split_block (entry_bb, cond_stmt);
3353 ep->flags = EDGE_TRUE_VALUE;
3354 entry_bb = ep->dest;
3355 ep->probability = profile_probability::very_likely ();
3356 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3357 ep->probability = profile_probability::very_unlikely ();
3358 if (gimple_in_ssa_p (cfun))
3360 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3361 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3362 !gsi_end_p (gpi); gsi_next (&gpi))
3364 gphi *phi = gpi.phi ();
3365 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3366 ep, UNKNOWN_LOCATION);
3369 gsi = gsi_last_bb (entry_bb);
3372 switch (gimple_omp_for_kind (fd->for_stmt))
3374 case GF_OMP_FOR_KIND_FOR:
3375 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3376 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3377 break;
3378 case GF_OMP_FOR_KIND_DISTRIBUTE:
3379 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3380 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3381 break;
3382 default:
3383 gcc_unreachable ();
3385 nthreads = build_call_expr (nthreads, 0);
3386 nthreads = fold_convert (itype, nthreads);
3387 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3388 true, GSI_SAME_STMT);
3389 threadid = build_call_expr (threadid, 0);
3390 threadid = fold_convert (itype, threadid);
3391 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3392 true, GSI_SAME_STMT);
3394 n1 = fd->loop.n1;
3395 n2 = fd->loop.n2;
3396 step = fd->loop.step;
3397 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3399 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3400 OMP_CLAUSE__LOOPTEMP_);
3401 gcc_assert (innerc);
3402 n1 = OMP_CLAUSE_DECL (innerc);
3403 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3404 OMP_CLAUSE__LOOPTEMP_);
3405 gcc_assert (innerc);
3406 n2 = OMP_CLAUSE_DECL (innerc);
3408 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3409 true, NULL_TREE, true, GSI_SAME_STMT);
3410 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3411 true, NULL_TREE, true, GSI_SAME_STMT);
3412 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3413 true, NULL_TREE, true, GSI_SAME_STMT);
3415 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3416 t = fold_build2 (PLUS_EXPR, itype, step, t);
3417 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3418 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3419 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3420 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3421 fold_build1 (NEGATE_EXPR, itype, t),
3422 fold_build1 (NEGATE_EXPR, itype, step));
3423 else
3424 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3425 t = fold_convert (itype, t);
3426 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3428 q = create_tmp_reg (itype, "q");
3429 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3430 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3431 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3433 tt = create_tmp_reg (itype, "tt");
3434 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3435 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3438 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3439 gcond *cond_stmt = gimple_build_cond_empty (t);
3440 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3442 second_bb = split_block (entry_bb, cond_stmt)->dest;
3443 gsi = gsi_last_bb (second_bb);
3444 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3446 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3447 GSI_SAME_STMT);
3448 gassign *assign_stmt
3449 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3450 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3452 third_bb = split_block (second_bb, assign_stmt)->dest;
3453 gsi = gsi_last_bb (third_bb);
3454 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3456 t = build2 (MULT_EXPR, itype, q, threadid);
3457 t = build2 (PLUS_EXPR, itype, t, tt);
3458 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3460 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3461 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3463 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3464 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3466 /* Remove the GIMPLE_OMP_FOR statement. */
3467 gsi_remove (&gsi, true);
3469 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3470 gsi = gsi_start_bb (seq_start_bb);
3472 tree startvar = fd->loop.v;
3473 tree endvar = NULL_TREE;
3475 if (gimple_omp_for_combined_p (fd->for_stmt))
3477 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3478 ? gimple_omp_parallel_clauses (inner_stmt)
3479 : gimple_omp_for_clauses (inner_stmt);
3480 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3481 gcc_assert (innerc);
3482 startvar = OMP_CLAUSE_DECL (innerc);
3483 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3484 OMP_CLAUSE__LOOPTEMP_);
3485 gcc_assert (innerc);
3486 endvar = OMP_CLAUSE_DECL (innerc);
3487 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3488 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3490 int i;
3491 for (i = 1; i < fd->collapse; i++)
3493 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3494 OMP_CLAUSE__LOOPTEMP_);
3495 gcc_assert (innerc);
3497 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3498 OMP_CLAUSE__LOOPTEMP_);
3499 if (innerc)
3501 /* If needed (distribute parallel for with lastprivate),
3502 propagate down the total number of iterations. */
3503 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3504 fd->loop.n2);
3505 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3506 GSI_CONTINUE_LINKING);
3507 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3512 t = fold_convert (itype, s0);
3513 t = fold_build2 (MULT_EXPR, itype, t, step);
3514 if (POINTER_TYPE_P (type))
3515 t = fold_build_pointer_plus (n1, t);
3516 else
3517 t = fold_build2 (PLUS_EXPR, type, t, n1);
3518 t = fold_convert (TREE_TYPE (startvar), t);
3519 t = force_gimple_operand_gsi (&gsi, t,
3520 DECL_P (startvar)
3521 && TREE_ADDRESSABLE (startvar),
3522 NULL_TREE, false, GSI_CONTINUE_LINKING);
3523 assign_stmt = gimple_build_assign (startvar, t);
3524 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3526 t = fold_convert (itype, e0);
3527 t = fold_build2 (MULT_EXPR, itype, t, step);
3528 if (POINTER_TYPE_P (type))
3529 t = fold_build_pointer_plus (n1, t);
3530 else
3531 t = fold_build2 (PLUS_EXPR, type, t, n1);
3532 t = fold_convert (TREE_TYPE (startvar), t);
3533 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 false, GSI_CONTINUE_LINKING);
3535 if (endvar)
3537 assign_stmt = gimple_build_assign (endvar, e);
3538 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3539 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3540 assign_stmt = gimple_build_assign (fd->loop.v, e);
3541 else
3542 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3543 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3545 /* Handle linear clause adjustments. */
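/* That is: for each linear clause without NO_COPYIN, the privatized
   variable is advanced below by ITERCNT * LINEAR_STEP, where ITERCNT is
   the number of logical iterations that precede this thread's first one
   (S0, adjusted for combined loops), so that it enters the sub-range with
   the value the sequential loop would have given it at that point.  */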
3546 tree itercnt = NULL_TREE;
3547 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3548 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3549 c; c = OMP_CLAUSE_CHAIN (c))
3550 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3551 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3553 tree d = OMP_CLAUSE_DECL (c);
3554 bool is_ref = omp_is_reference (d);
3555 tree t = d, a, dest;
3556 if (is_ref)
3557 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3558 if (itercnt == NULL_TREE)
3560 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3562 itercnt = fold_build2 (MINUS_EXPR, itype,
3563 fold_convert (itype, n1),
3564 fold_convert (itype, fd->loop.n1));
3565 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3566 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3567 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3568 NULL_TREE, false,
3569 GSI_CONTINUE_LINKING);
3571 else
3572 itercnt = s0;
3574 tree type = TREE_TYPE (t);
3575 if (POINTER_TYPE_P (type))
3576 type = sizetype;
3577 a = fold_build2 (MULT_EXPR, type,
3578 fold_convert (type, itercnt),
3579 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3580 dest = unshare_expr (t);
3581 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3582 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3583 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3584 false, GSI_CONTINUE_LINKING);
3585 assign_stmt = gimple_build_assign (dest, t);
3586 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3588 if (fd->collapse > 1)
3589 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3591 if (!broken_loop)
3593 /* The code controlling the sequential loop replaces the
3594 GIMPLE_OMP_CONTINUE. */
3595 gsi = gsi_last_bb (cont_bb);
3596 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3597 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3598 vmain = gimple_omp_continue_control_use (cont_stmt);
3599 vback = gimple_omp_continue_control_def (cont_stmt);
3601 if (!gimple_omp_for_combined_p (fd->for_stmt))
3603 if (POINTER_TYPE_P (type))
3604 t = fold_build_pointer_plus (vmain, step);
3605 else
3606 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3607 t = force_gimple_operand_gsi (&gsi, t,
3608 DECL_P (vback)
3609 && TREE_ADDRESSABLE (vback),
3610 NULL_TREE, true, GSI_SAME_STMT);
3611 assign_stmt = gimple_build_assign (vback, t);
3612 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3614 t = build2 (fd->loop.cond_code, boolean_type_node,
3615 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3616 ? t : vback, e);
3617 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3620 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3621 gsi_remove (&gsi, true);
3623 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3624 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3627 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3628 gsi = gsi_last_bb (exit_bb);
3629 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3631 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3632 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3634 gsi_remove (&gsi, true);
3636 /* Connect all the blocks. */
3637 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3638 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3639 ep = find_edge (entry_bb, second_bb);
3640 ep->flags = EDGE_TRUE_VALUE;
3641 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3642 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3643 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3645 if (!broken_loop)
3647 ep = find_edge (cont_bb, body_bb);
3648 if (ep == NULL)
3650 ep = BRANCH_EDGE (cont_bb);
3651 gcc_assert (single_succ (ep->dest) == body_bb);
3653 if (gimple_omp_for_combined_p (fd->for_stmt))
3655 remove_edge (ep);
3656 ep = NULL;
3658 else if (fd->collapse > 1)
3660 remove_edge (ep);
3661 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3663 else
3664 ep->flags = EDGE_TRUE_VALUE;
3665 find_edge (cont_bb, fin_bb)->flags
3666 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3669 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3671 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3673 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3674 recompute_dominator (CDI_DOMINATORS, body_bb));
3675 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3676 recompute_dominator (CDI_DOMINATORS, fin_bb));
3678 struct loop *loop = body_bb->loop_father;
3679 if (loop != entry_bb->loop_father)
3681 gcc_assert (broken_loop || loop->header == body_bb);
3682 gcc_assert (broken_loop
3683 || loop->latch == region->cont
3684 || single_pred (loop->latch) == region->cont);
3685 return;
3688 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3690 loop = alloc_loop ();
3691 loop->header = body_bb;
3692 if (collapse_bb == NULL)
3693 loop->latch = cont_bb;
3694 add_loop (loop, body_bb->loop_father);
3698 /* Return phi in E->DEST with ARG on edge E. */
3700 static gphi *
3701 find_phi_with_arg_on_edge (tree arg, edge e)
3703 basic_block bb = e->dest;
3705 for (gphi_iterator gpi = gsi_start_phis (bb);
3706 !gsi_end_p (gpi);
3707 gsi_next (&gpi))
3709 gphi *phi = gpi.phi ();
3710 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3711 return phi;
3714 return NULL;
3717 /* A subroutine of expand_omp_for. Generate code for a parallel
3718 loop with static schedule and a specified chunk size. Given
3719 parameters:
3721 for (V = N1; V cond N2; V += STEP) BODY;
3723 where COND is "<" or ">", we generate pseudocode
3725 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3726 if (cond is <)
3727 adj = STEP - 1;
3728 else
3729 adj = STEP + 1;
3730 if ((__typeof (V)) -1 > 0 && cond is >)
3731 n = -(adj + N2 - N1) / -STEP;
3732 else
3733 n = (adj + N2 - N1) / STEP;
3734 trip = 0;
3735 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3736 here so that V is defined
3737 if the loop is not entered
3739 s0 = (trip * nthreads + threadid) * CHUNK;
3740 e0 = min (s0 + CHUNK, n);
3741 if (s0 < n) goto L1; else goto L4;
3743 V = s0 * STEP + N1;
3744 e = e0 * STEP + N1;
3746 BODY;
3747 V += STEP;
3748 if (V cond e) goto L2; else goto L3;
3750 trip += 1;
3751 goto L0;
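/* As an illustration (numbers chosen arbitrarily, not taken from the code
   below): with n = 10 iterations, nthreads = 2 and CHUNK = 2 the pseudocode
   hands out chunks round-robin, so thread 0 executes logical iterations
   {0,1}, {4,5}, {8,9} on trips 0, 1 and 2, while thread 1 executes {2,3}
   and {6,7}; on its third trip thread 1 finds s0 = 10 >= n and exits.  */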
3755 static void
3756 expand_omp_for_static_chunk (struct omp_region *region,
3757 struct omp_for_data *fd, gimple *inner_stmt)
3759 tree n, s0, e0, e, t;
3760 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3761 tree type, itype, vmain, vback, vextra;
3762 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3763 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3764 gimple_stmt_iterator gsi;
3765 edge se;
3766 bool broken_loop = region->cont == NULL;
3767 tree *counts = NULL;
3768 tree n1, n2, step;
3770 itype = type = TREE_TYPE (fd->loop.v);
3771 if (POINTER_TYPE_P (type))
3772 itype = signed_type_for (type);
3774 entry_bb = region->entry;
3775 se = split_block (entry_bb, last_stmt (entry_bb));
3776 entry_bb = se->src;
3777 iter_part_bb = se->dest;
3778 cont_bb = region->cont;
3779 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3780 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3781 gcc_assert (broken_loop
3782 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3783 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3784 body_bb = single_succ (seq_start_bb);
3785 if (!broken_loop)
3787 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3788 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3789 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3790 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3792 exit_bb = region->exit;
3794 /* Trip and adjustment setup goes in ENTRY_BB. */
3795 gsi = gsi_last_bb (entry_bb);
3796 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3798 if (fd->collapse > 1)
3800 int first_zero_iter = -1, dummy = -1;
3801 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3803 counts = XALLOCAVEC (tree, fd->collapse);
3804 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3805 fin_bb, first_zero_iter,
3806 dummy_bb, dummy, l2_dom_bb);
3807 t = NULL_TREE;
3809 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3810 t = integer_one_node;
3811 else
3812 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3813 fold_convert (type, fd->loop.n1),
3814 fold_convert (type, fd->loop.n2));
3815 if (fd->collapse == 1
3816 && TYPE_UNSIGNED (type)
3817 && (t == NULL_TREE || !integer_onep (t)))
3819 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3820 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3821 true, GSI_SAME_STMT);
3822 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3823 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3824 true, GSI_SAME_STMT);
3825 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3826 NULL_TREE, NULL_TREE);
3827 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3828 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3829 expand_omp_regimplify_p, NULL, NULL)
3830 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3831 expand_omp_regimplify_p, NULL, NULL))
3833 gsi = gsi_for_stmt (cond_stmt);
3834 gimple_regimplify_operands (cond_stmt, &gsi);
3836 se = split_block (entry_bb, cond_stmt);
3837 se->flags = EDGE_TRUE_VALUE;
3838 entry_bb = se->dest;
3839 se->probability = profile_probability::very_likely ();
3840 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3841 se->probability = profile_probability::very_unlikely ();
3842 if (gimple_in_ssa_p (cfun))
3844 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3845 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3846 !gsi_end_p (gpi); gsi_next (&gpi))
3848 gphi *phi = gpi.phi ();
3849 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3850 se, UNKNOWN_LOCATION);
3853 gsi = gsi_last_bb (entry_bb);
3856 switch (gimple_omp_for_kind (fd->for_stmt))
3858 case GF_OMP_FOR_KIND_FOR:
3859 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3860 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3861 break;
3862 case GF_OMP_FOR_KIND_DISTRIBUTE:
3863 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3864 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3865 break;
3866 default:
3867 gcc_unreachable ();
3869 nthreads = build_call_expr (nthreads, 0);
3870 nthreads = fold_convert (itype, nthreads);
3871 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3872 true, GSI_SAME_STMT);
3873 threadid = build_call_expr (threadid, 0);
3874 threadid = fold_convert (itype, threadid);
3875 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3876 true, GSI_SAME_STMT);
3878 n1 = fd->loop.n1;
3879 n2 = fd->loop.n2;
3880 step = fd->loop.step;
3881 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3883 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3884 OMP_CLAUSE__LOOPTEMP_);
3885 gcc_assert (innerc);
3886 n1 = OMP_CLAUSE_DECL (innerc);
3887 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3888 OMP_CLAUSE__LOOPTEMP_);
3889 gcc_assert (innerc);
3890 n2 = OMP_CLAUSE_DECL (innerc);
3892 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3893 true, NULL_TREE, true, GSI_SAME_STMT);
3894 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3895 true, NULL_TREE, true, GSI_SAME_STMT);
3896 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3897 true, NULL_TREE, true, GSI_SAME_STMT);
3898 tree chunk_size = fold_convert (itype, fd->chunk_size);
3899 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3900 chunk_size
3901 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3902 GSI_SAME_STMT);
3904 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3905 t = fold_build2 (PLUS_EXPR, itype, step, t);
3906 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3907 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3908 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3909 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3910 fold_build1 (NEGATE_EXPR, itype, t),
3911 fold_build1 (NEGATE_EXPR, itype, step));
3912 else
3913 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3914 t = fold_convert (itype, t);
3915 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 true, GSI_SAME_STMT);
3918 trip_var = create_tmp_reg (itype, ".trip");
3919 if (gimple_in_ssa_p (cfun))
3921 trip_init = make_ssa_name (trip_var);
3922 trip_main = make_ssa_name (trip_var);
3923 trip_back = make_ssa_name (trip_var);
3925 else
3927 trip_init = trip_var;
3928 trip_main = trip_var;
3929 trip_back = trip_var;
3932 gassign *assign_stmt
3933 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3934 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3936 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3937 t = fold_build2 (MULT_EXPR, itype, t, step);
3938 if (POINTER_TYPE_P (type))
3939 t = fold_build_pointer_plus (n1, t);
3940 else
3941 t = fold_build2 (PLUS_EXPR, type, t, n1);
3942 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3943 true, GSI_SAME_STMT);
3945 /* Remove the GIMPLE_OMP_FOR. */
3946 gsi_remove (&gsi, true);
3948 gimple_stmt_iterator gsif = gsi;
3950 /* Iteration space partitioning goes in ITER_PART_BB. */
3951 gsi = gsi_last_bb (iter_part_bb);
3953 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3954 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3955 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3956 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3957 false, GSI_CONTINUE_LINKING);
3959 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3960 t = fold_build2 (MIN_EXPR, itype, t, n);
3961 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 false, GSI_CONTINUE_LINKING);
3964 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3965 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3967 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3968 gsi = gsi_start_bb (seq_start_bb);
3970 tree startvar = fd->loop.v;
3971 tree endvar = NULL_TREE;
3973 if (gimple_omp_for_combined_p (fd->for_stmt))
3975 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3976 ? gimple_omp_parallel_clauses (inner_stmt)
3977 : gimple_omp_for_clauses (inner_stmt);
3978 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3979 gcc_assert (innerc);
3980 startvar = OMP_CLAUSE_DECL (innerc);
3981 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 endvar = OMP_CLAUSE_DECL (innerc);
3985 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3986 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3988 int i;
3989 for (i = 1; i < fd->collapse; i++)
3991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3992 OMP_CLAUSE__LOOPTEMP_);
3993 gcc_assert (innerc);
3995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3996 OMP_CLAUSE__LOOPTEMP_);
3997 if (innerc)
3999 /* If needed (distribute parallel for with lastprivate),
4000 propagate down the total number of iterations. */
4001 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4002 fd->loop.n2);
4003 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4004 GSI_CONTINUE_LINKING);
4005 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4006 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4011 t = fold_convert (itype, s0);
4012 t = fold_build2 (MULT_EXPR, itype, t, step);
4013 if (POINTER_TYPE_P (type))
4014 t = fold_build_pointer_plus (n1, t);
4015 else
4016 t = fold_build2 (PLUS_EXPR, type, t, n1);
4017 t = fold_convert (TREE_TYPE (startvar), t);
4018 t = force_gimple_operand_gsi (&gsi, t,
4019 DECL_P (startvar)
4020 && TREE_ADDRESSABLE (startvar),
4021 NULL_TREE, false, GSI_CONTINUE_LINKING);
4022 assign_stmt = gimple_build_assign (startvar, t);
4023 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4025 t = fold_convert (itype, e0);
4026 t = fold_build2 (MULT_EXPR, itype, t, step);
4027 if (POINTER_TYPE_P (type))
4028 t = fold_build_pointer_plus (n1, t);
4029 else
4030 t = fold_build2 (PLUS_EXPR, type, t, n1);
4031 t = fold_convert (TREE_TYPE (startvar), t);
4032 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4033 false, GSI_CONTINUE_LINKING);
4034 if (endvar)
4036 assign_stmt = gimple_build_assign (endvar, e);
4037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4038 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4039 assign_stmt = gimple_build_assign (fd->loop.v, e);
4040 else
4041 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4044 /* Handle linear clause adjustments. */
4045 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4046 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4047 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4048 c; c = OMP_CLAUSE_CHAIN (c))
4049 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4050 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4052 tree d = OMP_CLAUSE_DECL (c);
4053 bool is_ref = omp_is_reference (d);
4054 tree t = d, a, dest;
4055 if (is_ref)
4056 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4057 tree type = TREE_TYPE (t);
4058 if (POINTER_TYPE_P (type))
4059 type = sizetype;
4060 dest = unshare_expr (t);
4061 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4062 expand_omp_build_assign (&gsif, v, t);
4063 if (itercnt == NULL_TREE)
4065 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4067 itercntbias
4068 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4069 fold_convert (itype, fd->loop.n1));
4070 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4071 itercntbias, step);
4072 itercntbias
4073 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4074 NULL_TREE, true,
4075 GSI_SAME_STMT);
4076 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4077 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4078 NULL_TREE, false,
4079 GSI_CONTINUE_LINKING);
4081 else
4082 itercnt = s0;
4084 a = fold_build2 (MULT_EXPR, type,
4085 fold_convert (type, itercnt),
4086 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4087 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4088 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4089 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4090 false, GSI_CONTINUE_LINKING);
4091 assign_stmt = gimple_build_assign (dest, t);
4092 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4094 if (fd->collapse > 1)
4095 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4097 if (!broken_loop)
4099 /* The code controlling the sequential loop goes in CONT_BB,
4100 replacing the GIMPLE_OMP_CONTINUE. */
4101 gsi = gsi_last_bb (cont_bb);
4102 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4103 vmain = gimple_omp_continue_control_use (cont_stmt);
4104 vback = gimple_omp_continue_control_def (cont_stmt);
4106 if (!gimple_omp_for_combined_p (fd->for_stmt))
4108 if (POINTER_TYPE_P (type))
4109 t = fold_build_pointer_plus (vmain, step);
4110 else
4111 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4112 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4113 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4114 true, GSI_SAME_STMT);
4115 assign_stmt = gimple_build_assign (vback, t);
4116 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
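/* When the chunk size is the constant 1 each chunk contains exactly one
   iteration, so the sequential loop never needs to loop back; the
   condition below is then built as the constant 0 == 1, i.e. always
   false, and control always falls through to the trip update.  */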
4118 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4119 t = build2 (EQ_EXPR, boolean_type_node,
4120 build_int_cst (itype, 0),
4121 build_int_cst (itype, 1));
4122 else
4123 t = build2 (fd->loop.cond_code, boolean_type_node,
4124 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4125 ? t : vback, e);
4126 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4129 /* Remove GIMPLE_OMP_CONTINUE. */
4130 gsi_remove (&gsi, true);
4132 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4133 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4135 /* Trip update code goes into TRIP_UPDATE_BB. */
4136 gsi = gsi_start_bb (trip_update_bb);
4138 t = build_int_cst (itype, 1);
4139 t = build2 (PLUS_EXPR, itype, trip_main, t);
4140 assign_stmt = gimple_build_assign (trip_back, t);
4141 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4144 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4145 gsi = gsi_last_bb (exit_bb);
4146 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4148 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4149 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4151 gsi_remove (&gsi, true);
4153 /* Connect the new blocks. */
4154 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4155 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4157 if (!broken_loop)
4159 se = find_edge (cont_bb, body_bb);
4160 if (se == NULL)
4162 se = BRANCH_EDGE (cont_bb);
4163 gcc_assert (single_succ (se->dest) == body_bb);
4165 if (gimple_omp_for_combined_p (fd->for_stmt))
4167 remove_edge (se);
4168 se = NULL;
4170 else if (fd->collapse > 1)
4172 remove_edge (se);
4173 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4175 else
4176 se->flags = EDGE_TRUE_VALUE;
4177 find_edge (cont_bb, trip_update_bb)->flags
4178 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4180 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4181 iter_part_bb);
4184 if (gimple_in_ssa_p (cfun))
4186 gphi_iterator psi;
4187 gphi *phi;
4188 edge re, ene;
4189 edge_var_map *vm;
4190 size_t i;
4192 gcc_assert (fd->collapse == 1 && !broken_loop);
4194 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4195 remove arguments of the phi nodes in fin_bb. We need to create
4196 appropriate phi nodes in iter_part_bb instead. */
4197 se = find_edge (iter_part_bb, fin_bb);
4198 re = single_succ_edge (trip_update_bb);
4199 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4200 ene = single_succ_edge (entry_bb);
4202 psi = gsi_start_phis (fin_bb);
4203 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4204 gsi_next (&psi), ++i)
4206 gphi *nphi;
4207 source_location locus;
4209 phi = psi.phi ();
4210 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4211 redirect_edge_var_map_def (vm), 0))
4212 continue;
4214 t = gimple_phi_result (phi);
4215 gcc_assert (t == redirect_edge_var_map_result (vm));
4217 if (!single_pred_p (fin_bb))
4218 t = copy_ssa_name (t, phi);
4220 nphi = create_phi_node (t, iter_part_bb);
4222 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4223 locus = gimple_phi_arg_location_from_edge (phi, se);
4225 /* A special case -- fd->loop.v is not yet computed in
4226 iter_part_bb, so we need to use vextra instead. */
4227 if (t == fd->loop.v)
4228 t = vextra;
4229 add_phi_arg (nphi, t, ene, locus);
4230 locus = redirect_edge_var_map_location (vm);
4231 tree back_arg = redirect_edge_var_map_def (vm);
4232 add_phi_arg (nphi, back_arg, re, locus);
4233 edge ce = find_edge (cont_bb, body_bb);
4234 if (ce == NULL)
4236 ce = BRANCH_EDGE (cont_bb);
4237 gcc_assert (single_succ (ce->dest) == body_bb);
4238 ce = single_succ_edge (ce->dest);
4240 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4241 gcc_assert (inner_loop_phi != NULL);
4242 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4243 find_edge (seq_start_bb, body_bb), locus);
4245 if (!single_pred_p (fin_bb))
4246 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4248 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4249 redirect_edge_var_map_clear (re);
4250 if (single_pred_p (fin_bb))
4251 while (1)
4253 psi = gsi_start_phis (fin_bb);
4254 if (gsi_end_p (psi))
4255 break;
4256 remove_phi_node (&psi, false);
4259 /* Make phi node for trip. */
4260 phi = create_phi_node (trip_main, iter_part_bb);
4261 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4262 UNKNOWN_LOCATION);
4263 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4264 UNKNOWN_LOCATION);
4267 if (!broken_loop)
4268 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4269 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4270 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4271 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4272 recompute_dominator (CDI_DOMINATORS, fin_bb));
4273 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4274 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4275 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4276 recompute_dominator (CDI_DOMINATORS, body_bb));
4278 if (!broken_loop)
4280 struct loop *loop = body_bb->loop_father;
4281 struct loop *trip_loop = alloc_loop ();
4282 trip_loop->header = iter_part_bb;
4283 trip_loop->latch = trip_update_bb;
4284 add_loop (trip_loop, iter_part_bb->loop_father);
4286 if (loop != entry_bb->loop_father)
4288 gcc_assert (loop->header == body_bb);
4289 gcc_assert (loop->latch == region->cont
4290 || single_pred (loop->latch) == region->cont);
4291 trip_loop->inner = loop;
4292 return;
4295 if (!gimple_omp_for_combined_p (fd->for_stmt))
4297 loop = alloc_loop ();
4298 loop->header = body_bb;
4299 if (collapse_bb == NULL)
4300 loop->latch = cont_bb;
4301 add_loop (loop, trip_loop);
4306 /* A subroutine of expand_omp_for. Generate code for a _Cilk_for loop.
4307 Given parameters:
4308 for (V = N1; V cond N2; V += STEP) BODY;
4310 where COND is "<" or ">" or "!=", we generate pseudocode
4312 for (ind_var = low; ind_var < high; ind_var++)
4314 V = n1 + (ind_var * STEP)
4316 <BODY>
4319 In the above pseudocode, low and high are function parameters of the
4320 child function. In the function below, we insert a temporary
4321 variable that makes calls to two OMP functions that will not be
4322 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4323 with _Cilk_for). These functions are later replaced with low and high
4324 by the function that handles taskreg. */
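/* For illustration only: a source loop such as

     _Cilk_for (int i = 0; i < n; i++)
       a[i] = b[i];

   ends up with its body outlined into a child function taking the __low and
   __high parameters mentioned above; the Cilk runtime entry point recorded
   in WS_ARGS below (__libcilkrts_cilk_for_32 or __libcilkrts_cilk_for_64)
   calls that child once per sub-range it chooses, and each call executes
   roughly the pseudocode above with low = __low and high = __high.  */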
4327 static void
4328 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4330 bool broken_loop = region->cont == NULL;
4331 basic_block entry_bb = region->entry;
4332 basic_block cont_bb = region->cont;
4334 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4335 gcc_assert (broken_loop
4336 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4337 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4338 basic_block l1_bb, l2_bb;
4340 if (!broken_loop)
4342 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4343 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4344 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4345 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4347 else
4349 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4350 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4351 l2_bb = single_succ (l1_bb);
4353 basic_block exit_bb = region->exit;
4354 basic_block l2_dom_bb = NULL;
4356 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4358 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4359 statements used to pass information to expand_omp_taskreg.
4360 low_val and high_val will be replaced by the __low and __high
4361 parameters from the child function.
4363 The call_exprs part is a place-holder; it is mainly used
4364 to tell the top-level part distinctly that this is
4365 where we should put low and high (reasoning given in the header
4366 comment). */
4368 gomp_parallel *par_stmt
4369 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4370 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4371 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4372 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4374 if (id_equal (DECL_NAME (t), "__high"))
4375 high_val = t;
4376 else if (id_equal (DECL_NAME (t), "__low"))
4377 low_val = t;
4379 gcc_assert (low_val && high_val);
4381 tree type = TREE_TYPE (low_val);
4382 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4383 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4385 /* Not needed in SSA form right now. */
4386 gcc_assert (!gimple_in_ssa_p (cfun));
4387 if (l2_dom_bb == NULL)
4388 l2_dom_bb = l1_bb;
4390 tree n1 = low_val;
4391 tree n2 = high_val;
4393 gimple *stmt = gimple_build_assign (ind_var, n1);
4395 /* Replace the GIMPLE_OMP_FOR statement. */
4396 gsi_replace (&gsi, stmt, true);
4398 if (!broken_loop)
4400 /* Code to control the increment goes in the CONT_BB. */
4401 gsi = gsi_last_bb (cont_bb);
4402 stmt = gsi_stmt (gsi);
4403 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4404 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4405 build_one_cst (type));
4407 /* Replace GIMPLE_OMP_CONTINUE. */
4408 gsi_replace (&gsi, stmt, true);
4411 /* Emit the condition in L1_BB. */
4412 gsi = gsi_after_labels (l1_bb);
4413 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4414 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4415 fd->loop.step);
4416 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4417 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4418 fd->loop.n1, fold_convert (sizetype, t));
4419 else
4420 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4421 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4422 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4423 expand_omp_build_assign (&gsi, fd->loop.v, t);
4425 /* The condition is always '<' since the runtime will fill in the low
4426 and high values. */
4427 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4428 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4430 /* Remove GIMPLE_OMP_RETURN. */
4431 gsi = gsi_last_bb (exit_bb);
4432 gsi_remove (&gsi, true);
4434 /* Connect the new blocks. */
4435 remove_edge (FALLTHRU_EDGE (entry_bb));
4437 edge e, ne;
4438 if (!broken_loop)
4440 remove_edge (BRANCH_EDGE (entry_bb));
4441 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4443 e = BRANCH_EDGE (l1_bb);
4444 ne = FALLTHRU_EDGE (l1_bb);
4445 e->flags = EDGE_TRUE_VALUE;
4447 else
4449 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4451 ne = single_succ_edge (l1_bb);
4452 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4455 ne->flags = EDGE_FALSE_VALUE;
4456 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4457 ne->probability = e->probability.invert ();
4459 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4460 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4461 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4463 if (!broken_loop)
4465 struct loop *loop = alloc_loop ();
4466 loop->header = l1_bb;
4467 loop->latch = cont_bb;
4468 add_loop (loop, l1_bb->loop_father);
4469 loop->safelen = INT_MAX;
4472 /* Pick the correct library function based on the precision of the
4473 induction variable type. */
4474 tree lib_fun = NULL_TREE;
4475 if (TYPE_PRECISION (type) == 32)
4476 lib_fun = cilk_for_32_fndecl;
4477 else if (TYPE_PRECISION (type) == 64)
4478 lib_fun = cilk_for_64_fndecl;
4479 else
4480 gcc_unreachable ();
4482 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4484 /* WS_ARGS contains the library function flavor to call
4485 (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4486 user-defined grain value. If the user does not define one, then zero
4487 is passed in by the parser. */
4488 vec_alloc (region->ws_args, 2);
4489 region->ws_args->quick_push (lib_fun);
4490 region->ws_args->quick_push (fd->chunk_size);
4493 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4494 loop. Given parameters:
4496 for (V = N1; V cond N2; V += STEP) BODY;
4498 where COND is "<" or ">", we generate pseudocode
4500 V = N1;
4501 goto L1;
4503 BODY;
4504 V += STEP;
4506 if (V cond N2) goto L0; else goto L2;
4509 For collapsed loops, given parameters:
4510 collapse(3)
4511 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4512 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4513 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4514 BODY;
4516 we generate pseudocode
4518 if (cond3 is <)
4519 adj = STEP3 - 1;
4520 else
4521 adj = STEP3 + 1;
4522 count3 = (adj + N32 - N31) / STEP3;
4523 if (cond2 is <)
4524 adj = STEP2 - 1;
4525 else
4526 adj = STEP2 + 1;
4527 count2 = (adj + N22 - N21) / STEP2;
4528 if (cond1 is <)
4529 adj = STEP1 - 1;
4530 else
4531 adj = STEP1 + 1;
4532 count1 = (adj + N12 - N11) / STEP1;
4533 count = count1 * count2 * count3;
4534 V = 0;
4535 V1 = N11;
4536 V2 = N21;
4537 V3 = N31;
4538 goto L1;
4540 BODY;
4541 V += 1;
4542 V3 += STEP3;
4543 V2 += (V3 cond3 N32) ? 0 : STEP2;
4544 V3 = (V3 cond3 N32) ? V3 : N31;
4545 V1 += (V2 cond2 N22) ? 0 : STEP1;
4546 V2 = (V2 cond2 N22) ? V2 : N21;
4548 if (V < count) goto L0; else goto L2;
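/* A small worked example (values chosen arbitrarily), using collapse(2) by
   analogy with the collapse(3) pseudocode above: with N11=0, N12=2, STEP1=1
   and N21=0, N22=3, STEP2=1 we get count1=2, count2=3 and count=6; the
   single counter V then runs 0..5 while (V1,V2) visits (0,0) (0,1) (0,2)
   (1,0) (1,1) (1,2), and the loop exits once V == count.  */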
4553 static void
4554 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4556 tree type, t;
4557 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4558 gimple_stmt_iterator gsi;
4559 gimple *stmt;
4560 gcond *cond_stmt;
4561 bool broken_loop = region->cont == NULL;
4562 edge e, ne;
4563 tree *counts = NULL;
4564 int i;
4565 int safelen_int = INT_MAX;
4566 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4567 OMP_CLAUSE_SAFELEN);
4568 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4569 OMP_CLAUSE__SIMDUID_);
4570 tree n1, n2;
4572 if (safelen)
4574 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4575 if (TREE_CODE (safelen) != INTEGER_CST)
4576 safelen_int = 0;
4577 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4578 safelen_int = tree_to_uhwi (safelen);
4579 if (safelen_int == 1)
4580 safelen_int = 0;
4582 type = TREE_TYPE (fd->loop.v);
4583 entry_bb = region->entry;
4584 cont_bb = region->cont;
4585 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4586 gcc_assert (broken_loop
4587 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4588 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4589 if (!broken_loop)
4591 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4592 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4593 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4594 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4596 else
4598 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4599 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4600 l2_bb = single_succ (l1_bb);
4602 exit_bb = region->exit;
4603 l2_dom_bb = NULL;
4605 gsi = gsi_last_bb (entry_bb);
4607 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4608 /* Not needed in SSA form right now. */
4609 gcc_assert (!gimple_in_ssa_p (cfun));
4610 if (fd->collapse > 1)
4612 int first_zero_iter = -1, dummy = -1;
4613 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4615 counts = XALLOCAVEC (tree, fd->collapse);
4616 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4617 zero_iter_bb, first_zero_iter,
4618 dummy_bb, dummy, l2_dom_bb);
4620 if (l2_dom_bb == NULL)
4621 l2_dom_bb = l1_bb;
4623 n1 = fd->loop.n1;
4624 n2 = fd->loop.n2;
4625 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4627 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4628 OMP_CLAUSE__LOOPTEMP_);
4629 gcc_assert (innerc);
4630 n1 = OMP_CLAUSE_DECL (innerc);
4631 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4632 OMP_CLAUSE__LOOPTEMP_);
4633 gcc_assert (innerc);
4634 n2 = OMP_CLAUSE_DECL (innerc);
4636 tree step = fd->loop.step;
4638 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4639 OMP_CLAUSE__SIMT_);
4640 if (is_simt)
4642 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4643 is_simt = safelen_int > 1;
4645 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
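/* For SIMT execution the iterations are distributed across the SIMT lanes:
   below, each lane offsets its private N1 by lane * STEP and the step is
   scaled by the vectorization factor (IFN_GOMP_SIMT_VF), so lane L executes
   logical iterations L, L + VF, L + 2*VF, ...  */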
4646 if (is_simt)
4648 simt_lane = create_tmp_var (unsigned_type_node);
4649 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4650 gimple_call_set_lhs (g, simt_lane);
4651 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4652 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4653 fold_convert (TREE_TYPE (step), simt_lane));
4654 n1 = fold_convert (type, n1);
4655 if (POINTER_TYPE_P (type))
4656 n1 = fold_build_pointer_plus (n1, offset);
4657 else
4658 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4660 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4661 if (fd->collapse > 1)
4662 simt_maxlane = build_one_cst (unsigned_type_node);
4663 else if (safelen_int < omp_max_simt_vf ())
4664 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4665 tree vf
4666 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4667 unsigned_type_node, 0);
4668 if (simt_maxlane)
4669 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4670 vf = fold_convert (TREE_TYPE (step), vf);
4671 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4674 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4675 if (fd->collapse > 1)
4677 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4679 gsi_prev (&gsi);
4680 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4681 gsi_next (&gsi);
4683 else
4684 for (i = 0; i < fd->collapse; i++)
4686 tree itype = TREE_TYPE (fd->loops[i].v);
4687 if (POINTER_TYPE_P (itype))
4688 itype = signed_type_for (itype);
4689 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4690 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4694 /* Remove the GIMPLE_OMP_FOR statement. */
4695 gsi_remove (&gsi, true);
4697 if (!broken_loop)
4699 /* Code to control the increment goes in the CONT_BB. */
4700 gsi = gsi_last_bb (cont_bb);
4701 stmt = gsi_stmt (gsi);
4702 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4704 if (POINTER_TYPE_P (type))
4705 t = fold_build_pointer_plus (fd->loop.v, step);
4706 else
4707 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4708 expand_omp_build_assign (&gsi, fd->loop.v, t);
4710 if (fd->collapse > 1)
4712 i = fd->collapse - 1;
4713 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4715 t = fold_convert (sizetype, fd->loops[i].step);
4716 t = fold_build_pointer_plus (fd->loops[i].v, t);
4718 else
4720 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4721 fd->loops[i].step);
4722 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4723 fd->loops[i].v, t);
4725 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4727 for (i = fd->collapse - 1; i > 0; i--)
4729 tree itype = TREE_TYPE (fd->loops[i].v);
4730 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4731 if (POINTER_TYPE_P (itype2))
4732 itype2 = signed_type_for (itype2);
4733 t = fold_convert (itype2, fd->loops[i - 1].step);
4734 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4735 GSI_SAME_STMT);
4736 t = build3 (COND_EXPR, itype2,
4737 build2 (fd->loops[i].cond_code, boolean_type_node,
4738 fd->loops[i].v,
4739 fold_convert (itype, fd->loops[i].n2)),
4740 build_int_cst (itype2, 0), t);
4741 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4742 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4743 else
4744 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4745 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4747 t = fold_convert (itype, fd->loops[i].n1);
4748 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4749 GSI_SAME_STMT);
4750 t = build3 (COND_EXPR, itype,
4751 build2 (fd->loops[i].cond_code, boolean_type_node,
4752 fd->loops[i].v,
4753 fold_convert (itype, fd->loops[i].n2)),
4754 fd->loops[i].v, t);
4755 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4759 /* Remove GIMPLE_OMP_CONTINUE. */
4760 gsi_remove (&gsi, true);
4763 /* Emit the condition in L1_BB. */
4764 gsi = gsi_start_bb (l1_bb);
4766 t = fold_convert (type, n2);
4767 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4768 false, GSI_CONTINUE_LINKING);
4769 tree v = fd->loop.v;
4770 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4771 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4772 false, GSI_CONTINUE_LINKING);
4773 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4774 cond_stmt = gimple_build_cond_empty (t);
4775 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4776 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4777 NULL, NULL)
4778 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4779 NULL, NULL))
4781 gsi = gsi_for_stmt (cond_stmt);
4782 gimple_regimplify_operands (cond_stmt, &gsi);
4785 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4786 if (is_simt)
4788 gsi = gsi_start_bb (l2_bb);
4789 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4790 if (POINTER_TYPE_P (type))
4791 t = fold_build_pointer_plus (fd->loop.v, step);
4792 else
4793 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4794 expand_omp_build_assign (&gsi, fd->loop.v, t);
4797 /* Remove GIMPLE_OMP_RETURN. */
4798 gsi = gsi_last_bb (exit_bb);
4799 gsi_remove (&gsi, true);
4801 /* Connect the new blocks. */
4802 remove_edge (FALLTHRU_EDGE (entry_bb));
4804 if (!broken_loop)
4806 remove_edge (BRANCH_EDGE (entry_bb));
4807 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4809 e = BRANCH_EDGE (l1_bb);
4810 ne = FALLTHRU_EDGE (l1_bb);
4811 e->flags = EDGE_TRUE_VALUE;
4813 else
4815 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4817 ne = single_succ_edge (l1_bb);
4818 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4821 ne->flags = EDGE_FALSE_VALUE;
4822 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4823 ne->probability = e->probability.invert ();
4825 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4826 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4828 if (simt_maxlane)
4830 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4831 NULL_TREE, NULL_TREE);
4832 gsi = gsi_last_bb (entry_bb);
4833 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4834 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4835 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4836 FALLTHRU_EDGE (entry_bb)->probability
4837 = profile_probability::guessed_always ().apply_scale (7, 8);
4838 BRANCH_EDGE (entry_bb)->probability
4839 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4840 l2_dom_bb = entry_bb;
4842 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4844 if (!broken_loop)
4846 struct loop *loop = alloc_loop ();
4847 loop->header = l1_bb;
4848 loop->latch = cont_bb;
4849 add_loop (loop, l1_bb->loop_father);
4850 loop->safelen = safelen_int;
4851 if (simduid)
4853 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4854 cfun->has_simduid_loops = true;
4856 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4857 the loop. */
4858 if ((flag_tree_loop_vectorize
4859 || !global_options_set.x_flag_tree_loop_vectorize)
4860 && flag_tree_loop_optimize
4861 && loop->safelen > 1)
4863 loop->force_vectorize = true;
4864 cfun->has_force_vectorize_loops = true;
4867 else if (simduid)
4868 cfun->has_simduid_loops = true;
4871 /* The taskloop construct is represented after gimplification as
4872 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4873 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4874 which should just compute all the loop temporaries needed
4875 for the GIMPLE_OMP_TASK. */
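/* For illustration: given

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       ...

   the outer GIMPLE_OMP_FOR expanded here performs no iteration at all; it
   only evaluates the bounds (and, when needed, the total iteration count)
   into the _looptemp_ clause temporaries of the GIMPLE_OMP_TASK, which the
   GOMP_taskloop{,_ull} runtime call later uses to carve the iteration space
   into individual tasks.  */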
4877 static void
4878 expand_omp_taskloop_for_outer (struct omp_region *region,
4879 struct omp_for_data *fd,
4880 gimple *inner_stmt)
4882 tree type, bias = NULL_TREE;
4883 basic_block entry_bb, cont_bb, exit_bb;
4884 gimple_stmt_iterator gsi;
4885 gassign *assign_stmt;
4886 tree *counts = NULL;
4887 int i;
4889 gcc_assert (inner_stmt);
4890 gcc_assert (region->cont);
4891 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4892 && gimple_omp_task_taskloop_p (inner_stmt));
4893 type = TREE_TYPE (fd->loop.v);
4895 /* See if we need to bias by LLONG_MIN. */
4896 if (fd->iter_type == long_long_unsigned_type_node
4897 && TREE_CODE (type) == INTEGER_TYPE
4898 && !TYPE_UNSIGNED (type))
4900 tree n1, n2;
4902 if (fd->loop.cond_code == LT_EXPR)
4904 n1 = fd->loop.n1;
4905 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4907 else
4909 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4910 n2 = fd->loop.n1;
4912 if (TREE_CODE (n1) != INTEGER_CST
4913 || TREE_CODE (n2) != INTEGER_CST
4914 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4915 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4918 entry_bb = region->entry;
4919 cont_bb = region->cont;
4920 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4921 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4922 exit_bb = region->exit;
4924 gsi = gsi_last_bb (entry_bb);
4925 gimple *for_stmt = gsi_stmt (gsi);
4926 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4927 if (fd->collapse > 1)
4929 int first_zero_iter = -1, dummy = -1;
4930 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4932 counts = XALLOCAVEC (tree, fd->collapse);
4933 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4934 zero_iter_bb, first_zero_iter,
4935 dummy_bb, dummy, l2_dom_bb);
4937 if (zero_iter_bb)
4939 /* Some counts[i] vars might be uninitialized if
4940 some loop has zero iterations. But the body shouldn't
4941 be executed in that case, so just avoid uninit warnings. */
4942 for (i = first_zero_iter; i < fd->collapse; i++)
4943 if (SSA_VAR_P (counts[i]))
4944 TREE_NO_WARNING (counts[i]) = 1;
4945 gsi_prev (&gsi);
4946 edge e = split_block (entry_bb, gsi_stmt (gsi));
4947 entry_bb = e->dest;
4948 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4949 gsi = gsi_last_bb (entry_bb);
4950 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4951 get_immediate_dominator (CDI_DOMINATORS,
4952 zero_iter_bb));
4956 tree t0, t1;
4957 t1 = fd->loop.n2;
4958 t0 = fd->loop.n1;
4959 if (POINTER_TYPE_P (TREE_TYPE (t0))
4960 && TYPE_PRECISION (TREE_TYPE (t0))
4961 != TYPE_PRECISION (fd->iter_type))
4963 /* Avoid casting pointers to integer of a different size. */
4964 tree itype = signed_type_for (type);
4965 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4966 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4968 else
4970 t1 = fold_convert (fd->iter_type, t1);
4971 t0 = fold_convert (fd->iter_type, t0);
4973 if (bias)
4975 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4976 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4979 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4980 OMP_CLAUSE__LOOPTEMP_);
4981 gcc_assert (innerc);
4982 tree startvar = OMP_CLAUSE_DECL (innerc);
4983 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4984 gcc_assert (innerc);
4985 tree endvar = OMP_CLAUSE_DECL (innerc);
4986 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4988 gcc_assert (innerc);
4989 for (i = 1; i < fd->collapse; i++)
4991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 OMP_CLAUSE__LOOPTEMP_);
4993 gcc_assert (innerc);
4995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4996 OMP_CLAUSE__LOOPTEMP_);
4997 if (innerc)
4999 /* If needed (inner taskloop has lastprivate clause), propagate
5000 down the total number of iterations. */
5001 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5002 NULL_TREE, false,
5003 GSI_CONTINUE_LINKING);
5004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5009 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5010 GSI_CONTINUE_LINKING);
5011 assign_stmt = gimple_build_assign (startvar, t0);
5012 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5014 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5015 GSI_CONTINUE_LINKING);
5016 assign_stmt = gimple_build_assign (endvar, t1);
5017 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5018 if (fd->collapse > 1)
5019 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5021 /* Remove the GIMPLE_OMP_FOR statement. */
5022 gsi = gsi_for_stmt (for_stmt);
5023 gsi_remove (&gsi, true);
5025 gsi = gsi_last_bb (cont_bb);
5026 gsi_remove (&gsi, true);
5028 gsi = gsi_last_bb (exit_bb);
5029 gsi_remove (&gsi, true);
5031 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5032 remove_edge (BRANCH_EDGE (entry_bb));
5033 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5034 remove_edge (BRANCH_EDGE (cont_bb));
5035 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5036 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5037 recompute_dominator (CDI_DOMINATORS, region->entry));
5040 /* The taskloop construct is represented after gimplification as
5041 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5042 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5043 The GOMP_taskloop{,_ull} function arranges for each task to be given
5044 just a single range of iterations. */
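/* Consequently, by the time the loop expanded below runs, each task only
   sees its own sub-range: the two _looptemp_ clause temporaries hold that
   task's N1 and N2, and the loop simply runs V from N1 while V cond N2,
   stepping by STEP, with no further work-sharing logic of its own.  */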
5046 static void
5047 expand_omp_taskloop_for_inner (struct omp_region *region,
5048 struct omp_for_data *fd,
5049 gimple *inner_stmt)
5051 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5052 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5053 basic_block fin_bb;
5054 gimple_stmt_iterator gsi;
5055 edge ep;
5056 bool broken_loop = region->cont == NULL;
5057 tree *counts = NULL;
5058 tree n1, n2, step;
5060 itype = type = TREE_TYPE (fd->loop.v);
5061 if (POINTER_TYPE_P (type))
5062 itype = signed_type_for (type);
5064 /* See if we need to bias by LLONG_MIN. */
5065 if (fd->iter_type == long_long_unsigned_type_node
5066 && TREE_CODE (type) == INTEGER_TYPE
5067 && !TYPE_UNSIGNED (type))
5069 tree n1, n2;
5071 if (fd->loop.cond_code == LT_EXPR)
5073 n1 = fd->loop.n1;
5074 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5076 else
5078 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5079 n2 = fd->loop.n1;
5081 if (TREE_CODE (n1) != INTEGER_CST
5082 || TREE_CODE (n2) != INTEGER_CST
5083 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5084 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5087 entry_bb = region->entry;
5088 cont_bb = region->cont;
5089 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5090 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5091 gcc_assert (broken_loop
5092 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5093 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5094 if (!broken_loop)
5096 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5097 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5099 exit_bb = region->exit;
5101 /* Iteration space partitioning goes in ENTRY_BB. */
5102 gsi = gsi_last_bb (entry_bb);
5103 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5105 if (fd->collapse > 1)
5107 int first_zero_iter = -1, dummy = -1;
5108 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5110 counts = XALLOCAVEC (tree, fd->collapse);
5111 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5112 fin_bb, first_zero_iter,
5113 dummy_bb, dummy, l2_dom_bb);
5114 t = NULL_TREE;
5116 else
5117 t = integer_one_node;
5119 step = fd->loop.step;
5120 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5121 OMP_CLAUSE__LOOPTEMP_);
5122 gcc_assert (innerc);
5123 n1 = OMP_CLAUSE_DECL (innerc);
5124 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5125 gcc_assert (innerc);
5126 n2 = OMP_CLAUSE_DECL (innerc);
5127 if (bias)
5129 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5130 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5132 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5133 true, NULL_TREE, true, GSI_SAME_STMT);
5134 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5135 true, NULL_TREE, true, GSI_SAME_STMT);
5136 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5137 true, NULL_TREE, true, GSI_SAME_STMT);
5139 tree startvar = fd->loop.v;
5140 tree endvar = NULL_TREE;
5142 if (gimple_omp_for_combined_p (fd->for_stmt))
5144 tree clauses = gimple_omp_for_clauses (inner_stmt);
5145 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5146 gcc_assert (innerc);
5147 startvar = OMP_CLAUSE_DECL (innerc);
5148 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5149 OMP_CLAUSE__LOOPTEMP_);
5150 gcc_assert (innerc);
5151 endvar = OMP_CLAUSE_DECL (innerc);
5153 t = fold_convert (TREE_TYPE (startvar), n1);
5154 t = force_gimple_operand_gsi (&gsi, t,
5155 DECL_P (startvar)
5156 && TREE_ADDRESSABLE (startvar),
5157 NULL_TREE, false, GSI_CONTINUE_LINKING);
5158 gimple *assign_stmt = gimple_build_assign (startvar, t);
5159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5161 t = fold_convert (TREE_TYPE (startvar), n2);
5162 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5163 false, GSI_CONTINUE_LINKING);
5164 if (endvar)
5166 assign_stmt = gimple_build_assign (endvar, e);
5167 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5168 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5169 assign_stmt = gimple_build_assign (fd->loop.v, e);
5170 else
5171 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5172 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5174 if (fd->collapse > 1)
5175 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5177 if (!broken_loop)
5179 /* The code controlling the sequential loop replaces the
5180 GIMPLE_OMP_CONTINUE. */
5181 gsi = gsi_last_bb (cont_bb);
5182 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5183 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5184 vmain = gimple_omp_continue_control_use (cont_stmt);
5185 vback = gimple_omp_continue_control_def (cont_stmt);
5187 if (!gimple_omp_for_combined_p (fd->for_stmt))
5189 if (POINTER_TYPE_P (type))
5190 t = fold_build_pointer_plus (vmain, step);
5191 else
5192 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5193 t = force_gimple_operand_gsi (&gsi, t,
5194 DECL_P (vback)
5195 && TREE_ADDRESSABLE (vback),
5196 NULL_TREE, true, GSI_SAME_STMT);
5197 assign_stmt = gimple_build_assign (vback, t);
5198 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5200 t = build2 (fd->loop.cond_code, boolean_type_node,
5201 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5202 ? t : vback, e);
5203 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5206 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5207 gsi_remove (&gsi, true);
5209 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5210 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5213 /* Remove the GIMPLE_OMP_FOR statement. */
5214 gsi = gsi_for_stmt (fd->for_stmt);
5215 gsi_remove (&gsi, true);
5217 /* Remove the GIMPLE_OMP_RETURN statement. */
5218 gsi = gsi_last_bb (exit_bb);
5219 gsi_remove (&gsi, true);
5221 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5222 if (!broken_loop)
5223 remove_edge (BRANCH_EDGE (entry_bb));
5224 else
5226 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5227 region->outer->cont = NULL;
5230 /* Connect all the blocks. */
5231 if (!broken_loop)
5233 ep = find_edge (cont_bb, body_bb);
5234 if (gimple_omp_for_combined_p (fd->for_stmt))
5236 remove_edge (ep);
5237 ep = NULL;
5239 else if (fd->collapse > 1)
5241 remove_edge (ep);
5242 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5244 else
5245 ep->flags = EDGE_TRUE_VALUE;
5246 find_edge (cont_bb, fin_bb)->flags
5247 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5250 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5251 recompute_dominator (CDI_DOMINATORS, body_bb));
5252 if (!broken_loop)
5253 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5254 recompute_dominator (CDI_DOMINATORS, fin_bb));
5256 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5258 struct loop *loop = alloc_loop ();
5259 loop->header = body_bb;
5260 if (collapse_bb == NULL)
5261 loop->latch = cont_bb;
5262 add_loop (loop, body_bb->loop_father);
5266 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5267 partitioned loop. The lowering here is abstracted, in that the
5268 loop parameters are passed through internal functions, which are
5269 further lowered by oacc_device_lower, once we get to the target
5270 compiler. The loop is of the form:
5272 for (V = B; V LTGT E; V += S) {BODY}
5274 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5275 (constant 0 for no chunking) and we will have a GWV partitioning
5276 mask, specifying dimensions over which the loop is to be
5277 partitioned (see note below). We generate code that looks like
5278 (this ignores tiling):
5280 <entry_bb> [incoming FALL->body, BRANCH->exit]
5281 typedef signedintify (typeof (V)) T; // underlying signed integral type
5282 T range = E - B;
5283 T chunk_no = 0;
5284 T DIR = LTGT == '<' ? +1 : -1;
5285 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5286 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5288 <head_bb> [created by splitting end of entry_bb]
5289 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5290 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5291 if (!(offset LTGT bound)) goto bottom_bb;
5293 <body_bb> [incoming]
5294 V = B + offset;
5295 {BODY}
5297 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5298 offset += step;
5299 if (offset LTGT bound) goto body_bb; [*]
5301 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5302 chunk_no++;
5303 if (chunk_no < chunk_max) goto head_bb;
5305 <exit_bb> [incoming]
5306 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5308 [*] Needed if V live at end of loop. */
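/* Purely as an illustration (not taken from the sources above): a user-level
   loop such as

     #pragma acc loop gang vector
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   reaches this routine with B = 0, E = n, S = 1 and LTGT = '<'.  The
   IFN_GOACC_LOOP calls emitted below only abstract the CHUNKS, STEP, OFFSET
   and BOUND computations; oacc_device_lower later specializes them for the
   gang/worker/vector dimensions selected by the GWV mask.  */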
5310 static void
5311 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5313 tree v = fd->loop.v;
5314 enum tree_code cond_code = fd->loop.cond_code;
5315 enum tree_code plus_code = PLUS_EXPR;
5317 tree chunk_size = integer_minus_one_node;
5318 tree gwv = integer_zero_node;
5319 tree iter_type = TREE_TYPE (v);
5320 tree diff_type = iter_type;
5321 tree plus_type = iter_type;
5322 struct oacc_collapse *counts = NULL;
5324 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5325 == GF_OMP_FOR_KIND_OACC_LOOP);
5326 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5327 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5329 if (POINTER_TYPE_P (iter_type))
5331 plus_code = POINTER_PLUS_EXPR;
5332 plus_type = sizetype;
5334 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5335 diff_type = signed_type_for (diff_type);
5336 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5337 diff_type = integer_type_node;
5339 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5340 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5341 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5342 basic_block bottom_bb = NULL;
5344 /* entry_bb has two successors; the branch edge is to the exit
5345 block, fallthrough edge to body. */
5346 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5347 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5349 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5350 body_bb, or to a block whose only successor is the body_bb. Its
5351 fallthrough successor is the final block (same as the branch
5352 successor of the entry_bb). */
5353 if (cont_bb)
5355 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5356 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5358 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5359 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5361 else
5362 gcc_assert (!gimple_in_ssa_p (cfun));
5364 /* The exit block only has entry_bb and cont_bb as predecessors. */
5365 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5367 tree chunk_no;
5368 tree chunk_max = NULL_TREE;
5369 tree bound, offset;
5370 tree step = create_tmp_var (diff_type, ".step");
5371 bool up = cond_code == LT_EXPR;
5372 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5373 bool chunking = !gimple_in_ssa_p (cfun);
5374 bool negating;
5376 /* Tiling vars. */
5377 tree tile_size = NULL_TREE;
5378 tree element_s = NULL_TREE;
5379 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5380 basic_block elem_body_bb = NULL;
5381 basic_block elem_cont_bb = NULL;
5383 /* SSA instances. */
5384 tree offset_incr = NULL_TREE;
5385 tree offset_init = NULL_TREE;
5387 gimple_stmt_iterator gsi;
5388 gassign *ass;
5389 gcall *call;
5390 gimple *stmt;
5391 tree expr;
5392 location_t loc;
5393 edge split, be, fte;
5395 /* Split the end of entry_bb to create head_bb. */
5396 split = split_block (entry_bb, last_stmt (entry_bb));
5397 basic_block head_bb = split->dest;
5398 entry_bb = split->src;
5400 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5401 gsi = gsi_last_bb (entry_bb);
5402 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5403 loc = gimple_location (for_stmt);
5405 if (gimple_in_ssa_p (cfun))
5407 offset_init = gimple_omp_for_index (for_stmt, 0);
5408 gcc_assert (integer_zerop (fd->loop.n1));
5409 /* The SSA parallelizer does gang parallelism. */
5410 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5413 if (fd->collapse > 1 || fd->tiling)
5415 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5416 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5417 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5418 TREE_TYPE (fd->loop.n2), loc);
5420 if (SSA_VAR_P (fd->loop.n2))
5422 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5423 true, GSI_SAME_STMT);
5424 ass = gimple_build_assign (fd->loop.n2, total);
5425 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5429 tree b = fd->loop.n1;
5430 tree e = fd->loop.n2;
5431 tree s = fd->loop.step;
5433 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5434 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5436 /* Convert the step, avoiding possible unsigned->signed overflow. */
5437 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5438 if (negating)
5439 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5440 s = fold_convert (diff_type, s);
5441 if (negating)
5442 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5443 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5445 if (!chunking)
5446 chunk_size = integer_zero_node;
5447 expr = fold_convert (diff_type, chunk_size);
5448 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5449 NULL_TREE, true, GSI_SAME_STMT);
5451 if (fd->tiling)
5453 /* Determine the tile size and element step,
5454 modify the outer loop step size. */
5455 tile_size = create_tmp_var (diff_type, ".tile_size");
5456 expr = build_int_cst (diff_type, 1);
5457 for (int ix = 0; ix < fd->collapse; ix++)
5458 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5459 expr = force_gimple_operand_gsi (&gsi, expr, true,
5460 NULL_TREE, true, GSI_SAME_STMT);
5461 ass = gimple_build_assign (tile_size, expr);
5462 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5464 element_s = create_tmp_var (diff_type, ".element_s");
5465 ass = gimple_build_assign (element_s, s);
5466 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5468 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5469 s = force_gimple_operand_gsi (&gsi, expr, true,
5470 NULL_TREE, true, GSI_SAME_STMT);
5473 /* Determine the range, avoiding possible unsigned->signed overflow. */
5474 negating = !up && TYPE_UNSIGNED (iter_type);
5475 expr = fold_build2 (MINUS_EXPR, plus_type,
5476 fold_convert (plus_type, negating ? b : e),
5477 fold_convert (plus_type, negating ? e : b));
5478 expr = fold_convert (diff_type, expr);
5479 if (negating)
5480 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5481 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5482 NULL_TREE, true, GSI_SAME_STMT);
5484 chunk_no = build_int_cst (diff_type, 0);
5485 if (chunking)
5487 gcc_assert (!gimple_in_ssa_p (cfun));
5489 expr = chunk_no;
5490 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5491 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5493 ass = gimple_build_assign (chunk_no, expr);
5494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5496 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5497 build_int_cst (integer_type_node,
5498 IFN_GOACC_LOOP_CHUNKS),
5499 dir, range, s, chunk_size, gwv);
5500 gimple_call_set_lhs (call, chunk_max);
5501 gimple_set_location (call, loc);
5502 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5504 else
5505 chunk_size = chunk_no;
5507 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5508 build_int_cst (integer_type_node,
5509 IFN_GOACC_LOOP_STEP),
5510 dir, range, s, chunk_size, gwv);
5511 gimple_call_set_lhs (call, step);
5512 gimple_set_location (call, loc);
5513 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5515 /* Remove the GIMPLE_OMP_FOR. */
5516 gsi_remove (&gsi, true);
5518 /* Fixup edges from head_bb. */
5519 be = BRANCH_EDGE (head_bb);
5520 fte = FALLTHRU_EDGE (head_bb);
5521 be->flags |= EDGE_FALSE_VALUE;
5522 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5524 basic_block body_bb = fte->dest;
5526 if (gimple_in_ssa_p (cfun))
5528 gsi = gsi_last_bb (cont_bb);
5529 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5531 offset = gimple_omp_continue_control_use (cont_stmt);
5532 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5534 else
5536 offset = create_tmp_var (diff_type, ".offset");
5537 offset_init = offset_incr = offset;
5539 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5541 /* Loop offset & bound go into head_bb. */
5542 gsi = gsi_start_bb (head_bb);
5544 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5545 build_int_cst (integer_type_node,
5546 IFN_GOACC_LOOP_OFFSET),
5547 dir, range, s,
5548 chunk_size, gwv, chunk_no);
5549 gimple_call_set_lhs (call, offset_init);
5550 gimple_set_location (call, loc);
5551 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5553 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5554 build_int_cst (integer_type_node,
5555 IFN_GOACC_LOOP_BOUND),
5556 dir, range, s,
5557 chunk_size, gwv, offset_init);
5558 gimple_call_set_lhs (call, bound);
5559 gimple_set_location (call, loc);
5560 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5562 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5563 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5564 GSI_CONTINUE_LINKING);
5566 /* V assignment goes into body_bb. */
5567 if (!gimple_in_ssa_p (cfun))
5569 gsi = gsi_start_bb (body_bb);
5571 expr = build2 (plus_code, iter_type, b,
5572 fold_convert (plus_type, offset));
5573 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5574 true, GSI_SAME_STMT);
5575 ass = gimple_build_assign (v, expr);
5576 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5578 if (fd->collapse > 1 || fd->tiling)
5579 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5581 if (fd->tiling)
5583 /* Determine the range of the element loop -- usually simply
5584 the tile_size, but could be smaller if the final
5585 iteration of the outer loop is a partial tile. */
5586 tree e_range = create_tmp_var (diff_type, ".e_range");
5588 expr = build2 (MIN_EXPR, diff_type,
5589 build2 (MINUS_EXPR, diff_type, bound, offset),
5590 build2 (MULT_EXPR, diff_type, tile_size,
5591 element_s));
5592 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5593 true, GSI_SAME_STMT);
5594 ass = gimple_build_assign (e_range, expr);
5595 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5597 /* Determine bound, offset & step of inner loop. */
5598 e_bound = create_tmp_var (diff_type, ".e_bound");
5599 e_offset = create_tmp_var (diff_type, ".e_offset");
5600 e_step = create_tmp_var (diff_type, ".e_step");
5602 /* Mark these as element loops. */
5603 tree t, e_gwv = integer_minus_one_node;
5604 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5606 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5607 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5608 element_s, chunk, e_gwv, chunk);
5609 gimple_call_set_lhs (call, e_offset);
5610 gimple_set_location (call, loc);
5611 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5613 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5614 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5615 element_s, chunk, e_gwv, e_offset);
5616 gimple_call_set_lhs (call, e_bound);
5617 gimple_set_location (call, loc);
5618 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5620 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5621 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5622 element_s, chunk, e_gwv);
5623 gimple_call_set_lhs (call, e_step);
5624 gimple_set_location (call, loc);
5625 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5627 /* Add test and split block. */
5628 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5629 stmt = gimple_build_cond_empty (expr);
5630 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5631 split = split_block (body_bb, stmt);
5632 elem_body_bb = split->dest;
5633 if (cont_bb == body_bb)
5634 cont_bb = elem_body_bb;
5635 body_bb = split->src;
5637 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5639 /* Initialize the user's loop vars. */
5640 gsi = gsi_start_bb (elem_body_bb);
5641 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5645 /* Loop increment goes into cont_bb. If this is not a loop, we
5646 will have spawned threads as if it was, and each one will
5647 execute one iteration. The specification is not explicit about
5648 whether such constructs are ill-formed or not, and they can
5649 occur, especially when noreturn routines are involved. */
5650 if (cont_bb)
5652 gsi = gsi_last_bb (cont_bb);
5653 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5654 loc = gimple_location (cont_stmt);
5656 if (fd->tiling)
5658 /* Insert element loop increment and test. */
5659 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5660 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5661 true, GSI_SAME_STMT);
5662 ass = gimple_build_assign (e_offset, expr);
5663 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5664 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5666 stmt = gimple_build_cond_empty (expr);
5667 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5668 split = split_block (cont_bb, stmt);
5669 elem_cont_bb = split->src;
5670 cont_bb = split->dest;
5672 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5673 split->probability = profile_probability::unlikely ().guessed ();
5674 edge latch_edge
5675 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5676 latch_edge->probability = profile_probability::likely ().guessed ();
5678 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5679 skip_edge->probability = profile_probability::unlikely ().guessed ();
5680 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5681 loop_entry_edge->probability
5682 = profile_probability::likely ().guessed ();
5684 gsi = gsi_for_stmt (cont_stmt);
5687 /* Increment offset. */
5688 if (gimple_in_ssa_p (cfun))
5689 expr = build2 (plus_code, iter_type, offset,
5690 fold_convert (plus_type, step));
5691 else
5692 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5693 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5694 true, GSI_SAME_STMT);
5695 ass = gimple_build_assign (offset_incr, expr);
5696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5697 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5698 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5700 /* Remove the GIMPLE_OMP_CONTINUE. */
5701 gsi_remove (&gsi, true);
5703 /* Fixup edges from cont_bb. */
5704 be = BRANCH_EDGE (cont_bb);
5705 fte = FALLTHRU_EDGE (cont_bb);
5706 be->flags |= EDGE_TRUE_VALUE;
5707 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5709 if (chunking)
5711 /* Split the beginning of exit_bb to make bottom_bb. We
5712 need to insert a nop at the start, because splitting is
5713 after a stmt, not before. */
5714 gsi = gsi_start_bb (exit_bb);
5715 stmt = gimple_build_nop ();
5716 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5717 split = split_block (exit_bb, stmt);
5718 bottom_bb = split->src;
5719 exit_bb = split->dest;
5720 gsi = gsi_last_bb (bottom_bb);
5722 /* Chunk increment and test goes into bottom_bb. */
5723 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5724 build_int_cst (diff_type, 1));
5725 ass = gimple_build_assign (chunk_no, expr);
5726 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5728 /* Chunk test at end of bottom_bb. */
5729 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5730 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5731 GSI_CONTINUE_LINKING);
5733 /* Fixup edges from bottom_bb. */
5734 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5735 split->probability = profile_probability::unlikely ().guessed ();
5736 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5737 latch_edge->probability = profile_probability::likely ().guessed ();
5741 gsi = gsi_last_bb (exit_bb);
5742 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5743 loc = gimple_location (gsi_stmt (gsi));
5745 if (!gimple_in_ssa_p (cfun))
5747 /* Insert the final value of V, in case it is live. This is the
5748 value for the only thread that survives past the join. */
5749 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5750 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5751 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5752 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5753 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5754 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5755 true, GSI_SAME_STMT);
5756 ass = gimple_build_assign (v, expr);
5757 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5760 /* Remove the OMP_RETURN. */
5761 gsi_remove (&gsi, true);
5763 if (cont_bb)
5765 /* We now have one, two or three nested loops. Update the loop
5766 structures. */
5767 struct loop *parent = entry_bb->loop_father;
5768 struct loop *body = body_bb->loop_father;
5770 if (chunking)
5772 struct loop *chunk_loop = alloc_loop ();
5773 chunk_loop->header = head_bb;
5774 chunk_loop->latch = bottom_bb;
5775 add_loop (chunk_loop, parent);
5776 parent = chunk_loop;
5778 else if (parent != body)
5780 gcc_assert (body->header == body_bb);
5781 gcc_assert (body->latch == cont_bb
5782 || single_pred (body->latch) == cont_bb);
5783 parent = NULL;
5786 if (parent)
5788 struct loop *body_loop = alloc_loop ();
5789 body_loop->header = body_bb;
5790 body_loop->latch = cont_bb;
5791 add_loop (body_loop, parent);
5793 if (fd->tiling)
5795 /* Insert tiling's element loop. */
5796 struct loop *inner_loop = alloc_loop ();
5797 inner_loop->header = elem_body_bb;
5798 inner_loop->latch = elem_cont_bb;
5799 add_loop (inner_loop, body_loop);
5805 /* Expand the OMP loop defined by REGION. */
5807 static void
5808 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5810 struct omp_for_data fd;
5811 struct omp_for_data_loop *loops;
5813 loops
5814 = (struct omp_for_data_loop *)
5815 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5816 * sizeof (struct omp_for_data_loop));
5817 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5818 &fd, loops);
5819 region->sched_kind = fd.sched_kind;
5820 region->sched_modifiers = fd.sched_modifiers;
5822 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5823 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5824 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5825 if (region->cont)
5827 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5828 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5829 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5831 else
5832 /* If there isn't a continue, then this is a degenerate case where
5833 the introduction of abnormal edges during lowering will prevent
5834 original loops from being detected. Fix that up. */
5835 loops_state_set (LOOPS_NEED_FIXUP);
5837 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5838 expand_omp_simd (region, &fd);
5839 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5840 expand_cilk_for (region, &fd);
5841 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5843 gcc_assert (!inner_stmt);
5844 expand_oacc_for (region, &fd);
5846 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5848 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5849 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5850 else
5851 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5853 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5854 && !fd.have_ordered)
5856 if (fd.chunk_size == NULL)
5857 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5858 else
5859 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5861 else
5863 int fn_index, start_ix, next_ix;
5865 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5866 == GF_OMP_FOR_KIND_FOR);
5867 if (fd.chunk_size == NULL
5868 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5869 fd.chunk_size = integer_zero_node;
5870 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5871 switch (fd.sched_kind)
5873 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5874 fn_index = 3;
5875 break;
5876 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5877 case OMP_CLAUSE_SCHEDULE_GUIDED:
5878 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5879 && !fd.ordered
5880 && !fd.have_ordered)
5882 fn_index = 3 + fd.sched_kind;
5883 break;
5885 /* FALLTHRU */
5886 default:
5887 fn_index = fd.sched_kind;
5888 break;
5890 if (!fd.ordered)
5891 fn_index += fd.have_ordered * 6;
5892 if (fd.ordered)
5893 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5894 else
5895 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5896 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5897 if (fd.iter_type == long_long_unsigned_type_node)
5899 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5900 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5901 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5902 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5904 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5905 (enum built_in_function) next_ix, inner_stmt);
5908 if (gimple_in_ssa_p (cfun))
5909 update_ssa (TODO_update_ssa_only_virtuals);
5912 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5914 v = GOMP_sections_start (n);
5916 switch (v)
5918 case 0:
5919 goto L2;
5920 case 1:
5921 section 1;
5922 goto L1;
5923 case 2:
5925 case n:
5927 default:
5928 abort ();
5931 v = GOMP_sections_next ();
5932 goto L0;
5934 reduction;
5936 If this is a combined parallel+sections region, replace the call to
5937 GOMP_sections_start with a call to GOMP_sections_next. */
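/* As a purely illustrative example, a directive such as

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   ends up with each thread repeatedly fetching a section number from the
   runtime (GOMP_sections_start, then GOMP_sections_next) and dispatching on
   it through the switch built below; a returned value of 0 sends the thread
   to L2 and out of the construct.  */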
5939 static void
5940 expand_omp_sections (struct omp_region *region)
5942 tree t, u, vin = NULL, vmain, vnext, l2;
5943 unsigned len;
5944 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5945 gimple_stmt_iterator si, switch_si;
5946 gomp_sections *sections_stmt;
5947 gimple *stmt;
5948 gomp_continue *cont;
5949 edge_iterator ei;
5950 edge e;
5951 struct omp_region *inner;
5952 unsigned i, casei;
5953 bool exit_reachable = region->cont != NULL;
5955 gcc_assert (region->exit != NULL);
5956 entry_bb = region->entry;
5957 l0_bb = single_succ (entry_bb);
5958 l1_bb = region->cont;
5959 l2_bb = region->exit;
5960 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5961 l2 = gimple_block_label (l2_bb);
5962 else
5964 /* This can happen if there are reductions. */
5965 len = EDGE_COUNT (l0_bb->succs);
5966 gcc_assert (len > 0);
5967 e = EDGE_SUCC (l0_bb, len - 1);
5968 si = gsi_last_bb (e->dest);
5969 l2 = NULL_TREE;
5970 if (gsi_end_p (si)
5971 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5972 l2 = gimple_block_label (e->dest);
5973 else
5974 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5976 si = gsi_last_bb (e->dest);
5977 if (gsi_end_p (si)
5978 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5980 l2 = gimple_block_label (e->dest);
5981 break;
5985 if (exit_reachable)
5986 default_bb = create_empty_bb (l1_bb->prev_bb);
5987 else
5988 default_bb = create_empty_bb (l0_bb);
5990 /* We will build a switch() with enough cases for all the
5991 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
5992 and a default case to abort if something goes wrong. */
5993 len = EDGE_COUNT (l0_bb->succs);
5995 /* Use vec::quick_push on label_vec throughout, since we know the size
5996 in advance. */
5997 auto_vec<tree> label_vec (len);
5999 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6000 GIMPLE_OMP_SECTIONS statement. */
6001 si = gsi_last_bb (entry_bb);
6002 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6003 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6004 vin = gimple_omp_sections_control (sections_stmt);
6005 if (!is_combined_parallel (region))
6007 /* If we are not inside a combined parallel+sections region,
6008 call GOMP_sections_start. */
6009 t = build_int_cst (unsigned_type_node, len - 1);
6010 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6011 stmt = gimple_build_call (u, 1, t);
6013 else
6015 /* Otherwise, call GOMP_sections_next. */
6016 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6017 stmt = gimple_build_call (u, 0);
6019 gimple_call_set_lhs (stmt, vin);
6020 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6021 gsi_remove (&si, true);
6023 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6024 L0_BB. */
6025 switch_si = gsi_last_bb (l0_bb);
6026 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6027 if (exit_reachable)
6029 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6030 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6031 vmain = gimple_omp_continue_control_use (cont);
6032 vnext = gimple_omp_continue_control_def (cont);
6034 else
6036 vmain = vin;
6037 vnext = NULL_TREE;
6040 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6041 label_vec.quick_push (t);
6042 i = 1;
6044 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6045 for (inner = region->inner, casei = 1;
6046 inner;
6047 inner = inner->next, i++, casei++)
6049 basic_block s_entry_bb, s_exit_bb;
6051 /* Skip optional reduction region. */
6052 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6054 --i;
6055 --casei;
6056 continue;
6059 s_entry_bb = inner->entry;
6060 s_exit_bb = inner->exit;
6062 t = gimple_block_label (s_entry_bb);
6063 u = build_int_cst (unsigned_type_node, casei);
6064 u = build_case_label (u, NULL, t);
6065 label_vec.quick_push (u);
6067 si = gsi_last_bb (s_entry_bb);
6068 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6069 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6070 gsi_remove (&si, true);
6071 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6073 if (s_exit_bb == NULL)
6074 continue;
6076 si = gsi_last_bb (s_exit_bb);
6077 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6078 gsi_remove (&si, true);
6080 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6083 /* Error handling code goes in DEFAULT_BB. */
6084 t = gimple_block_label (default_bb);
6085 u = build_case_label (NULL, NULL, t);
6086 make_edge (l0_bb, default_bb, 0);
6087 add_bb_to_loop (default_bb, current_loops->tree_root);
6089 stmt = gimple_build_switch (vmain, u, label_vec);
6090 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6091 gsi_remove (&switch_si, true);
6093 si = gsi_start_bb (default_bb);
6094 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6095 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6097 if (exit_reachable)
6099 tree bfn_decl;
6101 /* Code to get the next section goes in L1_BB. */
6102 si = gsi_last_bb (l1_bb);
6103 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6105 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6106 stmt = gimple_build_call (bfn_decl, 0);
6107 gimple_call_set_lhs (stmt, vnext);
6108 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6109 gsi_remove (&si, true);
6111 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6114 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6115 si = gsi_last_bb (l2_bb);
6116 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6117 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6118 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6119 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6120 else
6121 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6122 stmt = gimple_build_call (t, 0);
6123 if (gimple_omp_return_lhs (gsi_stmt (si)))
6124 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6125 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6126 gsi_remove (&si, true);
6128 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6131 /* Expand code for an OpenMP single directive. We've already expanded
6132 much of the code, here we simply place the GOMP_barrier call. */
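/* For illustration only: given

     #pragma omp single
     foo ();

   lowering has already arranged for just one thread to call foo (); the
   GOMP_barrier emitted below implements the implicit barrier at the end of
   the construct, and is omitted when a nowait clause was present.  */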
6134 static void
6135 expand_omp_single (struct omp_region *region)
6137 basic_block entry_bb, exit_bb;
6138 gimple_stmt_iterator si;
6140 entry_bb = region->entry;
6141 exit_bb = region->exit;
6143 si = gsi_last_bb (entry_bb);
6144 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6145 gsi_remove (&si, true);
6146 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6148 si = gsi_last_bb (exit_bb);
6149 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6151 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6152 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6154 gsi_remove (&si, true);
6155 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6158 /* Generic expansion for OpenMP synchronization directives: master,
6159 taskgroup, ordered, critical and teams.  All we need to do here is remove the entry
6160 and exit markers for REGION. */
6162 static void
6163 expand_omp_synch (struct omp_region *region)
6165 basic_block entry_bb, exit_bb;
6166 gimple_stmt_iterator si;
6168 entry_bb = region->entry;
6169 exit_bb = region->exit;
6171 si = gsi_last_bb (entry_bb);
6172 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6173 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6174 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6175 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6176 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6177 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6178 gsi_remove (&si, true);
6179 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6181 if (exit_bb)
6183 si = gsi_last_bb (exit_bb);
6184 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6185 gsi_remove (&si, true);
6186 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6190 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6191 operation as a normal volatile load. */
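/* Illustrative sketch (the type and size are hypothetical): for

     #pragma omp atomic read
     v = x;

   with a 4-byte x this emits the equivalent of

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   using MEMMODEL_SEQ_CST instead when the seq_cst clause was given.  */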
6193 static bool
6194 expand_omp_atomic_load (basic_block load_bb, tree addr,
6195 tree loaded_val, int index)
6197 enum built_in_function tmpbase;
6198 gimple_stmt_iterator gsi;
6199 basic_block store_bb;
6200 location_t loc;
6201 gimple *stmt;
6202 tree decl, call, type, itype;
6204 gsi = gsi_last_bb (load_bb);
6205 stmt = gsi_stmt (gsi);
6206 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6207 loc = gimple_location (stmt);
6209 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6210 is smaller than word size, then expand_atomic_load assumes that the load
6211 is atomic. We could avoid the builtin entirely in this case. */
6213 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6214 decl = builtin_decl_explicit (tmpbase);
6215 if (decl == NULL_TREE)
6216 return false;
6218 type = TREE_TYPE (loaded_val);
6219 itype = TREE_TYPE (TREE_TYPE (decl));
6221 call = build_call_expr_loc (loc, decl, 2, addr,
6222 build_int_cst (NULL,
6223 gimple_omp_atomic_seq_cst_p (stmt)
6224 ? MEMMODEL_SEQ_CST
6225 : MEMMODEL_RELAXED));
6226 if (!useless_type_conversion_p (type, itype))
6227 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6228 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6230 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6231 gsi_remove (&gsi, true);
6233 store_bb = single_succ (load_bb);
6234 gsi = gsi_last_bb (store_bb);
6235 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6236 gsi_remove (&gsi, true);
6238 if (gimple_in_ssa_p (cfun))
6239 update_ssa (TODO_update_ssa_no_phi);
6241 return true;
6244 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6245 operation as a normal volatile store. */
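/* Illustrative sketch: `#pragma omp atomic write  x = expr;' becomes roughly

     __atomic_store_N (&x, expr, MEMMODEL_RELAXED);

   (N being the operand size in bytes), while a capture form that also needs
   the old value, e.g. `{ v = x; x = expr; }', is mapped to
   __atomic_exchange_N instead -- the "exchange" case detected below.  */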
6247 static bool
6248 expand_omp_atomic_store (basic_block load_bb, tree addr,
6249 tree loaded_val, tree stored_val, int index)
6251 enum built_in_function tmpbase;
6252 gimple_stmt_iterator gsi;
6253 basic_block store_bb = single_succ (load_bb);
6254 location_t loc;
6255 gimple *stmt;
6256 tree decl, call, type, itype;
6257 machine_mode imode;
6258 bool exchange;
6260 gsi = gsi_last_bb (load_bb);
6261 stmt = gsi_stmt (gsi);
6262 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6264 /* If the load value is needed, then this isn't a store but an exchange. */
6265 exchange = gimple_omp_atomic_need_value_p (stmt);
6267 gsi = gsi_last_bb (store_bb);
6268 stmt = gsi_stmt (gsi);
6269 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6270 loc = gimple_location (stmt);
6272 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6273 is smaller than word size, then expand_atomic_store assumes that the store
6274 is atomic. We could avoid the builtin entirely in this case. */
6276 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6277 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6278 decl = builtin_decl_explicit (tmpbase);
6279 if (decl == NULL_TREE)
6280 return false;
6282 type = TREE_TYPE (stored_val);
6284 /* Dig out the type of the function's second argument. */
6285 itype = TREE_TYPE (decl);
6286 itype = TYPE_ARG_TYPES (itype);
6287 itype = TREE_CHAIN (itype);
6288 itype = TREE_VALUE (itype);
6289 imode = TYPE_MODE (itype);
6291 if (exchange && !can_atomic_exchange_p (imode, true))
6292 return false;
6294 if (!useless_type_conversion_p (itype, type))
6295 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6296 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6297 build_int_cst (NULL,
6298 gimple_omp_atomic_seq_cst_p (stmt)
6299 ? MEMMODEL_SEQ_CST
6300 : MEMMODEL_RELAXED));
6301 if (exchange)
6303 if (!useless_type_conversion_p (type, itype))
6304 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6305 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6308 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6309 gsi_remove (&gsi, true);
6311 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6312 gsi = gsi_last_bb (load_bb);
6313 gsi_remove (&gsi, true);
6315 if (gimple_in_ssa_p (cfun))
6316 update_ssa (TODO_update_ssa_no_phi);
6318 return true;
6321 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6322 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6323 size of the data type, and thus usable to find the index of the builtin
6324 decl. Returns false if the expression is not of the proper form. */
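/* A hypothetical worked example: for

     #pragma omp atomic
     x = x + 1;

   with a 4-byte integer x, the load/assign/store sequence below is recognized
   and collapsed to

     __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

   (__atomic_add_fetch_4 instead if the updated value is needed, and
   MEMMODEL_SEQ_CST when the seq_cst clause was given).  */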
6326 static bool
6327 expand_omp_atomic_fetch_op (basic_block load_bb,
6328 tree addr, tree loaded_val,
6329 tree stored_val, int index)
6331 enum built_in_function oldbase, newbase, tmpbase;
6332 tree decl, itype, call;
6333 tree lhs, rhs;
6334 basic_block store_bb = single_succ (load_bb);
6335 gimple_stmt_iterator gsi;
6336 gimple *stmt;
6337 location_t loc;
6338 enum tree_code code;
6339 bool need_old, need_new;
6340 machine_mode imode;
6341 bool seq_cst;
6343 /* We expect to find the following sequences:
6345 load_bb:
6346 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6348 store_bb:
6349 val = tmp OP something; (or: something OP tmp)
6350 GIMPLE_OMP_ATOMIC_STORE (val)
6352 ???FIXME: Allow a more flexible sequence.
6353 Perhaps use data flow to pick the statements.  */
6357 gsi = gsi_after_labels (store_bb);
6358 stmt = gsi_stmt (gsi);
6359 loc = gimple_location (stmt);
6360 if (!is_gimple_assign (stmt))
6361 return false;
6362 gsi_next (&gsi);
6363 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6364 return false;
6365 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6366 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6367 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6368 gcc_checking_assert (!need_old || !need_new);
6370 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6371 return false;
6373 /* Check for one of the supported fetch-op operations. */
6374 code = gimple_assign_rhs_code (stmt);
6375 switch (code)
6377 case PLUS_EXPR:
6378 case POINTER_PLUS_EXPR:
6379 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6380 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6381 break;
6382 case MINUS_EXPR:
6383 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6384 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6385 break;
6386 case BIT_AND_EXPR:
6387 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6388 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6389 break;
6390 case BIT_IOR_EXPR:
6391 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6392 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6393 break;
6394 case BIT_XOR_EXPR:
6395 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6396 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6397 break;
6398 default:
6399 return false;
6402 /* Make sure the expression is of the proper form. */
6403 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6404 rhs = gimple_assign_rhs2 (stmt);
6405 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6406 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6407 rhs = gimple_assign_rhs1 (stmt);
6408 else
6409 return false;
6411 tmpbase = ((enum built_in_function)
6412 ((need_new ? newbase : oldbase) + index + 1));
6413 decl = builtin_decl_explicit (tmpbase);
6414 if (decl == NULL_TREE)
6415 return false;
6416 itype = TREE_TYPE (TREE_TYPE (decl));
6417 imode = TYPE_MODE (itype);
6419 /* We could test all of the various optabs involved, but the fact of the
6420 matter is that (with the exception of i486 vs i586 and xadd) all targets
6421 that support any atomic operation optab also implement compare-and-swap.
6422 Let optabs.c take care of expanding any compare-and-swap loop. */
6423 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6424 return false;
6426 gsi = gsi_last_bb (load_bb);
6427 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6429 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6430 It only requires that the operation happen atomically. Thus we can
6431 use the RELAXED memory model. */
6432 call = build_call_expr_loc (loc, decl, 3, addr,
6433 fold_convert_loc (loc, itype, rhs),
6434 build_int_cst (NULL,
6435 seq_cst ? MEMMODEL_SEQ_CST
6436 : MEMMODEL_RELAXED));
6438 if (need_old || need_new)
6440 lhs = need_old ? loaded_val : stored_val;
6441 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6442 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6444 else
6445 call = fold_convert_loc (loc, void_type_node, call);
6446 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6447 gsi_remove (&gsi, true);
6449 gsi = gsi_last_bb (store_bb);
6450 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6451 gsi_remove (&gsi, true);
6452 gsi = gsi_last_bb (store_bb);
6453 stmt = gsi_stmt (gsi);
6454 gsi_remove (&gsi, true);
6456 if (gimple_in_ssa_p (cfun))
6458 release_defs (stmt);
6459 update_ssa (TODO_update_ssa_no_phi);
6462 return true;
6465 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6467 oldval = *addr;
6468 repeat:
6469 newval = rhs; // with oldval replacing *addr in rhs
6470 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6471 if (oldval != newval)
6472 goto repeat;
6474 INDEX is log2 of the size of the data type, and thus usable to find the
6475 index of the builtin decl. */
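/* Rough sketch of a concrete instance (hypothetical, not verbatim output):
   for

     #pragma omp atomic
     d *= 2.0;

   with a double d there is no fetch-op builtin, so the value is operated on
   through its integer view (VIEW_CONVERT below stands for the view-convert /
   type-punning step):

     oldi = *(long long *) &d;               // or __atomic_load_8
   repeat:
     newd = VIEW_CONVERT (double, oldi) * 2.0;
     newi = VIEW_CONVERT (long long, newd);
     previ = __sync_val_compare_and_swap ((long long *) &d, oldi, newi);
     if (previ != oldi) { oldi = previ; goto repeat; }
 */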
6477 static bool
6478 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6479 tree addr, tree loaded_val, tree stored_val,
6480 int index)
6482 tree loadedi, storedi, initial, new_storedi, old_vali;
6483 tree type, itype, cmpxchg, iaddr;
6484 gimple_stmt_iterator si;
6485 basic_block loop_header = single_succ (load_bb);
6486 gimple *phi, *stmt;
6487 edge e;
6488 enum built_in_function fncode;
6490 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6491 order to use the RELAXED memory model effectively. */
6492 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6493 + index + 1);
6494 cmpxchg = builtin_decl_explicit (fncode);
6495 if (cmpxchg == NULL_TREE)
6496 return false;
6497 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6498 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6500 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6501 || !can_atomic_load_p (TYPE_MODE (itype)))
6502 return false;
6504 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6505 si = gsi_last_bb (load_bb);
6506 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6508 /* For floating-point values, we'll need to view-convert them to integers
6509 so that we can perform the atomic compare and swap. Simplify the
6510 following code by always setting up the "i"ntegral variables. */
6511 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6513 tree iaddr_val;
6515 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6516 true));
6517 iaddr_val
6518 = force_gimple_operand_gsi (&si,
6519 fold_convert (TREE_TYPE (iaddr), addr),
6520 false, NULL_TREE, true, GSI_SAME_STMT);
6521 stmt = gimple_build_assign (iaddr, iaddr_val);
6522 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6523 loadedi = create_tmp_var (itype);
6524 if (gimple_in_ssa_p (cfun))
6525 loadedi = make_ssa_name (loadedi);
6527 else
6529 iaddr = addr;
6530 loadedi = loaded_val;
6533 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6534 tree loaddecl = builtin_decl_explicit (fncode);
6535 if (loaddecl)
6536 initial
6537 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6538 build_call_expr (loaddecl, 2, iaddr,
6539 build_int_cst (NULL_TREE,
6540 MEMMODEL_RELAXED)));
6541 else
6542 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6543 build_int_cst (TREE_TYPE (iaddr), 0));
6545 initial
6546 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6547 GSI_SAME_STMT);
6549 /* Move the value to the LOADEDI temporary. */
6550 if (gimple_in_ssa_p (cfun))
6552 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6553 phi = create_phi_node (loadedi, loop_header);
6554 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6555 initial);
6557 else
6558 gsi_insert_before (&si,
6559 gimple_build_assign (loadedi, initial),
6560 GSI_SAME_STMT);
6561 if (loadedi != loaded_val)
6563 gimple_stmt_iterator gsi2;
6564 tree x;
6566 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6567 gsi2 = gsi_start_bb (loop_header);
6568 if (gimple_in_ssa_p (cfun))
6570 gassign *stmt;
6571 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6572 true, GSI_SAME_STMT);
6573 stmt = gimple_build_assign (loaded_val, x);
6574 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6576 else
6578 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6579 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6580 true, GSI_SAME_STMT);
6583 gsi_remove (&si, true);
6585 si = gsi_last_bb (store_bb);
6586 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6588 if (iaddr == addr)
6589 storedi = stored_val;
6590 else
6591 storedi
6592 = force_gimple_operand_gsi (&si,
6593 build1 (VIEW_CONVERT_EXPR, itype,
6594 stored_val), true, NULL_TREE, true,
6595 GSI_SAME_STMT);
6597 /* Build the compare&swap statement. */
6598 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6599 new_storedi = force_gimple_operand_gsi (&si,
6600 fold_convert (TREE_TYPE (loadedi),
6601 new_storedi),
6602 true, NULL_TREE,
6603 true, GSI_SAME_STMT);
6605 if (gimple_in_ssa_p (cfun))
6606 old_vali = loadedi;
6607 else
6609 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6610 stmt = gimple_build_assign (old_vali, loadedi);
6611 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6613 stmt = gimple_build_assign (loadedi, new_storedi);
6614 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6617 /* Note that we always perform the comparison as an integer, even for
6618 floating point. This allows the atomic operation to properly
6619 succeed even with NaNs and -0.0. */
6620 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6621 stmt = gimple_build_cond_empty (ne);
6622 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6624 /* Update cfg. */
6625 e = single_succ_edge (store_bb);
6626 e->flags &= ~EDGE_FALLTHRU;
6627 e->flags |= EDGE_FALSE_VALUE;
6628 /* Expect no looping. */
6629 e->probability = profile_probability::guessed_always ();
6631 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6632 e->probability = profile_probability::guessed_never ();
6634 /* Copy the new value to loadedi (we already did that before the condition
6635 if we are not in SSA). */
6636 if (gimple_in_ssa_p (cfun))
6638 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6639 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6642 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6643 gsi_remove (&si, true);
6645 struct loop *loop = alloc_loop ();
6646 loop->header = loop_header;
6647 loop->latch = store_bb;
6648 add_loop (loop, loop_header->loop_father);
6650 if (gimple_in_ssa_p (cfun))
6651 update_ssa (TODO_update_ssa_no_phi);
6653 return true;
6656 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6658 GOMP_atomic_start ();
6659 *addr = rhs;
6660 GOMP_atomic_end ();
6662 The result is not globally atomic, but works so long as all parallel
6663 references are within #pragma omp atomic directives. According to
6664 responses received from omp@openmp.org, this appears to be within spec.
6665 That makes sense, since it is how several other compilers handle
6666 this situation as well.
6667 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6668 expanding. STORED_VAL is the operand of the matching
6669 GIMPLE_OMP_ATOMIC_STORE.
6671 We replace
6672 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6673 loaded_val = *addr;
6675 and replace
6676 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6677 *addr = stored_val;  */
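/* Purely illustrative: for an update such as `x += y;' on a type with no
   usable builtins, the region therefore becomes

     GOMP_atomic_start ();
     tmp = x;
     x = tmp + y;
     GOMP_atomic_end ();

   serializing on the single lock libgomp keeps for all atomic regions.  */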
6680 static bool
6681 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6682 tree addr, tree loaded_val, tree stored_val)
6684 gimple_stmt_iterator si;
6685 gassign *stmt;
6686 tree t;
6688 si = gsi_last_bb (load_bb);
6689 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6691 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6692 t = build_call_expr (t, 0);
6693 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6695 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6696 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6697 gsi_remove (&si, true);
6699 si = gsi_last_bb (store_bb);
6700 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6702 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6703 stored_val);
6704 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6706 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6707 t = build_call_expr (t, 0);
6708 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6709 gsi_remove (&si, true);
6711 if (gimple_in_ssa_p (cfun))
6712 update_ssa (TODO_update_ssa_no_phi);
6713 return true;
6716 /* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand it
6717 using expand_omp_atomic_fetch_op.  If that fails, we try to
6718 call expand_omp_atomic_pipeline, and if that fails too, the
6719 ultimate fallback is wrapping the operation in a mutex
6720 (expand_omp_atomic_mutex). REGION is the atomic region built
6721 by build_omp_regions_1(). */
6723 static void
6724 expand_omp_atomic (struct omp_region *region)
6726 basic_block load_bb = region->entry, store_bb = region->exit;
6727 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6728 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6729 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6730 tree addr = gimple_omp_atomic_load_rhs (load);
6731 tree stored_val = gimple_omp_atomic_store_val (store);
6732 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6733 HOST_WIDE_INT index;
6735 /* Make sure the type is one of the supported sizes. */
6736 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6737 index = exact_log2 (index);
6738 if (index >= 0 && index <= 4)
6740 unsigned int align = TYPE_ALIGN_UNIT (type);
6742 /* __sync builtins require strict data alignment. */
6743 if (exact_log2 (align) >= index)
6745 /* Atomic load. */
6746 scalar_mode smode;
6747 if (loaded_val == stored_val
6748 && (is_int_mode (TYPE_MODE (type), &smode)
6749 || is_float_mode (TYPE_MODE (type), &smode))
6750 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6751 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6752 return;
6754 /* Atomic store. */
6755 if ((is_int_mode (TYPE_MODE (type), &smode)
6756 || is_float_mode (TYPE_MODE (type), &smode))
6757 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6758 && store_bb == single_succ (load_bb)
6759 && first_stmt (store_bb) == store
6760 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6761 stored_val, index))
6762 return;
6764 /* When possible, use specialized atomic update functions. */
6765 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6766 && store_bb == single_succ (load_bb)
6767 && expand_omp_atomic_fetch_op (load_bb, addr,
6768 loaded_val, stored_val, index))
6769 return;
6771 /* If we don't have specialized __sync builtins, try to implement
6772 it as a compare-and-swap loop. */
6773 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6774 loaded_val, stored_val, index))
6775 return;
6779 /* The ultimate fallback is wrapping the operation in a mutex. */
6780 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6783 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6784 at REGION_EXIT. */
6786 static void
6787 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6788 basic_block region_exit)
6790 struct loop *outer = region_entry->loop_father;
6791 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6793 /* Don't parallelize the kernels region if it contains more than one outer
6794 loop. */
6795 unsigned int nr_outer_loops = 0;
6796 struct loop *single_outer = NULL;
6797 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6799 gcc_assert (loop_outer (loop) == outer);
6801 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6802 continue;
6804 if (region_exit != NULL
6805 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6806 continue;
6808 nr_outer_loops++;
6809 single_outer = loop;
6811 if (nr_outer_loops != 1)
6812 return;
6814 for (struct loop *loop = single_outer->inner;
6815 loop != NULL;
6816 loop = loop->inner)
6817 if (loop->next)
6818 return;
6820 /* Mark the loops in the region. */
6821 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6822 loop->in_oacc_kernels_region = true;
6825 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6827 struct GTY(()) grid_launch_attributes_trees
6829 tree kernel_dim_array_type;
6830 tree kernel_lattrs_dimnum_decl;
6831 tree kernel_lattrs_grid_decl;
6832 tree kernel_lattrs_group_decl;
6833 tree kernel_launch_attributes_type;
6836 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6838 /* Create types used to pass kernel launch attributes to target. */
6840 static void
6841 grid_create_kernel_launch_attr_types (void)
6843 if (grid_attr_trees)
6844 return;
6845 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6847 tree dim_arr_index_type
6848 = build_index_type (build_int_cst (integer_type_node, 2));
6849 grid_attr_trees->kernel_dim_array_type
6850 = build_array_type (uint32_type_node, dim_arr_index_type);
6852 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6853 grid_attr_trees->kernel_lattrs_dimnum_decl
6854 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6855 uint32_type_node);
6856 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6858 grid_attr_trees->kernel_lattrs_grid_decl
6859 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6860 grid_attr_trees->kernel_dim_array_type);
6861 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6862 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6863 grid_attr_trees->kernel_lattrs_group_decl
6864 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6865 grid_attr_trees->kernel_dim_array_type);
6866 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6867 = grid_attr_trees->kernel_lattrs_grid_decl;
6868 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6869 "__gomp_kernel_launch_attributes",
6870 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
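/* For reference, the record built above is laid out like the following C
   struct (the tag is the one passed to finish_builtin_struct; this is a
   sketch for readers, not a definition shared with any runtime header):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };
 */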
6873 /* Insert, before the current statement in GSI, a store of VALUE into element
6874 INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
6875 RANGE_VAR.  VALUE must be of type uint32_type_node. */
6877 static void
6878 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6879 tree fld_decl, int index, tree value)
6881 tree ref = build4 (ARRAY_REF, uint32_type_node,
6882 build3 (COMPONENT_REF,
6883 grid_attr_trees->kernel_dim_array_type,
6884 range_var, fld_decl, NULL_TREE),
6885 build_int_cst (integer_type_node, index),
6886 NULL_TREE, NULL_TREE);
6887 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6890 /* Return a tree representation of a pointer to a structure with grid and
6891 work-group size information. Statements filling that information will be
6892 inserted before GSI; TGT_STMT is the target statement which has the
6893 necessary information in it. */
6895 static tree
6896 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6897 gomp_target *tgt_stmt)
6899 grid_create_kernel_launch_attr_types ();
6900 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6901 "__kernel_launch_attrs");
6903 unsigned max_dim = 0;
6904 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6905 clause;
6906 clause = OMP_CLAUSE_CHAIN (clause))
6908 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6909 continue;
6911 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6912 max_dim = MAX (dim, max_dim);
6914 grid_insert_store_range_dim (gsi, lattrs,
6915 grid_attr_trees->kernel_lattrs_grid_decl,
6916 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6917 grid_insert_store_range_dim (gsi, lattrs,
6918 grid_attr_trees->kernel_lattrs_group_decl,
6919 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6922 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6923 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6924 gcc_checking_assert (max_dim <= 2);
6925 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6926 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6927 GSI_SAME_STMT);
6928 TREE_ADDRESSABLE (lattrs) = 1;
6929 return build_fold_addr_expr (lattrs);
6932 /* Build a target argument identifier from the DEVICE identifier, value
6933 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6935 static tree
6936 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6938 tree t = build_int_cst (integer_type_node, device);
6939 if (subseqent_param)
6940 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6941 build_int_cst (integer_type_node,
6942 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6943 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6944 build_int_cst (integer_type_node, id));
6945 return t;
6948 /* Like above but return it in a type that can be directly stored as an element
6949 of the argument array. */
6951 static tree
6952 get_target_argument_identifier (int device, bool subseqent_param, int id)
6954 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6955 return fold_convert (ptr_type_node, t);
6958 /* Return a target argument consisting of DEVICE identifier, value identifier
6959 ID, and the actual VALUE. */
6961 static tree
6962 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6963 tree value)
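/* Pack VALUE into the bits above GOMP_TARGET_ARG_VALUE_SHIFT and the
   device/id identifier into the low bits, then convert the result to a
   pointer so it can be stored in the argument array.  */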
6965 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6966 fold_convert (integer_type_node, value),
6967 build_int_cst (unsigned_type_node,
6968 GOMP_TARGET_ARG_VALUE_SHIFT));
6969 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6970 get_target_argument_identifier_1 (device, false, id));
6971 t = fold_convert (ptr_type_node, t);
6972 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6975 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6976 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
6977 otherwise push an identifier (with DEVICE and ID) and the VALUE as two
6978 separate arguments. */
6980 static void
6981 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6982 int id, tree value, vec <tree> *args)
6984 if (tree_fits_shwi_p (value)
6985 && tree_to_shwi (value) > -(1 << 15)
6986 && tree_to_shwi (value) < (1 << 15))
6987 args->quick_push (get_target_argument_value (gsi, device, id, value));
6988 else
6990 args->quick_push (get_target_argument_identifier (device, true, id));
6991 value = fold_convert (ptr_type_node, value);
6992 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6993 GSI_SAME_STMT);
6994 args->quick_push (value);
6998 /* Create an array of arguments that is then passed to GOMP_target. */
7000 static tree
7001 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7003 auto_vec <tree, 6> args;
7004 tree clauses = gimple_omp_target_clauses (tgt_stmt);
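/* Pass the num_teams and thread_limit values when the clauses are present,
   or -1 to denote an unspecified value.  */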
7005 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7006 if (c)
7007 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7008 else
7009 t = integer_minus_one_node;
7010 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7011 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7013 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7014 if (c)
7015 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7016 else
7017 t = integer_minus_one_node;
7018 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7019 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7020 &args);
7022 /* Add HSA-specific grid sizes, if available. */
7023 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7024 OMP_CLAUSE__GRIDDIM_))
7026 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7027 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7028 args.quick_push (t);
7029 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7032 /* Produce more, perhaps device specific, arguments here. */
7034 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7035 args.length () + 1),
7036 ".omp_target_args");
7037 for (unsigned i = 0; i < args.length (); i++)
7039 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7040 build_int_cst (integer_type_node, i),
7041 NULL_TREE, NULL_TREE);
7042 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7043 GSI_SAME_STMT);
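/* Terminate the argument array with a NULL pointer.  */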
7045 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7046 build_int_cst (integer_type_node, args.length ()),
7047 NULL_TREE, NULL_TREE);
7048 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7049 GSI_SAME_STMT);
7050 TREE_ADDRESSABLE (argarray) = 1;
7051 return build_fold_addr_expr (argarray);
7054 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7056 static void
7057 expand_omp_target (struct omp_region *region)
7059 basic_block entry_bb, exit_bb, new_bb;
7060 struct function *child_cfun;
7061 tree child_fn, block, t;
7062 gimple_stmt_iterator gsi;
7063 gomp_target *entry_stmt;
7064 gimple *stmt;
7065 edge e;
7066 bool offloaded, data_region;
7068 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7069 new_bb = region->entry;
7071 offloaded = is_gimple_omp_offloaded (entry_stmt);
7072 switch (gimple_omp_target_kind (entry_stmt))
7074 case GF_OMP_TARGET_KIND_REGION:
7075 case GF_OMP_TARGET_KIND_UPDATE:
7076 case GF_OMP_TARGET_KIND_ENTER_DATA:
7077 case GF_OMP_TARGET_KIND_EXIT_DATA:
7078 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7079 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7080 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7081 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7082 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7083 data_region = false;
7084 break;
7085 case GF_OMP_TARGET_KIND_DATA:
7086 case GF_OMP_TARGET_KIND_OACC_DATA:
7087 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7088 data_region = true;
7089 break;
7090 default:
7091 gcc_unreachable ();
7094 child_fn = NULL_TREE;
7095 child_cfun = NULL;
7096 if (offloaded)
7098 child_fn = gimple_omp_target_child_fn (entry_stmt);
7099 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7102 /* Supported by expand_omp_taskreg, but not here. */
7103 if (child_cfun != NULL)
7104 gcc_checking_assert (!child_cfun->cfg);
7105 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7107 entry_bb = region->entry;
7108 exit_bb = region->exit;
7110 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7112 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7114 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7115 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7116 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7117 DECL_ATTRIBUTES (child_fn)
7118 = tree_cons (get_identifier ("oacc kernels"),
7119 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7122 if (offloaded)
7124 unsigned srcidx, dstidx, num;
7126 /* If the offloading region needs data sent from the parent
7127 function, then the very first statement (except possible
7128 tree profile counter updates) of the offloading body
7129 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7130 &.OMP_DATA_O is passed as an argument to the child function,
7131 we need to replace it with the argument as seen by the child
7132 function.
7134 In most cases, this will end up being the identity assignment
7135 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7136 a function call that has been inlined, the original PARM_DECL
7137 .OMP_DATA_I may have been converted into a different local
7138 variable. In which case, we need to keep the assignment. */
7139 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7140 if (data_arg)
7142 basic_block entry_succ_bb = single_succ (entry_bb);
7143 gimple_stmt_iterator gsi;
7144 tree arg;
7145 gimple *tgtcopy_stmt = NULL;
7146 tree sender = TREE_VEC_ELT (data_arg, 0);
7148 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7150 gcc_assert (!gsi_end_p (gsi));
7151 stmt = gsi_stmt (gsi);
7152 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7153 continue;
7155 if (gimple_num_ops (stmt) == 2)
7157 tree arg = gimple_assign_rhs1 (stmt);
7159 /* We're ignoring the subcode because we're
7160 effectively doing a STRIP_NOPS. */
7162 if (TREE_CODE (arg) == ADDR_EXPR
7163 && TREE_OPERAND (arg, 0) == sender)
7165 tgtcopy_stmt = stmt;
7166 break;
7171 gcc_assert (tgtcopy_stmt != NULL);
7172 arg = DECL_ARGUMENTS (child_fn);
7174 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7175 gsi_remove (&gsi, true);
7178 /* Declare local variables needed in CHILD_CFUN. */
7179 block = DECL_INITIAL (child_fn);
7180 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7181 /* The gimplifier could record temporaries in the offloading block
7182 rather than in the containing function's local_decls chain,
7183 which would mean cgraph missed finalizing them. Do it now. */
7184 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7185 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7186 varpool_node::finalize_decl (t);
7187 DECL_SAVED_TREE (child_fn) = NULL;
7188 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7189 gimple_set_body (child_fn, NULL);
7190 TREE_USED (block) = 1;
7192 /* Reset DECL_CONTEXT on function arguments. */
7193 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7194 DECL_CONTEXT (t) = child_fn;
7196 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7197 so that it can be moved to the child function. */
7198 gsi = gsi_last_bb (entry_bb);
7199 stmt = gsi_stmt (gsi);
7200 gcc_assert (stmt
7201 && gimple_code (stmt) == gimple_code (entry_stmt));
7202 e = split_block (entry_bb, stmt);
7203 gsi_remove (&gsi, true);
7204 entry_bb = e->dest;
7205 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7207 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7208 if (exit_bb)
7210 gsi = gsi_last_bb (exit_bb);
7211 gcc_assert (!gsi_end_p (gsi)
7212 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7213 stmt = gimple_build_return (NULL);
7214 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7215 gsi_remove (&gsi, true);
7218 /* Make sure to generate early debug for the function before
7219 outlining anything. */
7220 if (! gimple_in_ssa_p (cfun))
7221 (*debug_hooks->early_global_decl) (cfun->decl);
7223 /* Move the offloading region into CHILD_CFUN. */
7225 block = gimple_block (entry_stmt);
7227 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7228 if (exit_bb)
7229 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7230 /* When the OMP expansion process cannot guarantee an up-to-date
7231 loop tree, arrange for the child function to fix up its loops. */
7232 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7233 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7235 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7236 num = vec_safe_length (child_cfun->local_decls);
7237 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7239 t = (*child_cfun->local_decls)[srcidx];
7240 if (DECL_CONTEXT (t) == cfun->decl)
7241 continue;
7242 if (srcidx != dstidx)
7243 (*child_cfun->local_decls)[dstidx] = t;
7244 dstidx++;
7246 if (dstidx != num)
7247 vec_safe_truncate (child_cfun->local_decls, dstidx);
7249 /* Inform the callgraph about the new function. */
7250 child_cfun->curr_properties = cfun->curr_properties;
7251 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7252 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7253 cgraph_node *node = cgraph_node::get_create (child_fn);
7254 node->parallelized_function = 1;
7255 cgraph_node::add_new_function (child_fn, true);
7257 /* Add the new function to the offload table. */
7258 if (ENABLE_OFFLOADING)
7259 vec_safe_push (offload_funcs, child_fn);
7261 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7262 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7264 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7265 fixed in a following pass. */
7266 push_cfun (child_cfun);
7267 if (need_asm)
7268 assign_assembler_name_if_needed (child_fn);
7269 cgraph_edge::rebuild_edges ();
7271 /* Some EH regions might become dead, see PR34608. If
7272 pass_cleanup_cfg isn't the first pass to happen with the
7273 new child, these dead EH edges might cause problems.
7274 Clean them up now. */
7275 if (flag_exceptions)
7277 basic_block bb;
7278 bool changed = false;
7280 FOR_EACH_BB_FN (bb, cfun)
7281 changed |= gimple_purge_dead_eh_edges (bb);
7282 if (changed)
7283 cleanup_tree_cfg ();
7285 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7286 verify_loop_structure ();
7287 pop_cfun ();
7289 if (dump_file && !gimple_in_ssa_p (cfun))
7291 omp_any_child_fn_dumped = true;
7292 dump_function_header (dump_file, child_fn, dump_flags);
7293 dump_function_to_file (child_fn, dump_file, dump_flags);
7297 /* Emit a library call to launch the offloading region, or do data
7298 transfers. */
7299 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7300 enum built_in_function start_ix;
7301 location_t clause_loc;
7302 unsigned int flags_i = 0;
7304 switch (gimple_omp_target_kind (entry_stmt))
7306 case GF_OMP_TARGET_KIND_REGION:
7307 start_ix = BUILT_IN_GOMP_TARGET;
7308 break;
7309 case GF_OMP_TARGET_KIND_DATA:
7310 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7311 break;
7312 case GF_OMP_TARGET_KIND_UPDATE:
7313 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7314 break;
7315 case GF_OMP_TARGET_KIND_ENTER_DATA:
7316 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7317 break;
7318 case GF_OMP_TARGET_KIND_EXIT_DATA:
7319 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7320 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7321 break;
7322 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7323 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7324 start_ix = BUILT_IN_GOACC_PARALLEL;
7325 break;
7326 case GF_OMP_TARGET_KIND_OACC_DATA:
7327 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7328 start_ix = BUILT_IN_GOACC_DATA_START;
7329 break;
7330 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7331 start_ix = BUILT_IN_GOACC_UPDATE;
7332 break;
7333 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7334 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7335 break;
7336 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7337 start_ix = BUILT_IN_GOACC_DECLARE;
7338 break;
7339 default:
7340 gcc_unreachable ();
7343 clauses = gimple_omp_target_clauses (entry_stmt);
7345 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7346 library choose) and there is no conditional. */
7347 cond = NULL_TREE;
7348 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7350 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7351 if (c)
7352 cond = OMP_CLAUSE_IF_EXPR (c);
7354 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7355 if (c)
7357 /* Even if we pass it to all library function calls, it is currently only
7358 defined/used for the OpenMP target ones. */
7359 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7360 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7361 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7362 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7364 device = OMP_CLAUSE_DEVICE_ID (c);
7365 clause_loc = OMP_CLAUSE_LOCATION (c);
7367 else
7368 clause_loc = gimple_location (entry_stmt);
7370 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7371 if (c)
7372 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7374 /* Ensure 'device' is of the correct type. */
7375 device = fold_convert_loc (clause_loc, integer_type_node, device);
7377 /* If we found the clause 'if (cond)', build
7378 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7379 if (cond)
7381 cond = gimple_boolify (cond);
7383 basic_block cond_bb, then_bb, else_bb;
7384 edge e;
7385 tree tmp_var;
7387 tmp_var = create_tmp_var (TREE_TYPE (device));
7388 if (offloaded)
7389 e = split_block_after_labels (new_bb);
7390 else
7392 gsi = gsi_last_bb (new_bb);
7393 gsi_prev (&gsi);
7394 e = split_block (new_bb, gsi_stmt (gsi));
7396 cond_bb = e->src;
7397 new_bb = e->dest;
7398 remove_edge (e);
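/* Build a diamond: THEN_BB keeps the requested device, ELSE_BB selects
   GOMP_DEVICE_HOST_FALLBACK, and both fall through to NEW_BB.  */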
7400 then_bb = create_empty_bb (cond_bb);
7401 else_bb = create_empty_bb (then_bb);
7402 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7403 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7405 stmt = gimple_build_cond_empty (cond);
7406 gsi = gsi_last_bb (cond_bb);
7407 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7409 gsi = gsi_start_bb (then_bb);
7410 stmt = gimple_build_assign (tmp_var, device);
7411 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7413 gsi = gsi_start_bb (else_bb);
7414 stmt = gimple_build_assign (tmp_var,
7415 build_int_cst (integer_type_node,
7416 GOMP_DEVICE_HOST_FALLBACK));
7417 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7419 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7420 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7421 add_bb_to_loop (then_bb, cond_bb->loop_father);
7422 add_bb_to_loop (else_bb, cond_bb->loop_father);
7423 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7424 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7426 device = tmp_var;
7427 gsi = gsi_last_bb (new_bb);
7429 else
7431 gsi = gsi_last_bb (new_bb);
7432 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7433 true, GSI_SAME_STMT);
7436 t = gimple_omp_target_data_arg (entry_stmt);
7437 if (t == NULL)
7439 t1 = size_zero_node;
7440 t2 = build_zero_cst (ptr_type_node);
7441 t3 = t2;
7442 t4 = t2;
7444 else
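/* T1 is the number of map entries (one past the maximum index of the
   sizes array); T2, T3 and T4 are the addresses of the data, sizes and
   kinds arrays.  */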
7446 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7447 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7448 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7449 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7450 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7453 gimple *g;
7454 bool tagging = false;
7455 /* The maximum number of arguments used by any start_ix, without varargs. */
7456 auto_vec<tree, 11> args;
7457 args.quick_push (device);
7458 if (offloaded)
7459 args.quick_push (build_fold_addr_expr (child_fn));
7460 args.quick_push (t1);
7461 args.quick_push (t2);
7462 args.quick_push (t3);
7463 args.quick_push (t4);
7464 switch (start_ix)
7466 case BUILT_IN_GOACC_DATA_START:
7467 case BUILT_IN_GOACC_DECLARE:
7468 case BUILT_IN_GOMP_TARGET_DATA:
7469 break;
7470 case BUILT_IN_GOMP_TARGET:
7471 case BUILT_IN_GOMP_TARGET_UPDATE:
7472 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7473 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7474 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7475 if (c)
7476 depend = OMP_CLAUSE_DECL (c);
7477 else
7478 depend = build_int_cst (ptr_type_node, 0);
7479 args.quick_push (depend);
7480 if (start_ix == BUILT_IN_GOMP_TARGET)
7481 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7482 break;
7483 case BUILT_IN_GOACC_PARALLEL:
7484 oacc_set_fn_attrib (child_fn, clauses, &args);
7485 tagging = true;
7486 /* FALLTHRU */
7487 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7488 case BUILT_IN_GOACC_UPDATE:
7490 tree t_async = NULL_TREE;
7492 /* If present, use the value specified by the respective
7493 clause, making sure that it is of the correct type. */
7494 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7495 if (c)
7496 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7497 integer_type_node,
7498 OMP_CLAUSE_ASYNC_EXPR (c));
7499 else if (!tagging)
7500 /* Default values for t_async. */
7501 t_async = fold_convert_loc (gimple_location (entry_stmt),
7502 integer_type_node,
7503 build_int_cst (integer_type_node,
7504 GOMP_ASYNC_SYNC));
7505 if (tagging && t_async)
7507 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7509 if (TREE_CODE (t_async) == INTEGER_CST)
7511 /* See if we can pack the async arg into the tag's
7512 operand. */
7513 i_async = TREE_INT_CST_LOW (t_async);
7514 if (i_async < GOMP_LAUNCH_OP_MAX)
7515 t_async = NULL_TREE;
7516 else
7517 i_async = GOMP_LAUNCH_OP_MAX;
7519 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7520 i_async));
7522 if (t_async)
7523 args.safe_push (t_async);
7525 /* Save the argument index, and ... */
7526 unsigned t_wait_idx = args.length ();
7527 unsigned num_waits = 0;
7528 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7529 if (!tagging || c)
7530 /* ... push a placeholder. */
7531 args.safe_push (integer_zero_node);
7533 for (; c; c = OMP_CLAUSE_CHAIN (c))
7534 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7536 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7537 integer_type_node,
7538 OMP_CLAUSE_WAIT_EXPR (c)));
7539 num_waits++;
7542 if (!tagging || num_waits)
7544 tree len;
7546 /* Now that we know the number, update the placeholder. */
7547 if (tagging)
7548 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7549 else
7550 len = build_int_cst (integer_type_node, num_waits);
7551 len = fold_convert_loc (gimple_location (entry_stmt),
7552 unsigned_type_node, len);
7553 args[t_wait_idx] = len;
7556 break;
7557 default:
7558 gcc_unreachable ();
7560 if (tagging)
7561 /* Push terminal marker - zero. */
7562 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7564 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7565 gimple_set_location (g, gimple_location (entry_stmt));
7566 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7567 if (!offloaded)
7569 g = gsi_stmt (gsi);
7570 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7571 gsi_remove (&gsi, true);
7573 if (data_region && region->exit)
7575 gsi = gsi_last_bb (region->exit);
7576 g = gsi_stmt (gsi);
7577 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7578 gsi_remove (&gsi, true);
7582 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7583 the iteration variable derived from the thread number. INTRA_GROUP means this
7584 is an expansion of a loop iterating over work-items within a separate
7585 iteration over groups. */
7587 static void
7588 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7590 gimple_stmt_iterator gsi;
7591 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7592 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7593 == GF_OMP_FOR_KIND_GRID_LOOP);
7594 size_t collapse = gimple_omp_for_collapse (for_stmt);
7595 struct omp_for_data_loop *loops
7596 = XALLOCAVEC (struct omp_for_data_loop,
7597 gimple_omp_for_collapse (for_stmt));
7598 struct omp_for_data fd;
7600 remove_edge (BRANCH_EDGE (kfor->entry));
7601 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7603 gcc_assert (kfor->cont);
7604 omp_extract_for_data (for_stmt, &fd, loops);
7606 gsi = gsi_start_bb (body_bb);
7608 for (size_t dim = 0; dim < collapse; dim++)
7610 tree type, itype;
7611 itype = type = TREE_TYPE (fd.loops[dim].v);
7612 if (POINTER_TYPE_P (type))
7613 itype = signed_type_for (type);
7615 tree n1 = fd.loops[dim].n1;
7616 tree step = fd.loops[dim].step;
7617 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7618 true, NULL_TREE, true, GSI_SAME_STMT);
7619 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7620 true, NULL_TREE, true, GSI_SAME_STMT);
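/* Choose the HSA builtin that yields this dimension's index: the
   work-group id when iterating over groups, the in-group work-item id for
   an intra-group loop, and the absolute work-item id otherwise.  */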
7621 tree threadid;
7622 if (gimple_omp_for_grid_group_iter (for_stmt))
7624 gcc_checking_assert (!intra_group);
7625 threadid = build_call_expr (builtin_decl_explicit
7626 (BUILT_IN_HSA_WORKGROUPID), 1,
7627 build_int_cstu (unsigned_type_node, dim));
7629 else if (intra_group)
7630 threadid = build_call_expr (builtin_decl_explicit
7631 (BUILT_IN_HSA_WORKITEMID), 1,
7632 build_int_cstu (unsigned_type_node, dim));
7633 else
7634 threadid = build_call_expr (builtin_decl_explicit
7635 (BUILT_IN_HSA_WORKITEMABSID), 1,
7636 build_int_cstu (unsigned_type_node, dim));
7637 threadid = fold_convert (itype, threadid);
7638 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7639 true, GSI_SAME_STMT);
7641 tree startvar = fd.loops[dim].v;
7642 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7643 if (POINTER_TYPE_P (type))
7644 t = fold_build_pointer_plus (n1, t);
7645 else
7646 t = fold_build2 (PLUS_EXPR, type, t, n1);
7647 t = fold_convert (type, t);
7648 t = force_gimple_operand_gsi (&gsi, t,
7649 DECL_P (startvar)
7650 && TREE_ADDRESSABLE (startvar),
7651 NULL_TREE, true, GSI_SAME_STMT);
7652 gassign *assign_stmt = gimple_build_assign (startvar, t);
7653 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7655 /* Remove the omp for statement. */
7656 gsi = gsi_last_bb (kfor->entry);
7657 gsi_remove (&gsi, true);
7659 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7660 gsi = gsi_last_bb (kfor->cont);
7661 gcc_assert (!gsi_end_p (gsi)
7662 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7663 gsi_remove (&gsi, true);
7665 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7666 gsi = gsi_last_bb (kfor->exit);
7667 gcc_assert (!gsi_end_p (gsi)
7668 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7669 if (intra_group)
7670 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7671 gsi_remove (&gsi, true);
7673 /* Fix up the now much simpler CFG. */
7674 remove_edge (find_edge (kfor->cont, body_bb));
7676 if (kfor->cont != body_bb)
7677 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7678 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7681 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7682 argument decls. */
7684 struct grid_arg_decl_map
7686 tree old_arg;
7687 tree new_arg;
7690 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7691 pertaining to the kernel function. */
7693 static tree
7694 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7696 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7697 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7698 tree t = *tp;
7700 if (t == adm->old_arg)
7701 *tp = adm->new_arg;
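/* Keep walking the operand, but do not descend into types or decls.  */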
7702 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7703 return NULL_TREE;
7706 /* If the TARGET region contains a gridified kernel body, remove its region
7707 from the TARGET and expand it in HSA gridified kernel fashion. */
7709 static void
7710 grid_expand_target_grid_body (struct omp_region *target)
7712 if (!hsa_gen_requested_p ())
7713 return;
7715 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7716 struct omp_region **pp;
7718 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7719 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7720 break;
7722 struct omp_region *gpukernel = *pp;
7724 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7725 if (!gpukernel)
7727 /* HSA cannot handle OACC stuff. */
7728 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7729 return;
7730 gcc_checking_assert (orig_child_fndecl);
7731 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7732 OMP_CLAUSE__GRIDDIM_));
7733 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7735 hsa_register_kernel (n);
7736 return;
7739 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7740 OMP_CLAUSE__GRIDDIM_));
7741 tree inside_block
7742 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7743 *pp = gpukernel->next;
7744 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7745 if ((*pp)->type == GIMPLE_OMP_FOR)
7746 break;
7748 struct omp_region *kfor = *pp;
7749 gcc_assert (kfor);
7750 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7751 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7752 *pp = kfor->next;
7753 if (kfor->inner)
7755 if (gimple_omp_for_grid_group_iter (for_stmt))
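/* KFOR iterates over groups; expand each nested intra-group work-item
   loop first and unlink it from the region tree.  */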
7757 struct omp_region **next_pp;
7758 for (pp = &kfor->inner; *pp; pp = next_pp)
7760 next_pp = &(*pp)->next;
7761 if ((*pp)->type != GIMPLE_OMP_FOR)
7762 continue;
7763 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7764 gcc_assert (gimple_omp_for_kind (inner)
7765 == GF_OMP_FOR_KIND_GRID_LOOP);
7766 grid_expand_omp_for_loop (*pp, true);
7767 *pp = (*pp)->next;
7768 next_pp = pp;
7771 expand_omp (kfor->inner);
7773 if (gpukernel->inner)
7774 expand_omp (gpukernel->inner);
7776 tree kern_fndecl = copy_node (orig_child_fndecl);
7777 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7778 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7779 tree tgtblock = gimple_block (tgt_stmt);
7780 tree fniniblock = make_node (BLOCK);
7781 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7782 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7783 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7784 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7785 DECL_INITIAL (kern_fndecl) = fniniblock;
7786 push_struct_function (kern_fndecl);
7787 cfun->function_end_locus = gimple_location (tgt_stmt);
7788 init_tree_ssa (cfun);
7789 pop_cfun ();
7791 /* Make sure to generate early debug for the function before
7792 outlining anything. */
7793 if (! gimple_in_ssa_p (cfun))
7794 (*debug_hooks->early_global_decl) (cfun->decl);
7796 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7797 gcc_assert (!DECL_CHAIN (old_parm_decl));
7798 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7799 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7800 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7801 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7802 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7803 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7804 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7805 kern_cfun->curr_properties = cfun->curr_properties;
7807 grid_expand_omp_for_loop (kfor, false);
7809 /* Remove the omp for statement. */
7810 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7811 gsi_remove (&gsi, true);
7812 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7813 return. */
7814 gsi = gsi_last_bb (gpukernel->exit);
7815 gcc_assert (!gsi_end_p (gsi)
7816 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7817 gimple *ret_stmt = gimple_build_return (NULL);
7818 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7819 gsi_remove (&gsi, true);
7821 /* Statements in the first BB in the target construct have been produced by
7822 target lowering and must be copied inside the GPUKERNEL, with the two
7823 exceptions of the first OMP statement and the OMP_DATA assignment
7824 statement. */
7825 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7826 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7827 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7828 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7829 !gsi_end_p (tsi); gsi_next (&tsi))
7831 gimple *stmt = gsi_stmt (tsi);
7832 if (is_gimple_omp (stmt))
7833 break;
7834 if (sender
7835 && is_gimple_assign (stmt)
7836 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7837 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7838 continue;
7839 gimple *copy = gimple_copy (stmt);
7840 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7841 gimple_set_block (copy, fniniblock);
7844 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7845 gpukernel->exit, inside_block);
7847 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7848 kcn->mark_force_output ();
7849 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7851 hsa_register_kernel (kcn, orig_child);
7853 cgraph_node::add_new_function (kern_fndecl, true);
7854 push_cfun (kern_cfun);
7855 cgraph_edge::rebuild_edges ();
7857 /* Re-map any mention of the PARM_DECL of the original function to the
7858 PARM_DECL of the new one.
7860 TODO: It would be great if lowering produced references into the GPU
7861 kernel decl straight away and we did not have to do this. */
7862 struct grid_arg_decl_map adm;
7863 adm.old_arg = old_parm_decl;
7864 adm.new_arg = new_parm_decl;
7865 basic_block bb;
7866 FOR_EACH_BB_FN (bb, kern_cfun)
7868 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7870 gimple *stmt = gsi_stmt (gsi);
7871 struct walk_stmt_info wi;
7872 memset (&wi, 0, sizeof (wi));
7873 wi.info = &adm;
7874 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7877 pop_cfun ();
7879 return;
7882 /* Expand the parallel region tree rooted at REGION. Expansion
7883 proceeds in depth-first order. Innermost regions are expanded
7884 first. This way, parallel regions that require a new function to
7885 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7886 internal dependencies in their body. */
7888 static void
7889 expand_omp (struct omp_region *region)
7891 omp_any_child_fn_dumped = false;
7892 while (region)
7894 location_t saved_location;
7895 gimple *inner_stmt = NULL;
7897 /* First, determine whether this is a combined parallel+workshare
7898 region. */
7899 if (region->type == GIMPLE_OMP_PARALLEL)
7900 determine_parallel_type (region);
7901 else if (region->type == GIMPLE_OMP_TARGET)
7902 grid_expand_target_grid_body (region);
7904 if (region->type == GIMPLE_OMP_FOR
7905 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7906 inner_stmt = last_stmt (region->inner->entry);
7908 if (region->inner)
7909 expand_omp (region->inner);
7911 saved_location = input_location;
7912 if (gimple_has_location (last_stmt (region->entry)))
7913 input_location = gimple_location (last_stmt (region->entry));
7915 switch (region->type)
7917 case GIMPLE_OMP_PARALLEL:
7918 case GIMPLE_OMP_TASK:
7919 expand_omp_taskreg (region);
7920 break;
7922 case GIMPLE_OMP_FOR:
7923 expand_omp_for (region, inner_stmt);
7924 break;
7926 case GIMPLE_OMP_SECTIONS:
7927 expand_omp_sections (region);
7928 break;
7930 case GIMPLE_OMP_SECTION:
7931 /* Individual omp sections are handled together with their
7932 parent GIMPLE_OMP_SECTIONS region. */
7933 break;
7935 case GIMPLE_OMP_SINGLE:
7936 expand_omp_single (region);
7937 break;
7939 case GIMPLE_OMP_ORDERED:
7941 gomp_ordered *ord_stmt
7942 = as_a <gomp_ordered *> (last_stmt (region->entry));
7943 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7944 OMP_CLAUSE_DEPEND))
7946 /* We'll expand these when expanding the corresponding
7947 worksharing region with an ordered(n) clause. */
7948 gcc_assert (region->outer
7949 && region->outer->type == GIMPLE_OMP_FOR);
7950 region->ord_stmt = ord_stmt;
7951 break;
7954 /* FALLTHRU */
7955 case GIMPLE_OMP_MASTER:
7956 case GIMPLE_OMP_TASKGROUP:
7957 case GIMPLE_OMP_CRITICAL:
7958 case GIMPLE_OMP_TEAMS:
7959 expand_omp_synch (region);
7960 break;
7962 case GIMPLE_OMP_ATOMIC_LOAD:
7963 expand_omp_atomic (region);
7964 break;
7966 case GIMPLE_OMP_TARGET:
7967 expand_omp_target (region);
7968 break;
7970 default:
7971 gcc_unreachable ();
7974 input_location = saved_location;
7975 region = region->next;
7977 if (omp_any_child_fn_dumped)
7979 if (dump_file)
7980 dump_function_header (dump_file, current_function_decl, dump_flags);
7981 omp_any_child_fn_dumped = false;
7985 /* Helper for build_omp_regions. Scan the dominator tree starting at
7986 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7987 true, the function ends once a single tree is built (otherwise, a whole
7988 forest of OMP constructs may be built). */
7990 static void
7991 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7992 bool single_tree)
7994 gimple_stmt_iterator gsi;
7995 gimple *stmt;
7996 basic_block son;
7998 gsi = gsi_last_bb (bb);
7999 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8001 struct omp_region *region;
8002 enum gimple_code code;
8004 stmt = gsi_stmt (gsi);
8005 code = gimple_code (stmt);
8006 if (code == GIMPLE_OMP_RETURN)
8008 /* STMT is the return point out of region PARENT. Mark it
8009 as the exit point and make PARENT the immediately
8010 enclosing region. */
8011 gcc_assert (parent);
8012 region = parent;
8013 region->exit = bb;
8014 parent = parent->outer;
8016 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8018 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8019 GIMPLE_OMP_RETURN, but matches with
8020 GIMPLE_OMP_ATOMIC_LOAD. */
8021 gcc_assert (parent);
8022 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8023 region = parent;
8024 region->exit = bb;
8025 parent = parent->outer;
8027 else if (code == GIMPLE_OMP_CONTINUE)
8029 gcc_assert (parent);
8030 parent->cont = bb;
8032 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8034 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8035 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8037 else
8039 region = new_omp_region (bb, code, parent);
8040 /* Otherwise... */
8041 if (code == GIMPLE_OMP_TARGET)
8043 switch (gimple_omp_target_kind (stmt))
8045 case GF_OMP_TARGET_KIND_REGION:
8046 case GF_OMP_TARGET_KIND_DATA:
8047 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8048 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8049 case GF_OMP_TARGET_KIND_OACC_DATA:
8050 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8051 break;
8052 case GF_OMP_TARGET_KIND_UPDATE:
8053 case GF_OMP_TARGET_KIND_ENTER_DATA:
8054 case GF_OMP_TARGET_KIND_EXIT_DATA:
8055 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8056 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8057 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8058 /* ..., other than for those stand-alone directives... */
8059 region = NULL;
8060 break;
8061 default:
8062 gcc_unreachable ();
8065 else if (code == GIMPLE_OMP_ORDERED
8066 && omp_find_clause (gimple_omp_ordered_clauses
8067 (as_a <gomp_ordered *> (stmt)),
8068 OMP_CLAUSE_DEPEND))
8069 /* #pragma omp ordered depend is also just a stand-alone
8070 directive. */
8071 region = NULL;
8072 /* ..., this directive becomes the parent for a new region. */
8073 if (region)
8074 parent = region;
8078 if (single_tree && !parent)
8079 return;
8081 for (son = first_dom_son (CDI_DOMINATORS, bb);
8082 son;
8083 son = next_dom_son (CDI_DOMINATORS, son))
8084 build_omp_regions_1 (son, parent, single_tree);
8087 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8088 root_omp_region. */
8090 static void
8091 build_omp_regions_root (basic_block root)
8093 gcc_assert (root_omp_region == NULL);
8094 build_omp_regions_1 (root, NULL, true);
8095 gcc_assert (root_omp_region != NULL);
8098 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8100 void
8101 omp_expand_local (basic_block head)
8103 build_omp_regions_root (head);
8104 if (dump_file && (dump_flags & TDF_DETAILS))
8106 fprintf (dump_file, "\nOMP region tree\n\n");
8107 dump_omp_region (dump_file, root_omp_region, 0);
8108 fprintf (dump_file, "\n");
8111 remove_exit_barriers (root_omp_region);
8112 expand_omp (root_omp_region);
8114 omp_free_regions ();
8117 /* Scan the CFG and build a tree of OMP regions, storing its root in
8118 root_omp_region. */
8120 static void
8121 build_omp_regions (void)
8123 gcc_assert (root_omp_region == NULL);
8124 calculate_dominance_info (CDI_DOMINATORS);
8125 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8128 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8130 static unsigned int
8131 execute_expand_omp (void)
8133 build_omp_regions ();
8135 if (!root_omp_region)
8136 return 0;
8138 if (dump_file)
8140 fprintf (dump_file, "\nOMP region tree\n\n");
8141 dump_omp_region (dump_file, root_omp_region, 0);
8142 fprintf (dump_file, "\n");
8145 remove_exit_barriers (root_omp_region);
8147 expand_omp (root_omp_region);
8149 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8150 verify_loop_structure ();
8151 cleanup_tree_cfg ();
8153 omp_free_regions ();
8155 return 0;
8158 /* OMP expansion -- the default pass, run before creation of SSA form. */
8160 namespace {
8162 const pass_data pass_data_expand_omp =
8164 GIMPLE_PASS, /* type */
8165 "ompexp", /* name */
8166 OPTGROUP_OMP, /* optinfo_flags */
8167 TV_NONE, /* tv_id */
8168 PROP_gimple_any, /* properties_required */
8169 PROP_gimple_eomp, /* properties_provided */
8170 0, /* properties_destroyed */
8171 0, /* todo_flags_start */
8172 0, /* todo_flags_finish */
8175 class pass_expand_omp : public gimple_opt_pass
8177 public:
8178 pass_expand_omp (gcc::context *ctxt)
8179 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8182 /* opt_pass methods: */
8183 virtual unsigned int execute (function *)
8185 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8186 || flag_openmp_simd != 0)
8187 && !seen_error ());
8189 /* This pass always runs, to provide PROP_gimple_eomp.
8190 But often, there is nothing to do. */
8191 if (!gate)
8192 return 0;
8194 return execute_expand_omp ();
8197 }; // class pass_expand_omp
8199 } // anon namespace
8201 gimple_opt_pass *
8202 make_pass_expand_omp (gcc::context *ctxt)
8204 return new pass_expand_omp (ctxt);
8207 namespace {
8209 const pass_data pass_data_expand_omp_ssa =
8211 GIMPLE_PASS, /* type */
8212 "ompexpssa", /* name */
8213 OPTGROUP_OMP, /* optinfo_flags */
8214 TV_NONE, /* tv_id */
8215 PROP_cfg | PROP_ssa, /* properties_required */
8216 PROP_gimple_eomp, /* properties_provided */
8217 0, /* properties_destroyed */
8218 0, /* todo_flags_start */
8219 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8222 class pass_expand_omp_ssa : public gimple_opt_pass
8224 public:
8225 pass_expand_omp_ssa (gcc::context *ctxt)
8226 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8229 /* opt_pass methods: */
8230 virtual bool gate (function *fun)
8232 return !(fun->curr_properties & PROP_gimple_eomp);
8234 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8235 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8237 }; // class pass_expand_omp_ssa
8239 } // anon namespace
8241 gimple_opt_pass *
8242 make_pass_expand_omp_ssa (gcc::context *ctxt)
8244 return new pass_expand_omp_ssa (ctxt);
8247 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8248 GIMPLE_* codes. */
8250 bool
8251 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8252 int *region_idx)
8254 gimple *last = last_stmt (bb);
8255 enum gimple_code code = gimple_code (last);
8256 struct omp_region *cur_region = *region;
8257 bool fallthru = false;
8259 switch (code)
8261 case GIMPLE_OMP_PARALLEL:
8262 case GIMPLE_OMP_TASK:
8263 case GIMPLE_OMP_FOR:
8264 case GIMPLE_OMP_SINGLE:
8265 case GIMPLE_OMP_TEAMS:
8266 case GIMPLE_OMP_MASTER:
8267 case GIMPLE_OMP_TASKGROUP:
8268 case GIMPLE_OMP_CRITICAL:
8269 case GIMPLE_OMP_SECTION:
8270 case GIMPLE_OMP_GRID_BODY:
8271 cur_region = new_omp_region (bb, code, cur_region);
8272 fallthru = true;
8273 break;
8275 case GIMPLE_OMP_ORDERED:
8276 cur_region = new_omp_region (bb, code, cur_region);
8277 fallthru = true;
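/* An "ordered" construct with a "depend" clause is a stand-alone
   directive with no matching GIMPLE_OMP_RETURN, so it does not open a
   region here.  */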
8278 if (omp_find_clause (gimple_omp_ordered_clauses
8279 (as_a <gomp_ordered *> (last)),
8280 OMP_CLAUSE_DEPEND))
8281 cur_region = cur_region->outer;
8282 break;
8284 case GIMPLE_OMP_TARGET:
8285 cur_region = new_omp_region (bb, code, cur_region);
8286 fallthru = true;
8287 switch (gimple_omp_target_kind (last))
8289 case GF_OMP_TARGET_KIND_REGION:
8290 case GF_OMP_TARGET_KIND_DATA:
8291 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8292 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8293 case GF_OMP_TARGET_KIND_OACC_DATA:
8294 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8295 break;
8296 case GF_OMP_TARGET_KIND_UPDATE:
8297 case GF_OMP_TARGET_KIND_ENTER_DATA:
8298 case GF_OMP_TARGET_KIND_EXIT_DATA:
8299 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8300 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8301 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8302 cur_region = cur_region->outer;
8303 break;
8304 default:
8305 gcc_unreachable ();
8307 break;
8309 case GIMPLE_OMP_SECTIONS:
8310 cur_region = new_omp_region (bb, code, cur_region);
8311 fallthru = true;
8312 break;
8314 case GIMPLE_OMP_SECTIONS_SWITCH:
8315 fallthru = false;
8316 break;
8318 case GIMPLE_OMP_ATOMIC_LOAD:
8319 case GIMPLE_OMP_ATOMIC_STORE:
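/* Atomic load/store statements do not change the region nesting here;
   control simply falls through.  */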
8320 fallthru = true;
8321 break;
8323 case GIMPLE_OMP_RETURN:
8324 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8325 somewhere other than the next block. This will be
8326 created later. */
8327 cur_region->exit = bb;
8328 if (cur_region->type == GIMPLE_OMP_TASK)
8329 /* Add an edge corresponding to not scheduling the task
8330 immediately. */
8331 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8332 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8333 cur_region = cur_region->outer;
8334 break;
8336 case GIMPLE_OMP_CONTINUE:
8337 cur_region->cont = bb;
8338 switch (cur_region->type)
8340 case GIMPLE_OMP_FOR:
8341 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8342 succ edges as abnormal to prevent splitting
8343 them. */
8344 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8345 /* Make the loopback edge. */
8346 make_edge (bb, single_succ (cur_region->entry),
8347 EDGE_ABNORMAL);
8349 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8350 corresponds to the case that the body of the loop
8351 is not executed at all. */
8352 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8353 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8354 fallthru = false;
8355 break;
8357 case GIMPLE_OMP_SECTIONS:
8358 /* Wire up the edges into and out of the nested sections. */
8360 basic_block switch_bb = single_succ (cur_region->entry);
8362 struct omp_region *i;
8363 for (i = cur_region->inner; i ; i = i->next)
8365 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8366 make_edge (switch_bb, i->entry, 0);
8367 make_edge (i->exit, bb, EDGE_FALLTHRU);
8370 /* Make the loopback edge to the block with
8371 GIMPLE_OMP_SECTIONS_SWITCH. */
8372 make_edge (bb, switch_bb, 0);
8374 /* Make the edge from the switch to exit. */
8375 make_edge (switch_bb, bb->next_bb, 0);
8376 fallthru = false;
8378 break;
8380 case GIMPLE_OMP_TASK:
8381 fallthru = true;
8382 break;
8384 default:
8385 gcc_unreachable ();
8387 break;
8389 default:
8390 gcc_unreachable ();
8393 if (*region != cur_region)
8395 *region = cur_region;
8396 if (cur_region)
8397 *region_idx = cur_region->entry->index;
8398 else
8399 *region_idx = 0;
8402 return fallthru;
8405 #include "gt-omp-expand.h"