gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
117 /* Return true if REGION is a combined parallel+workshare region. */
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
122 return region->is_combined_parallel;
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
139 Is lowered into:
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
178 return false;
180 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
182 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
183 return false;
184 if (fd.iter_type != long_integer_type_node)
185 return false;
187 /* FIXME. We give up too easily here. If any of these arguments
188 are not constants, they will likely involve variables that have
189 been mapped into fields of .omp_data_s for sharing with the child
190 function. With appropriate data flow, it would be possible to
191 see through this. */
192 if (!is_gimple_min_invariant (fd.loop.n1)
193 || !is_gimple_min_invariant (fd.loop.n2)
194 || !is_gimple_min_invariant (fd.loop.step)
195 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
196 return false;
198 return true;
201 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
202 presence (SIMD_SCHEDULE). */
204 static tree
205 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
207 if (!simd_schedule || integer_zerop (chunk_size))
208 return chunk_size;
210 poly_uint64 vf = omp_max_vf ();
211 if (known_eq (vf, 1U))
212 return chunk_size;
214 tree type = TREE_TYPE (chunk_size);
215 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
216 build_int_cst (type, vf - 1));
217 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
218 build_int_cst (type, -vf));
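/* For illustration (not part of the original sources): the two folds above
   simply round CHUNK_SIZE up to a multiple of the maximum vectorization
   factor.  E.g. with vf = 8 and a chunk size of 10,
     (10 + (8 - 1)) & -8  ==  17 & ~7  ==  16,
   so chunks are handed out in whole multiples of the simd width.  */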
221 /* Collect additional arguments needed to emit a combined
222 parallel+workshare call. WS_STMT is the workshare directive being
223 expanded. */
225 static vec<tree, va_gc> *
226 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
228 tree t;
229 location_t loc = gimple_location (ws_stmt);
230 vec<tree, va_gc> *ws_args;
232 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
234 struct omp_for_data fd;
235 tree n1, n2;
237 omp_extract_for_data (for_stmt, &fd, NULL);
238 n1 = fd.loop.n1;
239 n2 = fd.loop.n2;
241 if (gimple_omp_for_combined_into_p (for_stmt))
243 tree innerc
244 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
245 OMP_CLAUSE__LOOPTEMP_);
246 gcc_assert (innerc);
247 n1 = OMP_CLAUSE_DECL (innerc);
248 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n2 = OMP_CLAUSE_DECL (innerc);
254 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
256 t = fold_convert_loc (loc, long_integer_type_node, n1);
257 ws_args->quick_push (t);
259 t = fold_convert_loc (loc, long_integer_type_node, n2);
260 ws_args->quick_push (t);
262 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
263 ws_args->quick_push (t);
265 if (fd.chunk_size)
267 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
268 t = omp_adjust_chunk_size (t, fd.simd_schedule);
269 ws_args->quick_push (t);
272 return ws_args;
274 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
276 /* Number of sections is equal to the number of edges from the
277 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
278 the exit of the sections region. */
279 basic_block bb = single_succ (gimple_bb (ws_stmt));
280 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
281 vec_alloc (ws_args, 1);
282 ws_args->quick_push (t);
283 return ws_args;
286 gcc_unreachable ();
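/* For reference, a sketch of where these extra arguments end up: in
   expand_parallel_call below they are spliced between the NUM_THREADS and
   FLAGS arguments of the combined libgomp entry point, which (assuming the
   usual libgomp ABI) looks roughly like
     void GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
                                      unsigned num_threads, long start,
                                      long end, long incr, long chunk_size,
                                      unsigned flags);
     void GOMP_parallel_sections (void (*fn) (void *), void *data,
                                  unsigned num_threads, unsigned count,
                                  unsigned flags);
   i.e. N1/N2/STEP (plus the optional chunk size) for loops, or the section
   count for sections.  */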
289 /* Discover whether REGION is a combined parallel+workshare region. */
291 static void
292 determine_parallel_type (struct omp_region *region)
294 basic_block par_entry_bb, par_exit_bb;
295 basic_block ws_entry_bb, ws_exit_bb;
297 if (region == NULL || region->inner == NULL
298 || region->exit == NULL || region->inner->exit == NULL
299 || region->inner->cont == NULL)
300 return;
302 /* We only support parallel+for and parallel+sections. */
303 if (region->type != GIMPLE_OMP_PARALLEL
304 || (region->inner->type != GIMPLE_OMP_FOR
305 && region->inner->type != GIMPLE_OMP_SECTIONS))
306 return;
308 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
309 WS_EXIT_BB -> PAR_EXIT_BB. */
310 par_entry_bb = region->entry;
311 par_exit_bb = region->exit;
312 ws_entry_bb = region->inner->entry;
313 ws_exit_bb = region->inner->exit;
315   /* Give up on task reductions on the parallel: while they are implementable,
316      adding another big set of APIs or slowing down the normal paths is
317      not acceptable.  */
318 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
319 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
320 return;
322 if (single_succ (par_entry_bb) == ws_entry_bb
323 && single_succ (ws_exit_bb) == par_exit_bb
324 && workshare_safe_to_combine_p (ws_entry_bb)
325 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
326 || (last_and_only_stmt (ws_entry_bb)
327 && last_and_only_stmt (par_exit_bb))))
329 gimple *par_stmt = last_stmt (par_entry_bb);
330 gimple *ws_stmt = last_stmt (ws_entry_bb);
332 if (region->inner->type == GIMPLE_OMP_FOR)
334 /* If this is a combined parallel loop, we need to determine
335 whether or not to use the combined library calls. There
336 are two cases where we do not apply the transformation:
337 static loops and any kind of ordered loop. In the first
338 case, we already open code the loop so there is no need
339 to do anything else. In the latter case, the combined
340 parallel loop call would still need extra synchronization
341 to implement ordered semantics, so there would not be any
342 gain in using the combined call. */
343 tree clauses = gimple_omp_for_clauses (ws_stmt);
344 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
345 if (c == NULL
346 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
347 == OMP_CLAUSE_SCHEDULE_STATIC)
348 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
349 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
350 return;
352 else if (region->inner->type == GIMPLE_OMP_SECTIONS
353 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
354 OMP_CLAUSE__REDUCTEMP_))
355 return;
357 region->is_combined_parallel = true;
358 region->inner->is_combined_parallel = true;
359 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
363 /* Debugging dumps for parallel regions. */
364 void dump_omp_region (FILE *, struct omp_region *, int);
365 void debug_omp_region (struct omp_region *);
366 void debug_all_omp_regions (void);
368 /* Dump the parallel region tree rooted at REGION. */
370 void
371 dump_omp_region (FILE *file, struct omp_region *region, int indent)
373 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
374 gimple_code_name[region->type]);
376 if (region->inner)
377 dump_omp_region (file, region->inner, indent + 4);
379 if (region->cont)
381 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
382 region->cont->index);
385 if (region->exit)
386 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
387 region->exit->index);
388 else
389 fprintf (file, "%*s[no exit marker]\n", indent, "");
391 if (region->next)
392 dump_omp_region (file, region->next, indent);
395 DEBUG_FUNCTION void
396 debug_omp_region (struct omp_region *region)
398 dump_omp_region (stderr, region, 0);
401 DEBUG_FUNCTION void
402 debug_all_omp_regions (void)
404 dump_omp_region (stderr, root_omp_region, 0);
407 /* Create a new omp region of type TYPE whose entry is basic block BB, inside region PARENT.  */
409 static struct omp_region *
410 new_omp_region (basic_block bb, enum gimple_code type,
411 struct omp_region *parent)
413 struct omp_region *region = XCNEW (struct omp_region);
415 region->outer = parent;
416 region->entry = bb;
417 region->type = type;
419 if (parent)
421 /* This is a nested region. Add it to the list of inner
422 regions in PARENT. */
423 region->next = parent->inner;
424 parent->inner = region;
426 else
428 /* This is a toplevel region. Add it to the list of toplevel
429 regions in ROOT_OMP_REGION. */
430 region->next = root_omp_region;
431 root_omp_region = region;
434 return region;
437 /* Release the memory associated with the region tree rooted at REGION. */
439 static void
440 free_omp_region_1 (struct omp_region *region)
442 struct omp_region *i, *n;
444 for (i = region->inner; i ; i = n)
446 n = i->next;
447 free_omp_region_1 (i);
450 free (region);
453 /* Release the memory for the entire omp region tree. */
455 void
456 omp_free_regions (void)
458 struct omp_region *r, *n;
459 for (r = root_omp_region; r ; r = n)
461 n = r->next;
462 free_omp_region_1 (r);
464 root_omp_region = NULL;
467 /* A convenience function to build an empty GIMPLE_COND with just the
468 condition. */
470 static gcond *
471 gimple_build_cond_empty (tree cond)
473 enum tree_code pred_code;
474 tree lhs, rhs;
476 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
477 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
480 /* Return true if a parallel REGION is within a declare target function or
481 within a target region and is not a part of a gridified target. */
483 static bool
484 parallel_needs_hsa_kernel_p (struct omp_region *region)
486 bool indirect = false;
487 for (region = region->outer; region; region = region->outer)
489 if (region->type == GIMPLE_OMP_PARALLEL)
490 indirect = true;
491 else if (region->type == GIMPLE_OMP_TARGET)
493 gomp_target *tgt_stmt
494 = as_a <gomp_target *> (last_stmt (region->entry));
496 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
497 OMP_CLAUSE__GRIDDIM_))
498 return indirect;
499 else
500 return true;
504 if (lookup_attribute ("omp declare target",
505 DECL_ATTRIBUTES (current_function_decl)))
506 return true;
508 return false;
511 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
512 Add CHILD_FNDECL to decl chain of the supercontext of the block
513 ENTRY_BLOCK - this is the block which originally contained the
514 code from which CHILD_FNDECL was created.
516 Together, these actions ensure that the debug info for the outlined
517 function will be emitted with the correct lexical scope. */
519 static void
520 adjust_context_and_scope (tree entry_block, tree child_fndecl)
522 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
524 tree b = BLOCK_SUPERCONTEXT (entry_block);
526 if (TREE_CODE (b) == BLOCK)
528 tree parent_fndecl;
530 /* Follow supercontext chain until the parent fndecl
531 is found. */
532 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
533 TREE_CODE (parent_fndecl) == BLOCK;
534 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
537 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
539 DECL_CONTEXT (child_fndecl) = parent_fndecl;
541 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
542 BLOCK_VARS (b) = child_fndecl;
547 /* Build the function calls to GOMP_parallel etc to actually
548 generate the parallel operation. REGION is the parallel region
549 being expanded. BB is the block where to insert the code. WS_ARGS
550 will be set if this is a call to a combined parallel+workshare
551    construct; it contains the list of additional arguments needed by
552 the workshare construct. */
554 static void
555 expand_parallel_call (struct omp_region *region, basic_block bb,
556 gomp_parallel *entry_stmt,
557 vec<tree, va_gc> *ws_args)
559 tree t, t1, t2, val, cond, c, clauses, flags;
560 gimple_stmt_iterator gsi;
561 gimple *stmt;
562 enum built_in_function start_ix;
563 int start_ix2;
564 location_t clause_loc;
565 vec<tree, va_gc> *args;
567 clauses = gimple_omp_parallel_clauses (entry_stmt);
569 /* Determine what flavor of GOMP_parallel we will be
570 emitting. */
571 start_ix = BUILT_IN_GOMP_PARALLEL;
572 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
573 if (rtmp)
574 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
575 else if (is_combined_parallel (region))
577 switch (region->inner->type)
579 case GIMPLE_OMP_FOR:
580 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
581 switch (region->inner->sched_kind)
583 case OMP_CLAUSE_SCHEDULE_RUNTIME:
584 if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
586 start_ix2 = 6;
587 else if ((region->inner->sched_modifiers
588 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
589 start_ix2 = 7;
590 else
591 start_ix2 = 3;
592 break;
593 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
594 case OMP_CLAUSE_SCHEDULE_GUIDED:
595 if ((region->inner->sched_modifiers
596 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
598 start_ix2 = 3 + region->inner->sched_kind;
599 break;
601 /* FALLTHRU */
602 default:
603 start_ix2 = region->inner->sched_kind;
604 break;
606 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
607 start_ix = (enum built_in_function) start_ix2;
608 break;
609 case GIMPLE_OMP_SECTIONS:
610 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
611 break;
612 default:
613 gcc_unreachable ();
617 /* By default, the value of NUM_THREADS is zero (selected at run time)
618 and there is no conditional. */
619 cond = NULL_TREE;
620 val = build_int_cst (unsigned_type_node, 0);
621 flags = build_int_cst (unsigned_type_node, 0);
623 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
624 if (c)
625 cond = OMP_CLAUSE_IF_EXPR (c);
627 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
628 if (c)
630 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
631 clause_loc = OMP_CLAUSE_LOCATION (c);
633 else
634 clause_loc = gimple_location (entry_stmt);
636 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
637 if (c)
638 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
640 /* Ensure 'val' is of the correct type. */
641 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
643 /* If we found the clause 'if (cond)', build either
644 (cond != 0) or (cond ? val : 1u). */
645 if (cond)
647 cond = gimple_boolify (cond);
649 if (integer_zerop (val))
650 val = fold_build2_loc (clause_loc,
651 EQ_EXPR, unsigned_type_node, cond,
652 build_int_cst (TREE_TYPE (cond), 0));
653 else
655 basic_block cond_bb, then_bb, else_bb;
656 edge e, e_then, e_else;
657 tree tmp_then, tmp_else, tmp_join, tmp_var;
659 tmp_var = create_tmp_var (TREE_TYPE (val));
660 if (gimple_in_ssa_p (cfun))
662 tmp_then = make_ssa_name (tmp_var);
663 tmp_else = make_ssa_name (tmp_var);
664 tmp_join = make_ssa_name (tmp_var);
666 else
668 tmp_then = tmp_var;
669 tmp_else = tmp_var;
670 tmp_join = tmp_var;
673 e = split_block_after_labels (bb);
674 cond_bb = e->src;
675 bb = e->dest;
676 remove_edge (e);
678 then_bb = create_empty_bb (cond_bb);
679 else_bb = create_empty_bb (then_bb);
680 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
681 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
683 stmt = gimple_build_cond_empty (cond);
684 gsi = gsi_start_bb (cond_bb);
685 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
687 gsi = gsi_start_bb (then_bb);
688 expand_omp_build_assign (&gsi, tmp_then, val, true);
690 gsi = gsi_start_bb (else_bb);
691 expand_omp_build_assign (&gsi, tmp_else,
692 build_int_cst (unsigned_type_node, 1),
693 true);
695 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
696 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
697 add_bb_to_loop (then_bb, cond_bb->loop_father);
698 add_bb_to_loop (else_bb, cond_bb->loop_father);
699 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
700 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
702 if (gimple_in_ssa_p (cfun))
704 gphi *phi = create_phi_node (tmp_join, bb);
705 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
706 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
709 val = tmp_join;
712 gsi = gsi_start_bb (bb);
713 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
714 false, GSI_CONTINUE_LINKING);
717 gsi = gsi_last_nondebug_bb (bb);
718 t = gimple_omp_parallel_data_arg (entry_stmt);
719 if (t == NULL)
720 t1 = null_pointer_node;
721 else
722 t1 = build_fold_addr_expr (t);
723 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
724 t2 = build_fold_addr_expr (child_fndecl);
726 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
728 vec_alloc (args, 4 + vec_safe_length (ws_args));
729 args->quick_push (t2);
730 args->quick_push (t1);
731 args->quick_push (val);
732 if (ws_args)
733 args->splice (*ws_args);
734 args->quick_push (flags);
736 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
737 builtin_decl_explicit (start_ix), args);
739 if (rtmp)
741 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
742 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
743 fold_convert (type,
744 fold_convert (pointer_sized_int_node, t)));
746 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
747 false, GSI_CONTINUE_LINKING);
749 if (hsa_gen_requested_p ()
750 && parallel_needs_hsa_kernel_p (region))
752 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
753 hsa_register_kernel (child_cnode);
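/* A brief sketch of the call built above (assuming the standard libgomp
   entry point):
     void GOMP_parallel (void (*fn) (void *), void *data,
                         unsigned num_threads, unsigned flags);
   A NUM_THREADS argument of 0 means "let the runtime decide".  So for
     #pragma omp parallel if (cond) num_threads (4)
   the VAL argument computed above is effectively (cond ? 4 : 1), and with
   no NUM_THREADS clause it degenerates to (cond != 0 ? 0 : 1).  The
   reductions/sections/loop variants take additional arguments as described
   for get_ws_args_for.  */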
757 /* Build the function call to GOMP_task to actually
758 generate the task operation. BB is the block where to insert the code. */
760 static void
761 expand_task_call (struct omp_region *region, basic_block bb,
762 gomp_task *entry_stmt)
764 tree t1, t2, t3;
765 gimple_stmt_iterator gsi;
766 location_t loc = gimple_location (entry_stmt);
768 tree clauses = gimple_omp_task_clauses (entry_stmt);
770 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
771 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
772 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
773 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
774 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
775 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
777 unsigned int iflags
778 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
779 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
780 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
782 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
783 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
784 tree num_tasks = NULL_TREE;
785 bool ull = false;
786 if (taskloop_p)
788 gimple *g = last_stmt (region->outer->entry);
789 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
790 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
791 struct omp_for_data fd;
792 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
793 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
794 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
795 OMP_CLAUSE__LOOPTEMP_);
796 startvar = OMP_CLAUSE_DECL (startvar);
797 endvar = OMP_CLAUSE_DECL (endvar);
798 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
799 if (fd.loop.cond_code == LT_EXPR)
800 iflags |= GOMP_TASK_FLAG_UP;
801 tree tclauses = gimple_omp_for_clauses (g);
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
803 if (num_tasks)
804 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
805 else
807 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
808 if (num_tasks)
810 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
811 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
813 else
814 num_tasks = integer_zero_node;
816 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
817 if (ifc == NULL_TREE)
818 iflags |= GOMP_TASK_FLAG_IF;
819 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
820 iflags |= GOMP_TASK_FLAG_NOGROUP;
821 ull = fd.iter_type == long_long_unsigned_type_node;
822 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
823 iflags |= GOMP_TASK_FLAG_REDUCTION;
825 else if (priority)
826 iflags |= GOMP_TASK_FLAG_PRIORITY;
828 tree flags = build_int_cst (unsigned_type_node, iflags);
830 tree cond = boolean_true_node;
831 if (ifc)
833 if (taskloop_p)
835 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
836 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
837 build_int_cst (unsigned_type_node,
838 GOMP_TASK_FLAG_IF),
839 build_int_cst (unsigned_type_node, 0));
840 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
841 flags, t);
843 else
844 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847 if (finalc)
849 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
850 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 build_int_cst (unsigned_type_node,
852 GOMP_TASK_FLAG_FINAL),
853 build_int_cst (unsigned_type_node, 0));
854 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
856 if (depend)
857 depend = OMP_CLAUSE_DECL (depend);
858 else
859 depend = build_int_cst (ptr_type_node, 0);
860 if (priority)
861 priority = fold_convert (integer_type_node,
862 OMP_CLAUSE_PRIORITY_EXPR (priority));
863 else
864 priority = integer_zero_node;
866 gsi = gsi_last_nondebug_bb (bb);
867 tree t = gimple_omp_task_data_arg (entry_stmt);
868 if (t == NULL)
869 t2 = null_pointer_node;
870 else
871 t2 = build_fold_addr_expr_loc (loc, t);
872 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
873 t = gimple_omp_task_copy_fn (entry_stmt);
874 if (t == NULL)
875 t3 = null_pointer_node;
876 else
877 t3 = build_fold_addr_expr_loc (loc, t);
879 if (taskloop_p)
880 t = build_call_expr (ull
881 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
882 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
883 11, t1, t2, t3,
884 gimple_omp_task_arg_size (entry_stmt),
885 gimple_omp_task_arg_align (entry_stmt), flags,
886 num_tasks, priority, startvar, endvar, step);
887 else
888 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
889 9, t1, t2, t3,
890 gimple_omp_task_arg_size (entry_stmt),
891 gimple_omp_task_arg_align (entry_stmt), cond, flags,
892 depend, priority);
894 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
895 false, GSI_CONTINUE_LINKING);
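/* For reference, the two calls built above target (assuming the usual
   libgomp ABI) roughly
     void GOMP_task (void (*fn) (void *), void *data,
                     void (*cpyfn) (void *, void *), long arg_size,
                     long arg_align, bool if_clause, unsigned flags,
                     void **depend, int priority);
     void GOMP_taskloop (void (*fn) (void *), void *data,
                         void (*cpyfn) (void *, void *), long arg_size,
                         long arg_align, unsigned flags,
                         unsigned long num_tasks, int priority,
                         long start, long end, long step);
   which lines up with the 9- and 11-argument build_call_expr uses above.  */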
898 /* Build the function call to GOMP_taskwait_depend to actually
899 generate the taskwait operation. BB is the block where to insert the
900 code. */
902 static void
903 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
905 tree clauses = gimple_omp_task_clauses (entry_stmt);
906 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
907 if (depend == NULL_TREE)
908 return;
910 depend = OMP_CLAUSE_DECL (depend);
912 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
913 tree t
914 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
915 1, depend);
917 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
918 false, GSI_CONTINUE_LINKING);
921 /* Build the function call to GOMP_teams_reg to actually
922    generate the host teams operation for the teams region being
923    expanded.  BB is the block where to insert the code.  */
925 static void
926 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
928 tree clauses = gimple_omp_teams_clauses (entry_stmt);
929 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
930 if (num_teams == NULL_TREE)
931 num_teams = build_int_cst (unsigned_type_node, 0);
932 else
934 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
935 num_teams = fold_convert (unsigned_type_node, num_teams);
937 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
938 if (thread_limit == NULL_TREE)
939 thread_limit = build_int_cst (unsigned_type_node, 0);
940 else
942 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
943 thread_limit = fold_convert (unsigned_type_node, thread_limit);
946 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
947 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
948 if (t == NULL)
949 t1 = null_pointer_node;
950 else
951 t1 = build_fold_addr_expr (t);
952 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
953 tree t2 = build_fold_addr_expr (child_fndecl);
955 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
957 vec<tree, va_gc> *args;
958 vec_alloc (args, 5);
959 args->quick_push (t2);
960 args->quick_push (t1);
961 args->quick_push (num_teams);
962 args->quick_push (thread_limit);
963 /* For future extensibility. */
964 args->quick_push (build_zero_cst (unsigned_type_node));
966 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
967 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
968 args);
970 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
971 false, GSI_CONTINUE_LINKING);
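/* Sketch of the host-teams entry point assumed above (the trailing zero
   argument is the "for future extensibility" flags word):
     void GOMP_teams_reg (void (*fn) (void *), void *data,
                          unsigned num_teams, unsigned thread_limit,
                          unsigned flags);  */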
976 /* Chain all the DECLs in vector V by their DECL_CHAIN fields.  */
976 static tree
977 vec2chain (vec<tree, va_gc> *v)
979 tree chain = NULL_TREE, t;
980 unsigned ix;
982 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
984 DECL_CHAIN (t) = chain;
985 chain = t;
988 return chain;
991 /* Remove barriers in REGION->EXIT's block.  Note that this is only
992    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
993    is an implicit barrier, the barrier that a workshare inside the
994    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region can
995    now be removed.  */
997 static void
998 remove_exit_barrier (struct omp_region *region)
1000 gimple_stmt_iterator gsi;
1001 basic_block exit_bb;
1002 edge_iterator ei;
1003 edge e;
1004 gimple *stmt;
1005 int any_addressable_vars = -1;
1007 exit_bb = region->exit;
1009 /* If the parallel region doesn't return, we don't have REGION->EXIT
1010 block at all. */
1011 if (! exit_bb)
1012 return;
1014 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1015 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1016 statements that can appear in between are extremely limited -- no
1017 memory operations at all. Here, we allow nothing at all, so the
1018 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1019 gsi = gsi_last_nondebug_bb (exit_bb);
1020 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1021 gsi_prev_nondebug (&gsi);
1022 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1023 return;
1025 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1027 gsi = gsi_last_nondebug_bb (e->src);
1028 if (gsi_end_p (gsi))
1029 continue;
1030 stmt = gsi_stmt (gsi);
1031 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1032 && !gimple_omp_return_nowait_p (stmt))
1034 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1035 in many cases. If there could be tasks queued, the barrier
1036 might be needed to let the tasks run before some local
1037 variable of the parallel that the task uses as shared
1038 runs out of scope. The task can be spawned either
1039	     from within the current function (this would be easy to check)
1040 or from some function it calls and gets passed an address
1041 of such a variable. */
1042 if (any_addressable_vars < 0)
1044 gomp_parallel *parallel_stmt
1045 = as_a <gomp_parallel *> (last_stmt (region->entry));
1046 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1047 tree local_decls, block, decl;
1048 unsigned ix;
1050 any_addressable_vars = 0;
1051 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1052 if (TREE_ADDRESSABLE (decl))
1054 any_addressable_vars = 1;
1055 break;
1057 for (block = gimple_block (stmt);
1058 !any_addressable_vars
1059 && block
1060 && TREE_CODE (block) == BLOCK;
1061 block = BLOCK_SUPERCONTEXT (block))
1063 for (local_decls = BLOCK_VARS (block);
1064 local_decls;
1065 local_decls = DECL_CHAIN (local_decls))
1066 if (TREE_ADDRESSABLE (local_decls))
1068 any_addressable_vars = 1;
1069 break;
1071 if (block == gimple_block (parallel_stmt))
1072 break;
1075 if (!any_addressable_vars)
1076 gimple_omp_return_set_nowait (stmt);
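/* An example of the case the check above is being careful about (a sketch,
   not taken from the testsuite): with
     #pragma omp parallel
     {
       int x = 0;
       #pragma omp task shared(x)
         x++;
       #pragma omp for
       for (int i = 0; i < 8; i++)
         ;
     }
   the worksharing loop's barrier at the end of the parallel body cannot be
   turned into a nowait even though it is immediately followed by the
   parallel's own implicit barrier, because the queued task may still be
   running and referencing the addressable local X of the parallel body.  */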
1081 static void
1082 remove_exit_barriers (struct omp_region *region)
1084 if (region->type == GIMPLE_OMP_PARALLEL)
1085 remove_exit_barrier (region);
1087 if (region->inner)
1089 region = region->inner;
1090 remove_exit_barriers (region);
1091 while (region->next)
1093 region = region->next;
1094 remove_exit_barriers (region);
1099 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1100 calls. These can't be declared as const functions, but
1101 within one parallel body they are constant, so they can be
1102 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1103 which are declared const. Similarly for task body, except
1104 that in untied task omp_get_thread_num () can change at any task
1105 scheduling point. */
1107 static void
1108 optimize_omp_library_calls (gimple *entry_stmt)
1110 basic_block bb;
1111 gimple_stmt_iterator gsi;
1112 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1113 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1114 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1115 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1116 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1117 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1118 OMP_CLAUSE_UNTIED) != NULL);
1120 FOR_EACH_BB_FN (bb, cfun)
1121 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1123 gimple *call = gsi_stmt (gsi);
1124 tree decl;
1126 if (is_gimple_call (call)
1127 && (decl = gimple_call_fndecl (call))
1128 && DECL_EXTERNAL (decl)
1129 && TREE_PUBLIC (decl)
1130 && DECL_INITIAL (decl) == NULL)
1132 tree built_in;
1134 if (DECL_NAME (decl) == thr_num_id)
1136 /* In #pragma omp task untied omp_get_thread_num () can change
1137 during the execution of the task region. */
1138 if (untied_task)
1139 continue;
1140 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1142 else if (DECL_NAME (decl) == num_thr_id)
1143 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1144 else
1145 continue;
1147 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1148 || gimple_call_num_args (call) != 0)
1149 continue;
1151 if (flag_exceptions && !TREE_NOTHROW (decl))
1152 continue;
1154 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1155 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1156 TREE_TYPE (TREE_TYPE (built_in))))
1157 continue;
1159 gimple_call_set_fndecl (call, built_in);
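/* Roughly, the effect of the replacement above on a parallel body
   (a sketch):
     before:  _1 = omp_get_num_threads ();             not const
     after:   _1 = __builtin_omp_get_num_threads ();   declared const
   so repeated queries of the thread count / thread number inside one
   parallel body can be CSEd by later passes.  */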
1164 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1165 regimplified. */
1167 static tree
1168 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1170 tree t = *tp;
1172 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1173 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1174 return t;
1176 if (TREE_CODE (t) == ADDR_EXPR)
1177 recompute_tree_invariant_for_addr_expr (t);
1179 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1180 return NULL_TREE;
1183 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1185 static void
1186 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1187 bool after)
1189 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1190 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1191 !after, after ? GSI_CONTINUE_LINKING
1192 : GSI_SAME_STMT);
1193 gimple *stmt = gimple_build_assign (to, from);
1194 if (after)
1195 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1196 else
1197 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1198 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1199 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1201 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1202 gimple_regimplify_operands (stmt, &gsi);
1206 /* Expand the OpenMP parallel or task directive starting at REGION. */
1208 static void
1209 expand_omp_taskreg (struct omp_region *region)
1211 basic_block entry_bb, exit_bb, new_bb;
1212 struct function *child_cfun;
1213 tree child_fn, block, t;
1214 gimple_stmt_iterator gsi;
1215 gimple *entry_stmt, *stmt;
1216 edge e;
1217 vec<tree, va_gc> *ws_args;
1219 entry_stmt = last_stmt (region->entry);
1220 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1221 && gimple_omp_task_taskwait_p (entry_stmt))
1223 new_bb = region->entry;
1224 gsi = gsi_last_nondebug_bb (region->entry);
1225 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1226 gsi_remove (&gsi, true);
1227 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1228 return;
1231 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1232 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1234 entry_bb = region->entry;
1235 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1236 exit_bb = region->cont;
1237 else
1238 exit_bb = region->exit;
1240 if (is_combined_parallel (region))
1241 ws_args = region->ws_args;
1242 else
1243 ws_args = NULL;
1245 if (child_cfun->cfg)
1247 /* Due to inlining, it may happen that we have already outlined
1248 the region, in which case all we need to do is make the
1249 sub-graph unreachable and emit the parallel call. */
1250 edge entry_succ_e, exit_succ_e;
1252 entry_succ_e = single_succ_edge (entry_bb);
1254 gsi = gsi_last_nondebug_bb (entry_bb);
1255 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1256 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1257 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1258 gsi_remove (&gsi, true);
1260 new_bb = entry_bb;
1261 if (exit_bb)
1263 exit_succ_e = single_succ_edge (exit_bb);
1264 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1266 remove_edge_and_dominated_blocks (entry_succ_e);
1268 else
1270 unsigned srcidx, dstidx, num;
1272 /* If the parallel region needs data sent from the parent
1273 function, then the very first statement (except possible
1274 tree profile counter updates) of the parallel body
1275 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1276 &.OMP_DATA_O is passed as an argument to the child function,
1277 we need to replace it with the argument as seen by the child
1278 function.
1280 In most cases, this will end up being the identity assignment
1281 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1282 a function call that has been inlined, the original PARM_DECL
1283 .OMP_DATA_I may have been converted into a different local
1284	 variable, in which case we need to keep the assignment.  */
1285 if (gimple_omp_taskreg_data_arg (entry_stmt))
1287 basic_block entry_succ_bb
1288 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1289 : FALLTHRU_EDGE (entry_bb)->dest;
1290 tree arg;
1291 gimple *parcopy_stmt = NULL;
1293 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1295 gimple *stmt;
1297 gcc_assert (!gsi_end_p (gsi));
1298 stmt = gsi_stmt (gsi);
1299 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1300 continue;
1302 if (gimple_num_ops (stmt) == 2)
1304 tree arg = gimple_assign_rhs1 (stmt);
1306	      /* We're ignoring the subcode because we're
1307 effectively doing a STRIP_NOPS. */
1309 if (TREE_CODE (arg) == ADDR_EXPR
1310 && (TREE_OPERAND (arg, 0)
1311 == gimple_omp_taskreg_data_arg (entry_stmt)))
1313 parcopy_stmt = stmt;
1314 break;
1319 gcc_assert (parcopy_stmt != NULL);
1320 arg = DECL_ARGUMENTS (child_fn);
1322 if (!gimple_in_ssa_p (cfun))
1324 if (gimple_assign_lhs (parcopy_stmt) == arg)
1325 gsi_remove (&gsi, true);
1326 else
1328 /* ?? Is setting the subcode really necessary ?? */
1329 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1330 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1333 else
1335 tree lhs = gimple_assign_lhs (parcopy_stmt);
1336 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1337 /* We'd like to set the rhs to the default def in the child_fn,
1338 but it's too early to create ssa names in the child_fn.
1339 Instead, we set the rhs to the parm. In
1340 move_sese_region_to_fn, we introduce a default def for the
1341	     parm, map the parm to its default def, and once we encounter
1342 this stmt, replace the parm with the default def. */
1343 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1344 update_stmt (parcopy_stmt);
1348 /* Declare local variables needed in CHILD_CFUN. */
1349 block = DECL_INITIAL (child_fn);
1350 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1351 /* The gimplifier could record temporaries in parallel/task block
1352 rather than in containing function's local_decls chain,
1353 which would mean cgraph missed finalizing them. Do it now. */
1354 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1355 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1356 varpool_node::finalize_decl (t);
1357 DECL_SAVED_TREE (child_fn) = NULL;
1358 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1359 gimple_set_body (child_fn, NULL);
1360 TREE_USED (block) = 1;
1362 /* Reset DECL_CONTEXT on function arguments. */
1363 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1364 DECL_CONTEXT (t) = child_fn;
1366 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1367 so that it can be moved to the child function. */
1368 gsi = gsi_last_nondebug_bb (entry_bb);
1369 stmt = gsi_stmt (gsi);
1370 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1371 || gimple_code (stmt) == GIMPLE_OMP_TASK
1372 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1373 e = split_block (entry_bb, stmt);
1374 gsi_remove (&gsi, true);
1375 entry_bb = e->dest;
1376 edge e2 = NULL;
1377 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1378 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1379 else
1381 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1382 gcc_assert (e2->dest == region->exit);
1383 remove_edge (BRANCH_EDGE (entry_bb));
1384 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1385 gsi = gsi_last_nondebug_bb (region->exit);
1386 gcc_assert (!gsi_end_p (gsi)
1387 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1388 gsi_remove (&gsi, true);
1391 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1392 if (exit_bb)
1394 gsi = gsi_last_nondebug_bb (exit_bb);
1395 gcc_assert (!gsi_end_p (gsi)
1396 && (gimple_code (gsi_stmt (gsi))
1397 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1398 stmt = gimple_build_return (NULL);
1399 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1400 gsi_remove (&gsi, true);
1403 /* Move the parallel region into CHILD_CFUN. */
1405 if (gimple_in_ssa_p (cfun))
1407 init_tree_ssa (child_cfun);
1408 init_ssa_operands (child_cfun);
1409 child_cfun->gimple_df->in_ssa_p = true;
1410 block = NULL_TREE;
1412 else
1413 block = gimple_block (entry_stmt);
1415 /* Make sure to generate early debug for the function before
1416 outlining anything. */
1417 if (! gimple_in_ssa_p (cfun))
1418 (*debug_hooks->early_global_decl) (cfun->decl);
1420 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1421 if (exit_bb)
1422 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1423 if (e2)
1425 basic_block dest_bb = e2->dest;
1426 if (!exit_bb)
1427 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1428 remove_edge (e2);
1429 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1431 /* When the OMP expansion process cannot guarantee an up-to-date
1432	 loop tree, arrange for the child function to fix up loops.  */
1433 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1434 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1436 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1437 num = vec_safe_length (child_cfun->local_decls);
1438 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1440 t = (*child_cfun->local_decls)[srcidx];
1441 if (DECL_CONTEXT (t) == cfun->decl)
1442 continue;
1443 if (srcidx != dstidx)
1444 (*child_cfun->local_decls)[dstidx] = t;
1445 dstidx++;
1447 if (dstidx != num)
1448 vec_safe_truncate (child_cfun->local_decls, dstidx);
1450 /* Inform the callgraph about the new function. */
1451 child_cfun->curr_properties = cfun->curr_properties;
1452 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1453 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1454 cgraph_node *node = cgraph_node::get_create (child_fn);
1455 node->parallelized_function = 1;
1456 cgraph_node::add_new_function (child_fn, true);
1458 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1459 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1461 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1462 fixed in a following pass. */
1463 push_cfun (child_cfun);
1464 if (need_asm)
1465 assign_assembler_name_if_needed (child_fn);
1467 if (optimize)
1468 optimize_omp_library_calls (entry_stmt);
1469 update_max_bb_count ();
1470 cgraph_edge::rebuild_edges ();
1472 /* Some EH regions might become dead, see PR34608. If
1473 pass_cleanup_cfg isn't the first pass to happen with the
1474 new child, these dead EH edges might cause problems.
1475 Clean them up now. */
1476 if (flag_exceptions)
1478 basic_block bb;
1479 bool changed = false;
1481 FOR_EACH_BB_FN (bb, cfun)
1482 changed |= gimple_purge_dead_eh_edges (bb);
1483 if (changed)
1484 cleanup_tree_cfg ();
1486 if (gimple_in_ssa_p (cfun))
1487 update_ssa (TODO_update_ssa);
1488 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1489 verify_loop_structure ();
1490 pop_cfun ();
1492 if (dump_file && !gimple_in_ssa_p (cfun))
1494 omp_any_child_fn_dumped = true;
1495 dump_function_header (dump_file, child_fn, dump_flags);
1496 dump_function_to_file (child_fn, dump_file, dump_flags);
1500 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1501 expand_parallel_call (region, new_bb,
1502 as_a <gomp_parallel *> (entry_stmt), ws_args);
1503 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1504 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1505 else
1506 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1507 if (gimple_in_ssa_p (cfun))
1508 update_ssa (TODO_update_ssa_only_virtuals);
1511 /* Information about members of an OpenACC collapsed loop nest. */
1513 struct oacc_collapse
1515 tree base; /* Base value. */
1516 tree iters; /* Number of steps. */
1517 tree step; /* Step size. */
1518 tree tile; /* Tile increment (if tiled). */
1519 tree outer; /* Tile iterator var. */
1522 /* Helper for expand_oacc_for. Determine collapsed loop information.
1523 Fill in COUNTS array. Emit any initialization code before GSI.
1524 Return the calculated outer loop bound of BOUND_TYPE. */
1526 static tree
1527 expand_oacc_collapse_init (const struct omp_for_data *fd,
1528 gimple_stmt_iterator *gsi,
1529 oacc_collapse *counts, tree bound_type,
1530 location_t loc)
1532 tree tiling = fd->tiling;
1533 tree total = build_int_cst (bound_type, 1);
1534 int ix;
1536 gcc_assert (integer_onep (fd->loop.step));
1537 gcc_assert (integer_zerop (fd->loop.n1));
1539 /* When tiling, the first operand of the tile clause applies to the
1540 innermost loop, and we work outwards from there. Seems
1541 backwards, but whatever. */
1542 for (ix = fd->collapse; ix--;)
1544 const omp_for_data_loop *loop = &fd->loops[ix];
1546 tree iter_type = TREE_TYPE (loop->v);
1547 tree diff_type = iter_type;
1548 tree plus_type = iter_type;
1550 gcc_assert (loop->cond_code == fd->loop.cond_code);
1552 if (POINTER_TYPE_P (iter_type))
1553 plus_type = sizetype;
1554 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1555 diff_type = signed_type_for (diff_type);
1556 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1557 diff_type = integer_type_node;
1559 if (tiling)
1561 tree num = build_int_cst (integer_type_node, fd->collapse);
1562 tree loop_no = build_int_cst (integer_type_node, ix);
1563 tree tile = TREE_VALUE (tiling);
1564 gcall *call
1565 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1566 /* gwv-outer=*/integer_zero_node,
1567 /* gwv-inner=*/integer_zero_node);
1569 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1570 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1571 gimple_call_set_lhs (call, counts[ix].tile);
1572 gimple_set_location (call, loc);
1573 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1575 tiling = TREE_CHAIN (tiling);
1577 else
1579 counts[ix].tile = NULL;
1580 counts[ix].outer = loop->v;
1583 tree b = loop->n1;
1584 tree e = loop->n2;
1585 tree s = loop->step;
1586 bool up = loop->cond_code == LT_EXPR;
1587 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1588 bool negating;
1589 tree expr;
1591 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1592 true, GSI_SAME_STMT);
1593 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1594 true, GSI_SAME_STMT);
1596 /* Convert the step, avoiding possible unsigned->signed overflow. */
1597 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1598 if (negating)
1599 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1600 s = fold_convert (diff_type, s);
1601 if (negating)
1602 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1603 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1604 true, GSI_SAME_STMT);
1606 /* Determine the range, avoiding possible unsigned->signed overflow. */
1607 negating = !up && TYPE_UNSIGNED (iter_type);
1608 expr = fold_build2 (MINUS_EXPR, plus_type,
1609 fold_convert (plus_type, negating ? b : e),
1610 fold_convert (plus_type, negating ? e : b));
1611 expr = fold_convert (diff_type, expr);
1612 if (negating)
1613 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1614 tree range = force_gimple_operand_gsi
1615 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1617 /* Determine number of iterations. */
1618 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1619 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1620 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1622 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1623 true, GSI_SAME_STMT);
1625 counts[ix].base = b;
1626 counts[ix].iters = iters;
1627 counts[ix].step = s;
1629 total = fold_build2 (MULT_EXPR, bound_type, total,
1630 fold_convert (bound_type, iters));
1633 return total;
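/* A worked example of the iteration-count computation above, assuming an
   up-counting loop "for (i = 0; i < 10; i += 3)":
     range = 10 - 0           = 10
     iters = (10 - 1 + 3) / 3 = 4       (i = 0, 3, 6, 9)
   i.e. (range - dir + step) / step with dir = +1 for LT_EXPR loops.  */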
1636 /* Emit initializers for collapsed loop members. INNER is true if
1637 this is for the element loop of a TILE. IVAR is the outer
1638 loop iteration variable, from which collapsed loop iteration values
1639 are calculated. COUNTS array has been initialized by
1640   expand_oacc_collapse_init.  */
1642 static void
1643 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1644 gimple_stmt_iterator *gsi,
1645 const oacc_collapse *counts, tree ivar)
1647 tree ivar_type = TREE_TYPE (ivar);
1649 /* The most rapidly changing iteration variable is the innermost
1650 one. */
1651 for (int ix = fd->collapse; ix--;)
1653 const omp_for_data_loop *loop = &fd->loops[ix];
1654 const oacc_collapse *collapse = &counts[ix];
1655 tree v = inner ? loop->v : collapse->outer;
1656 tree iter_type = TREE_TYPE (v);
1657 tree diff_type = TREE_TYPE (collapse->step);
1658 tree plus_type = iter_type;
1659 enum tree_code plus_code = PLUS_EXPR;
1660 tree expr;
1662 if (POINTER_TYPE_P (iter_type))
1664 plus_code = POINTER_PLUS_EXPR;
1665 plus_type = sizetype;
1668 expr = ivar;
1669 if (ix)
1671 tree mod = fold_convert (ivar_type, collapse->iters);
1672 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1673 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1674 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1675 true, GSI_SAME_STMT);
1678 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1679 collapse->step);
1680 expr = fold_build2 (plus_code, iter_type,
1681 inner ? collapse->outer : collapse->base,
1682 fold_convert (plus_type, expr));
1683 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1684 true, GSI_SAME_STMT);
1685 gassign *ass = gimple_build_assign (v, expr);
1686 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
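/* A worked example of the decomposition above for a collapse(2) nest with
   per-loop iteration counts { 3, 4 } (outer, inner) and a combined
   iteration number ivar = 7:
     inner index = 7 % 4 = 3
     outer index = 7 / 4 = 1
   Each index is then scaled by that loop's step and added to its base
   (or to the tile iterator when expanding the element loop of a tile).  */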
1690 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1691 of the combined collapse > 1 loop constructs, generate code like:
1692 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1693 if (cond3 is <)
1694 adj = STEP3 - 1;
1695 else
1696 adj = STEP3 + 1;
1697 count3 = (adj + N32 - N31) / STEP3;
1698 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1699 if (cond2 is <)
1700 adj = STEP2 - 1;
1701 else
1702 adj = STEP2 + 1;
1703 count2 = (adj + N22 - N21) / STEP2;
1704 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1705 if (cond1 is <)
1706 adj = STEP1 - 1;
1707 else
1708 adj = STEP1 + 1;
1709 count1 = (adj + N12 - N11) / STEP1;
1710 count = count1 * count2 * count3;
1711 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1712 count = 0;
1713 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1714 of the combined loop constructs, just initialize COUNTS array
1715 from the _looptemp_ clauses. */
1717 /* NOTE: It *could* be better to moosh all of the BBs together,
1718 creating one larger BB with all the computation and the unexpected
1719 jump at the end. I.e.
1721 bool zero3, zero2, zero1, zero;
1723 zero3 = N32 c3 N31;
1724 count3 = (N32 - N31) /[cl] STEP3;
1725 zero2 = N22 c2 N21;
1726 count2 = (N22 - N21) /[cl] STEP2;
1727 zero1 = N12 c1 N11;
1728 count1 = (N12 - N11) /[cl] STEP1;
1729 zero = zero3 || zero2 || zero1;
1730 count = count1 * count2 * count3;
1731 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1733 After all, we expect the zero=false, and thus we expect to have to
1734 evaluate all of the comparison expressions, so short-circuiting
1735 oughtn't be a win. Since the condition isn't protecting a
1736 denominator, we're not concerned about divide-by-zero, so we can
1737 fully evaluate count even if a numerator turned out to be wrong.
1739 It seems like putting this all together would create much better
1740 scheduling opportunities, and less pressure on the chip's branch
1741 predictor. */
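/* A small worked example of the count computation described above, for
     #pragma omp for collapse(2)
     for (i = 0; i < 5; i++)
       for (j = 0; j < 4; j += 2)
   Using adj = STEP - 1 for < loops:
     count1 = (0 + 5 - 0) / 1 = 5
     count2 = (1 + 4 - 0) / 2 = 2
     count  = 5 * 2          = 10 logical iterations,
   and each zero-trip check branches to ZERO_ITER_BB before the
   corresponding count is computed.  */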
1743 static void
1744 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1745 basic_block &entry_bb, tree *counts,
1746 basic_block &zero_iter1_bb, int &first_zero_iter1,
1747 basic_block &zero_iter2_bb, int &first_zero_iter2,
1748 basic_block &l2_dom_bb)
1750 tree t, type = TREE_TYPE (fd->loop.v);
1751 edge e, ne;
1752 int i;
1754 /* Collapsed loops need work for expansion into SSA form. */
1755 gcc_assert (!gimple_in_ssa_p (cfun));
1757 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1758 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1760 gcc_assert (fd->ordered == 0);
1761       /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1762	  isn't supposed to be handled, as the inner loop doesn't
1763	  use it.  */
1764 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1765 OMP_CLAUSE__LOOPTEMP_);
1766 gcc_assert (innerc);
1767 for (i = 0; i < fd->collapse; i++)
1769 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1770 OMP_CLAUSE__LOOPTEMP_);
1771 gcc_assert (innerc);
1772 if (i)
1773 counts[i] = OMP_CLAUSE_DECL (innerc);
1774 else
1775 counts[0] = NULL_TREE;
1777 return;
1780 for (i = fd->collapse; i < fd->ordered; i++)
1782 tree itype = TREE_TYPE (fd->loops[i].v);
1783 counts[i] = NULL_TREE;
1784 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1785 fold_convert (itype, fd->loops[i].n1),
1786 fold_convert (itype, fd->loops[i].n2));
1787 if (t && integer_zerop (t))
1789 for (i = fd->collapse; i < fd->ordered; i++)
1790 counts[i] = build_int_cst (type, 0);
1791 break;
1794 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1796 tree itype = TREE_TYPE (fd->loops[i].v);
1798 if (i >= fd->collapse && counts[i])
1799 continue;
1800 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1801 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1802 fold_convert (itype, fd->loops[i].n1),
1803 fold_convert (itype, fd->loops[i].n2)))
1804 == NULL_TREE || !integer_onep (t)))
1806 gcond *cond_stmt;
1807 tree n1, n2;
1808 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1809 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1810 true, GSI_SAME_STMT);
1811 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1812 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1813 true, GSI_SAME_STMT);
1814 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1815 NULL_TREE, NULL_TREE);
1816 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1817 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1818 expand_omp_regimplify_p, NULL, NULL)
1819 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1820 expand_omp_regimplify_p, NULL, NULL))
1822 *gsi = gsi_for_stmt (cond_stmt);
1823 gimple_regimplify_operands (cond_stmt, gsi);
1825 e = split_block (entry_bb, cond_stmt);
1826 basic_block &zero_iter_bb
1827 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1828 int &first_zero_iter
1829 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1830 if (zero_iter_bb == NULL)
1832 gassign *assign_stmt;
1833 first_zero_iter = i;
1834 zero_iter_bb = create_empty_bb (entry_bb);
1835 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1836 *gsi = gsi_after_labels (zero_iter_bb);
1837 if (i < fd->collapse)
1838 assign_stmt = gimple_build_assign (fd->loop.n2,
1839 build_zero_cst (type));
1840 else
1842 counts[i] = create_tmp_reg (type, ".count");
1843 assign_stmt
1844 = gimple_build_assign (counts[i], build_zero_cst (type));
1846 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1847 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1848 entry_bb);
1850 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1851 ne->probability = profile_probability::very_unlikely ();
1852 e->flags = EDGE_TRUE_VALUE;
1853 e->probability = ne->probability.invert ();
1854 if (l2_dom_bb == NULL)
1855 l2_dom_bb = entry_bb;
1856 entry_bb = e->dest;
1857 *gsi = gsi_last_nondebug_bb (entry_bb);
1860 if (POINTER_TYPE_P (itype))
1861 itype = signed_type_for (itype);
1862 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1863 ? -1 : 1));
1864 t = fold_build2 (PLUS_EXPR, itype,
1865 fold_convert (itype, fd->loops[i].step), t);
1866 t = fold_build2 (PLUS_EXPR, itype, t,
1867 fold_convert (itype, fd->loops[i].n2));
1868 t = fold_build2 (MINUS_EXPR, itype, t,
1869 fold_convert (itype, fd->loops[i].n1));
1870 /* ?? We could probably use CEIL_DIV_EXPR instead of
1871 TRUNC_DIV_EXPR and adjusting by hand, unless we can't
1872 generate the same code in the end because generically we
1873 don't know that the values involved must be negative for
1874 GT. ?? */
1875 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1876 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1877 fold_build1 (NEGATE_EXPR, itype, t),
1878 fold_build1 (NEGATE_EXPR, itype,
1879 fold_convert (itype,
1880 fd->loops[i].step)));
1881 else
1882 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1883 fold_convert (itype, fd->loops[i].step));
1884 t = fold_convert (type, t);
1885 if (TREE_CODE (t) == INTEGER_CST)
1886 counts[i] = t;
1887 else
1889 if (i < fd->collapse || i != first_zero_iter2)
1890 counts[i] = create_tmp_reg (type, ".count");
1891 expand_omp_build_assign (gsi, counts[i], t);
1893 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1895 if (i == 0)
1896 t = counts[0];
1897 else
1898 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1899 expand_omp_build_assign (gsi, fd->loop.n2, t);
1904 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1905 T = V;
1906 V3 = N31 + (T % count3) * STEP3;
1907 T = T / count3;
1908 V2 = N21 + (T % count2) * STEP2;
1909 T = T / count2;
1910 V1 = N11 + T * STEP1;
1911 if this loop doesn't have an inner loop construct combined with it.
1912 If it does have an inner loop construct combined with it and the
1913 iteration count isn't known constant, store values from counts array
1914 into its _looptemp_ temporaries instead. */
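/* For instance (numbers purely illustrative): with count3 = 4 and
   count2 = 5, a logical iteration number T = 37 is decomposed as
     V3 = N31 + (37 % 4) * STEP3 = N31 + 1 * STEP3, T = 37 / 4 = 9,
     V2 = N21 + (9 % 5) * STEP2 = N21 + 4 * STEP2, T = 9 / 5 = 1,
     V1 = N11 + 1 * STEP1,
   which is exactly iteration (1, 4, 1) of the original nest, i.e. the
   innermost index varies fastest.  */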
1916 static void
1917 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1918 tree *counts, gimple *inner_stmt, tree startvar)
1920 int i;
1921 if (gimple_omp_for_combined_p (fd->for_stmt))
1923 /* If fd->loop.n2 is constant, then no propagation of the counts
1924 is needed, they are constant. */
1925 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1926 return;
1928 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1929 ? gimple_omp_taskreg_clauses (inner_stmt)
1930 : gimple_omp_for_clauses (inner_stmt);
1931 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1932 isn't supposed to be handled, as the inner loop doesn't
1933 use it. */
1934 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1935 gcc_assert (innerc);
1936 for (i = 0; i < fd->collapse; i++)
1938 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1939 OMP_CLAUSE__LOOPTEMP_);
1940 gcc_assert (innerc);
1941 if (i)
1943 tree tem = OMP_CLAUSE_DECL (innerc);
1944 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1945 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1946 false, GSI_CONTINUE_LINKING);
1947 gassign *stmt = gimple_build_assign (tem, t);
1948 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1951 return;
1954 tree type = TREE_TYPE (fd->loop.v);
1955 tree tem = create_tmp_reg (type, ".tem");
1956 gassign *stmt = gimple_build_assign (tem, startvar);
1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1959 for (i = fd->collapse - 1; i >= 0; i--)
1961 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1962 itype = vtype;
1963 if (POINTER_TYPE_P (vtype))
1964 itype = signed_type_for (vtype);
1965 if (i != 0)
1966 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1967 else
1968 t = tem;
1969 t = fold_convert (itype, t);
1970 t = fold_build2 (MULT_EXPR, itype, t,
1971 fold_convert (itype, fd->loops[i].step));
1972 if (POINTER_TYPE_P (vtype))
1973 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1974 else
1975 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1976 t = force_gimple_operand_gsi (gsi, t,
1977 DECL_P (fd->loops[i].v)
1978 && TREE_ADDRESSABLE (fd->loops[i].v),
1979 NULL_TREE, false,
1980 GSI_CONTINUE_LINKING);
1981 stmt = gimple_build_assign (fd->loops[i].v, t);
1982 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1983 if (i != 0)
1985 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1986 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1987 false, GSI_CONTINUE_LINKING);
1988 stmt = gimple_build_assign (tem, t);
1989 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1994 /* Helper function for expand_omp_for_*. Generate code like:
1995 L10:
1996 V3 += STEP3;
1997 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1998 L11:
1999 V3 = N31;
2000 V2 += STEP2;
2001 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2002 L12:
2003 V2 = N21;
2004 V1 += STEP1;
2005 goto BODY_BB; */
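/* Note that no exit test is generated for V1: the driver loop already
   bounds the total number of iterations via the collapsed count, so the
   outermost variable only ever needs to be incremented.  As an
   illustrative collapse(2) instance the generated update reduces to

     V2 += STEP2;
     if (V2 cond2 N22) goto BODY_BB;
     V2 = N21;
     V1 += STEP1;
     goto BODY_BB;  */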
2007 static basic_block
2008 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2009 basic_block body_bb)
2011 basic_block last_bb, bb, collapse_bb = NULL;
2012 int i;
2013 gimple_stmt_iterator gsi;
2014 edge e;
2015 tree t;
2016 gimple *stmt;
2018 last_bb = cont_bb;
2019 for (i = fd->collapse - 1; i >= 0; i--)
2021 tree vtype = TREE_TYPE (fd->loops[i].v);
2023 bb = create_empty_bb (last_bb);
2024 add_bb_to_loop (bb, last_bb->loop_father);
2025 gsi = gsi_start_bb (bb);
2027 if (i < fd->collapse - 1)
2029 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2030 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2032 t = fd->loops[i + 1].n1;
2033 t = force_gimple_operand_gsi (&gsi, t,
2034 DECL_P (fd->loops[i + 1].v)
2035 && TREE_ADDRESSABLE (fd->loops[i + 1].v),
2037 NULL_TREE, false,
2038 GSI_CONTINUE_LINKING);
2039 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2040 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2042 else
2043 collapse_bb = bb;
2045 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2047 if (POINTER_TYPE_P (vtype))
2048 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2049 else
2050 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2051 t = force_gimple_operand_gsi (&gsi, t,
2052 DECL_P (fd->loops[i].v)
2053 && TREE_ADDRESSABLE (fd->loops[i].v),
2054 NULL_TREE, false, GSI_CONTINUE_LINKING);
2055 stmt = gimple_build_assign (fd->loops[i].v, t);
2056 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2058 if (i > 0)
2060 t = fd->loops[i].n2;
2061 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2062 false, GSI_CONTINUE_LINKING);
2063 tree v = fd->loops[i].v;
2064 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2065 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2066 false, GSI_CONTINUE_LINKING);
2067 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2068 stmt = gimple_build_cond_empty (t);
2069 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2070 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2071 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2073 else
2074 make_edge (bb, body_bb, EDGE_FALLTHRU);
2075 last_bb = bb;
2078 return collapse_bb;
2081 /* Expand #pragma omp ordered depend(source). */
2083 static void
2084 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2085 tree *counts, location_t loc)
2087 enum built_in_function source_ix
2088 = fd->iter_type == long_integer_type_node
2089 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2090 gimple *g
2091 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2092 build_fold_addr_expr (counts[fd->ordered]));
2093 gimple_set_location (g, loc);
2094 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2097 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2099 static void
2100 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2101 tree *counts, tree c, location_t loc)
2103 auto_vec<tree, 10> args;
2104 enum built_in_function sink_ix
2105 = fd->iter_type == long_integer_type_node
2106 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2107 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2108 int i;
2109 gimple_stmt_iterator gsi2 = *gsi;
2110 bool warned_step = false;
2112 for (i = 0; i < fd->ordered; i++)
2114 tree step = NULL_TREE;
2115 off = TREE_PURPOSE (deps);
2116 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2118 step = TREE_OPERAND (off, 1);
2119 off = TREE_OPERAND (off, 0);
2121 if (!integer_zerop (off))
2123 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2124 || fd->loops[i].cond_code == GT_EXPR);
2125 bool forward = fd->loops[i].cond_code == LT_EXPR;
2126 if (step)
2128 /* Non-simple Fortran DO loops. If step is variable,
2129 we don't even know the direction at compile time, so we
2130 can't warn. */
2131 if (TREE_CODE (step) != INTEGER_CST)
2132 break;
2133 forward = tree_int_cst_sgn (step) != -1;
2135 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2136 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2137 "lexically later iteration");
2138 break;
2140 deps = TREE_CHAIN (deps);
2142 /* If all offsets corresponding to the collapsed loops are zero,
2143 this depend clause can be ignored. FIXME: but there is still a
2144 flush needed. We need to emit one __sync_synchronize () for it
2145 though (perhaps conditionally)? Solve this together with the
2146 conservative dependence folding optimization.
2147 if (i >= fd->collapse)
2148 return; */
2150 deps = OMP_CLAUSE_DECL (c);
2151 gsi_prev (&gsi2);
2152 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2153 edge e2 = split_block_after_labels (e1->dest);
2155 gsi2 = gsi_after_labels (e1->dest);
2156 *gsi = gsi_last_bb (e1->src);
2157 for (i = 0; i < fd->ordered; i++)
2159 tree itype = TREE_TYPE (fd->loops[i].v);
2160 tree step = NULL_TREE;
2161 tree orig_off = NULL_TREE;
2162 if (POINTER_TYPE_P (itype))
2163 itype = sizetype;
2164 if (i)
2165 deps = TREE_CHAIN (deps);
2166 off = TREE_PURPOSE (deps);
2167 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2169 step = TREE_OPERAND (off, 1);
2170 off = TREE_OPERAND (off, 0);
2171 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2172 && integer_onep (fd->loops[i].step)
2173 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2175 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2176 if (step)
2178 off = fold_convert_loc (loc, itype, off);
2179 orig_off = off;
2180 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2183 if (integer_zerop (off))
2184 t = boolean_true_node;
2185 else
2187 tree a;
2188 tree co = fold_convert_loc (loc, itype, off);
2189 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2191 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2192 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2193 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2194 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2195 co);
2197 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2198 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2199 fd->loops[i].v, co);
2200 else
2201 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2202 fd->loops[i].v, co);
2203 if (step)
2205 tree t1, t2;
2206 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2207 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2208 fd->loops[i].n1);
2209 else
2210 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2211 fd->loops[i].n2);
2212 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2213 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2214 fd->loops[i].n2);
2215 else
2216 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2217 fd->loops[i].n1);
2218 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2219 step, build_int_cst (TREE_TYPE (step), 0));
2220 if (TREE_CODE (step) != INTEGER_CST)
2222 t1 = unshare_expr (t1);
2223 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2224 false, GSI_CONTINUE_LINKING);
2225 t2 = unshare_expr (t2);
2226 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2227 false, GSI_CONTINUE_LINKING);
2229 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2230 t, t2, t1);
2232 else if (fd->loops[i].cond_code == LT_EXPR)
2234 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2235 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2236 fd->loops[i].n1);
2237 else
2238 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2239 fd->loops[i].n2);
2241 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2242 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2243 fd->loops[i].n2);
2244 else
2245 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2246 fd->loops[i].n1);
2248 if (cond)
2249 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2250 else
2251 cond = t;
2253 off = fold_convert_loc (loc, itype, off);
2255 if (step
2256 || (fd->loops[i].cond_code == LT_EXPR
2257 ? !integer_onep (fd->loops[i].step)
2258 : !integer_minus_onep (fd->loops[i].step)))
2260 if (step == NULL_TREE
2261 && TYPE_UNSIGNED (itype)
2262 && fd->loops[i].cond_code == GT_EXPR)
2263 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2264 fold_build1_loc (loc, NEGATE_EXPR, itype,
2265 s));
2266 else
2267 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2268 orig_off ? orig_off : off, s);
2269 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2270 build_int_cst (itype, 0));
2271 if (integer_zerop (t) && !warned_step)
2273 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2274 "in the iteration space");
2275 warned_step = true;
2277 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2278 cond, t);
2281 if (i <= fd->collapse - 1 && fd->collapse > 1)
2282 t = fd->loop.v;
2283 else if (counts[i])
2284 t = counts[i];
2285 else
2287 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2288 fd->loops[i].v, fd->loops[i].n1);
2289 t = fold_convert_loc (loc, fd->iter_type, t);
2291 if (step)
2292 /* We have divided off by step already earlier. */;
2293 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2294 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2295 fold_build1_loc (loc, NEGATE_EXPR, itype,
2296 s));
2297 else
2298 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2299 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2300 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2301 off = fold_convert_loc (loc, fd->iter_type, off);
2302 if (i <= fd->collapse - 1 && fd->collapse > 1)
2304 if (i)
2305 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2306 off);
2307 if (i < fd->collapse - 1)
2309 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2310 counts[i]);
2311 continue;
2314 off = unshare_expr (off);
2315 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2316 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2317 true, GSI_SAME_STMT);
2318 args.safe_push (t);
2320 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2321 gimple_set_location (g, loc);
2322 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2324 cond = unshare_expr (cond);
2325 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2326 GSI_CONTINUE_LINKING);
2327 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2328 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2329 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2330 e1->probability = e3->probability.invert ();
2331 e1->flags = EDGE_TRUE_VALUE;
2332 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2334 *gsi = gsi_after_labels (e2->dest);
2337 /* Expand all #pragma omp ordered depend(source) and
2338 #pragma omp ordered depend(sink:...) constructs in the current
2339 #pragma omp for ordered(n) region. */
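/* As an illustrative example (bounds and the work function are arbitrary),
   this is the kind of doacross loop being expanded here:

     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
           work (i, j);
           #pragma omp ordered depend(source)
         }

   Each depend(sink: ...) clause becomes a call to the
   BUILT_IN_GOMP_DOACROSS_WAIT builtin (or its _ULL_ counterpart) passing
   the iteration the sink vector refers to (see expand_omp_ordered_sink),
   and depend(source) becomes a BUILT_IN_GOMP_DOACROSS_POST call passing
   the .orditera array of current per-dimension counters (see
   expand_omp_ordered_source).  */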
2341 static void
2342 expand_omp_ordered_source_sink (struct omp_region *region,
2343 struct omp_for_data *fd, tree *counts,
2344 basic_block cont_bb)
2346 struct omp_region *inner;
2347 int i;
2348 for (i = fd->collapse - 1; i < fd->ordered; i++)
2349 if (i == fd->collapse - 1 && fd->collapse > 1)
2350 counts[i] = NULL_TREE;
2351 else if (i >= fd->collapse && !cont_bb)
2352 counts[i] = build_zero_cst (fd->iter_type);
2353 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2354 && integer_onep (fd->loops[i].step))
2355 counts[i] = NULL_TREE;
2356 else
2357 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2358 tree atype
2359 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2360 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2361 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2363 for (inner = region->inner; inner; inner = inner->next)
2364 if (inner->type == GIMPLE_OMP_ORDERED)
2366 gomp_ordered *ord_stmt = inner->ord_stmt;
2367 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2368 location_t loc = gimple_location (ord_stmt);
2369 tree c;
2370 for (c = gimple_omp_ordered_clauses (ord_stmt);
2371 c; c = OMP_CLAUSE_CHAIN (c))
2372 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2373 break;
2374 if (c)
2375 expand_omp_ordered_source (&gsi, fd, counts, loc);
2376 for (c = gimple_omp_ordered_clauses (ord_stmt);
2377 c; c = OMP_CLAUSE_CHAIN (c))
2378 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2379 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2380 gsi_remove (&gsi, true);
2384 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2385 collapsed. */
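/* Continuing the ordered(2) illustration above: only the outermost loop is
   workshared, so the inner j loop is recreated here around BODY_BB, and
   each of its iterations stores the current per-dimension counter into the
   .orditera array so the depend(sink)/depend(source) calls can be checked
   against it.  */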
2387 static basic_block
2388 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2389 basic_block cont_bb, basic_block body_bb,
2390 bool ordered_lastprivate)
2392 if (fd->ordered == fd->collapse)
2393 return cont_bb;
2395 if (!cont_bb)
2397 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2398 for (int i = fd->collapse; i < fd->ordered; i++)
2400 tree type = TREE_TYPE (fd->loops[i].v);
2401 tree n1 = fold_convert (type, fd->loops[i].n1);
2402 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2403 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2404 size_int (i - fd->collapse + 1),
2405 NULL_TREE, NULL_TREE);
2406 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2408 return NULL;
2411 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2413 tree t, type = TREE_TYPE (fd->loops[i].v);
2414 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2415 expand_omp_build_assign (&gsi, fd->loops[i].v,
2416 fold_convert (type, fd->loops[i].n1));
2417 if (counts[i])
2418 expand_omp_build_assign (&gsi, counts[i],
2419 build_zero_cst (fd->iter_type));
2420 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2421 size_int (i - fd->collapse + 1),
2422 NULL_TREE, NULL_TREE);
2423 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2424 if (!gsi_end_p (gsi))
2425 gsi_prev (&gsi);
2426 else
2427 gsi = gsi_last_bb (body_bb);
2428 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2429 basic_block new_body = e1->dest;
2430 if (body_bb == cont_bb)
2431 cont_bb = new_body;
2432 edge e2 = NULL;
2433 basic_block new_header;
2434 if (EDGE_COUNT (cont_bb->preds) > 0)
2436 gsi = gsi_last_bb (cont_bb);
2437 if (POINTER_TYPE_P (type))
2438 t = fold_build_pointer_plus (fd->loops[i].v,
2439 fold_convert (sizetype,
2440 fd->loops[i].step));
2441 else
2442 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].step));
2444 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2445 if (counts[i])
2447 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2448 build_int_cst (fd->iter_type, 1));
2449 expand_omp_build_assign (&gsi, counts[i], t);
2450 t = counts[i];
2452 else
2454 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2455 fd->loops[i].v, fd->loops[i].n1);
2456 t = fold_convert (fd->iter_type, t);
2457 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2458 true, GSI_SAME_STMT);
2460 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2461 size_int (i - fd->collapse + 1),
2462 NULL_TREE, NULL_TREE);
2463 expand_omp_build_assign (&gsi, aref, t);
2464 gsi_prev (&gsi);
2465 e2 = split_block (cont_bb, gsi_stmt (gsi));
2466 new_header = e2->dest;
2468 else
2469 new_header = cont_bb;
2470 gsi = gsi_after_labels (new_header);
2471 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2472 true, GSI_SAME_STMT);
2473 tree n2
2474 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2475 true, NULL_TREE, true, GSI_SAME_STMT);
2476 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2477 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2478 edge e3 = split_block (new_header, gsi_stmt (gsi));
2479 cont_bb = e3->dest;
2480 remove_edge (e1);
2481 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2482 e3->flags = EDGE_FALSE_VALUE;
2483 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2484 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2485 e1->probability = e3->probability.invert ();
2487 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2488 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2490 if (e2)
2492 struct loop *loop = alloc_loop ();
2493 loop->header = new_header;
2494 loop->latch = e2->src;
2495 add_loop (loop, body_bb->loop_father);
2499 /* If there are any lastprivate clauses and it is possible some loops
2500 might have zero iterations, ensure all the decls are initialized;
2501 otherwise we could crash evaluating C++ class iterators with lastprivate
2502 clauses. */
2503 bool need_inits = false;
2504 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2505 if (need_inits)
2507 tree type = TREE_TYPE (fd->loops[i].v);
2508 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2509 expand_omp_build_assign (&gsi, fd->loops[i].v,
2510 fold_convert (type, fd->loops[i].n1));
2512 else
2514 tree type = TREE_TYPE (fd->loops[i].v);
2515 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2516 boolean_type_node,
2517 fold_convert (type, fd->loops[i].n1),
2518 fold_convert (type, fd->loops[i].n2));
2519 if (!integer_onep (this_cond))
2520 need_inits = true;
2523 return cont_bb;
2526 /* A subroutine of expand_omp_for. Generate code for a parallel
2527 loop with any schedule. Given parameters:
2529 for (V = N1; V cond N2; V += STEP) BODY;
2531 where COND is "<" or ">", we generate pseudocode
2533 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2534 if (more) goto L0; else goto L3;
2536 V = istart0;
2537 iend = iend0;
2539 BODY;
2540 V += STEP;
2541 if (V cond iend) goto L1; else goto L2;
2543 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2546 If this is a combined omp parallel loop, instead of the call to
2547 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2548 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2549 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2550 inner GIMPLE_OMP_FOR and V += STEP; and
2551 if (V cond iend) goto L1; else goto L2; are removed.
2553 For collapsed loops, given parameters:
2554 collapse(3)
2555 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2556 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2557 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2558 BODY;
2560 we generate pseudocode
2562 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2563 if (cond3 is <)
2564 adj = STEP3 - 1;
2565 else
2566 adj = STEP3 + 1;
2567 count3 = (adj + N32 - N31) / STEP3;
2568 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2569 if (cond2 is <)
2570 adj = STEP2 - 1;
2571 else
2572 adj = STEP2 + 1;
2573 count2 = (adj + N22 - N21) / STEP2;
2574 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2575 if (cond1 is <)
2576 adj = STEP1 - 1;
2577 else
2578 adj = STEP1 + 1;
2579 count1 = (adj + N12 - N11) / STEP1;
2580 count = count1 * count2 * count3;
2581 goto Z1;
2583 count = 0;
2585 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2586 if (more) goto L0; else goto L3;
2588 V = istart0;
2589 T = V;
2590 V3 = N31 + (T % count3) * STEP3;
2591 T = T / count3;
2592 V2 = N21 + (T % count2) * STEP2;
2593 T = T / count2;
2594 V1 = N11 + T * STEP1;
2595 iend = iend0;
2597 BODY;
2598 V += 1;
2599 if (V < iend) goto L10; else goto L2;
2600 L10:
2601 V3 += STEP3;
2602 if (V3 cond3 N32) goto L1; else goto L11;
2603 L11:
2604 V3 = N31;
2605 V2 += STEP2;
2606 if (V2 cond2 N22) goto L1; else goto L12;
2607 L12:
2608 V2 = N21;
2609 V1 += STEP1;
2610 goto L1;
2612 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
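/* As an illustrative example (schedule kind and bounds are arbitrary),
   a loop such as

     #pragma omp for schedule(dynamic, 4) collapse(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         body;

   reaches this function with START_FN/NEXT_FN naming the
   GOMP_loop_dynamic_start/GOMP_loop_dynamic_next builtins: the start call
   hands out an initial [istart0, iend0) chunk of the count1 * count2
   logical iteration space, the inner loop walks that chunk while
   reconstructing i and j from the logical index, and the next call keeps
   fetching further chunks until it returns false.  */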
2617 static void
2618 expand_omp_for_generic (struct omp_region *region,
2619 struct omp_for_data *fd,
2620 enum built_in_function start_fn,
2621 enum built_in_function next_fn,
2622 tree sched_arg,
2623 gimple *inner_stmt)
2625 tree type, istart0, iend0, iend;
2626 tree t, vmain, vback, bias = NULL_TREE;
2627 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2628 basic_block l2_bb = NULL, l3_bb = NULL;
2629 gimple_stmt_iterator gsi;
2630 gassign *assign_stmt;
2631 bool in_combined_parallel = is_combined_parallel (region);
2632 bool broken_loop = region->cont == NULL;
2633 edge e, ne;
2634 tree *counts = NULL;
2635 int i;
2636 bool ordered_lastprivate = false;
2638 gcc_assert (!broken_loop || !in_combined_parallel);
2639 gcc_assert (fd->iter_type == long_integer_type_node
2640 || !in_combined_parallel);
2642 entry_bb = region->entry;
2643 cont_bb = region->cont;
2644 collapse_bb = NULL;
2645 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2646 gcc_assert (broken_loop
2647 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2648 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2649 l1_bb = single_succ (l0_bb);
2650 if (!broken_loop)
2652 l2_bb = create_empty_bb (cont_bb);
2653 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2654 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2655 == l1_bb));
2656 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2658 else
2659 l2_bb = NULL;
2660 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2661 exit_bb = region->exit;
2663 gsi = gsi_last_nondebug_bb (entry_bb);
2665 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2666 if (fd->ordered
2667 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2668 OMP_CLAUSE_LASTPRIVATE))
2669 ordered_lastprivate = true;
2670 tree reductions = NULL_TREE;
2671 tree mem = NULL_TREE;
2672 if (sched_arg)
2674 if (fd->have_reductemp)
2676 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2677 OMP_CLAUSE__REDUCTEMP_);
2678 reductions = OMP_CLAUSE_DECL (c);
2679 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2680 gimple *g = SSA_NAME_DEF_STMT (reductions);
2681 reductions = gimple_assign_rhs1 (g);
2682 OMP_CLAUSE_DECL (c) = reductions;
2683 entry_bb = gimple_bb (g);
2684 edge e = split_block (entry_bb, g);
2685 if (region->entry == entry_bb)
2686 region->entry = e->dest;
2687 gsi = gsi_last_bb (entry_bb);
2689 else
2690 reductions = null_pointer_node;
2691 /* For now. */
2692 mem = null_pointer_node;
2694 if (fd->collapse > 1 || fd->ordered)
2696 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2697 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2699 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2700 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2701 zero_iter1_bb, first_zero_iter1,
2702 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2704 if (zero_iter1_bb)
2706 /* Some counts[i] vars might be uninitialized if
2707 some loop has zero iterations. But the body shouldn't
2708 be executed in that case, so just avoid uninit warnings. */
2709 for (i = first_zero_iter1;
2710 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2711 if (SSA_VAR_P (counts[i]))
2712 TREE_NO_WARNING (counts[i]) = 1;
2713 gsi_prev (&gsi);
2714 e = split_block (entry_bb, gsi_stmt (gsi));
2715 entry_bb = e->dest;
2716 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2717 gsi = gsi_last_nondebug_bb (entry_bb);
2718 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2719 get_immediate_dominator (CDI_DOMINATORS,
2720 zero_iter1_bb));
2722 if (zero_iter2_bb)
2724 /* Some counts[i] vars might be uninitialized if
2725 some loop has zero iterations. But the body shouldn't
2726 be executed in that case, so just avoid uninit warnings. */
2727 for (i = first_zero_iter2; i < fd->ordered; i++)
2728 if (SSA_VAR_P (counts[i]))
2729 TREE_NO_WARNING (counts[i]) = 1;
2730 if (zero_iter1_bb)
2731 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2732 else
2734 gsi_prev (&gsi);
2735 e = split_block (entry_bb, gsi_stmt (gsi));
2736 entry_bb = e->dest;
2737 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2738 gsi = gsi_last_nondebug_bb (entry_bb);
2739 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2740 get_immediate_dominator (CDI_DOMINATORS, zero_iter2_bb));
2744 if (fd->collapse == 1)
2746 counts[0] = fd->loop.n2;
2747 fd->loop = fd->loops[0];
2751 type = TREE_TYPE (fd->loop.v);
2752 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2753 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2754 TREE_ADDRESSABLE (istart0) = 1;
2755 TREE_ADDRESSABLE (iend0) = 1;
2757 /* See if we need to bias by LLONG_MIN. */
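/* The GOMP_loop_ull_* runtime entry points work on unsigned iterators of
   fd->iter_type.  If the iteration variable has a signed type and the loop
   bounds aren't both compile-time constants of the same sign, adding
   TYPE_MIN_VALUE of that type (evaluated in unsigned arithmetic) shifts
   the whole signed range onto a contiguous unsigned range; the same bias
   is subtracted again when istart0/iend0 are converted back to the user's
   type below.  */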
2758 if (fd->iter_type == long_long_unsigned_type_node
2759 && TREE_CODE (type) == INTEGER_TYPE
2760 && !TYPE_UNSIGNED (type)
2761 && fd->ordered == 0)
2763 tree n1, n2;
2765 if (fd->loop.cond_code == LT_EXPR)
2767 n1 = fd->loop.n1;
2768 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2770 else
2772 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2773 n2 = fd->loop.n1;
2775 if (TREE_CODE (n1) != INTEGER_CST
2776 || TREE_CODE (n2) != INTEGER_CST
2777 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2778 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2781 gimple_stmt_iterator gsif = gsi;
2782 gsi_prev (&gsif);
2784 tree arr = NULL_TREE;
2785 if (in_combined_parallel)
2787 gcc_assert (fd->ordered == 0);
2788 /* In a combined parallel loop, emit a call to
2789 GOMP_loop_foo_next. */
2790 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2791 build_fold_addr_expr (istart0),
2792 build_fold_addr_expr (iend0));
2794 else
2796 tree t0, t1, t2, t3, t4;
2797 /* If this is not a combined parallel loop, emit a call to
2798 GOMP_loop_foo_start in ENTRY_BB. */
2799 t4 = build_fold_addr_expr (iend0);
2800 t3 = build_fold_addr_expr (istart0);
2801 if (fd->ordered)
2803 t0 = build_int_cst (unsigned_type_node,
2804 fd->ordered - fd->collapse + 1);
2805 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2806 fd->ordered
2807 - fd->collapse + 1),
2808 ".omp_counts");
2809 DECL_NAMELESS (arr) = 1;
2810 TREE_ADDRESSABLE (arr) = 1;
2811 TREE_STATIC (arr) = 1;
2812 vec<constructor_elt, va_gc> *v;
2813 vec_alloc (v, fd->ordered - fd->collapse + 1);
2814 int idx;
2816 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2818 tree c;
2819 if (idx == 0 && fd->collapse > 1)
2820 c = fd->loop.n2;
2821 else
2822 c = counts[idx + fd->collapse - 1];
2823 tree purpose = size_int (idx);
2824 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2825 if (TREE_CODE (c) != INTEGER_CST)
2826 TREE_STATIC (arr) = 0;
2829 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2830 if (!TREE_STATIC (arr))
2831 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2832 void_type_node, arr),
2833 true, NULL_TREE, true, GSI_SAME_STMT);
2834 t1 = build_fold_addr_expr (arr);
2835 t2 = NULL_TREE;
2837 else
2839 t2 = fold_convert (fd->iter_type, fd->loop.step);
2840 t1 = fd->loop.n2;
2841 t0 = fd->loop.n1;
2842 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2844 tree innerc
2845 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2846 OMP_CLAUSE__LOOPTEMP_);
2847 gcc_assert (innerc);
2848 t0 = OMP_CLAUSE_DECL (innerc);
2849 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2850 OMP_CLAUSE__LOOPTEMP_);
2851 gcc_assert (innerc);
2852 t1 = OMP_CLAUSE_DECL (innerc);
2854 if (POINTER_TYPE_P (TREE_TYPE (t0))
2855 && TYPE_PRECISION (TREE_TYPE (t0))
2856 != TYPE_PRECISION (fd->iter_type))
2858 /* Avoid casting pointers to an integer of a different size. */
2859 tree itype = signed_type_for (type);
2860 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2861 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2863 else
2865 t1 = fold_convert (fd->iter_type, t1);
2866 t0 = fold_convert (fd->iter_type, t0);
2868 if (bias)
2870 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2871 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2874 if (fd->iter_type == long_integer_type_node || fd->ordered)
2876 if (fd->chunk_size)
2878 t = fold_convert (fd->iter_type, fd->chunk_size);
2879 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2880 if (sched_arg)
2882 if (fd->ordered)
2883 t = build_call_expr (builtin_decl_explicit (start_fn),
2884 8, t0, t1, sched_arg, t, t3, t4,
2885 reductions, mem);
2886 else
2887 t = build_call_expr (builtin_decl_explicit (start_fn),
2888 9, t0, t1, t2, sched_arg, t, t3, t4,
2889 reductions, mem);
2891 else if (fd->ordered)
2892 t = build_call_expr (builtin_decl_explicit (start_fn),
2893 5, t0, t1, t, t3, t4);
2894 else
2895 t = build_call_expr (builtin_decl_explicit (start_fn),
2896 6, t0, t1, t2, t, t3, t4);
2898 else if (fd->ordered)
2899 t = build_call_expr (builtin_decl_explicit (start_fn),
2900 4, t0, t1, t3, t4);
2901 else
2902 t = build_call_expr (builtin_decl_explicit (start_fn),
2903 5, t0, t1, t2, t3, t4);
2905 else
2907 tree t5;
2908 tree c_bool_type;
2909 tree bfn_decl;
2911 /* The GOMP_loop_ull_*start functions have an additional boolean
2912 argument, true for < loops and false for > loops.
2913 In Fortran, the C bool type can be different from
2914 boolean_type_node. */
2915 bfn_decl = builtin_decl_explicit (start_fn);
2916 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2917 t5 = build_int_cst (c_bool_type,
2918 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2919 if (fd->chunk_size)
2921 tree bfn_decl = builtin_decl_explicit (start_fn);
2922 t = fold_convert (fd->iter_type, fd->chunk_size);
2923 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2924 if (sched_arg)
2925 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2926 t, t3, t4, reductions, mem);
2927 else
2928 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2930 else
2931 t = build_call_expr (builtin_decl_explicit (start_fn),
2932 6, t5, t0, t1, t2, t3, t4);
2935 if (TREE_TYPE (t) != boolean_type_node)
2936 t = fold_build2 (NE_EXPR, boolean_type_node,
2937 t, build_int_cst (TREE_TYPE (t), 0));
2938 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2939 true, GSI_SAME_STMT);
2940 if (arr && !TREE_STATIC (arr))
2942 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2943 TREE_THIS_VOLATILE (clobber) = 1;
2944 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2945 GSI_SAME_STMT);
2947 if (fd->have_reductemp)
2949 gimple *g = gsi_stmt (gsi);
2950 gsi_remove (&gsi, true);
2951 release_ssa_name (gimple_assign_lhs (g));
2953 entry_bb = region->entry;
2954 gsi = gsi_last_nondebug_bb (entry_bb);
2956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2958 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2960 /* Remove the GIMPLE_OMP_FOR statement. */
2961 gsi_remove (&gsi, true);
2963 if (gsi_end_p (gsif))
2964 gsif = gsi_after_labels (gsi_bb (gsif));
2965 gsi_next (&gsif);
2967 /* Iteration setup for sequential loop goes in L0_BB. */
2968 tree startvar = fd->loop.v;
2969 tree endvar = NULL_TREE;
2971 if (gimple_omp_for_combined_p (fd->for_stmt))
2973 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2974 && gimple_omp_for_kind (inner_stmt)
2975 == GF_OMP_FOR_KIND_SIMD);
2976 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2977 OMP_CLAUSE__LOOPTEMP_);
2978 gcc_assert (innerc);
2979 startvar = OMP_CLAUSE_DECL (innerc);
2980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2981 OMP_CLAUSE__LOOPTEMP_);
2982 gcc_assert (innerc);
2983 endvar = OMP_CLAUSE_DECL (innerc);
2986 gsi = gsi_start_bb (l0_bb);
2987 t = istart0;
2988 if (fd->ordered && fd->collapse == 1)
2989 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2990 fold_convert (fd->iter_type, fd->loop.step));
2991 else if (bias)
2992 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2993 if (fd->ordered && fd->collapse == 1)
2995 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2996 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2997 fd->loop.n1, fold_convert (sizetype, t));
2998 else
3000 t = fold_convert (TREE_TYPE (startvar), t);
3001 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3002 fd->loop.n1, t);
3005 else
3007 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3008 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3009 t = fold_convert (TREE_TYPE (startvar), t);
3011 t = force_gimple_operand_gsi (&gsi, t,
3012 DECL_P (startvar)
3013 && TREE_ADDRESSABLE (startvar),
3014 NULL_TREE, false, GSI_CONTINUE_LINKING);
3015 assign_stmt = gimple_build_assign (startvar, t);
3016 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3018 t = iend0;
3019 if (fd->ordered && fd->collapse == 1)
3020 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3021 fold_convert (fd->iter_type, fd->loop.step));
3022 else if (bias)
3023 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3024 if (fd->ordered && fd->collapse == 1)
3026 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3027 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3028 fd->loop.n1, fold_convert (sizetype, t));
3029 else
3031 t = fold_convert (TREE_TYPE (startvar), t);
3032 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3033 fd->loop.n1, t);
3036 else
3038 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3039 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3040 t = fold_convert (TREE_TYPE (startvar), t);
3042 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3043 false, GSI_CONTINUE_LINKING);
3044 if (endvar)
3046 assign_stmt = gimple_build_assign (endvar, iend);
3047 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3048 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3049 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3050 else
3051 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3052 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3054 /* Handle linear clause adjustments. */
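/* Illustration (clause and step purely arbitrary): for a clause such as
   linear(x:2) on this loop, the code below saves the incoming value of x
   before the loop, computes the logical number of iterations that precede
   this thread's first one as (startvar - N1) / STEP, and then assigns
   x = saved value + that number * 2, so the privatized copy starts with
   the value the sequential loop would have produced at that point.  */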
3055 tree itercnt = NULL_TREE;
3056 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3057 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3058 c; c = OMP_CLAUSE_CHAIN (c))
3059 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3060 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3062 tree d = OMP_CLAUSE_DECL (c);
3063 bool is_ref = omp_is_reference (d);
3064 tree t = d, a, dest;
3065 if (is_ref)
3066 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3067 tree type = TREE_TYPE (t);
3068 if (POINTER_TYPE_P (type))
3069 type = sizetype;
3070 dest = unshare_expr (t);
3071 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3072 expand_omp_build_assign (&gsif, v, t);
3073 if (itercnt == NULL_TREE)
3075 itercnt = startvar;
3076 tree n1 = fd->loop.n1;
3077 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3079 itercnt
3080 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3081 itercnt);
3082 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3084 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3085 itercnt, n1);
3086 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3087 itercnt, fd->loop.step);
3088 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3089 NULL_TREE, false,
3090 GSI_CONTINUE_LINKING);
3092 a = fold_build2 (MULT_EXPR, type,
3093 fold_convert (type, itercnt),
3094 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3095 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3096 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3097 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3098 false, GSI_CONTINUE_LINKING);
3099 assign_stmt = gimple_build_assign (dest, t);
3100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3102 if (fd->collapse > 1)
3103 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3105 if (fd->ordered)
3107 /* Until now, the counts array contained the number of iterations
3108 (or a variable holding it) for the ith loop. From now on, we need
3109 those counts only for the collapsed loops, and only for the 2nd
3110 through the last collapsed one. Move those one element earlier;
3111 we'll use counts[fd->collapse - 1] for the first source/sink
3112 iteration counter and so on, and counts[fd->ordered]
3113 as the array holding the current counter values for
3114 depend(source). */
3115 if (fd->collapse > 1)
3116 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3117 if (broken_loop)
3119 int i;
3120 for (i = fd->collapse; i < fd->ordered; i++)
3122 tree type = TREE_TYPE (fd->loops[i].v);
3123 tree this_cond
3124 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3125 fold_convert (type, fd->loops[i].n1),
3126 fold_convert (type, fd->loops[i].n2));
3127 if (!integer_onep (this_cond))
3128 break;
3130 if (i < fd->ordered)
3132 cont_bb
3133 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3134 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3135 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3136 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3137 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3138 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3139 make_edge (cont_bb, l1_bb, 0);
3140 l2_bb = create_empty_bb (cont_bb);
3141 broken_loop = false;
3144 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3145 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3146 ordered_lastprivate);
3147 if (counts[fd->collapse - 1])
3149 gcc_assert (fd->collapse == 1);
3150 gsi = gsi_last_bb (l0_bb);
3151 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3152 istart0, true);
3153 gsi = gsi_last_bb (cont_bb);
3154 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3155 build_int_cst (fd->iter_type, 1));
3156 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3157 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3158 size_zero_node, NULL_TREE, NULL_TREE);
3159 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3160 t = counts[fd->collapse - 1];
3162 else if (fd->collapse > 1)
3163 t = fd->loop.v;
3164 else
3166 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3167 fd->loops[0].v, fd->loops[0].n1);
3168 t = fold_convert (fd->iter_type, t);
3170 gsi = gsi_last_bb (l0_bb);
3171 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3172 size_zero_node, NULL_TREE, NULL_TREE);
3173 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3174 false, GSI_CONTINUE_LINKING);
3175 expand_omp_build_assign (&gsi, aref, t, true);
3178 if (!broken_loop)
3180 /* Code to control the increment and predicate for the sequential
3181 loop goes in the CONT_BB. */
3182 gsi = gsi_last_nondebug_bb (cont_bb);
3183 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3184 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3185 vmain = gimple_omp_continue_control_use (cont_stmt);
3186 vback = gimple_omp_continue_control_def (cont_stmt);
3188 if (!gimple_omp_for_combined_p (fd->for_stmt))
3190 if (POINTER_TYPE_P (type))
3191 t = fold_build_pointer_plus (vmain, fd->loop.step);
3192 else
3193 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3194 t = force_gimple_operand_gsi (&gsi, t,
3195 DECL_P (vback)
3196 && TREE_ADDRESSABLE (vback),
3197 NULL_TREE, true, GSI_SAME_STMT);
3198 assign_stmt = gimple_build_assign (vback, t);
3199 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3201 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3203 if (fd->collapse > 1)
3204 t = fd->loop.v;
3205 else
3207 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3208 fd->loops[0].v, fd->loops[0].n1);
3209 t = fold_convert (fd->iter_type, t);
3211 tree aref = build4 (ARRAY_REF, fd->iter_type,
3212 counts[fd->ordered], size_zero_node,
3213 NULL_TREE, NULL_TREE);
3214 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3215 true, GSI_SAME_STMT);
3216 expand_omp_build_assign (&gsi, aref, t);
3219 t = build2 (fd->loop.cond_code, boolean_type_node,
3220 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3221 iend);
3222 gcond *cond_stmt = gimple_build_cond_empty (t);
3223 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3226 /* Remove GIMPLE_OMP_CONTINUE. */
3227 gsi_remove (&gsi, true);
3229 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3230 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3232 /* Emit code to get the next parallel iteration in L2_BB. */
3233 gsi = gsi_start_bb (l2_bb);
3235 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3236 build_fold_addr_expr (istart0),
3237 build_fold_addr_expr (iend0));
3238 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3239 false, GSI_CONTINUE_LINKING);
3240 if (TREE_TYPE (t) != boolean_type_node)
3241 t = fold_build2 (NE_EXPR, boolean_type_node,
3242 t, build_int_cst (TREE_TYPE (t), 0));
3243 gcond *cond_stmt = gimple_build_cond_empty (t);
3244 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3247 /* Add the loop cleanup function. */
3248 gsi = gsi_last_nondebug_bb (exit_bb);
3249 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3250 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3251 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3252 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3253 else
3254 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3255 gcall *call_stmt = gimple_build_call (t, 0);
3256 if (fd->ordered)
3258 tree arr = counts[fd->ordered];
3259 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3260 TREE_THIS_VOLATILE (clobber) = 1;
3261 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3262 GSI_SAME_STMT);
3264 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3266 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3267 if (fd->have_reductemp)
3269 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3270 gimple_call_lhs (call_stmt));
3271 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3274 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3275 gsi_remove (&gsi, true);
3277 /* Connect the new blocks. */
3278 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3279 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3281 if (!broken_loop)
3283 gimple_seq phis;
3285 e = find_edge (cont_bb, l3_bb);
3286 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3288 phis = phi_nodes (l3_bb);
3289 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3291 gimple *phi = gsi_stmt (gsi);
3292 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3293 PHI_ARG_DEF_FROM_EDGE (phi, e));
3295 remove_edge (e);
3297 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3298 e = find_edge (cont_bb, l1_bb);
3299 if (e == NULL)
3301 e = BRANCH_EDGE (cont_bb);
3302 gcc_assert (single_succ (e->dest) == l1_bb);
3304 if (gimple_omp_for_combined_p (fd->for_stmt))
3306 remove_edge (e);
3307 e = NULL;
3309 else if (fd->collapse > 1)
3311 remove_edge (e);
3312 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3314 else
3315 e->flags = EDGE_TRUE_VALUE;
3316 if (e)
3318 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3319 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3321 else
3323 e = find_edge (cont_bb, l2_bb);
3324 e->flags = EDGE_FALLTHRU;
3326 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3328 if (gimple_in_ssa_p (cfun))
3330 /* Add phis to the outer loop that connect to the phis in the inner,
3331 original loop, and move the loop entry value of the inner phi to
3332 the loop entry value of the outer phi. */
3333 gphi_iterator psi;
3334 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3336 location_t locus;
3337 gphi *nphi;
3338 gphi *exit_phi = psi.phi ();
3340 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3341 continue;
3343 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3344 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3346 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3347 edge latch_to_l1 = find_edge (latch, l1_bb);
3348 gphi *inner_phi
3349 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3351 tree t = gimple_phi_result (exit_phi);
3352 tree new_res = copy_ssa_name (t, NULL);
3353 nphi = create_phi_node (new_res, l0_bb);
3355 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3356 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3357 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3358 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3359 add_phi_arg (nphi, t, entry_to_l0, locus);
3361 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3362 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3364 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3368 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3369 recompute_dominator (CDI_DOMINATORS, l2_bb));
3370 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3371 recompute_dominator (CDI_DOMINATORS, l3_bb));
3372 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3373 recompute_dominator (CDI_DOMINATORS, l0_bb));
3374 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3375 recompute_dominator (CDI_DOMINATORS, l1_bb));
3377 /* We enter expand_omp_for_generic with a loop. This original loop may
3378 have its own loop struct, or it may be part of an outer loop struct
3379 (which may be the fake loop). */
3380 struct loop *outer_loop = entry_bb->loop_father;
3381 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3383 add_bb_to_loop (l2_bb, outer_loop);
3385 /* We've added a new loop around the original loop. Allocate the
3386 corresponding loop struct. */
3387 struct loop *new_loop = alloc_loop ();
3388 new_loop->header = l0_bb;
3389 new_loop->latch = l2_bb;
3390 add_loop (new_loop, outer_loop);
3392 /* Allocate a loop structure for the original loop unless we already
3393 had one. */
3394 if (!orig_loop_has_loop_struct
3395 && !gimple_omp_for_combined_p (fd->for_stmt))
3397 struct loop *orig_loop = alloc_loop ();
3398 orig_loop->header = l1_bb;
3399 /* The loop may have multiple latches. */
3400 add_loop (orig_loop, new_loop);
3405 /* A subroutine of expand_omp_for. Generate code for a parallel
3406 loop with static schedule and no specified chunk size. Given
3407 parameters:
3409 for (V = N1; V cond N2; V += STEP) BODY;
3411 where COND is "<" or ">", we generate pseudocode
3413 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3414 if (cond is <)
3415 adj = STEP - 1;
3416 else
3417 adj = STEP + 1;
3418 if ((__typeof (V)) -1 > 0 && cond is >)
3419 n = -(adj + N2 - N1) / -STEP;
3420 else
3421 n = (adj + N2 - N1) / STEP;
3422 q = n / nthreads;
3423 tt = n % nthreads;
3424 if (threadid < tt) goto L3; else goto L4;
3426 tt = 0;
3427 q = q + 1;
3429 s0 = q * threadid + tt;
3430 e0 = s0 + q;
3431 V = s0 * STEP + N1;
3432 if (s0 >= e0) goto L2; else goto L0;
3434 e = e0 * STEP + N1;
3436 BODY;
3437 V += STEP;
3438 if (V cond e) goto L1;
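/* A small worked instance of the partitioning above (numbers purely
   illustrative): with n = 10 iterations and nthreads = 4 we get q = 2 and
   tt = 2, so threads 0 and 1 branch to L3, clear tt and take q + 1 = 3
   iterations each starting at s0 = 3 * threadid, while threads 2 and 3
   keep tt = 2 and take 2 iterations each starting at s0 = 2 * threadid + 2;
   every iteration is assigned to exactly one thread and the first tt
   threads absorb the remainder.  */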
3442 static void
3443 expand_omp_for_static_nochunk (struct omp_region *region,
3444 struct omp_for_data *fd,
3445 gimple *inner_stmt)
3447 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3448 tree type, itype, vmain, vback;
3449 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3450 basic_block body_bb, cont_bb, collapse_bb = NULL;
3451 basic_block fin_bb;
3452 gimple_stmt_iterator gsi;
3453 edge ep;
3454 bool broken_loop = region->cont == NULL;
3455 tree *counts = NULL;
3456 tree n1, n2, step;
3457 tree reductions = NULL_TREE;
3459 itype = type = TREE_TYPE (fd->loop.v);
3460 if (POINTER_TYPE_P (type))
3461 itype = signed_type_for (type);
3463 entry_bb = region->entry;
3464 cont_bb = region->cont;
3465 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3466 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3467 gcc_assert (broken_loop
3468 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3469 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3470 body_bb = single_succ (seq_start_bb);
3471 if (!broken_loop)
3473 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3474 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3475 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3477 exit_bb = region->exit;
3479 /* Iteration space partitioning goes in ENTRY_BB. */
3480 gsi = gsi_last_nondebug_bb (entry_bb);
3481 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3483 if (fd->collapse > 1)
3485 int first_zero_iter = -1, dummy = -1;
3486 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3488 counts = XALLOCAVEC (tree, fd->collapse);
3489 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3490 fin_bb, first_zero_iter,
3491 dummy_bb, dummy, l2_dom_bb);
3492 t = NULL_TREE;
3494 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3495 t = integer_one_node;
3496 else
3497 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3498 fold_convert (type, fd->loop.n1),
3499 fold_convert (type, fd->loop.n2));
3500 if (fd->collapse == 1
3501 && TYPE_UNSIGNED (type)
3502 && (t == NULL_TREE || !integer_onep (t)))
3504 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3505 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3506 true, GSI_SAME_STMT);
3507 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3508 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3509 true, GSI_SAME_STMT);
3510 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3511 NULL_TREE, NULL_TREE);
3512 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3513 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3514 expand_omp_regimplify_p, NULL, NULL)
3515 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3516 expand_omp_regimplify_p, NULL, NULL))
3518 gsi = gsi_for_stmt (cond_stmt);
3519 gimple_regimplify_operands (cond_stmt, &gsi);
3521 ep = split_block (entry_bb, cond_stmt);
3522 ep->flags = EDGE_TRUE_VALUE;
3523 entry_bb = ep->dest;
3524 ep->probability = profile_probability::very_likely ();
3525 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3526 ep->probability = profile_probability::very_unlikely ();
3527 if (gimple_in_ssa_p (cfun))
3529 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3530 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3531 !gsi_end_p (gpi); gsi_next (&gpi))
3533 gphi *phi = gpi.phi ();
3534 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3535 ep, UNKNOWN_LOCATION);
3538 gsi = gsi_last_bb (entry_bb);
3541 if (fd->have_reductemp)
3543 tree t1 = build_int_cst (long_integer_type_node, 0);
3544 tree t2 = build_int_cst (long_integer_type_node, 1);
3545 tree t3 = build_int_cstu (long_integer_type_node,
3546 (HOST_WIDE_INT_1U << 31) + 1);
3547 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3548 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3549 reductions = OMP_CLAUSE_DECL (clauses);
3550 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3551 gimple *g = SSA_NAME_DEF_STMT (reductions);
3552 reductions = gimple_assign_rhs1 (g);
3553 OMP_CLAUSE_DECL (clauses) = reductions;
3554 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3555 tree t
3556 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3557 9, t1, t2, t2, t3, t1, null_pointer_node,
3558 null_pointer_node, reductions, null_pointer_node);
3559 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3560 true, GSI_SAME_STMT);
3561 gsi_remove (&gsi2, true);
3562 release_ssa_name (gimple_assign_lhs (g));
3564 switch (gimple_omp_for_kind (fd->for_stmt))
3566 case GF_OMP_FOR_KIND_FOR:
3567 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3568 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3569 break;
3570 case GF_OMP_FOR_KIND_DISTRIBUTE:
3571 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3572 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3573 break;
3574 default:
3575 gcc_unreachable ();
3577 nthreads = build_call_expr (nthreads, 0);
3578 nthreads = fold_convert (itype, nthreads);
3579 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3580 true, GSI_SAME_STMT);
3581 threadid = build_call_expr (threadid, 0);
3582 threadid = fold_convert (itype, threadid);
3583 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3584 true, GSI_SAME_STMT);
3586 n1 = fd->loop.n1;
3587 n2 = fd->loop.n2;
3588 step = fd->loop.step;
3589 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3591 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3592 OMP_CLAUSE__LOOPTEMP_);
3593 gcc_assert (innerc);
3594 n1 = OMP_CLAUSE_DECL (innerc);
3595 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3596 OMP_CLAUSE__LOOPTEMP_);
3597 gcc_assert (innerc);
3598 n2 = OMP_CLAUSE_DECL (innerc);
3600 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3601 true, NULL_TREE, true, GSI_SAME_STMT);
3602 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3603 true, NULL_TREE, true, GSI_SAME_STMT);
3604 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3605 true, NULL_TREE, true, GSI_SAME_STMT);
3607 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3608 t = fold_build2 (PLUS_EXPR, itype, step, t);
3609 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3610 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3611 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3612 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3613 fold_build1 (NEGATE_EXPR, itype, t),
3614 fold_build1 (NEGATE_EXPR, itype, step));
3615 else
3616 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3617 t = fold_convert (itype, t);
3618 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
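/* N is the logical iteration count: (N2 - N1 + STEP - 1) / STEP for a
   "<" loop, and the mirrored computation for a ">" loop.  When the type
   is unsigned and the loop counts down, the distance and the step are
   both "negative" modulo 2^precision, so they were negated above to make
   the truncating division produce the real trip count.  */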
3620 q = create_tmp_reg (itype, "q");
3621 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3622 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3623 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3625 tt = create_tmp_reg (itype, "tt");
3626 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3627 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3628 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3630 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3631 gcond *cond_stmt = gimple_build_cond_empty (t);
3632 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3634 second_bb = split_block (entry_bb, cond_stmt)->dest;
3635 gsi = gsi_last_nondebug_bb (second_bb);
3636 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3638 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3639 GSI_SAME_STMT);
3640 gassign *assign_stmt
3641 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3642 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3644 third_bb = split_block (second_bb, assign_stmt)->dest;
3645 gsi = gsi_last_nondebug_bb (third_bb);
3646 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3648 t = build2 (MULT_EXPR, itype, q, threadid);
3649 t = build2 (PLUS_EXPR, itype, t, tt);
3650 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3652 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3653 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3655 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3656 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
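/* Worked example of the partitioning above: with n = 10 iterations and
   nthreads = 4, q = 2 and tt = 2; threads 0 and 1 (threadid < tt) take
   q = 3, tt = 0 and get [0,3) and [3,6), while threads 2 and 3 keep
   q = 2 and get [6,8) and [8,10).  The first n % nthreads threads thus
   receive one extra iteration and all chunks are contiguous and
   disjoint.  */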
3658 /* Remove the GIMPLE_OMP_FOR statement. */
3659 gsi_remove (&gsi, true);
3661 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3662 gsi = gsi_start_bb (seq_start_bb);
3664 tree startvar = fd->loop.v;
3665 tree endvar = NULL_TREE;
3667 if (gimple_omp_for_combined_p (fd->for_stmt))
3669 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3670 ? gimple_omp_parallel_clauses (inner_stmt)
3671 : gimple_omp_for_clauses (inner_stmt);
3672 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3673 gcc_assert (innerc);
3674 startvar = OMP_CLAUSE_DECL (innerc);
3675 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3676 OMP_CLAUSE__LOOPTEMP_);
3677 gcc_assert (innerc);
3678 endvar = OMP_CLAUSE_DECL (innerc);
3679 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3680 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3682 int i;
3683 for (i = 1; i < fd->collapse; i++)
3685 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3686 OMP_CLAUSE__LOOPTEMP_);
3687 gcc_assert (innerc);
3689 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3690 OMP_CLAUSE__LOOPTEMP_);
3691 if (innerc)
3693 /* If needed (distribute parallel for with lastprivate),
3694 propagate down the total number of iterations. */
3695 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3696 fd->loop.n2);
3697 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3698 GSI_CONTINUE_LINKING);
3699 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3700 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3704 t = fold_convert (itype, s0);
3705 t = fold_build2 (MULT_EXPR, itype, t, step);
3706 if (POINTER_TYPE_P (type))
3708 t = fold_build_pointer_plus (n1, t);
3709 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3710 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3711 t = fold_convert (signed_type_for (type), t);
3713 else
3714 t = fold_build2 (PLUS_EXPR, type, t, n1);
3715 t = fold_convert (TREE_TYPE (startvar), t);
3716 t = force_gimple_operand_gsi (&gsi, t,
3717 DECL_P (startvar)
3718 && TREE_ADDRESSABLE (startvar),
3719 NULL_TREE, false, GSI_CONTINUE_LINKING);
3720 assign_stmt = gimple_build_assign (startvar, t);
3721 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3723 t = fold_convert (itype, e0);
3724 t = fold_build2 (MULT_EXPR, itype, t, step);
3725 if (POINTER_TYPE_P (type))
3727 t = fold_build_pointer_plus (n1, t);
3728 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3729 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3730 t = fold_convert (signed_type_for (type), t);
3732 else
3733 t = fold_build2 (PLUS_EXPR, type, t, n1);
3734 t = fold_convert (TREE_TYPE (startvar), t);
3735 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3736 false, GSI_CONTINUE_LINKING);
3737 if (endvar)
3739 assign_stmt = gimple_build_assign (endvar, e);
3740 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3741 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3742 assign_stmt = gimple_build_assign (fd->loop.v, e);
3743 else
3744 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3745 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3747 /* Handle linear clause adjustments. */
3748 tree itercnt = NULL_TREE;
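/* For each linear (var:step) clause that does not suppress copy-in, the
   privatized var must enter this thread's chunk holding its original
   value advanced by (number of logical iterations before S0) * step;
   ITERCNT caches that iteration count, adjusted below for combined
   constructs.  */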
3749 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3750 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3751 c; c = OMP_CLAUSE_CHAIN (c))
3752 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3753 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3755 tree d = OMP_CLAUSE_DECL (c);
3756 bool is_ref = omp_is_reference (d);
3757 tree t = d, a, dest;
3758 if (is_ref)
3759 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3760 if (itercnt == NULL_TREE)
3762 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3764 itercnt = fold_build2 (MINUS_EXPR, itype,
3765 fold_convert (itype, n1),
3766 fold_convert (itype, fd->loop.n1));
3767 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3768 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3769 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3770 NULL_TREE, false,
3771 GSI_CONTINUE_LINKING);
3773 else
3774 itercnt = s0;
3776 tree type = TREE_TYPE (t);
3777 if (POINTER_TYPE_P (type))
3778 type = sizetype;
3779 a = fold_build2 (MULT_EXPR, type,
3780 fold_convert (type, itercnt),
3781 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3782 dest = unshare_expr (t);
3783 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3784 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3785 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3786 false, GSI_CONTINUE_LINKING);
3787 assign_stmt = gimple_build_assign (dest, t);
3788 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3790 if (fd->collapse > 1)
3791 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3793 if (!broken_loop)
3795 /* The code controlling the sequential loop replaces the
3796 GIMPLE_OMP_CONTINUE. */
3797 gsi = gsi_last_nondebug_bb (cont_bb);
3798 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3799 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3800 vmain = gimple_omp_continue_control_use (cont_stmt);
3801 vback = gimple_omp_continue_control_def (cont_stmt);
3803 if (!gimple_omp_for_combined_p (fd->for_stmt))
3805 if (POINTER_TYPE_P (type))
3806 t = fold_build_pointer_plus (vmain, step);
3807 else
3808 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3809 t = force_gimple_operand_gsi (&gsi, t,
3810 DECL_P (vback)
3811 && TREE_ADDRESSABLE (vback),
3812 NULL_TREE, true, GSI_SAME_STMT);
3813 assign_stmt = gimple_build_assign (vback, t);
3814 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3816 t = build2 (fd->loop.cond_code, boolean_type_node,
3817 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3818 ? t : vback, e);
3819 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3822 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3823 gsi_remove (&gsi, true);
3825 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3826 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3829 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3830 gsi = gsi_last_nondebug_bb (exit_bb);
3831 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3833 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3834 if (fd->have_reductemp)
3836 tree fn;
3837 if (t)
3838 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3839 else
3840 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3841 gcall *g = gimple_build_call (fn, 0);
3842 if (t)
3844 gimple_call_set_lhs (g, t);
3845 gsi_insert_after (&gsi, gimple_build_assign (reductions,
3846 NOP_EXPR, t),
3847 GSI_SAME_STMT);
3849 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3851 else
3852 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3854 gsi_remove (&gsi, true);
3856 /* Connect all the blocks. */
3857 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3858 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3859 ep = find_edge (entry_bb, second_bb);
3860 ep->flags = EDGE_TRUE_VALUE;
3861 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3862 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3863 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3865 if (!broken_loop)
3867 ep = find_edge (cont_bb, body_bb);
3868 if (ep == NULL)
3870 ep = BRANCH_EDGE (cont_bb);
3871 gcc_assert (single_succ (ep->dest) == body_bb);
3873 if (gimple_omp_for_combined_p (fd->for_stmt))
3875 remove_edge (ep);
3876 ep = NULL;
3878 else if (fd->collapse > 1)
3880 remove_edge (ep);
3881 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3883 else
3884 ep->flags = EDGE_TRUE_VALUE;
3885 find_edge (cont_bb, fin_bb)->flags
3886 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3889 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3890 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3891 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3893 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3894 recompute_dominator (CDI_DOMINATORS, body_bb));
3895 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3896 recompute_dominator (CDI_DOMINATORS, fin_bb));
3898 struct loop *loop = body_bb->loop_father;
3899 if (loop != entry_bb->loop_father)
3901 gcc_assert (broken_loop || loop->header == body_bb);
3902 gcc_assert (broken_loop
3903 || loop->latch == region->cont
3904 || single_pred (loop->latch) == region->cont);
3905 return;
3908 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3910 loop = alloc_loop ();
3911 loop->header = body_bb;
3912 if (collapse_bb == NULL)
3913 loop->latch = cont_bb;
3914 add_loop (loop, body_bb->loop_father);
3918 /* Return phi in E->DEST with ARG on edge E. */
3920 static gphi *
3921 find_phi_with_arg_on_edge (tree arg, edge e)
3923 basic_block bb = e->dest;
3925 for (gphi_iterator gpi = gsi_start_phis (bb);
3926 !gsi_end_p (gpi);
3927 gsi_next (&gpi))
3929 gphi *phi = gpi.phi ();
3930 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3931 return phi;
3934 return NULL;
3937 /* A subroutine of expand_omp_for. Generate code for a parallel
3938 loop with static schedule and a specified chunk size. Given
3939 parameters:
3941 for (V = N1; V cond N2; V += STEP) BODY;
3943 where COND is "<" or ">", we generate pseudocode
3945 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3946 if (cond is <)
3947 adj = STEP - 1;
3948 else
3949 adj = STEP + 1;
3950 if ((__typeof (V)) -1 > 0 && cond is >)
3951 n = -(adj + N2 - N1) / -STEP;
3952 else
3953 n = (adj + N2 - N1) / STEP;
3954 trip = 0;
3955 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3956 here so that V is defined
3957 if the loop is not entered
3958 L0:
3959 s0 = (trip * nthreads + threadid) * CHUNK;
3960 e0 = min (s0 + CHUNK, n);
3961 if (s0 < n) goto L1; else goto L4;
3962 L1:
3963 V = s0 * STEP + N1;
3964 e = e0 * STEP + N1;
3965 L2:
3966 BODY;
3967 V += STEP;
3968 if (V cond e) goto L2; else goto L3;
3969 L3:
3970 trip += 1;
3971 goto L0;
3972 L4:
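   As a worked example of the schedule above: with n = 10, nthreads = 2
   and CHUNK = 3, trip 0 gives thread 0 iterations [0,3) and thread 1
   iterations [3,6); trip 1 gives [6,9) and [9,10); on trip 2 every s0
   is >= n, so both threads branch to L4 and leave the loop.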
3975 static void
3976 expand_omp_for_static_chunk (struct omp_region *region,
3977 struct omp_for_data *fd, gimple *inner_stmt)
3979 tree n, s0, e0, e, t;
3980 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3981 tree type, itype, vmain, vback, vextra;
3982 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3983 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3984 gimple_stmt_iterator gsi;
3985 edge se;
3986 bool broken_loop = region->cont == NULL;
3987 tree *counts = NULL;
3988 tree n1, n2, step;
3989 tree reductions = NULL_TREE;
3991 itype = type = TREE_TYPE (fd->loop.v);
3992 if (POINTER_TYPE_P (type))
3993 itype = signed_type_for (type);
3995 entry_bb = region->entry;
3996 se = split_block (entry_bb, last_stmt (entry_bb));
3997 entry_bb = se->src;
3998 iter_part_bb = se->dest;
3999 cont_bb = region->cont;
4000 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4001 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4002 gcc_assert (broken_loop
4003 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4004 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4005 body_bb = single_succ (seq_start_bb);
4006 if (!broken_loop)
4008 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4009 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4010 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4011 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4013 exit_bb = region->exit;
4015 /* Trip and adjustment setup goes in ENTRY_BB. */
4016 gsi = gsi_last_nondebug_bb (entry_bb);
4017 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4019 if (fd->collapse > 1)
4021 int first_zero_iter = -1, dummy = -1;
4022 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4024 counts = XALLOCAVEC (tree, fd->collapse);
4025 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4026 fin_bb, first_zero_iter,
4027 dummy_bb, dummy, l2_dom_bb);
4028 t = NULL_TREE;
4030 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4031 t = integer_one_node;
4032 else
4033 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4034 fold_convert (type, fd->loop.n1),
4035 fold_convert (type, fd->loop.n2));
4036 if (fd->collapse == 1
4037 && TYPE_UNSIGNED (type)
4038 && (t == NULL_TREE || !integer_onep (t)))
4040 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4041 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4042 true, GSI_SAME_STMT);
4043 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4044 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4045 true, GSI_SAME_STMT);
4046 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4047 NULL_TREE, NULL_TREE);
4048 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4049 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4050 expand_omp_regimplify_p, NULL, NULL)
4051 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4052 expand_omp_regimplify_p, NULL, NULL))
4054 gsi = gsi_for_stmt (cond_stmt);
4055 gimple_regimplify_operands (cond_stmt, &gsi);
4057 se = split_block (entry_bb, cond_stmt);
4058 se->flags = EDGE_TRUE_VALUE;
4059 entry_bb = se->dest;
4060 se->probability = profile_probability::very_likely ();
4061 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4062 se->probability = profile_probability::very_unlikely ();
4063 if (gimple_in_ssa_p (cfun))
4065 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4066 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4067 !gsi_end_p (gpi); gsi_next (&gpi))
4069 gphi *phi = gpi.phi ();
4070 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4071 se, UNKNOWN_LOCATION);
4074 gsi = gsi_last_bb (entry_bb);
4077 if (fd->have_reductemp)
4079 tree t1 = build_int_cst (long_integer_type_node, 0);
4080 tree t2 = build_int_cst (long_integer_type_node, 1);
4081 tree t3 = build_int_cstu (long_integer_type_node,
4082 (HOST_WIDE_INT_1U << 31) + 1);
4083 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4084 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4085 reductions = OMP_CLAUSE_DECL (clauses);
4086 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4087 gimple *g = SSA_NAME_DEF_STMT (reductions);
4088 reductions = gimple_assign_rhs1 (g);
4089 OMP_CLAUSE_DECL (clauses) = reductions;
4090 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4091 tree t
4092 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4093 9, t1, t2, t2, t3, t1, null_pointer_node,
4094 null_pointer_node, reductions, null_pointer_node);
4095 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4096 true, GSI_SAME_STMT);
4097 gsi_remove (&gsi2, true);
4098 release_ssa_name (gimple_assign_lhs (g));
4100 switch (gimple_omp_for_kind (fd->for_stmt))
4102 case GF_OMP_FOR_KIND_FOR:
4103 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4104 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4105 break;
4106 case GF_OMP_FOR_KIND_DISTRIBUTE:
4107 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4108 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4109 break;
4110 default:
4111 gcc_unreachable ();
4113 nthreads = build_call_expr (nthreads, 0);
4114 nthreads = fold_convert (itype, nthreads);
4115 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4116 true, GSI_SAME_STMT);
4117 threadid = build_call_expr (threadid, 0);
4118 threadid = fold_convert (itype, threadid);
4119 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4120 true, GSI_SAME_STMT);
4122 n1 = fd->loop.n1;
4123 n2 = fd->loop.n2;
4124 step = fd->loop.step;
4125 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4127 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4128 OMP_CLAUSE__LOOPTEMP_);
4129 gcc_assert (innerc);
4130 n1 = OMP_CLAUSE_DECL (innerc);
4131 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4132 OMP_CLAUSE__LOOPTEMP_);
4133 gcc_assert (innerc);
4134 n2 = OMP_CLAUSE_DECL (innerc);
4136 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4137 true, NULL_TREE, true, GSI_SAME_STMT);
4138 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4139 true, NULL_TREE, true, GSI_SAME_STMT);
4140 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4141 true, NULL_TREE, true, GSI_SAME_STMT);
4142 tree chunk_size = fold_convert (itype, fd->chunk_size);
4143 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4144 chunk_size
4145 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4146 GSI_SAME_STMT);
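/* omp_adjust_chunk_size rounds the chunk size up to a multiple of the
   simd vectorization factor when the schedule carries the simd
   modifier, so chunk boundaries stay aligned with the vector length.  */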
4148 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4149 t = fold_build2 (PLUS_EXPR, itype, step, t);
4150 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4151 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4152 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4153 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4154 fold_build1 (NEGATE_EXPR, itype, t),
4155 fold_build1 (NEGATE_EXPR, itype, step));
4156 else
4157 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4158 t = fold_convert (itype, t);
4159 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4160 true, GSI_SAME_STMT);
4162 trip_var = create_tmp_reg (itype, ".trip");
4163 if (gimple_in_ssa_p (cfun))
4165 trip_init = make_ssa_name (trip_var);
4166 trip_main = make_ssa_name (trip_var);
4167 trip_back = make_ssa_name (trip_var);
4169 else
4171 trip_init = trip_var;
4172 trip_main = trip_var;
4173 trip_back = trip_var;
4176 gassign *assign_stmt
4177 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4178 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4180 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4181 t = fold_build2 (MULT_EXPR, itype, t, step);
4182 if (POINTER_TYPE_P (type))
4183 t = fold_build_pointer_plus (n1, t);
4184 else
4185 t = fold_build2 (PLUS_EXPR, type, t, n1);
4186 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4187 true, GSI_SAME_STMT);
4189 /* Remove the GIMPLE_OMP_FOR. */
4190 gsi_remove (&gsi, true);
4192 gimple_stmt_iterator gsif = gsi;
4194 /* Iteration space partitioning goes in ITER_PART_BB. */
4195 gsi = gsi_last_bb (iter_part_bb);
4197 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4198 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4199 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4200 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4201 false, GSI_CONTINUE_LINKING);
4203 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4204 t = fold_build2 (MIN_EXPR, itype, t, n);
4205 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4206 false, GSI_CONTINUE_LINKING);
4208 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4209 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4211 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4212 gsi = gsi_start_bb (seq_start_bb);
4214 tree startvar = fd->loop.v;
4215 tree endvar = NULL_TREE;
4217 if (gimple_omp_for_combined_p (fd->for_stmt))
4219 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4220 ? gimple_omp_parallel_clauses (inner_stmt)
4221 : gimple_omp_for_clauses (inner_stmt);
4222 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4223 gcc_assert (innerc);
4224 startvar = OMP_CLAUSE_DECL (innerc);
4225 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4226 OMP_CLAUSE__LOOPTEMP_);
4227 gcc_assert (innerc);
4228 endvar = OMP_CLAUSE_DECL (innerc);
4229 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4230 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4232 int i;
4233 for (i = 1; i < fd->collapse; i++)
4235 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4236 OMP_CLAUSE__LOOPTEMP_);
4237 gcc_assert (innerc);
4239 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4240 OMP_CLAUSE__LOOPTEMP_);
4241 if (innerc)
4243 /* If needed (distribute parallel for with lastprivate),
4244 propagate down the total number of iterations. */
4245 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4246 fd->loop.n2);
4247 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4248 GSI_CONTINUE_LINKING);
4249 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4250 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4255 t = fold_convert (itype, s0);
4256 t = fold_build2 (MULT_EXPR, itype, t, step);
4257 if (POINTER_TYPE_P (type))
4259 t = fold_build_pointer_plus (n1, t);
4260 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4261 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4262 t = fold_convert (signed_type_for (type), t);
4264 else
4265 t = fold_build2 (PLUS_EXPR, type, t, n1);
4266 t = fold_convert (TREE_TYPE (startvar), t);
4267 t = force_gimple_operand_gsi (&gsi, t,
4268 DECL_P (startvar)
4269 && TREE_ADDRESSABLE (startvar),
4270 NULL_TREE, false, GSI_CONTINUE_LINKING);
4271 assign_stmt = gimple_build_assign (startvar, t);
4272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4274 t = fold_convert (itype, e0);
4275 t = fold_build2 (MULT_EXPR, itype, t, step);
4276 if (POINTER_TYPE_P (type))
4278 t = fold_build_pointer_plus (n1, t);
4279 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4280 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4281 t = fold_convert (signed_type_for (type), t);
4283 else
4284 t = fold_build2 (PLUS_EXPR, type, t, n1);
4285 t = fold_convert (TREE_TYPE (startvar), t);
4286 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4287 false, GSI_CONTINUE_LINKING);
4288 if (endvar)
4290 assign_stmt = gimple_build_assign (endvar, e);
4291 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4292 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4293 assign_stmt = gimple_build_assign (fd->loop.v, e);
4294 else
4295 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4296 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4298 /* Handle linear clause adjustments. */
4299 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4300 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4301 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4302 c; c = OMP_CLAUSE_CHAIN (c))
4303 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4304 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4306 tree d = OMP_CLAUSE_DECL (c);
4307 bool is_ref = omp_is_reference (d);
4308 tree t = d, a, dest;
4309 if (is_ref)
4310 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4311 tree type = TREE_TYPE (t);
4312 if (POINTER_TYPE_P (type))
4313 type = sizetype;
4314 dest = unshare_expr (t);
4315 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4316 expand_omp_build_assign (&gsif, v, t);
4317 if (itercnt == NULL_TREE)
4319 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4321 itercntbias
4322 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4323 fold_convert (itype, fd->loop.n1));
4324 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4325 itercntbias, step);
4326 itercntbias
4327 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4328 NULL_TREE, true,
4329 GSI_SAME_STMT);
4330 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4331 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4332 NULL_TREE, false,
4333 GSI_CONTINUE_LINKING);
4335 else
4336 itercnt = s0;
4338 a = fold_build2 (MULT_EXPR, type,
4339 fold_convert (type, itercnt),
4340 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4341 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4342 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4343 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4344 false, GSI_CONTINUE_LINKING);
4345 assign_stmt = gimple_build_assign (dest, t);
4346 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4348 if (fd->collapse > 1)
4349 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4351 if (!broken_loop)
4353 /* The code controlling the sequential loop goes in CONT_BB,
4354 replacing the GIMPLE_OMP_CONTINUE. */
4355 gsi = gsi_last_nondebug_bb (cont_bb);
4356 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4357 vmain = gimple_omp_continue_control_use (cont_stmt);
4358 vback = gimple_omp_continue_control_def (cont_stmt);
4360 if (!gimple_omp_for_combined_p (fd->for_stmt))
4362 if (POINTER_TYPE_P (type))
4363 t = fold_build_pointer_plus (vmain, step);
4364 else
4365 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4366 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4367 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4368 true, GSI_SAME_STMT);
4369 assign_stmt = gimple_build_assign (vback, t);
4370 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4372 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4373 t = build2 (EQ_EXPR, boolean_type_node,
4374 build_int_cst (itype, 0),
4375 build_int_cst (itype, 1));
4376 else
4377 t = build2 (fd->loop.cond_code, boolean_type_node,
4378 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4379 ? t : vback, e);
4380 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
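/* A chunk size of exactly 1 means each chunk is a single iteration, so
   the sequential loop can never take its back edge; the 0 == 1
   condition above makes that edge statically false instead of
   re-evaluating the loop condition.  */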
4383 /* Remove GIMPLE_OMP_CONTINUE. */
4384 gsi_remove (&gsi, true);
4386 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4387 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4389 /* Trip update code goes into TRIP_UPDATE_BB. */
4390 gsi = gsi_start_bb (trip_update_bb);
4392 t = build_int_cst (itype, 1);
4393 t = build2 (PLUS_EXPR, itype, trip_main, t);
4394 assign_stmt = gimple_build_assign (trip_back, t);
4395 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4398 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4399 gsi = gsi_last_nondebug_bb (exit_bb);
4400 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4402 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4403 if (fd->have_reductemp)
4405 tree fn;
4406 if (t)
4407 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4408 else
4409 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4410 gcall *g = gimple_build_call (fn, 0);
4411 if (t)
4413 gimple_call_set_lhs (g, t);
4414 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4415 NOP_EXPR, t),
4416 GSI_SAME_STMT);
4418 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4420 else
4421 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4423 gsi_remove (&gsi, true);
4425 /* Connect the new blocks. */
4426 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4427 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4429 if (!broken_loop)
4431 se = find_edge (cont_bb, body_bb);
4432 if (se == NULL)
4434 se = BRANCH_EDGE (cont_bb);
4435 gcc_assert (single_succ (se->dest) == body_bb);
4437 if (gimple_omp_for_combined_p (fd->for_stmt))
4439 remove_edge (se);
4440 se = NULL;
4442 else if (fd->collapse > 1)
4444 remove_edge (se);
4445 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4447 else
4448 se->flags = EDGE_TRUE_VALUE;
4449 find_edge (cont_bb, trip_update_bb)->flags
4450 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4452 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4453 iter_part_bb);
4456 if (gimple_in_ssa_p (cfun))
4458 gphi_iterator psi;
4459 gphi *phi;
4460 edge re, ene;
4461 edge_var_map *vm;
4462 size_t i;
4464 gcc_assert (fd->collapse == 1 && !broken_loop);
4466 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4467 remove arguments of the phi nodes in fin_bb. We need to create
4468 appropriate phi nodes in iter_part_bb instead. */
4469 se = find_edge (iter_part_bb, fin_bb);
4470 re = single_succ_edge (trip_update_bb);
4471 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4472 ene = single_succ_edge (entry_bb);
4474 psi = gsi_start_phis (fin_bb);
4475 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4476 gsi_next (&psi), ++i)
4478 gphi *nphi;
4479 location_t locus;
4481 phi = psi.phi ();
4482 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4483 redirect_edge_var_map_def (vm), 0))
4484 continue;
4486 t = gimple_phi_result (phi);
4487 gcc_assert (t == redirect_edge_var_map_result (vm));
4489 if (!single_pred_p (fin_bb))
4490 t = copy_ssa_name (t, phi);
4492 nphi = create_phi_node (t, iter_part_bb);
4494 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4495 locus = gimple_phi_arg_location_from_edge (phi, se);
4497 /* A special case -- fd->loop.v is not yet computed in
4498 iter_part_bb, so we need to use vextra instead. */
4499 if (t == fd->loop.v)
4500 t = vextra;
4501 add_phi_arg (nphi, t, ene, locus);
4502 locus = redirect_edge_var_map_location (vm);
4503 tree back_arg = redirect_edge_var_map_def (vm);
4504 add_phi_arg (nphi, back_arg, re, locus);
4505 edge ce = find_edge (cont_bb, body_bb);
4506 if (ce == NULL)
4508 ce = BRANCH_EDGE (cont_bb);
4509 gcc_assert (single_succ (ce->dest) == body_bb);
4510 ce = single_succ_edge (ce->dest);
4512 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4513 gcc_assert (inner_loop_phi != NULL);
4514 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4515 find_edge (seq_start_bb, body_bb), locus);
4517 if (!single_pred_p (fin_bb))
4518 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4520 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4521 redirect_edge_var_map_clear (re);
4522 if (single_pred_p (fin_bb))
4523 while (1)
4525 psi = gsi_start_phis (fin_bb);
4526 if (gsi_end_p (psi))
4527 break;
4528 remove_phi_node (&psi, false);
4531 /* Make phi node for trip. */
4532 phi = create_phi_node (trip_main, iter_part_bb);
4533 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4534 UNKNOWN_LOCATION);
4535 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4536 UNKNOWN_LOCATION);
4539 if (!broken_loop)
4540 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4541 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4542 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4543 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4544 recompute_dominator (CDI_DOMINATORS, fin_bb));
4545 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4546 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4547 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4548 recompute_dominator (CDI_DOMINATORS, body_bb));
4550 if (!broken_loop)
4552 struct loop *loop = body_bb->loop_father;
4553 struct loop *trip_loop = alloc_loop ();
4554 trip_loop->header = iter_part_bb;
4555 trip_loop->latch = trip_update_bb;
4556 add_loop (trip_loop, iter_part_bb->loop_father);
4558 if (loop != entry_bb->loop_father)
4560 gcc_assert (loop->header == body_bb);
4561 gcc_assert (loop->latch == region->cont
4562 || single_pred (loop->latch) == region->cont);
4563 trip_loop->inner = loop;
4564 return;
4567 if (!gimple_omp_for_combined_p (fd->for_stmt))
4569 loop = alloc_loop ();
4570 loop->header = body_bb;
4571 if (collapse_bb == NULL)
4572 loop->latch = cont_bb;
4573 add_loop (loop, trip_loop);
4578 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4579 loop. Given parameters:
4581 for (V = N1; V cond N2; V += STEP) BODY;
4583 where COND is "<" or ">", we generate pseudocode
4585 V = N1;
4586 goto L1;
4587 L0:
4588 BODY;
4589 V += STEP;
4590 L1:
4591 if (V cond N2) goto L0; else goto L2;
4592 L2:
4594 For collapsed loops, given parameters:
4595 collapse(3)
4596 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4597 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4598 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4599 BODY;
4601 we generate pseudocode
4603 if (cond3 is <)
4604 adj = STEP3 - 1;
4605 else
4606 adj = STEP3 + 1;
4607 count3 = (adj + N32 - N31) / STEP3;
4608 if (cond2 is <)
4609 adj = STEP2 - 1;
4610 else
4611 adj = STEP2 + 1;
4612 count2 = (adj + N22 - N21) / STEP2;
4613 if (cond1 is <)
4614 adj = STEP1 - 1;
4615 else
4616 adj = STEP1 + 1;
4617 count1 = (adj + N12 - N11) / STEP1;
4618 count = count1 * count2 * count3;
4619 V = 0;
4620 V1 = N11;
4621 V2 = N21;
4622 V3 = N31;
4623 goto L1;
4624 L0:
4625 BODY;
4626 V += 1;
4627 V3 += STEP3;
4628 V2 += (V3 cond3 N32) ? 0 : STEP2;
4629 V3 = (V3 cond3 N32) ? V3 : N31;
4630 V1 += (V2 cond2 N22) ? 0 : STEP1;
4631 V2 = (V2 cond2 N22) ? V2 : N21;
4632 L1:
4633 if (V < count) goto L0; else goto L2;
4634 L2:
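   The cascade above advances the innermost variable every iteration and
   resets it, stepping the next-outer variable, whenever it runs past its
   bound.  For instance, with a two-level collapse where the inner loop
   is "for (V2 = 0; V2 < 3; V2++)", V2 cycles through 0, 1, 2 while V1
   advances by STEP1 once every three iterations.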
4638 static void
4639 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4641 tree type, t;
4642 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4643 gimple_stmt_iterator gsi;
4644 gimple *stmt;
4645 gcond *cond_stmt;
4646 bool broken_loop = region->cont == NULL;
4647 edge e, ne;
4648 tree *counts = NULL;
4649 int i;
4650 int safelen_int = INT_MAX;
4651 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4652 OMP_CLAUSE_SAFELEN);
4653 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4654 OMP_CLAUSE__SIMDUID_);
4655 tree n1, n2;
4657 if (safelen)
4659 poly_uint64 val;
4660 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4661 if (!poly_int_tree_p (safelen, &val))
4662 safelen_int = 0;
4663 else
4664 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4665 if (safelen_int == 1)
4666 safelen_int = 0;
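/* safelen (1) allows no two iterations to run concurrently, which gives
   the vectorizer no extra freedom, so treat it the same as having no
   safelen clause at all.  */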
4668 type = TREE_TYPE (fd->loop.v);
4669 entry_bb = region->entry;
4670 cont_bb = region->cont;
4671 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4672 gcc_assert (broken_loop
4673 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4674 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4675 if (!broken_loop)
4677 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4678 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4679 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4680 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4682 else
4684 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4685 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4686 l2_bb = single_succ (l1_bb);
4688 exit_bb = region->exit;
4689 l2_dom_bb = NULL;
4691 gsi = gsi_last_nondebug_bb (entry_bb);
4693 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4694 /* Not needed in SSA form right now. */
4695 gcc_assert (!gimple_in_ssa_p (cfun));
4696 if (fd->collapse > 1)
4698 int first_zero_iter = -1, dummy = -1;
4699 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4701 counts = XALLOCAVEC (tree, fd->collapse);
4702 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4703 zero_iter_bb, first_zero_iter,
4704 dummy_bb, dummy, l2_dom_bb);
4706 if (l2_dom_bb == NULL)
4707 l2_dom_bb = l1_bb;
4709 n1 = fd->loop.n1;
4710 n2 = fd->loop.n2;
4711 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4713 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4714 OMP_CLAUSE__LOOPTEMP_);
4715 gcc_assert (innerc);
4716 n1 = OMP_CLAUSE_DECL (innerc);
4717 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4718 OMP_CLAUSE__LOOPTEMP_);
4719 gcc_assert (innerc);
4720 n2 = OMP_CLAUSE_DECL (innerc);
4722 tree step = fd->loop.step;
4724 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4725 OMP_CLAUSE__SIMT_);
4726 if (is_simt)
4728 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4729 is_simt = safelen_int > 1;
4731 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4732 if (is_simt)
4734 simt_lane = create_tmp_var (unsigned_type_node);
4735 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4736 gimple_call_set_lhs (g, simt_lane);
4737 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4738 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4739 fold_convert (TREE_TYPE (step), simt_lane));
4740 n1 = fold_convert (type, n1);
4741 if (POINTER_TYPE_P (type))
4742 n1 = fold_build_pointer_plus (n1, offset);
4743 else
4744 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4746 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4747 if (fd->collapse > 1)
4748 simt_maxlane = build_one_cst (unsigned_type_node);
4749 else if (safelen_int < omp_max_simt_vf ())
4750 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4751 tree vf
4752 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4753 unsigned_type_node, 0);
4754 if (simt_maxlane)
4755 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4756 vf = fold_convert (TREE_TYPE (step), vf);
4757 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
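/* After the adjustments above, SIMT lane I starts at N1 + I * STEP and
   advances by VF * STEP each iteration, so the lanes cover the
   iteration space in an interleaved fashion.  */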
4760 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4761 if (fd->collapse > 1)
4763 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4765 gsi_prev (&gsi);
4766 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4767 gsi_next (&gsi);
4769 else
4770 for (i = 0; i < fd->collapse; i++)
4772 tree itype = TREE_TYPE (fd->loops[i].v);
4773 if (POINTER_TYPE_P (itype))
4774 itype = signed_type_for (itype);
4775 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4776 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4780 /* Remove the GIMPLE_OMP_FOR statement. */
4781 gsi_remove (&gsi, true);
4783 if (!broken_loop)
4785 /* Code to control the increment goes in the CONT_BB. */
4786 gsi = gsi_last_nondebug_bb (cont_bb);
4787 stmt = gsi_stmt (gsi);
4788 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4790 if (POINTER_TYPE_P (type))
4791 t = fold_build_pointer_plus (fd->loop.v, step);
4792 else
4793 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4794 expand_omp_build_assign (&gsi, fd->loop.v, t);
4796 if (fd->collapse > 1)
4798 i = fd->collapse - 1;
4799 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4801 t = fold_convert (sizetype, fd->loops[i].step);
4802 t = fold_build_pointer_plus (fd->loops[i].v, t);
4804 else
4806 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4807 fd->loops[i].step);
4808 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4809 fd->loops[i].v, t);
4811 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4813 for (i = fd->collapse - 1; i > 0; i--)
4815 tree itype = TREE_TYPE (fd->loops[i].v);
4816 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4817 if (POINTER_TYPE_P (itype2))
4818 itype2 = signed_type_for (itype2);
4819 t = fold_convert (itype2, fd->loops[i - 1].step);
4820 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4821 GSI_SAME_STMT);
4822 t = build3 (COND_EXPR, itype2,
4823 build2 (fd->loops[i].cond_code, boolean_type_node,
4824 fd->loops[i].v,
4825 fold_convert (itype, fd->loops[i].n2)),
4826 build_int_cst (itype2, 0), t);
4827 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4828 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4829 else
4830 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4831 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4833 t = fold_convert (itype, fd->loops[i].n1);
4834 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4835 GSI_SAME_STMT);
4836 t = build3 (COND_EXPR, itype,
4837 build2 (fd->loops[i].cond_code, boolean_type_node,
4838 fd->loops[i].v,
4839 fold_convert (itype, fd->loops[i].n2)),
4840 fd->loops[i].v, t);
4841 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4845 /* Remove GIMPLE_OMP_CONTINUE. */
4846 gsi_remove (&gsi, true);
4849 /* Emit the condition in L1_BB. */
4850 gsi = gsi_start_bb (l1_bb);
4852 t = fold_convert (type, n2);
4853 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4854 false, GSI_CONTINUE_LINKING);
4855 tree v = fd->loop.v;
4856 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4857 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4858 false, GSI_CONTINUE_LINKING);
4859 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4860 cond_stmt = gimple_build_cond_empty (t);
4861 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4862 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4863 NULL, NULL)
4864 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4865 NULL, NULL))
4867 gsi = gsi_for_stmt (cond_stmt);
4868 gimple_regimplify_operands (cond_stmt, &gsi);
4871 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4872 if (is_simt)
4874 gsi = gsi_start_bb (l2_bb);
4875 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4876 if (POINTER_TYPE_P (type))
4877 t = fold_build_pointer_plus (fd->loop.v, step);
4878 else
4879 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4880 expand_omp_build_assign (&gsi, fd->loop.v, t);
4883 /* Remove GIMPLE_OMP_RETURN. */
4884 gsi = gsi_last_nondebug_bb (exit_bb);
4885 gsi_remove (&gsi, true);
4887 /* Connect the new blocks. */
4888 remove_edge (FALLTHRU_EDGE (entry_bb));
4890 if (!broken_loop)
4892 remove_edge (BRANCH_EDGE (entry_bb));
4893 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4895 e = BRANCH_EDGE (l1_bb);
4896 ne = FALLTHRU_EDGE (l1_bb);
4897 e->flags = EDGE_TRUE_VALUE;
4899 else
4901 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4903 ne = single_succ_edge (l1_bb);
4904 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4907 ne->flags = EDGE_FALSE_VALUE;
4908 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4909 ne->probability = e->probability.invert ();
4911 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4912 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4914 if (simt_maxlane)
4916 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4917 NULL_TREE, NULL_TREE);
4918 gsi = gsi_last_bb (entry_bb);
4919 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4920 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4921 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4922 FALLTHRU_EDGE (entry_bb)->probability
4923 = profile_probability::guessed_always ().apply_scale (7, 8);
4924 BRANCH_EDGE (entry_bb)->probability
4925 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4926 l2_dom_bb = entry_bb;
4928 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4930 if (!broken_loop)
4932 struct loop *loop = alloc_loop ();
4933 loop->header = l1_bb;
4934 loop->latch = cont_bb;
4935 add_loop (loop, l1_bb->loop_father);
4936 loop->safelen = safelen_int;
4937 if (simduid)
4939 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4940 cfun->has_simduid_loops = true;
4942 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4943 the loop. */
4944 if ((flag_tree_loop_vectorize
4945 || !global_options_set.x_flag_tree_loop_vectorize)
4946 && flag_tree_loop_optimize
4947 && loop->safelen > 1)
4949 loop->force_vectorize = true;
4950 cfun->has_force_vectorize_loops = true;
4953 else if (simduid)
4954 cfun->has_simduid_loops = true;
4957 /* A taskloop construct is represented after gimplification as
4958 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4959 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4960 which should just compute all the loop temporaries needed
4961 by the GIMPLE_OMP_TASK. */
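/* As an illustration: for a `#pragma omp taskloop` applied to
   `for (i = 0; i < N; i++)`, the expansion below essentially evaluates
   the 0 and N bounds into the _looptemp_ variables attached to the
   GIMPLE_OMP_TASK; splitting the iteration space into per-task ranges
   is left to the GOMP_taskloop{,_ull} runtime call and to the inner
   GIMPLE_OMP_FOR handled by expand_omp_taskloop_for_inner below.  */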
4963 static void
4964 expand_omp_taskloop_for_outer (struct omp_region *region,
4965 struct omp_for_data *fd,
4966 gimple *inner_stmt)
4968 tree type, bias = NULL_TREE;
4969 basic_block entry_bb, cont_bb, exit_bb;
4970 gimple_stmt_iterator gsi;
4971 gassign *assign_stmt;
4972 tree *counts = NULL;
4973 int i;
4975 gcc_assert (inner_stmt);
4976 gcc_assert (region->cont);
4977 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4978 && gimple_omp_task_taskloop_p (inner_stmt));
4979 type = TREE_TYPE (fd->loop.v);
4981 /* See if we need to bias by LLONG_MIN. */
4982 if (fd->iter_type == long_long_unsigned_type_node
4983 && TREE_CODE (type) == INTEGER_TYPE
4984 && !TYPE_UNSIGNED (type))
4986 tree n1, n2;
4988 if (fd->loop.cond_code == LT_EXPR)
4990 n1 = fd->loop.n1;
4991 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4993 else
4995 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4996 n2 = fd->loop.n1;
4998 if (TREE_CODE (n1) != INTEGER_CST
4999 || TREE_CODE (n2) != INTEGER_CST
5000 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5001 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5004 entry_bb = region->entry;
5005 cont_bb = region->cont;
5006 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5007 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5008 exit_bb = region->exit;
5010 gsi = gsi_last_nondebug_bb (entry_bb);
5011 gimple *for_stmt = gsi_stmt (gsi);
5012 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5013 if (fd->collapse > 1)
5015 int first_zero_iter = -1, dummy = -1;
5016 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5018 counts = XALLOCAVEC (tree, fd->collapse);
5019 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5020 zero_iter_bb, first_zero_iter,
5021 dummy_bb, dummy, l2_dom_bb);
5023 if (zero_iter_bb)
5025 /* Some counts[i] vars might be uninitialized if
5026 some loop has zero iterations. But the body shouldn't
5027 be executed in that case, so just avoid uninit warnings. */
5028 for (i = first_zero_iter; i < fd->collapse; i++)
5029 if (SSA_VAR_P (counts[i]))
5030 TREE_NO_WARNING (counts[i]) = 1;
5031 gsi_prev (&gsi);
5032 edge e = split_block (entry_bb, gsi_stmt (gsi));
5033 entry_bb = e->dest;
5034 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5035 gsi = gsi_last_bb (entry_bb);
5036 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5037 get_immediate_dominator (CDI_DOMINATORS,
5038 zero_iter_bb));
5042 tree t0, t1;
5043 t1 = fd->loop.n2;
5044 t0 = fd->loop.n1;
5045 if (POINTER_TYPE_P (TREE_TYPE (t0))
5046 && TYPE_PRECISION (TREE_TYPE (t0))
5047 != TYPE_PRECISION (fd->iter_type))
5049 /* Avoid casting pointers to an integer of a different size. */
5050 tree itype = signed_type_for (type);
5051 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5052 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5054 else
5056 t1 = fold_convert (fd->iter_type, t1);
5057 t0 = fold_convert (fd->iter_type, t0);
5059 if (bias)
5061 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5062 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5065 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5066 OMP_CLAUSE__LOOPTEMP_);
5067 gcc_assert (innerc);
5068 tree startvar = OMP_CLAUSE_DECL (innerc);
5069 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5070 gcc_assert (innerc);
5071 tree endvar = OMP_CLAUSE_DECL (innerc);
5072 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5074 gcc_assert (innerc);
5075 for (i = 1; i < fd->collapse; i++)
5077 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5078 OMP_CLAUSE__LOOPTEMP_);
5079 gcc_assert (innerc);
5081 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5082 OMP_CLAUSE__LOOPTEMP_);
5083 if (innerc)
5085 /* If needed (inner taskloop has lastprivate clause), propagate
5086 down the total number of iterations. */
5087 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5088 NULL_TREE, false,
5089 GSI_CONTINUE_LINKING);
5090 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5095 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5096 GSI_CONTINUE_LINKING);
5097 assign_stmt = gimple_build_assign (startvar, t0);
5098 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5100 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5101 GSI_CONTINUE_LINKING);
5102 assign_stmt = gimple_build_assign (endvar, t1);
5103 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5104 if (fd->collapse > 1)
5105 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5107 /* Remove the GIMPLE_OMP_FOR statement. */
5108 gsi = gsi_for_stmt (for_stmt);
5109 gsi_remove (&gsi, true);
5111 gsi = gsi_last_nondebug_bb (cont_bb);
5112 gsi_remove (&gsi, true);
5114 gsi = gsi_last_nondebug_bb (exit_bb);
5115 gsi_remove (&gsi, true);
5117 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5118 remove_edge (BRANCH_EDGE (entry_bb));
5119 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5120 remove_edge (BRANCH_EDGE (cont_bb));
5121 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5122 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5123 recompute_dominator (CDI_DOMINATORS, region->entry));
5126 /* A taskloop construct is represented after gimplification as
5127 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5128 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5129 The GOMP_taskloop{,_ull} function arranges for each task to be given
5130 just a single range of iterations. */
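/* Illustrative note: by the time this body runs, the runtime has picked
   the [start, end) subrange for the current task; it is communicated
   through the first two _looptemp_ clauses of this GIMPLE_OMP_FOR, which
   the code below reads into N1 and N2 before building an ordinary
   sequential loop over that range.  */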
5132 static void
5133 expand_omp_taskloop_for_inner (struct omp_region *region,
5134 struct omp_for_data *fd,
5135 gimple *inner_stmt)
5137 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5138 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5139 basic_block fin_bb;
5140 gimple_stmt_iterator gsi;
5141 edge ep;
5142 bool broken_loop = region->cont == NULL;
5143 tree *counts = NULL;
5144 tree n1, n2, step;
5146 itype = type = TREE_TYPE (fd->loop.v);
5147 if (POINTER_TYPE_P (type))
5148 itype = signed_type_for (type);
5150 /* See if we need to bias by LLONG_MIN. */
5151 if (fd->iter_type == long_long_unsigned_type_node
5152 && TREE_CODE (type) == INTEGER_TYPE
5153 && !TYPE_UNSIGNED (type))
5155 tree n1, n2;
5157 if (fd->loop.cond_code == LT_EXPR)
5159 n1 = fd->loop.n1;
5160 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5162 else
5164 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5165 n2 = fd->loop.n1;
5167 if (TREE_CODE (n1) != INTEGER_CST
5168 || TREE_CODE (n2) != INTEGER_CST
5169 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5170 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5173 entry_bb = region->entry;
5174 cont_bb = region->cont;
5175 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5176 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5177 gcc_assert (broken_loop
5178 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5179 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5180 if (!broken_loop)
5182 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5183 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5185 exit_bb = region->exit;
5187 /* Iteration space partitioning goes in ENTRY_BB. */
5188 gsi = gsi_last_nondebug_bb (entry_bb);
5189 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5191 if (fd->collapse > 1)
5193 int first_zero_iter = -1, dummy = -1;
5194 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5196 counts = XALLOCAVEC (tree, fd->collapse);
5197 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5198 fin_bb, first_zero_iter,
5199 dummy_bb, dummy, l2_dom_bb);
5200 t = NULL_TREE;
5202 else
5203 t = integer_one_node;
5205 step = fd->loop.step;
5206 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5207 OMP_CLAUSE__LOOPTEMP_);
5208 gcc_assert (innerc);
5209 n1 = OMP_CLAUSE_DECL (innerc);
5210 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5211 gcc_assert (innerc);
5212 n2 = OMP_CLAUSE_DECL (innerc);
5213 if (bias)
5215 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5216 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5218 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5219 true, NULL_TREE, true, GSI_SAME_STMT);
5220 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5221 true, NULL_TREE, true, GSI_SAME_STMT);
5222 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5223 true, NULL_TREE, true, GSI_SAME_STMT);
5225 tree startvar = fd->loop.v;
5226 tree endvar = NULL_TREE;
5228 if (gimple_omp_for_combined_p (fd->for_stmt))
5230 tree clauses = gimple_omp_for_clauses (inner_stmt);
5231 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5232 gcc_assert (innerc);
5233 startvar = OMP_CLAUSE_DECL (innerc);
5234 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5235 OMP_CLAUSE__LOOPTEMP_);
5236 gcc_assert (innerc);
5237 endvar = OMP_CLAUSE_DECL (innerc);
5239 t = fold_convert (TREE_TYPE (startvar), n1);
5240 t = force_gimple_operand_gsi (&gsi, t,
5241 DECL_P (startvar)
5242 && TREE_ADDRESSABLE (startvar),
5243 NULL_TREE, false, GSI_CONTINUE_LINKING);
5244 gimple *assign_stmt = gimple_build_assign (startvar, t);
5245 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5247 t = fold_convert (TREE_TYPE (startvar), n2);
5248 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5249 false, GSI_CONTINUE_LINKING);
5250 if (endvar)
5252 assign_stmt = gimple_build_assign (endvar, e);
5253 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5254 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5255 assign_stmt = gimple_build_assign (fd->loop.v, e);
5256 else
5257 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5258 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5260 if (fd->collapse > 1)
5261 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5263 if (!broken_loop)
5265 /* The code controlling the sequential loop replaces the
5266 GIMPLE_OMP_CONTINUE. */
5267 gsi = gsi_last_nondebug_bb (cont_bb);
5268 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5269 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5270 vmain = gimple_omp_continue_control_use (cont_stmt);
5271 vback = gimple_omp_continue_control_def (cont_stmt);
5273 if (!gimple_omp_for_combined_p (fd->for_stmt))
5275 if (POINTER_TYPE_P (type))
5276 t = fold_build_pointer_plus (vmain, step);
5277 else
5278 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5279 t = force_gimple_operand_gsi (&gsi, t,
5280 DECL_P (vback)
5281 && TREE_ADDRESSABLE (vback),
5282 NULL_TREE, true, GSI_SAME_STMT);
5283 assign_stmt = gimple_build_assign (vback, t);
5284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5286 t = build2 (fd->loop.cond_code, boolean_type_node,
5287 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5288 ? t : vback, e);
5289 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5292 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5293 gsi_remove (&gsi, true);
5295 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5296 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5299 /* Remove the GIMPLE_OMP_FOR statement. */
5300 gsi = gsi_for_stmt (fd->for_stmt);
5301 gsi_remove (&gsi, true);
5303 /* Remove the GIMPLE_OMP_RETURN statement. */
5304 gsi = gsi_last_nondebug_bb (exit_bb);
5305 gsi_remove (&gsi, true);
5307 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5308 if (!broken_loop)
5309 remove_edge (BRANCH_EDGE (entry_bb));
5310 else
5312 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5313 region->outer->cont = NULL;
5316 /* Connect all the blocks. */
5317 if (!broken_loop)
5319 ep = find_edge (cont_bb, body_bb);
5320 if (gimple_omp_for_combined_p (fd->for_stmt))
5322 remove_edge (ep);
5323 ep = NULL;
5325 else if (fd->collapse > 1)
5327 remove_edge (ep);
5328 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5330 else
5331 ep->flags = EDGE_TRUE_VALUE;
5332 find_edge (cont_bb, fin_bb)->flags
5333 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5336 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5337 recompute_dominator (CDI_DOMINATORS, body_bb));
5338 if (!broken_loop)
5339 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5340 recompute_dominator (CDI_DOMINATORS, fin_bb));
5342 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5344 struct loop *loop = alloc_loop ();
5345 loop->header = body_bb;
5346 if (collapse_bb == NULL)
5347 loop->latch = cont_bb;
5348 add_loop (loop, body_bb->loop_father);
5352 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5353 partitioned loop. The lowering here is abstracted, in that the
5354 loop parameters are passed through internal functions, which are
5355 further lowered by oacc_device_lower, once we get to the target
5356 compiler. The loop is of the form:
5358 for (V = B; V LTGT E; V += S) {BODY}
5360 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5361 (constant 0 for no chunking) and we will have a GWV partitioning
5362 mask, specifying dimensions over which the loop is to be
5363 partitioned (see note below). We generate code that looks like
5364 (this ignores tiling):
5366 <entry_bb> [incoming FALL->body, BRANCH->exit]
5367 typedef signedintify (typeof (V)) T; // underlying signed integral type
5368 T range = E - B;
5369 T chunk_no = 0;
5370 T DIR = LTGT == '<' ? +1 : -1;
5371 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5372 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5374 <head_bb> [created by splitting end of entry_bb]
5375 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5376 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5377 if (!(offset LTGT bound)) goto bottom_bb;
5379 <body_bb> [incoming]
5380 V = B + offset;
5381 {BODY}
5383 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5384 offset += step;
5385 if (offset LTGT bound) goto body_bb; [*]
5387 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5388 chunk_no++;
5389    if (chunk_no < chunk_max) goto head_bb;
5391 <exit_bb> [incoming]
5392 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5394 [*] Needed if V live at end of loop. */
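/* As a purely illustrative sketch (assuming a simple canonical loop),
   a user construct such as

     #pragma acc loop
     for (i = 0; i < n; i++)
       body (i);

   reaches this point with B = 0, E = n, S = 1 and LTGT = '<'; the
   IFN_GOACC_LOOP calls emitted below carry those parameters and are
   resolved by oacc_device_lower once the offload target is known.  */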
5396 static void
5397 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5399 tree v = fd->loop.v;
5400 enum tree_code cond_code = fd->loop.cond_code;
5401 enum tree_code plus_code = PLUS_EXPR;
5403 tree chunk_size = integer_minus_one_node;
5404 tree gwv = integer_zero_node;
5405 tree iter_type = TREE_TYPE (v);
5406 tree diff_type = iter_type;
5407 tree plus_type = iter_type;
5408 struct oacc_collapse *counts = NULL;
5410 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5411 == GF_OMP_FOR_KIND_OACC_LOOP);
5412 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5413 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5415 if (POINTER_TYPE_P (iter_type))
5417 plus_code = POINTER_PLUS_EXPR;
5418 plus_type = sizetype;
5420 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5421 diff_type = signed_type_for (diff_type);
5422 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5423 diff_type = integer_type_node;
5425 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5426 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5427 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5428 basic_block bottom_bb = NULL;
5430   /* entry_bb has two successors; the branch edge is to the exit
5431 block, fallthrough edge to body. */
5432 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5433 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5435 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5436 body_bb, or to a block whose only successor is the body_bb. Its
5437 fallthrough successor is the final block (same as the branch
5438 successor of the entry_bb). */
5439 if (cont_bb)
5441 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5442 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5444 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5445 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5447 else
5448 gcc_assert (!gimple_in_ssa_p (cfun));
5450 /* The exit block only has entry_bb and cont_bb as predecessors. */
5451 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5453 tree chunk_no;
5454 tree chunk_max = NULL_TREE;
5455 tree bound, offset;
5456 tree step = create_tmp_var (diff_type, ".step");
5457 bool up = cond_code == LT_EXPR;
5458 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5459 bool chunking = !gimple_in_ssa_p (cfun);
5460 bool negating;
5462 /* Tiling vars. */
5463 tree tile_size = NULL_TREE;
5464 tree element_s = NULL_TREE;
5465 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5466 basic_block elem_body_bb = NULL;
5467 basic_block elem_cont_bb = NULL;
5469 /* SSA instances. */
5470 tree offset_incr = NULL_TREE;
5471 tree offset_init = NULL_TREE;
5473 gimple_stmt_iterator gsi;
5474 gassign *ass;
5475 gcall *call;
5476 gimple *stmt;
5477 tree expr;
5478 location_t loc;
5479 edge split, be, fte;
5481 /* Split the end of entry_bb to create head_bb. */
5482 split = split_block (entry_bb, last_stmt (entry_bb));
5483 basic_block head_bb = split->dest;
5484 entry_bb = split->src;
5486 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5487 gsi = gsi_last_nondebug_bb (entry_bb);
5488 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5489 loc = gimple_location (for_stmt);
5491 if (gimple_in_ssa_p (cfun))
5493 offset_init = gimple_omp_for_index (for_stmt, 0);
5494 gcc_assert (integer_zerop (fd->loop.n1));
5495 /* The SSA parallelizer does gang parallelism. */
5496 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5499 if (fd->collapse > 1 || fd->tiling)
5501 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5502 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5503 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5504 TREE_TYPE (fd->loop.n2), loc);
5506 if (SSA_VAR_P (fd->loop.n2))
5508 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5509 true, GSI_SAME_STMT);
5510 ass = gimple_build_assign (fd->loop.n2, total);
5511 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5515 tree b = fd->loop.n1;
5516 tree e = fd->loop.n2;
5517 tree s = fd->loop.step;
5519 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5520 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5522 /* Convert the step, avoiding possible unsigned->signed overflow. */
5523 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5524 if (negating)
5525 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5526 s = fold_convert (diff_type, s);
5527 if (negating)
5528 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5529 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5531 if (!chunking)
5532 chunk_size = integer_zero_node;
5533 expr = fold_convert (diff_type, chunk_size);
5534 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5535 NULL_TREE, true, GSI_SAME_STMT);
5537 if (fd->tiling)
5539 /* Determine the tile size and element step,
5540 modify the outer loop step size. */
5541 tile_size = create_tmp_var (diff_type, ".tile_size");
5542 expr = build_int_cst (diff_type, 1);
5543 for (int ix = 0; ix < fd->collapse; ix++)
5544 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5545 expr = force_gimple_operand_gsi (&gsi, expr, true,
5546 NULL_TREE, true, GSI_SAME_STMT);
5547 ass = gimple_build_assign (tile_size, expr);
5548 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5550 element_s = create_tmp_var (diff_type, ".element_s");
5551 ass = gimple_build_assign (element_s, s);
5552 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5554 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5555 s = force_gimple_operand_gsi (&gsi, expr, true,
5556 NULL_TREE, true, GSI_SAME_STMT);
5559 /* Determine the range, avoiding possible unsigned->signed overflow. */
5560 negating = !up && TYPE_UNSIGNED (iter_type);
5561 expr = fold_build2 (MINUS_EXPR, plus_type,
5562 fold_convert (plus_type, negating ? b : e),
5563 fold_convert (plus_type, negating ? e : b));
5564 expr = fold_convert (diff_type, expr);
5565 if (negating)
5566 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5567 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5568 NULL_TREE, true, GSI_SAME_STMT);
5570 chunk_no = build_int_cst (diff_type, 0);
5571 if (chunking)
5573 gcc_assert (!gimple_in_ssa_p (cfun));
5575 expr = chunk_no;
5576 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5577 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5579 ass = gimple_build_assign (chunk_no, expr);
5580 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5582 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5583 build_int_cst (integer_type_node,
5584 IFN_GOACC_LOOP_CHUNKS),
5585 dir, range, s, chunk_size, gwv);
5586 gimple_call_set_lhs (call, chunk_max);
5587 gimple_set_location (call, loc);
5588 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5590 else
5591 chunk_size = chunk_no;
5593 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5594 build_int_cst (integer_type_node,
5595 IFN_GOACC_LOOP_STEP),
5596 dir, range, s, chunk_size, gwv);
5597 gimple_call_set_lhs (call, step);
5598 gimple_set_location (call, loc);
5599 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5601 /* Remove the GIMPLE_OMP_FOR. */
5602 gsi_remove (&gsi, true);
5604 /* Fixup edges from head_bb. */
5605 be = BRANCH_EDGE (head_bb);
5606 fte = FALLTHRU_EDGE (head_bb);
5607 be->flags |= EDGE_FALSE_VALUE;
5608 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5610 basic_block body_bb = fte->dest;
5612 if (gimple_in_ssa_p (cfun))
5614 gsi = gsi_last_nondebug_bb (cont_bb);
5615 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5617 offset = gimple_omp_continue_control_use (cont_stmt);
5618 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5620 else
5622 offset = create_tmp_var (diff_type, ".offset");
5623 offset_init = offset_incr = offset;
5625 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5627 /* Loop offset & bound go into head_bb. */
5628 gsi = gsi_start_bb (head_bb);
5630 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5631 build_int_cst (integer_type_node,
5632 IFN_GOACC_LOOP_OFFSET),
5633 dir, range, s,
5634 chunk_size, gwv, chunk_no);
5635 gimple_call_set_lhs (call, offset_init);
5636 gimple_set_location (call, loc);
5637 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5639 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5640 build_int_cst (integer_type_node,
5641 IFN_GOACC_LOOP_BOUND),
5642 dir, range, s,
5643 chunk_size, gwv, offset_init);
5644 gimple_call_set_lhs (call, bound);
5645 gimple_set_location (call, loc);
5646 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5648 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5649 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5650 GSI_CONTINUE_LINKING);
5652 /* V assignment goes into body_bb. */
5653 if (!gimple_in_ssa_p (cfun))
5655 gsi = gsi_start_bb (body_bb);
5657 expr = build2 (plus_code, iter_type, b,
5658 fold_convert (plus_type, offset));
5659 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5660 true, GSI_SAME_STMT);
5661 ass = gimple_build_assign (v, expr);
5662 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5664 if (fd->collapse > 1 || fd->tiling)
5665 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5667 if (fd->tiling)
5669 /* Determine the range of the element loop -- usually simply
5670 the tile_size, but could be smaller if the final
5671 iteration of the outer loop is a partial tile. */
5672 tree e_range = create_tmp_var (diff_type, ".e_range");
5674 expr = build2 (MIN_EXPR, diff_type,
5675 build2 (MINUS_EXPR, diff_type, bound, offset),
5676 build2 (MULT_EXPR, diff_type, tile_size,
5677 element_s));
5678 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5679 true, GSI_SAME_STMT);
5680 ass = gimple_build_assign (e_range, expr);
5681 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5683 /* Determine bound, offset & step of inner loop. */
5684 e_bound = create_tmp_var (diff_type, ".e_bound");
5685 e_offset = create_tmp_var (diff_type, ".e_offset");
5686 e_step = create_tmp_var (diff_type, ".e_step");
5688 /* Mark these as element loops. */
5689 tree t, e_gwv = integer_minus_one_node;
5690 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5692 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5693 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5694 element_s, chunk, e_gwv, chunk);
5695 gimple_call_set_lhs (call, e_offset);
5696 gimple_set_location (call, loc);
5697 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5699 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5700 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5701 element_s, chunk, e_gwv, e_offset);
5702 gimple_call_set_lhs (call, e_bound);
5703 gimple_set_location (call, loc);
5704 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5706 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5707 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5708 element_s, chunk, e_gwv);
5709 gimple_call_set_lhs (call, e_step);
5710 gimple_set_location (call, loc);
5711 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5713 /* Add test and split block. */
5714 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5715 stmt = gimple_build_cond_empty (expr);
5716 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5717 split = split_block (body_bb, stmt);
5718 elem_body_bb = split->dest;
5719 if (cont_bb == body_bb)
5720 cont_bb = elem_body_bb;
5721 body_bb = split->src;
5723 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5725 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5726 if (cont_bb == NULL)
5728 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5729 e->probability = profile_probability::even ();
5730 split->probability = profile_probability::even ();
5733 /* Initialize the user's loop vars. */
5734 gsi = gsi_start_bb (elem_body_bb);
5735 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5739 /* Loop increment goes into cont_bb. If this is not a loop, we
5740 will have spawned threads as if it was, and each one will
5741 execute one iteration. The specification is not explicit about
5742 whether such constructs are ill-formed or not, and they can
5743 occur, especially when noreturn routines are involved. */
5744 if (cont_bb)
5746 gsi = gsi_last_nondebug_bb (cont_bb);
5747 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5748 loc = gimple_location (cont_stmt);
5750 if (fd->tiling)
5752 /* Insert element loop increment and test. */
5753 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5754 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5755 true, GSI_SAME_STMT);
5756 ass = gimple_build_assign (e_offset, expr);
5757 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5758 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5760 stmt = gimple_build_cond_empty (expr);
5761 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5762 split = split_block (cont_bb, stmt);
5763 elem_cont_bb = split->src;
5764 cont_bb = split->dest;
5766 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5767 split->probability = profile_probability::unlikely ().guessed ();
5768 edge latch_edge
5769 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5770 latch_edge->probability = profile_probability::likely ().guessed ();
5772 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5773 skip_edge->probability = profile_probability::unlikely ().guessed ();
5774 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5775 loop_entry_edge->probability
5776 = profile_probability::likely ().guessed ();
5778 gsi = gsi_for_stmt (cont_stmt);
5781 /* Increment offset. */
5782 if (gimple_in_ssa_p (cfun))
5783 expr = build2 (plus_code, iter_type, offset,
5784 fold_convert (plus_type, step));
5785 else
5786 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5787 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5788 true, GSI_SAME_STMT);
5789 ass = gimple_build_assign (offset_incr, expr);
5790 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5791 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5792 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5794 /* Remove the GIMPLE_OMP_CONTINUE. */
5795 gsi_remove (&gsi, true);
5797 /* Fixup edges from cont_bb. */
5798 be = BRANCH_EDGE (cont_bb);
5799 fte = FALLTHRU_EDGE (cont_bb);
5800 be->flags |= EDGE_TRUE_VALUE;
5801 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5803 if (chunking)
5805 /* Split the beginning of exit_bb to make bottom_bb. We
5806 need to insert a nop at the start, because splitting is
5807 after a stmt, not before. */
5808 gsi = gsi_start_bb (exit_bb);
5809 stmt = gimple_build_nop ();
5810 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5811 split = split_block (exit_bb, stmt);
5812 bottom_bb = split->src;
5813 exit_bb = split->dest;
5814 gsi = gsi_last_bb (bottom_bb);
5816 /* Chunk increment and test goes into bottom_bb. */
5817 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5818 build_int_cst (diff_type, 1));
5819 ass = gimple_build_assign (chunk_no, expr);
5820 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5822 /* Chunk test at end of bottom_bb. */
5823 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5824 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5825 GSI_CONTINUE_LINKING);
5827 /* Fixup edges from bottom_bb. */
5828 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5829 split->probability = profile_probability::unlikely ().guessed ();
5830 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5831 latch_edge->probability = profile_probability::likely ().guessed ();
5835 gsi = gsi_last_nondebug_bb (exit_bb);
5836 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5837 loc = gimple_location (gsi_stmt (gsi));
5839 if (!gimple_in_ssa_p (cfun))
5841 /* Insert the final value of V, in case it is live. This is the
5842 value for the only thread that survives past the join. */
5843 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5844 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5845 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5846 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5847 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5848 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5849 true, GSI_SAME_STMT);
5850 ass = gimple_build_assign (v, expr);
5851 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5854 /* Remove the OMP_RETURN. */
5855 gsi_remove (&gsi, true);
5857 if (cont_bb)
5859 /* We now have one, two or three nested loops. Update the loop
5860 structures. */
5861 struct loop *parent = entry_bb->loop_father;
5862 struct loop *body = body_bb->loop_father;
5864 if (chunking)
5866 struct loop *chunk_loop = alloc_loop ();
5867 chunk_loop->header = head_bb;
5868 chunk_loop->latch = bottom_bb;
5869 add_loop (chunk_loop, parent);
5870 parent = chunk_loop;
5872 else if (parent != body)
5874 gcc_assert (body->header == body_bb);
5875 gcc_assert (body->latch == cont_bb
5876 || single_pred (body->latch) == cont_bb);
5877 parent = NULL;
5880 if (parent)
5882 struct loop *body_loop = alloc_loop ();
5883 body_loop->header = body_bb;
5884 body_loop->latch = cont_bb;
5885 add_loop (body_loop, parent);
5887 if (fd->tiling)
5889 /* Insert tiling's element loop. */
5890 struct loop *inner_loop = alloc_loop ();
5891 inner_loop->header = elem_body_bb;
5892 inner_loop->latch = elem_cont_bb;
5893 add_loop (inner_loop, body_loop);
5899 /* Expand the OMP loop defined by REGION. */
5901 static void
5902 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5904 struct omp_for_data fd;
5905 struct omp_for_data_loop *loops;
5907 loops
5908 = (struct omp_for_data_loop *)
5909 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5910 * sizeof (struct omp_for_data_loop));
5911 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5912 &fd, loops);
5913 region->sched_kind = fd.sched_kind;
5914 region->sched_modifiers = fd.sched_modifiers;
5916 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5917 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5918 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5919 if (region->cont)
5921 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5922 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5923 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5925 else
5926     /* If there isn't a continue then this is a degenerate case where
5927 the introduction of abnormal edges during lowering will prevent
5928 original loops from being detected. Fix that up. */
5929 loops_state_set (LOOPS_NEED_FIXUP);
5931 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5932 expand_omp_simd (region, &fd);
5933 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5935 gcc_assert (!inner_stmt);
5936 expand_oacc_for (region, &fd);
5938 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5940 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5941 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5942 else
5943 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5945 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5946 && !fd.have_ordered)
5948 if (fd.chunk_size == NULL)
5949 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5950 else
5951 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5953 else
5955 int fn_index, start_ix, next_ix;
5956 unsigned HOST_WIDE_INT sched = 0;
5957 tree sched_arg = NULL_TREE;
5959 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5960 == GF_OMP_FOR_KIND_FOR);
5961 if (fd.chunk_size == NULL
5962 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5963 fd.chunk_size = integer_zero_node;
5964 switch (fd.sched_kind)
5966 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5967 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5969 gcc_assert (!fd.have_ordered);
5970 fn_index = 6;
5971 sched = 4;
5973 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5974 && !fd.have_ordered)
5975 fn_index = 7;
5976 else
5978 fn_index = 3;
5979 sched = (HOST_WIDE_INT_1U << 31);
5981 break;
5982 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5983 case OMP_CLAUSE_SCHEDULE_GUIDED:
5984 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5985 && !fd.have_ordered)
5987 fn_index = 3 + fd.sched_kind;
5988 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
5989 break;
5991 fn_index = fd.sched_kind;
5992 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
5993 sched += (HOST_WIDE_INT_1U << 31);
5994 break;
5995 case OMP_CLAUSE_SCHEDULE_STATIC:
5996 gcc_assert (fd.have_ordered);
5997 fn_index = 0;
5998 sched = (HOST_WIDE_INT_1U << 31) + 1;
5999 break;
6000 default:
6001 gcc_unreachable ();
6003 if (!fd.ordered)
6004 fn_index += fd.have_ordered * 8;
6005 if (fd.ordered)
6006 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6007 else
6008 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6009 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6010 if (fd.have_reductemp)
6012 if (fd.ordered)
6013 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6014 else if (fd.have_ordered)
6015 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6016 else
6017 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6018 sched_arg = build_int_cstu (long_integer_type_node, sched);
6019 if (!fd.chunk_size)
6020 fd.chunk_size = integer_zero_node;
6022 if (fd.iter_type == long_long_unsigned_type_node)
6024 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6025 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6026 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6027 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6029 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6030 (enum built_in_function) next_ix, sched_arg,
6031 inner_stmt);
6034 if (gimple_in_ssa_p (cfun))
6035 update_ssa (TODO_update_ssa_only_virtuals);
6038 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6040 v = GOMP_sections_start (n);
6042 switch (v)
6044 case 0:
6045 goto L2;
6046 case 1:
6047 section 1;
6048 goto L1;
6049 case 2:
6051 case n:
6053 default:
6054 abort ();
6057 v = GOMP_sections_next ();
6058 goto L0;
6060 reduction;
6062 If this is a combined parallel sections, replace the call to
6063 GOMP_sections_start with call to GOMP_sections_next. */
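/* For illustration only, a hypothetical input of the form

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   is expected to produce case 1 calling foo, case 2 calling bar, and
   case 0 branching to L2 once GOMP_sections_next reports that no more
   work remains.  */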
6065 static void
6066 expand_omp_sections (struct omp_region *region)
6068 tree t, u, vin = NULL, vmain, vnext, l2;
6069 unsigned len;
6070 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6071 gimple_stmt_iterator si, switch_si;
6072 gomp_sections *sections_stmt;
6073 gimple *stmt;
6074 gomp_continue *cont;
6075 edge_iterator ei;
6076 edge e;
6077 struct omp_region *inner;
6078 unsigned i, casei;
6079 bool exit_reachable = region->cont != NULL;
6081 gcc_assert (region->exit != NULL);
6082 entry_bb = region->entry;
6083 l0_bb = single_succ (entry_bb);
6084 l1_bb = region->cont;
6085 l2_bb = region->exit;
6086 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6087 l2 = gimple_block_label (l2_bb);
6088 else
6090 /* This can happen if there are reductions. */
6091 len = EDGE_COUNT (l0_bb->succs);
6092 gcc_assert (len > 0);
6093 e = EDGE_SUCC (l0_bb, len - 1);
6094 si = gsi_last_nondebug_bb (e->dest);
6095 l2 = NULL_TREE;
6096 if (gsi_end_p (si)
6097 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6098 l2 = gimple_block_label (e->dest);
6099 else
6100 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6102 si = gsi_last_nondebug_bb (e->dest);
6103 if (gsi_end_p (si)
6104 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6106 l2 = gimple_block_label (e->dest);
6107 break;
6111 if (exit_reachable)
6112 default_bb = create_empty_bb (l1_bb->prev_bb);
6113 else
6114 default_bb = create_empty_bb (l0_bb);
6116 /* We will build a switch() with enough cases for all the
6117 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6118 and a default case to abort if something goes wrong. */
6119 len = EDGE_COUNT (l0_bb->succs);
6121 /* Use vec::quick_push on label_vec throughout, since we know the size
6122 in advance. */
6123 auto_vec<tree> label_vec (len);
6125 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6126 GIMPLE_OMP_SECTIONS statement. */
6127 si = gsi_last_nondebug_bb (entry_bb);
6128 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6129 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6130 vin = gimple_omp_sections_control (sections_stmt);
6131 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6132 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6133 if (reductmp)
6135 tree reductions = OMP_CLAUSE_DECL (reductmp);
6136 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6137 gimple *g = SSA_NAME_DEF_STMT (reductions);
6138 reductions = gimple_assign_rhs1 (g);
6139 OMP_CLAUSE_DECL (reductmp) = reductions;
6140 gimple_stmt_iterator gsi = gsi_for_stmt (g);
6141 t = build_int_cst (unsigned_type_node, len - 1);
6142 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6143 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6144 gimple_call_set_lhs (stmt, vin);
6145 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6146 gsi_remove (&gsi, true);
6147 release_ssa_name (gimple_assign_lhs (g));
6149 else if (!is_combined_parallel (region))
6151 /* If we are not inside a combined parallel+sections region,
6152 call GOMP_sections_start. */
6153 t = build_int_cst (unsigned_type_node, len - 1);
6154 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6155 stmt = gimple_build_call (u, 1, t);
6157 else
6159 /* Otherwise, call GOMP_sections_next. */
6160 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6161 stmt = gimple_build_call (u, 0);
6163 if (!reductmp)
6165 gimple_call_set_lhs (stmt, vin);
6166 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6168 gsi_remove (&si, true);
6170 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6171 L0_BB. */
6172 switch_si = gsi_last_nondebug_bb (l0_bb);
6173 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6174 if (exit_reachable)
6176 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6177 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6178 vmain = gimple_omp_continue_control_use (cont);
6179 vnext = gimple_omp_continue_control_def (cont);
6181 else
6183 vmain = vin;
6184 vnext = NULL_TREE;
6187 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6188 label_vec.quick_push (t);
6189 i = 1;
6191 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6192 for (inner = region->inner, casei = 1;
6193 inner;
6194 inner = inner->next, i++, casei++)
6196 basic_block s_entry_bb, s_exit_bb;
6198 /* Skip optional reduction region. */
6199 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6201 --i;
6202 --casei;
6203 continue;
6206 s_entry_bb = inner->entry;
6207 s_exit_bb = inner->exit;
6209 t = gimple_block_label (s_entry_bb);
6210 u = build_int_cst (unsigned_type_node, casei);
6211 u = build_case_label (u, NULL, t);
6212 label_vec.quick_push (u);
6214 si = gsi_last_nondebug_bb (s_entry_bb);
6215 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6216 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6217 gsi_remove (&si, true);
6218 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6220 if (s_exit_bb == NULL)
6221 continue;
6223 si = gsi_last_nondebug_bb (s_exit_bb);
6224 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6225 gsi_remove (&si, true);
6227 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6230 /* Error handling code goes in DEFAULT_BB. */
6231 t = gimple_block_label (default_bb);
6232 u = build_case_label (NULL, NULL, t);
6233 make_edge (l0_bb, default_bb, 0);
6234 add_bb_to_loop (default_bb, current_loops->tree_root);
6236 stmt = gimple_build_switch (vmain, u, label_vec);
6237 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6238 gsi_remove (&switch_si, true);
6240 si = gsi_start_bb (default_bb);
6241 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6242 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6244 if (exit_reachable)
6246 tree bfn_decl;
6248 /* Code to get the next section goes in L1_BB. */
6249 si = gsi_last_nondebug_bb (l1_bb);
6250 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6252 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6253 stmt = gimple_build_call (bfn_decl, 0);
6254 gimple_call_set_lhs (stmt, vnext);
6255 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6256 gsi_remove (&si, true);
6258 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6261 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6262 si = gsi_last_nondebug_bb (l2_bb);
6263 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6264 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6265 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6266 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6267 else
6268 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6269 stmt = gimple_build_call (t, 0);
6270 if (gimple_omp_return_lhs (gsi_stmt (si)))
6271 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6272 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6273 gsi_remove (&si, true);
6275 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6278 /* Expand code for an OpenMP single directive. We've already expanded
6279 much of the code, here we simply place the GOMP_barrier call. */
6281 static void
6282 expand_omp_single (struct omp_region *region)
6284 basic_block entry_bb, exit_bb;
6285 gimple_stmt_iterator si;
6287 entry_bb = region->entry;
6288 exit_bb = region->exit;
6290 si = gsi_last_nondebug_bb (entry_bb);
6291 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6292 gsi_remove (&si, true);
6293 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6295 si = gsi_last_nondebug_bb (exit_bb);
6296 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6298 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6299 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6301 gsi_remove (&si, true);
6302 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6305 /* Generic expansion for OpenMP synchronization directives: master,
6306 ordered and critical. All we need to do here is remove the entry
6307 and exit markers for REGION. */
6309 static void
6310 expand_omp_synch (struct omp_region *region)
6312 basic_block entry_bb, exit_bb;
6313 gimple_stmt_iterator si;
6315 entry_bb = region->entry;
6316 exit_bb = region->exit;
6318 si = gsi_last_nondebug_bb (entry_bb);
6319 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6320 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6321 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6322 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6323 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6324 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6325 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6326 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6328 expand_omp_taskreg (region);
6329 return;
6331 gsi_remove (&si, true);
6332 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6334 if (exit_bb)
6336 si = gsi_last_nondebug_bb (exit_bb);
6337 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6338 gsi_remove (&si, true);
6339 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6343 /* Translate enum omp_memory_order to enum memmodel. The two enums
6344 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6345 is 0. */
6347 static enum memmodel
6348 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6350 switch (mo)
6352 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6353 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6354 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6355 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6356 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6357 default: gcc_unreachable ();
6361 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6362 operation as a normal volatile load. */
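/* Sketch of the intended replacement, where N is the access size
   selected via INDEX:

     loaded_val = __atomic_load_N (addr, memory_order);

   with a VIEW_CONVERT_EXPR wrapped around the call when the declared
   type differs from the builtin's integral type.  */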
6364 static bool
6365 expand_omp_atomic_load (basic_block load_bb, tree addr,
6366 tree loaded_val, int index)
6368 enum built_in_function tmpbase;
6369 gimple_stmt_iterator gsi;
6370 basic_block store_bb;
6371 location_t loc;
6372 gimple *stmt;
6373 tree decl, call, type, itype;
6375 gsi = gsi_last_nondebug_bb (load_bb);
6376 stmt = gsi_stmt (gsi);
6377 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6378 loc = gimple_location (stmt);
6380 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6381 is smaller than word size, then expand_atomic_load assumes that the load
6382 is atomic. We could avoid the builtin entirely in this case. */
6384 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6385 decl = builtin_decl_explicit (tmpbase);
6386 if (decl == NULL_TREE)
6387 return false;
6389 type = TREE_TYPE (loaded_val);
6390 itype = TREE_TYPE (TREE_TYPE (decl));
6392 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6393 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6394 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6395 if (!useless_type_conversion_p (type, itype))
6396 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6397 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6399 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6400 gsi_remove (&gsi, true);
6402 store_bb = single_succ (load_bb);
6403 gsi = gsi_last_nondebug_bb (store_bb);
6404 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6405 gsi_remove (&gsi, true);
6407 if (gimple_in_ssa_p (cfun))
6408 update_ssa (TODO_update_ssa_no_phi);
6410 return true;
6413 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6414 operation as a normal volatile store. */
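/* Sketch of the intended replacement:

     __atomic_store_N (addr, stored_val, memory_order);

   or, when the old value is also needed (an exchange rather than a
   plain store),

     loaded_val = __atomic_exchange_N (addr, stored_val, memory_order);  */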
6416 static bool
6417 expand_omp_atomic_store (basic_block load_bb, tree addr,
6418 tree loaded_val, tree stored_val, int index)
6420 enum built_in_function tmpbase;
6421 gimple_stmt_iterator gsi;
6422 basic_block store_bb = single_succ (load_bb);
6423 location_t loc;
6424 gimple *stmt;
6425 tree decl, call, type, itype;
6426 machine_mode imode;
6427 bool exchange;
6429 gsi = gsi_last_nondebug_bb (load_bb);
6430 stmt = gsi_stmt (gsi);
6431 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6433 /* If the load value is needed, then this isn't a store but an exchange. */
6434 exchange = gimple_omp_atomic_need_value_p (stmt);
6436 gsi = gsi_last_nondebug_bb (store_bb);
6437 stmt = gsi_stmt (gsi);
6438 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6439 loc = gimple_location (stmt);
6441 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6442 is smaller than word size, then expand_atomic_store assumes that the store
6443 is atomic. We could avoid the builtin entirely in this case. */
6445 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6446 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6447 decl = builtin_decl_explicit (tmpbase);
6448 if (decl == NULL_TREE)
6449 return false;
6451 type = TREE_TYPE (stored_val);
6453 /* Dig out the type of the function's second argument. */
6454 itype = TREE_TYPE (decl);
6455 itype = TYPE_ARG_TYPES (itype);
6456 itype = TREE_CHAIN (itype);
6457 itype = TREE_VALUE (itype);
6458 imode = TYPE_MODE (itype);
6460 if (exchange && !can_atomic_exchange_p (imode, true))
6461 return false;
6463 if (!useless_type_conversion_p (itype, type))
6464 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6465 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6466 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6467 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6468 if (exchange)
6470 if (!useless_type_conversion_p (type, itype))
6471 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6472 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6475 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6476 gsi_remove (&gsi, true);
6478 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6479 gsi = gsi_last_nondebug_bb (load_bb);
6480 gsi_remove (&gsi, true);
6482 if (gimple_in_ssa_p (cfun))
6483 update_ssa (TODO_update_ssa_no_phi);
6485 return true;
6488 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6489 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6490 size of the data type, and thus usable to find the index of the builtin
6491 decl. Returns false if the expression is not of the proper form. */
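/* As an illustrative example, the directive

     #pragma omp atomic
     x = x + v;

   fits the form handled here and is replaced by a single call

     __atomic_fetch_add_N (&x, v, memory_order);

   (or the corresponding add-fetch builtin when the updated value is
   needed).  */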
6493 static bool
6494 expand_omp_atomic_fetch_op (basic_block load_bb,
6495 tree addr, tree loaded_val,
6496 tree stored_val, int index)
6498 enum built_in_function oldbase, newbase, tmpbase;
6499 tree decl, itype, call;
6500 tree lhs, rhs;
6501 basic_block store_bb = single_succ (load_bb);
6502 gimple_stmt_iterator gsi;
6503 gimple *stmt;
6504 location_t loc;
6505 enum tree_code code;
6506 bool need_old, need_new;
6507 machine_mode imode;
6509 /* We expect to find the following sequences:
6511 load_bb:
6512 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6514 store_bb:
6515 val = tmp OP something; (or: something OP tmp)
6516        GIMPLE_OMP_ATOMIC_STORE (val)
6518 ???FIXME: Allow a more flexible sequence.
6519 Perhaps use data flow to pick the statements.
6523 gsi = gsi_after_labels (store_bb);
6524 stmt = gsi_stmt (gsi);
6525 if (is_gimple_debug (stmt))
6527 gsi_next_nondebug (&gsi);
6528 if (gsi_end_p (gsi))
6529 return false;
6530 stmt = gsi_stmt (gsi);
6532 loc = gimple_location (stmt);
6533 if (!is_gimple_assign (stmt))
6534 return false;
6535 gsi_next_nondebug (&gsi);
6536 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6537 return false;
6538 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6539 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6540 enum omp_memory_order omo
6541 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6542 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6543 gcc_checking_assert (!need_old || !need_new);
6545 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6546 return false;
6548 /* Check for one of the supported fetch-op operations. */
6549 code = gimple_assign_rhs_code (stmt);
6550 switch (code)
6552 case PLUS_EXPR:
6553 case POINTER_PLUS_EXPR:
6554 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6555 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6556 break;
6557 case MINUS_EXPR:
6558 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6559 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6560 break;
6561 case BIT_AND_EXPR:
6562 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6563 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6564 break;
6565 case BIT_IOR_EXPR:
6566 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6567 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6568 break;
6569 case BIT_XOR_EXPR:
6570 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6571 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6572 break;
6573 default:
6574 return false;
6577 /* Make sure the expression is of the proper form. */
6578 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6579 rhs = gimple_assign_rhs2 (stmt);
6580 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6581 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6582 rhs = gimple_assign_rhs1 (stmt);
6583 else
6584 return false;
6586 tmpbase = ((enum built_in_function)
6587 ((need_new ? newbase : oldbase) + index + 1));
6588 decl = builtin_decl_explicit (tmpbase);
6589 if (decl == NULL_TREE)
6590 return false;
6591 itype = TREE_TYPE (TREE_TYPE (decl));
6592 imode = TYPE_MODE (itype);
6594 /* We could test all of the various optabs involved, but the fact of the
6595 matter is that (with the exception of i486 vs i586 and xadd) all targets
6596      that support any atomic operation optab also implement compare-and-swap.
6597 Let optabs.c take care of expanding any compare-and-swap loop. */
6598 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6599 return false;
6601 gsi = gsi_last_nondebug_bb (load_bb);
6602 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6604 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6605 It only requires that the operation happen atomically. Thus we can
6606 use the RELAXED memory model. */
6607 call = build_call_expr_loc (loc, decl, 3, addr,
6608 fold_convert_loc (loc, itype, rhs),
6609 build_int_cst (NULL, mo));
6611 if (need_old || need_new)
6613 lhs = need_old ? loaded_val : stored_val;
6614 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6615 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6617 else
6618 call = fold_convert_loc (loc, void_type_node, call);
6619 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6620 gsi_remove (&gsi, true);
6622 gsi = gsi_last_nondebug_bb (store_bb);
6623 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6624 gsi_remove (&gsi, true);
6625 gsi = gsi_last_nondebug_bb (store_bb);
6626 stmt = gsi_stmt (gsi);
6627 gsi_remove (&gsi, true);
6629 if (gimple_in_ssa_p (cfun))
6631 release_defs (stmt);
6632 update_ssa (TODO_update_ssa_no_phi);
6635 return true;
6638 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6640 oldval = *addr;
6641 repeat:
6642 newval = rhs; // with oldval replacing *addr in rhs
6643 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6644 if (oldval != newval)
6645 goto repeat;
6647 INDEX is log2 of the size of the data type, and thus usable to find the
6648 index of the builtin decl. */
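/* As an illustrative note: for a 4-byte float the value is
   VIEW_CONVERT'ed to a same-sized integer, __sync_val_compare_and_swap_4
   operates on that integer, and the old/new comparison is done on the
   bit patterns, which is why the loop also terminates for NaNs and
   -0.0.  */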
6650 static bool
6651 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6652 tree addr, tree loaded_val, tree stored_val,
6653 int index)
6655 tree loadedi, storedi, initial, new_storedi, old_vali;
6656 tree type, itype, cmpxchg, iaddr, atype;
6657 gimple_stmt_iterator si;
6658 basic_block loop_header = single_succ (load_bb);
6659 gimple *phi, *stmt;
6660 edge e;
6661 enum built_in_function fncode;
6663 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6664 order to use the RELAXED memory model effectively. */
6665 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6666 + index + 1);
6667 cmpxchg = builtin_decl_explicit (fncode);
6668 if (cmpxchg == NULL_TREE)
6669 return false;
6670 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6671 atype = type;
6672 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6674 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6675 || !can_atomic_load_p (TYPE_MODE (itype)))
6676 return false;
6678 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6679 si = gsi_last_nondebug_bb (load_bb);
6680 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6682 /* For floating-point values, we'll need to view-convert them to integers
6683 so that we can perform the atomic compare and swap. Simplify the
6684 following code by always setting up the "i"ntegral variables. */
6685 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6687 tree iaddr_val;
6689 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6690 true));
6691 atype = itype;
6692 iaddr_val
6693 = force_gimple_operand_gsi (&si,
6694 fold_convert (TREE_TYPE (iaddr), addr),
6695 false, NULL_TREE, true, GSI_SAME_STMT);
6696 stmt = gimple_build_assign (iaddr, iaddr_val);
6697 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6698 loadedi = create_tmp_var (itype);
6699 if (gimple_in_ssa_p (cfun))
6700 loadedi = make_ssa_name (loadedi);
6702 else
6704 iaddr = addr;
6705 loadedi = loaded_val;
6708 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6709 tree loaddecl = builtin_decl_explicit (fncode);
6710 if (loaddecl)
6711 initial
6712 = fold_convert (atype,
6713 build_call_expr (loaddecl, 2, iaddr,
6714 build_int_cst (NULL_TREE,
6715 MEMMODEL_RELAXED)));
6716 else
6718 tree off
6719 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6720 true), 0);
6721 initial = build2 (MEM_REF, atype, iaddr, off);
6724 initial
6725 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6726 GSI_SAME_STMT);
6728 /* Move the value to the LOADEDI temporary. */
6729 if (gimple_in_ssa_p (cfun))
6731 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6732 phi = create_phi_node (loadedi, loop_header);
6733 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6734 initial);
6736 else
6737 gsi_insert_before (&si,
6738 gimple_build_assign (loadedi, initial),
6739 GSI_SAME_STMT);
6740 if (loadedi != loaded_val)
6742 gimple_stmt_iterator gsi2;
6743 tree x;
6745 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6746 gsi2 = gsi_start_bb (loop_header);
6747 if (gimple_in_ssa_p (cfun))
6749 gassign *stmt;
6750 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6751 true, GSI_SAME_STMT);
6752 stmt = gimple_build_assign (loaded_val, x);
6753 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6755 else
6757 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6758 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6759 true, GSI_SAME_STMT);
6762 gsi_remove (&si, true);
6764 si = gsi_last_nondebug_bb (store_bb);
6765 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6767 if (iaddr == addr)
6768 storedi = stored_val;
6769 else
6770 storedi
6771 = force_gimple_operand_gsi (&si,
6772 build1 (VIEW_CONVERT_EXPR, itype,
6773 stored_val), true, NULL_TREE, true,
6774 GSI_SAME_STMT);
6776 /* Build the compare&swap statement. */
6777 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6778 new_storedi = force_gimple_operand_gsi (&si,
6779 fold_convert (TREE_TYPE (loadedi),
6780 new_storedi),
6781 true, NULL_TREE,
6782 true, GSI_SAME_STMT);
6784 if (gimple_in_ssa_p (cfun))
6785 old_vali = loadedi;
6786 else
6788 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6789 stmt = gimple_build_assign (old_vali, loadedi);
6790 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6792 stmt = gimple_build_assign (loadedi, new_storedi);
6793 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6796 /* Note that we always perform the comparison as an integer, even for
6797 floating point. This allows the atomic operation to properly
6798 succeed even with NaNs and -0.0. */
6799 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6800 stmt = gimple_build_cond_empty (ne);
6801 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6803 /* Update cfg. */
6804 e = single_succ_edge (store_bb);
6805 e->flags &= ~EDGE_FALLTHRU;
6806 e->flags |= EDGE_FALSE_VALUE;
6807 /* Expect no looping. */
6808 e->probability = profile_probability::guessed_always ();
6810 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6811 e->probability = profile_probability::guessed_never ();
6813 /* Copy the new value to loadedi (we already did that before the condition
6814 if we are not in SSA). */
6815 if (gimple_in_ssa_p (cfun))
6817 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6818 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6821 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6822 gsi_remove (&si, true);
6824 struct loop *loop = alloc_loop ();
6825 loop->header = loop_header;
6826 loop->latch = store_bb;
6827 add_loop (loop, loop_header->loop_father);
6829 if (gimple_in_ssa_p (cfun))
6830 update_ssa (TODO_update_ssa_no_phi);
6832 return true;
6835 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6837 GOMP_atomic_start ();
6838 *addr = rhs;
6839 GOMP_atomic_end ();
6841 The result is not globally atomic, but works so long as all parallel
6842 references are within #pragma omp atomic directives. According to
6843    responses received from omp@openmp.org, this appears to be within spec.
6844 Which makes sense, since that's how several other compilers handle
6845 this situation as well.
6846 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6847 expanding. STORED_VAL is the operand of the matching
6848 GIMPLE_OMP_ATOMIC_STORE.
6850 We replace
6851 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6852 loaded_val = *addr;
6854 and replace
6855 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6856 *addr = stored_val;
6859 static bool
6860 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6861 tree addr, tree loaded_val, tree stored_val)
6863 gimple_stmt_iterator si;
6864 gassign *stmt;
6865 tree t;
6867 si = gsi_last_nondebug_bb (load_bb);
6868 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6870 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6871 t = build_call_expr (t, 0);
6872 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6874 tree mem = build_simple_mem_ref (addr);
6875 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6876 TREE_OPERAND (mem, 1)
6877 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6878 true),
6879 TREE_OPERAND (mem, 1));
6880 stmt = gimple_build_assign (loaded_val, mem);
6881 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6882 gsi_remove (&si, true);
6884 si = gsi_last_nondebug_bb (store_bb);
6885 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6887 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6888 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6890 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6891 t = build_call_expr (t, 0);
6892 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6893 gsi_remove (&si, true);
6895 if (gimple_in_ssa_p (cfun))
6896 update_ssa (TODO_update_ssa_no_phi);
6897 return true;
6900 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6901 using expand_omp_atomic_fetch_op. If it failed, we try to
6902 call expand_omp_atomic_pipeline, and if it fails too, the
6903 ultimate fallback is wrapping the operation in a mutex
6904 (expand_omp_atomic_mutex). REGION is the atomic region built
6905 by build_omp_regions_1(). */
6907 static void
6908 expand_omp_atomic (struct omp_region *region)
6910 basic_block load_bb = region->entry, store_bb = region->exit;
6911 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6912 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6913 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6914 tree addr = gimple_omp_atomic_load_rhs (load);
6915 tree stored_val = gimple_omp_atomic_store_val (store);
6916 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6917 HOST_WIDE_INT index;
6919 /* Make sure the type is one of the supported sizes. */
6920 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6921 index = exact_log2 (index);
6922 if (index >= 0 && index <= 4)
6924 unsigned int align = TYPE_ALIGN_UNIT (type);
6926 /* __sync builtins require strict data alignment. */
6927 if (exact_log2 (align) >= index)
6929 /* Atomic load. */
6930 scalar_mode smode;
6931 if (loaded_val == stored_val
6932 && (is_int_mode (TYPE_MODE (type), &smode)
6933 || is_float_mode (TYPE_MODE (type), &smode))
6934 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6935 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6936 return;
6938 /* Atomic store. */
6939 if ((is_int_mode (TYPE_MODE (type), &smode)
6940 || is_float_mode (TYPE_MODE (type), &smode))
6941 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6942 && store_bb == single_succ (load_bb)
6943 && first_stmt (store_bb) == store
6944 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6945 stored_val, index))
6946 return;
6948 /* When possible, use specialized atomic update functions. */
6949 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6950 && store_bb == single_succ (load_bb)
6951 && expand_omp_atomic_fetch_op (load_bb, addr,
6952 loaded_val, stored_val, index))
6953 return;
6955 /* If we don't have specialized __sync builtins, try and implement
6956 as a compare and swap loop. */
6957 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6958 loaded_val, stored_val, index))
6959 return;
6963 /* The ultimate fallback is wrapping the operation in a mutex. */
6964 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6967 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6968 at REGION_EXIT. */
6970 static void
6971 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6972 basic_block region_exit)
6974 struct loop *outer = region_entry->loop_father;
6975 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6977 /* Don't parallelize the kernels region if it contains more than one outer
6978 loop. */
6979 unsigned int nr_outer_loops = 0;
6980 struct loop *single_outer = NULL;
6981 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6983 gcc_assert (loop_outer (loop) == outer);
6985 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6986 continue;
6988 if (region_exit != NULL
6989 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6990 continue;
6992 nr_outer_loops++;
6993 single_outer = loop;
6995 if (nr_outer_loops != 1)
6996 return;
6998 for (struct loop *loop = single_outer->inner;
6999 loop != NULL;
7000 loop = loop->inner)
7001 if (loop->next)
7002 return;
7004 /* Mark the loops in the region. */
7005 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7006 loop->in_oacc_kernels_region = true;
7009 /* Types used to pass grid and work-group sizes to kernel invocation. */
7011 struct GTY(()) grid_launch_attributes_trees
7013 tree kernel_dim_array_type;
7014 tree kernel_lattrs_dimnum_decl;
7015 tree kernel_lattrs_grid_decl;
7016 tree kernel_lattrs_group_decl;
7017 tree kernel_launch_attributes_type;
7020 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7022 /* Create types used to pass kernel launch attributes to target. */
7024 static void
7025 grid_create_kernel_launch_attr_types (void)
7027 if (grid_attr_trees)
7028 return;
7029 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7031 tree dim_arr_index_type
7032 = build_index_type (build_int_cst (integer_type_node, 2));
7033 grid_attr_trees->kernel_dim_array_type
7034 = build_array_type (uint32_type_node, dim_arr_index_type);
7036 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7037 grid_attr_trees->kernel_lattrs_dimnum_decl
7038 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7039 uint32_type_node);
7040 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7042 grid_attr_trees->kernel_lattrs_grid_decl
7043 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7044 grid_attr_trees->kernel_dim_array_type);
7045 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7046 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7047 grid_attr_trees->kernel_lattrs_group_decl
7048 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7049 grid_attr_trees->kernel_dim_array_type);
7050 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7051 = grid_attr_trees->kernel_lattrs_grid_decl;
7052 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7053 "__gomp_kernel_launch_attributes",
7054 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
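/* For reference, the record built above corresponds roughly to the
   following C declaration (a sketch; the index type 0..2 gives three
   elements per dimension array):

       struct __gomp_kernel_launch_attributes
       {
         uint32_t ndim;
         uint32_t grid_size[3];
         uint32_t group_size[3];
       };  */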
7057 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7058 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7059 of type uint32_type_node. */
7061 static void
7062 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7063 tree fld_decl, int index, tree value)
7065 tree ref = build4 (ARRAY_REF, uint32_type_node,
7066 build3 (COMPONENT_REF,
7067 grid_attr_trees->kernel_dim_array_type,
7068 range_var, fld_decl, NULL_TREE),
7069 build_int_cst (integer_type_node, index),
7070 NULL_TREE, NULL_TREE);
7071 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7074 /* Return a tree representation of a pointer to a structure with grid and
7075 work-group size information. Statements filling that information will be
7076 inserted before GSI; TGT_STMT is the target statement which has the
7077 necessary information in it. */
7079 static tree
7080 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7081 gomp_target *tgt_stmt)
7083 grid_create_kernel_launch_attr_types ();
7084 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7085 "__kernel_launch_attrs");
7087 unsigned max_dim = 0;
7088 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7089 clause;
7090 clause = OMP_CLAUSE_CHAIN (clause))
7092 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7093 continue;
7095 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7096 max_dim = MAX (dim, max_dim);
7098 grid_insert_store_range_dim (gsi, lattrs,
7099 grid_attr_trees->kernel_lattrs_grid_decl,
7100 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7101 grid_insert_store_range_dim (gsi, lattrs,
7102 grid_attr_trees->kernel_lattrs_group_decl,
7103 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7106 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7107 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7108 gcc_checking_assert (max_dim <= 2);
7109 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7110 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7111 GSI_SAME_STMT);
7112 TREE_ADDRESSABLE (lattrs) = 1;
7113 return build_fold_addr_expr (lattrs);
7116 /* Build target argument identifier from the DEVICE identifier, value
7117 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7119 static tree
7120 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7122 tree t = build_int_cst (integer_type_node, device);
7123 if (subseqent_param)
7124 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7125 build_int_cst (integer_type_node,
7126 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7127 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7128 build_int_cst (integer_type_node, id));
7129 return t;
7132 /* Like above, but return it in a type that can be directly stored as an element
7133 of the argument array. */
7135 static tree
7136 get_target_argument_identifier (int device, bool subseqent_param, int id)
7138 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7139 return fold_convert (ptr_type_node, t);
7142 /* Return a target argument consisting of DEVICE identifier, value identifier
7143 ID, and the actual VALUE. */
7145 static tree
7146 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7147 tree value)
7149 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7150 fold_convert (integer_type_node, value),
7151 build_int_cst (unsigned_type_node,
7152 GOMP_TARGET_ARG_VALUE_SHIFT));
7153 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7154 get_target_argument_identifier_1 (device, false, id));
7155 t = fold_convert (ptr_type_node, t);
7156 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
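/* I.e. the element built here packs the value and the identifier into a
   single pointer-sized integer, roughly
       (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID
   (a sketch; the exact bit layout is given by the GOMP_TARGET_ARG_*
   constants in gomp-constants.h). */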
7159 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7160 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7161 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7162 arguments. */
7164 static void
7165 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7166 int id, tree value, vec <tree> *args)
7168 if (tree_fits_shwi_p (value)
7169 && tree_to_shwi (value) > -(1 << 15)
7170 && tree_to_shwi (value) < (1 << 15))
7171 args->quick_push (get_target_argument_value (gsi, device, id, value));
7172 else
7174 args->quick_push (get_target_argument_identifier (device, true, id));
7175 value = fold_convert (ptr_type_node, value);
7176 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7177 GSI_SAME_STMT);
7178 args->quick_push (value);
7182 /* Create an array of arguments that is then passed to GOMP_target. */
7184 static tree
7185 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7187 auto_vec <tree, 6> args;
7188 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7189 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7190 if (c)
7191 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7192 else
7193 t = integer_minus_one_node;
7194 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7195 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7197 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7198 if (c)
7199 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7200 else
7201 t = integer_minus_one_node;
7202 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7203 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7204 &args);
7206 /* Add HSA-specific grid sizes, if available. */
7207 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7208 OMP_CLAUSE__GRIDDIM_))
7210 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7211 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7212 args.quick_push (t);
7213 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7216 /* Produce more, perhaps device specific, arguments here. */
7218 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7219 args.length () + 1),
7220 ".omp_target_args");
7221 for (unsigned i = 0; i < args.length (); i++)
7223 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7224 build_int_cst (integer_type_node, i),
7225 NULL_TREE, NULL_TREE);
7226 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7227 GSI_SAME_STMT);
7229 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7230 build_int_cst (integer_type_node, args.length ()),
7231 NULL_TREE, NULL_TREE);
7232 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7233 GSI_SAME_STMT);
7234 TREE_ADDRESSABLE (argarray) = 1;
7235 return build_fold_addr_expr (argarray);
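/* The array built above therefore looks roughly like
       { num_teams entry, thread_limit entry,
         [HSA launch-attributes identifier, launch-attributes pointer,]
         NULL }
   with each entry occupying one or two elements depending on how
   push_target_argument_according_to_value encoded it. */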
7238 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7240 static void
7241 expand_omp_target (struct omp_region *region)
7243 basic_block entry_bb, exit_bb, new_bb;
7244 struct function *child_cfun;
7245 tree child_fn, block, t;
7246 gimple_stmt_iterator gsi;
7247 gomp_target *entry_stmt;
7248 gimple *stmt;
7249 edge e;
7250 bool offloaded, data_region;
7252 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7253 new_bb = region->entry;
7255 offloaded = is_gimple_omp_offloaded (entry_stmt);
7256 switch (gimple_omp_target_kind (entry_stmt))
7258 case GF_OMP_TARGET_KIND_REGION:
7259 case GF_OMP_TARGET_KIND_UPDATE:
7260 case GF_OMP_TARGET_KIND_ENTER_DATA:
7261 case GF_OMP_TARGET_KIND_EXIT_DATA:
7262 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7263 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7264 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7265 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7266 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7267 data_region = false;
7268 break;
7269 case GF_OMP_TARGET_KIND_DATA:
7270 case GF_OMP_TARGET_KIND_OACC_DATA:
7271 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7272 data_region = true;
7273 break;
7274 default:
7275 gcc_unreachable ();
7278 child_fn = NULL_TREE;
7279 child_cfun = NULL;
7280 if (offloaded)
7282 child_fn = gimple_omp_target_child_fn (entry_stmt);
7283 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7286 /* Supported by expand_omp_taskreg, but not here. */
7287 if (child_cfun != NULL)
7288 gcc_checking_assert (!child_cfun->cfg);
7289 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7291 entry_bb = region->entry;
7292 exit_bb = region->exit;
7294 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7296 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7298 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7299 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7300 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7301 DECL_ATTRIBUTES (child_fn)
7302 = tree_cons (get_identifier ("oacc kernels"),
7303 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7306 if (offloaded)
7308 unsigned srcidx, dstidx, num;
7310 /* If the offloading region needs data sent from the parent
7311 function, then the very first statement (except possible
7312 tree profile counter updates) of the offloading body
7313 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7314 &.OMP_DATA_O is passed as an argument to the child function,
7315 we need to replace it with the argument as seen by the child
7316 function.
7318 In most cases, this will end up being the identity assignment
7319 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7320 a function call that has been inlined, the original PARM_DECL
7321 .OMP_DATA_I may have been converted into a different local
7322 variable, in which case we need to keep the assignment. */
7323 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7324 if (data_arg)
7326 basic_block entry_succ_bb = single_succ (entry_bb);
7327 gimple_stmt_iterator gsi;
7328 tree arg;
7329 gimple *tgtcopy_stmt = NULL;
7330 tree sender = TREE_VEC_ELT (data_arg, 0);
7332 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7334 gcc_assert (!gsi_end_p (gsi));
7335 stmt = gsi_stmt (gsi);
7336 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7337 continue;
7339 if (gimple_num_ops (stmt) == 2)
7341 tree arg = gimple_assign_rhs1 (stmt);
7343 /* We're ignoring the subcode because we're
7344 effectively doing a STRIP_NOPS. */
7346 if (TREE_CODE (arg) == ADDR_EXPR
7347 && TREE_OPERAND (arg, 0) == sender)
7349 tgtcopy_stmt = stmt;
7350 break;
7355 gcc_assert (tgtcopy_stmt != NULL);
7356 arg = DECL_ARGUMENTS (child_fn);
7358 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7359 gsi_remove (&gsi, true);
7362 /* Declare local variables needed in CHILD_CFUN. */
7363 block = DECL_INITIAL (child_fn);
7364 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7365 /* The gimplifier could record temporaries in the offloading block
7366 rather than in containing function's local_decls chain,
7367 which would mean cgraph missed finalizing them. Do it now. */
7368 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7369 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7370 varpool_node::finalize_decl (t);
7371 DECL_SAVED_TREE (child_fn) = NULL;
7372 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7373 gimple_set_body (child_fn, NULL);
7374 TREE_USED (block) = 1;
7376 /* Reset DECL_CONTEXT on function arguments. */
7377 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7378 DECL_CONTEXT (t) = child_fn;
7380 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7381 so that it can be moved to the child function. */
7382 gsi = gsi_last_nondebug_bb (entry_bb);
7383 stmt = gsi_stmt (gsi);
7384 gcc_assert (stmt
7385 && gimple_code (stmt) == gimple_code (entry_stmt));
7386 e = split_block (entry_bb, stmt);
7387 gsi_remove (&gsi, true);
7388 entry_bb = e->dest;
7389 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7391 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7392 if (exit_bb)
7394 gsi = gsi_last_nondebug_bb (exit_bb);
7395 gcc_assert (!gsi_end_p (gsi)
7396 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7397 stmt = gimple_build_return (NULL);
7398 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7399 gsi_remove (&gsi, true);
7402 /* Make sure to generate early debug for the function before
7403 outlining anything. */
7404 if (! gimple_in_ssa_p (cfun))
7405 (*debug_hooks->early_global_decl) (cfun->decl);
7407 /* Move the offloading region into CHILD_CFUN. */
7409 block = gimple_block (entry_stmt);
7411 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7412 if (exit_bb)
7413 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7414 /* When the OMP expansion process cannot guarantee an up-to-date
7415 loop tree, arrange for the child function to fix up loops. */
7416 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7417 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7419 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7420 num = vec_safe_length (child_cfun->local_decls);
7421 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7423 t = (*child_cfun->local_decls)[srcidx];
7424 if (DECL_CONTEXT (t) == cfun->decl)
7425 continue;
7426 if (srcidx != dstidx)
7427 (*child_cfun->local_decls)[dstidx] = t;
7428 dstidx++;
7430 if (dstidx != num)
7431 vec_safe_truncate (child_cfun->local_decls, dstidx);
7433 /* Inform the callgraph about the new function. */
7434 child_cfun->curr_properties = cfun->curr_properties;
7435 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7436 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7437 cgraph_node *node = cgraph_node::get_create (child_fn);
7438 node->parallelized_function = 1;
7439 cgraph_node::add_new_function (child_fn, true);
7441 /* Add the new function to the offload table. */
7442 if (ENABLE_OFFLOADING)
7444 if (in_lto_p)
7445 DECL_PRESERVE_P (child_fn) = 1;
7446 vec_safe_push (offload_funcs, child_fn);
7449 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7450 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7452 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7453 fixed in a following pass. */
7454 push_cfun (child_cfun);
7455 if (need_asm)
7456 assign_assembler_name_if_needed (child_fn);
7457 cgraph_edge::rebuild_edges ();
7459 /* Some EH regions might become dead, see PR34608. If
7460 pass_cleanup_cfg isn't the first pass to happen with the
7461 new child, these dead EH edges might cause problems.
7462 Clean them up now. */
7463 if (flag_exceptions)
7465 basic_block bb;
7466 bool changed = false;
7468 FOR_EACH_BB_FN (bb, cfun)
7469 changed |= gimple_purge_dead_eh_edges (bb);
7470 if (changed)
7471 cleanup_tree_cfg ();
7473 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7474 verify_loop_structure ();
7475 pop_cfun ();
7477 if (dump_file && !gimple_in_ssa_p (cfun))
7479 omp_any_child_fn_dumped = true;
7480 dump_function_header (dump_file, child_fn, dump_flags);
7481 dump_function_to_file (child_fn, dump_file, dump_flags);
7485 /* Emit a library call to launch the offloading region, or do data
7486 transfers. */
7487 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7488 enum built_in_function start_ix;
7489 location_t clause_loc;
7490 unsigned int flags_i = 0;
7492 switch (gimple_omp_target_kind (entry_stmt))
7494 case GF_OMP_TARGET_KIND_REGION:
7495 start_ix = BUILT_IN_GOMP_TARGET;
7496 break;
7497 case GF_OMP_TARGET_KIND_DATA:
7498 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7499 break;
7500 case GF_OMP_TARGET_KIND_UPDATE:
7501 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7502 break;
7503 case GF_OMP_TARGET_KIND_ENTER_DATA:
7504 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7505 break;
7506 case GF_OMP_TARGET_KIND_EXIT_DATA:
7507 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7508 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7509 break;
7510 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7511 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7512 start_ix = BUILT_IN_GOACC_PARALLEL;
7513 break;
7514 case GF_OMP_TARGET_KIND_OACC_DATA:
7515 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7516 start_ix = BUILT_IN_GOACC_DATA_START;
7517 break;
7518 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7519 start_ix = BUILT_IN_GOACC_UPDATE;
7520 break;
7521 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7522 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7523 break;
7524 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7525 start_ix = BUILT_IN_GOACC_DECLARE;
7526 break;
7527 default:
7528 gcc_unreachable ();
7531 clauses = gimple_omp_target_clauses (entry_stmt);
7533 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7534 library choose) and there is no conditional. */
7535 cond = NULL_TREE;
7536 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7538 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7539 if (c)
7540 cond = OMP_CLAUSE_IF_EXPR (c);
7542 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7543 if (c)
7545 /* Even if we pass it to all library function calls, it is currently only
7546 defined/used for the OpenMP target ones. */
7547 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7548 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7549 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7550 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7552 device = OMP_CLAUSE_DEVICE_ID (c);
7553 clause_loc = OMP_CLAUSE_LOCATION (c);
7555 else
7556 clause_loc = gimple_location (entry_stmt);
7558 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7559 if (c)
7560 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7562 /* Ensure 'device' is of the correct type. */
7563 device = fold_convert_loc (clause_loc, integer_type_node, device);
7565 /* If we found the clause 'if (cond)', build
7566 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7567 if (cond)
7569 cond = gimple_boolify (cond);
7571 basic_block cond_bb, then_bb, else_bb;
7572 edge e;
7573 tree tmp_var;
7575 tmp_var = create_tmp_var (TREE_TYPE (device));
7576 if (offloaded)
7577 e = split_block_after_labels (new_bb);
7578 else
7580 gsi = gsi_last_nondebug_bb (new_bb);
7581 gsi_prev (&gsi);
7582 e = split_block (new_bb, gsi_stmt (gsi));
7584 cond_bb = e->src;
7585 new_bb = e->dest;
7586 remove_edge (e);
7588 then_bb = create_empty_bb (cond_bb);
7589 else_bb = create_empty_bb (then_bb);
7590 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7591 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7593 stmt = gimple_build_cond_empty (cond);
7594 gsi = gsi_last_bb (cond_bb);
7595 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7597 gsi = gsi_start_bb (then_bb);
7598 stmt = gimple_build_assign (tmp_var, device);
7599 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7601 gsi = gsi_start_bb (else_bb);
7602 stmt = gimple_build_assign (tmp_var,
7603 build_int_cst (integer_type_node,
7604 GOMP_DEVICE_HOST_FALLBACK));
7605 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7607 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7608 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7609 add_bb_to_loop (then_bb, cond_bb->loop_father);
7610 add_bb_to_loop (else_bb, cond_bb->loop_father);
7611 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7612 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7614 device = tmp_var;
7615 gsi = gsi_last_nondebug_bb (new_bb);
7617 else
7619 gsi = gsi_last_nondebug_bb (new_bb);
7620 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7621 true, GSI_SAME_STMT);
7624 t = gimple_omp_target_data_arg (entry_stmt);
7625 if (t == NULL)
7627 t1 = size_zero_node;
7628 t2 = build_zero_cst (ptr_type_node);
7629 t3 = t2;
7630 t4 = t2;
7632 else
7634 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7635 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7636 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7637 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7638 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
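/* T1 is then the number of mapped entries, and T2, T3 and T4 the addresses
   of the host-address, size and map-kind vectors prepared by target
   lowering. */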
7641 gimple *g;
7642 bool tagging = false;
7643 /* The maximum number of arguments used by any start_ix, without varargs. */
7644 auto_vec<tree, 11> args;
7645 args.quick_push (device);
7646 if (offloaded)
7647 args.quick_push (build_fold_addr_expr (child_fn));
7648 args.quick_push (t1);
7649 args.quick_push (t2);
7650 args.quick_push (t3);
7651 args.quick_push (t4);
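/* For the OpenMP target case the call being assembled ends up with roughly
   the shape
       GOMP_target_ext (device, fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, args);
   (a sketch; the actual callee is whatever builtin_decl_explicit (START_IX)
   resolves to, and the trailing operands are pushed in the switch below). */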
7652 switch (start_ix)
7654 case BUILT_IN_GOACC_DATA_START:
7655 case BUILT_IN_GOACC_DECLARE:
7656 case BUILT_IN_GOMP_TARGET_DATA:
7657 break;
7658 case BUILT_IN_GOMP_TARGET:
7659 case BUILT_IN_GOMP_TARGET_UPDATE:
7660 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7661 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7662 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7663 if (c)
7664 depend = OMP_CLAUSE_DECL (c);
7665 else
7666 depend = build_int_cst (ptr_type_node, 0);
7667 args.quick_push (depend);
7668 if (start_ix == BUILT_IN_GOMP_TARGET)
7669 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7670 break;
7671 case BUILT_IN_GOACC_PARALLEL:
7672 oacc_set_fn_attrib (child_fn, clauses, &args);
7673 tagging = true;
7674 /* FALLTHRU */
7675 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7676 case BUILT_IN_GOACC_UPDATE:
7678 tree t_async = NULL_TREE;
7680 /* If present, use the value specified by the respective
7681 clause, making sure that is of the correct type. */
7682 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7683 if (c)
7684 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7685 integer_type_node,
7686 OMP_CLAUSE_ASYNC_EXPR (c));
7687 else if (!tagging)
7688 /* Default values for t_async. */
7689 t_async = fold_convert_loc (gimple_location (entry_stmt),
7690 integer_type_node,
7691 build_int_cst (integer_type_node,
7692 GOMP_ASYNC_SYNC));
7693 if (tagging && t_async)
7695 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7697 if (TREE_CODE (t_async) == INTEGER_CST)
7699 /* See if we can pack the async arg in to the tag's
7700 operand. */
7701 i_async = TREE_INT_CST_LOW (t_async);
7702 if (i_async < GOMP_LAUNCH_OP_MAX)
7703 t_async = NULL_TREE;
7704 else
7705 i_async = GOMP_LAUNCH_OP_MAX;
7707 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7708 i_async));
7710 if (t_async)
7711 args.safe_push (t_async);
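/* I.e. when tagging for GOACC_PARALLEL, a small constant async value is
   folded directly into the GOMP_LAUNCH_ASYNC tag operand; larger or
   non-constant values are passed as a separate argument following the
   tag. */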
7713 /* Save the argument index, and ... */
7714 unsigned t_wait_idx = args.length ();
7715 unsigned num_waits = 0;
7716 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7717 if (!tagging || c)
7718 /* ... push a placeholder. */
7719 args.safe_push (integer_zero_node);
7721 for (; c; c = OMP_CLAUSE_CHAIN (c))
7722 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7724 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7725 integer_type_node,
7726 OMP_CLAUSE_WAIT_EXPR (c)));
7727 num_waits++;
7730 if (!tagging || num_waits)
7732 tree len;
7734 /* Now that we know the number, update the placeholder. */
7735 if (tagging)
7736 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7737 else
7738 len = build_int_cst (integer_type_node, num_waits);
7739 len = fold_convert_loc (gimple_location (entry_stmt),
7740 unsigned_type_node, len);
7741 args[t_wait_idx] = len;
7744 break;
7745 default:
7746 gcc_unreachable ();
7748 if (tagging)
7749 /* Push terminal marker - zero. */
7750 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7752 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7753 gimple_set_location (g, gimple_location (entry_stmt));
7754 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7755 if (!offloaded)
7757 g = gsi_stmt (gsi);
7758 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7759 gsi_remove (&gsi, true);
7761 if (data_region && region->exit)
7763 gsi = gsi_last_nondebug_bb (region->exit);
7764 g = gsi_stmt (gsi);
7765 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7766 gsi_remove (&gsi, true);
7770 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7771 the iteration variable derived from the thread number. INTRA_GROUP means this
7772 is an expansion of a loop iterating over work-items within a separate
7773 iteration over groups. */
7775 static void
7776 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7778 gimple_stmt_iterator gsi;
7779 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7780 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7781 == GF_OMP_FOR_KIND_GRID_LOOP);
7782 size_t collapse = gimple_omp_for_collapse (for_stmt);
7783 struct omp_for_data_loop *loops
7784 = XALLOCAVEC (struct omp_for_data_loop,
7785 gimple_omp_for_collapse (for_stmt));
7786 struct omp_for_data fd;
7788 remove_edge (BRANCH_EDGE (kfor->entry));
7789 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7791 gcc_assert (kfor->cont);
7792 omp_extract_for_data (for_stmt, &fd, loops);
7794 gsi = gsi_start_bb (body_bb);
7796 for (size_t dim = 0; dim < collapse; dim++)
7798 tree type, itype;
7799 itype = type = TREE_TYPE (fd.loops[dim].v);
7800 if (POINTER_TYPE_P (type))
7801 itype = signed_type_for (type);
7803 tree n1 = fd.loops[dim].n1;
7804 tree step = fd.loops[dim].step;
7805 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7806 true, NULL_TREE, true, GSI_SAME_STMT);
7807 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7808 true, NULL_TREE, true, GSI_SAME_STMT);
7809 tree threadid;
7810 if (gimple_omp_for_grid_group_iter (for_stmt))
7812 gcc_checking_assert (!intra_group);
7813 threadid = build_call_expr (builtin_decl_explicit
7814 (BUILT_IN_HSA_WORKGROUPID), 1,
7815 build_int_cstu (unsigned_type_node, dim));
7817 else if (intra_group)
7818 threadid = build_call_expr (builtin_decl_explicit
7819 (BUILT_IN_HSA_WORKITEMID), 1,
7820 build_int_cstu (unsigned_type_node, dim));
7821 else
7822 threadid = build_call_expr (builtin_decl_explicit
7823 (BUILT_IN_HSA_WORKITEMABSID), 1,
7824 build_int_cstu (unsigned_type_node, dim));
7825 threadid = fold_convert (itype, threadid);
7826 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7827 true, GSI_SAME_STMT);
7829 tree startvar = fd.loops[dim].v;
7830 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7831 if (POINTER_TYPE_P (type))
7832 t = fold_build_pointer_plus (n1, t);
7833 else
7834 t = fold_build2 (PLUS_EXPR, type, t, n1);
7835 t = fold_convert (type, t);
7836 t = force_gimple_operand_gsi (&gsi, t,
7837 DECL_P (startvar)
7838 && TREE_ADDRESSABLE (startvar),
7839 NULL_TREE, true, GSI_SAME_STMT);
7840 gassign *assign_stmt = gimple_build_assign (startvar, t);
7841 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
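/* So for each collapsed dimension the body simply starts from
   V = N1 + THREADID * STEP, with THREADID coming from the HSA
   work-group/work-item id builtin chosen above; no loop back-edge is
   kept. */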
7843 /* Remove the omp for statement. */
7844 gsi = gsi_last_nondebug_bb (kfor->entry);
7845 gsi_remove (&gsi, true);
7847 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7848 gsi = gsi_last_nondebug_bb (kfor->cont);
7849 gcc_assert (!gsi_end_p (gsi)
7850 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7851 gsi_remove (&gsi, true);
7853 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7854 gsi = gsi_last_nondebug_bb (kfor->exit);
7855 gcc_assert (!gsi_end_p (gsi)
7856 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7857 if (intra_group)
7858 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7859 gsi_remove (&gsi, true);
7861 /* Fixup the much simpler CFG. */
7862 remove_edge (find_edge (kfor->cont, body_bb));
7864 if (kfor->cont != body_bb)
7865 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7866 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7869 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7870 argument_decls. */
7872 struct grid_arg_decl_map
7874 tree old_arg;
7875 tree new_arg;
7878 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7879 pertaining to kernel function. */
7881 static tree
7882 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7884 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7885 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7886 tree t = *tp;
7888 if (t == adm->old_arg)
7889 *tp = adm->new_arg;
7890 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7891 return NULL_TREE;
7894 /* If the TARGET region contains a kernel body for-loop, remove its region from the
7895 TARGET and expand it in HSA gridified kernel fashion. */
7897 static void
7898 grid_expand_target_grid_body (struct omp_region *target)
7900 if (!hsa_gen_requested_p ())
7901 return;
7903 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7904 struct omp_region **pp;
7906 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7907 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7908 break;
7910 struct omp_region *gpukernel = *pp;
7912 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7913 if (!gpukernel)
7915 /* HSA cannot handle OACC stuff. */
7916 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7917 return;
7918 gcc_checking_assert (orig_child_fndecl);
7919 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7920 OMP_CLAUSE__GRIDDIM_));
7921 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7923 hsa_register_kernel (n);
7924 return;
7927 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7928 OMP_CLAUSE__GRIDDIM_));
7929 tree inside_block
7930 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7931 *pp = gpukernel->next;
7932 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7933 if ((*pp)->type == GIMPLE_OMP_FOR)
7934 break;
7936 struct omp_region *kfor = *pp;
7937 gcc_assert (kfor);
7938 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7939 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7940 *pp = kfor->next;
7941 if (kfor->inner)
7943 if (gimple_omp_for_grid_group_iter (for_stmt))
7945 struct omp_region **next_pp;
7946 for (pp = &kfor->inner; *pp; pp = next_pp)
7948 next_pp = &(*pp)->next;
7949 if ((*pp)->type != GIMPLE_OMP_FOR)
7950 continue;
7951 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7952 gcc_assert (gimple_omp_for_kind (inner)
7953 == GF_OMP_FOR_KIND_GRID_LOOP);
7954 grid_expand_omp_for_loop (*pp, true);
7955 *pp = (*pp)->next;
7956 next_pp = pp;
7959 expand_omp (kfor->inner);
7961 if (gpukernel->inner)
7962 expand_omp (gpukernel->inner);
7964 tree kern_fndecl = copy_node (orig_child_fndecl);
7965 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
7966 "kernel");
7967 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7968 tree tgtblock = gimple_block (tgt_stmt);
7969 tree fniniblock = make_node (BLOCK);
7970 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
7971 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7972 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7973 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7974 DECL_INITIAL (kern_fndecl) = fniniblock;
7975 push_struct_function (kern_fndecl);
7976 cfun->function_end_locus = gimple_location (tgt_stmt);
7977 init_tree_ssa (cfun);
7978 pop_cfun ();
7980 /* Make sure to generate early debug for the function before
7981 outlining anything. */
7982 if (! gimple_in_ssa_p (cfun))
7983 (*debug_hooks->early_global_decl) (cfun->decl);
7985 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7986 gcc_assert (!DECL_CHAIN (old_parm_decl));
7987 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7988 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7989 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7990 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7991 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7992 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7993 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7994 kern_cfun->curr_properties = cfun->curr_properties;
7996 grid_expand_omp_for_loop (kfor, false);
7998 /* Remove the omp for statement. */
7999 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8000 gsi_remove (&gsi, true);
8001 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8002 return. */
8003 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8004 gcc_assert (!gsi_end_p (gsi)
8005 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8006 gimple *ret_stmt = gimple_build_return (NULL);
8007 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8008 gsi_remove (&gsi, true);
8010 /* Statements in the first BB in the target construct have been produced by
8011 target lowering and must be copied inside the GPUKERNEL, with the two
8012 exceptions of the first OMP statement and the OMP_DATA assignment
8013 statement. */
8014 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8015 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8016 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8017 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8018 !gsi_end_p (tsi); gsi_next (&tsi))
8020 gimple *stmt = gsi_stmt (tsi);
8021 if (is_gimple_omp (stmt))
8022 break;
8023 if (sender
8024 && is_gimple_assign (stmt)
8025 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8026 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8027 continue;
8028 gimple *copy = gimple_copy (stmt);
8029 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8030 gimple_set_block (copy, fniniblock);
8033 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8034 gpukernel->exit, inside_block);
8036 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8037 kcn->mark_force_output ();
8038 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8040 hsa_register_kernel (kcn, orig_child);
8042 cgraph_node::add_new_function (kern_fndecl, true);
8043 push_cfun (kern_cfun);
8044 cgraph_edge::rebuild_edges ();
8046 /* Re-map any mention of the PARM_DECL of the original function to the
8047 PARM_DECL of the new one.
8049 TODO: It would be great if lowering produced references into the GPU
8050 kernel decl straight away and we did not have to do this. */
8051 struct grid_arg_decl_map adm;
8052 adm.old_arg = old_parm_decl;
8053 adm.new_arg = new_parm_decl;
8054 basic_block bb;
8055 FOR_EACH_BB_FN (bb, kern_cfun)
8057 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8059 gimple *stmt = gsi_stmt (gsi);
8060 struct walk_stmt_info wi;
8061 memset (&wi, 0, sizeof (wi));
8062 wi.info = &adm;
8063 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8066 pop_cfun ();
8068 return;
8071 /* Expand the parallel region tree rooted at REGION. Expansion
8072 proceeds in depth-first order. Innermost regions are expanded
8073 first. This way, parallel regions that require a new function to
8074 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8075 internal dependencies in their body. */
8077 static void
8078 expand_omp (struct omp_region *region)
8080 omp_any_child_fn_dumped = false;
8081 while (region)
8083 location_t saved_location;
8084 gimple *inner_stmt = NULL;
8086 /* First, determine whether this is a combined parallel+workshare
8087 region. */
8088 if (region->type == GIMPLE_OMP_PARALLEL)
8089 determine_parallel_type (region);
8090 else if (region->type == GIMPLE_OMP_TARGET)
8091 grid_expand_target_grid_body (region);
8093 if (region->type == GIMPLE_OMP_FOR
8094 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8095 inner_stmt = last_stmt (region->inner->entry);
8097 if (region->inner)
8098 expand_omp (region->inner);
8100 saved_location = input_location;
8101 if (gimple_has_location (last_stmt (region->entry)))
8102 input_location = gimple_location (last_stmt (region->entry));
8104 switch (region->type)
8106 case GIMPLE_OMP_PARALLEL:
8107 case GIMPLE_OMP_TASK:
8108 expand_omp_taskreg (region);
8109 break;
8111 case GIMPLE_OMP_FOR:
8112 expand_omp_for (region, inner_stmt);
8113 break;
8115 case GIMPLE_OMP_SECTIONS:
8116 expand_omp_sections (region);
8117 break;
8119 case GIMPLE_OMP_SECTION:
8120 /* Individual omp sections are handled together with their
8121 parent GIMPLE_OMP_SECTIONS region. */
8122 break;
8124 case GIMPLE_OMP_SINGLE:
8125 expand_omp_single (region);
8126 break;
8128 case GIMPLE_OMP_ORDERED:
8130 gomp_ordered *ord_stmt
8131 = as_a <gomp_ordered *> (last_stmt (region->entry));
8132 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8133 OMP_CLAUSE_DEPEND))
8135 /* We'll expand these when expanding corresponding
8136 worksharing region with ordered(n) clause. */
8137 gcc_assert (region->outer
8138 && region->outer->type == GIMPLE_OMP_FOR);
8139 region->ord_stmt = ord_stmt;
8140 break;
8143 /* FALLTHRU */
8144 case GIMPLE_OMP_MASTER:
8145 case GIMPLE_OMP_TASKGROUP:
8146 case GIMPLE_OMP_CRITICAL:
8147 case GIMPLE_OMP_TEAMS:
8148 expand_omp_synch (region);
8149 break;
8151 case GIMPLE_OMP_ATOMIC_LOAD:
8152 expand_omp_atomic (region);
8153 break;
8155 case GIMPLE_OMP_TARGET:
8156 expand_omp_target (region);
8157 break;
8159 default:
8160 gcc_unreachable ();
8163 input_location = saved_location;
8164 region = region->next;
8166 if (omp_any_child_fn_dumped)
8168 if (dump_file)
8169 dump_function_header (dump_file, current_function_decl, dump_flags);
8170 omp_any_child_fn_dumped = false;
8174 /* Helper for build_omp_regions. Scan the dominator tree starting at
8175 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8176 true, the function ends once a single tree is built (otherwise, a whole
8177 forest of OMP constructs may be built). */
8179 static void
8180 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8181 bool single_tree)
8183 gimple_stmt_iterator gsi;
8184 gimple *stmt;
8185 basic_block son;
8187 gsi = gsi_last_nondebug_bb (bb);
8188 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8190 struct omp_region *region;
8191 enum gimple_code code;
8193 stmt = gsi_stmt (gsi);
8194 code = gimple_code (stmt);
8195 if (code == GIMPLE_OMP_RETURN)
8197 /* STMT is the return point out of region PARENT. Mark it
8198 as the exit point and make PARENT the immediately
8199 enclosing region. */
8200 gcc_assert (parent);
8201 region = parent;
8202 region->exit = bb;
8203 parent = parent->outer;
8205 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8207 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8208 GIMPLE_OMP_RETURN, but matches with
8209 GIMPLE_OMP_ATOMIC_LOAD. */
8210 gcc_assert (parent);
8211 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8212 region = parent;
8213 region->exit = bb;
8214 parent = parent->outer;
8216 else if (code == GIMPLE_OMP_CONTINUE)
8218 gcc_assert (parent);
8219 parent->cont = bb;
8221 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8223 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8224 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8226 else
8228 region = new_omp_region (bb, code, parent);
8229 /* Otherwise... */
8230 if (code == GIMPLE_OMP_TARGET)
8232 switch (gimple_omp_target_kind (stmt))
8234 case GF_OMP_TARGET_KIND_REGION:
8235 case GF_OMP_TARGET_KIND_DATA:
8236 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8237 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8238 case GF_OMP_TARGET_KIND_OACC_DATA:
8239 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8240 break;
8241 case GF_OMP_TARGET_KIND_UPDATE:
8242 case GF_OMP_TARGET_KIND_ENTER_DATA:
8243 case GF_OMP_TARGET_KIND_EXIT_DATA:
8244 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8245 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8246 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8247 /* ..., other than for those stand-alone directives... */
8248 region = NULL;
8249 break;
8250 default:
8251 gcc_unreachable ();
8254 else if (code == GIMPLE_OMP_ORDERED
8255 && omp_find_clause (gimple_omp_ordered_clauses
8256 (as_a <gomp_ordered *> (stmt)),
8257 OMP_CLAUSE_DEPEND))
8258 /* #pragma omp ordered depend is also just a stand-alone
8259 directive. */
8260 region = NULL;
8261 else if (code == GIMPLE_OMP_TASK
8262 && gimple_omp_task_taskwait_p (stmt))
8263 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8264 region = NULL;
8265 /* ..., this directive becomes the parent for a new region. */
8266 if (region)
8267 parent = region;
8271 if (single_tree && !parent)
8272 return;
8274 for (son = first_dom_son (CDI_DOMINATORS, bb);
8275 son;
8276 son = next_dom_son (CDI_DOMINATORS, son))
8277 build_omp_regions_1 (son, parent, single_tree);
8280 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8281 root_omp_region. */
8283 static void
8284 build_omp_regions_root (basic_block root)
8286 gcc_assert (root_omp_region == NULL);
8287 build_omp_regions_1 (root, NULL, true);
8288 gcc_assert (root_omp_region != NULL);
8291 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8293 void
8294 omp_expand_local (basic_block head)
8296 build_omp_regions_root (head);
8297 if (dump_file && (dump_flags & TDF_DETAILS))
8299 fprintf (dump_file, "\nOMP region tree\n\n");
8300 dump_omp_region (dump_file, root_omp_region, 0);
8301 fprintf (dump_file, "\n");
8304 remove_exit_barriers (root_omp_region);
8305 expand_omp (root_omp_region);
8307 omp_free_regions ();
8310 /* Scan the CFG and build a tree of OMP regions, storing its root in
8311 root_omp_region. */
8313 static void
8314 build_omp_regions (void)
8316 gcc_assert (root_omp_region == NULL);
8317 calculate_dominance_info (CDI_DOMINATORS);
8318 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8321 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8323 static unsigned int
8324 execute_expand_omp (void)
8326 build_omp_regions ();
8328 if (!root_omp_region)
8329 return 0;
8331 if (dump_file)
8333 fprintf (dump_file, "\nOMP region tree\n\n");
8334 dump_omp_region (dump_file, root_omp_region, 0);
8335 fprintf (dump_file, "\n");
8338 remove_exit_barriers (root_omp_region);
8340 expand_omp (root_omp_region);
8342 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8343 verify_loop_structure ();
8344 cleanup_tree_cfg ();
8346 omp_free_regions ();
8348 return 0;
8351 /* OMP expansion -- the default pass, run before creation of SSA form. */
8353 namespace {
8355 const pass_data pass_data_expand_omp =
8357 GIMPLE_PASS, /* type */
8358 "ompexp", /* name */
8359 OPTGROUP_OMP, /* optinfo_flags */
8360 TV_NONE, /* tv_id */
8361 PROP_gimple_any, /* properties_required */
8362 PROP_gimple_eomp, /* properties_provided */
8363 0, /* properties_destroyed */
8364 0, /* todo_flags_start */
8365 0, /* todo_flags_finish */
8368 class pass_expand_omp : public gimple_opt_pass
8370 public:
8371 pass_expand_omp (gcc::context *ctxt)
8372 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8375 /* opt_pass methods: */
8376 virtual unsigned int execute (function *)
8378 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8379 || flag_openmp_simd != 0)
8380 && !seen_error ());
8382 /* This pass always runs, to provide PROP_gimple_eomp.
8383 But often, there is nothing to do. */
8384 if (!gate)
8385 return 0;
8387 return execute_expand_omp ();
8390 }; // class pass_expand_omp
8392 } // anon namespace
8394 gimple_opt_pass *
8395 make_pass_expand_omp (gcc::context *ctxt)
8397 return new pass_expand_omp (ctxt);
8400 namespace {
8402 const pass_data pass_data_expand_omp_ssa =
8404 GIMPLE_PASS, /* type */
8405 "ompexpssa", /* name */
8406 OPTGROUP_OMP, /* optinfo_flags */
8407 TV_NONE, /* tv_id */
8408 PROP_cfg | PROP_ssa, /* properties_required */
8409 PROP_gimple_eomp, /* properties_provided */
8410 0, /* properties_destroyed */
8411 0, /* todo_flags_start */
8412 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8415 class pass_expand_omp_ssa : public gimple_opt_pass
8417 public:
8418 pass_expand_omp_ssa (gcc::context *ctxt)
8419 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8422 /* opt_pass methods: */
8423 virtual bool gate (function *fun)
8425 return !(fun->curr_properties & PROP_gimple_eomp);
8427 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8428 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8430 }; // class pass_expand_omp_ssa
8432 } // anon namespace
8434 gimple_opt_pass *
8435 make_pass_expand_omp_ssa (gcc::context *ctxt)
8437 return new pass_expand_omp_ssa (ctxt);
8440 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8441 GIMPLE_OMP_* codes. */
8443 bool
8444 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8445 int *region_idx)
8447 gimple *last = last_stmt (bb);
8448 enum gimple_code code = gimple_code (last);
8449 struct omp_region *cur_region = *region;
8450 bool fallthru = false;
8452 switch (code)
8454 case GIMPLE_OMP_PARALLEL:
8455 case GIMPLE_OMP_FOR:
8456 case GIMPLE_OMP_SINGLE:
8457 case GIMPLE_OMP_TEAMS:
8458 case GIMPLE_OMP_MASTER:
8459 case GIMPLE_OMP_TASKGROUP:
8460 case GIMPLE_OMP_CRITICAL:
8461 case GIMPLE_OMP_SECTION:
8462 case GIMPLE_OMP_GRID_BODY:
8463 cur_region = new_omp_region (bb, code, cur_region);
8464 fallthru = true;
8465 break;
8467 case GIMPLE_OMP_TASK:
8468 cur_region = new_omp_region (bb, code, cur_region);
8469 fallthru = true;
8470 if (gimple_omp_task_taskwait_p (last))
8471 cur_region = cur_region->outer;
8472 break;
8474 case GIMPLE_OMP_ORDERED:
8475 cur_region = new_omp_region (bb, code, cur_region);
8476 fallthru = true;
8477 if (omp_find_clause (gimple_omp_ordered_clauses
8478 (as_a <gomp_ordered *> (last)),
8479 OMP_CLAUSE_DEPEND))
8480 cur_region = cur_region->outer;
8481 break;
8483 case GIMPLE_OMP_TARGET:
8484 cur_region = new_omp_region (bb, code, cur_region);
8485 fallthru = true;
8486 switch (gimple_omp_target_kind (last))
8488 case GF_OMP_TARGET_KIND_REGION:
8489 case GF_OMP_TARGET_KIND_DATA:
8490 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8491 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8492 case GF_OMP_TARGET_KIND_OACC_DATA:
8493 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8494 break;
8495 case GF_OMP_TARGET_KIND_UPDATE:
8496 case GF_OMP_TARGET_KIND_ENTER_DATA:
8497 case GF_OMP_TARGET_KIND_EXIT_DATA:
8498 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8499 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8500 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8501 cur_region = cur_region->outer;
8502 break;
8503 default:
8504 gcc_unreachable ();
8506 break;
8508 case GIMPLE_OMP_SECTIONS:
8509 cur_region = new_omp_region (bb, code, cur_region);
8510 fallthru = true;
8511 break;
8513 case GIMPLE_OMP_SECTIONS_SWITCH:
8514 fallthru = false;
8515 break;
8517 case GIMPLE_OMP_ATOMIC_LOAD:
8518 case GIMPLE_OMP_ATOMIC_STORE:
8519 fallthru = true;
8520 break;
8522 case GIMPLE_OMP_RETURN:
8523 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8524 somewhere other than the next block. This will be
8525 created later. */
8526 cur_region->exit = bb;
8527 if (cur_region->type == GIMPLE_OMP_TASK)
8528 /* Add an edge corresponding to not scheduling the task
8529 immediately. */
8530 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8531 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8532 cur_region = cur_region->outer;
8533 break;
8535 case GIMPLE_OMP_CONTINUE:
8536 cur_region->cont = bb;
8537 switch (cur_region->type)
8539 case GIMPLE_OMP_FOR:
8540 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8541 succs edges as abnormal to prevent splitting
8542 them. */
8543 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8544 /* Make the loopback edge. */
8545 make_edge (bb, single_succ (cur_region->entry),
8546 EDGE_ABNORMAL);
8548 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8549 corresponds to the case that the body of the loop
8550 is not executed at all. */
8551 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8552 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8553 fallthru = false;
8554 break;
8556 case GIMPLE_OMP_SECTIONS:
8557 /* Wire up the edges into and out of the nested sections. */
8559 basic_block switch_bb = single_succ (cur_region->entry);
8561 struct omp_region *i;
8562 for (i = cur_region->inner; i ; i = i->next)
8564 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8565 make_edge (switch_bb, i->entry, 0);
8566 make_edge (i->exit, bb, EDGE_FALLTHRU);
8569 /* Make the loopback edge to the block with
8570 GIMPLE_OMP_SECTIONS_SWITCH. */
8571 make_edge (bb, switch_bb, 0);
8573 /* Make the edge from the switch to exit. */
8574 make_edge (switch_bb, bb->next_bb, 0);
8575 fallthru = false;
8577 break;
8579 case GIMPLE_OMP_TASK:
8580 fallthru = true;
8581 break;
8583 default:
8584 gcc_unreachable ();
8586 break;
8588 default:
8589 gcc_unreachable ();
8592 if (*region != cur_region)
8594 *region = cur_region;
8595 if (cur_region)
8596 *region_idx = cur_region->entry->index;
8597 else
8598 *region_idx = 0;
8601 return fallthru;
8604 #include "gt-omp-expand.h"