re PR fortran/88376 (ICE in is_illegal_recursion, at fortran/resolve.c:1689)
[official-gcc.git] / gcc / omp-expand.c
blob5c2dba9d1c5ffc3d1178369331bf2a1d63e38ef4
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 /* Give up for task reductions on the parallel, while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
331 if (region->inner->type == GIMPLE_OMP_FOR)
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
349 return;
351 else if (region->inner->type == GIMPLE_OMP_SECTIONS
352 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
353 OMP_CLAUSE__REDUCTEMP_))
354 return;
356 region->is_combined_parallel = true;
357 region->inner->is_combined_parallel = true;
358 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
362 /* Debugging dumps for parallel regions. */
363 void dump_omp_region (FILE *, struct omp_region *, int);
364 void debug_omp_region (struct omp_region *);
365 void debug_all_omp_regions (void);
367 /* Dump the parallel region tree rooted at REGION. */
369 void
370 dump_omp_region (FILE *file, struct omp_region *region, int indent)
372 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
373 gimple_code_name[region->type]);
375 if (region->inner)
376 dump_omp_region (file, region->inner, indent + 4);
378 if (region->cont)
380 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
381 region->cont->index);
384 if (region->exit)
385 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
386 region->exit->index);
387 else
388 fprintf (file, "%*s[no exit marker]\n", indent, "");
390 if (region->next)
391 dump_omp_region (file, region->next, indent);
394 DEBUG_FUNCTION void
395 debug_omp_region (struct omp_region *region)
397 dump_omp_region (stderr, region, 0);
400 DEBUG_FUNCTION void
401 debug_all_omp_regions (void)
403 dump_omp_region (stderr, root_omp_region, 0);
406 /* Create a new parallel region starting at STMT inside region PARENT. */
408 static struct omp_region *
409 new_omp_region (basic_block bb, enum gimple_code type,
410 struct omp_region *parent)
412 struct omp_region *region = XCNEW (struct omp_region);
414 region->outer = parent;
415 region->entry = bb;
416 region->type = type;
418 if (parent)
420 /* This is a nested region. Add it to the list of inner
421 regions in PARENT. */
422 region->next = parent->inner;
423 parent->inner = region;
425 else
427 /* This is a toplevel region. Add it to the list of toplevel
428 regions in ROOT_OMP_REGION. */
429 region->next = root_omp_region;
430 root_omp_region = region;
433 return region;
436 /* Release the memory associated with the region tree rooted at REGION. */
438 static void
439 free_omp_region_1 (struct omp_region *region)
441 struct omp_region *i, *n;
443 for (i = region->inner; i ; i = n)
445 n = i->next;
446 free_omp_region_1 (i);
449 free (region);
452 /* Release the memory for the entire omp region tree. */
454 void
455 omp_free_regions (void)
457 struct omp_region *r, *n;
458 for (r = root_omp_region; r ; r = n)
460 n = r->next;
461 free_omp_region_1 (r);
463 root_omp_region = NULL;
466 /* A convenience function to build an empty GIMPLE_COND with just the
467 condition. */
469 static gcond *
470 gimple_build_cond_empty (tree cond)
472 enum tree_code pred_code;
473 tree lhs, rhs;
475 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
476 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
479 /* Return true if a parallel REGION is within a declare target function or
480 within a target region and is not a part of a gridified target. */
482 static bool
483 parallel_needs_hsa_kernel_p (struct omp_region *region)
485 bool indirect = false;
486 for (region = region->outer; region; region = region->outer)
488 if (region->type == GIMPLE_OMP_PARALLEL)
489 indirect = true;
490 else if (region->type == GIMPLE_OMP_TARGET)
492 gomp_target *tgt_stmt
493 = as_a <gomp_target *> (last_stmt (region->entry));
495 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
496 OMP_CLAUSE__GRIDDIM_))
497 return indirect;
498 else
499 return true;
503 if (lookup_attribute ("omp declare target",
504 DECL_ATTRIBUTES (current_function_decl)))
505 return true;
507 return false;
510 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511 Add CHILD_FNDECL to decl chain of the supercontext of the block
512 ENTRY_BLOCK - this is the block which originally contained the
513 code from which CHILD_FNDECL was created.
515 Together, these actions ensure that the debug info for the outlined
516 function will be emitted with the correct lexical scope. */
518 static void
519 adjust_context_and_scope (struct omp_region *region, tree entry_block,
520 tree child_fndecl)
522 tree parent_fndecl = NULL_TREE;
523 gimple *entry_stmt;
524 /* OMP expansion expands inner regions before outer ones, so if
525 we e.g. have explicit task region nested in parallel region, when
526 expanding the task region current_function_decl will be the original
527 source function, but we actually want to use as context the child
528 function of the parallel. */
529 for (region = region->outer;
530 region && parent_fndecl == NULL_TREE; region = region->outer)
531 switch (region->type)
533 case GIMPLE_OMP_PARALLEL:
534 case GIMPLE_OMP_TASK:
535 case GIMPLE_OMP_TEAMS:
536 entry_stmt = last_stmt (region->entry);
537 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
538 break;
539 case GIMPLE_OMP_TARGET:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl
542 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
543 break;
544 default:
545 break;
548 if (parent_fndecl == NULL_TREE)
549 parent_fndecl = current_function_decl;
550 DECL_CONTEXT (child_fndecl) = parent_fndecl;
552 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
554 tree b = BLOCK_SUPERCONTEXT (entry_block);
555 if (TREE_CODE (b) == BLOCK)
557 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
558 BLOCK_VARS (b) = child_fndecl;
563 /* Build the function calls to GOMP_parallel etc to actually
564 generate the parallel operation. REGION is the parallel region
565 being expanded. BB is the block where to insert the code. WS_ARGS
566 will be set if this is a call to a combined parallel+workshare
567 construct, it contains the list of additional arguments needed by
568 the workshare construct. */
570 static void
571 expand_parallel_call (struct omp_region *region, basic_block bb,
572 gomp_parallel *entry_stmt,
573 vec<tree, va_gc> *ws_args)
575 tree t, t1, t2, val, cond, c, clauses, flags;
576 gimple_stmt_iterator gsi;
577 gimple *stmt;
578 enum built_in_function start_ix;
579 int start_ix2;
580 location_t clause_loc;
581 vec<tree, va_gc> *args;
583 clauses = gimple_omp_parallel_clauses (entry_stmt);
585 /* Determine what flavor of GOMP_parallel we will be
586 emitting. */
587 start_ix = BUILT_IN_GOMP_PARALLEL;
588 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
589 if (rtmp)
590 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
591 else if (is_combined_parallel (region))
593 switch (region->inner->type)
595 case GIMPLE_OMP_FOR:
596 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
597 switch (region->inner->sched_kind)
599 case OMP_CLAUSE_SCHEDULE_RUNTIME:
600 if ((region->inner->sched_modifiers
601 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
602 start_ix2 = 6;
603 else if ((region->inner->sched_modifiers
604 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
605 start_ix2 = 7;
606 else
607 start_ix2 = 3;
608 break;
609 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
610 case OMP_CLAUSE_SCHEDULE_GUIDED:
611 if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
614 start_ix2 = 3 + region->inner->sched_kind;
615 break;
617 /* FALLTHRU */
618 default:
619 start_ix2 = region->inner->sched_kind;
620 break;
622 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
623 start_ix = (enum built_in_function) start_ix2;
624 break;
625 case GIMPLE_OMP_SECTIONS:
626 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
627 break;
628 default:
629 gcc_unreachable ();
633 /* By default, the value of NUM_THREADS is zero (selected at run time)
634 and there is no conditional. */
635 cond = NULL_TREE;
636 val = build_int_cst (unsigned_type_node, 0);
637 flags = build_int_cst (unsigned_type_node, 0);
639 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
640 if (c)
641 cond = OMP_CLAUSE_IF_EXPR (c);
643 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
644 if (c)
646 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
647 clause_loc = OMP_CLAUSE_LOCATION (c);
649 else
650 clause_loc = gimple_location (entry_stmt);
652 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
653 if (c)
654 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
656 /* Ensure 'val' is of the correct type. */
657 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
659 /* If we found the clause 'if (cond)', build either
660 (cond != 0) or (cond ? val : 1u). */
661 if (cond)
663 cond = gimple_boolify (cond);
665 if (integer_zerop (val))
666 val = fold_build2_loc (clause_loc,
667 EQ_EXPR, unsigned_type_node, cond,
668 build_int_cst (TREE_TYPE (cond), 0));
669 else
671 basic_block cond_bb, then_bb, else_bb;
672 edge e, e_then, e_else;
673 tree tmp_then, tmp_else, tmp_join, tmp_var;
675 tmp_var = create_tmp_var (TREE_TYPE (val));
676 if (gimple_in_ssa_p (cfun))
678 tmp_then = make_ssa_name (tmp_var);
679 tmp_else = make_ssa_name (tmp_var);
680 tmp_join = make_ssa_name (tmp_var);
682 else
684 tmp_then = tmp_var;
685 tmp_else = tmp_var;
686 tmp_join = tmp_var;
689 e = split_block_after_labels (bb);
690 cond_bb = e->src;
691 bb = e->dest;
692 remove_edge (e);
694 then_bb = create_empty_bb (cond_bb);
695 else_bb = create_empty_bb (then_bb);
696 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
697 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
699 stmt = gimple_build_cond_empty (cond);
700 gsi = gsi_start_bb (cond_bb);
701 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
703 gsi = gsi_start_bb (then_bb);
704 expand_omp_build_assign (&gsi, tmp_then, val, true);
706 gsi = gsi_start_bb (else_bb);
707 expand_omp_build_assign (&gsi, tmp_else,
708 build_int_cst (unsigned_type_node, 1),
709 true);
711 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
712 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
713 add_bb_to_loop (then_bb, cond_bb->loop_father);
714 add_bb_to_loop (else_bb, cond_bb->loop_father);
715 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
716 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
718 if (gimple_in_ssa_p (cfun))
720 gphi *phi = create_phi_node (tmp_join, bb);
721 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
722 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
725 val = tmp_join;
728 gsi = gsi_start_bb (bb);
729 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
730 false, GSI_CONTINUE_LINKING);
733 gsi = gsi_last_nondebug_bb (bb);
734 t = gimple_omp_parallel_data_arg (entry_stmt);
735 if (t == NULL)
736 t1 = null_pointer_node;
737 else
738 t1 = build_fold_addr_expr (t);
739 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
740 t2 = build_fold_addr_expr (child_fndecl);
742 vec_alloc (args, 4 + vec_safe_length (ws_args));
743 args->quick_push (t2);
744 args->quick_push (t1);
745 args->quick_push (val);
746 if (ws_args)
747 args->splice (*ws_args);
748 args->quick_push (flags);
750 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
751 builtin_decl_explicit (start_ix), args);
753 if (rtmp)
755 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
756 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
757 fold_convert (type,
758 fold_convert (pointer_sized_int_node, t)));
760 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
761 false, GSI_CONTINUE_LINKING);
763 if (hsa_gen_requested_p ()
764 && parallel_needs_hsa_kernel_p (region))
766 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
767 hsa_register_kernel (child_cnode);
771 /* Build the function call to GOMP_task to actually
772 generate the task operation. BB is the block where to insert the code. */
774 static void
775 expand_task_call (struct omp_region *region, basic_block bb,
776 gomp_task *entry_stmt)
778 tree t1, t2, t3;
779 gimple_stmt_iterator gsi;
780 location_t loc = gimple_location (entry_stmt);
782 tree clauses = gimple_omp_task_clauses (entry_stmt);
784 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
785 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
786 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
787 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
788 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
789 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
791 unsigned int iflags
792 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
793 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
794 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
796 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
797 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
798 tree num_tasks = NULL_TREE;
799 bool ull = false;
800 if (taskloop_p)
802 gimple *g = last_stmt (region->outer->entry);
803 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
804 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
805 struct omp_for_data fd;
806 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
807 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
808 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
809 OMP_CLAUSE__LOOPTEMP_);
810 startvar = OMP_CLAUSE_DECL (startvar);
811 endvar = OMP_CLAUSE_DECL (endvar);
812 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
813 if (fd.loop.cond_code == LT_EXPR)
814 iflags |= GOMP_TASK_FLAG_UP;
815 tree tclauses = gimple_omp_for_clauses (g);
816 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
817 if (num_tasks)
818 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
819 else
821 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
822 if (num_tasks)
824 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
825 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
827 else
828 num_tasks = integer_zero_node;
830 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
831 if (ifc == NULL_TREE)
832 iflags |= GOMP_TASK_FLAG_IF;
833 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
834 iflags |= GOMP_TASK_FLAG_NOGROUP;
835 ull = fd.iter_type == long_long_unsigned_type_node;
836 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
837 iflags |= GOMP_TASK_FLAG_REDUCTION;
839 else if (priority)
840 iflags |= GOMP_TASK_FLAG_PRIORITY;
842 tree flags = build_int_cst (unsigned_type_node, iflags);
844 tree cond = boolean_true_node;
845 if (ifc)
847 if (taskloop_p)
849 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
850 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 build_int_cst (unsigned_type_node,
852 GOMP_TASK_FLAG_IF),
853 build_int_cst (unsigned_type_node, 0));
854 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
855 flags, t);
857 else
858 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
861 if (finalc)
863 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
864 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
865 build_int_cst (unsigned_type_node,
866 GOMP_TASK_FLAG_FINAL),
867 build_int_cst (unsigned_type_node, 0));
868 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
870 if (depend)
871 depend = OMP_CLAUSE_DECL (depend);
872 else
873 depend = build_int_cst (ptr_type_node, 0);
874 if (priority)
875 priority = fold_convert (integer_type_node,
876 OMP_CLAUSE_PRIORITY_EXPR (priority));
877 else
878 priority = integer_zero_node;
880 gsi = gsi_last_nondebug_bb (bb);
881 tree t = gimple_omp_task_data_arg (entry_stmt);
882 if (t == NULL)
883 t2 = null_pointer_node;
884 else
885 t2 = build_fold_addr_expr_loc (loc, t);
886 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
887 t = gimple_omp_task_copy_fn (entry_stmt);
888 if (t == NULL)
889 t3 = null_pointer_node;
890 else
891 t3 = build_fold_addr_expr_loc (loc, t);
893 if (taskloop_p)
894 t = build_call_expr (ull
895 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
896 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
897 11, t1, t2, t3,
898 gimple_omp_task_arg_size (entry_stmt),
899 gimple_omp_task_arg_align (entry_stmt), flags,
900 num_tasks, priority, startvar, endvar, step);
901 else
902 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
903 9, t1, t2, t3,
904 gimple_omp_task_arg_size (entry_stmt),
905 gimple_omp_task_arg_align (entry_stmt), cond, flags,
906 depend, priority);
908 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
909 false, GSI_CONTINUE_LINKING);
912 /* Build the function call to GOMP_taskwait_depend to actually
913 generate the taskwait operation. BB is the block where to insert the
914 code. */
916 static void
917 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
919 tree clauses = gimple_omp_task_clauses (entry_stmt);
920 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
921 if (depend == NULL_TREE)
922 return;
924 depend = OMP_CLAUSE_DECL (depend);
926 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
927 tree t
928 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
929 1, depend);
931 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
932 false, GSI_CONTINUE_LINKING);
935 /* Build the function call to GOMP_teams_reg to actually
936 generate the host teams operation. REGION is the teams region
937 being expanded. BB is the block where to insert the code. */
939 static void
940 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
942 tree clauses = gimple_omp_teams_clauses (entry_stmt);
943 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
944 if (num_teams == NULL_TREE)
945 num_teams = build_int_cst (unsigned_type_node, 0);
946 else
948 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
949 num_teams = fold_convert (unsigned_type_node, num_teams);
951 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
952 if (thread_limit == NULL_TREE)
953 thread_limit = build_int_cst (unsigned_type_node, 0);
954 else
956 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
957 thread_limit = fold_convert (unsigned_type_node, thread_limit);
960 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
961 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
962 if (t == NULL)
963 t1 = null_pointer_node;
964 else
965 t1 = build_fold_addr_expr (t);
966 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
967 tree t2 = build_fold_addr_expr (child_fndecl);
969 vec<tree, va_gc> *args;
970 vec_alloc (args, 5);
971 args->quick_push (t2);
972 args->quick_push (t1);
973 args->quick_push (num_teams);
974 args->quick_push (thread_limit);
975 /* For future extensibility. */
976 args->quick_push (build_zero_cst (unsigned_type_node));
978 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
979 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
980 args);
982 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
983 false, GSI_CONTINUE_LINKING);
986 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
988 static tree
989 vec2chain (vec<tree, va_gc> *v)
991 tree chain = NULL_TREE, t;
992 unsigned ix;
994 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
996 DECL_CHAIN (t) = chain;
997 chain = t;
1000 return chain;
1003 /* Remove barriers in REGION->EXIT's block. Note that this is only
1004 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1005 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1006 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1007 removed. */
1009 static void
1010 remove_exit_barrier (struct omp_region *region)
1012 gimple_stmt_iterator gsi;
1013 basic_block exit_bb;
1014 edge_iterator ei;
1015 edge e;
1016 gimple *stmt;
1017 int any_addressable_vars = -1;
1019 exit_bb = region->exit;
1021 /* If the parallel region doesn't return, we don't have REGION->EXIT
1022 block at all. */
1023 if (! exit_bb)
1024 return;
1026 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1027 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1028 statements that can appear in between are extremely limited -- no
1029 memory operations at all. Here, we allow nothing at all, so the
1030 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1031 gsi = gsi_last_nondebug_bb (exit_bb);
1032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1033 gsi_prev_nondebug (&gsi);
1034 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1035 return;
1037 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1039 gsi = gsi_last_nondebug_bb (e->src);
1040 if (gsi_end_p (gsi))
1041 continue;
1042 stmt = gsi_stmt (gsi);
1043 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1044 && !gimple_omp_return_nowait_p (stmt))
1046 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 in many cases. If there could be tasks queued, the barrier
1048 might be needed to let the tasks run before some local
1049 variable of the parallel that the task uses as shared
1050 runs out of scope. The task can be spawned either
1051 from within current function (this would be easy to check)
1052 or from some function it calls and gets passed an address
1053 of such a variable. */
1054 if (any_addressable_vars < 0)
1056 gomp_parallel *parallel_stmt
1057 = as_a <gomp_parallel *> (last_stmt (region->entry));
1058 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1059 tree local_decls, block, decl;
1060 unsigned ix;
1062 any_addressable_vars = 0;
1063 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1064 if (TREE_ADDRESSABLE (decl))
1066 any_addressable_vars = 1;
1067 break;
1069 for (block = gimple_block (stmt);
1070 !any_addressable_vars
1071 && block
1072 && TREE_CODE (block) == BLOCK;
1073 block = BLOCK_SUPERCONTEXT (block))
1075 for (local_decls = BLOCK_VARS (block);
1076 local_decls;
1077 local_decls = DECL_CHAIN (local_decls))
1078 if (TREE_ADDRESSABLE (local_decls))
1080 any_addressable_vars = 1;
1081 break;
1083 if (block == gimple_block (parallel_stmt))
1084 break;
1087 if (!any_addressable_vars)
1088 gimple_omp_return_set_nowait (stmt);
1093 static void
1094 remove_exit_barriers (struct omp_region *region)
1096 if (region->type == GIMPLE_OMP_PARALLEL)
1097 remove_exit_barrier (region);
1099 if (region->inner)
1101 region = region->inner;
1102 remove_exit_barriers (region);
1103 while (region->next)
1105 region = region->next;
1106 remove_exit_barriers (region);
1111 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112 calls. These can't be declared as const functions, but
1113 within one parallel body they are constant, so they can be
1114 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115 which are declared const. Similarly for task body, except
1116 that in untied task omp_get_thread_num () can change at any task
1117 scheduling point. */
1119 static void
1120 optimize_omp_library_calls (gimple *entry_stmt)
1122 basic_block bb;
1123 gimple_stmt_iterator gsi;
1124 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1125 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1126 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1127 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1128 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1129 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1130 OMP_CLAUSE_UNTIED) != NULL);
1132 FOR_EACH_BB_FN (bb, cfun)
1133 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1135 gimple *call = gsi_stmt (gsi);
1136 tree decl;
1138 if (is_gimple_call (call)
1139 && (decl = gimple_call_fndecl (call))
1140 && DECL_EXTERNAL (decl)
1141 && TREE_PUBLIC (decl)
1142 && DECL_INITIAL (decl) == NULL)
1144 tree built_in;
1146 if (DECL_NAME (decl) == thr_num_id)
1148 /* In #pragma omp task untied omp_get_thread_num () can change
1149 during the execution of the task region. */
1150 if (untied_task)
1151 continue;
1152 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1154 else if (DECL_NAME (decl) == num_thr_id)
1155 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1156 else
1157 continue;
1159 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1160 || gimple_call_num_args (call) != 0)
1161 continue;
1163 if (flag_exceptions && !TREE_NOTHROW (decl))
1164 continue;
1166 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1167 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1168 TREE_TYPE (TREE_TYPE (built_in))))
1169 continue;
1171 gimple_call_set_fndecl (call, built_in);
1176 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1177 regimplified. */
1179 static tree
1180 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1182 tree t = *tp;
1184 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1185 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1186 return t;
1188 if (TREE_CODE (t) == ADDR_EXPR)
1189 recompute_tree_invariant_for_addr_expr (t);
1191 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1192 return NULL_TREE;
1195 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1197 static void
1198 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1199 bool after)
1201 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1202 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1203 !after, after ? GSI_CONTINUE_LINKING
1204 : GSI_SAME_STMT);
1205 gimple *stmt = gimple_build_assign (to, from);
1206 if (after)
1207 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1208 else
1209 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1210 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1211 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1213 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1214 gimple_regimplify_operands (stmt, &gsi);
1218 /* Expand the OpenMP parallel or task directive starting at REGION. */
1220 static void
1221 expand_omp_taskreg (struct omp_region *region)
1223 basic_block entry_bb, exit_bb, new_bb;
1224 struct function *child_cfun;
1225 tree child_fn, block, t;
1226 gimple_stmt_iterator gsi;
1227 gimple *entry_stmt, *stmt;
1228 edge e;
1229 vec<tree, va_gc> *ws_args;
1231 entry_stmt = last_stmt (region->entry);
1232 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1233 && gimple_omp_task_taskwait_p (entry_stmt))
1235 new_bb = region->entry;
1236 gsi = gsi_last_nondebug_bb (region->entry);
1237 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1238 gsi_remove (&gsi, true);
1239 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1240 return;
1243 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1244 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1246 entry_bb = region->entry;
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1248 exit_bb = region->cont;
1249 else
1250 exit_bb = region->exit;
1252 if (is_combined_parallel (region))
1253 ws_args = region->ws_args;
1254 else
1255 ws_args = NULL;
1257 if (child_cfun->cfg)
1259 /* Due to inlining, it may happen that we have already outlined
1260 the region, in which case all we need to do is make the
1261 sub-graph unreachable and emit the parallel call. */
1262 edge entry_succ_e, exit_succ_e;
1264 entry_succ_e = single_succ_edge (entry_bb);
1266 gsi = gsi_last_nondebug_bb (entry_bb);
1267 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1268 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1269 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1270 gsi_remove (&gsi, true);
1272 new_bb = entry_bb;
1273 if (exit_bb)
1275 exit_succ_e = single_succ_edge (exit_bb);
1276 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1278 remove_edge_and_dominated_blocks (entry_succ_e);
1280 else
1282 unsigned srcidx, dstidx, num;
1284 /* If the parallel region needs data sent from the parent
1285 function, then the very first statement (except possible
1286 tree profile counter updates) of the parallel body
1287 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1288 &.OMP_DATA_O is passed as an argument to the child function,
1289 we need to replace it with the argument as seen by the child
1290 function.
1292 In most cases, this will end up being the identity assignment
1293 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1294 a function call that has been inlined, the original PARM_DECL
1295 .OMP_DATA_I may have been converted into a different local
1296 variable. In which case, we need to keep the assignment. */
1297 if (gimple_omp_taskreg_data_arg (entry_stmt))
1299 basic_block entry_succ_bb
1300 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1301 : FALLTHRU_EDGE (entry_bb)->dest;
1302 tree arg;
1303 gimple *parcopy_stmt = NULL;
1305 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1307 gimple *stmt;
1309 gcc_assert (!gsi_end_p (gsi));
1310 stmt = gsi_stmt (gsi);
1311 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1312 continue;
1314 if (gimple_num_ops (stmt) == 2)
1316 tree arg = gimple_assign_rhs1 (stmt);
1318 /* We're ignore the subcode because we're
1319 effectively doing a STRIP_NOPS. */
1321 if (TREE_CODE (arg) == ADDR_EXPR
1322 && (TREE_OPERAND (arg, 0)
1323 == gimple_omp_taskreg_data_arg (entry_stmt)))
1325 parcopy_stmt = stmt;
1326 break;
1331 gcc_assert (parcopy_stmt != NULL);
1332 arg = DECL_ARGUMENTS (child_fn);
1334 if (!gimple_in_ssa_p (cfun))
1336 if (gimple_assign_lhs (parcopy_stmt) == arg)
1337 gsi_remove (&gsi, true);
1338 else
1340 /* ?? Is setting the subcode really necessary ?? */
1341 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1342 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1345 else
1347 tree lhs = gimple_assign_lhs (parcopy_stmt);
1348 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1349 /* We'd like to set the rhs to the default def in the child_fn,
1350 but it's too early to create ssa names in the child_fn.
1351 Instead, we set the rhs to the parm. In
1352 move_sese_region_to_fn, we introduce a default def for the
1353 parm, map the parm to it's default def, and once we encounter
1354 this stmt, replace the parm with the default def. */
1355 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 update_stmt (parcopy_stmt);
1360 /* Declare local variables needed in CHILD_CFUN. */
1361 block = DECL_INITIAL (child_fn);
1362 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1363 /* The gimplifier could record temporaries in parallel/task block
1364 rather than in containing function's local_decls chain,
1365 which would mean cgraph missed finalizing them. Do it now. */
1366 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1367 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1368 varpool_node::finalize_decl (t);
1369 DECL_SAVED_TREE (child_fn) = NULL;
1370 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1371 gimple_set_body (child_fn, NULL);
1372 TREE_USED (block) = 1;
1374 /* Reset DECL_CONTEXT on function arguments. */
1375 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1376 DECL_CONTEXT (t) = child_fn;
1378 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 so that it can be moved to the child function. */
1380 gsi = gsi_last_nondebug_bb (entry_bb);
1381 stmt = gsi_stmt (gsi);
1382 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1383 || gimple_code (stmt) == GIMPLE_OMP_TASK
1384 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1385 e = split_block (entry_bb, stmt);
1386 gsi_remove (&gsi, true);
1387 entry_bb = e->dest;
1388 edge e2 = NULL;
1389 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1390 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1391 else
1393 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1394 gcc_assert (e2->dest == region->exit);
1395 remove_edge (BRANCH_EDGE (entry_bb));
1396 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1397 gsi = gsi_last_nondebug_bb (region->exit);
1398 gcc_assert (!gsi_end_p (gsi)
1399 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1400 gsi_remove (&gsi, true);
1403 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1404 if (exit_bb)
1406 gsi = gsi_last_nondebug_bb (exit_bb);
1407 gcc_assert (!gsi_end_p (gsi)
1408 && (gimple_code (gsi_stmt (gsi))
1409 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1410 stmt = gimple_build_return (NULL);
1411 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1412 gsi_remove (&gsi, true);
1415 /* Move the parallel region into CHILD_CFUN. */
1417 if (gimple_in_ssa_p (cfun))
1419 init_tree_ssa (child_cfun);
1420 init_ssa_operands (child_cfun);
1421 child_cfun->gimple_df->in_ssa_p = true;
1422 block = NULL_TREE;
1424 else
1425 block = gimple_block (entry_stmt);
1427 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1428 if (exit_bb)
1429 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1430 if (e2)
1432 basic_block dest_bb = e2->dest;
1433 if (!exit_bb)
1434 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1435 remove_edge (e2);
1436 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1438 /* When the OMP expansion process cannot guarantee an up-to-date
1439 loop tree arrange for the child function to fixup loops. */
1440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1441 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1444 num = vec_safe_length (child_cfun->local_decls);
1445 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1447 t = (*child_cfun->local_decls)[srcidx];
1448 if (DECL_CONTEXT (t) == cfun->decl)
1449 continue;
1450 if (srcidx != dstidx)
1451 (*child_cfun->local_decls)[dstidx] = t;
1452 dstidx++;
1454 if (dstidx != num)
1455 vec_safe_truncate (child_cfun->local_decls, dstidx);
1457 /* Inform the callgraph about the new function. */
1458 child_cfun->curr_properties = cfun->curr_properties;
1459 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1460 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1461 cgraph_node *node = cgraph_node::get_create (child_fn);
1462 node->parallelized_function = 1;
1463 cgraph_node::add_new_function (child_fn, true);
1465 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1466 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1468 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1469 fixed in a following pass. */
1470 push_cfun (child_cfun);
1471 if (need_asm)
1472 assign_assembler_name_if_needed (child_fn);
1474 if (optimize)
1475 optimize_omp_library_calls (entry_stmt);
1476 update_max_bb_count ();
1477 cgraph_edge::rebuild_edges ();
1479 /* Some EH regions might become dead, see PR34608. If
1480 pass_cleanup_cfg isn't the first pass to happen with the
1481 new child, these dead EH edges might cause problems.
1482 Clean them up now. */
1483 if (flag_exceptions)
1485 basic_block bb;
1486 bool changed = false;
1488 FOR_EACH_BB_FN (bb, cfun)
1489 changed |= gimple_purge_dead_eh_edges (bb);
1490 if (changed)
1491 cleanup_tree_cfg ();
1493 if (gimple_in_ssa_p (cfun))
1494 update_ssa (TODO_update_ssa);
1495 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1496 verify_loop_structure ();
1497 pop_cfun ();
1499 if (dump_file && !gimple_in_ssa_p (cfun))
1501 omp_any_child_fn_dumped = true;
1502 dump_function_header (dump_file, child_fn, dump_flags);
1503 dump_function_to_file (child_fn, dump_file, dump_flags);
1507 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1509 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1510 expand_parallel_call (region, new_bb,
1511 as_a <gomp_parallel *> (entry_stmt), ws_args);
1512 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1513 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1514 else
1515 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1516 if (gimple_in_ssa_p (cfun))
1517 update_ssa (TODO_update_ssa_only_virtuals);
1520 /* Information about members of an OpenACC collapsed loop nest. */
1522 struct oacc_collapse
1524 tree base; /* Base value. */
1525 tree iters; /* Number of steps. */
1526 tree step; /* Step size. */
1527 tree tile; /* Tile increment (if tiled). */
1528 tree outer; /* Tile iterator var. */
1531 /* Helper for expand_oacc_for. Determine collapsed loop information.
1532 Fill in COUNTS array. Emit any initialization code before GSI.
1533 Return the calculated outer loop bound of BOUND_TYPE. */
1535 static tree
1536 expand_oacc_collapse_init (const struct omp_for_data *fd,
1537 gimple_stmt_iterator *gsi,
1538 oacc_collapse *counts, tree bound_type,
1539 location_t loc)
1541 tree tiling = fd->tiling;
1542 tree total = build_int_cst (bound_type, 1);
1543 int ix;
1545 gcc_assert (integer_onep (fd->loop.step));
1546 gcc_assert (integer_zerop (fd->loop.n1));
1548 /* When tiling, the first operand of the tile clause applies to the
1549 innermost loop, and we work outwards from there. Seems
1550 backwards, but whatever. */
1551 for (ix = fd->collapse; ix--;)
1553 const omp_for_data_loop *loop = &fd->loops[ix];
1555 tree iter_type = TREE_TYPE (loop->v);
1556 tree diff_type = iter_type;
1557 tree plus_type = iter_type;
1559 gcc_assert (loop->cond_code == fd->loop.cond_code);
1561 if (POINTER_TYPE_P (iter_type))
1562 plus_type = sizetype;
1563 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1564 diff_type = signed_type_for (diff_type);
1565 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1566 diff_type = integer_type_node;
1568 if (tiling)
1570 tree num = build_int_cst (integer_type_node, fd->collapse);
1571 tree loop_no = build_int_cst (integer_type_node, ix);
1572 tree tile = TREE_VALUE (tiling);
1573 gcall *call
1574 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1575 /* gwv-outer=*/integer_zero_node,
1576 /* gwv-inner=*/integer_zero_node);
1578 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1579 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1580 gimple_call_set_lhs (call, counts[ix].tile);
1581 gimple_set_location (call, loc);
1582 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1584 tiling = TREE_CHAIN (tiling);
1586 else
1588 counts[ix].tile = NULL;
1589 counts[ix].outer = loop->v;
1592 tree b = loop->n1;
1593 tree e = loop->n2;
1594 tree s = loop->step;
1595 bool up = loop->cond_code == LT_EXPR;
1596 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1597 bool negating;
1598 tree expr;
1600 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1602 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1603 true, GSI_SAME_STMT);
1605 /* Convert the step, avoiding possible unsigned->signed overflow. */
1606 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1607 if (negating)
1608 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1609 s = fold_convert (diff_type, s);
1610 if (negating)
1611 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1612 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 /* Determine the range, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (iter_type);
1617 expr = fold_build2 (MINUS_EXPR, plus_type,
1618 fold_convert (plus_type, negating ? b : e),
1619 fold_convert (plus_type, negating ? e : b));
1620 expr = fold_convert (diff_type, expr);
1621 if (negating)
1622 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1623 tree range = force_gimple_operand_gsi
1624 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1626 /* Determine number of iterations. */
1627 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1628 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1629 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1631 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1634 counts[ix].base = b;
1635 counts[ix].iters = iters;
1636 counts[ix].step = s;
1638 total = fold_build2 (MULT_EXPR, bound_type, total,
1639 fold_convert (bound_type, iters));
1642 return total;
1645 /* Emit initializers for collapsed loop members. INNER is true if
1646 this is for the element loop of a TILE. IVAR is the outer
1647 loop iteration variable, from which collapsed loop iteration values
1648 are calculated. COUNTS array has been initialized by
1649 expand_oacc_collapse_inits. */
1651 static void
1652 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1653 gimple_stmt_iterator *gsi,
1654 const oacc_collapse *counts, tree ivar)
1656 tree ivar_type = TREE_TYPE (ivar);
1658 /* The most rapidly changing iteration variable is the innermost
1659 one. */
1660 for (int ix = fd->collapse; ix--;)
1662 const omp_for_data_loop *loop = &fd->loops[ix];
1663 const oacc_collapse *collapse = &counts[ix];
1664 tree v = inner ? loop->v : collapse->outer;
1665 tree iter_type = TREE_TYPE (v);
1666 tree diff_type = TREE_TYPE (collapse->step);
1667 tree plus_type = iter_type;
1668 enum tree_code plus_code = PLUS_EXPR;
1669 tree expr;
1671 if (POINTER_TYPE_P (iter_type))
1673 plus_code = POINTER_PLUS_EXPR;
1674 plus_type = sizetype;
1677 expr = ivar;
1678 if (ix)
1680 tree mod = fold_convert (ivar_type, collapse->iters);
1681 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1682 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1683 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1684 true, GSI_SAME_STMT);
1687 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1688 collapse->step);
1689 expr = fold_build2 (plus_code, iter_type,
1690 inner ? collapse->outer : collapse->base,
1691 fold_convert (plus_type, expr));
1692 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 gassign *ass = gimple_build_assign (v, expr);
1695 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1699 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1700 of the combined collapse > 1 loop constructs, generate code like:
1701 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1702 if (cond3 is <)
1703 adj = STEP3 - 1;
1704 else
1705 adj = STEP3 + 1;
1706 count3 = (adj + N32 - N31) / STEP3;
1707 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1708 if (cond2 is <)
1709 adj = STEP2 - 1;
1710 else
1711 adj = STEP2 + 1;
1712 count2 = (adj + N22 - N21) / STEP2;
1713 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1714 if (cond1 is <)
1715 adj = STEP1 - 1;
1716 else
1717 adj = STEP1 + 1;
1718 count1 = (adj + N12 - N11) / STEP1;
1719 count = count1 * count2 * count3;
1720 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1721 count = 0;
1722 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1723 of the combined loop constructs, just initialize COUNTS array
1724 from the _looptemp_ clauses. */
1726 /* NOTE: It *could* be better to moosh all of the BBs together,
1727 creating one larger BB with all the computation and the unexpected
1728 jump at the end. I.e.
1730 bool zero3, zero2, zero1, zero;
1732 zero3 = N32 c3 N31;
1733 count3 = (N32 - N31) /[cl] STEP3;
1734 zero2 = N22 c2 N21;
1735 count2 = (N22 - N21) /[cl] STEP2;
1736 zero1 = N12 c1 N11;
1737 count1 = (N12 - N11) /[cl] STEP1;
1738 zero = zero3 || zero2 || zero1;
1739 count = count1 * count2 * count3;
1740 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1742 After all, we expect the zero=false, and thus we expect to have to
1743 evaluate all of the comparison expressions, so short-circuiting
1744 oughtn't be a win. Since the condition isn't protecting a
1745 denominator, we're not concerned about divide-by-zero, so we can
1746 fully evaluate count even if a numerator turned out to be wrong.
1748 It seems like putting this all together would create much better
1749 scheduling opportunities, and less pressure on the chip's branch
1750 predictor. */
1752 static void
1753 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1754 basic_block &entry_bb, tree *counts,
1755 basic_block &zero_iter1_bb, int &first_zero_iter1,
1756 basic_block &zero_iter2_bb, int &first_zero_iter2,
1757 basic_block &l2_dom_bb)
1759 tree t, type = TREE_TYPE (fd->loop.v);
1760 edge e, ne;
1761 int i;
1763 /* Collapsed loops need work for expansion into SSA form. */
1764 gcc_assert (!gimple_in_ssa_p (cfun));
1766 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1767 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1769 gcc_assert (fd->ordered == 0);
1770 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 isn't supposed to be handled, as the inner loop doesn't
1772 use it. */
1773 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1774 OMP_CLAUSE__LOOPTEMP_);
1775 gcc_assert (innerc);
1776 for (i = 0; i < fd->collapse; i++)
1778 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1779 OMP_CLAUSE__LOOPTEMP_);
1780 gcc_assert (innerc);
1781 if (i)
1782 counts[i] = OMP_CLAUSE_DECL (innerc);
1783 else
1784 counts[0] = NULL_TREE;
1786 return;
1789 for (i = fd->collapse; i < fd->ordered; i++)
1791 tree itype = TREE_TYPE (fd->loops[i].v);
1792 counts[i] = NULL_TREE;
1793 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1794 fold_convert (itype, fd->loops[i].n1),
1795 fold_convert (itype, fd->loops[i].n2));
1796 if (t && integer_zerop (t))
1798 for (i = fd->collapse; i < fd->ordered; i++)
1799 counts[i] = build_int_cst (type, 0);
1800 break;
1803 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1805 tree itype = TREE_TYPE (fd->loops[i].v);
1807 if (i >= fd->collapse && counts[i])
1808 continue;
1809 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1810 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1811 fold_convert (itype, fd->loops[i].n1),
1812 fold_convert (itype, fd->loops[i].n2)))
1813 == NULL_TREE || !integer_onep (t)))
1815 gcond *cond_stmt;
1816 tree n1, n2;
1817 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1818 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1819 true, GSI_SAME_STMT);
1820 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1821 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1822 true, GSI_SAME_STMT);
1823 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1824 NULL_TREE, NULL_TREE);
1825 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1827 expand_omp_regimplify_p, NULL, NULL)
1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1829 expand_omp_regimplify_p, NULL, NULL))
1831 *gsi = gsi_for_stmt (cond_stmt);
1832 gimple_regimplify_operands (cond_stmt, gsi);
1834 e = split_block (entry_bb, cond_stmt);
1835 basic_block &zero_iter_bb
1836 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1837 int &first_zero_iter
1838 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1839 if (zero_iter_bb == NULL)
1841 gassign *assign_stmt;
1842 first_zero_iter = i;
1843 zero_iter_bb = create_empty_bb (entry_bb);
1844 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1845 *gsi = gsi_after_labels (zero_iter_bb);
1846 if (i < fd->collapse)
1847 assign_stmt = gimple_build_assign (fd->loop.n2,
1848 build_zero_cst (type));
1849 else
1851 counts[i] = create_tmp_reg (type, ".count");
1852 assign_stmt
1853 = gimple_build_assign (counts[i], build_zero_cst (type));
1855 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1856 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1857 entry_bb);
1859 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1860 ne->probability = profile_probability::very_unlikely ();
1861 e->flags = EDGE_TRUE_VALUE;
1862 e->probability = ne->probability.invert ();
1863 if (l2_dom_bb == NULL)
1864 l2_dom_bb = entry_bb;
1865 entry_bb = e->dest;
1866 *gsi = gsi_last_nondebug_bb (entry_bb);
1869 if (POINTER_TYPE_P (itype))
1870 itype = signed_type_for (itype);
1871 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1872 ? -1 : 1));
1873 t = fold_build2 (PLUS_EXPR, itype,
1874 fold_convert (itype, fd->loops[i].step), t);
1875 t = fold_build2 (PLUS_EXPR, itype, t,
1876 fold_convert (itype, fd->loops[i].n2));
1877 t = fold_build2 (MINUS_EXPR, itype, t,
1878 fold_convert (itype, fd->loops[i].n1));
1879 /* ?? We could probably use CEIL_DIV_EXPR instead of
1880 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1881 generate the same code in the end because generically we
1882 don't know that the values involved must be negative for
1883 GT?? */
1884 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1885 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1886 fold_build1 (NEGATE_EXPR, itype, t),
1887 fold_build1 (NEGATE_EXPR, itype,
1888 fold_convert (itype,
1889 fd->loops[i].step)));
1890 else
1891 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1892 fold_convert (itype, fd->loops[i].step));
1893 t = fold_convert (type, t);
1894 if (TREE_CODE (t) == INTEGER_CST)
1895 counts[i] = t;
1896 else
1898 if (i < fd->collapse || i != first_zero_iter2)
1899 counts[i] = create_tmp_reg (type, ".count");
1900 expand_omp_build_assign (gsi, counts[i], t);
1902 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1904 if (i == 0)
1905 t = counts[0];
1906 else
1907 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1908 expand_omp_build_assign (gsi, fd->loop.n2, t);
1913 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1914 T = V;
1915 V3 = N31 + (T % count3) * STEP3;
1916 T = T / count3;
1917 V2 = N21 + (T % count2) * STEP2;
1918 T = T / count2;
1919 V1 = N11 + T * STEP1;
1920 if this loop doesn't have an inner loop construct combined with it.
1921 If it does have an inner loop construct combined with it and the
1922 iteration count isn't known constant, store values from counts array
1923 into its _looptemp_ temporaries instead. */
1925 static void
1926 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1927 tree *counts, gimple *inner_stmt, tree startvar)
1929 int i;
1930 if (gimple_omp_for_combined_p (fd->for_stmt))
1932 /* If fd->loop.n2 is constant, then no propagation of the counts
1933 is needed, they are constant. */
1934 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1935 return;
1937 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1938 ? gimple_omp_taskreg_clauses (inner_stmt)
1939 : gimple_omp_for_clauses (inner_stmt);
1940 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 isn't supposed to be handled, as the inner loop doesn't
1942 use it. */
1943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1944 gcc_assert (innerc);
1945 for (i = 0; i < fd->collapse; i++)
1947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1948 OMP_CLAUSE__LOOPTEMP_);
1949 gcc_assert (innerc);
1950 if (i)
1952 tree tem = OMP_CLAUSE_DECL (innerc);
1953 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 gassign *stmt = gimple_build_assign (tem, t);
1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1960 return;
1963 tree type = TREE_TYPE (fd->loop.v);
1964 tree tem = create_tmp_reg (type, ".tem");
1965 gassign *stmt = gimple_build_assign (tem, startvar);
1966 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1968 for (i = fd->collapse - 1; i >= 0; i--)
1970 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1971 itype = vtype;
1972 if (POINTER_TYPE_P (vtype))
1973 itype = signed_type_for (vtype);
1974 if (i != 0)
1975 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1976 else
1977 t = tem;
1978 t = fold_convert (itype, t);
1979 t = fold_build2 (MULT_EXPR, itype, t,
1980 fold_convert (itype, fd->loops[i].step));
1981 if (POINTER_TYPE_P (vtype))
1982 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1983 else
1984 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1985 t = force_gimple_operand_gsi (gsi, t,
1986 DECL_P (fd->loops[i].v)
1987 && TREE_ADDRESSABLE (fd->loops[i].v),
1988 NULL_TREE, false,
1989 GSI_CONTINUE_LINKING);
1990 stmt = gimple_build_assign (fd->loops[i].v, t);
1991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1992 if (i != 0)
1994 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1995 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1996 false, GSI_CONTINUE_LINKING);
1997 stmt = gimple_build_assign (tem, t);
1998 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2003 /* Helper function for expand_omp_for_*. Generate code like:
2004 L10:
2005 V3 += STEP3;
2006 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2007 L11:
2008 V3 = N31;
2009 V2 += STEP2;
2010 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2011 L12:
2012 V2 = N21;
2013 V1 += STEP1;
2014 goto BODY_BB; */
2016 static basic_block
2017 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2018 basic_block body_bb)
2020 basic_block last_bb, bb, collapse_bb = NULL;
2021 int i;
2022 gimple_stmt_iterator gsi;
2023 edge e;
2024 tree t;
2025 gimple *stmt;
2027 last_bb = cont_bb;
2028 for (i = fd->collapse - 1; i >= 0; i--)
2030 tree vtype = TREE_TYPE (fd->loops[i].v);
2032 bb = create_empty_bb (last_bb);
2033 add_bb_to_loop (bb, last_bb->loop_father);
2034 gsi = gsi_start_bb (bb);
2036 if (i < fd->collapse - 1)
2038 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2039 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2041 t = fd->loops[i + 1].n1;
2042 t = force_gimple_operand_gsi (&gsi, t,
2043 DECL_P (fd->loops[i + 1].v)
2044 && TREE_ADDRESSABLE (fd->loops[i
2045 + 1].v),
2046 NULL_TREE, false,
2047 GSI_CONTINUE_LINKING);
2048 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2049 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2051 else
2052 collapse_bb = bb;
2054 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2056 if (POINTER_TYPE_P (vtype))
2057 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2058 else
2059 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2060 t = force_gimple_operand_gsi (&gsi, t,
2061 DECL_P (fd->loops[i].v)
2062 && TREE_ADDRESSABLE (fd->loops[i].v),
2063 NULL_TREE, false, GSI_CONTINUE_LINKING);
2064 stmt = gimple_build_assign (fd->loops[i].v, t);
2065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2067 if (i > 0)
2069 t = fd->loops[i].n2;
2070 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2071 false, GSI_CONTINUE_LINKING);
2072 tree v = fd->loops[i].v;
2073 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2074 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2077 stmt = gimple_build_cond_empty (t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2080 expand_omp_regimplify_p, NULL, NULL)
2081 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2082 expand_omp_regimplify_p, NULL, NULL))
2083 gimple_regimplify_operands (stmt, &gsi);
2084 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2085 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2087 else
2088 make_edge (bb, body_bb, EDGE_FALLTHRU);
2089 last_bb = bb;
2092 return collapse_bb;
2095 /* Expand #pragma omp ordered depend(source). */
2097 static void
2098 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2099 tree *counts, location_t loc)
2101 enum built_in_function source_ix
2102 = fd->iter_type == long_integer_type_node
2103 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2104 gimple *g
2105 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2106 build_fold_addr_expr (counts[fd->ordered]));
2107 gimple_set_location (g, loc);
2108 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2111 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2113 static void
2114 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2115 tree *counts, tree c, location_t loc)
2117 auto_vec<tree, 10> args;
2118 enum built_in_function sink_ix
2119 = fd->iter_type == long_integer_type_node
2120 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2121 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2122 int i;
2123 gimple_stmt_iterator gsi2 = *gsi;
2124 bool warned_step = false;
2126 for (i = 0; i < fd->ordered; i++)
2128 tree step = NULL_TREE;
2129 off = TREE_PURPOSE (deps);
2130 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2132 step = TREE_OPERAND (off, 1);
2133 off = TREE_OPERAND (off, 0);
2135 if (!integer_zerop (off))
2137 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2138 || fd->loops[i].cond_code == GT_EXPR);
2139 bool forward = fd->loops[i].cond_code == LT_EXPR;
2140 if (step)
2142 /* Non-simple Fortran DO loops. If step is variable,
2143 we don't know at compile even the direction, so can't
2144 warn. */
2145 if (TREE_CODE (step) != INTEGER_CST)
2146 break;
2147 forward = tree_int_cst_sgn (step) != -1;
2149 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2150 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2151 "lexically later iteration");
2152 break;
2154 deps = TREE_CHAIN (deps);
2156 /* If all offsets corresponding to the collapsed loops are zero,
2157 this depend clause can be ignored. FIXME: but there is still a
2158 flush needed. We need to emit one __sync_synchronize () for it
2159 though (perhaps conditionally)? Solve this together with the
2160 conservative dependence folding optimization.
2161 if (i >= fd->collapse)
2162 return; */
2164 deps = OMP_CLAUSE_DECL (c);
2165 gsi_prev (&gsi2);
2166 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2167 edge e2 = split_block_after_labels (e1->dest);
2169 gsi2 = gsi_after_labels (e1->dest);
2170 *gsi = gsi_last_bb (e1->src);
2171 for (i = 0; i < fd->ordered; i++)
2173 tree itype = TREE_TYPE (fd->loops[i].v);
2174 tree step = NULL_TREE;
2175 tree orig_off = NULL_TREE;
2176 if (POINTER_TYPE_P (itype))
2177 itype = sizetype;
2178 if (i)
2179 deps = TREE_CHAIN (deps);
2180 off = TREE_PURPOSE (deps);
2181 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2183 step = TREE_OPERAND (off, 1);
2184 off = TREE_OPERAND (off, 0);
2185 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2186 && integer_onep (fd->loops[i].step)
2187 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2189 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2190 if (step)
2192 off = fold_convert_loc (loc, itype, off);
2193 orig_off = off;
2194 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2197 if (integer_zerop (off))
2198 t = boolean_true_node;
2199 else
2201 tree a;
2202 tree co = fold_convert_loc (loc, itype, off);
2203 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2205 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2206 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2207 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2208 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2209 co);
2211 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2212 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2213 fd->loops[i].v, co);
2214 else
2215 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2216 fd->loops[i].v, co);
2217 if (step)
2219 tree t1, t2;
2220 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2221 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2222 fd->loops[i].n1);
2223 else
2224 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2225 fd->loops[i].n2);
2226 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2227 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2228 fd->loops[i].n2);
2229 else
2230 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2231 fd->loops[i].n1);
2232 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2233 step, build_int_cst (TREE_TYPE (step), 0));
2234 if (TREE_CODE (step) != INTEGER_CST)
2236 t1 = unshare_expr (t1);
2237 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2238 false, GSI_CONTINUE_LINKING);
2239 t2 = unshare_expr (t2);
2240 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2241 false, GSI_CONTINUE_LINKING);
2243 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2244 t, t2, t1);
2246 else if (fd->loops[i].cond_code == LT_EXPR)
2248 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2249 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2250 fd->loops[i].n1);
2251 else
2252 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2253 fd->loops[i].n2);
2255 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2256 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2258 else
2259 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2260 fd->loops[i].n1);
2262 if (cond)
2263 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2264 else
2265 cond = t;
2267 off = fold_convert_loc (loc, itype, off);
2269 if (step
2270 || (fd->loops[i].cond_code == LT_EXPR
2271 ? !integer_onep (fd->loops[i].step)
2272 : !integer_minus_onep (fd->loops[i].step)))
2274 if (step == NULL_TREE
2275 && TYPE_UNSIGNED (itype)
2276 && fd->loops[i].cond_code == GT_EXPR)
2277 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2278 fold_build1_loc (loc, NEGATE_EXPR, itype,
2279 s));
2280 else
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2282 orig_off ? orig_off : off, s);
2283 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2284 build_int_cst (itype, 0));
2285 if (integer_zerop (t) && !warned_step)
2287 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2288 "in the iteration space");
2289 warned_step = true;
2291 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2292 cond, t);
2295 if (i <= fd->collapse - 1 && fd->collapse > 1)
2296 t = fd->loop.v;
2297 else if (counts[i])
2298 t = counts[i];
2299 else
2301 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2302 fd->loops[i].v, fd->loops[i].n1);
2303 t = fold_convert_loc (loc, fd->iter_type, t);
2305 if (step)
2306 /* We have divided off by step already earlier. */;
2307 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2308 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2309 fold_build1_loc (loc, NEGATE_EXPR, itype,
2310 s));
2311 else
2312 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2313 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2314 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2315 off = fold_convert_loc (loc, fd->iter_type, off);
2316 if (i <= fd->collapse - 1 && fd->collapse > 1)
2318 if (i)
2319 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2320 off);
2321 if (i < fd->collapse - 1)
2323 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2324 counts[i]);
2325 continue;
2328 off = unshare_expr (off);
2329 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2330 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2331 true, GSI_SAME_STMT);
2332 args.safe_push (t);
2334 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2335 gimple_set_location (g, loc);
2336 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2338 cond = unshare_expr (cond);
2339 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2340 GSI_CONTINUE_LINKING);
2341 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2342 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2343 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2344 e1->probability = e3->probability.invert ();
2345 e1->flags = EDGE_TRUE_VALUE;
2346 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2348 *gsi = gsi_after_labels (e2->dest);
2351 /* Expand all #pragma omp ordered depend(source) and
2352 #pragma omp ordered depend(sink:...) constructs in the current
2353 #pragma omp for ordered(n) region. */
2355 static void
2356 expand_omp_ordered_source_sink (struct omp_region *region,
2357 struct omp_for_data *fd, tree *counts,
2358 basic_block cont_bb)
2360 struct omp_region *inner;
2361 int i;
2362 for (i = fd->collapse - 1; i < fd->ordered; i++)
2363 if (i == fd->collapse - 1 && fd->collapse > 1)
2364 counts[i] = NULL_TREE;
2365 else if (i >= fd->collapse && !cont_bb)
2366 counts[i] = build_zero_cst (fd->iter_type);
2367 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2368 && integer_onep (fd->loops[i].step))
2369 counts[i] = NULL_TREE;
2370 else
2371 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2372 tree atype
2373 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2374 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2375 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2377 for (inner = region->inner; inner; inner = inner->next)
2378 if (inner->type == GIMPLE_OMP_ORDERED)
2380 gomp_ordered *ord_stmt = inner->ord_stmt;
2381 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2382 location_t loc = gimple_location (ord_stmt);
2383 tree c;
2384 for (c = gimple_omp_ordered_clauses (ord_stmt);
2385 c; c = OMP_CLAUSE_CHAIN (c))
2386 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2387 break;
2388 if (c)
2389 expand_omp_ordered_source (&gsi, fd, counts, loc);
2390 for (c = gimple_omp_ordered_clauses (ord_stmt);
2391 c; c = OMP_CLAUSE_CHAIN (c))
2392 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2393 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2394 gsi_remove (&gsi, true);
2398 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2399 collapsed. */
2401 static basic_block
2402 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2403 basic_block cont_bb, basic_block body_bb,
2404 bool ordered_lastprivate)
2406 if (fd->ordered == fd->collapse)
2407 return cont_bb;
2409 if (!cont_bb)
2411 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2412 for (int i = fd->collapse; i < fd->ordered; i++)
2414 tree type = TREE_TYPE (fd->loops[i].v);
2415 tree n1 = fold_convert (type, fd->loops[i].n1);
2416 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2417 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2418 size_int (i - fd->collapse + 1),
2419 NULL_TREE, NULL_TREE);
2420 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2422 return NULL;
2425 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2427 tree t, type = TREE_TYPE (fd->loops[i].v);
2428 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v,
2430 fold_convert (type, fd->loops[i].n1));
2431 if (counts[i])
2432 expand_omp_build_assign (&gsi, counts[i],
2433 build_zero_cst (fd->iter_type));
2434 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2435 size_int (i - fd->collapse + 1),
2436 NULL_TREE, NULL_TREE);
2437 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2438 if (!gsi_end_p (gsi))
2439 gsi_prev (&gsi);
2440 else
2441 gsi = gsi_last_bb (body_bb);
2442 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2443 basic_block new_body = e1->dest;
2444 if (body_bb == cont_bb)
2445 cont_bb = new_body;
2446 edge e2 = NULL;
2447 basic_block new_header;
2448 if (EDGE_COUNT (cont_bb->preds) > 0)
2450 gsi = gsi_last_bb (cont_bb);
2451 if (POINTER_TYPE_P (type))
2452 t = fold_build_pointer_plus (fd->loops[i].v,
2453 fold_convert (sizetype,
2454 fd->loops[i].step));
2455 else
2456 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2457 fold_convert (type, fd->loops[i].step));
2458 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2459 if (counts[i])
2461 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2462 build_int_cst (fd->iter_type, 1));
2463 expand_omp_build_assign (&gsi, counts[i], t);
2464 t = counts[i];
2466 else
2468 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2469 fd->loops[i].v, fd->loops[i].n1);
2470 t = fold_convert (fd->iter_type, t);
2471 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2472 true, GSI_SAME_STMT);
2474 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2475 size_int (i - fd->collapse + 1),
2476 NULL_TREE, NULL_TREE);
2477 expand_omp_build_assign (&gsi, aref, t);
2478 gsi_prev (&gsi);
2479 e2 = split_block (cont_bb, gsi_stmt (gsi));
2480 new_header = e2->dest;
2482 else
2483 new_header = cont_bb;
2484 gsi = gsi_after_labels (new_header);
2485 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2486 true, GSI_SAME_STMT);
2487 tree n2
2488 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2489 true, NULL_TREE, true, GSI_SAME_STMT);
2490 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2491 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2492 edge e3 = split_block (new_header, gsi_stmt (gsi));
2493 cont_bb = e3->dest;
2494 remove_edge (e1);
2495 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2496 e3->flags = EDGE_FALSE_VALUE;
2497 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2498 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2499 e1->probability = e3->probability.invert ();
2501 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2502 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2504 if (e2)
2506 struct loop *loop = alloc_loop ();
2507 loop->header = new_header;
2508 loop->latch = e2->src;
2509 add_loop (loop, body_bb->loop_father);
2513 /* If there are any lastprivate clauses and it is possible some loops
2514 might have zero iterations, ensure all the decls are initialized,
2515 otherwise we could crash evaluating C++ class iterators with lastprivate
2516 clauses. */
2517 bool need_inits = false;
2518 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2519 if (need_inits)
2521 tree type = TREE_TYPE (fd->loops[i].v);
2522 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2523 expand_omp_build_assign (&gsi, fd->loops[i].v,
2524 fold_convert (type, fd->loops[i].n1));
2526 else
2528 tree type = TREE_TYPE (fd->loops[i].v);
2529 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2530 boolean_type_node,
2531 fold_convert (type, fd->loops[i].n1),
2532 fold_convert (type, fd->loops[i].n2));
2533 if (!integer_onep (this_cond))
2534 need_inits = true;
2537 return cont_bb;
2540 /* A subroutine of expand_omp_for. Generate code for a parallel
2541 loop with any schedule. Given parameters:
2543 for (V = N1; V cond N2; V += STEP) BODY;
2545 where COND is "<" or ">", we generate pseudocode
2547 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2548 if (more) goto L0; else goto L3;
2550 V = istart0;
2551 iend = iend0;
2553 BODY;
2554 V += STEP;
2555 if (V cond iend) goto L1; else goto L2;
2557 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2560 If this is a combined omp parallel loop, instead of the call to
2561 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2562 If this is gimple_omp_for_combined_p loop, then instead of assigning
2563 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2564 inner GIMPLE_OMP_FOR and V += STEP; and
2565 if (V cond iend) goto L1; else goto L2; are removed.
2567 For collapsed loops, given parameters:
2568 collapse(3)
2569 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2570 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2571 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2572 BODY;
2574 we generate pseudocode
2576 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2577 if (cond3 is <)
2578 adj = STEP3 - 1;
2579 else
2580 adj = STEP3 + 1;
2581 count3 = (adj + N32 - N31) / STEP3;
2582 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2583 if (cond2 is <)
2584 adj = STEP2 - 1;
2585 else
2586 adj = STEP2 + 1;
2587 count2 = (adj + N22 - N21) / STEP2;
2588 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2589 if (cond1 is <)
2590 adj = STEP1 - 1;
2591 else
2592 adj = STEP1 + 1;
2593 count1 = (adj + N12 - N11) / STEP1;
2594 count = count1 * count2 * count3;
2595 goto Z1;
2597 count = 0;
2599 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2600 if (more) goto L0; else goto L3;
2602 V = istart0;
2603 T = V;
2604 V3 = N31 + (T % count3) * STEP3;
2605 T = T / count3;
2606 V2 = N21 + (T % count2) * STEP2;
2607 T = T / count2;
2608 V1 = N11 + T * STEP1;
2609 iend = iend0;
2611 BODY;
2612 V += 1;
2613 if (V < iend) goto L10; else goto L2;
2614 L10:
2615 V3 += STEP3;
2616 if (V3 cond3 N32) goto L1; else goto L11;
2617 L11:
2618 V3 = N31;
2619 V2 += STEP2;
2620 if (V2 cond2 N22) goto L1; else goto L12;
2621 L12:
2622 V2 = N21;
2623 V1 += STEP1;
2624 goto L1;
2626 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2631 static void
2632 expand_omp_for_generic (struct omp_region *region,
2633 struct omp_for_data *fd,
2634 enum built_in_function start_fn,
2635 enum built_in_function next_fn,
2636 tree sched_arg,
2637 gimple *inner_stmt)
2639 tree type, istart0, iend0, iend;
2640 tree t, vmain, vback, bias = NULL_TREE;
2641 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2642 basic_block l2_bb = NULL, l3_bb = NULL;
2643 gimple_stmt_iterator gsi;
2644 gassign *assign_stmt;
2645 bool in_combined_parallel = is_combined_parallel (region);
2646 bool broken_loop = region->cont == NULL;
2647 edge e, ne;
2648 tree *counts = NULL;
2649 int i;
2650 bool ordered_lastprivate = false;
2652 gcc_assert (!broken_loop || !in_combined_parallel);
2653 gcc_assert (fd->iter_type == long_integer_type_node
2654 || !in_combined_parallel);
2656 entry_bb = region->entry;
2657 cont_bb = region->cont;
2658 collapse_bb = NULL;
2659 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2660 gcc_assert (broken_loop
2661 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2662 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2663 l1_bb = single_succ (l0_bb);
2664 if (!broken_loop)
2666 l2_bb = create_empty_bb (cont_bb);
2667 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2668 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2669 == l1_bb));
2670 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2672 else
2673 l2_bb = NULL;
2674 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2675 exit_bb = region->exit;
2677 gsi = gsi_last_nondebug_bb (entry_bb);
2679 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2680 if (fd->ordered
2681 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2682 OMP_CLAUSE_LASTPRIVATE))
2683 ordered_lastprivate = false;
2684 tree reductions = NULL_TREE;
2685 tree mem = NULL_TREE;
2686 if (sched_arg)
2688 if (fd->have_reductemp)
2690 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2691 OMP_CLAUSE__REDUCTEMP_);
2692 reductions = OMP_CLAUSE_DECL (c);
2693 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2694 gimple *g = SSA_NAME_DEF_STMT (reductions);
2695 reductions = gimple_assign_rhs1 (g);
2696 OMP_CLAUSE_DECL (c) = reductions;
2697 entry_bb = gimple_bb (g);
2698 edge e = split_block (entry_bb, g);
2699 if (region->entry == entry_bb)
2700 region->entry = e->dest;
2701 gsi = gsi_last_bb (entry_bb);
2703 else
2704 reductions = null_pointer_node;
2705 /* For now. */
2706 mem = null_pointer_node;
2708 if (fd->collapse > 1 || fd->ordered)
2710 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2711 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2713 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2714 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2715 zero_iter1_bb, first_zero_iter1,
2716 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2718 if (zero_iter1_bb)
2720 /* Some counts[i] vars might be uninitialized if
2721 some loop has zero iterations. But the body shouldn't
2722 be executed in that case, so just avoid uninit warnings. */
2723 for (i = first_zero_iter1;
2724 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2725 if (SSA_VAR_P (counts[i]))
2726 TREE_NO_WARNING (counts[i]) = 1;
2727 gsi_prev (&gsi);
2728 e = split_block (entry_bb, gsi_stmt (gsi));
2729 entry_bb = e->dest;
2730 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2731 gsi = gsi_last_nondebug_bb (entry_bb);
2732 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2733 get_immediate_dominator (CDI_DOMINATORS,
2734 zero_iter1_bb));
2736 if (zero_iter2_bb)
2738 /* Some counts[i] vars might be uninitialized if
2739 some loop has zero iterations. But the body shouldn't
2740 be executed in that case, so just avoid uninit warnings. */
2741 for (i = first_zero_iter2; i < fd->ordered; i++)
2742 if (SSA_VAR_P (counts[i]))
2743 TREE_NO_WARNING (counts[i]) = 1;
2744 if (zero_iter1_bb)
2745 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2746 else
2748 gsi_prev (&gsi);
2749 e = split_block (entry_bb, gsi_stmt (gsi));
2750 entry_bb = e->dest;
2751 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2752 gsi = gsi_last_nondebug_bb (entry_bb);
2753 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2754 get_immediate_dominator
2755 (CDI_DOMINATORS, zero_iter2_bb));
2758 if (fd->collapse == 1)
2760 counts[0] = fd->loop.n2;
2761 fd->loop = fd->loops[0];
2765 type = TREE_TYPE (fd->loop.v);
2766 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2767 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2768 TREE_ADDRESSABLE (istart0) = 1;
2769 TREE_ADDRESSABLE (iend0) = 1;
2771 /* See if we need to bias by LLONG_MIN. */
2772 if (fd->iter_type == long_long_unsigned_type_node
2773 && TREE_CODE (type) == INTEGER_TYPE
2774 && !TYPE_UNSIGNED (type)
2775 && fd->ordered == 0)
2777 tree n1, n2;
2779 if (fd->loop.cond_code == LT_EXPR)
2781 n1 = fd->loop.n1;
2782 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2784 else
2786 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2787 n2 = fd->loop.n1;
2789 if (TREE_CODE (n1) != INTEGER_CST
2790 || TREE_CODE (n2) != INTEGER_CST
2791 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2792 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2795 gimple_stmt_iterator gsif = gsi;
2796 gsi_prev (&gsif);
2798 tree arr = NULL_TREE;
2799 if (in_combined_parallel)
2801 gcc_assert (fd->ordered == 0);
2802 /* In a combined parallel loop, emit a call to
2803 GOMP_loop_foo_next. */
2804 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2805 build_fold_addr_expr (istart0),
2806 build_fold_addr_expr (iend0));
2808 else
2810 tree t0, t1, t2, t3, t4;
2811 /* If this is not a combined parallel loop, emit a call to
2812 GOMP_loop_foo_start in ENTRY_BB. */
2813 t4 = build_fold_addr_expr (iend0);
2814 t3 = build_fold_addr_expr (istart0);
2815 if (fd->ordered)
2817 t0 = build_int_cst (unsigned_type_node,
2818 fd->ordered - fd->collapse + 1);
2819 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2820 fd->ordered
2821 - fd->collapse + 1),
2822 ".omp_counts");
2823 DECL_NAMELESS (arr) = 1;
2824 TREE_ADDRESSABLE (arr) = 1;
2825 TREE_STATIC (arr) = 1;
2826 vec<constructor_elt, va_gc> *v;
2827 vec_alloc (v, fd->ordered - fd->collapse + 1);
2828 int idx;
2830 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2832 tree c;
2833 if (idx == 0 && fd->collapse > 1)
2834 c = fd->loop.n2;
2835 else
2836 c = counts[idx + fd->collapse - 1];
2837 tree purpose = size_int (idx);
2838 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2839 if (TREE_CODE (c) != INTEGER_CST)
2840 TREE_STATIC (arr) = 0;
2843 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2844 if (!TREE_STATIC (arr))
2845 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2846 void_type_node, arr),
2847 true, NULL_TREE, true, GSI_SAME_STMT);
2848 t1 = build_fold_addr_expr (arr);
2849 t2 = NULL_TREE;
2851 else
2853 t2 = fold_convert (fd->iter_type, fd->loop.step);
2854 t1 = fd->loop.n2;
2855 t0 = fd->loop.n1;
2856 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2858 tree innerc
2859 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2860 OMP_CLAUSE__LOOPTEMP_);
2861 gcc_assert (innerc);
2862 t0 = OMP_CLAUSE_DECL (innerc);
2863 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2864 OMP_CLAUSE__LOOPTEMP_);
2865 gcc_assert (innerc);
2866 t1 = OMP_CLAUSE_DECL (innerc);
2868 if (POINTER_TYPE_P (TREE_TYPE (t0))
2869 && TYPE_PRECISION (TREE_TYPE (t0))
2870 != TYPE_PRECISION (fd->iter_type))
2872 /* Avoid casting pointers to integer of a different size. */
2873 tree itype = signed_type_for (type);
2874 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2875 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2877 else
2879 t1 = fold_convert (fd->iter_type, t1);
2880 t0 = fold_convert (fd->iter_type, t0);
2882 if (bias)
2884 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2885 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2888 if (fd->iter_type == long_integer_type_node || fd->ordered)
2890 if (fd->chunk_size)
2892 t = fold_convert (fd->iter_type, fd->chunk_size);
2893 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2894 if (sched_arg)
2896 if (fd->ordered)
2897 t = build_call_expr (builtin_decl_explicit (start_fn),
2898 8, t0, t1, sched_arg, t, t3, t4,
2899 reductions, mem);
2900 else
2901 t = build_call_expr (builtin_decl_explicit (start_fn),
2902 9, t0, t1, t2, sched_arg, t, t3, t4,
2903 reductions, mem);
2905 else if (fd->ordered)
2906 t = build_call_expr (builtin_decl_explicit (start_fn),
2907 5, t0, t1, t, t3, t4);
2908 else
2909 t = build_call_expr (builtin_decl_explicit (start_fn),
2910 6, t0, t1, t2, t, t3, t4);
2912 else if (fd->ordered)
2913 t = build_call_expr (builtin_decl_explicit (start_fn),
2914 4, t0, t1, t3, t4);
2915 else
2916 t = build_call_expr (builtin_decl_explicit (start_fn),
2917 5, t0, t1, t2, t3, t4);
2919 else
2921 tree t5;
2922 tree c_bool_type;
2923 tree bfn_decl;
2925 /* The GOMP_loop_ull_*start functions have additional boolean
2926 argument, true for < loops and false for > loops.
2927 In Fortran, the C bool type can be different from
2928 boolean_type_node. */
2929 bfn_decl = builtin_decl_explicit (start_fn);
2930 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2931 t5 = build_int_cst (c_bool_type,
2932 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2933 if (fd->chunk_size)
2935 tree bfn_decl = builtin_decl_explicit (start_fn);
2936 t = fold_convert (fd->iter_type, fd->chunk_size);
2937 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2938 if (sched_arg)
2939 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2940 t, t3, t4, reductions, mem);
2941 else
2942 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2944 else
2945 t = build_call_expr (builtin_decl_explicit (start_fn),
2946 6, t5, t0, t1, t2, t3, t4);
2949 if (TREE_TYPE (t) != boolean_type_node)
2950 t = fold_build2 (NE_EXPR, boolean_type_node,
2951 t, build_int_cst (TREE_TYPE (t), 0));
2952 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2953 true, GSI_SAME_STMT);
2954 if (arr && !TREE_STATIC (arr))
2956 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2957 TREE_THIS_VOLATILE (clobber) = 1;
2958 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2959 GSI_SAME_STMT);
2961 if (fd->have_reductemp)
2963 gimple *g = gsi_stmt (gsi);
2964 gsi_remove (&gsi, true);
2965 release_ssa_name (gimple_assign_lhs (g));
2967 entry_bb = region->entry;
2968 gsi = gsi_last_nondebug_bb (entry_bb);
2970 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2972 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2974 /* Remove the GIMPLE_OMP_FOR statement. */
2975 gsi_remove (&gsi, true);
2977 if (gsi_end_p (gsif))
2978 gsif = gsi_after_labels (gsi_bb (gsif));
2979 gsi_next (&gsif);
2981 /* Iteration setup for sequential loop goes in L0_BB. */
2982 tree startvar = fd->loop.v;
2983 tree endvar = NULL_TREE;
2985 if (gimple_omp_for_combined_p (fd->for_stmt))
2987 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2988 && gimple_omp_for_kind (inner_stmt)
2989 == GF_OMP_FOR_KIND_SIMD);
2990 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2991 OMP_CLAUSE__LOOPTEMP_);
2992 gcc_assert (innerc);
2993 startvar = OMP_CLAUSE_DECL (innerc);
2994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2995 OMP_CLAUSE__LOOPTEMP_);
2996 gcc_assert (innerc);
2997 endvar = OMP_CLAUSE_DECL (innerc);
3000 gsi = gsi_start_bb (l0_bb);
3001 t = istart0;
3002 if (fd->ordered && fd->collapse == 1)
3003 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3004 fold_convert (fd->iter_type, fd->loop.step));
3005 else if (bias)
3006 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3007 if (fd->ordered && fd->collapse == 1)
3009 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3010 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3011 fd->loop.n1, fold_convert (sizetype, t));
3012 else
3014 t = fold_convert (TREE_TYPE (startvar), t);
3015 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3016 fd->loop.n1, t);
3019 else
3021 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3022 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3023 t = fold_convert (TREE_TYPE (startvar), t);
3025 t = force_gimple_operand_gsi (&gsi, t,
3026 DECL_P (startvar)
3027 && TREE_ADDRESSABLE (startvar),
3028 NULL_TREE, false, GSI_CONTINUE_LINKING);
3029 assign_stmt = gimple_build_assign (startvar, t);
3030 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3032 t = iend0;
3033 if (fd->ordered && fd->collapse == 1)
3034 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3035 fold_convert (fd->iter_type, fd->loop.step));
3036 else if (bias)
3037 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3038 if (fd->ordered && fd->collapse == 1)
3040 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3041 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3042 fd->loop.n1, fold_convert (sizetype, t));
3043 else
3045 t = fold_convert (TREE_TYPE (startvar), t);
3046 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3047 fd->loop.n1, t);
3050 else
3052 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3053 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3054 t = fold_convert (TREE_TYPE (startvar), t);
3056 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3057 false, GSI_CONTINUE_LINKING);
3058 if (endvar)
3060 assign_stmt = gimple_build_assign (endvar, iend);
3061 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3062 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3063 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3064 else
3065 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3066 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 /* Handle linear clause adjustments. */
3069 tree itercnt = NULL_TREE;
3070 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3071 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3072 c; c = OMP_CLAUSE_CHAIN (c))
3073 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3074 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3076 tree d = OMP_CLAUSE_DECL (c);
3077 bool is_ref = omp_is_reference (d);
3078 tree t = d, a, dest;
3079 if (is_ref)
3080 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3081 tree type = TREE_TYPE (t);
3082 if (POINTER_TYPE_P (type))
3083 type = sizetype;
3084 dest = unshare_expr (t);
3085 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3086 expand_omp_build_assign (&gsif, v, t);
3087 if (itercnt == NULL_TREE)
3089 itercnt = startvar;
3090 tree n1 = fd->loop.n1;
3091 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3093 itercnt
3094 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3095 itercnt);
3096 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3098 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3099 itercnt, n1);
3100 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3101 itercnt, fd->loop.step);
3102 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3103 NULL_TREE, false,
3104 GSI_CONTINUE_LINKING);
3106 a = fold_build2 (MULT_EXPR, type,
3107 fold_convert (type, itercnt),
3108 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3109 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3110 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3111 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3112 false, GSI_CONTINUE_LINKING);
3113 assign_stmt = gimple_build_assign (dest, t);
3114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3116 if (fd->collapse > 1)
3117 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3119 if (fd->ordered)
3121 /* Until now, counts array contained number of iterations or
3122 variable containing it for ith loop. From now on, we need
3123 those counts only for collapsed loops, and only for the 2nd
3124 till the last collapsed one. Move those one element earlier,
3125 we'll use counts[fd->collapse - 1] for the first source/sink
3126 iteration counter and so on and counts[fd->ordered]
3127 as the array holding the current counter values for
3128 depend(source). */
3129 if (fd->collapse > 1)
3130 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3131 if (broken_loop)
3133 int i;
3134 for (i = fd->collapse; i < fd->ordered; i++)
3136 tree type = TREE_TYPE (fd->loops[i].v);
3137 tree this_cond
3138 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3139 fold_convert (type, fd->loops[i].n1),
3140 fold_convert (type, fd->loops[i].n2));
3141 if (!integer_onep (this_cond))
3142 break;
3144 if (i < fd->ordered)
3146 cont_bb
3147 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3148 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3149 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3150 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3151 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3152 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3153 make_edge (cont_bb, l1_bb, 0);
3154 l2_bb = create_empty_bb (cont_bb);
3155 broken_loop = false;
3158 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3159 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3160 ordered_lastprivate);
3161 if (counts[fd->collapse - 1])
3163 gcc_assert (fd->collapse == 1);
3164 gsi = gsi_last_bb (l0_bb);
3165 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3166 istart0, true);
3167 gsi = gsi_last_bb (cont_bb);
3168 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3169 build_int_cst (fd->iter_type, 1));
3170 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3171 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3172 size_zero_node, NULL_TREE, NULL_TREE);
3173 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3174 t = counts[fd->collapse - 1];
3176 else if (fd->collapse > 1)
3177 t = fd->loop.v;
3178 else
3180 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3181 fd->loops[0].v, fd->loops[0].n1);
3182 t = fold_convert (fd->iter_type, t);
3184 gsi = gsi_last_bb (l0_bb);
3185 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3186 size_zero_node, NULL_TREE, NULL_TREE);
3187 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3188 false, GSI_CONTINUE_LINKING);
3189 expand_omp_build_assign (&gsi, aref, t, true);
3192 if (!broken_loop)
3194 /* Code to control the increment and predicate for the sequential
3195 loop goes in the CONT_BB. */
3196 gsi = gsi_last_nondebug_bb (cont_bb);
3197 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3198 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3199 vmain = gimple_omp_continue_control_use (cont_stmt);
3200 vback = gimple_omp_continue_control_def (cont_stmt);
3202 if (!gimple_omp_for_combined_p (fd->for_stmt))
3204 if (POINTER_TYPE_P (type))
3205 t = fold_build_pointer_plus (vmain, fd->loop.step);
3206 else
3207 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3208 t = force_gimple_operand_gsi (&gsi, t,
3209 DECL_P (vback)
3210 && TREE_ADDRESSABLE (vback),
3211 NULL_TREE, true, GSI_SAME_STMT);
3212 assign_stmt = gimple_build_assign (vback, t);
3213 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3215 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3217 tree tem;
3218 if (fd->collapse > 1)
3219 tem = fd->loop.v;
3220 else
3222 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3223 fd->loops[0].v, fd->loops[0].n1);
3224 tem = fold_convert (fd->iter_type, tem);
3226 tree aref = build4 (ARRAY_REF, fd->iter_type,
3227 counts[fd->ordered], size_zero_node,
3228 NULL_TREE, NULL_TREE);
3229 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3230 true, GSI_SAME_STMT);
3231 expand_omp_build_assign (&gsi, aref, tem);
3234 t = build2 (fd->loop.cond_code, boolean_type_node,
3235 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3236 iend);
3237 gcond *cond_stmt = gimple_build_cond_empty (t);
3238 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3241 /* Remove GIMPLE_OMP_CONTINUE. */
3242 gsi_remove (&gsi, true);
3244 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3245 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3247 /* Emit code to get the next parallel iteration in L2_BB. */
3248 gsi = gsi_start_bb (l2_bb);
3250 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3251 build_fold_addr_expr (istart0),
3252 build_fold_addr_expr (iend0));
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 if (TREE_TYPE (t) != boolean_type_node)
3256 t = fold_build2 (NE_EXPR, boolean_type_node,
3257 t, build_int_cst (TREE_TYPE (t), 0));
3258 gcond *cond_stmt = gimple_build_cond_empty (t);
3259 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3262 /* Add the loop cleanup function. */
3263 gsi = gsi_last_nondebug_bb (exit_bb);
3264 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3265 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3266 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3267 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3268 else
3269 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3270 gcall *call_stmt = gimple_build_call (t, 0);
3271 if (fd->ordered)
3273 tree arr = counts[fd->ordered];
3274 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3275 TREE_THIS_VOLATILE (clobber) = 1;
3276 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3277 GSI_SAME_STMT);
3279 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3281 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3282 if (fd->have_reductemp)
3284 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3285 gimple_call_lhs (call_stmt));
3286 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3289 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3290 gsi_remove (&gsi, true);
3292 /* Connect the new blocks. */
3293 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3294 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3296 if (!broken_loop)
3298 gimple_seq phis;
3300 e = find_edge (cont_bb, l3_bb);
3301 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3303 phis = phi_nodes (l3_bb);
3304 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3306 gimple *phi = gsi_stmt (gsi);
3307 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3308 PHI_ARG_DEF_FROM_EDGE (phi, e));
3310 remove_edge (e);
3312 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3313 e = find_edge (cont_bb, l1_bb);
3314 if (e == NULL)
3316 e = BRANCH_EDGE (cont_bb);
3317 gcc_assert (single_succ (e->dest) == l1_bb);
3319 if (gimple_omp_for_combined_p (fd->for_stmt))
3321 remove_edge (e);
3322 e = NULL;
3324 else if (fd->collapse > 1)
3326 remove_edge (e);
3327 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3329 else
3330 e->flags = EDGE_TRUE_VALUE;
3331 if (e)
3333 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3334 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3336 else
3338 e = find_edge (cont_bb, l2_bb);
3339 e->flags = EDGE_FALLTHRU;
3341 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3343 if (gimple_in_ssa_p (cfun))
3345 /* Add phis to the outer loop that connect to the phis in the inner,
3346 original loop, and move the loop entry value of the inner phi to
3347 the loop entry value of the outer phi. */
3348 gphi_iterator psi;
3349 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3351 location_t locus;
3352 gphi *nphi;
3353 gphi *exit_phi = psi.phi ();
3355 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3356 continue;
3358 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3359 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3361 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3362 edge latch_to_l1 = find_edge (latch, l1_bb);
3363 gphi *inner_phi
3364 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3366 tree t = gimple_phi_result (exit_phi);
3367 tree new_res = copy_ssa_name (t, NULL);
3368 nphi = create_phi_node (new_res, l0_bb);
3370 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3371 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3372 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3373 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3374 add_phi_arg (nphi, t, entry_to_l0, locus);
3376 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3377 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3379 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3383 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3384 recompute_dominator (CDI_DOMINATORS, l2_bb));
3385 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3386 recompute_dominator (CDI_DOMINATORS, l3_bb));
3387 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3388 recompute_dominator (CDI_DOMINATORS, l0_bb));
3389 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3390 recompute_dominator (CDI_DOMINATORS, l1_bb));
3392 /* We enter expand_omp_for_generic with a loop. This original loop may
3393 have its own loop struct, or it may be part of an outer loop struct
3394 (which may be the fake loop). */
3395 struct loop *outer_loop = entry_bb->loop_father;
3396 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3398 add_bb_to_loop (l2_bb, outer_loop);
3400 /* We've added a new loop around the original loop. Allocate the
3401 corresponding loop struct. */
3402 struct loop *new_loop = alloc_loop ();
3403 new_loop->header = l0_bb;
3404 new_loop->latch = l2_bb;
3405 add_loop (new_loop, outer_loop);
3407 /* Allocate a loop structure for the original loop unless we already
3408 had one. */
3409 if (!orig_loop_has_loop_struct
3410 && !gimple_omp_for_combined_p (fd->for_stmt))
3412 struct loop *orig_loop = alloc_loop ();
3413 orig_loop->header = l1_bb;
3414 /* The loop may have multiple latches. */
3415 add_loop (orig_loop, new_loop);
3420 /* A subroutine of expand_omp_for. Generate code for a parallel
3421 loop with static schedule and no specified chunk size. Given
3422 parameters:
3424 for (V = N1; V cond N2; V += STEP) BODY;
3426 where COND is "<" or ">", we generate pseudocode
3428 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3429 if (cond is <)
3430 adj = STEP - 1;
3431 else
3432 adj = STEP + 1;
3433 if ((__typeof (V)) -1 > 0 && cond is >)
3434 n = -(adj + N2 - N1) / -STEP;
3435 else
3436 n = (adj + N2 - N1) / STEP;
3437 q = n / nthreads;
3438 tt = n % nthreads;
3439 if (threadid < tt) goto L3; else goto L4;
3441 tt = 0;
3442 q = q + 1;
3444 s0 = q * threadid + tt;
3445 e0 = s0 + q;
3446 V = s0 * STEP + N1;
3447 if (s0 >= e0) goto L2; else goto L0;
3449 e = e0 * STEP + N1;
3451 BODY;
3452 V += STEP;
3453 if (V cond e) goto L1;
3457 static void
3458 expand_omp_for_static_nochunk (struct omp_region *region,
3459 struct omp_for_data *fd,
3460 gimple *inner_stmt)
3462 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3463 tree type, itype, vmain, vback;
3464 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3465 basic_block body_bb, cont_bb, collapse_bb = NULL;
3466 basic_block fin_bb;
3467 gimple_stmt_iterator gsi;
3468 edge ep;
3469 bool broken_loop = region->cont == NULL;
3470 tree *counts = NULL;
3471 tree n1, n2, step;
3472 tree reductions = NULL_TREE;
3474 itype = type = TREE_TYPE (fd->loop.v);
3475 if (POINTER_TYPE_P (type))
3476 itype = signed_type_for (type);
3478 entry_bb = region->entry;
3479 cont_bb = region->cont;
3480 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3481 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3482 gcc_assert (broken_loop
3483 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3484 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3485 body_bb = single_succ (seq_start_bb);
3486 if (!broken_loop)
3488 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3489 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3490 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3492 exit_bb = region->exit;
3494 /* Iteration space partitioning goes in ENTRY_BB. */
3495 gsi = gsi_last_nondebug_bb (entry_bb);
3496 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3498 if (fd->collapse > 1)
3500 int first_zero_iter = -1, dummy = -1;
3501 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3503 counts = XALLOCAVEC (tree, fd->collapse);
3504 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3505 fin_bb, first_zero_iter,
3506 dummy_bb, dummy, l2_dom_bb);
3507 t = NULL_TREE;
3509 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3510 t = integer_one_node;
3511 else
3512 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3513 fold_convert (type, fd->loop.n1),
3514 fold_convert (type, fd->loop.n2));
3515 if (fd->collapse == 1
3516 && TYPE_UNSIGNED (type)
3517 && (t == NULL_TREE || !integer_onep (t)))
3519 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3520 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3521 true, GSI_SAME_STMT);
3522 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3523 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3524 true, GSI_SAME_STMT);
3525 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3526 NULL_TREE, NULL_TREE);
3527 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3528 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3529 expand_omp_regimplify_p, NULL, NULL)
3530 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3531 expand_omp_regimplify_p, NULL, NULL))
3533 gsi = gsi_for_stmt (cond_stmt);
3534 gimple_regimplify_operands (cond_stmt, &gsi);
3536 ep = split_block (entry_bb, cond_stmt);
3537 ep->flags = EDGE_TRUE_VALUE;
3538 entry_bb = ep->dest;
3539 ep->probability = profile_probability::very_likely ();
3540 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3541 ep->probability = profile_probability::very_unlikely ();
3542 if (gimple_in_ssa_p (cfun))
3544 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3545 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3546 !gsi_end_p (gpi); gsi_next (&gpi))
3548 gphi *phi = gpi.phi ();
3549 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3550 ep, UNKNOWN_LOCATION);
3553 gsi = gsi_last_bb (entry_bb);
3556 if (fd->have_reductemp)
3558 tree t1 = build_int_cst (long_integer_type_node, 0);
3559 tree t2 = build_int_cst (long_integer_type_node, 1);
3560 tree t3 = build_int_cstu (long_integer_type_node,
3561 (HOST_WIDE_INT_1U << 31) + 1);
3562 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3563 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3564 reductions = OMP_CLAUSE_DECL (clauses);
3565 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3566 gimple *g = SSA_NAME_DEF_STMT (reductions);
3567 reductions = gimple_assign_rhs1 (g);
3568 OMP_CLAUSE_DECL (clauses) = reductions;
3569 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3570 tree t
3571 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3572 9, t1, t2, t2, t3, t1, null_pointer_node,
3573 null_pointer_node, reductions, null_pointer_node);
3574 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3575 true, GSI_SAME_STMT);
3576 gsi_remove (&gsi2, true);
3577 release_ssa_name (gimple_assign_lhs (g));
3579 switch (gimple_omp_for_kind (fd->for_stmt))
3581 case GF_OMP_FOR_KIND_FOR:
3582 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3583 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3584 break;
3585 case GF_OMP_FOR_KIND_DISTRIBUTE:
3586 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3587 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3588 break;
3589 default:
3590 gcc_unreachable ();
3592 nthreads = build_call_expr (nthreads, 0);
3593 nthreads = fold_convert (itype, nthreads);
3594 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3595 true, GSI_SAME_STMT);
3596 threadid = build_call_expr (threadid, 0);
3597 threadid = fold_convert (itype, threadid);
3598 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3599 true, GSI_SAME_STMT);
3601 n1 = fd->loop.n1;
3602 n2 = fd->loop.n2;
3603 step = fd->loop.step;
3604 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3606 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3607 OMP_CLAUSE__LOOPTEMP_);
3608 gcc_assert (innerc);
3609 n1 = OMP_CLAUSE_DECL (innerc);
3610 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3611 OMP_CLAUSE__LOOPTEMP_);
3612 gcc_assert (innerc);
3613 n2 = OMP_CLAUSE_DECL (innerc);
3615 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3616 true, NULL_TREE, true, GSI_SAME_STMT);
3617 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3618 true, NULL_TREE, true, GSI_SAME_STMT);
3619 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3620 true, NULL_TREE, true, GSI_SAME_STMT);
3622 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3623 t = fold_build2 (PLUS_EXPR, itype, step, t);
3624 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3625 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3626 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3627 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3628 fold_build1 (NEGATE_EXPR, itype, t),
3629 fold_build1 (NEGATE_EXPR, itype, step));
3630 else
3631 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3632 t = fold_convert (itype, t);
3633 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3635 q = create_tmp_reg (itype, "q");
3636 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3637 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3638 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3640 tt = create_tmp_reg (itype, "tt");
3641 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3642 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3643 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3645 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3646 gcond *cond_stmt = gimple_build_cond_empty (t);
3647 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3649 second_bb = split_block (entry_bb, cond_stmt)->dest;
3650 gsi = gsi_last_nondebug_bb (second_bb);
3651 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3653 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3654 GSI_SAME_STMT);
3655 gassign *assign_stmt
3656 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3657 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3659 third_bb = split_block (second_bb, assign_stmt)->dest;
3660 gsi = gsi_last_nondebug_bb (third_bb);
3661 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3663 t = build2 (MULT_EXPR, itype, q, threadid);
3664 t = build2 (PLUS_EXPR, itype, t, tt);
3665 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3667 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3668 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3670 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3671 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3673 /* Remove the GIMPLE_OMP_FOR statement. */
3674 gsi_remove (&gsi, true);
3676 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3677 gsi = gsi_start_bb (seq_start_bb);
3679 tree startvar = fd->loop.v;
3680 tree endvar = NULL_TREE;
3682 if (gimple_omp_for_combined_p (fd->for_stmt))
3684 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3685 ? gimple_omp_parallel_clauses (inner_stmt)
3686 : gimple_omp_for_clauses (inner_stmt);
3687 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3688 gcc_assert (innerc);
3689 startvar = OMP_CLAUSE_DECL (innerc);
3690 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3691 OMP_CLAUSE__LOOPTEMP_);
3692 gcc_assert (innerc);
3693 endvar = OMP_CLAUSE_DECL (innerc);
3694 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3695 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3697 int i;
3698 for (i = 1; i < fd->collapse; i++)
3700 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3701 OMP_CLAUSE__LOOPTEMP_);
3702 gcc_assert (innerc);
3704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3705 OMP_CLAUSE__LOOPTEMP_);
3706 if (innerc)
3708 /* If needed (distribute parallel for with lastprivate),
3709 propagate down the total number of iterations. */
3710 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3711 fd->loop.n2);
3712 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3713 GSI_CONTINUE_LINKING);
3714 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3715 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3719 t = fold_convert (itype, s0);
3720 t = fold_build2 (MULT_EXPR, itype, t, step);
3721 if (POINTER_TYPE_P (type))
3723 t = fold_build_pointer_plus (n1, t);
3724 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3725 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3726 t = fold_convert (signed_type_for (type), t);
3728 else
3729 t = fold_build2 (PLUS_EXPR, type, t, n1);
3730 t = fold_convert (TREE_TYPE (startvar), t);
3731 t = force_gimple_operand_gsi (&gsi, t,
3732 DECL_P (startvar)
3733 && TREE_ADDRESSABLE (startvar),
3734 NULL_TREE, false, GSI_CONTINUE_LINKING);
3735 assign_stmt = gimple_build_assign (startvar, t);
3736 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3738 t = fold_convert (itype, e0);
3739 t = fold_build2 (MULT_EXPR, itype, t, step);
3740 if (POINTER_TYPE_P (type))
3742 t = fold_build_pointer_plus (n1, t);
3743 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3744 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3745 t = fold_convert (signed_type_for (type), t);
3747 else
3748 t = fold_build2 (PLUS_EXPR, type, t, n1);
3749 t = fold_convert (TREE_TYPE (startvar), t);
3750 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3751 false, GSI_CONTINUE_LINKING);
3752 if (endvar)
3754 assign_stmt = gimple_build_assign (endvar, e);
3755 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3756 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3757 assign_stmt = gimple_build_assign (fd->loop.v, e);
3758 else
3759 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3760 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3762 /* Handle linear clause adjustments. */
3763 tree itercnt = NULL_TREE;
3764 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3765 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3766 c; c = OMP_CLAUSE_CHAIN (c))
3767 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3768 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3770 tree d = OMP_CLAUSE_DECL (c);
3771 bool is_ref = omp_is_reference (d);
3772 tree t = d, a, dest;
3773 if (is_ref)
3774 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3775 if (itercnt == NULL_TREE)
3777 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3779 itercnt = fold_build2 (MINUS_EXPR, itype,
3780 fold_convert (itype, n1),
3781 fold_convert (itype, fd->loop.n1));
3782 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3783 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3784 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3785 NULL_TREE, false,
3786 GSI_CONTINUE_LINKING);
3788 else
3789 itercnt = s0;
3791 tree type = TREE_TYPE (t);
3792 if (POINTER_TYPE_P (type))
3793 type = sizetype;
3794 a = fold_build2 (MULT_EXPR, type,
3795 fold_convert (type, itercnt),
3796 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3797 dest = unshare_expr (t);
3798 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3799 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3800 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3801 false, GSI_CONTINUE_LINKING);
3802 assign_stmt = gimple_build_assign (dest, t);
3803 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3805 if (fd->collapse > 1)
3806 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3808 if (!broken_loop)
3810 /* The code controlling the sequential loop replaces the
3811 GIMPLE_OMP_CONTINUE. */
3812 gsi = gsi_last_nondebug_bb (cont_bb);
3813 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3814 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3815 vmain = gimple_omp_continue_control_use (cont_stmt);
3816 vback = gimple_omp_continue_control_def (cont_stmt);
3818 if (!gimple_omp_for_combined_p (fd->for_stmt))
3820 if (POINTER_TYPE_P (type))
3821 t = fold_build_pointer_plus (vmain, step);
3822 else
3823 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3824 t = force_gimple_operand_gsi (&gsi, t,
3825 DECL_P (vback)
3826 && TREE_ADDRESSABLE (vback),
3827 NULL_TREE, true, GSI_SAME_STMT);
3828 assign_stmt = gimple_build_assign (vback, t);
3829 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3831 t = build2 (fd->loop.cond_code, boolean_type_node,
3832 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3833 ? t : vback, e);
3834 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3837 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3838 gsi_remove (&gsi, true);
3840 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3841 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3844 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3845 gsi = gsi_last_nondebug_bb (exit_bb);
3846 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3848 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3849 if (fd->have_reductemp)
3851 tree fn;
3852 if (t)
3853 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3854 else
3855 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3856 gcall *g = gimple_build_call (fn, 0);
3857 if (t)
3859 gimple_call_set_lhs (g, t);
3860 gsi_insert_after (&gsi, gimple_build_assign (reductions,
3861 NOP_EXPR, t),
3862 GSI_SAME_STMT);
3864 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3866 else
3867 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3869 gsi_remove (&gsi, true);
3871 /* Connect all the blocks. */
3872 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3873 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3874 ep = find_edge (entry_bb, second_bb);
3875 ep->flags = EDGE_TRUE_VALUE;
3876 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3877 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3878 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3880 if (!broken_loop)
3882 ep = find_edge (cont_bb, body_bb);
3883 if (ep == NULL)
3885 ep = BRANCH_EDGE (cont_bb);
3886 gcc_assert (single_succ (ep->dest) == body_bb);
3888 if (gimple_omp_for_combined_p (fd->for_stmt))
3890 remove_edge (ep);
3891 ep = NULL;
3893 else if (fd->collapse > 1)
3895 remove_edge (ep);
3896 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3898 else
3899 ep->flags = EDGE_TRUE_VALUE;
3900 find_edge (cont_bb, fin_bb)->flags
3901 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3904 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3905 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3906 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3908 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3909 recompute_dominator (CDI_DOMINATORS, body_bb));
3910 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3911 recompute_dominator (CDI_DOMINATORS, fin_bb));
3913 struct loop *loop = body_bb->loop_father;
3914 if (loop != entry_bb->loop_father)
3916 gcc_assert (broken_loop || loop->header == body_bb);
3917 gcc_assert (broken_loop
3918 || loop->latch == region->cont
3919 || single_pred (loop->latch) == region->cont);
3920 return;
3923 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3925 loop = alloc_loop ();
3926 loop->header = body_bb;
3927 if (collapse_bb == NULL)
3928 loop->latch = cont_bb;
3929 add_loop (loop, body_bb->loop_father);
3933 /* Return phi in E->DEST with ARG on edge E. */
3935 static gphi *
3936 find_phi_with_arg_on_edge (tree arg, edge e)
3938 basic_block bb = e->dest;
3940 for (gphi_iterator gpi = gsi_start_phis (bb);
3941 !gsi_end_p (gpi);
3942 gsi_next (&gpi))
3944 gphi *phi = gpi.phi ();
3945 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3946 return phi;
3949 return NULL;
3952 /* A subroutine of expand_omp_for. Generate code for a parallel
3953 loop with static schedule and a specified chunk size. Given
3954 parameters:
3956 for (V = N1; V cond N2; V += STEP) BODY;
3958 where COND is "<" or ">", we generate pseudocode
3960 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3961 if (cond is <)
3962 adj = STEP - 1;
3963 else
3964 adj = STEP + 1;
3965 if ((__typeof (V)) -1 > 0 && cond is >)
3966 n = -(adj + N2 - N1) / -STEP;
3967 else
3968 n = (adj + N2 - N1) / STEP;
3969 trip = 0;
3970 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3971 here so that V is defined
3972 if the loop is not entered
3974 s0 = (trip * nthreads + threadid) * CHUNK;
3975 e0 = min (s0 + CHUNK, n);
3976 if (s0 < n) goto L1; else goto L4;
3978 V = s0 * STEP + N1;
3979 e = e0 * STEP + N1;
3981 BODY;
3982 V += STEP;
3983 if (V cond e) goto L2; else goto L3;
3985 trip += 1;
3986 goto L0;
3990 static void
3991 expand_omp_for_static_chunk (struct omp_region *region,
3992 struct omp_for_data *fd, gimple *inner_stmt)
3994 tree n, s0, e0, e, t;
3995 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3996 tree type, itype, vmain, vback, vextra;
3997 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3998 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3999 gimple_stmt_iterator gsi;
4000 edge se;
4001 bool broken_loop = region->cont == NULL;
4002 tree *counts = NULL;
4003 tree n1, n2, step;
4004 tree reductions = NULL_TREE;
4006 itype = type = TREE_TYPE (fd->loop.v);
4007 if (POINTER_TYPE_P (type))
4008 itype = signed_type_for (type);
4010 entry_bb = region->entry;
4011 se = split_block (entry_bb, last_stmt (entry_bb));
4012 entry_bb = se->src;
4013 iter_part_bb = se->dest;
4014 cont_bb = region->cont;
4015 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4016 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4017 gcc_assert (broken_loop
4018 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4019 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4020 body_bb = single_succ (seq_start_bb);
4021 if (!broken_loop)
4023 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4024 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4025 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4026 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4028 exit_bb = region->exit;
4030 /* Trip and adjustment setup goes in ENTRY_BB. */
4031 gsi = gsi_last_nondebug_bb (entry_bb);
4032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4034 if (fd->collapse > 1)
4036 int first_zero_iter = -1, dummy = -1;
4037 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4039 counts = XALLOCAVEC (tree, fd->collapse);
4040 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4041 fin_bb, first_zero_iter,
4042 dummy_bb, dummy, l2_dom_bb);
4043 t = NULL_TREE;
4045 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4046 t = integer_one_node;
4047 else
4048 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4049 fold_convert (type, fd->loop.n1),
4050 fold_convert (type, fd->loop.n2));
4051 if (fd->collapse == 1
4052 && TYPE_UNSIGNED (type)
4053 && (t == NULL_TREE || !integer_onep (t)))
4055 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4056 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4057 true, GSI_SAME_STMT);
4058 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4059 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4060 true, GSI_SAME_STMT);
4061 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4062 NULL_TREE, NULL_TREE);
4063 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4064 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4065 expand_omp_regimplify_p, NULL, NULL)
4066 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4067 expand_omp_regimplify_p, NULL, NULL))
4069 gsi = gsi_for_stmt (cond_stmt);
4070 gimple_regimplify_operands (cond_stmt, &gsi);
4072 se = split_block (entry_bb, cond_stmt);
4073 se->flags = EDGE_TRUE_VALUE;
4074 entry_bb = se->dest;
4075 se->probability = profile_probability::very_likely ();
4076 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4077 se->probability = profile_probability::very_unlikely ();
4078 if (gimple_in_ssa_p (cfun))
4080 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4081 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4082 !gsi_end_p (gpi); gsi_next (&gpi))
4084 gphi *phi = gpi.phi ();
4085 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4086 se, UNKNOWN_LOCATION);
4089 gsi = gsi_last_bb (entry_bb);
4092 if (fd->have_reductemp)
4094 tree t1 = build_int_cst (long_integer_type_node, 0);
4095 tree t2 = build_int_cst (long_integer_type_node, 1);
4096 tree t3 = build_int_cstu (long_integer_type_node,
4097 (HOST_WIDE_INT_1U << 31) + 1);
4098 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4099 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4100 reductions = OMP_CLAUSE_DECL (clauses);
4101 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4102 gimple *g = SSA_NAME_DEF_STMT (reductions);
4103 reductions = gimple_assign_rhs1 (g);
4104 OMP_CLAUSE_DECL (clauses) = reductions;
4105 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4106 tree t
4107 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4108 9, t1, t2, t2, t3, t1, null_pointer_node,
4109 null_pointer_node, reductions, null_pointer_node);
4110 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4111 true, GSI_SAME_STMT);
4112 gsi_remove (&gsi2, true);
4113 release_ssa_name (gimple_assign_lhs (g));
4115 switch (gimple_omp_for_kind (fd->for_stmt))
4117 case GF_OMP_FOR_KIND_FOR:
4118 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4119 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4120 break;
4121 case GF_OMP_FOR_KIND_DISTRIBUTE:
4122 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4123 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4124 break;
4125 default:
4126 gcc_unreachable ();
4128 nthreads = build_call_expr (nthreads, 0);
4129 nthreads = fold_convert (itype, nthreads);
4130 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4131 true, GSI_SAME_STMT);
4132 threadid = build_call_expr (threadid, 0);
4133 threadid = fold_convert (itype, threadid);
4134 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4135 true, GSI_SAME_STMT);
4137 n1 = fd->loop.n1;
4138 n2 = fd->loop.n2;
4139 step = fd->loop.step;
4140 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4142 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4143 OMP_CLAUSE__LOOPTEMP_);
4144 gcc_assert (innerc);
4145 n1 = OMP_CLAUSE_DECL (innerc);
4146 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4147 OMP_CLAUSE__LOOPTEMP_);
4148 gcc_assert (innerc);
4149 n2 = OMP_CLAUSE_DECL (innerc);
4151 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4152 true, NULL_TREE, true, GSI_SAME_STMT);
4153 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4154 true, NULL_TREE, true, GSI_SAME_STMT);
4155 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4156 true, NULL_TREE, true, GSI_SAME_STMT);
4157 tree chunk_size = fold_convert (itype, fd->chunk_size);
4158 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4159 chunk_size
4160 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4161 GSI_SAME_STMT);
4163 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4164 t = fold_build2 (PLUS_EXPR, itype, step, t);
4165 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4166 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4167 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4168 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4169 fold_build1 (NEGATE_EXPR, itype, t),
4170 fold_build1 (NEGATE_EXPR, itype, step));
4171 else
4172 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4173 t = fold_convert (itype, t);
4174 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4175 true, GSI_SAME_STMT);
4177 trip_var = create_tmp_reg (itype, ".trip");
4178 if (gimple_in_ssa_p (cfun))
4180 trip_init = make_ssa_name (trip_var);
4181 trip_main = make_ssa_name (trip_var);
4182 trip_back = make_ssa_name (trip_var);
4184 else
4186 trip_init = trip_var;
4187 trip_main = trip_var;
4188 trip_back = trip_var;
4191 gassign *assign_stmt
4192 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4193 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4195 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4196 t = fold_build2 (MULT_EXPR, itype, t, step);
4197 if (POINTER_TYPE_P (type))
4198 t = fold_build_pointer_plus (n1, t);
4199 else
4200 t = fold_build2 (PLUS_EXPR, type, t, n1);
4201 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4202 true, GSI_SAME_STMT);
4204 /* Remove the GIMPLE_OMP_FOR. */
4205 gsi_remove (&gsi, true);
4207 gimple_stmt_iterator gsif = gsi;
4209 /* Iteration space partitioning goes in ITER_PART_BB. */
4210 gsi = gsi_last_bb (iter_part_bb);
4212 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4213 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4214 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4215 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4216 false, GSI_CONTINUE_LINKING);
4218 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4219 t = fold_build2 (MIN_EXPR, itype, t, n);
4220 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4221 false, GSI_CONTINUE_LINKING);
4223 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4224 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4226 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4227 gsi = gsi_start_bb (seq_start_bb);
4229 tree startvar = fd->loop.v;
4230 tree endvar = NULL_TREE;
4232 if (gimple_omp_for_combined_p (fd->for_stmt))
4234 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4235 ? gimple_omp_parallel_clauses (inner_stmt)
4236 : gimple_omp_for_clauses (inner_stmt);
4237 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4238 gcc_assert (innerc);
4239 startvar = OMP_CLAUSE_DECL (innerc);
4240 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4241 OMP_CLAUSE__LOOPTEMP_);
4242 gcc_assert (innerc);
4243 endvar = OMP_CLAUSE_DECL (innerc);
4244 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4245 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4247 int i;
4248 for (i = 1; i < fd->collapse; i++)
4250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4251 OMP_CLAUSE__LOOPTEMP_);
4252 gcc_assert (innerc);
4254 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4255 OMP_CLAUSE__LOOPTEMP_);
4256 if (innerc)
4258 /* If needed (distribute parallel for with lastprivate),
4259 propagate down the total number of iterations. */
4260 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4261 fd->loop.n2);
4262 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4263 GSI_CONTINUE_LINKING);
4264 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4265 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4270 t = fold_convert (itype, s0);
4271 t = fold_build2 (MULT_EXPR, itype, t, step);
4272 if (POINTER_TYPE_P (type))
4274 t = fold_build_pointer_plus (n1, t);
4275 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4276 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4277 t = fold_convert (signed_type_for (type), t);
4279 else
4280 t = fold_build2 (PLUS_EXPR, type, t, n1);
4281 t = fold_convert (TREE_TYPE (startvar), t);
4282 t = force_gimple_operand_gsi (&gsi, t,
4283 DECL_P (startvar)
4284 && TREE_ADDRESSABLE (startvar),
4285 NULL_TREE, false, GSI_CONTINUE_LINKING);
4286 assign_stmt = gimple_build_assign (startvar, t);
4287 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4289 t = fold_convert (itype, e0);
4290 t = fold_build2 (MULT_EXPR, itype, t, step);
4291 if (POINTER_TYPE_P (type))
4293 t = fold_build_pointer_plus (n1, t);
4294 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4295 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4296 t = fold_convert (signed_type_for (type), t);
4298 else
4299 t = fold_build2 (PLUS_EXPR, type, t, n1);
4300 t = fold_convert (TREE_TYPE (startvar), t);
4301 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 if (endvar)
4305 assign_stmt = gimple_build_assign (endvar, e);
4306 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4307 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4308 assign_stmt = gimple_build_assign (fd->loop.v, e);
4309 else
4310 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4311 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4313 /* Handle linear clause adjustments. */
4314 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4315 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4316 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4317 c; c = OMP_CLAUSE_CHAIN (c))
4318 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4319 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4321 tree d = OMP_CLAUSE_DECL (c);
4322 bool is_ref = omp_is_reference (d);
4323 tree t = d, a, dest;
4324 if (is_ref)
4325 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4326 tree type = TREE_TYPE (t);
4327 if (POINTER_TYPE_P (type))
4328 type = sizetype;
4329 dest = unshare_expr (t);
4330 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4331 expand_omp_build_assign (&gsif, v, t);
4332 if (itercnt == NULL_TREE)
4334 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4336 itercntbias
4337 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4338 fold_convert (itype, fd->loop.n1));
4339 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4340 itercntbias, step);
4341 itercntbias
4342 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4343 NULL_TREE, true,
4344 GSI_SAME_STMT);
4345 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4346 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4347 NULL_TREE, false,
4348 GSI_CONTINUE_LINKING);
4350 else
4351 itercnt = s0;
4353 a = fold_build2 (MULT_EXPR, type,
4354 fold_convert (type, itercnt),
4355 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4356 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4357 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4358 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4359 false, GSI_CONTINUE_LINKING);
4360 assign_stmt = gimple_build_assign (dest, t);
4361 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4363 if (fd->collapse > 1)
4364 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4366 if (!broken_loop)
4368 /* The code controlling the sequential loop goes in CONT_BB,
4369 replacing the GIMPLE_OMP_CONTINUE. */
4370 gsi = gsi_last_nondebug_bb (cont_bb);
4371 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4372 vmain = gimple_omp_continue_control_use (cont_stmt);
4373 vback = gimple_omp_continue_control_def (cont_stmt);
4375 if (!gimple_omp_for_combined_p (fd->for_stmt))
4377 if (POINTER_TYPE_P (type))
4378 t = fold_build_pointer_plus (vmain, step);
4379 else
4380 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4381 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4382 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4383 true, GSI_SAME_STMT);
4384 assign_stmt = gimple_build_assign (vback, t);
4385 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4387 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4388 t = build2 (EQ_EXPR, boolean_type_node,
4389 build_int_cst (itype, 0),
4390 build_int_cst (itype, 1));
4391 else
4392 t = build2 (fd->loop.cond_code, boolean_type_node,
4393 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4394 ? t : vback, e);
4395 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4398 /* Remove GIMPLE_OMP_CONTINUE. */
4399 gsi_remove (&gsi, true);
4401 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4402 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4404 /* Trip update code goes into TRIP_UPDATE_BB. */
4405 gsi = gsi_start_bb (trip_update_bb);
4407 t = build_int_cst (itype, 1);
4408 t = build2 (PLUS_EXPR, itype, trip_main, t);
4409 assign_stmt = gimple_build_assign (trip_back, t);
4410 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4413 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4414 gsi = gsi_last_nondebug_bb (exit_bb);
4415 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4417 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4418 if (fd->have_reductemp)
4420 tree fn;
4421 if (t)
4422 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4423 else
4424 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4425 gcall *g = gimple_build_call (fn, 0);
4426 if (t)
4428 gimple_call_set_lhs (g, t);
4429 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4430 NOP_EXPR, t),
4431 GSI_SAME_STMT);
4433 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4435 else
4436 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4438 gsi_remove (&gsi, true);
4440 /* Connect the new blocks. */
4441 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4442 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4444 if (!broken_loop)
4446 se = find_edge (cont_bb, body_bb);
4447 if (se == NULL)
4449 se = BRANCH_EDGE (cont_bb);
4450 gcc_assert (single_succ (se->dest) == body_bb);
4452 if (gimple_omp_for_combined_p (fd->for_stmt))
4454 remove_edge (se);
4455 se = NULL;
4457 else if (fd->collapse > 1)
4459 remove_edge (se);
4460 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4462 else
4463 se->flags = EDGE_TRUE_VALUE;
4464 find_edge (cont_bb, trip_update_bb)->flags
4465 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4467 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4468 iter_part_bb);
4471 if (gimple_in_ssa_p (cfun))
4473 gphi_iterator psi;
4474 gphi *phi;
4475 edge re, ene;
4476 edge_var_map *vm;
4477 size_t i;
4479 gcc_assert (fd->collapse == 1 && !broken_loop);
4481 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4482 remove arguments of the phi nodes in fin_bb. We need to create
4483 appropriate phi nodes in iter_part_bb instead. */
4484 se = find_edge (iter_part_bb, fin_bb);
4485 re = single_succ_edge (trip_update_bb);
4486 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4487 ene = single_succ_edge (entry_bb);
4489 psi = gsi_start_phis (fin_bb);
4490 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4491 gsi_next (&psi), ++i)
4493 gphi *nphi;
4494 location_t locus;
4496 phi = psi.phi ();
4497 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4498 redirect_edge_var_map_def (vm), 0))
4499 continue;
4501 t = gimple_phi_result (phi);
4502 gcc_assert (t == redirect_edge_var_map_result (vm));
4504 if (!single_pred_p (fin_bb))
4505 t = copy_ssa_name (t, phi);
4507 nphi = create_phi_node (t, iter_part_bb);
4509 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4510 locus = gimple_phi_arg_location_from_edge (phi, se);
4512 /* A special case -- fd->loop.v is not yet computed in
4513 iter_part_bb, we need to use vextra instead. */
4514 if (t == fd->loop.v)
4515 t = vextra;
4516 add_phi_arg (nphi, t, ene, locus);
4517 locus = redirect_edge_var_map_location (vm);
4518 tree back_arg = redirect_edge_var_map_def (vm);
4519 add_phi_arg (nphi, back_arg, re, locus);
4520 edge ce = find_edge (cont_bb, body_bb);
4521 if (ce == NULL)
4523 ce = BRANCH_EDGE (cont_bb);
4524 gcc_assert (single_succ (ce->dest) == body_bb);
4525 ce = single_succ_edge (ce->dest);
4527 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4528 gcc_assert (inner_loop_phi != NULL);
4529 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4530 find_edge (seq_start_bb, body_bb), locus);
4532 if (!single_pred_p (fin_bb))
4533 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4535 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4536 redirect_edge_var_map_clear (re);
4537 if (single_pred_p (fin_bb))
4538 while (1)
4540 psi = gsi_start_phis (fin_bb);
4541 if (gsi_end_p (psi))
4542 break;
4543 remove_phi_node (&psi, false);
4546 /* Make phi node for trip. */
4547 phi = create_phi_node (trip_main, iter_part_bb);
4548 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4549 UNKNOWN_LOCATION);
4550 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4551 UNKNOWN_LOCATION);
4554 if (!broken_loop)
4555 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4556 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4557 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4558 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4559 recompute_dominator (CDI_DOMINATORS, fin_bb));
4560 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4561 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4562 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4563 recompute_dominator (CDI_DOMINATORS, body_bb));
4565 if (!broken_loop)
4567 struct loop *loop = body_bb->loop_father;
4568 struct loop *trip_loop = alloc_loop ();
4569 trip_loop->header = iter_part_bb;
4570 trip_loop->latch = trip_update_bb;
4571 add_loop (trip_loop, iter_part_bb->loop_father);
4573 if (loop != entry_bb->loop_father)
4575 gcc_assert (loop->header == body_bb);
4576 gcc_assert (loop->latch == region->cont
4577 || single_pred (loop->latch) == region->cont);
4578 trip_loop->inner = loop;
4579 return;
4582 if (!gimple_omp_for_combined_p (fd->for_stmt))
4584 loop = alloc_loop ();
4585 loop->header = body_bb;
4586 if (collapse_bb == NULL)
4587 loop->latch = cont_bb;
4588 add_loop (loop, trip_loop);
4593 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4594 loop. Given parameters:
4596 for (V = N1; V cond N2; V += STEP) BODY;
4598 where COND is "<" or ">", we generate pseudocode
4600 V = N1;
4601 goto L1;
4603 BODY;
4604 V += STEP;
4606 if (V cond N2) goto L0; else goto L2;
4609 For collapsed loops, given parameters:
4610 collapse(3)
4611 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4612 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4613 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4614 BODY;
4616 we generate pseudocode
4618 if (cond3 is <)
4619 adj = STEP3 - 1;
4620 else
4621 adj = STEP3 + 1;
4622 count3 = (adj + N32 - N31) / STEP3;
4623 if (cond2 is <)
4624 adj = STEP2 - 1;
4625 else
4626 adj = STEP2 + 1;
4627 count2 = (adj + N22 - N21) / STEP2;
4628 if (cond1 is <)
4629 adj = STEP1 - 1;
4630 else
4631 adj = STEP1 + 1;
4632 count1 = (adj + N12 - N11) / STEP1;
4633 count = count1 * count2 * count3;
4634 V = 0;
4635 V1 = N11;
4636 V2 = N21;
4637 V3 = N31;
4638 goto L1;
4640 BODY;
4641 V += 1;
4642 V3 += STEP3;
4643 V2 += (V3 cond3 N32) ? 0 : STEP2;
4644 V3 = (V3 cond3 N32) ? V3 : N31;
4645 V1 += (V2 cond2 N22) ? 0 : STEP1;
4646 V2 = (V2 cond2 N22) ? V2 : N21;
4648 if (V < count) goto L0; else goto L2;
4653 static void
4654 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4656 tree type, t;
4657 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4658 gimple_stmt_iterator gsi;
4659 gimple *stmt;
4660 gcond *cond_stmt;
4661 bool broken_loop = region->cont == NULL;
4662 edge e, ne;
4663 tree *counts = NULL;
4664 int i;
4665 int safelen_int = INT_MAX;
4666 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4667 OMP_CLAUSE_SAFELEN);
4668 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4669 OMP_CLAUSE__SIMDUID_);
4670 tree n1, n2;
4672 if (safelen)
4674 poly_uint64 val;
4675 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4676 if (!poly_int_tree_p (safelen, &val))
4677 safelen_int = 0;
4678 else
4679 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4680 if (safelen_int == 1)
4681 safelen_int = 0;
4683 type = TREE_TYPE (fd->loop.v);
4684 entry_bb = region->entry;
4685 cont_bb = region->cont;
4686 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4687 gcc_assert (broken_loop
4688 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4689 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4690 if (!broken_loop)
4692 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4693 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4694 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4695 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4697 else
4699 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4700 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4701 l2_bb = single_succ (l1_bb);
4703 exit_bb = region->exit;
4704 l2_dom_bb = NULL;
4706 gsi = gsi_last_nondebug_bb (entry_bb);
4708 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4709 /* Not needed in SSA form right now. */
4710 gcc_assert (!gimple_in_ssa_p (cfun));
4711 if (fd->collapse > 1)
4713 int first_zero_iter = -1, dummy = -1;
4714 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4716 counts = XALLOCAVEC (tree, fd->collapse);
4717 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4718 zero_iter_bb, first_zero_iter,
4719 dummy_bb, dummy, l2_dom_bb);
4721 if (l2_dom_bb == NULL)
4722 l2_dom_bb = l1_bb;
4724 n1 = fd->loop.n1;
4725 n2 = fd->loop.n2;
4726 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4728 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4729 OMP_CLAUSE__LOOPTEMP_);
4730 gcc_assert (innerc);
4731 n1 = OMP_CLAUSE_DECL (innerc);
4732 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4733 OMP_CLAUSE__LOOPTEMP_);
4734 gcc_assert (innerc);
4735 n2 = OMP_CLAUSE_DECL (innerc);
4737 tree step = fd->loop.step;
4739 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4740 OMP_CLAUSE__SIMT_);
4741 if (is_simt)
4743 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4744 is_simt = safelen_int > 1;
4746 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4747 if (is_simt)
4749 simt_lane = create_tmp_var (unsigned_type_node);
4750 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4751 gimple_call_set_lhs (g, simt_lane);
4752 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4753 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4754 fold_convert (TREE_TYPE (step), simt_lane));
4755 n1 = fold_convert (type, n1);
4756 if (POINTER_TYPE_P (type))
4757 n1 = fold_build_pointer_plus (n1, offset);
4758 else
4759 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4761 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4762 if (fd->collapse > 1)
4763 simt_maxlane = build_one_cst (unsigned_type_node);
4764 else if (safelen_int < omp_max_simt_vf ())
4765 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4766 tree vf
4767 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4768 unsigned_type_node, 0);
4769 if (simt_maxlane)
4770 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4771 vf = fold_convert (TREE_TYPE (step), vf);
4772 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4775 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4776 if (fd->collapse > 1)
4778 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4780 gsi_prev (&gsi);
4781 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4782 gsi_next (&gsi);
4784 else
4785 for (i = 0; i < fd->collapse; i++)
4787 tree itype = TREE_TYPE (fd->loops[i].v);
4788 if (POINTER_TYPE_P (itype))
4789 itype = signed_type_for (itype);
4790 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4791 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4795 /* Remove the GIMPLE_OMP_FOR statement. */
4796 gsi_remove (&gsi, true);
4798 if (!broken_loop)
4800 /* Code to control the increment goes in the CONT_BB. */
4801 gsi = gsi_last_nondebug_bb (cont_bb);
4802 stmt = gsi_stmt (gsi);
4803 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4805 if (POINTER_TYPE_P (type))
4806 t = fold_build_pointer_plus (fd->loop.v, step);
4807 else
4808 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4809 expand_omp_build_assign (&gsi, fd->loop.v, t);
4811 if (fd->collapse > 1)
4813 i = fd->collapse - 1;
4814 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4816 t = fold_convert (sizetype, fd->loops[i].step);
4817 t = fold_build_pointer_plus (fd->loops[i].v, t);
4819 else
4821 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4822 fd->loops[i].step);
4823 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4824 fd->loops[i].v, t);
4826 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4828 for (i = fd->collapse - 1; i > 0; i--)
4830 tree itype = TREE_TYPE (fd->loops[i].v);
4831 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4832 if (POINTER_TYPE_P (itype2))
4833 itype2 = signed_type_for (itype2);
4834 t = fold_convert (itype2, fd->loops[i - 1].step);
4835 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4836 GSI_SAME_STMT);
4837 t = build3 (COND_EXPR, itype2,
4838 build2 (fd->loops[i].cond_code, boolean_type_node,
4839 fd->loops[i].v,
4840 fold_convert (itype, fd->loops[i].n2)),
4841 build_int_cst (itype2, 0), t);
4842 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4843 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4844 else
4845 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4846 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4848 t = fold_convert (itype, fd->loops[i].n1);
4849 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4850 GSI_SAME_STMT);
4851 t = build3 (COND_EXPR, itype,
4852 build2 (fd->loops[i].cond_code, boolean_type_node,
4853 fd->loops[i].v,
4854 fold_convert (itype, fd->loops[i].n2)),
4855 fd->loops[i].v, t);
4856 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4860 /* Remove GIMPLE_OMP_CONTINUE. */
4861 gsi_remove (&gsi, true);
4864 /* Emit the condition in L1_BB. */
4865 gsi = gsi_start_bb (l1_bb);
4867 t = fold_convert (type, n2);
4868 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4869 false, GSI_CONTINUE_LINKING);
4870 tree v = fd->loop.v;
4871 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4872 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4873 false, GSI_CONTINUE_LINKING);
4874 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4875 cond_stmt = gimple_build_cond_empty (t);
4876 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4877 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4878 NULL, NULL)
4879 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4880 NULL, NULL))
4882 gsi = gsi_for_stmt (cond_stmt);
4883 gimple_regimplify_operands (cond_stmt, &gsi);
4886 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4887 if (is_simt)
4889 gsi = gsi_start_bb (l2_bb);
4890 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4891 if (POINTER_TYPE_P (type))
4892 t = fold_build_pointer_plus (fd->loop.v, step);
4893 else
4894 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4895 expand_omp_build_assign (&gsi, fd->loop.v, t);
4898 /* Remove GIMPLE_OMP_RETURN. */
4899 gsi = gsi_last_nondebug_bb (exit_bb);
4900 gsi_remove (&gsi, true);
4902 /* Connect the new blocks. */
4903 remove_edge (FALLTHRU_EDGE (entry_bb));
4905 if (!broken_loop)
4907 remove_edge (BRANCH_EDGE (entry_bb));
4908 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4910 e = BRANCH_EDGE (l1_bb);
4911 ne = FALLTHRU_EDGE (l1_bb);
4912 e->flags = EDGE_TRUE_VALUE;
4914 else
4916 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4918 ne = single_succ_edge (l1_bb);
4919 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4922 ne->flags = EDGE_FALSE_VALUE;
4923 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4924 ne->probability = e->probability.invert ();
4926 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4927 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4929 if (simt_maxlane)
4931 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4932 NULL_TREE, NULL_TREE);
4933 gsi = gsi_last_bb (entry_bb);
4934 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4935 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4936 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4937 FALLTHRU_EDGE (entry_bb)->probability
4938 = profile_probability::guessed_always ().apply_scale (7, 8);
4939 BRANCH_EDGE (entry_bb)->probability
4940 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4941 l2_dom_bb = entry_bb;
4943 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4945 if (!broken_loop)
4947 struct loop *loop = alloc_loop ();
4948 loop->header = l1_bb;
4949 loop->latch = cont_bb;
4950 add_loop (loop, l1_bb->loop_father);
4951 loop->safelen = safelen_int;
4952 if (simduid)
4954 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4955 cfun->has_simduid_loops = true;
4957 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4958 the loop. */
4959 if ((flag_tree_loop_vectorize
4960 || !global_options_set.x_flag_tree_loop_vectorize)
4961 && flag_tree_loop_optimize
4962 && loop->safelen > 1)
4964 loop->force_vectorize = true;
4965 cfun->has_force_vectorize_loops = true;
4968 else if (simduid)
4969 cfun->has_simduid_loops = true;
4972 /* Taskloop construct is represented after gimplification with
4973 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4974 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4975 which should just compute all the needed loop temporaries
4976 for GIMPLE_OMP_TASK. */
4978 static void
4979 expand_omp_taskloop_for_outer (struct omp_region *region,
4980 struct omp_for_data *fd,
4981 gimple *inner_stmt)
4983 tree type, bias = NULL_TREE;
4984 basic_block entry_bb, cont_bb, exit_bb;
4985 gimple_stmt_iterator gsi;
4986 gassign *assign_stmt;
4987 tree *counts = NULL;
4988 int i;
4990 gcc_assert (inner_stmt);
4991 gcc_assert (region->cont);
4992 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4993 && gimple_omp_task_taskloop_p (inner_stmt));
4994 type = TREE_TYPE (fd->loop.v);
4996 /* See if we need to bias by LLONG_MIN. */
4997 if (fd->iter_type == long_long_unsigned_type_node
4998 && TREE_CODE (type) == INTEGER_TYPE
4999 && !TYPE_UNSIGNED (type))
5001 tree n1, n2;
5003 if (fd->loop.cond_code == LT_EXPR)
5005 n1 = fd->loop.n1;
5006 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5008 else
5010 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5011 n2 = fd->loop.n1;
5013 if (TREE_CODE (n1) != INTEGER_CST
5014 || TREE_CODE (n2) != INTEGER_CST
5015 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5016 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5019 entry_bb = region->entry;
5020 cont_bb = region->cont;
5021 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5022 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5023 exit_bb = region->exit;
5025 gsi = gsi_last_nondebug_bb (entry_bb);
5026 gimple *for_stmt = gsi_stmt (gsi);
5027 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5028 if (fd->collapse > 1)
5030 int first_zero_iter = -1, dummy = -1;
5031 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5033 counts = XALLOCAVEC (tree, fd->collapse);
5034 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5035 zero_iter_bb, first_zero_iter,
5036 dummy_bb, dummy, l2_dom_bb);
5038 if (zero_iter_bb)
5040 /* Some counts[i] vars might be uninitialized if
5041 some loop has zero iterations. But the body shouldn't
5042 be executed in that case, so just avoid uninit warnings. */
5043 for (i = first_zero_iter; i < fd->collapse; i++)
5044 if (SSA_VAR_P (counts[i]))
5045 TREE_NO_WARNING (counts[i]) = 1;
5046 gsi_prev (&gsi);
5047 edge e = split_block (entry_bb, gsi_stmt (gsi));
5048 entry_bb = e->dest;
5049 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5050 gsi = gsi_last_bb (entry_bb);
5051 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5052 get_immediate_dominator (CDI_DOMINATORS,
5053 zero_iter_bb));
5057 tree t0, t1;
5058 t1 = fd->loop.n2;
5059 t0 = fd->loop.n1;
5060 if (POINTER_TYPE_P (TREE_TYPE (t0))
5061 && TYPE_PRECISION (TREE_TYPE (t0))
5062 != TYPE_PRECISION (fd->iter_type))
5064 /* Avoid casting pointers to integer of a different size. */
5065 tree itype = signed_type_for (type);
5066 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5067 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5069 else
5071 t1 = fold_convert (fd->iter_type, t1);
5072 t0 = fold_convert (fd->iter_type, t0);
5074 if (bias)
5076 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5077 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5080 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5081 OMP_CLAUSE__LOOPTEMP_);
5082 gcc_assert (innerc);
5083 tree startvar = OMP_CLAUSE_DECL (innerc);
5084 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5085 gcc_assert (innerc);
5086 tree endvar = OMP_CLAUSE_DECL (innerc);
5087 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5089 gcc_assert (innerc);
5090 for (i = 1; i < fd->collapse; i++)
5092 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5093 OMP_CLAUSE__LOOPTEMP_);
5094 gcc_assert (innerc);
5096 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5097 OMP_CLAUSE__LOOPTEMP_);
5098 if (innerc)
5100 /* If needed (inner taskloop has lastprivate clause), propagate
5101 down the total number of iterations. */
5102 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5103 NULL_TREE, false,
5104 GSI_CONTINUE_LINKING);
5105 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5110 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5111 GSI_CONTINUE_LINKING);
5112 assign_stmt = gimple_build_assign (startvar, t0);
5113 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5115 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5116 GSI_CONTINUE_LINKING);
5117 assign_stmt = gimple_build_assign (endvar, t1);
5118 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5119 if (fd->collapse > 1)
5120 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5122 /* Remove the GIMPLE_OMP_FOR statement. */
5123 gsi = gsi_for_stmt (for_stmt);
5124 gsi_remove (&gsi, true);
5126 gsi = gsi_last_nondebug_bb (cont_bb);
5127 gsi_remove (&gsi, true);
5129 gsi = gsi_last_nondebug_bb (exit_bb);
5130 gsi_remove (&gsi, true);
5132 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5133 remove_edge (BRANCH_EDGE (entry_bb));
5134 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5135 remove_edge (BRANCH_EDGE (cont_bb));
5136 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5137 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5138 recompute_dominator (CDI_DOMINATORS, region->entry));
5141 /* Taskloop construct is represented after gimplification with
5142 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5143 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5144 GOMP_taskloop{,_ull} function arranges for each task to be given just
5145 a single range of iterations. */
5147 static void
5148 expand_omp_taskloop_for_inner (struct omp_region *region,
5149 struct omp_for_data *fd,
5150 gimple *inner_stmt)
5152 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5153 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5154 basic_block fin_bb;
5155 gimple_stmt_iterator gsi;
5156 edge ep;
5157 bool broken_loop = region->cont == NULL;
5158 tree *counts = NULL;
5159 tree n1, n2, step;
5161 itype = type = TREE_TYPE (fd->loop.v);
5162 if (POINTER_TYPE_P (type))
5163 itype = signed_type_for (type);
5165 /* See if we need to bias by LLONG_MIN. */
5166 if (fd->iter_type == long_long_unsigned_type_node
5167 && TREE_CODE (type) == INTEGER_TYPE
5168 && !TYPE_UNSIGNED (type))
5170 tree n1, n2;
5172 if (fd->loop.cond_code == LT_EXPR)
5174 n1 = fd->loop.n1;
5175 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5177 else
5179 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5180 n2 = fd->loop.n1;
5182 if (TREE_CODE (n1) != INTEGER_CST
5183 || TREE_CODE (n2) != INTEGER_CST
5184 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5185 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5188 entry_bb = region->entry;
5189 cont_bb = region->cont;
5190 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5191 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5192 gcc_assert (broken_loop
5193 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5194 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5195 if (!broken_loop)
5197 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5198 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5200 exit_bb = region->exit;
5202 /* Iteration space partitioning goes in ENTRY_BB. */
5203 gsi = gsi_last_nondebug_bb (entry_bb);
5204 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5206 if (fd->collapse > 1)
5208 int first_zero_iter = -1, dummy = -1;
5209 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5211 counts = XALLOCAVEC (tree, fd->collapse);
5212 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5213 fin_bb, first_zero_iter,
5214 dummy_bb, dummy, l2_dom_bb);
5215 t = NULL_TREE;
5217 else
5218 t = integer_one_node;
5220 step = fd->loop.step;
5221 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5222 OMP_CLAUSE__LOOPTEMP_);
5223 gcc_assert (innerc);
5224 n1 = OMP_CLAUSE_DECL (innerc);
5225 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5226 gcc_assert (innerc);
5227 n2 = OMP_CLAUSE_DECL (innerc);
5228 if (bias)
5230 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5231 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5233 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5234 true, NULL_TREE, true, GSI_SAME_STMT);
5235 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5236 true, NULL_TREE, true, GSI_SAME_STMT);
5237 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5238 true, NULL_TREE, true, GSI_SAME_STMT);
5240 tree startvar = fd->loop.v;
5241 tree endvar = NULL_TREE;
5243 if (gimple_omp_for_combined_p (fd->for_stmt))
5245 tree clauses = gimple_omp_for_clauses (inner_stmt);
5246 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5247 gcc_assert (innerc);
5248 startvar = OMP_CLAUSE_DECL (innerc);
5249 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5250 OMP_CLAUSE__LOOPTEMP_);
5251 gcc_assert (innerc);
5252 endvar = OMP_CLAUSE_DECL (innerc);
5254 t = fold_convert (TREE_TYPE (startvar), n1);
5255 t = force_gimple_operand_gsi (&gsi, t,
5256 DECL_P (startvar)
5257 && TREE_ADDRESSABLE (startvar),
5258 NULL_TREE, false, GSI_CONTINUE_LINKING);
5259 gimple *assign_stmt = gimple_build_assign (startvar, t);
5260 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5262 t = fold_convert (TREE_TYPE (startvar), n2);
5263 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5264 false, GSI_CONTINUE_LINKING);
5265 if (endvar)
5267 assign_stmt = gimple_build_assign (endvar, e);
5268 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5269 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5270 assign_stmt = gimple_build_assign (fd->loop.v, e);
5271 else
5272 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5273 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5275 if (fd->collapse > 1)
5276 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5278 if (!broken_loop)
5280 /* The code controlling the sequential loop replaces the
5281 GIMPLE_OMP_CONTINUE. */
5282 gsi = gsi_last_nondebug_bb (cont_bb);
5283 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5284 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5285 vmain = gimple_omp_continue_control_use (cont_stmt);
5286 vback = gimple_omp_continue_control_def (cont_stmt);
5288 if (!gimple_omp_for_combined_p (fd->for_stmt))
5290 if (POINTER_TYPE_P (type))
5291 t = fold_build_pointer_plus (vmain, step);
5292 else
5293 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5294 t = force_gimple_operand_gsi (&gsi, t,
5295 DECL_P (vback)
5296 && TREE_ADDRESSABLE (vback),
5297 NULL_TREE, true, GSI_SAME_STMT);
5298 assign_stmt = gimple_build_assign (vback, t);
5299 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5301 t = build2 (fd->loop.cond_code, boolean_type_node,
5302 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5303 ? t : vback, e);
5304 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5307 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5308 gsi_remove (&gsi, true);
5310 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5311 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5314 /* Remove the GIMPLE_OMP_FOR statement. */
5315 gsi = gsi_for_stmt (fd->for_stmt);
5316 gsi_remove (&gsi, true);
5318 /* Remove the GIMPLE_OMP_RETURN statement. */
5319 gsi = gsi_last_nondebug_bb (exit_bb);
5320 gsi_remove (&gsi, true);
5322 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5323 if (!broken_loop)
5324 remove_edge (BRANCH_EDGE (entry_bb));
5325 else
5327 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5328 region->outer->cont = NULL;
5331 /* Connect all the blocks. */
5332 if (!broken_loop)
5334 ep = find_edge (cont_bb, body_bb);
5335 if (gimple_omp_for_combined_p (fd->for_stmt))
5337 remove_edge (ep);
5338 ep = NULL;
5340 else if (fd->collapse > 1)
5342 remove_edge (ep);
5343 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5345 else
5346 ep->flags = EDGE_TRUE_VALUE;
5347 find_edge (cont_bb, fin_bb)->flags
5348 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5351 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5352 recompute_dominator (CDI_DOMINATORS, body_bb));
5353 if (!broken_loop)
5354 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5355 recompute_dominator (CDI_DOMINATORS, fin_bb));
5357 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5359 struct loop *loop = alloc_loop ();
5360 loop->header = body_bb;
5361 if (collapse_bb == NULL)
5362 loop->latch = cont_bb;
5363 add_loop (loop, body_bb->loop_father);
5367 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5368 partitioned loop. The lowering here is abstracted, in that the
5369 loop parameters are passed through internal functions, which are
5370 further lowered by oacc_device_lower, once we get to the target
5371 compiler. The loop is of the form:
5373 for (V = B; V LTGT E; V += S) {BODY}
5375 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5376 (constant 0 for no chunking) and we will have a GWV partitioning
5377 mask, specifying dimensions over which the loop is to be
5378 partitioned (see note below). We generate code that looks like
5379 (this ignores tiling):
5381 <entry_bb> [incoming FALL->body, BRANCH->exit]
5382 typedef signedintify (typeof (V)) T; // underlying signed integral type
5383 T range = E - B;
5384 T chunk_no = 0;
5385 T DIR = LTGT == '<' ? +1 : -1;
5386 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5387 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5389 <head_bb> [created by splitting end of entry_bb]
5390 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5391 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5392 if (!(offset LTGT bound)) goto bottom_bb;
5394 <body_bb> [incoming]
5395 V = B + offset;
5396 {BODY}
5398 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5399 offset += step;
5400 if (offset LTGT bound) goto body_bb; [*]
5402 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5403 chunk_no++;
5404 if (chunk < chunk_max) goto head_bb;
5406 <exit_bb> [incoming]
5407 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5409 [*] Needed if V live at end of loop. */
5411 static void
5412 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5414 tree v = fd->loop.v;
5415 enum tree_code cond_code = fd->loop.cond_code;
5416 enum tree_code plus_code = PLUS_EXPR;
5418 tree chunk_size = integer_minus_one_node;
5419 tree gwv = integer_zero_node;
5420 tree iter_type = TREE_TYPE (v);
5421 tree diff_type = iter_type;
5422 tree plus_type = iter_type;
5423 struct oacc_collapse *counts = NULL;
5425 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5426 == GF_OMP_FOR_KIND_OACC_LOOP);
5427 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5428 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5430 if (POINTER_TYPE_P (iter_type))
5432 plus_code = POINTER_PLUS_EXPR;
5433 plus_type = sizetype;
5435 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5436 diff_type = signed_type_for (diff_type);
5437 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5438 diff_type = integer_type_node;
5440 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5441 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5442 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5443 basic_block bottom_bb = NULL;
5445 /* entry_bb has two sucessors; the branch edge is to the exit
5446 block, fallthrough edge to body. */
5447 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5448 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5450 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5451 body_bb, or to a block whose only successor is the body_bb. Its
5452 fallthrough successor is the final block (same as the branch
5453 successor of the entry_bb). */
5454 if (cont_bb)
5456 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5457 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5459 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5460 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5462 else
5463 gcc_assert (!gimple_in_ssa_p (cfun));
5465 /* The exit block only has entry_bb and cont_bb as predecessors. */
5466 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5468 tree chunk_no;
5469 tree chunk_max = NULL_TREE;
5470 tree bound, offset;
5471 tree step = create_tmp_var (diff_type, ".step");
5472 bool up = cond_code == LT_EXPR;
5473 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5474 bool chunking = !gimple_in_ssa_p (cfun);
5475 bool negating;
5477 /* Tiling vars. */
5478 tree tile_size = NULL_TREE;
5479 tree element_s = NULL_TREE;
5480 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5481 basic_block elem_body_bb = NULL;
5482 basic_block elem_cont_bb = NULL;
5484 /* SSA instances. */
5485 tree offset_incr = NULL_TREE;
5486 tree offset_init = NULL_TREE;
5488 gimple_stmt_iterator gsi;
5489 gassign *ass;
5490 gcall *call;
5491 gimple *stmt;
5492 tree expr;
5493 location_t loc;
5494 edge split, be, fte;
5496 /* Split the end of entry_bb to create head_bb. */
5497 split = split_block (entry_bb, last_stmt (entry_bb));
5498 basic_block head_bb = split->dest;
5499 entry_bb = split->src;
5501 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5502 gsi = gsi_last_nondebug_bb (entry_bb);
5503 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5504 loc = gimple_location (for_stmt);
5506 if (gimple_in_ssa_p (cfun))
5508 offset_init = gimple_omp_for_index (for_stmt, 0);
5509 gcc_assert (integer_zerop (fd->loop.n1));
5510 /* The SSA parallelizer does gang parallelism. */
5511 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5514 if (fd->collapse > 1 || fd->tiling)
5516 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5517 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5518 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5519 TREE_TYPE (fd->loop.n2), loc);
5521 if (SSA_VAR_P (fd->loop.n2))
5523 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5524 true, GSI_SAME_STMT);
5525 ass = gimple_build_assign (fd->loop.n2, total);
5526 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5530 tree b = fd->loop.n1;
5531 tree e = fd->loop.n2;
5532 tree s = fd->loop.step;
5534 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5535 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5537 /* Convert the step, avoiding possible unsigned->signed overflow. */
5538 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5539 if (negating)
5540 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5541 s = fold_convert (diff_type, s);
5542 if (negating)
5543 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5544 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5546 if (!chunking)
5547 chunk_size = integer_zero_node;
5548 expr = fold_convert (diff_type, chunk_size);
5549 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5550 NULL_TREE, true, GSI_SAME_STMT);
5552 if (fd->tiling)
5554 /* Determine the tile size and element step,
5555 modify the outer loop step size. */
5556 tile_size = create_tmp_var (diff_type, ".tile_size");
5557 expr = build_int_cst (diff_type, 1);
5558 for (int ix = 0; ix < fd->collapse; ix++)
5559 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5560 expr = force_gimple_operand_gsi (&gsi, expr, true,
5561 NULL_TREE, true, GSI_SAME_STMT);
5562 ass = gimple_build_assign (tile_size, expr);
5563 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5565 element_s = create_tmp_var (diff_type, ".element_s");
5566 ass = gimple_build_assign (element_s, s);
5567 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5569 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5570 s = force_gimple_operand_gsi (&gsi, expr, true,
5571 NULL_TREE, true, GSI_SAME_STMT);
5574 /* Determine the range, avoiding possible unsigned->signed overflow. */
5575 negating = !up && TYPE_UNSIGNED (iter_type);
5576 expr = fold_build2 (MINUS_EXPR, plus_type,
5577 fold_convert (plus_type, negating ? b : e),
5578 fold_convert (plus_type, negating ? e : b));
5579 expr = fold_convert (diff_type, expr);
5580 if (negating)
5581 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5582 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5583 NULL_TREE, true, GSI_SAME_STMT);
5585 chunk_no = build_int_cst (diff_type, 0);
5586 if (chunking)
5588 gcc_assert (!gimple_in_ssa_p (cfun));
5590 expr = chunk_no;
5591 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5592 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5594 ass = gimple_build_assign (chunk_no, expr);
5595 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5597 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5598 build_int_cst (integer_type_node,
5599 IFN_GOACC_LOOP_CHUNKS),
5600 dir, range, s, chunk_size, gwv);
5601 gimple_call_set_lhs (call, chunk_max);
5602 gimple_set_location (call, loc);
5603 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5605 else
5606 chunk_size = chunk_no;
5608 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5609 build_int_cst (integer_type_node,
5610 IFN_GOACC_LOOP_STEP),
5611 dir, range, s, chunk_size, gwv);
5612 gimple_call_set_lhs (call, step);
5613 gimple_set_location (call, loc);
5614 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5616 /* Remove the GIMPLE_OMP_FOR. */
5617 gsi_remove (&gsi, true);
5619 /* Fixup edges from head_bb. */
5620 be = BRANCH_EDGE (head_bb);
5621 fte = FALLTHRU_EDGE (head_bb);
5622 be->flags |= EDGE_FALSE_VALUE;
5623 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5625 basic_block body_bb = fte->dest;
5627 if (gimple_in_ssa_p (cfun))
5629 gsi = gsi_last_nondebug_bb (cont_bb);
5630 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5632 offset = gimple_omp_continue_control_use (cont_stmt);
5633 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5635 else
5637 offset = create_tmp_var (diff_type, ".offset");
5638 offset_init = offset_incr = offset;
5640 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5642 /* Loop offset & bound go into head_bb. */
5643 gsi = gsi_start_bb (head_bb);
5645 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5646 build_int_cst (integer_type_node,
5647 IFN_GOACC_LOOP_OFFSET),
5648 dir, range, s,
5649 chunk_size, gwv, chunk_no);
5650 gimple_call_set_lhs (call, offset_init);
5651 gimple_set_location (call, loc);
5652 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5654 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5655 build_int_cst (integer_type_node,
5656 IFN_GOACC_LOOP_BOUND),
5657 dir, range, s,
5658 chunk_size, gwv, offset_init);
5659 gimple_call_set_lhs (call, bound);
5660 gimple_set_location (call, loc);
5661 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5663 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5664 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5665 GSI_CONTINUE_LINKING);
5667 /* V assignment goes into body_bb. */
5668 if (!gimple_in_ssa_p (cfun))
5670 gsi = gsi_start_bb (body_bb);
5672 expr = build2 (plus_code, iter_type, b,
5673 fold_convert (plus_type, offset));
5674 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5675 true, GSI_SAME_STMT);
5676 ass = gimple_build_assign (v, expr);
5677 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5679 if (fd->collapse > 1 || fd->tiling)
5680 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5682 if (fd->tiling)
5684 /* Determine the range of the element loop -- usually simply
5685 the tile_size, but could be smaller if the final
5686 iteration of the outer loop is a partial tile. */
5687 tree e_range = create_tmp_var (diff_type, ".e_range");
5689 expr = build2 (MIN_EXPR, diff_type,
5690 build2 (MINUS_EXPR, diff_type, bound, offset),
5691 build2 (MULT_EXPR, diff_type, tile_size,
5692 element_s));
5693 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5694 true, GSI_SAME_STMT);
5695 ass = gimple_build_assign (e_range, expr);
5696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5698 /* Determine bound, offset & step of inner loop. */
5699 e_bound = create_tmp_var (diff_type, ".e_bound");
5700 e_offset = create_tmp_var (diff_type, ".e_offset");
5701 e_step = create_tmp_var (diff_type, ".e_step");
5703 /* Mark these as element loops. */
5704 tree t, e_gwv = integer_minus_one_node;
5705 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5707 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5708 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5709 element_s, chunk, e_gwv, chunk);
5710 gimple_call_set_lhs (call, e_offset);
5711 gimple_set_location (call, loc);
5712 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5714 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5715 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5716 element_s, chunk, e_gwv, e_offset);
5717 gimple_call_set_lhs (call, e_bound);
5718 gimple_set_location (call, loc);
5719 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5721 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5722 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5723 element_s, chunk, e_gwv);
5724 gimple_call_set_lhs (call, e_step);
5725 gimple_set_location (call, loc);
5726 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5728 /* Add test and split block. */
5729 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5730 stmt = gimple_build_cond_empty (expr);
5731 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5732 split = split_block (body_bb, stmt);
5733 elem_body_bb = split->dest;
5734 if (cont_bb == body_bb)
5735 cont_bb = elem_body_bb;
5736 body_bb = split->src;
5738 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5740 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5741 if (cont_bb == NULL)
5743 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5744 e->probability = profile_probability::even ();
5745 split->probability = profile_probability::even ();
5748 /* Initialize the user's loop vars. */
5749 gsi = gsi_start_bb (elem_body_bb);
5750 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5754 /* Loop increment goes into cont_bb. If this is not a loop, we
5755 will have spawned threads as if it was, and each one will
5756 execute one iteration. The specification is not explicit about
5757 whether such constructs are ill-formed or not, and they can
5758 occur, especially when noreturn routines are involved. */
5759 if (cont_bb)
5761 gsi = gsi_last_nondebug_bb (cont_bb);
5762 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5763 loc = gimple_location (cont_stmt);
5765 if (fd->tiling)
5767 /* Insert element loop increment and test. */
5768 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5769 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5770 true, GSI_SAME_STMT);
5771 ass = gimple_build_assign (e_offset, expr);
5772 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5773 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5775 stmt = gimple_build_cond_empty (expr);
5776 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5777 split = split_block (cont_bb, stmt);
5778 elem_cont_bb = split->src;
5779 cont_bb = split->dest;
5781 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5782 split->probability = profile_probability::unlikely ().guessed ();
5783 edge latch_edge
5784 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5785 latch_edge->probability = profile_probability::likely ().guessed ();
5787 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5788 skip_edge->probability = profile_probability::unlikely ().guessed ();
5789 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5790 loop_entry_edge->probability
5791 = profile_probability::likely ().guessed ();
5793 gsi = gsi_for_stmt (cont_stmt);
5796 /* Increment offset. */
5797 if (gimple_in_ssa_p (cfun))
5798 expr = build2 (plus_code, iter_type, offset,
5799 fold_convert (plus_type, step));
5800 else
5801 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5802 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5803 true, GSI_SAME_STMT);
5804 ass = gimple_build_assign (offset_incr, expr);
5805 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5806 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5807 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5809 /* Remove the GIMPLE_OMP_CONTINUE. */
5810 gsi_remove (&gsi, true);
5812 /* Fixup edges from cont_bb. */
5813 be = BRANCH_EDGE (cont_bb);
5814 fte = FALLTHRU_EDGE (cont_bb);
5815 be->flags |= EDGE_TRUE_VALUE;
5816 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5818 if (chunking)
5820 /* Split the beginning of exit_bb to make bottom_bb. We
5821 need to insert a nop at the start, because splitting is
5822 after a stmt, not before. */
5823 gsi = gsi_start_bb (exit_bb);
5824 stmt = gimple_build_nop ();
5825 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5826 split = split_block (exit_bb, stmt);
5827 bottom_bb = split->src;
5828 exit_bb = split->dest;
5829 gsi = gsi_last_bb (bottom_bb);
5831 /* Chunk increment and test goes into bottom_bb. */
5832 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5833 build_int_cst (diff_type, 1));
5834 ass = gimple_build_assign (chunk_no, expr);
5835 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5837 /* Chunk test at end of bottom_bb. */
5838 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5839 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5840 GSI_CONTINUE_LINKING);
5842 /* Fixup edges from bottom_bb. */
5843 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5844 split->probability = profile_probability::unlikely ().guessed ();
5845 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5846 latch_edge->probability = profile_probability::likely ().guessed ();
5850 gsi = gsi_last_nondebug_bb (exit_bb);
5851 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5852 loc = gimple_location (gsi_stmt (gsi));
5854 if (!gimple_in_ssa_p (cfun))
5856 /* Insert the final value of V, in case it is live. This is the
5857 value for the only thread that survives past the join. */
5858 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5859 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5860 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5861 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5862 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5863 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5864 true, GSI_SAME_STMT);
5865 ass = gimple_build_assign (v, expr);
5866 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5869 /* Remove the OMP_RETURN. */
5870 gsi_remove (&gsi, true);
5872 if (cont_bb)
5874 /* We now have one, two or three nested loops. Update the loop
5875 structures. */
5876 struct loop *parent = entry_bb->loop_father;
5877 struct loop *body = body_bb->loop_father;
5879 if (chunking)
5881 struct loop *chunk_loop = alloc_loop ();
5882 chunk_loop->header = head_bb;
5883 chunk_loop->latch = bottom_bb;
5884 add_loop (chunk_loop, parent);
5885 parent = chunk_loop;
5887 else if (parent != body)
5889 gcc_assert (body->header == body_bb);
5890 gcc_assert (body->latch == cont_bb
5891 || single_pred (body->latch) == cont_bb);
5892 parent = NULL;
5895 if (parent)
5897 struct loop *body_loop = alloc_loop ();
5898 body_loop->header = body_bb;
5899 body_loop->latch = cont_bb;
5900 add_loop (body_loop, parent);
5902 if (fd->tiling)
5904 /* Insert tiling's element loop. */
5905 struct loop *inner_loop = alloc_loop ();
5906 inner_loop->header = elem_body_bb;
5907 inner_loop->latch = elem_cont_bb;
5908 add_loop (inner_loop, body_loop);
5914 /* Expand the OMP loop defined by REGION. */
5916 static void
5917 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5919 struct omp_for_data fd;
5920 struct omp_for_data_loop *loops;
5922 loops
5923 = (struct omp_for_data_loop *)
5924 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5925 * sizeof (struct omp_for_data_loop));
5926 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5927 &fd, loops);
5928 region->sched_kind = fd.sched_kind;
5929 region->sched_modifiers = fd.sched_modifiers;
5931 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5932 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5933 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5934 if (region->cont)
5936 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5937 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5938 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5940 else
5941 /* If there isn't a continue then this is a degerate case where
5942 the introduction of abnormal edges during lowering will prevent
5943 original loops from being detected. Fix that up. */
5944 loops_state_set (LOOPS_NEED_FIXUP);
5946 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5947 expand_omp_simd (region, &fd);
5948 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5950 gcc_assert (!inner_stmt);
5951 expand_oacc_for (region, &fd);
5953 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5955 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5956 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5957 else
5958 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5960 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5961 && !fd.have_ordered)
5963 if (fd.chunk_size == NULL)
5964 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5965 else
5966 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5968 else
5970 int fn_index, start_ix, next_ix;
5971 unsigned HOST_WIDE_INT sched = 0;
5972 tree sched_arg = NULL_TREE;
5974 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5975 == GF_OMP_FOR_KIND_FOR);
5976 if (fd.chunk_size == NULL
5977 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5978 fd.chunk_size = integer_zero_node;
5979 switch (fd.sched_kind)
5981 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5982 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5984 gcc_assert (!fd.have_ordered);
5985 fn_index = 6;
5986 sched = 4;
5988 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5989 && !fd.have_ordered)
5990 fn_index = 7;
5991 else
5993 fn_index = 3;
5994 sched = (HOST_WIDE_INT_1U << 31);
5996 break;
5997 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5998 case OMP_CLAUSE_SCHEDULE_GUIDED:
5999 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6000 && !fd.have_ordered)
6002 fn_index = 3 + fd.sched_kind;
6003 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6004 break;
6006 fn_index = fd.sched_kind;
6007 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6008 sched += (HOST_WIDE_INT_1U << 31);
6009 break;
6010 case OMP_CLAUSE_SCHEDULE_STATIC:
6011 gcc_assert (fd.have_ordered);
6012 fn_index = 0;
6013 sched = (HOST_WIDE_INT_1U << 31) + 1;
6014 break;
6015 default:
6016 gcc_unreachable ();
6018 if (!fd.ordered)
6019 fn_index += fd.have_ordered * 8;
6020 if (fd.ordered)
6021 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6022 else
6023 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6024 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6025 if (fd.have_reductemp)
6027 if (fd.ordered)
6028 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6029 else if (fd.have_ordered)
6030 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6031 else
6032 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6033 sched_arg = build_int_cstu (long_integer_type_node, sched);
6034 if (!fd.chunk_size)
6035 fd.chunk_size = integer_zero_node;
6037 if (fd.iter_type == long_long_unsigned_type_node)
6039 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6040 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6041 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6042 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6044 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6045 (enum built_in_function) next_ix, sched_arg,
6046 inner_stmt);
6049 if (gimple_in_ssa_p (cfun))
6050 update_ssa (TODO_update_ssa_only_virtuals);
6053 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6055 v = GOMP_sections_start (n);
6057 switch (v)
6059 case 0:
6060 goto L2;
6061 case 1:
6062 section 1;
6063 goto L1;
6064 case 2:
6066 case n:
6068 default:
6069 abort ();
6072 v = GOMP_sections_next ();
6073 goto L0;
6075 reduction;
6077 If this is a combined parallel sections, replace the call to
6078 GOMP_sections_start with call to GOMP_sections_next. */
6080 static void
6081 expand_omp_sections (struct omp_region *region)
6083 tree t, u, vin = NULL, vmain, vnext, l2;
6084 unsigned len;
6085 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6086 gimple_stmt_iterator si, switch_si;
6087 gomp_sections *sections_stmt;
6088 gimple *stmt;
6089 gomp_continue *cont;
6090 edge_iterator ei;
6091 edge e;
6092 struct omp_region *inner;
6093 unsigned i, casei;
6094 bool exit_reachable = region->cont != NULL;
6096 gcc_assert (region->exit != NULL);
6097 entry_bb = region->entry;
6098 l0_bb = single_succ (entry_bb);
6099 l1_bb = region->cont;
6100 l2_bb = region->exit;
6101 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6102 l2 = gimple_block_label (l2_bb);
6103 else
6105 /* This can happen if there are reductions. */
6106 len = EDGE_COUNT (l0_bb->succs);
6107 gcc_assert (len > 0);
6108 e = EDGE_SUCC (l0_bb, len - 1);
6109 si = gsi_last_nondebug_bb (e->dest);
6110 l2 = NULL_TREE;
6111 if (gsi_end_p (si)
6112 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6113 l2 = gimple_block_label (e->dest);
6114 else
6115 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6117 si = gsi_last_nondebug_bb (e->dest);
6118 if (gsi_end_p (si)
6119 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6121 l2 = gimple_block_label (e->dest);
6122 break;
6126 if (exit_reachable)
6127 default_bb = create_empty_bb (l1_bb->prev_bb);
6128 else
6129 default_bb = create_empty_bb (l0_bb);
6131 /* We will build a switch() with enough cases for all the
6132 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6133 and a default case to abort if something goes wrong. */
6134 len = EDGE_COUNT (l0_bb->succs);
6136 /* Use vec::quick_push on label_vec throughout, since we know the size
6137 in advance. */
6138 auto_vec<tree> label_vec (len);
6140 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6141 GIMPLE_OMP_SECTIONS statement. */
6142 si = gsi_last_nondebug_bb (entry_bb);
6143 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6144 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6145 vin = gimple_omp_sections_control (sections_stmt);
6146 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6147 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6148 if (reductmp)
6150 tree reductions = OMP_CLAUSE_DECL (reductmp);
6151 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6152 gimple *g = SSA_NAME_DEF_STMT (reductions);
6153 reductions = gimple_assign_rhs1 (g);
6154 OMP_CLAUSE_DECL (reductmp) = reductions;
6155 gimple_stmt_iterator gsi = gsi_for_stmt (g);
6156 t = build_int_cst (unsigned_type_node, len - 1);
6157 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6158 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6159 gimple_call_set_lhs (stmt, vin);
6160 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6161 gsi_remove (&gsi, true);
6162 release_ssa_name (gimple_assign_lhs (g));
6164 else if (!is_combined_parallel (region))
6166 /* If we are not inside a combined parallel+sections region,
6167 call GOMP_sections_start. */
6168 t = build_int_cst (unsigned_type_node, len - 1);
6169 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6170 stmt = gimple_build_call (u, 1, t);
6172 else
6174 /* Otherwise, call GOMP_sections_next. */
6175 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6176 stmt = gimple_build_call (u, 0);
6178 if (!reductmp)
6180 gimple_call_set_lhs (stmt, vin);
6181 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6183 gsi_remove (&si, true);
6185 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6186 L0_BB. */
6187 switch_si = gsi_last_nondebug_bb (l0_bb);
6188 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6189 if (exit_reachable)
6191 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6192 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6193 vmain = gimple_omp_continue_control_use (cont);
6194 vnext = gimple_omp_continue_control_def (cont);
6196 else
6198 vmain = vin;
6199 vnext = NULL_TREE;
6202 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6203 label_vec.quick_push (t);
6204 i = 1;
6206 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6207 for (inner = region->inner, casei = 1;
6208 inner;
6209 inner = inner->next, i++, casei++)
6211 basic_block s_entry_bb, s_exit_bb;
6213 /* Skip optional reduction region. */
6214 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6216 --i;
6217 --casei;
6218 continue;
6221 s_entry_bb = inner->entry;
6222 s_exit_bb = inner->exit;
6224 t = gimple_block_label (s_entry_bb);
6225 u = build_int_cst (unsigned_type_node, casei);
6226 u = build_case_label (u, NULL, t);
6227 label_vec.quick_push (u);
6229 si = gsi_last_nondebug_bb (s_entry_bb);
6230 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6231 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6232 gsi_remove (&si, true);
6233 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6235 if (s_exit_bb == NULL)
6236 continue;
6238 si = gsi_last_nondebug_bb (s_exit_bb);
6239 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6240 gsi_remove (&si, true);
6242 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6245 /* Error handling code goes in DEFAULT_BB. */
6246 t = gimple_block_label (default_bb);
6247 u = build_case_label (NULL, NULL, t);
6248 make_edge (l0_bb, default_bb, 0);
6249 add_bb_to_loop (default_bb, current_loops->tree_root);
6251 stmt = gimple_build_switch (vmain, u, label_vec);
6252 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6253 gsi_remove (&switch_si, true);
6255 si = gsi_start_bb (default_bb);
6256 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6257 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6259 if (exit_reachable)
6261 tree bfn_decl;
6263 /* Code to get the next section goes in L1_BB. */
6264 si = gsi_last_nondebug_bb (l1_bb);
6265 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6267 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6268 stmt = gimple_build_call (bfn_decl, 0);
6269 gimple_call_set_lhs (stmt, vnext);
6270 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6271 gsi_remove (&si, true);
6273 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6276 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6277 si = gsi_last_nondebug_bb (l2_bb);
6278 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6279 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6280 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6281 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6282 else
6283 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6284 stmt = gimple_build_call (t, 0);
6285 if (gimple_omp_return_lhs (gsi_stmt (si)))
6286 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6287 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6288 gsi_remove (&si, true);
6290 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6293 /* Expand code for an OpenMP single directive. We've already expanded
6294 much of the code, here we simply place the GOMP_barrier call. */
6296 static void
6297 expand_omp_single (struct omp_region *region)
6299 basic_block entry_bb, exit_bb;
6300 gimple_stmt_iterator si;
6302 entry_bb = region->entry;
6303 exit_bb = region->exit;
6305 si = gsi_last_nondebug_bb (entry_bb);
6306 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6307 gsi_remove (&si, true);
6308 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6310 si = gsi_last_nondebug_bb (exit_bb);
6311 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6313 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6314 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6316 gsi_remove (&si, true);
6317 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6320 /* Generic expansion for OpenMP synchronization directives: master,
6321 ordered and critical. All we need to do here is remove the entry
6322 and exit markers for REGION. */
6324 static void
6325 expand_omp_synch (struct omp_region *region)
6327 basic_block entry_bb, exit_bb;
6328 gimple_stmt_iterator si;
6330 entry_bb = region->entry;
6331 exit_bb = region->exit;
6333 si = gsi_last_nondebug_bb (entry_bb);
6334 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6335 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6336 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6337 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6338 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6339 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6340 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6341 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6343 expand_omp_taskreg (region);
6344 return;
6346 gsi_remove (&si, true);
6347 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6349 if (exit_bb)
6351 si = gsi_last_nondebug_bb (exit_bb);
6352 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6353 gsi_remove (&si, true);
6354 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6358 /* Translate enum omp_memory_order to enum memmodel. The two enums
6359 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6360 is 0. */
6362 static enum memmodel
6363 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6365 switch (mo)
6367 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6368 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6369 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6370 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6371 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6372 default: gcc_unreachable ();
6376 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6377 operation as a normal volatile load. */
6379 static bool
6380 expand_omp_atomic_load (basic_block load_bb, tree addr,
6381 tree loaded_val, int index)
6383 enum built_in_function tmpbase;
6384 gimple_stmt_iterator gsi;
6385 basic_block store_bb;
6386 location_t loc;
6387 gimple *stmt;
6388 tree decl, call, type, itype;
6390 gsi = gsi_last_nondebug_bb (load_bb);
6391 stmt = gsi_stmt (gsi);
6392 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6393 loc = gimple_location (stmt);
6395 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6396 is smaller than word size, then expand_atomic_load assumes that the load
6397 is atomic. We could avoid the builtin entirely in this case. */
6399 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6400 decl = builtin_decl_explicit (tmpbase);
6401 if (decl == NULL_TREE)
6402 return false;
6404 type = TREE_TYPE (loaded_val);
6405 itype = TREE_TYPE (TREE_TYPE (decl));
6407 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6408 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6409 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6410 if (!useless_type_conversion_p (type, itype))
6411 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6412 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6414 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6415 gsi_remove (&gsi, true);
6417 store_bb = single_succ (load_bb);
6418 gsi = gsi_last_nondebug_bb (store_bb);
6419 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6420 gsi_remove (&gsi, true);
6422 if (gimple_in_ssa_p (cfun))
6423 update_ssa (TODO_update_ssa_no_phi);
6425 return true;
6428 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6429 operation as a normal volatile store. */
6431 static bool
6432 expand_omp_atomic_store (basic_block load_bb, tree addr,
6433 tree loaded_val, tree stored_val, int index)
6435 enum built_in_function tmpbase;
6436 gimple_stmt_iterator gsi;
6437 basic_block store_bb = single_succ (load_bb);
6438 location_t loc;
6439 gimple *stmt;
6440 tree decl, call, type, itype;
6441 machine_mode imode;
6442 bool exchange;
6444 gsi = gsi_last_nondebug_bb (load_bb);
6445 stmt = gsi_stmt (gsi);
6446 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6448 /* If the load value is needed, then this isn't a store but an exchange. */
6449 exchange = gimple_omp_atomic_need_value_p (stmt);
6451 gsi = gsi_last_nondebug_bb (store_bb);
6452 stmt = gsi_stmt (gsi);
6453 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6454 loc = gimple_location (stmt);
6456 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6457 is smaller than word size, then expand_atomic_store assumes that the store
6458 is atomic. We could avoid the builtin entirely in this case. */
6460 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6461 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6462 decl = builtin_decl_explicit (tmpbase);
6463 if (decl == NULL_TREE)
6464 return false;
6466 type = TREE_TYPE (stored_val);
6468 /* Dig out the type of the function's second argument. */
6469 itype = TREE_TYPE (decl);
6470 itype = TYPE_ARG_TYPES (itype);
6471 itype = TREE_CHAIN (itype);
6472 itype = TREE_VALUE (itype);
6473 imode = TYPE_MODE (itype);
6475 if (exchange && !can_atomic_exchange_p (imode, true))
6476 return false;
6478 if (!useless_type_conversion_p (itype, type))
6479 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6480 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6481 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6482 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6483 if (exchange)
6485 if (!useless_type_conversion_p (type, itype))
6486 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6487 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6490 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6491 gsi_remove (&gsi, true);
6493 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6494 gsi = gsi_last_nondebug_bb (load_bb);
6495 gsi_remove (&gsi, true);
6497 if (gimple_in_ssa_p (cfun))
6498 update_ssa (TODO_update_ssa_no_phi);
6500 return true;
6503 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6504 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6505 size of the data type, and thus usable to find the index of the builtin
6506 decl. Returns false if the expression is not of the proper form. */
6508 static bool
6509 expand_omp_atomic_fetch_op (basic_block load_bb,
6510 tree addr, tree loaded_val,
6511 tree stored_val, int index)
6513 enum built_in_function oldbase, newbase, tmpbase;
6514 tree decl, itype, call;
6515 tree lhs, rhs;
6516 basic_block store_bb = single_succ (load_bb);
6517 gimple_stmt_iterator gsi;
6518 gimple *stmt;
6519 location_t loc;
6520 enum tree_code code;
6521 bool need_old, need_new;
6522 machine_mode imode;
6524 /* We expect to find the following sequences:
6526 load_bb:
6527 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6529 store_bb:
6530 val = tmp OP something; (or: something OP tmp)
6531 GIMPLE_OMP_STORE (val)
6533 ???FIXME: Allow a more flexible sequence.
6534 Perhaps use data flow to pick the statements.
6538 gsi = gsi_after_labels (store_bb);
6539 stmt = gsi_stmt (gsi);
6540 if (is_gimple_debug (stmt))
6542 gsi_next_nondebug (&gsi);
6543 if (gsi_end_p (gsi))
6544 return false;
6545 stmt = gsi_stmt (gsi);
6547 loc = gimple_location (stmt);
6548 if (!is_gimple_assign (stmt))
6549 return false;
6550 gsi_next_nondebug (&gsi);
6551 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6552 return false;
6553 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6554 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6555 enum omp_memory_order omo
6556 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6557 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6558 gcc_checking_assert (!need_old || !need_new);
6560 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6561 return false;
6563 /* Check for one of the supported fetch-op operations. */
6564 code = gimple_assign_rhs_code (stmt);
6565 switch (code)
6567 case PLUS_EXPR:
6568 case POINTER_PLUS_EXPR:
6569 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6570 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6571 break;
6572 case MINUS_EXPR:
6573 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6574 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6575 break;
6576 case BIT_AND_EXPR:
6577 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6578 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6579 break;
6580 case BIT_IOR_EXPR:
6581 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6582 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6583 break;
6584 case BIT_XOR_EXPR:
6585 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6586 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6587 break;
6588 default:
6589 return false;
6592 /* Make sure the expression is of the proper form. */
6593 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6594 rhs = gimple_assign_rhs2 (stmt);
6595 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6596 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6597 rhs = gimple_assign_rhs1 (stmt);
6598 else
6599 return false;
6601 tmpbase = ((enum built_in_function)
6602 ((need_new ? newbase : oldbase) + index + 1));
6603 decl = builtin_decl_explicit (tmpbase);
6604 if (decl == NULL_TREE)
6605 return false;
6606 itype = TREE_TYPE (TREE_TYPE (decl));
6607 imode = TYPE_MODE (itype);
6609 /* We could test all of the various optabs involved, but the fact of the
6610 matter is that (with the exception of i486 vs i586 and xadd) all targets
6611 that support any atomic operaton optab also implements compare-and-swap.
6612 Let optabs.c take care of expanding any compare-and-swap loop. */
6613 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6614 return false;
6616 gsi = gsi_last_nondebug_bb (load_bb);
6617 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6619 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6620 It only requires that the operation happen atomically. Thus we can
6621 use the RELAXED memory model. */
6622 call = build_call_expr_loc (loc, decl, 3, addr,
6623 fold_convert_loc (loc, itype, rhs),
6624 build_int_cst (NULL, mo));
6626 if (need_old || need_new)
6628 lhs = need_old ? loaded_val : stored_val;
6629 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6630 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6632 else
6633 call = fold_convert_loc (loc, void_type_node, call);
6634 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6635 gsi_remove (&gsi, true);
6637 gsi = gsi_last_nondebug_bb (store_bb);
6638 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6639 gsi_remove (&gsi, true);
6640 gsi = gsi_last_nondebug_bb (store_bb);
6641 stmt = gsi_stmt (gsi);
6642 gsi_remove (&gsi, true);
6644 if (gimple_in_ssa_p (cfun))
6646 release_defs (stmt);
6647 update_ssa (TODO_update_ssa_no_phi);
6650 return true;
6653 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6655 oldval = *addr;
6656 repeat:
6657 newval = rhs; // with oldval replacing *addr in rhs
6658 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6659 if (oldval != newval)
6660 goto repeat;
6662 INDEX is log2 of the size of the data type, and thus usable to find the
6663 index of the builtin decl. */
6665 static bool
6666 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6667 tree addr, tree loaded_val, tree stored_val,
6668 int index)
6670 tree loadedi, storedi, initial, new_storedi, old_vali;
6671 tree type, itype, cmpxchg, iaddr, atype;
6672 gimple_stmt_iterator si;
6673 basic_block loop_header = single_succ (load_bb);
6674 gimple *phi, *stmt;
6675 edge e;
6676 enum built_in_function fncode;
6678 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6679 order to use the RELAXED memory model effectively. */
6680 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6681 + index + 1);
6682 cmpxchg = builtin_decl_explicit (fncode);
6683 if (cmpxchg == NULL_TREE)
6684 return false;
6685 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6686 atype = type;
6687 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6689 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6690 || !can_atomic_load_p (TYPE_MODE (itype)))
6691 return false;
6693 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6694 si = gsi_last_nondebug_bb (load_bb);
6695 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6697 /* For floating-point values, we'll need to view-convert them to integers
6698 so that we can perform the atomic compare and swap. Simplify the
6699 following code by always setting up the "i"ntegral variables. */
6700 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6702 tree iaddr_val;
6704 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6705 true));
6706 atype = itype;
6707 iaddr_val
6708 = force_gimple_operand_gsi (&si,
6709 fold_convert (TREE_TYPE (iaddr), addr),
6710 false, NULL_TREE, true, GSI_SAME_STMT);
6711 stmt = gimple_build_assign (iaddr, iaddr_val);
6712 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6713 loadedi = create_tmp_var (itype);
6714 if (gimple_in_ssa_p (cfun))
6715 loadedi = make_ssa_name (loadedi);
6717 else
6719 iaddr = addr;
6720 loadedi = loaded_val;
6723 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6724 tree loaddecl = builtin_decl_explicit (fncode);
6725 if (loaddecl)
6726 initial
6727 = fold_convert (atype,
6728 build_call_expr (loaddecl, 2, iaddr,
6729 build_int_cst (NULL_TREE,
6730 MEMMODEL_RELAXED)));
6731 else
6733 tree off
6734 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6735 true), 0);
6736 initial = build2 (MEM_REF, atype, iaddr, off);
6739 initial
6740 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6741 GSI_SAME_STMT);
6743 /* Move the value to the LOADEDI temporary. */
6744 if (gimple_in_ssa_p (cfun))
6746 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6747 phi = create_phi_node (loadedi, loop_header);
6748 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6749 initial);
6751 else
6752 gsi_insert_before (&si,
6753 gimple_build_assign (loadedi, initial),
6754 GSI_SAME_STMT);
6755 if (loadedi != loaded_val)
6757 gimple_stmt_iterator gsi2;
6758 tree x;
6760 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6761 gsi2 = gsi_start_bb (loop_header);
6762 if (gimple_in_ssa_p (cfun))
6764 gassign *stmt;
6765 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6766 true, GSI_SAME_STMT);
6767 stmt = gimple_build_assign (loaded_val, x);
6768 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6770 else
6772 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6773 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6774 true, GSI_SAME_STMT);
6777 gsi_remove (&si, true);
6779 si = gsi_last_nondebug_bb (store_bb);
6780 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6782 if (iaddr == addr)
6783 storedi = stored_val;
6784 else
6785 storedi
6786 = force_gimple_operand_gsi (&si,
6787 build1 (VIEW_CONVERT_EXPR, itype,
6788 stored_val), true, NULL_TREE, true,
6789 GSI_SAME_STMT);
6791 /* Build the compare&swap statement. */
6792 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6793 new_storedi = force_gimple_operand_gsi (&si,
6794 fold_convert (TREE_TYPE (loadedi),
6795 new_storedi),
6796 true, NULL_TREE,
6797 true, GSI_SAME_STMT);
6799 if (gimple_in_ssa_p (cfun))
6800 old_vali = loadedi;
6801 else
6803 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6804 stmt = gimple_build_assign (old_vali, loadedi);
6805 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6807 stmt = gimple_build_assign (loadedi, new_storedi);
6808 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6811 /* Note that we always perform the comparison as an integer, even for
6812 floating point. This allows the atomic operation to properly
6813 succeed even with NaNs and -0.0. */
6814 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6815 stmt = gimple_build_cond_empty (ne);
6816 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6818 /* Update cfg. */
6819 e = single_succ_edge (store_bb);
6820 e->flags &= ~EDGE_FALLTHRU;
6821 e->flags |= EDGE_FALSE_VALUE;
6822 /* Expect no looping. */
6823 e->probability = profile_probability::guessed_always ();
6825 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6826 e->probability = profile_probability::guessed_never ();
6828 /* Copy the new value to loadedi (we already did that before the condition
6829 if we are not in SSA). */
6830 if (gimple_in_ssa_p (cfun))
6832 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6833 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6836 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6837 gsi_remove (&si, true);
6839 struct loop *loop = alloc_loop ();
6840 loop->header = loop_header;
6841 loop->latch = store_bb;
6842 add_loop (loop, loop_header->loop_father);
6844 if (gimple_in_ssa_p (cfun))
6845 update_ssa (TODO_update_ssa_no_phi);
6847 return true;
6850 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6852 GOMP_atomic_start ();
6853 *addr = rhs;
6854 GOMP_atomic_end ();
6856 The result is not globally atomic, but works so long as all parallel
6857 references are within #pragma omp atomic directives. According to
6858 responses received from omp@openmp.org, appears to be within spec.
6859 Which makes sense, since that's how several other compilers handle
6860 this situation as well.
6861 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6862 expanding. STORED_VAL is the operand of the matching
6863 GIMPLE_OMP_ATOMIC_STORE.
6865 We replace
6866 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6867 loaded_val = *addr;
6869 and replace
6870 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6871 *addr = stored_val;
6874 static bool
6875 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6876 tree addr, tree loaded_val, tree stored_val)
6878 gimple_stmt_iterator si;
6879 gassign *stmt;
6880 tree t;
6882 si = gsi_last_nondebug_bb (load_bb);
6883 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6885 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6886 t = build_call_expr (t, 0);
6887 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6889 tree mem = build_simple_mem_ref (addr);
6890 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6891 TREE_OPERAND (mem, 1)
6892 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6893 true),
6894 TREE_OPERAND (mem, 1));
6895 stmt = gimple_build_assign (loaded_val, mem);
6896 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6897 gsi_remove (&si, true);
6899 si = gsi_last_nondebug_bb (store_bb);
6900 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6902 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6903 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6905 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6906 t = build_call_expr (t, 0);
6907 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6908 gsi_remove (&si, true);
6910 if (gimple_in_ssa_p (cfun))
6911 update_ssa (TODO_update_ssa_no_phi);
6912 return true;
6915 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6916 using expand_omp_atomic_fetch_op. If it failed, we try to
6917 call expand_omp_atomic_pipeline, and if it fails too, the
6918 ultimate fallback is wrapping the operation in a mutex
6919 (expand_omp_atomic_mutex). REGION is the atomic region built
6920 by build_omp_regions_1(). */
6922 static void
6923 expand_omp_atomic (struct omp_region *region)
6925 basic_block load_bb = region->entry, store_bb = region->exit;
6926 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6927 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6928 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6929 tree addr = gimple_omp_atomic_load_rhs (load);
6930 tree stored_val = gimple_omp_atomic_store_val (store);
6931 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6932 HOST_WIDE_INT index;
6934 /* Make sure the type is one of the supported sizes. */
6935 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6936 index = exact_log2 (index);
6937 if (index >= 0 && index <= 4)
6939 unsigned int align = TYPE_ALIGN_UNIT (type);
6941 /* __sync builtins require strict data alignment. */
6942 if (exact_log2 (align) >= index)
6944 /* Atomic load. */
6945 scalar_mode smode;
6946 if (loaded_val == stored_val
6947 && (is_int_mode (TYPE_MODE (type), &smode)
6948 || is_float_mode (TYPE_MODE (type), &smode))
6949 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6950 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6951 return;
6953 /* Atomic store. */
6954 if ((is_int_mode (TYPE_MODE (type), &smode)
6955 || is_float_mode (TYPE_MODE (type), &smode))
6956 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6957 && store_bb == single_succ (load_bb)
6958 && first_stmt (store_bb) == store
6959 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6960 stored_val, index))
6961 return;
6963 /* When possible, use specialized atomic update functions. */
6964 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6965 && store_bb == single_succ (load_bb)
6966 && expand_omp_atomic_fetch_op (load_bb, addr,
6967 loaded_val, stored_val, index))
6968 return;
6970 /* If we don't have specialized __sync builtins, try and implement
6971 as a compare and swap loop. */
6972 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6973 loaded_val, stored_val, index))
6974 return;
6978 /* The ultimate fallback is wrapping the operation in a mutex. */
6979 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6982 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6983 at REGION_EXIT. */
6985 static void
6986 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6987 basic_block region_exit)
6989 struct loop *outer = region_entry->loop_father;
6990 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6992 /* Don't parallelize the kernels region if it contains more than one outer
6993 loop. */
6994 unsigned int nr_outer_loops = 0;
6995 struct loop *single_outer = NULL;
6996 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6998 gcc_assert (loop_outer (loop) == outer);
7000 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7001 continue;
7003 if (region_exit != NULL
7004 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7005 continue;
7007 nr_outer_loops++;
7008 single_outer = loop;
7010 if (nr_outer_loops != 1)
7011 return;
7013 for (struct loop *loop = single_outer->inner;
7014 loop != NULL;
7015 loop = loop->inner)
7016 if (loop->next)
7017 return;
7019 /* Mark the loops in the region. */
7020 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7021 loop->in_oacc_kernels_region = true;
7024 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
7026 struct GTY(()) grid_launch_attributes_trees
7028 tree kernel_dim_array_type;
7029 tree kernel_lattrs_dimnum_decl;
7030 tree kernel_lattrs_grid_decl;
7031 tree kernel_lattrs_group_decl;
7032 tree kernel_launch_attributes_type;
7035 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7037 /* Create types used to pass kernel launch attributes to target. */
7039 static void
7040 grid_create_kernel_launch_attr_types (void)
7042 if (grid_attr_trees)
7043 return;
7044 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7046 tree dim_arr_index_type
7047 = build_index_type (build_int_cst (integer_type_node, 2));
7048 grid_attr_trees->kernel_dim_array_type
7049 = build_array_type (uint32_type_node, dim_arr_index_type);
7051 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7052 grid_attr_trees->kernel_lattrs_dimnum_decl
7053 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7054 uint32_type_node);
7055 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7057 grid_attr_trees->kernel_lattrs_grid_decl
7058 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7059 grid_attr_trees->kernel_dim_array_type);
7060 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7061 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7062 grid_attr_trees->kernel_lattrs_group_decl
7063 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7064 grid_attr_trees->kernel_dim_array_type);
7065 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7066 = grid_attr_trees->kernel_lattrs_grid_decl;
7067 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7068 "__gomp_kernel_launch_attributes",
7069 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7072 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7073 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7074 of type uint32_type_node. */
7076 static void
7077 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7078 tree fld_decl, int index, tree value)
7080 tree ref = build4 (ARRAY_REF, uint32_type_node,
7081 build3 (COMPONENT_REF,
7082 grid_attr_trees->kernel_dim_array_type,
7083 range_var, fld_decl, NULL_TREE),
7084 build_int_cst (integer_type_node, index),
7085 NULL_TREE, NULL_TREE);
7086 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7089 /* Return a tree representation of a pointer to a structure with grid and
7090 work-group size information. Statements filling that information will be
7091 inserted before GSI, TGT_STMT is the target statement which has the
7092 necessary information in it. */
7094 static tree
7095 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7096 gomp_target *tgt_stmt)
7098 grid_create_kernel_launch_attr_types ();
7099 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7100 "__kernel_launch_attrs");
7102 unsigned max_dim = 0;
7103 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7104 clause;
7105 clause = OMP_CLAUSE_CHAIN (clause))
7107 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7108 continue;
7110 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7111 max_dim = MAX (dim, max_dim);
7113 grid_insert_store_range_dim (gsi, lattrs,
7114 grid_attr_trees->kernel_lattrs_grid_decl,
7115 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7116 grid_insert_store_range_dim (gsi, lattrs,
7117 grid_attr_trees->kernel_lattrs_group_decl,
7118 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7121 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7122 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7123 gcc_checking_assert (max_dim <= 2);
7124 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7125 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7126 GSI_SAME_STMT);
7127 TREE_ADDRESSABLE (lattrs) = 1;
7128 return build_fold_addr_expr (lattrs);
7131 /* Build target argument identifier from the DEVICE identifier, value
7132 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7134 static tree
7135 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7137 tree t = build_int_cst (integer_type_node, device);
7138 if (subseqent_param)
7139 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7140 build_int_cst (integer_type_node,
7141 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7142 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7143 build_int_cst (integer_type_node, id));
7144 return t;
7147 /* Like above but return it in type that can be directly stored as an element
7148 of the argument array. */
7150 static tree
7151 get_target_argument_identifier (int device, bool subseqent_param, int id)
7153 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7154 return fold_convert (ptr_type_node, t);
7157 /* Return a target argument consisting of DEVICE identifier, value identifier
7158 ID, and the actual VALUE. */
7160 static tree
7161 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7162 tree value)
7164 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7165 fold_convert (integer_type_node, value),
7166 build_int_cst (unsigned_type_node,
7167 GOMP_TARGET_ARG_VALUE_SHIFT));
7168 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7169 get_target_argument_identifier_1 (device, false, id));
7170 t = fold_convert (ptr_type_node, t);
7171 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7174 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7175 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7176 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7177 arguments. */
7179 static void
7180 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7181 int id, tree value, vec <tree> *args)
7183 if (tree_fits_shwi_p (value)
7184 && tree_to_shwi (value) > -(1 << 15)
7185 && tree_to_shwi (value) < (1 << 15))
7186 args->quick_push (get_target_argument_value (gsi, device, id, value));
7187 else
7189 args->quick_push (get_target_argument_identifier (device, true, id));
7190 value = fold_convert (ptr_type_node, value);
7191 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7192 GSI_SAME_STMT);
7193 args->quick_push (value);
7197 /* Create an array of arguments that is then passed to GOMP_target. */
7199 static tree
7200 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7202 auto_vec <tree, 6> args;
7203 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7204 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7205 if (c)
7206 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7207 else
7208 t = integer_minus_one_node;
7209 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7210 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7212 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7213 if (c)
7214 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7215 else
7216 t = integer_minus_one_node;
7217 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7218 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7219 &args);
7221 /* Add HSA-specific grid sizes, if available. */
7222 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7223 OMP_CLAUSE__GRIDDIM_))
7225 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7226 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7227 args.quick_push (t);
7228 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7231 /* Produce more, perhaps device specific, arguments here. */
7233 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7234 args.length () + 1),
7235 ".omp_target_args");
7236 for (unsigned i = 0; i < args.length (); i++)
7238 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7239 build_int_cst (integer_type_node, i),
7240 NULL_TREE, NULL_TREE);
7241 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7242 GSI_SAME_STMT);
7244 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7245 build_int_cst (integer_type_node, args.length ()),
7246 NULL_TREE, NULL_TREE);
7247 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7248 GSI_SAME_STMT);
7249 TREE_ADDRESSABLE (argarray) = 1;
7250 return build_fold_addr_expr (argarray);
7253 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7255 static void
7256 expand_omp_target (struct omp_region *region)
7258 basic_block entry_bb, exit_bb, new_bb;
7259 struct function *child_cfun;
7260 tree child_fn, block, t;
7261 gimple_stmt_iterator gsi;
7262 gomp_target *entry_stmt;
7263 gimple *stmt;
7264 edge e;
7265 bool offloaded, data_region;
7267 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7268 new_bb = region->entry;
7270 offloaded = is_gimple_omp_offloaded (entry_stmt);
7271 switch (gimple_omp_target_kind (entry_stmt))
7273 case GF_OMP_TARGET_KIND_REGION:
7274 case GF_OMP_TARGET_KIND_UPDATE:
7275 case GF_OMP_TARGET_KIND_ENTER_DATA:
7276 case GF_OMP_TARGET_KIND_EXIT_DATA:
7277 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7278 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7279 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7280 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7281 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7282 data_region = false;
7283 break;
7284 case GF_OMP_TARGET_KIND_DATA:
7285 case GF_OMP_TARGET_KIND_OACC_DATA:
7286 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7287 data_region = true;
7288 break;
7289 default:
7290 gcc_unreachable ();
7293 child_fn = NULL_TREE;
7294 child_cfun = NULL;
7295 if (offloaded)
7297 child_fn = gimple_omp_target_child_fn (entry_stmt);
7298 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7301 /* Supported by expand_omp_taskreg, but not here. */
7302 if (child_cfun != NULL)
7303 gcc_checking_assert (!child_cfun->cfg);
7304 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7306 entry_bb = region->entry;
7307 exit_bb = region->exit;
7309 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7311 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7313 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7314 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7315 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7316 DECL_ATTRIBUTES (child_fn)
7317 = tree_cons (get_identifier ("oacc kernels"),
7318 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7321 if (offloaded)
7323 unsigned srcidx, dstidx, num;
7325 /* If the offloading region needs data sent from the parent
7326 function, then the very first statement (except possible
7327 tree profile counter updates) of the offloading body
7328 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7329 &.OMP_DATA_O is passed as an argument to the child function,
7330 we need to replace it with the argument as seen by the child
7331 function.
7333 In most cases, this will end up being the identity assignment
7334 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7335 a function call that has been inlined, the original PARM_DECL
7336 .OMP_DATA_I may have been converted into a different local
7337 variable. In which case, we need to keep the assignment. */
7338 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7339 if (data_arg)
7341 basic_block entry_succ_bb = single_succ (entry_bb);
7342 gimple_stmt_iterator gsi;
7343 tree arg;
7344 gimple *tgtcopy_stmt = NULL;
7345 tree sender = TREE_VEC_ELT (data_arg, 0);
7347 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7349 gcc_assert (!gsi_end_p (gsi));
7350 stmt = gsi_stmt (gsi);
7351 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7352 continue;
7354 if (gimple_num_ops (stmt) == 2)
7356 tree arg = gimple_assign_rhs1 (stmt);
7358 /* We're ignoring the subcode because we're
7359 effectively doing a STRIP_NOPS. */
7361 if (TREE_CODE (arg) == ADDR_EXPR
7362 && TREE_OPERAND (arg, 0) == sender)
7364 tgtcopy_stmt = stmt;
7365 break;
7370 gcc_assert (tgtcopy_stmt != NULL);
7371 arg = DECL_ARGUMENTS (child_fn);
7373 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7374 gsi_remove (&gsi, true);
7377 /* Declare local variables needed in CHILD_CFUN. */
7378 block = DECL_INITIAL (child_fn);
7379 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7380 /* The gimplifier could record temporaries in the offloading block
7381 rather than in containing function's local_decls chain,
7382 which would mean cgraph missed finalizing them. Do it now. */
7383 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7384 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7385 varpool_node::finalize_decl (t);
7386 DECL_SAVED_TREE (child_fn) = NULL;
7387 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7388 gimple_set_body (child_fn, NULL);
7389 TREE_USED (block) = 1;
7391 /* Reset DECL_CONTEXT on function arguments. */
7392 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7393 DECL_CONTEXT (t) = child_fn;
7395 /* Split ENTRY_BB at GIMPLE_*,
7396 so that it can be moved to the child function. */
7397 gsi = gsi_last_nondebug_bb (entry_bb);
7398 stmt = gsi_stmt (gsi);
7399 gcc_assert (stmt
7400 && gimple_code (stmt) == gimple_code (entry_stmt));
7401 e = split_block (entry_bb, stmt);
7402 gsi_remove (&gsi, true);
7403 entry_bb = e->dest;
7404 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7406 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7407 if (exit_bb)
7409 gsi = gsi_last_nondebug_bb (exit_bb);
7410 gcc_assert (!gsi_end_p (gsi)
7411 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7412 stmt = gimple_build_return (NULL);
7413 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7414 gsi_remove (&gsi, true);
7417 /* Move the offloading region into CHILD_CFUN. */
7419 block = gimple_block (entry_stmt);
7421 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7422 if (exit_bb)
7423 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7424 /* When the OMP expansion process cannot guarantee an up-to-date
7425 loop tree arrange for the child function to fixup loops. */
7426 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7427 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7429 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7430 num = vec_safe_length (child_cfun->local_decls);
7431 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7433 t = (*child_cfun->local_decls)[srcidx];
7434 if (DECL_CONTEXT (t) == cfun->decl)
7435 continue;
7436 if (srcidx != dstidx)
7437 (*child_cfun->local_decls)[dstidx] = t;
7438 dstidx++;
7440 if (dstidx != num)
7441 vec_safe_truncate (child_cfun->local_decls, dstidx);
7443 /* Inform the callgraph about the new function. */
7444 child_cfun->curr_properties = cfun->curr_properties;
7445 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7446 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7447 cgraph_node *node = cgraph_node::get_create (child_fn);
7448 node->parallelized_function = 1;
7449 cgraph_node::add_new_function (child_fn, true);
7451 /* Add the new function to the offload table. */
7452 if (ENABLE_OFFLOADING)
7454 if (in_lto_p)
7455 DECL_PRESERVE_P (child_fn) = 1;
7456 vec_safe_push (offload_funcs, child_fn);
7459 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7460 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7462 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7463 fixed in a following pass. */
7464 push_cfun (child_cfun);
7465 if (need_asm)
7466 assign_assembler_name_if_needed (child_fn);
7467 cgraph_edge::rebuild_edges ();
7469 /* Some EH regions might become dead, see PR34608. If
7470 pass_cleanup_cfg isn't the first pass to happen with the
7471 new child, these dead EH edges might cause problems.
7472 Clean them up now. */
7473 if (flag_exceptions)
7475 basic_block bb;
7476 bool changed = false;
7478 FOR_EACH_BB_FN (bb, cfun)
7479 changed |= gimple_purge_dead_eh_edges (bb);
7480 if (changed)
7481 cleanup_tree_cfg ();
7483 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7484 verify_loop_structure ();
7485 pop_cfun ();
7487 if (dump_file && !gimple_in_ssa_p (cfun))
7489 omp_any_child_fn_dumped = true;
7490 dump_function_header (dump_file, child_fn, dump_flags);
7491 dump_function_to_file (child_fn, dump_file, dump_flags);
7494 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7497 /* Emit a library call to launch the offloading region, or do data
7498 transfers. */
7499 tree t1, t2, t3, t4, depend, c, clauses;
7500 enum built_in_function start_ix;
7501 unsigned int flags_i = 0;
7503 switch (gimple_omp_target_kind (entry_stmt))
7505 case GF_OMP_TARGET_KIND_REGION:
7506 start_ix = BUILT_IN_GOMP_TARGET;
7507 break;
7508 case GF_OMP_TARGET_KIND_DATA:
7509 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7510 break;
7511 case GF_OMP_TARGET_KIND_UPDATE:
7512 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7513 break;
7514 case GF_OMP_TARGET_KIND_ENTER_DATA:
7515 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7516 break;
7517 case GF_OMP_TARGET_KIND_EXIT_DATA:
7518 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7519 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7520 break;
7521 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7522 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7523 start_ix = BUILT_IN_GOACC_PARALLEL;
7524 break;
7525 case GF_OMP_TARGET_KIND_OACC_DATA:
7526 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7527 start_ix = BUILT_IN_GOACC_DATA_START;
7528 break;
7529 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7530 start_ix = BUILT_IN_GOACC_UPDATE;
7531 break;
7532 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7533 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7534 break;
7535 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7536 start_ix = BUILT_IN_GOACC_DECLARE;
7537 break;
7538 default:
7539 gcc_unreachable ();
7542 clauses = gimple_omp_target_clauses (entry_stmt);
7544 tree device = NULL_TREE;
7545 location_t device_loc = UNKNOWN_LOCATION;
7546 tree goacc_flags = NULL_TREE;
7547 if (is_gimple_omp_oacc (entry_stmt))
7549 /* By default, no GOACC_FLAGs are set. */
7550 goacc_flags = integer_zero_node;
7552 else
7554 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7555 if (c)
7557 device = OMP_CLAUSE_DEVICE_ID (c);
7558 device_loc = OMP_CLAUSE_LOCATION (c);
7560 else
7562 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7563 library choose). */
7564 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7565 device_loc = gimple_location (entry_stmt);
7568 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7569 if (c)
7570 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7573 /* By default, there is no conditional. */
7574 tree cond = NULL_TREE;
7575 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7576 if (c)
7577 cond = OMP_CLAUSE_IF_EXPR (c);
7578 /* If we found the clause 'if (cond)', build:
7579 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
7580 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
7581 if (cond)
7583 tree *tp;
7584 if (is_gimple_omp_oacc (entry_stmt))
7585 tp = &goacc_flags;
7586 else
7588 /* Ensure 'device' is of the correct type. */
7589 device = fold_convert_loc (device_loc, integer_type_node, device);
7591 tp = &device;
7594 cond = gimple_boolify (cond);
7596 basic_block cond_bb, then_bb, else_bb;
7597 edge e;
7598 tree tmp_var;
7600 tmp_var = create_tmp_var (TREE_TYPE (*tp));
7601 if (offloaded)
7602 e = split_block_after_labels (new_bb);
7603 else
7605 gsi = gsi_last_nondebug_bb (new_bb);
7606 gsi_prev (&gsi);
7607 e = split_block (new_bb, gsi_stmt (gsi));
7609 cond_bb = e->src;
7610 new_bb = e->dest;
7611 remove_edge (e);
7613 then_bb = create_empty_bb (cond_bb);
7614 else_bb = create_empty_bb (then_bb);
7615 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7616 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7618 stmt = gimple_build_cond_empty (cond);
7619 gsi = gsi_last_bb (cond_bb);
7620 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7622 gsi = gsi_start_bb (then_bb);
7623 stmt = gimple_build_assign (tmp_var, *tp);
7624 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7626 gsi = gsi_start_bb (else_bb);
7627 if (is_gimple_omp_oacc (entry_stmt))
7628 stmt = gimple_build_assign (tmp_var,
7629 BIT_IOR_EXPR,
7630 *tp,
7631 build_int_cst (integer_type_node,
7632 GOACC_FLAG_HOST_FALLBACK));
7633 else
7634 stmt = gimple_build_assign (tmp_var,
7635 build_int_cst (integer_type_node,
7636 GOMP_DEVICE_HOST_FALLBACK));
7637 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7639 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7640 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7641 add_bb_to_loop (then_bb, cond_bb->loop_father);
7642 add_bb_to_loop (else_bb, cond_bb->loop_father);
7643 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7644 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7646 *tp = tmp_var;
7648 gsi = gsi_last_nondebug_bb (new_bb);
7650 else
7652 gsi = gsi_last_nondebug_bb (new_bb);
7654 if (device != NULL_TREE)
7655 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7656 true, GSI_SAME_STMT);
7659 t = gimple_omp_target_data_arg (entry_stmt);
7660 if (t == NULL)
7662 t1 = size_zero_node;
7663 t2 = build_zero_cst (ptr_type_node);
7664 t3 = t2;
7665 t4 = t2;
7667 else
7669 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7670 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7671 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7672 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7673 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7676 gimple *g;
7677 bool tagging = false;
7678 /* The maximum number used by any start_ix, without varargs. */
7679 auto_vec<tree, 11> args;
7680 if (is_gimple_omp_oacc (entry_stmt))
7682 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7683 TREE_TYPE (goacc_flags), goacc_flags);
7684 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7685 NULL_TREE, true,
7686 GSI_SAME_STMT);
7687 args.quick_push (goacc_flags_m);
7689 else
7690 args.quick_push (device);
7691 if (offloaded)
7692 args.quick_push (build_fold_addr_expr (child_fn));
7693 args.quick_push (t1);
7694 args.quick_push (t2);
7695 args.quick_push (t3);
7696 args.quick_push (t4);
7697 switch (start_ix)
7699 case BUILT_IN_GOACC_DATA_START:
7700 case BUILT_IN_GOACC_DECLARE:
7701 case BUILT_IN_GOMP_TARGET_DATA:
7702 break;
7703 case BUILT_IN_GOMP_TARGET:
7704 case BUILT_IN_GOMP_TARGET_UPDATE:
7705 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7706 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7707 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7708 if (c)
7709 depend = OMP_CLAUSE_DECL (c);
7710 else
7711 depend = build_int_cst (ptr_type_node, 0);
7712 args.quick_push (depend);
7713 if (start_ix == BUILT_IN_GOMP_TARGET)
7714 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7715 break;
7716 case BUILT_IN_GOACC_PARALLEL:
7717 oacc_set_fn_attrib (child_fn, clauses, &args);
7718 tagging = true;
7719 /* FALLTHRU */
7720 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7721 case BUILT_IN_GOACC_UPDATE:
7723 tree t_async = NULL_TREE;
7725 /* If present, use the value specified by the respective
7726 clause, making sure that is of the correct type. */
7727 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7728 if (c)
7729 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7730 integer_type_node,
7731 OMP_CLAUSE_ASYNC_EXPR (c));
7732 else if (!tagging)
7733 /* Default values for t_async. */
7734 t_async = fold_convert_loc (gimple_location (entry_stmt),
7735 integer_type_node,
7736 build_int_cst (integer_type_node,
7737 GOMP_ASYNC_SYNC));
7738 if (tagging && t_async)
7740 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7742 if (TREE_CODE (t_async) == INTEGER_CST)
7744 /* See if we can pack the async arg in to the tag's
7745 operand. */
7746 i_async = TREE_INT_CST_LOW (t_async);
7747 if (i_async < GOMP_LAUNCH_OP_MAX)
7748 t_async = NULL_TREE;
7749 else
7750 i_async = GOMP_LAUNCH_OP_MAX;
7752 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7753 i_async));
7755 if (t_async)
7756 args.safe_push (t_async);
7758 /* Save the argument index, and ... */
7759 unsigned t_wait_idx = args.length ();
7760 unsigned num_waits = 0;
7761 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7762 if (!tagging || c)
7763 /* ... push a placeholder. */
7764 args.safe_push (integer_zero_node);
7766 for (; c; c = OMP_CLAUSE_CHAIN (c))
7767 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7769 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7770 integer_type_node,
7771 OMP_CLAUSE_WAIT_EXPR (c)));
7772 num_waits++;
7775 if (!tagging || num_waits)
7777 tree len;
7779 /* Now that we know the number, update the placeholder. */
7780 if (tagging)
7781 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7782 else
7783 len = build_int_cst (integer_type_node, num_waits);
7784 len = fold_convert_loc (gimple_location (entry_stmt),
7785 unsigned_type_node, len);
7786 args[t_wait_idx] = len;
7789 break;
7790 default:
7791 gcc_unreachable ();
7793 if (tagging)
7794 /* Push terminal marker - zero. */
7795 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7797 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7798 gimple_set_location (g, gimple_location (entry_stmt));
7799 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7800 if (!offloaded)
7802 g = gsi_stmt (gsi);
7803 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7804 gsi_remove (&gsi, true);
7806 if (data_region && region->exit)
7808 gsi = gsi_last_nondebug_bb (region->exit);
7809 g = gsi_stmt (gsi);
7810 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7811 gsi_remove (&gsi, true);
7815 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7816 iteration variable derived from the thread number. INTRA_GROUP means this
7817 is an expansion of a loop iterating over work-items within a separate
7818 iteration over groups. */
7820 static void
7821 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7823 gimple_stmt_iterator gsi;
7824 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7825 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7826 == GF_OMP_FOR_KIND_GRID_LOOP);
7827 size_t collapse = gimple_omp_for_collapse (for_stmt);
7828 struct omp_for_data_loop *loops
7829 = XALLOCAVEC (struct omp_for_data_loop,
7830 gimple_omp_for_collapse (for_stmt));
7831 struct omp_for_data fd;
7833 remove_edge (BRANCH_EDGE (kfor->entry));
7834 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7836 gcc_assert (kfor->cont);
7837 omp_extract_for_data (for_stmt, &fd, loops);
7839 gsi = gsi_start_bb (body_bb);
7841 for (size_t dim = 0; dim < collapse; dim++)
7843 tree type, itype;
7844 itype = type = TREE_TYPE (fd.loops[dim].v);
7845 if (POINTER_TYPE_P (type))
7846 itype = signed_type_for (type);
7848 tree n1 = fd.loops[dim].n1;
7849 tree step = fd.loops[dim].step;
7850 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7851 true, NULL_TREE, true, GSI_SAME_STMT);
7852 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7853 true, NULL_TREE, true, GSI_SAME_STMT);
7854 tree threadid;
7855 if (gimple_omp_for_grid_group_iter (for_stmt))
7857 gcc_checking_assert (!intra_group);
7858 threadid = build_call_expr (builtin_decl_explicit
7859 (BUILT_IN_HSA_WORKGROUPID), 1,
7860 build_int_cstu (unsigned_type_node, dim));
7862 else if (intra_group)
7863 threadid = build_call_expr (builtin_decl_explicit
7864 (BUILT_IN_HSA_WORKITEMID), 1,
7865 build_int_cstu (unsigned_type_node, dim));
7866 else
7867 threadid = build_call_expr (builtin_decl_explicit
7868 (BUILT_IN_HSA_WORKITEMABSID), 1,
7869 build_int_cstu (unsigned_type_node, dim));
7870 threadid = fold_convert (itype, threadid);
7871 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7872 true, GSI_SAME_STMT);
7874 tree startvar = fd.loops[dim].v;
7875 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7876 if (POINTER_TYPE_P (type))
7877 t = fold_build_pointer_plus (n1, t);
7878 else
7879 t = fold_build2 (PLUS_EXPR, type, t, n1);
7880 t = fold_convert (type, t);
7881 t = force_gimple_operand_gsi (&gsi, t,
7882 DECL_P (startvar)
7883 && TREE_ADDRESSABLE (startvar),
7884 NULL_TREE, true, GSI_SAME_STMT);
7885 gassign *assign_stmt = gimple_build_assign (startvar, t);
7886 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7888 /* Remove the omp for statement. */
7889 gsi = gsi_last_nondebug_bb (kfor->entry);
7890 gsi_remove (&gsi, true);
7892 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7893 gsi = gsi_last_nondebug_bb (kfor->cont);
7894 gcc_assert (!gsi_end_p (gsi)
7895 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7896 gsi_remove (&gsi, true);
7898 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7899 gsi = gsi_last_nondebug_bb (kfor->exit);
7900 gcc_assert (!gsi_end_p (gsi)
7901 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7902 if (intra_group)
7903 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7904 gsi_remove (&gsi, true);
7906 /* Fixup the much simpler CFG. */
7907 remove_edge (find_edge (kfor->cont, body_bb));
7909 if (kfor->cont != body_bb)
7910 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7911 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7914 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7915 argument_decls. */
7917 struct grid_arg_decl_map
7919 tree old_arg;
7920 tree new_arg;
7923 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7924 pertaining to kernel function. */
7926 static tree
7927 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7929 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7930 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7931 tree t = *tp;
7933 if (t == adm->old_arg)
7934 *tp = adm->new_arg;
7935 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7936 return NULL_TREE;
7939 /* If TARGET region contains a kernel body for loop, remove its region from the
7940 TARGET and expand it in HSA gridified kernel fashion. */
7942 static void
7943 grid_expand_target_grid_body (struct omp_region *target)
7945 if (!hsa_gen_requested_p ())
7946 return;
7948 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7949 struct omp_region **pp;
7951 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7952 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7953 break;
7955 struct omp_region *gpukernel = *pp;
7957 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7958 if (!gpukernel)
7960 /* HSA cannot handle OACC stuff. */
7961 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7962 return;
7963 gcc_checking_assert (orig_child_fndecl);
7964 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7965 OMP_CLAUSE__GRIDDIM_));
7966 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7968 hsa_register_kernel (n);
7969 return;
7972 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7973 OMP_CLAUSE__GRIDDIM_));
7974 tree inside_block
7975 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7976 *pp = gpukernel->next;
7977 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7978 if ((*pp)->type == GIMPLE_OMP_FOR)
7979 break;
7981 struct omp_region *kfor = *pp;
7982 gcc_assert (kfor);
7983 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7984 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7985 *pp = kfor->next;
7986 if (kfor->inner)
7988 if (gimple_omp_for_grid_group_iter (for_stmt))
7990 struct omp_region **next_pp;
7991 for (pp = &kfor->inner; *pp; pp = next_pp)
7993 next_pp = &(*pp)->next;
7994 if ((*pp)->type != GIMPLE_OMP_FOR)
7995 continue;
7996 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7997 gcc_assert (gimple_omp_for_kind (inner)
7998 == GF_OMP_FOR_KIND_GRID_LOOP);
7999 grid_expand_omp_for_loop (*pp, true);
8000 *pp = (*pp)->next;
8001 next_pp = pp;
8004 expand_omp (kfor->inner);
8006 if (gpukernel->inner)
8007 expand_omp (gpukernel->inner);
8009 tree kern_fndecl = copy_node (orig_child_fndecl);
8010 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8011 "kernel");
8012 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8013 tree tgtblock = gimple_block (tgt_stmt);
8014 tree fniniblock = make_node (BLOCK);
8015 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8016 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8017 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8018 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8019 DECL_INITIAL (kern_fndecl) = fniniblock;
8020 push_struct_function (kern_fndecl);
8021 cfun->function_end_locus = gimple_location (tgt_stmt);
8022 init_tree_ssa (cfun);
8023 pop_cfun ();
8025 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8026 gcc_assert (!DECL_CHAIN (old_parm_decl));
8027 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8028 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8029 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8030 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8031 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8032 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8033 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8034 kern_cfun->curr_properties = cfun->curr_properties;
8036 grid_expand_omp_for_loop (kfor, false);
8038 /* Remove the omp for statement. */
8039 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8040 gsi_remove (&gsi, true);
8041 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8042 return. */
8043 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8044 gcc_assert (!gsi_end_p (gsi)
8045 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8046 gimple *ret_stmt = gimple_build_return (NULL);
8047 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8048 gsi_remove (&gsi, true);
8050 /* Statements in the first BB in the target construct have been produced by
8051 target lowering and must be copied inside the GPUKERNEL, with the two
8052 exceptions of the first OMP statement and the OMP_DATA assignment
8053 statement. */
8054 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8055 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8056 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8057 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8058 !gsi_end_p (tsi); gsi_next (&tsi))
8060 gimple *stmt = gsi_stmt (tsi);
8061 if (is_gimple_omp (stmt))
8062 break;
8063 if (sender
8064 && is_gimple_assign (stmt)
8065 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8066 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8067 continue;
8068 gimple *copy = gimple_copy (stmt);
8069 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8070 gimple_set_block (copy, fniniblock);
8073 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8074 gpukernel->exit, inside_block);
8076 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8077 kcn->mark_force_output ();
8078 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8080 hsa_register_kernel (kcn, orig_child);
8082 cgraph_node::add_new_function (kern_fndecl, true);
8083 push_cfun (kern_cfun);
8084 cgraph_edge::rebuild_edges ();
8086 /* Re-map any mention of the PARM_DECL of the original function to the
8087 PARM_DECL of the new one.
8089 TODO: It would be great if lowering produced references into the GPU
8090 kernel decl straight away and we did not have to do this. */
8091 struct grid_arg_decl_map adm;
8092 adm.old_arg = old_parm_decl;
8093 adm.new_arg = new_parm_decl;
8094 basic_block bb;
8095 FOR_EACH_BB_FN (bb, kern_cfun)
8097 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8099 gimple *stmt = gsi_stmt (gsi);
8100 struct walk_stmt_info wi;
8101 memset (&wi, 0, sizeof (wi));
8102 wi.info = &adm;
8103 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8106 pop_cfun ();
8108 return;
8111 /* Expand the parallel region tree rooted at REGION. Expansion
8112 proceeds in depth-first order. Innermost regions are expanded
8113 first. This way, parallel regions that require a new function to
8114 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8115 internal dependencies in their body. */
8117 static void
8118 expand_omp (struct omp_region *region)
8120 omp_any_child_fn_dumped = false;
8121 while (region)
8123 location_t saved_location;
8124 gimple *inner_stmt = NULL;
8126 /* First, determine whether this is a combined parallel+workshare
8127 region. */
8128 if (region->type == GIMPLE_OMP_PARALLEL)
8129 determine_parallel_type (region);
8130 else if (region->type == GIMPLE_OMP_TARGET)
8131 grid_expand_target_grid_body (region);
8133 if (region->type == GIMPLE_OMP_FOR
8134 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8135 inner_stmt = last_stmt (region->inner->entry);
8137 if (region->inner)
8138 expand_omp (region->inner);
8140 saved_location = input_location;
8141 if (gimple_has_location (last_stmt (region->entry)))
8142 input_location = gimple_location (last_stmt (region->entry));
8144 switch (region->type)
8146 case GIMPLE_OMP_PARALLEL:
8147 case GIMPLE_OMP_TASK:
8148 expand_omp_taskreg (region);
8149 break;
8151 case GIMPLE_OMP_FOR:
8152 expand_omp_for (region, inner_stmt);
8153 break;
8155 case GIMPLE_OMP_SECTIONS:
8156 expand_omp_sections (region);
8157 break;
8159 case GIMPLE_OMP_SECTION:
8160 /* Individual omp sections are handled together with their
8161 parent GIMPLE_OMP_SECTIONS region. */
8162 break;
8164 case GIMPLE_OMP_SINGLE:
8165 expand_omp_single (region);
8166 break;
8168 case GIMPLE_OMP_ORDERED:
8170 gomp_ordered *ord_stmt
8171 = as_a <gomp_ordered *> (last_stmt (region->entry));
8172 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8173 OMP_CLAUSE_DEPEND))
8175 /* We'll expand these when expanding corresponding
8176 worksharing region with ordered(n) clause. */
8177 gcc_assert (region->outer
8178 && region->outer->type == GIMPLE_OMP_FOR);
8179 region->ord_stmt = ord_stmt;
8180 break;
8183 /* FALLTHRU */
8184 case GIMPLE_OMP_MASTER:
8185 case GIMPLE_OMP_TASKGROUP:
8186 case GIMPLE_OMP_CRITICAL:
8187 case GIMPLE_OMP_TEAMS:
8188 expand_omp_synch (region);
8189 break;
8191 case GIMPLE_OMP_ATOMIC_LOAD:
8192 expand_omp_atomic (region);
8193 break;
8195 case GIMPLE_OMP_TARGET:
8196 expand_omp_target (region);
8197 break;
8199 default:
8200 gcc_unreachable ();
8203 input_location = saved_location;
8204 region = region->next;
8206 if (omp_any_child_fn_dumped)
8208 if (dump_file)
8209 dump_function_header (dump_file, current_function_decl, dump_flags);
8210 omp_any_child_fn_dumped = false;
8214 /* Helper for build_omp_regions. Scan the dominator tree starting at
8215 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8216 true, the function ends once a single tree is built (otherwise, whole
8217 forest of OMP constructs may be built). */
8219 static void
8220 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8221 bool single_tree)
8223 gimple_stmt_iterator gsi;
8224 gimple *stmt;
8225 basic_block son;
8227 gsi = gsi_last_nondebug_bb (bb);
8228 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8230 struct omp_region *region;
8231 enum gimple_code code;
8233 stmt = gsi_stmt (gsi);
8234 code = gimple_code (stmt);
8235 if (code == GIMPLE_OMP_RETURN)
8237 /* STMT is the return point out of region PARENT. Mark it
8238 as the exit point and make PARENT the immediately
8239 enclosing region. */
8240 gcc_assert (parent);
8241 region = parent;
8242 region->exit = bb;
8243 parent = parent->outer;
8245 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8247 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8248 GIMPLE_OMP_RETURN, but matches with
8249 GIMPLE_OMP_ATOMIC_LOAD. */
8250 gcc_assert (parent);
8251 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8252 region = parent;
8253 region->exit = bb;
8254 parent = parent->outer;
8256 else if (code == GIMPLE_OMP_CONTINUE)
8258 gcc_assert (parent);
8259 parent->cont = bb;
8261 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8263 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8264 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8266 else
8268 region = new_omp_region (bb, code, parent);
8269 /* Otherwise... */
8270 if (code == GIMPLE_OMP_TARGET)
8272 switch (gimple_omp_target_kind (stmt))
8274 case GF_OMP_TARGET_KIND_REGION:
8275 case GF_OMP_TARGET_KIND_DATA:
8276 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8277 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8278 case GF_OMP_TARGET_KIND_OACC_DATA:
8279 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8280 break;
8281 case GF_OMP_TARGET_KIND_UPDATE:
8282 case GF_OMP_TARGET_KIND_ENTER_DATA:
8283 case GF_OMP_TARGET_KIND_EXIT_DATA:
8284 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8285 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8286 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8287 /* ..., other than for those stand-alone directives... */
8288 region = NULL;
8289 break;
8290 default:
8291 gcc_unreachable ();
8294 else if (code == GIMPLE_OMP_ORDERED
8295 && omp_find_clause (gimple_omp_ordered_clauses
8296 (as_a <gomp_ordered *> (stmt)),
8297 OMP_CLAUSE_DEPEND))
8298 /* #pragma omp ordered depend is also just a stand-alone
8299 directive. */
8300 region = NULL;
8301 else if (code == GIMPLE_OMP_TASK
8302 && gimple_omp_task_taskwait_p (stmt))
8303 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8304 region = NULL;
8305 /* ..., this directive becomes the parent for a new region. */
8306 if (region)
8307 parent = region;
8311 if (single_tree && !parent)
8312 return;
8314 for (son = first_dom_son (CDI_DOMINATORS, bb);
8315 son;
8316 son = next_dom_son (CDI_DOMINATORS, son))
8317 build_omp_regions_1 (son, parent, single_tree);
8320 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8321 root_omp_region. */
8323 static void
8324 build_omp_regions_root (basic_block root)
8326 gcc_assert (root_omp_region == NULL);
8327 build_omp_regions_1 (root, NULL, true);
8328 gcc_assert (root_omp_region != NULL);
8331 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8333 void
8334 omp_expand_local (basic_block head)
8336 build_omp_regions_root (head);
8337 if (dump_file && (dump_flags & TDF_DETAILS))
8339 fprintf (dump_file, "\nOMP region tree\n\n");
8340 dump_omp_region (dump_file, root_omp_region, 0);
8341 fprintf (dump_file, "\n");
8344 remove_exit_barriers (root_omp_region);
8345 expand_omp (root_omp_region);
8347 omp_free_regions ();
8350 /* Scan the CFG and build a tree of OMP regions. Return the root of
8351 the OMP region tree. */
8353 static void
8354 build_omp_regions (void)
8356 gcc_assert (root_omp_region == NULL);
8357 calculate_dominance_info (CDI_DOMINATORS);
8358 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8361 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8363 static unsigned int
8364 execute_expand_omp (void)
8366 build_omp_regions ();
8368 if (!root_omp_region)
8369 return 0;
8371 if (dump_file)
8373 fprintf (dump_file, "\nOMP region tree\n\n");
8374 dump_omp_region (dump_file, root_omp_region, 0);
8375 fprintf (dump_file, "\n");
8378 remove_exit_barriers (root_omp_region);
8380 expand_omp (root_omp_region);
8382 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8383 verify_loop_structure ();
8384 cleanup_tree_cfg ();
8386 omp_free_regions ();
8388 return 0;
8391 /* OMP expansion -- the default pass, run before creation of SSA form. */
8393 namespace {
8395 const pass_data pass_data_expand_omp =
8397 GIMPLE_PASS, /* type */
8398 "ompexp", /* name */
8399 OPTGROUP_OMP, /* optinfo_flags */
8400 TV_NONE, /* tv_id */
8401 PROP_gimple_any, /* properties_required */
8402 PROP_gimple_eomp, /* properties_provided */
8403 0, /* properties_destroyed */
8404 0, /* todo_flags_start */
8405 0, /* todo_flags_finish */
8408 class pass_expand_omp : public gimple_opt_pass
8410 public:
8411 pass_expand_omp (gcc::context *ctxt)
8412 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8415 /* opt_pass methods: */
8416 virtual unsigned int execute (function *)
8418 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8419 || flag_openmp_simd != 0)
8420 && !seen_error ());
8422 /* This pass always runs, to provide PROP_gimple_eomp.
8423 But often, there is nothing to do. */
8424 if (!gate)
8425 return 0;
8427 return execute_expand_omp ();
8430 }; // class pass_expand_omp
8432 } // anon namespace
8434 gimple_opt_pass *
8435 make_pass_expand_omp (gcc::context *ctxt)
8437 return new pass_expand_omp (ctxt);
8440 namespace {
8442 const pass_data pass_data_expand_omp_ssa =
8444 GIMPLE_PASS, /* type */
8445 "ompexpssa", /* name */
8446 OPTGROUP_OMP, /* optinfo_flags */
8447 TV_NONE, /* tv_id */
8448 PROP_cfg | PROP_ssa, /* properties_required */
8449 PROP_gimple_eomp, /* properties_provided */
8450 0, /* properties_destroyed */
8451 0, /* todo_flags_start */
8452 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8455 class pass_expand_omp_ssa : public gimple_opt_pass
8457 public:
8458 pass_expand_omp_ssa (gcc::context *ctxt)
8459 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8462 /* opt_pass methods: */
8463 virtual bool gate (function *fun)
8465 return !(fun->curr_properties & PROP_gimple_eomp);
8467 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8468 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8470 }; // class pass_expand_omp_ssa
8472 } // anon namespace
8474 gimple_opt_pass *
8475 make_pass_expand_omp_ssa (gcc::context *ctxt)
8477 return new pass_expand_omp_ssa (ctxt);
8480 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8481 GIMPLE_* codes. */
8483 bool
8484 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8485 int *region_idx)
8487 gimple *last = last_stmt (bb);
8488 enum gimple_code code = gimple_code (last);
8489 struct omp_region *cur_region = *region;
8490 bool fallthru = false;
8492 switch (code)
8494 case GIMPLE_OMP_PARALLEL:
8495 case GIMPLE_OMP_FOR:
8496 case GIMPLE_OMP_SINGLE:
8497 case GIMPLE_OMP_TEAMS:
8498 case GIMPLE_OMP_MASTER:
8499 case GIMPLE_OMP_TASKGROUP:
8500 case GIMPLE_OMP_CRITICAL:
8501 case GIMPLE_OMP_SECTION:
8502 case GIMPLE_OMP_GRID_BODY:
8503 cur_region = new_omp_region (bb, code, cur_region);
8504 fallthru = true;
8505 break;
8507 case GIMPLE_OMP_TASK:
8508 cur_region = new_omp_region (bb, code, cur_region);
8509 fallthru = true;
8510 if (gimple_omp_task_taskwait_p (last))
8511 cur_region = cur_region->outer;
8512 break;
8514 case GIMPLE_OMP_ORDERED:
8515 cur_region = new_omp_region (bb, code, cur_region);
8516 fallthru = true;
8517 if (omp_find_clause (gimple_omp_ordered_clauses
8518 (as_a <gomp_ordered *> (last)),
8519 OMP_CLAUSE_DEPEND))
8520 cur_region = cur_region->outer;
8521 break;
8523 case GIMPLE_OMP_TARGET:
8524 cur_region = new_omp_region (bb, code, cur_region);
8525 fallthru = true;
8526 switch (gimple_omp_target_kind (last))
8528 case GF_OMP_TARGET_KIND_REGION:
8529 case GF_OMP_TARGET_KIND_DATA:
8530 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8531 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8532 case GF_OMP_TARGET_KIND_OACC_DATA:
8533 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8534 break;
8535 case GF_OMP_TARGET_KIND_UPDATE:
8536 case GF_OMP_TARGET_KIND_ENTER_DATA:
8537 case GF_OMP_TARGET_KIND_EXIT_DATA:
8538 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8539 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8540 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8541 cur_region = cur_region->outer;
8542 break;
8543 default:
8544 gcc_unreachable ();
8546 break;
8548 case GIMPLE_OMP_SECTIONS:
8549 cur_region = new_omp_region (bb, code, cur_region);
8550 fallthru = true;
8551 break;
8553 case GIMPLE_OMP_SECTIONS_SWITCH:
8554 fallthru = false;
8555 break;
8557 case GIMPLE_OMP_ATOMIC_LOAD:
8558 case GIMPLE_OMP_ATOMIC_STORE:
8559 fallthru = true;
8560 break;
8562 case GIMPLE_OMP_RETURN:
8563 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8564 somewhere other than the next block. This will be
8565 created later. */
8566 cur_region->exit = bb;
8567 if (cur_region->type == GIMPLE_OMP_TASK)
8568 /* Add an edge corresponding to not scheduling the task
8569 immediately. */
8570 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8571 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8572 cur_region = cur_region->outer;
8573 break;
8575 case GIMPLE_OMP_CONTINUE:
8576 cur_region->cont = bb;
8577 switch (cur_region->type)
8579 case GIMPLE_OMP_FOR:
8580 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8581 succs edges as abnormal to prevent splitting
8582 them. */
8583 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8584 /* Make the loopback edge. */
8585 make_edge (bb, single_succ (cur_region->entry),
8586 EDGE_ABNORMAL);
8588 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8589 corresponds to the case that the body of the loop
8590 is not executed at all. */
8591 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8592 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8593 fallthru = false;
8594 break;
8596 case GIMPLE_OMP_SECTIONS:
8597 /* Wire up the edges into and out of the nested sections. */
8599 basic_block switch_bb = single_succ (cur_region->entry);
8601 struct omp_region *i;
8602 for (i = cur_region->inner; i ; i = i->next)
8604 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8605 make_edge (switch_bb, i->entry, 0);
8606 make_edge (i->exit, bb, EDGE_FALLTHRU);
8609 /* Make the loopback edge to the block with
8610 GIMPLE_OMP_SECTIONS_SWITCH. */
8611 make_edge (bb, switch_bb, 0);
8613 /* Make the edge from the switch to exit. */
8614 make_edge (switch_bb, bb->next_bb, 0);
8615 fallthru = false;
8617 break;
8619 case GIMPLE_OMP_TASK:
8620 fallthru = true;
8621 break;
8623 default:
8624 gcc_unreachable ();
8626 break;
8628 default:
8629 gcc_unreachable ();
8632 if (*region != cur_region)
8634 *region = cur_region;
8635 if (cur_region)
8636 *region_idx = cur_region->entry->index;
8637 else
8638 *region_idx = 0;
8641 return fallthru;
8644 #include "gt-omp-expand.h"