match_asm_constraints: Use copy_rtx where needed (PR88001)
[official-gcc.git] / gcc / omp-expand.c
blob76c09c5883b758d0348a510a7c49feef9deca03d
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 /* Give up for task reductions on the parallel, while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
331 if (region->inner->type == GIMPLE_OMP_FOR)
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
349 return;
351 else if (region->inner->type == GIMPLE_OMP_SECTIONS
352 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
353 OMP_CLAUSE__REDUCTEMP_))
354 return;
356 region->is_combined_parallel = true;
357 region->inner->is_combined_parallel = true;
358 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
362 /* Debugging dumps for parallel regions. */
363 void dump_omp_region (FILE *, struct omp_region *, int);
364 void debug_omp_region (struct omp_region *);
365 void debug_all_omp_regions (void);
367 /* Dump the parallel region tree rooted at REGION. */
369 void
370 dump_omp_region (FILE *file, struct omp_region *region, int indent)
372 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
373 gimple_code_name[region->type]);
375 if (region->inner)
376 dump_omp_region (file, region->inner, indent + 4);
378 if (region->cont)
380 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
381 region->cont->index);
384 if (region->exit)
385 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
386 region->exit->index);
387 else
388 fprintf (file, "%*s[no exit marker]\n", indent, "");
390 if (region->next)
391 dump_omp_region (file, region->next, indent);
394 DEBUG_FUNCTION void
395 debug_omp_region (struct omp_region *region)
397 dump_omp_region (stderr, region, 0);
400 DEBUG_FUNCTION void
401 debug_all_omp_regions (void)
403 dump_omp_region (stderr, root_omp_region, 0);
406 /* Create a new parallel region starting at STMT inside region PARENT. */
408 static struct omp_region *
409 new_omp_region (basic_block bb, enum gimple_code type,
410 struct omp_region *parent)
412 struct omp_region *region = XCNEW (struct omp_region);
414 region->outer = parent;
415 region->entry = bb;
416 region->type = type;
418 if (parent)
420 /* This is a nested region. Add it to the list of inner
421 regions in PARENT. */
422 region->next = parent->inner;
423 parent->inner = region;
425 else
427 /* This is a toplevel region. Add it to the list of toplevel
428 regions in ROOT_OMP_REGION. */
429 region->next = root_omp_region;
430 root_omp_region = region;
433 return region;
436 /* Release the memory associated with the region tree rooted at REGION. */
438 static void
439 free_omp_region_1 (struct omp_region *region)
441 struct omp_region *i, *n;
443 for (i = region->inner; i ; i = n)
445 n = i->next;
446 free_omp_region_1 (i);
449 free (region);
452 /* Release the memory for the entire omp region tree. */
454 void
455 omp_free_regions (void)
457 struct omp_region *r, *n;
458 for (r = root_omp_region; r ; r = n)
460 n = r->next;
461 free_omp_region_1 (r);
463 root_omp_region = NULL;
466 /* A convenience function to build an empty GIMPLE_COND with just the
467 condition. */
469 static gcond *
470 gimple_build_cond_empty (tree cond)
472 enum tree_code pred_code;
473 tree lhs, rhs;
475 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
476 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
479 /* Return true if a parallel REGION is within a declare target function or
480 within a target region and is not a part of a gridified target. */
482 static bool
483 parallel_needs_hsa_kernel_p (struct omp_region *region)
485 bool indirect = false;
486 for (region = region->outer; region; region = region->outer)
488 if (region->type == GIMPLE_OMP_PARALLEL)
489 indirect = true;
490 else if (region->type == GIMPLE_OMP_TARGET)
492 gomp_target *tgt_stmt
493 = as_a <gomp_target *> (last_stmt (region->entry));
495 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
496 OMP_CLAUSE__GRIDDIM_))
497 return indirect;
498 else
499 return true;
503 if (lookup_attribute ("omp declare target",
504 DECL_ATTRIBUTES (current_function_decl)))
505 return true;
507 return false;
510 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511 Add CHILD_FNDECL to decl chain of the supercontext of the block
512 ENTRY_BLOCK - this is the block which originally contained the
513 code from which CHILD_FNDECL was created.
515 Together, these actions ensure that the debug info for the outlined
516 function will be emitted with the correct lexical scope. */
518 static void
519 adjust_context_and_scope (struct omp_region *region, tree entry_block,
520 tree child_fndecl)
522 tree parent_fndecl = NULL_TREE;
523 gimple *entry_stmt;
524 /* OMP expansion expands inner regions before outer ones, so if
525 we e.g. have explicit task region nested in parallel region, when
526 expanding the task region current_function_decl will be the original
527 source function, but we actually want to use as context the child
528 function of the parallel. */
529 for (region = region->outer;
530 region && parent_fndecl == NULL_TREE; region = region->outer)
531 switch (region->type)
533 case GIMPLE_OMP_PARALLEL:
534 case GIMPLE_OMP_TASK:
535 case GIMPLE_OMP_TEAMS:
536 entry_stmt = last_stmt (region->entry);
537 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
538 break;
539 case GIMPLE_OMP_TARGET:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl
542 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
543 break;
544 default:
545 break;
548 if (parent_fndecl == NULL_TREE)
549 parent_fndecl = current_function_decl;
550 DECL_CONTEXT (child_fndecl) = parent_fndecl;
552 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
554 tree b = BLOCK_SUPERCONTEXT (entry_block);
555 if (TREE_CODE (b) == BLOCK)
557 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
558 BLOCK_VARS (b) = child_fndecl;
563 /* Build the function calls to GOMP_parallel etc to actually
564 generate the parallel operation. REGION is the parallel region
565 being expanded. BB is the block where to insert the code. WS_ARGS
566 will be set if this is a call to a combined parallel+workshare
567 construct, it contains the list of additional arguments needed by
568 the workshare construct. */
570 static void
571 expand_parallel_call (struct omp_region *region, basic_block bb,
572 gomp_parallel *entry_stmt,
573 vec<tree, va_gc> *ws_args)
575 tree t, t1, t2, val, cond, c, clauses, flags;
576 gimple_stmt_iterator gsi;
577 gimple *stmt;
578 enum built_in_function start_ix;
579 int start_ix2;
580 location_t clause_loc;
581 vec<tree, va_gc> *args;
583 clauses = gimple_omp_parallel_clauses (entry_stmt);
585 /* Determine what flavor of GOMP_parallel we will be
586 emitting. */
587 start_ix = BUILT_IN_GOMP_PARALLEL;
588 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
589 if (rtmp)
590 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
591 else if (is_combined_parallel (region))
593 switch (region->inner->type)
595 case GIMPLE_OMP_FOR:
596 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
597 switch (region->inner->sched_kind)
599 case OMP_CLAUSE_SCHEDULE_RUNTIME:
600 if ((region->inner->sched_modifiers
601 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
602 start_ix2 = 6;
603 else if ((region->inner->sched_modifiers
604 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
605 start_ix2 = 7;
606 else
607 start_ix2 = 3;
608 break;
609 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
610 case OMP_CLAUSE_SCHEDULE_GUIDED:
611 if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
614 start_ix2 = 3 + region->inner->sched_kind;
615 break;
617 /* FALLTHRU */
618 default:
619 start_ix2 = region->inner->sched_kind;
620 break;
622 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
623 start_ix = (enum built_in_function) start_ix2;
624 break;
625 case GIMPLE_OMP_SECTIONS:
626 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
627 break;
628 default:
629 gcc_unreachable ();
633 /* By default, the value of NUM_THREADS is zero (selected at run time)
634 and there is no conditional. */
635 cond = NULL_TREE;
636 val = build_int_cst (unsigned_type_node, 0);
637 flags = build_int_cst (unsigned_type_node, 0);
639 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
640 if (c)
641 cond = OMP_CLAUSE_IF_EXPR (c);
643 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
644 if (c)
646 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
647 clause_loc = OMP_CLAUSE_LOCATION (c);
649 else
650 clause_loc = gimple_location (entry_stmt);
652 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
653 if (c)
654 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
656 /* Ensure 'val' is of the correct type. */
657 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
659 /* If we found the clause 'if (cond)', build either
660 (cond != 0) or (cond ? val : 1u). */
661 if (cond)
663 cond = gimple_boolify (cond);
665 if (integer_zerop (val))
666 val = fold_build2_loc (clause_loc,
667 EQ_EXPR, unsigned_type_node, cond,
668 build_int_cst (TREE_TYPE (cond), 0));
669 else
671 basic_block cond_bb, then_bb, else_bb;
672 edge e, e_then, e_else;
673 tree tmp_then, tmp_else, tmp_join, tmp_var;
675 tmp_var = create_tmp_var (TREE_TYPE (val));
676 if (gimple_in_ssa_p (cfun))
678 tmp_then = make_ssa_name (tmp_var);
679 tmp_else = make_ssa_name (tmp_var);
680 tmp_join = make_ssa_name (tmp_var);
682 else
684 tmp_then = tmp_var;
685 tmp_else = tmp_var;
686 tmp_join = tmp_var;
689 e = split_block_after_labels (bb);
690 cond_bb = e->src;
691 bb = e->dest;
692 remove_edge (e);
694 then_bb = create_empty_bb (cond_bb);
695 else_bb = create_empty_bb (then_bb);
696 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
697 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
699 stmt = gimple_build_cond_empty (cond);
700 gsi = gsi_start_bb (cond_bb);
701 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
703 gsi = gsi_start_bb (then_bb);
704 expand_omp_build_assign (&gsi, tmp_then, val, true);
706 gsi = gsi_start_bb (else_bb);
707 expand_omp_build_assign (&gsi, tmp_else,
708 build_int_cst (unsigned_type_node, 1),
709 true);
711 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
712 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
713 add_bb_to_loop (then_bb, cond_bb->loop_father);
714 add_bb_to_loop (else_bb, cond_bb->loop_father);
715 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
716 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
718 if (gimple_in_ssa_p (cfun))
720 gphi *phi = create_phi_node (tmp_join, bb);
721 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
722 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
725 val = tmp_join;
728 gsi = gsi_start_bb (bb);
729 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
730 false, GSI_CONTINUE_LINKING);
733 gsi = gsi_last_nondebug_bb (bb);
734 t = gimple_omp_parallel_data_arg (entry_stmt);
735 if (t == NULL)
736 t1 = null_pointer_node;
737 else
738 t1 = build_fold_addr_expr (t);
739 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
740 t2 = build_fold_addr_expr (child_fndecl);
742 vec_alloc (args, 4 + vec_safe_length (ws_args));
743 args->quick_push (t2);
744 args->quick_push (t1);
745 args->quick_push (val);
746 if (ws_args)
747 args->splice (*ws_args);
748 args->quick_push (flags);
750 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
751 builtin_decl_explicit (start_ix), args);
753 if (rtmp)
755 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
756 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
757 fold_convert (type,
758 fold_convert (pointer_sized_int_node, t)));
760 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
761 false, GSI_CONTINUE_LINKING);
763 if (hsa_gen_requested_p ()
764 && parallel_needs_hsa_kernel_p (region))
766 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
767 hsa_register_kernel (child_cnode);
771 /* Build the function call to GOMP_task to actually
772 generate the task operation. BB is the block where to insert the code. */
774 static void
775 expand_task_call (struct omp_region *region, basic_block bb,
776 gomp_task *entry_stmt)
778 tree t1, t2, t3;
779 gimple_stmt_iterator gsi;
780 location_t loc = gimple_location (entry_stmt);
782 tree clauses = gimple_omp_task_clauses (entry_stmt);
784 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
785 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
786 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
787 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
788 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
789 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
791 unsigned int iflags
792 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
793 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
794 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
796 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
797 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
798 tree num_tasks = NULL_TREE;
799 bool ull = false;
800 if (taskloop_p)
802 gimple *g = last_stmt (region->outer->entry);
803 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
804 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
805 struct omp_for_data fd;
806 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
807 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
808 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
809 OMP_CLAUSE__LOOPTEMP_);
810 startvar = OMP_CLAUSE_DECL (startvar);
811 endvar = OMP_CLAUSE_DECL (endvar);
812 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
813 if (fd.loop.cond_code == LT_EXPR)
814 iflags |= GOMP_TASK_FLAG_UP;
815 tree tclauses = gimple_omp_for_clauses (g);
816 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
817 if (num_tasks)
818 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
819 else
821 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
822 if (num_tasks)
824 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
825 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
827 else
828 num_tasks = integer_zero_node;
830 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
831 if (ifc == NULL_TREE)
832 iflags |= GOMP_TASK_FLAG_IF;
833 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
834 iflags |= GOMP_TASK_FLAG_NOGROUP;
835 ull = fd.iter_type == long_long_unsigned_type_node;
836 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
837 iflags |= GOMP_TASK_FLAG_REDUCTION;
839 else if (priority)
840 iflags |= GOMP_TASK_FLAG_PRIORITY;
842 tree flags = build_int_cst (unsigned_type_node, iflags);
844 tree cond = boolean_true_node;
845 if (ifc)
847 if (taskloop_p)
849 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
850 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 build_int_cst (unsigned_type_node,
852 GOMP_TASK_FLAG_IF),
853 build_int_cst (unsigned_type_node, 0));
854 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
855 flags, t);
857 else
858 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
861 if (finalc)
863 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
864 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
865 build_int_cst (unsigned_type_node,
866 GOMP_TASK_FLAG_FINAL),
867 build_int_cst (unsigned_type_node, 0));
868 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
870 if (depend)
871 depend = OMP_CLAUSE_DECL (depend);
872 else
873 depend = build_int_cst (ptr_type_node, 0);
874 if (priority)
875 priority = fold_convert (integer_type_node,
876 OMP_CLAUSE_PRIORITY_EXPR (priority));
877 else
878 priority = integer_zero_node;
880 gsi = gsi_last_nondebug_bb (bb);
881 tree t = gimple_omp_task_data_arg (entry_stmt);
882 if (t == NULL)
883 t2 = null_pointer_node;
884 else
885 t2 = build_fold_addr_expr_loc (loc, t);
886 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
887 t = gimple_omp_task_copy_fn (entry_stmt);
888 if (t == NULL)
889 t3 = null_pointer_node;
890 else
891 t3 = build_fold_addr_expr_loc (loc, t);
893 if (taskloop_p)
894 t = build_call_expr (ull
895 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
896 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
897 11, t1, t2, t3,
898 gimple_omp_task_arg_size (entry_stmt),
899 gimple_omp_task_arg_align (entry_stmt), flags,
900 num_tasks, priority, startvar, endvar, step);
901 else
902 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
903 9, t1, t2, t3,
904 gimple_omp_task_arg_size (entry_stmt),
905 gimple_omp_task_arg_align (entry_stmt), cond, flags,
906 depend, priority);
908 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
909 false, GSI_CONTINUE_LINKING);
912 /* Build the function call to GOMP_taskwait_depend to actually
913 generate the taskwait operation. BB is the block where to insert the
914 code. */
916 static void
917 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
919 tree clauses = gimple_omp_task_clauses (entry_stmt);
920 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
921 if (depend == NULL_TREE)
922 return;
924 depend = OMP_CLAUSE_DECL (depend);
926 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
927 tree t
928 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
929 1, depend);
931 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
932 false, GSI_CONTINUE_LINKING);
935 /* Build the function call to GOMP_teams_reg to actually
936 generate the host teams operation. REGION is the teams region
937 being expanded. BB is the block where to insert the code. */
939 static void
940 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
942 tree clauses = gimple_omp_teams_clauses (entry_stmt);
943 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
944 if (num_teams == NULL_TREE)
945 num_teams = build_int_cst (unsigned_type_node, 0);
946 else
948 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
949 num_teams = fold_convert (unsigned_type_node, num_teams);
951 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
952 if (thread_limit == NULL_TREE)
953 thread_limit = build_int_cst (unsigned_type_node, 0);
954 else
956 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
957 thread_limit = fold_convert (unsigned_type_node, thread_limit);
960 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
961 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
962 if (t == NULL)
963 t1 = null_pointer_node;
964 else
965 t1 = build_fold_addr_expr (t);
966 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
967 tree t2 = build_fold_addr_expr (child_fndecl);
969 vec<tree, va_gc> *args;
970 vec_alloc (args, 5);
971 args->quick_push (t2);
972 args->quick_push (t1);
973 args->quick_push (num_teams);
974 args->quick_push (thread_limit);
975 /* For future extensibility. */
976 args->quick_push (build_zero_cst (unsigned_type_node));
978 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
979 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
980 args);
982 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
983 false, GSI_CONTINUE_LINKING);
986 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
988 static tree
989 vec2chain (vec<tree, va_gc> *v)
991 tree chain = NULL_TREE, t;
992 unsigned ix;
994 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
996 DECL_CHAIN (t) = chain;
997 chain = t;
1000 return chain;
1003 /* Remove barriers in REGION->EXIT's block. Note that this is only
1004 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1005 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1006 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1007 removed. */
1009 static void
1010 remove_exit_barrier (struct omp_region *region)
1012 gimple_stmt_iterator gsi;
1013 basic_block exit_bb;
1014 edge_iterator ei;
1015 edge e;
1016 gimple *stmt;
1017 int any_addressable_vars = -1;
1019 exit_bb = region->exit;
1021 /* If the parallel region doesn't return, we don't have REGION->EXIT
1022 block at all. */
1023 if (! exit_bb)
1024 return;
1026 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1027 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1028 statements that can appear in between are extremely limited -- no
1029 memory operations at all. Here, we allow nothing at all, so the
1030 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1031 gsi = gsi_last_nondebug_bb (exit_bb);
1032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1033 gsi_prev_nondebug (&gsi);
1034 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1035 return;
1037 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1039 gsi = gsi_last_nondebug_bb (e->src);
1040 if (gsi_end_p (gsi))
1041 continue;
1042 stmt = gsi_stmt (gsi);
1043 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1044 && !gimple_omp_return_nowait_p (stmt))
1046 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 in many cases. If there could be tasks queued, the barrier
1048 might be needed to let the tasks run before some local
1049 variable of the parallel that the task uses as shared
1050 runs out of scope. The task can be spawned either
1051 from within current function (this would be easy to check)
1052 or from some function it calls and gets passed an address
1053 of such a variable. */
1054 if (any_addressable_vars < 0)
1056 gomp_parallel *parallel_stmt
1057 = as_a <gomp_parallel *> (last_stmt (region->entry));
1058 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1059 tree local_decls, block, decl;
1060 unsigned ix;
1062 any_addressable_vars = 0;
1063 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1064 if (TREE_ADDRESSABLE (decl))
1066 any_addressable_vars = 1;
1067 break;
1069 for (block = gimple_block (stmt);
1070 !any_addressable_vars
1071 && block
1072 && TREE_CODE (block) == BLOCK;
1073 block = BLOCK_SUPERCONTEXT (block))
1075 for (local_decls = BLOCK_VARS (block);
1076 local_decls;
1077 local_decls = DECL_CHAIN (local_decls))
1078 if (TREE_ADDRESSABLE (local_decls))
1080 any_addressable_vars = 1;
1081 break;
1083 if (block == gimple_block (parallel_stmt))
1084 break;
1087 if (!any_addressable_vars)
1088 gimple_omp_return_set_nowait (stmt);
1093 static void
1094 remove_exit_barriers (struct omp_region *region)
1096 if (region->type == GIMPLE_OMP_PARALLEL)
1097 remove_exit_barrier (region);
1099 if (region->inner)
1101 region = region->inner;
1102 remove_exit_barriers (region);
1103 while (region->next)
1105 region = region->next;
1106 remove_exit_barriers (region);
1111 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112 calls. These can't be declared as const functions, but
1113 within one parallel body they are constant, so they can be
1114 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115 which are declared const. Similarly for task body, except
1116 that in untied task omp_get_thread_num () can change at any task
1117 scheduling point. */
1119 static void
1120 optimize_omp_library_calls (gimple *entry_stmt)
1122 basic_block bb;
1123 gimple_stmt_iterator gsi;
1124 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1125 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1126 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1127 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1128 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1129 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1130 OMP_CLAUSE_UNTIED) != NULL);
1132 FOR_EACH_BB_FN (bb, cfun)
1133 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1135 gimple *call = gsi_stmt (gsi);
1136 tree decl;
1138 if (is_gimple_call (call)
1139 && (decl = gimple_call_fndecl (call))
1140 && DECL_EXTERNAL (decl)
1141 && TREE_PUBLIC (decl)
1142 && DECL_INITIAL (decl) == NULL)
1144 tree built_in;
1146 if (DECL_NAME (decl) == thr_num_id)
1148 /* In #pragma omp task untied omp_get_thread_num () can change
1149 during the execution of the task region. */
1150 if (untied_task)
1151 continue;
1152 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1154 else if (DECL_NAME (decl) == num_thr_id)
1155 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1156 else
1157 continue;
1159 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1160 || gimple_call_num_args (call) != 0)
1161 continue;
1163 if (flag_exceptions && !TREE_NOTHROW (decl))
1164 continue;
1166 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1167 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1168 TREE_TYPE (TREE_TYPE (built_in))))
1169 continue;
1171 gimple_call_set_fndecl (call, built_in);
1176 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1177 regimplified. */
1179 static tree
1180 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1182 tree t = *tp;
1184 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1185 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1186 return t;
1188 if (TREE_CODE (t) == ADDR_EXPR)
1189 recompute_tree_invariant_for_addr_expr (t);
1191 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1192 return NULL_TREE;
1195 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1197 static void
1198 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1199 bool after)
1201 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1202 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1203 !after, after ? GSI_CONTINUE_LINKING
1204 : GSI_SAME_STMT);
1205 gimple *stmt = gimple_build_assign (to, from);
1206 if (after)
1207 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1208 else
1209 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1210 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1211 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1213 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1214 gimple_regimplify_operands (stmt, &gsi);
1218 /* Expand the OpenMP parallel or task directive starting at REGION. */
1220 static void
1221 expand_omp_taskreg (struct omp_region *region)
1223 basic_block entry_bb, exit_bb, new_bb;
1224 struct function *child_cfun;
1225 tree child_fn, block, t;
1226 gimple_stmt_iterator gsi;
1227 gimple *entry_stmt, *stmt;
1228 edge e;
1229 vec<tree, va_gc> *ws_args;
1231 entry_stmt = last_stmt (region->entry);
1232 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1233 && gimple_omp_task_taskwait_p (entry_stmt))
1235 new_bb = region->entry;
1236 gsi = gsi_last_nondebug_bb (region->entry);
1237 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1238 gsi_remove (&gsi, true);
1239 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1240 return;
1243 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1244 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1246 entry_bb = region->entry;
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1248 exit_bb = region->cont;
1249 else
1250 exit_bb = region->exit;
1252 if (is_combined_parallel (region))
1253 ws_args = region->ws_args;
1254 else
1255 ws_args = NULL;
1257 if (child_cfun->cfg)
1259 /* Due to inlining, it may happen that we have already outlined
1260 the region, in which case all we need to do is make the
1261 sub-graph unreachable and emit the parallel call. */
1262 edge entry_succ_e, exit_succ_e;
1264 entry_succ_e = single_succ_edge (entry_bb);
1266 gsi = gsi_last_nondebug_bb (entry_bb);
1267 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1268 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1269 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1270 gsi_remove (&gsi, true);
1272 new_bb = entry_bb;
1273 if (exit_bb)
1275 exit_succ_e = single_succ_edge (exit_bb);
1276 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1278 remove_edge_and_dominated_blocks (entry_succ_e);
1280 else
1282 unsigned srcidx, dstidx, num;
1284 /* If the parallel region needs data sent from the parent
1285 function, then the very first statement (except possible
1286 tree profile counter updates) of the parallel body
1287 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1288 &.OMP_DATA_O is passed as an argument to the child function,
1289 we need to replace it with the argument as seen by the child
1290 function.
1292 In most cases, this will end up being the identity assignment
1293 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1294 a function call that has been inlined, the original PARM_DECL
1295 .OMP_DATA_I may have been converted into a different local
1296 variable. In which case, we need to keep the assignment. */
1297 if (gimple_omp_taskreg_data_arg (entry_stmt))
1299 basic_block entry_succ_bb
1300 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1301 : FALLTHRU_EDGE (entry_bb)->dest;
1302 tree arg;
1303 gimple *parcopy_stmt = NULL;
1305 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1307 gimple *stmt;
1309 gcc_assert (!gsi_end_p (gsi));
1310 stmt = gsi_stmt (gsi);
1311 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1312 continue;
1314 if (gimple_num_ops (stmt) == 2)
1316 tree arg = gimple_assign_rhs1 (stmt);
1318 /* We're ignore the subcode because we're
1319 effectively doing a STRIP_NOPS. */
1321 if (TREE_CODE (arg) == ADDR_EXPR
1322 && (TREE_OPERAND (arg, 0)
1323 == gimple_omp_taskreg_data_arg (entry_stmt)))
1325 parcopy_stmt = stmt;
1326 break;
1331 gcc_assert (parcopy_stmt != NULL);
1332 arg = DECL_ARGUMENTS (child_fn);
1334 if (!gimple_in_ssa_p (cfun))
1336 if (gimple_assign_lhs (parcopy_stmt) == arg)
1337 gsi_remove (&gsi, true);
1338 else
1340 /* ?? Is setting the subcode really necessary ?? */
1341 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1342 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1345 else
1347 tree lhs = gimple_assign_lhs (parcopy_stmt);
1348 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1349 /* We'd like to set the rhs to the default def in the child_fn,
1350 but it's too early to create ssa names in the child_fn.
1351 Instead, we set the rhs to the parm. In
1352 move_sese_region_to_fn, we introduce a default def for the
1353 parm, map the parm to it's default def, and once we encounter
1354 this stmt, replace the parm with the default def. */
1355 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 update_stmt (parcopy_stmt);
1360 /* Declare local variables needed in CHILD_CFUN. */
1361 block = DECL_INITIAL (child_fn);
1362 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1363 /* The gimplifier could record temporaries in parallel/task block
1364 rather than in containing function's local_decls chain,
1365 which would mean cgraph missed finalizing them. Do it now. */
1366 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1367 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1368 varpool_node::finalize_decl (t);
1369 DECL_SAVED_TREE (child_fn) = NULL;
1370 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1371 gimple_set_body (child_fn, NULL);
1372 TREE_USED (block) = 1;
1374 /* Reset DECL_CONTEXT on function arguments. */
1375 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1376 DECL_CONTEXT (t) = child_fn;
1378 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 so that it can be moved to the child function. */
1380 gsi = gsi_last_nondebug_bb (entry_bb);
1381 stmt = gsi_stmt (gsi);
1382 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1383 || gimple_code (stmt) == GIMPLE_OMP_TASK
1384 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1385 e = split_block (entry_bb, stmt);
1386 gsi_remove (&gsi, true);
1387 entry_bb = e->dest;
1388 edge e2 = NULL;
1389 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1390 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1391 else
1393 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1394 gcc_assert (e2->dest == region->exit);
1395 remove_edge (BRANCH_EDGE (entry_bb));
1396 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1397 gsi = gsi_last_nondebug_bb (region->exit);
1398 gcc_assert (!gsi_end_p (gsi)
1399 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1400 gsi_remove (&gsi, true);
1403 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1404 if (exit_bb)
1406 gsi = gsi_last_nondebug_bb (exit_bb);
1407 gcc_assert (!gsi_end_p (gsi)
1408 && (gimple_code (gsi_stmt (gsi))
1409 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1410 stmt = gimple_build_return (NULL);
1411 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1412 gsi_remove (&gsi, true);
1415 /* Move the parallel region into CHILD_CFUN. */
1417 if (gimple_in_ssa_p (cfun))
1419 init_tree_ssa (child_cfun);
1420 init_ssa_operands (child_cfun);
1421 child_cfun->gimple_df->in_ssa_p = true;
1422 block = NULL_TREE;
1424 else
1425 block = gimple_block (entry_stmt);
1427 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1428 if (exit_bb)
1429 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1430 if (e2)
1432 basic_block dest_bb = e2->dest;
1433 if (!exit_bb)
1434 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1435 remove_edge (e2);
1436 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1438 /* When the OMP expansion process cannot guarantee an up-to-date
1439 loop tree arrange for the child function to fixup loops. */
1440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1441 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1444 num = vec_safe_length (child_cfun->local_decls);
1445 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1447 t = (*child_cfun->local_decls)[srcidx];
1448 if (DECL_CONTEXT (t) == cfun->decl)
1449 continue;
1450 if (srcidx != dstidx)
1451 (*child_cfun->local_decls)[dstidx] = t;
1452 dstidx++;
1454 if (dstidx != num)
1455 vec_safe_truncate (child_cfun->local_decls, dstidx);
1457 /* Inform the callgraph about the new function. */
1458 child_cfun->curr_properties = cfun->curr_properties;
1459 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1460 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1461 cgraph_node *node = cgraph_node::get_create (child_fn);
1462 node->parallelized_function = 1;
1463 cgraph_node::add_new_function (child_fn, true);
1465 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1466 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1468 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1469 fixed in a following pass. */
1470 push_cfun (child_cfun);
1471 if (need_asm)
1472 assign_assembler_name_if_needed (child_fn);
1474 if (optimize)
1475 optimize_omp_library_calls (entry_stmt);
1476 update_max_bb_count ();
1477 cgraph_edge::rebuild_edges ();
1479 /* Some EH regions might become dead, see PR34608. If
1480 pass_cleanup_cfg isn't the first pass to happen with the
1481 new child, these dead EH edges might cause problems.
1482 Clean them up now. */
1483 if (flag_exceptions)
1485 basic_block bb;
1486 bool changed = false;
1488 FOR_EACH_BB_FN (bb, cfun)
1489 changed |= gimple_purge_dead_eh_edges (bb);
1490 if (changed)
1491 cleanup_tree_cfg ();
1493 if (gimple_in_ssa_p (cfun))
1494 update_ssa (TODO_update_ssa);
1495 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1496 verify_loop_structure ();
1497 pop_cfun ();
1499 if (dump_file && !gimple_in_ssa_p (cfun))
1501 omp_any_child_fn_dumped = true;
1502 dump_function_header (dump_file, child_fn, dump_flags);
1503 dump_function_to_file (child_fn, dump_file, dump_flags);
1507 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1509 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1510 expand_parallel_call (region, new_bb,
1511 as_a <gomp_parallel *> (entry_stmt), ws_args);
1512 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1513 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1514 else
1515 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1516 if (gimple_in_ssa_p (cfun))
1517 update_ssa (TODO_update_ssa_only_virtuals);
1520 /* Information about members of an OpenACC collapsed loop nest. */
1522 struct oacc_collapse
1524 tree base; /* Base value. */
1525 tree iters; /* Number of steps. */
1526 tree step; /* Step size. */
1527 tree tile; /* Tile increment (if tiled). */
1528 tree outer; /* Tile iterator var. */
1531 /* Helper for expand_oacc_for. Determine collapsed loop information.
1532 Fill in COUNTS array. Emit any initialization code before GSI.
1533 Return the calculated outer loop bound of BOUND_TYPE. */
1535 static tree
1536 expand_oacc_collapse_init (const struct omp_for_data *fd,
1537 gimple_stmt_iterator *gsi,
1538 oacc_collapse *counts, tree bound_type,
1539 location_t loc)
1541 tree tiling = fd->tiling;
1542 tree total = build_int_cst (bound_type, 1);
1543 int ix;
1545 gcc_assert (integer_onep (fd->loop.step));
1546 gcc_assert (integer_zerop (fd->loop.n1));
1548 /* When tiling, the first operand of the tile clause applies to the
1549 innermost loop, and we work outwards from there. Seems
1550 backwards, but whatever. */
1551 for (ix = fd->collapse; ix--;)
1553 const omp_for_data_loop *loop = &fd->loops[ix];
1555 tree iter_type = TREE_TYPE (loop->v);
1556 tree diff_type = iter_type;
1557 tree plus_type = iter_type;
1559 gcc_assert (loop->cond_code == fd->loop.cond_code);
1561 if (POINTER_TYPE_P (iter_type))
1562 plus_type = sizetype;
1563 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1564 diff_type = signed_type_for (diff_type);
1565 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1566 diff_type = integer_type_node;
1568 if (tiling)
1570 tree num = build_int_cst (integer_type_node, fd->collapse);
1571 tree loop_no = build_int_cst (integer_type_node, ix);
1572 tree tile = TREE_VALUE (tiling);
1573 gcall *call
1574 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1575 /* gwv-outer=*/integer_zero_node,
1576 /* gwv-inner=*/integer_zero_node);
1578 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1579 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1580 gimple_call_set_lhs (call, counts[ix].tile);
1581 gimple_set_location (call, loc);
1582 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1584 tiling = TREE_CHAIN (tiling);
1586 else
1588 counts[ix].tile = NULL;
1589 counts[ix].outer = loop->v;
1592 tree b = loop->n1;
1593 tree e = loop->n2;
1594 tree s = loop->step;
1595 bool up = loop->cond_code == LT_EXPR;
1596 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1597 bool negating;
1598 tree expr;
1600 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1602 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1603 true, GSI_SAME_STMT);
1605 /* Convert the step, avoiding possible unsigned->signed overflow. */
1606 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1607 if (negating)
1608 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1609 s = fold_convert (diff_type, s);
1610 if (negating)
1611 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1612 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 /* Determine the range, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (iter_type);
1617 expr = fold_build2 (MINUS_EXPR, plus_type,
1618 fold_convert (plus_type, negating ? b : e),
1619 fold_convert (plus_type, negating ? e : b));
1620 expr = fold_convert (diff_type, expr);
1621 if (negating)
1622 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1623 tree range = force_gimple_operand_gsi
1624 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1626 /* Determine number of iterations. */
1627 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1628 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1629 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1631 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1634 counts[ix].base = b;
1635 counts[ix].iters = iters;
1636 counts[ix].step = s;
1638 total = fold_build2 (MULT_EXPR, bound_type, total,
1639 fold_convert (bound_type, iters));
1642 return total;
1645 /* Emit initializers for collapsed loop members. INNER is true if
1646 this is for the element loop of a TILE. IVAR is the outer
1647 loop iteration variable, from which collapsed loop iteration values
1648 are calculated. COUNTS array has been initialized by
1649 expand_oacc_collapse_inits. */
1651 static void
1652 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1653 gimple_stmt_iterator *gsi,
1654 const oacc_collapse *counts, tree ivar)
1656 tree ivar_type = TREE_TYPE (ivar);
1658 /* The most rapidly changing iteration variable is the innermost
1659 one. */
1660 for (int ix = fd->collapse; ix--;)
1662 const omp_for_data_loop *loop = &fd->loops[ix];
1663 const oacc_collapse *collapse = &counts[ix];
1664 tree v = inner ? loop->v : collapse->outer;
1665 tree iter_type = TREE_TYPE (v);
1666 tree diff_type = TREE_TYPE (collapse->step);
1667 tree plus_type = iter_type;
1668 enum tree_code plus_code = PLUS_EXPR;
1669 tree expr;
1671 if (POINTER_TYPE_P (iter_type))
1673 plus_code = POINTER_PLUS_EXPR;
1674 plus_type = sizetype;
1677 expr = ivar;
1678 if (ix)
1680 tree mod = fold_convert (ivar_type, collapse->iters);
1681 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1682 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1683 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1684 true, GSI_SAME_STMT);
1687 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1688 collapse->step);
1689 expr = fold_build2 (plus_code, iter_type,
1690 inner ? collapse->outer : collapse->base,
1691 fold_convert (plus_type, expr));
1692 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 gassign *ass = gimple_build_assign (v, expr);
1695 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1699 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1700 of the combined collapse > 1 loop constructs, generate code like:
1701 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1702 if (cond3 is <)
1703 adj = STEP3 - 1;
1704 else
1705 adj = STEP3 + 1;
1706 count3 = (adj + N32 - N31) / STEP3;
1707 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1708 if (cond2 is <)
1709 adj = STEP2 - 1;
1710 else
1711 adj = STEP2 + 1;
1712 count2 = (adj + N22 - N21) / STEP2;
1713 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1714 if (cond1 is <)
1715 adj = STEP1 - 1;
1716 else
1717 adj = STEP1 + 1;
1718 count1 = (adj + N12 - N11) / STEP1;
1719 count = count1 * count2 * count3;
1720 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1721 count = 0;
1722 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1723 of the combined loop constructs, just initialize COUNTS array
1724 from the _looptemp_ clauses. */
1726 /* NOTE: It *could* be better to moosh all of the BBs together,
1727 creating one larger BB with all the computation and the unexpected
1728 jump at the end. I.e.
1730 bool zero3, zero2, zero1, zero;
1732 zero3 = N32 c3 N31;
1733 count3 = (N32 - N31) /[cl] STEP3;
1734 zero2 = N22 c2 N21;
1735 count2 = (N22 - N21) /[cl] STEP2;
1736 zero1 = N12 c1 N11;
1737 count1 = (N12 - N11) /[cl] STEP1;
1738 zero = zero3 || zero2 || zero1;
1739 count = count1 * count2 * count3;
1740 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1742 After all, we expect the zero=false, and thus we expect to have to
1743 evaluate all of the comparison expressions, so short-circuiting
1744 oughtn't be a win. Since the condition isn't protecting a
1745 denominator, we're not concerned about divide-by-zero, so we can
1746 fully evaluate count even if a numerator turned out to be wrong.
1748 It seems like putting this all together would create much better
1749 scheduling opportunities, and less pressure on the chip's branch
1750 predictor. */
1752 static void
1753 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1754 basic_block &entry_bb, tree *counts,
1755 basic_block &zero_iter1_bb, int &first_zero_iter1,
1756 basic_block &zero_iter2_bb, int &first_zero_iter2,
1757 basic_block &l2_dom_bb)
1759 tree t, type = TREE_TYPE (fd->loop.v);
1760 edge e, ne;
1761 int i;
1763 /* Collapsed loops need work for expansion into SSA form. */
1764 gcc_assert (!gimple_in_ssa_p (cfun));
1766 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1767 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1769 gcc_assert (fd->ordered == 0);
1770 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 isn't supposed to be handled, as the inner loop doesn't
1772 use it. */
1773 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1774 OMP_CLAUSE__LOOPTEMP_);
1775 gcc_assert (innerc);
1776 for (i = 0; i < fd->collapse; i++)
1778 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1779 OMP_CLAUSE__LOOPTEMP_);
1780 gcc_assert (innerc);
1781 if (i)
1782 counts[i] = OMP_CLAUSE_DECL (innerc);
1783 else
1784 counts[0] = NULL_TREE;
1786 return;
1789 for (i = fd->collapse; i < fd->ordered; i++)
1791 tree itype = TREE_TYPE (fd->loops[i].v);
1792 counts[i] = NULL_TREE;
1793 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1794 fold_convert (itype, fd->loops[i].n1),
1795 fold_convert (itype, fd->loops[i].n2));
1796 if (t && integer_zerop (t))
1798 for (i = fd->collapse; i < fd->ordered; i++)
1799 counts[i] = build_int_cst (type, 0);
1800 break;
1803 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1805 tree itype = TREE_TYPE (fd->loops[i].v);
1807 if (i >= fd->collapse && counts[i])
1808 continue;
1809 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1810 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1811 fold_convert (itype, fd->loops[i].n1),
1812 fold_convert (itype, fd->loops[i].n2)))
1813 == NULL_TREE || !integer_onep (t)))
1815 gcond *cond_stmt;
1816 tree n1, n2;
1817 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1818 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1819 true, GSI_SAME_STMT);
1820 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1821 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1822 true, GSI_SAME_STMT);
1823 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1824 NULL_TREE, NULL_TREE);
1825 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1827 expand_omp_regimplify_p, NULL, NULL)
1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1829 expand_omp_regimplify_p, NULL, NULL))
1831 *gsi = gsi_for_stmt (cond_stmt);
1832 gimple_regimplify_operands (cond_stmt, gsi);
1834 e = split_block (entry_bb, cond_stmt);
1835 basic_block &zero_iter_bb
1836 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1837 int &first_zero_iter
1838 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1839 if (zero_iter_bb == NULL)
1841 gassign *assign_stmt;
1842 first_zero_iter = i;
1843 zero_iter_bb = create_empty_bb (entry_bb);
1844 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1845 *gsi = gsi_after_labels (zero_iter_bb);
1846 if (i < fd->collapse)
1847 assign_stmt = gimple_build_assign (fd->loop.n2,
1848 build_zero_cst (type));
1849 else
1851 counts[i] = create_tmp_reg (type, ".count");
1852 assign_stmt
1853 = gimple_build_assign (counts[i], build_zero_cst (type));
1855 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1856 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1857 entry_bb);
1859 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1860 ne->probability = profile_probability::very_unlikely ();
1861 e->flags = EDGE_TRUE_VALUE;
1862 e->probability = ne->probability.invert ();
1863 if (l2_dom_bb == NULL)
1864 l2_dom_bb = entry_bb;
1865 entry_bb = e->dest;
1866 *gsi = gsi_last_nondebug_bb (entry_bb);
1869 if (POINTER_TYPE_P (itype))
1870 itype = signed_type_for (itype);
1871 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1872 ? -1 : 1));
1873 t = fold_build2 (PLUS_EXPR, itype,
1874 fold_convert (itype, fd->loops[i].step), t);
1875 t = fold_build2 (PLUS_EXPR, itype, t,
1876 fold_convert (itype, fd->loops[i].n2));
1877 t = fold_build2 (MINUS_EXPR, itype, t,
1878 fold_convert (itype, fd->loops[i].n1));
1879 /* ?? We could probably use CEIL_DIV_EXPR instead of
1880 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1881 generate the same code in the end because generically we
1882 don't know that the values involved must be negative for
1883 GT?? */
1884 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1885 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1886 fold_build1 (NEGATE_EXPR, itype, t),
1887 fold_build1 (NEGATE_EXPR, itype,
1888 fold_convert (itype,
1889 fd->loops[i].step)));
1890 else
1891 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1892 fold_convert (itype, fd->loops[i].step));
1893 t = fold_convert (type, t);
1894 if (TREE_CODE (t) == INTEGER_CST)
1895 counts[i] = t;
1896 else
1898 if (i < fd->collapse || i != first_zero_iter2)
1899 counts[i] = create_tmp_reg (type, ".count");
1900 expand_omp_build_assign (gsi, counts[i], t);
1902 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1904 if (i == 0)
1905 t = counts[0];
1906 else
1907 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1908 expand_omp_build_assign (gsi, fd->loop.n2, t);
1913 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1914 T = V;
1915 V3 = N31 + (T % count3) * STEP3;
1916 T = T / count3;
1917 V2 = N21 + (T % count2) * STEP2;
1918 T = T / count2;
1919 V1 = N11 + T * STEP1;
1920 if this loop doesn't have an inner loop construct combined with it.
1921 If it does have an inner loop construct combined with it and the
1922 iteration count isn't known constant, store values from counts array
1923 into its _looptemp_ temporaries instead. */
1925 static void
1926 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1927 tree *counts, gimple *inner_stmt, tree startvar)
1929 int i;
1930 if (gimple_omp_for_combined_p (fd->for_stmt))
1932 /* If fd->loop.n2 is constant, then no propagation of the counts
1933 is needed, they are constant. */
1934 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1935 return;
1937 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1938 ? gimple_omp_taskreg_clauses (inner_stmt)
1939 : gimple_omp_for_clauses (inner_stmt);
1940 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 isn't supposed to be handled, as the inner loop doesn't
1942 use it. */
1943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1944 gcc_assert (innerc);
1945 for (i = 0; i < fd->collapse; i++)
1947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1948 OMP_CLAUSE__LOOPTEMP_);
1949 gcc_assert (innerc);
1950 if (i)
1952 tree tem = OMP_CLAUSE_DECL (innerc);
1953 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 gassign *stmt = gimple_build_assign (tem, t);
1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1960 return;
1963 tree type = TREE_TYPE (fd->loop.v);
1964 tree tem = create_tmp_reg (type, ".tem");
1965 gassign *stmt = gimple_build_assign (tem, startvar);
1966 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1968 for (i = fd->collapse - 1; i >= 0; i--)
1970 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1971 itype = vtype;
1972 if (POINTER_TYPE_P (vtype))
1973 itype = signed_type_for (vtype);
1974 if (i != 0)
1975 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1976 else
1977 t = tem;
1978 t = fold_convert (itype, t);
1979 t = fold_build2 (MULT_EXPR, itype, t,
1980 fold_convert (itype, fd->loops[i].step));
1981 if (POINTER_TYPE_P (vtype))
1982 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1983 else
1984 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1985 t = force_gimple_operand_gsi (gsi, t,
1986 DECL_P (fd->loops[i].v)
1987 && TREE_ADDRESSABLE (fd->loops[i].v),
1988 NULL_TREE, false,
1989 GSI_CONTINUE_LINKING);
1990 stmt = gimple_build_assign (fd->loops[i].v, t);
1991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1992 if (i != 0)
1994 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1995 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1996 false, GSI_CONTINUE_LINKING);
1997 stmt = gimple_build_assign (tem, t);
1998 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2003 /* Helper function for expand_omp_for_*. Generate code like:
2004 L10:
2005 V3 += STEP3;
2006 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2007 L11:
2008 V3 = N31;
2009 V2 += STEP2;
2010 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2011 L12:
2012 V2 = N21;
2013 V1 += STEP1;
2014 goto BODY_BB; */
2016 static basic_block
2017 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2018 basic_block body_bb)
2020 basic_block last_bb, bb, collapse_bb = NULL;
2021 int i;
2022 gimple_stmt_iterator gsi;
2023 edge e;
2024 tree t;
2025 gimple *stmt;
2027 last_bb = cont_bb;
2028 for (i = fd->collapse - 1; i >= 0; i--)
2030 tree vtype = TREE_TYPE (fd->loops[i].v);
2032 bb = create_empty_bb (last_bb);
2033 add_bb_to_loop (bb, last_bb->loop_father);
2034 gsi = gsi_start_bb (bb);
2036 if (i < fd->collapse - 1)
2038 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2039 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2041 t = fd->loops[i + 1].n1;
2042 t = force_gimple_operand_gsi (&gsi, t,
2043 DECL_P (fd->loops[i + 1].v)
2044 && TREE_ADDRESSABLE (fd->loops[i
2045 + 1].v),
2046 NULL_TREE, false,
2047 GSI_CONTINUE_LINKING);
2048 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2049 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2051 else
2052 collapse_bb = bb;
2054 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2056 if (POINTER_TYPE_P (vtype))
2057 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2058 else
2059 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2060 t = force_gimple_operand_gsi (&gsi, t,
2061 DECL_P (fd->loops[i].v)
2062 && TREE_ADDRESSABLE (fd->loops[i].v),
2063 NULL_TREE, false, GSI_CONTINUE_LINKING);
2064 stmt = gimple_build_assign (fd->loops[i].v, t);
2065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2067 if (i > 0)
2069 t = fd->loops[i].n2;
2070 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2071 false, GSI_CONTINUE_LINKING);
2072 tree v = fd->loops[i].v;
2073 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2074 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2077 stmt = gimple_build_cond_empty (t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2080 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2082 else
2083 make_edge (bb, body_bb, EDGE_FALLTHRU);
2084 last_bb = bb;
2087 return collapse_bb;
2090 /* Expand #pragma omp ordered depend(source). */
2092 static void
2093 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2094 tree *counts, location_t loc)
2096 enum built_in_function source_ix
2097 = fd->iter_type == long_integer_type_node
2098 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2099 gimple *g
2100 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2101 build_fold_addr_expr (counts[fd->ordered]));
2102 gimple_set_location (g, loc);
2103 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2106 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2108 static void
2109 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2110 tree *counts, tree c, location_t loc)
2112 auto_vec<tree, 10> args;
2113 enum built_in_function sink_ix
2114 = fd->iter_type == long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2116 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2117 int i;
2118 gimple_stmt_iterator gsi2 = *gsi;
2119 bool warned_step = false;
2121 for (i = 0; i < fd->ordered; i++)
2123 tree step = NULL_TREE;
2124 off = TREE_PURPOSE (deps);
2125 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2127 step = TREE_OPERAND (off, 1);
2128 off = TREE_OPERAND (off, 0);
2130 if (!integer_zerop (off))
2132 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2133 || fd->loops[i].cond_code == GT_EXPR);
2134 bool forward = fd->loops[i].cond_code == LT_EXPR;
2135 if (step)
2137 /* Non-simple Fortran DO loops. If step is variable,
2138 we don't know at compile even the direction, so can't
2139 warn. */
2140 if (TREE_CODE (step) != INTEGER_CST)
2141 break;
2142 forward = tree_int_cst_sgn (step) != -1;
2144 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2145 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2146 "lexically later iteration");
2147 break;
2149 deps = TREE_CHAIN (deps);
2151 /* If all offsets corresponding to the collapsed loops are zero,
2152 this depend clause can be ignored. FIXME: but there is still a
2153 flush needed. We need to emit one __sync_synchronize () for it
2154 though (perhaps conditionally)? Solve this together with the
2155 conservative dependence folding optimization.
2156 if (i >= fd->collapse)
2157 return; */
2159 deps = OMP_CLAUSE_DECL (c);
2160 gsi_prev (&gsi2);
2161 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2162 edge e2 = split_block_after_labels (e1->dest);
2164 gsi2 = gsi_after_labels (e1->dest);
2165 *gsi = gsi_last_bb (e1->src);
2166 for (i = 0; i < fd->ordered; i++)
2168 tree itype = TREE_TYPE (fd->loops[i].v);
2169 tree step = NULL_TREE;
2170 tree orig_off = NULL_TREE;
2171 if (POINTER_TYPE_P (itype))
2172 itype = sizetype;
2173 if (i)
2174 deps = TREE_CHAIN (deps);
2175 off = TREE_PURPOSE (deps);
2176 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2178 step = TREE_OPERAND (off, 1);
2179 off = TREE_OPERAND (off, 0);
2180 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2181 && integer_onep (fd->loops[i].step)
2182 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2184 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2185 if (step)
2187 off = fold_convert_loc (loc, itype, off);
2188 orig_off = off;
2189 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2192 if (integer_zerop (off))
2193 t = boolean_true_node;
2194 else
2196 tree a;
2197 tree co = fold_convert_loc (loc, itype, off);
2198 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2200 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2201 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2202 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2203 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2204 co);
2206 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2207 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2208 fd->loops[i].v, co);
2209 else
2210 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2211 fd->loops[i].v, co);
2212 if (step)
2214 tree t1, t2;
2215 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2216 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2217 fd->loops[i].n1);
2218 else
2219 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2220 fd->loops[i].n2);
2221 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2222 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2223 fd->loops[i].n2);
2224 else
2225 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2226 fd->loops[i].n1);
2227 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2228 step, build_int_cst (TREE_TYPE (step), 0));
2229 if (TREE_CODE (step) != INTEGER_CST)
2231 t1 = unshare_expr (t1);
2232 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2233 false, GSI_CONTINUE_LINKING);
2234 t2 = unshare_expr (t2);
2235 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2236 false, GSI_CONTINUE_LINKING);
2238 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2239 t, t2, t1);
2241 else if (fd->loops[i].cond_code == LT_EXPR)
2243 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2244 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2245 fd->loops[i].n1);
2246 else
2247 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2248 fd->loops[i].n2);
2250 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2251 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2252 fd->loops[i].n2);
2253 else
2254 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2255 fd->loops[i].n1);
2257 if (cond)
2258 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2259 else
2260 cond = t;
2262 off = fold_convert_loc (loc, itype, off);
2264 if (step
2265 || (fd->loops[i].cond_code == LT_EXPR
2266 ? !integer_onep (fd->loops[i].step)
2267 : !integer_minus_onep (fd->loops[i].step)))
2269 if (step == NULL_TREE
2270 && TYPE_UNSIGNED (itype)
2271 && fd->loops[i].cond_code == GT_EXPR)
2272 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2273 fold_build1_loc (loc, NEGATE_EXPR, itype,
2274 s));
2275 else
2276 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2277 orig_off ? orig_off : off, s);
2278 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2279 build_int_cst (itype, 0));
2280 if (integer_zerop (t) && !warned_step)
2282 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2283 "in the iteration space");
2284 warned_step = true;
2286 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2287 cond, t);
2290 if (i <= fd->collapse - 1 && fd->collapse > 1)
2291 t = fd->loop.v;
2292 else if (counts[i])
2293 t = counts[i];
2294 else
2296 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2297 fd->loops[i].v, fd->loops[i].n1);
2298 t = fold_convert_loc (loc, fd->iter_type, t);
2300 if (step)
2301 /* We have divided off by step already earlier. */;
2302 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2303 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2304 fold_build1_loc (loc, NEGATE_EXPR, itype,
2305 s));
2306 else
2307 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2308 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2309 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2310 off = fold_convert_loc (loc, fd->iter_type, off);
2311 if (i <= fd->collapse - 1 && fd->collapse > 1)
2313 if (i)
2314 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2315 off);
2316 if (i < fd->collapse - 1)
2318 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2319 counts[i]);
2320 continue;
2323 off = unshare_expr (off);
2324 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2325 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2326 true, GSI_SAME_STMT);
2327 args.safe_push (t);
2329 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2330 gimple_set_location (g, loc);
2331 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2333 cond = unshare_expr (cond);
2334 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2335 GSI_CONTINUE_LINKING);
2336 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2337 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2338 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2339 e1->probability = e3->probability.invert ();
2340 e1->flags = EDGE_TRUE_VALUE;
2341 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2343 *gsi = gsi_after_labels (e2->dest);
2346 /* Expand all #pragma omp ordered depend(source) and
2347 #pragma omp ordered depend(sink:...) constructs in the current
2348 #pragma omp for ordered(n) region. */
2350 static void
2351 expand_omp_ordered_source_sink (struct omp_region *region,
2352 struct omp_for_data *fd, tree *counts,
2353 basic_block cont_bb)
2355 struct omp_region *inner;
2356 int i;
2357 for (i = fd->collapse - 1; i < fd->ordered; i++)
2358 if (i == fd->collapse - 1 && fd->collapse > 1)
2359 counts[i] = NULL_TREE;
2360 else if (i >= fd->collapse && !cont_bb)
2361 counts[i] = build_zero_cst (fd->iter_type);
2362 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2363 && integer_onep (fd->loops[i].step))
2364 counts[i] = NULL_TREE;
2365 else
2366 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2367 tree atype
2368 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2369 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2370 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2372 for (inner = region->inner; inner; inner = inner->next)
2373 if (inner->type == GIMPLE_OMP_ORDERED)
2375 gomp_ordered *ord_stmt = inner->ord_stmt;
2376 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2377 location_t loc = gimple_location (ord_stmt);
2378 tree c;
2379 for (c = gimple_omp_ordered_clauses (ord_stmt);
2380 c; c = OMP_CLAUSE_CHAIN (c))
2381 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2382 break;
2383 if (c)
2384 expand_omp_ordered_source (&gsi, fd, counts, loc);
2385 for (c = gimple_omp_ordered_clauses (ord_stmt);
2386 c; c = OMP_CLAUSE_CHAIN (c))
2387 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2388 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2389 gsi_remove (&gsi, true);
2393 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2394 collapsed. */
2396 static basic_block
2397 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2398 basic_block cont_bb, basic_block body_bb,
2399 bool ordered_lastprivate)
2401 if (fd->ordered == fd->collapse)
2402 return cont_bb;
2404 if (!cont_bb)
2406 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2407 for (int i = fd->collapse; i < fd->ordered; i++)
2409 tree type = TREE_TYPE (fd->loops[i].v);
2410 tree n1 = fold_convert (type, fd->loops[i].n1);
2411 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2412 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2413 size_int (i - fd->collapse + 1),
2414 NULL_TREE, NULL_TREE);
2415 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2417 return NULL;
2420 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2422 tree t, type = TREE_TYPE (fd->loops[i].v);
2423 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2424 expand_omp_build_assign (&gsi, fd->loops[i].v,
2425 fold_convert (type, fd->loops[i].n1));
2426 if (counts[i])
2427 expand_omp_build_assign (&gsi, counts[i],
2428 build_zero_cst (fd->iter_type));
2429 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2430 size_int (i - fd->collapse + 1),
2431 NULL_TREE, NULL_TREE);
2432 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2433 if (!gsi_end_p (gsi))
2434 gsi_prev (&gsi);
2435 else
2436 gsi = gsi_last_bb (body_bb);
2437 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2438 basic_block new_body = e1->dest;
2439 if (body_bb == cont_bb)
2440 cont_bb = new_body;
2441 edge e2 = NULL;
2442 basic_block new_header;
2443 if (EDGE_COUNT (cont_bb->preds) > 0)
2445 gsi = gsi_last_bb (cont_bb);
2446 if (POINTER_TYPE_P (type))
2447 t = fold_build_pointer_plus (fd->loops[i].v,
2448 fold_convert (sizetype,
2449 fd->loops[i].step));
2450 else
2451 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2452 fold_convert (type, fd->loops[i].step));
2453 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2454 if (counts[i])
2456 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2457 build_int_cst (fd->iter_type, 1));
2458 expand_omp_build_assign (&gsi, counts[i], t);
2459 t = counts[i];
2461 else
2463 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2464 fd->loops[i].v, fd->loops[i].n1);
2465 t = fold_convert (fd->iter_type, t);
2466 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2467 true, GSI_SAME_STMT);
2469 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2470 size_int (i - fd->collapse + 1),
2471 NULL_TREE, NULL_TREE);
2472 expand_omp_build_assign (&gsi, aref, t);
2473 gsi_prev (&gsi);
2474 e2 = split_block (cont_bb, gsi_stmt (gsi));
2475 new_header = e2->dest;
2477 else
2478 new_header = cont_bb;
2479 gsi = gsi_after_labels (new_header);
2480 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2481 true, GSI_SAME_STMT);
2482 tree n2
2483 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2484 true, NULL_TREE, true, GSI_SAME_STMT);
2485 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2486 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2487 edge e3 = split_block (new_header, gsi_stmt (gsi));
2488 cont_bb = e3->dest;
2489 remove_edge (e1);
2490 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2491 e3->flags = EDGE_FALSE_VALUE;
2492 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2493 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2494 e1->probability = e3->probability.invert ();
2496 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2497 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2499 if (e2)
2501 struct loop *loop = alloc_loop ();
2502 loop->header = new_header;
2503 loop->latch = e2->src;
2504 add_loop (loop, body_bb->loop_father);
2508 /* If there are any lastprivate clauses and it is possible some loops
2509 might have zero iterations, ensure all the decls are initialized,
2510 otherwise we could crash evaluating C++ class iterators with lastprivate
2511 clauses. */
2512 bool need_inits = false;
2513 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2514 if (need_inits)
2516 tree type = TREE_TYPE (fd->loops[i].v);
2517 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2518 expand_omp_build_assign (&gsi, fd->loops[i].v,
2519 fold_convert (type, fd->loops[i].n1));
2521 else
2523 tree type = TREE_TYPE (fd->loops[i].v);
2524 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2525 boolean_type_node,
2526 fold_convert (type, fd->loops[i].n1),
2527 fold_convert (type, fd->loops[i].n2));
2528 if (!integer_onep (this_cond))
2529 need_inits = true;
2532 return cont_bb;
2535 /* A subroutine of expand_omp_for. Generate code for a parallel
2536 loop with any schedule. Given parameters:
2538 for (V = N1; V cond N2; V += STEP) BODY;
2540 where COND is "<" or ">", we generate pseudocode
2542 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2543 if (more) goto L0; else goto L3;
2545 V = istart0;
2546 iend = iend0;
2548 BODY;
2549 V += STEP;
2550 if (V cond iend) goto L1; else goto L2;
2552 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2555 If this is a combined omp parallel loop, instead of the call to
2556 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2557 If this is gimple_omp_for_combined_p loop, then instead of assigning
2558 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2559 inner GIMPLE_OMP_FOR and V += STEP; and
2560 if (V cond iend) goto L1; else goto L2; are removed.
2562 For collapsed loops, given parameters:
2563 collapse(3)
2564 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2565 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2566 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2567 BODY;
2569 we generate pseudocode
2571 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2572 if (cond3 is <)
2573 adj = STEP3 - 1;
2574 else
2575 adj = STEP3 + 1;
2576 count3 = (adj + N32 - N31) / STEP3;
2577 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2578 if (cond2 is <)
2579 adj = STEP2 - 1;
2580 else
2581 adj = STEP2 + 1;
2582 count2 = (adj + N22 - N21) / STEP2;
2583 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2584 if (cond1 is <)
2585 adj = STEP1 - 1;
2586 else
2587 adj = STEP1 + 1;
2588 count1 = (adj + N12 - N11) / STEP1;
2589 count = count1 * count2 * count3;
2590 goto Z1;
2592 count = 0;
2594 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2595 if (more) goto L0; else goto L3;
2597 V = istart0;
2598 T = V;
2599 V3 = N31 + (T % count3) * STEP3;
2600 T = T / count3;
2601 V2 = N21 + (T % count2) * STEP2;
2602 T = T / count2;
2603 V1 = N11 + T * STEP1;
2604 iend = iend0;
2606 BODY;
2607 V += 1;
2608 if (V < iend) goto L10; else goto L2;
2609 L10:
2610 V3 += STEP3;
2611 if (V3 cond3 N32) goto L1; else goto L11;
2612 L11:
2613 V3 = N31;
2614 V2 += STEP2;
2615 if (V2 cond2 N22) goto L1; else goto L12;
2616 L12:
2617 V2 = N21;
2618 V1 += STEP1;
2619 goto L1;
2621 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2626 static void
2627 expand_omp_for_generic (struct omp_region *region,
2628 struct omp_for_data *fd,
2629 enum built_in_function start_fn,
2630 enum built_in_function next_fn,
2631 tree sched_arg,
2632 gimple *inner_stmt)
2634 tree type, istart0, iend0, iend;
2635 tree t, vmain, vback, bias = NULL_TREE;
2636 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2637 basic_block l2_bb = NULL, l3_bb = NULL;
2638 gimple_stmt_iterator gsi;
2639 gassign *assign_stmt;
2640 bool in_combined_parallel = is_combined_parallel (region);
2641 bool broken_loop = region->cont == NULL;
2642 edge e, ne;
2643 tree *counts = NULL;
2644 int i;
2645 bool ordered_lastprivate = false;
2647 gcc_assert (!broken_loop || !in_combined_parallel);
2648 gcc_assert (fd->iter_type == long_integer_type_node
2649 || !in_combined_parallel);
2651 entry_bb = region->entry;
2652 cont_bb = region->cont;
2653 collapse_bb = NULL;
2654 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2655 gcc_assert (broken_loop
2656 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2657 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2658 l1_bb = single_succ (l0_bb);
2659 if (!broken_loop)
2661 l2_bb = create_empty_bb (cont_bb);
2662 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2663 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2664 == l1_bb));
2665 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2667 else
2668 l2_bb = NULL;
2669 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2670 exit_bb = region->exit;
2672 gsi = gsi_last_nondebug_bb (entry_bb);
2674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2675 if (fd->ordered
2676 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2677 OMP_CLAUSE_LASTPRIVATE))
2678 ordered_lastprivate = false;
2679 tree reductions = NULL_TREE;
2680 tree mem = NULL_TREE;
2681 if (sched_arg)
2683 if (fd->have_reductemp)
2685 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2686 OMP_CLAUSE__REDUCTEMP_);
2687 reductions = OMP_CLAUSE_DECL (c);
2688 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2689 gimple *g = SSA_NAME_DEF_STMT (reductions);
2690 reductions = gimple_assign_rhs1 (g);
2691 OMP_CLAUSE_DECL (c) = reductions;
2692 entry_bb = gimple_bb (g);
2693 edge e = split_block (entry_bb, g);
2694 if (region->entry == entry_bb)
2695 region->entry = e->dest;
2696 gsi = gsi_last_bb (entry_bb);
2698 else
2699 reductions = null_pointer_node;
2700 /* For now. */
2701 mem = null_pointer_node;
2703 if (fd->collapse > 1 || fd->ordered)
2705 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2706 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2708 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2709 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2710 zero_iter1_bb, first_zero_iter1,
2711 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2713 if (zero_iter1_bb)
2715 /* Some counts[i] vars might be uninitialized if
2716 some loop has zero iterations. But the body shouldn't
2717 be executed in that case, so just avoid uninit warnings. */
2718 for (i = first_zero_iter1;
2719 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2720 if (SSA_VAR_P (counts[i]))
2721 TREE_NO_WARNING (counts[i]) = 1;
2722 gsi_prev (&gsi);
2723 e = split_block (entry_bb, gsi_stmt (gsi));
2724 entry_bb = e->dest;
2725 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2726 gsi = gsi_last_nondebug_bb (entry_bb);
2727 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2728 get_immediate_dominator (CDI_DOMINATORS,
2729 zero_iter1_bb));
2731 if (zero_iter2_bb)
2733 /* Some counts[i] vars might be uninitialized if
2734 some loop has zero iterations. But the body shouldn't
2735 be executed in that case, so just avoid uninit warnings. */
2736 for (i = first_zero_iter2; i < fd->ordered; i++)
2737 if (SSA_VAR_P (counts[i]))
2738 TREE_NO_WARNING (counts[i]) = 1;
2739 if (zero_iter1_bb)
2740 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2741 else
2743 gsi_prev (&gsi);
2744 e = split_block (entry_bb, gsi_stmt (gsi));
2745 entry_bb = e->dest;
2746 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2747 gsi = gsi_last_nondebug_bb (entry_bb);
2748 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2749 get_immediate_dominator
2750 (CDI_DOMINATORS, zero_iter2_bb));
2753 if (fd->collapse == 1)
2755 counts[0] = fd->loop.n2;
2756 fd->loop = fd->loops[0];
2760 type = TREE_TYPE (fd->loop.v);
2761 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2762 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2763 TREE_ADDRESSABLE (istart0) = 1;
2764 TREE_ADDRESSABLE (iend0) = 1;
2766 /* See if we need to bias by LLONG_MIN. */
2767 if (fd->iter_type == long_long_unsigned_type_node
2768 && TREE_CODE (type) == INTEGER_TYPE
2769 && !TYPE_UNSIGNED (type)
2770 && fd->ordered == 0)
2772 tree n1, n2;
2774 if (fd->loop.cond_code == LT_EXPR)
2776 n1 = fd->loop.n1;
2777 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2779 else
2781 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2782 n2 = fd->loop.n1;
2784 if (TREE_CODE (n1) != INTEGER_CST
2785 || TREE_CODE (n2) != INTEGER_CST
2786 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2787 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2790 gimple_stmt_iterator gsif = gsi;
2791 gsi_prev (&gsif);
2793 tree arr = NULL_TREE;
2794 if (in_combined_parallel)
2796 gcc_assert (fd->ordered == 0);
2797 /* In a combined parallel loop, emit a call to
2798 GOMP_loop_foo_next. */
2799 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2800 build_fold_addr_expr (istart0),
2801 build_fold_addr_expr (iend0));
2803 else
2805 tree t0, t1, t2, t3, t4;
2806 /* If this is not a combined parallel loop, emit a call to
2807 GOMP_loop_foo_start in ENTRY_BB. */
2808 t4 = build_fold_addr_expr (iend0);
2809 t3 = build_fold_addr_expr (istart0);
2810 if (fd->ordered)
2812 t0 = build_int_cst (unsigned_type_node,
2813 fd->ordered - fd->collapse + 1);
2814 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2815 fd->ordered
2816 - fd->collapse + 1),
2817 ".omp_counts");
2818 DECL_NAMELESS (arr) = 1;
2819 TREE_ADDRESSABLE (arr) = 1;
2820 TREE_STATIC (arr) = 1;
2821 vec<constructor_elt, va_gc> *v;
2822 vec_alloc (v, fd->ordered - fd->collapse + 1);
2823 int idx;
2825 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2827 tree c;
2828 if (idx == 0 && fd->collapse > 1)
2829 c = fd->loop.n2;
2830 else
2831 c = counts[idx + fd->collapse - 1];
2832 tree purpose = size_int (idx);
2833 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2834 if (TREE_CODE (c) != INTEGER_CST)
2835 TREE_STATIC (arr) = 0;
2838 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2839 if (!TREE_STATIC (arr))
2840 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2841 void_type_node, arr),
2842 true, NULL_TREE, true, GSI_SAME_STMT);
2843 t1 = build_fold_addr_expr (arr);
2844 t2 = NULL_TREE;
2846 else
2848 t2 = fold_convert (fd->iter_type, fd->loop.step);
2849 t1 = fd->loop.n2;
2850 t0 = fd->loop.n1;
2851 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2853 tree innerc
2854 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2855 OMP_CLAUSE__LOOPTEMP_);
2856 gcc_assert (innerc);
2857 t0 = OMP_CLAUSE_DECL (innerc);
2858 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2859 OMP_CLAUSE__LOOPTEMP_);
2860 gcc_assert (innerc);
2861 t1 = OMP_CLAUSE_DECL (innerc);
2863 if (POINTER_TYPE_P (TREE_TYPE (t0))
2864 && TYPE_PRECISION (TREE_TYPE (t0))
2865 != TYPE_PRECISION (fd->iter_type))
2867 /* Avoid casting pointers to integer of a different size. */
2868 tree itype = signed_type_for (type);
2869 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2870 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2872 else
2874 t1 = fold_convert (fd->iter_type, t1);
2875 t0 = fold_convert (fd->iter_type, t0);
2877 if (bias)
2879 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2880 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2883 if (fd->iter_type == long_integer_type_node || fd->ordered)
2885 if (fd->chunk_size)
2887 t = fold_convert (fd->iter_type, fd->chunk_size);
2888 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2889 if (sched_arg)
2891 if (fd->ordered)
2892 t = build_call_expr (builtin_decl_explicit (start_fn),
2893 8, t0, t1, sched_arg, t, t3, t4,
2894 reductions, mem);
2895 else
2896 t = build_call_expr (builtin_decl_explicit (start_fn),
2897 9, t0, t1, t2, sched_arg, t, t3, t4,
2898 reductions, mem);
2900 else if (fd->ordered)
2901 t = build_call_expr (builtin_decl_explicit (start_fn),
2902 5, t0, t1, t, t3, t4);
2903 else
2904 t = build_call_expr (builtin_decl_explicit (start_fn),
2905 6, t0, t1, t2, t, t3, t4);
2907 else if (fd->ordered)
2908 t = build_call_expr (builtin_decl_explicit (start_fn),
2909 4, t0, t1, t3, t4);
2910 else
2911 t = build_call_expr (builtin_decl_explicit (start_fn),
2912 5, t0, t1, t2, t3, t4);
2914 else
2916 tree t5;
2917 tree c_bool_type;
2918 tree bfn_decl;
2920 /* The GOMP_loop_ull_*start functions have additional boolean
2921 argument, true for < loops and false for > loops.
2922 In Fortran, the C bool type can be different from
2923 boolean_type_node. */
2924 bfn_decl = builtin_decl_explicit (start_fn);
2925 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2926 t5 = build_int_cst (c_bool_type,
2927 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2928 if (fd->chunk_size)
2930 tree bfn_decl = builtin_decl_explicit (start_fn);
2931 t = fold_convert (fd->iter_type, fd->chunk_size);
2932 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2933 if (sched_arg)
2934 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2935 t, t3, t4, reductions, mem);
2936 else
2937 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2939 else
2940 t = build_call_expr (builtin_decl_explicit (start_fn),
2941 6, t5, t0, t1, t2, t3, t4);
2944 if (TREE_TYPE (t) != boolean_type_node)
2945 t = fold_build2 (NE_EXPR, boolean_type_node,
2946 t, build_int_cst (TREE_TYPE (t), 0));
2947 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2948 true, GSI_SAME_STMT);
2949 if (arr && !TREE_STATIC (arr))
2951 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2952 TREE_THIS_VOLATILE (clobber) = 1;
2953 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2954 GSI_SAME_STMT);
2956 if (fd->have_reductemp)
2958 gimple *g = gsi_stmt (gsi);
2959 gsi_remove (&gsi, true);
2960 release_ssa_name (gimple_assign_lhs (g));
2962 entry_bb = region->entry;
2963 gsi = gsi_last_nondebug_bb (entry_bb);
2965 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2967 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2969 /* Remove the GIMPLE_OMP_FOR statement. */
2970 gsi_remove (&gsi, true);
2972 if (gsi_end_p (gsif))
2973 gsif = gsi_after_labels (gsi_bb (gsif));
2974 gsi_next (&gsif);
2976 /* Iteration setup for sequential loop goes in L0_BB. */
2977 tree startvar = fd->loop.v;
2978 tree endvar = NULL_TREE;
2980 if (gimple_omp_for_combined_p (fd->for_stmt))
2982 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2983 && gimple_omp_for_kind (inner_stmt)
2984 == GF_OMP_FOR_KIND_SIMD);
2985 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2986 OMP_CLAUSE__LOOPTEMP_);
2987 gcc_assert (innerc);
2988 startvar = OMP_CLAUSE_DECL (innerc);
2989 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2990 OMP_CLAUSE__LOOPTEMP_);
2991 gcc_assert (innerc);
2992 endvar = OMP_CLAUSE_DECL (innerc);
2995 gsi = gsi_start_bb (l0_bb);
2996 t = istart0;
2997 if (fd->ordered && fd->collapse == 1)
2998 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2999 fold_convert (fd->iter_type, fd->loop.step));
3000 else if (bias)
3001 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3002 if (fd->ordered && fd->collapse == 1)
3004 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3005 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3006 fd->loop.n1, fold_convert (sizetype, t));
3007 else
3009 t = fold_convert (TREE_TYPE (startvar), t);
3010 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3011 fd->loop.n1, t);
3014 else
3016 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3017 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3018 t = fold_convert (TREE_TYPE (startvar), t);
3020 t = force_gimple_operand_gsi (&gsi, t,
3021 DECL_P (startvar)
3022 && TREE_ADDRESSABLE (startvar),
3023 NULL_TREE, false, GSI_CONTINUE_LINKING);
3024 assign_stmt = gimple_build_assign (startvar, t);
3025 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3027 t = iend0;
3028 if (fd->ordered && fd->collapse == 1)
3029 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3030 fold_convert (fd->iter_type, fd->loop.step));
3031 else if (bias)
3032 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3033 if (fd->ordered && fd->collapse == 1)
3035 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3036 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3037 fd->loop.n1, fold_convert (sizetype, t));
3038 else
3040 t = fold_convert (TREE_TYPE (startvar), t);
3041 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3042 fd->loop.n1, t);
3045 else
3047 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3048 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3049 t = fold_convert (TREE_TYPE (startvar), t);
3051 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3052 false, GSI_CONTINUE_LINKING);
3053 if (endvar)
3055 assign_stmt = gimple_build_assign (endvar, iend);
3056 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3057 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3058 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3059 else
3060 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3061 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063 /* Handle linear clause adjustments. */
3064 tree itercnt = NULL_TREE;
3065 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3066 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3067 c; c = OMP_CLAUSE_CHAIN (c))
3068 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3069 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3071 tree d = OMP_CLAUSE_DECL (c);
3072 bool is_ref = omp_is_reference (d);
3073 tree t = d, a, dest;
3074 if (is_ref)
3075 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3076 tree type = TREE_TYPE (t);
3077 if (POINTER_TYPE_P (type))
3078 type = sizetype;
3079 dest = unshare_expr (t);
3080 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3081 expand_omp_build_assign (&gsif, v, t);
3082 if (itercnt == NULL_TREE)
3084 itercnt = startvar;
3085 tree n1 = fd->loop.n1;
3086 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3088 itercnt
3089 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3090 itercnt);
3091 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3093 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3094 itercnt, n1);
3095 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3096 itercnt, fd->loop.step);
3097 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3098 NULL_TREE, false,
3099 GSI_CONTINUE_LINKING);
3101 a = fold_build2 (MULT_EXPR, type,
3102 fold_convert (type, itercnt),
3103 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3104 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3105 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3106 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3107 false, GSI_CONTINUE_LINKING);
3108 assign_stmt = gimple_build_assign (dest, t);
3109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3111 if (fd->collapse > 1)
3112 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3114 if (fd->ordered)
3116 /* Until now, counts array contained number of iterations or
3117 variable containing it for ith loop. From now on, we need
3118 those counts only for collapsed loops, and only for the 2nd
3119 till the last collapsed one. Move those one element earlier,
3120 we'll use counts[fd->collapse - 1] for the first source/sink
3121 iteration counter and so on and counts[fd->ordered]
3122 as the array holding the current counter values for
3123 depend(source). */
3124 if (fd->collapse > 1)
3125 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3126 if (broken_loop)
3128 int i;
3129 for (i = fd->collapse; i < fd->ordered; i++)
3131 tree type = TREE_TYPE (fd->loops[i].v);
3132 tree this_cond
3133 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3134 fold_convert (type, fd->loops[i].n1),
3135 fold_convert (type, fd->loops[i].n2));
3136 if (!integer_onep (this_cond))
3137 break;
3139 if (i < fd->ordered)
3141 cont_bb
3142 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3143 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3144 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3145 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3146 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3147 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3148 make_edge (cont_bb, l1_bb, 0);
3149 l2_bb = create_empty_bb (cont_bb);
3150 broken_loop = false;
3153 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3154 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3155 ordered_lastprivate);
3156 if (counts[fd->collapse - 1])
3158 gcc_assert (fd->collapse == 1);
3159 gsi = gsi_last_bb (l0_bb);
3160 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3161 istart0, true);
3162 gsi = gsi_last_bb (cont_bb);
3163 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3164 build_int_cst (fd->iter_type, 1));
3165 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3166 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3167 size_zero_node, NULL_TREE, NULL_TREE);
3168 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3169 t = counts[fd->collapse - 1];
3171 else if (fd->collapse > 1)
3172 t = fd->loop.v;
3173 else
3175 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3176 fd->loops[0].v, fd->loops[0].n1);
3177 t = fold_convert (fd->iter_type, t);
3179 gsi = gsi_last_bb (l0_bb);
3180 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3181 size_zero_node, NULL_TREE, NULL_TREE);
3182 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3183 false, GSI_CONTINUE_LINKING);
3184 expand_omp_build_assign (&gsi, aref, t, true);
3187 if (!broken_loop)
3189 /* Code to control the increment and predicate for the sequential
3190 loop goes in the CONT_BB. */
3191 gsi = gsi_last_nondebug_bb (cont_bb);
3192 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3193 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3194 vmain = gimple_omp_continue_control_use (cont_stmt);
3195 vback = gimple_omp_continue_control_def (cont_stmt);
3197 if (!gimple_omp_for_combined_p (fd->for_stmt))
3199 if (POINTER_TYPE_P (type))
3200 t = fold_build_pointer_plus (vmain, fd->loop.step);
3201 else
3202 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3203 t = force_gimple_operand_gsi (&gsi, t,
3204 DECL_P (vback)
3205 && TREE_ADDRESSABLE (vback),
3206 NULL_TREE, true, GSI_SAME_STMT);
3207 assign_stmt = gimple_build_assign (vback, t);
3208 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3210 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3212 if (fd->collapse > 1)
3213 t = fd->loop.v;
3214 else
3216 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3217 fd->loops[0].v, fd->loops[0].n1);
3218 t = fold_convert (fd->iter_type, t);
3220 tree aref = build4 (ARRAY_REF, fd->iter_type,
3221 counts[fd->ordered], size_zero_node,
3222 NULL_TREE, NULL_TREE);
3223 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3224 true, GSI_SAME_STMT);
3225 expand_omp_build_assign (&gsi, aref, t);
3228 t = build2 (fd->loop.cond_code, boolean_type_node,
3229 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3230 iend);
3231 gcond *cond_stmt = gimple_build_cond_empty (t);
3232 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3235 /* Remove GIMPLE_OMP_CONTINUE. */
3236 gsi_remove (&gsi, true);
3238 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3239 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3241 /* Emit code to get the next parallel iteration in L2_BB. */
3242 gsi = gsi_start_bb (l2_bb);
3244 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3245 build_fold_addr_expr (istart0),
3246 build_fold_addr_expr (iend0));
3247 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3248 false, GSI_CONTINUE_LINKING);
3249 if (TREE_TYPE (t) != boolean_type_node)
3250 t = fold_build2 (NE_EXPR, boolean_type_node,
3251 t, build_int_cst (TREE_TYPE (t), 0));
3252 gcond *cond_stmt = gimple_build_cond_empty (t);
3253 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3256 /* Add the loop cleanup function. */
3257 gsi = gsi_last_nondebug_bb (exit_bb);
3258 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3259 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3260 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3261 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3262 else
3263 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3264 gcall *call_stmt = gimple_build_call (t, 0);
3265 if (fd->ordered)
3267 tree arr = counts[fd->ordered];
3268 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3269 TREE_THIS_VOLATILE (clobber) = 1;
3270 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3271 GSI_SAME_STMT);
3273 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3275 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3276 if (fd->have_reductemp)
3278 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3279 gimple_call_lhs (call_stmt));
3280 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3283 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3284 gsi_remove (&gsi, true);
3286 /* Connect the new blocks. */
3287 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3288 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3290 if (!broken_loop)
3292 gimple_seq phis;
3294 e = find_edge (cont_bb, l3_bb);
3295 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3297 phis = phi_nodes (l3_bb);
3298 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3300 gimple *phi = gsi_stmt (gsi);
3301 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3302 PHI_ARG_DEF_FROM_EDGE (phi, e));
3304 remove_edge (e);
3306 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3307 e = find_edge (cont_bb, l1_bb);
3308 if (e == NULL)
3310 e = BRANCH_EDGE (cont_bb);
3311 gcc_assert (single_succ (e->dest) == l1_bb);
3313 if (gimple_omp_for_combined_p (fd->for_stmt))
3315 remove_edge (e);
3316 e = NULL;
3318 else if (fd->collapse > 1)
3320 remove_edge (e);
3321 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3323 else
3324 e->flags = EDGE_TRUE_VALUE;
3325 if (e)
3327 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3328 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3330 else
3332 e = find_edge (cont_bb, l2_bb);
3333 e->flags = EDGE_FALLTHRU;
3335 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3337 if (gimple_in_ssa_p (cfun))
3339 /* Add phis to the outer loop that connect to the phis in the inner,
3340 original loop, and move the loop entry value of the inner phi to
3341 the loop entry value of the outer phi. */
3342 gphi_iterator psi;
3343 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3345 location_t locus;
3346 gphi *nphi;
3347 gphi *exit_phi = psi.phi ();
3349 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3350 continue;
3352 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3353 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3355 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3356 edge latch_to_l1 = find_edge (latch, l1_bb);
3357 gphi *inner_phi
3358 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3360 tree t = gimple_phi_result (exit_phi);
3361 tree new_res = copy_ssa_name (t, NULL);
3362 nphi = create_phi_node (new_res, l0_bb);
3364 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3365 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3366 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3367 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3368 add_phi_arg (nphi, t, entry_to_l0, locus);
3370 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3371 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3373 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3377 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3378 recompute_dominator (CDI_DOMINATORS, l2_bb));
3379 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3380 recompute_dominator (CDI_DOMINATORS, l3_bb));
3381 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3382 recompute_dominator (CDI_DOMINATORS, l0_bb));
3383 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3384 recompute_dominator (CDI_DOMINATORS, l1_bb));
3386 /* We enter expand_omp_for_generic with a loop. This original loop may
3387 have its own loop struct, or it may be part of an outer loop struct
3388 (which may be the fake loop). */
3389 struct loop *outer_loop = entry_bb->loop_father;
3390 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3392 add_bb_to_loop (l2_bb, outer_loop);
3394 /* We've added a new loop around the original loop. Allocate the
3395 corresponding loop struct. */
3396 struct loop *new_loop = alloc_loop ();
3397 new_loop->header = l0_bb;
3398 new_loop->latch = l2_bb;
3399 add_loop (new_loop, outer_loop);
3401 /* Allocate a loop structure for the original loop unless we already
3402 had one. */
3403 if (!orig_loop_has_loop_struct
3404 && !gimple_omp_for_combined_p (fd->for_stmt))
3406 struct loop *orig_loop = alloc_loop ();
3407 orig_loop->header = l1_bb;
3408 /* The loop may have multiple latches. */
3409 add_loop (orig_loop, new_loop);
3414 /* A subroutine of expand_omp_for. Generate code for a parallel
3415 loop with static schedule and no specified chunk size. Given
3416 parameters:
3418 for (V = N1; V cond N2; V += STEP) BODY;
3420 where COND is "<" or ">", we generate pseudocode
3422 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3423 if (cond is <)
3424 adj = STEP - 1;
3425 else
3426 adj = STEP + 1;
3427 if ((__typeof (V)) -1 > 0 && cond is >)
3428 n = -(adj + N2 - N1) / -STEP;
3429 else
3430 n = (adj + N2 - N1) / STEP;
3431 q = n / nthreads;
3432 tt = n % nthreads;
3433 if (threadid < tt) goto L3; else goto L4;
3435 tt = 0;
3436 q = q + 1;
3438 s0 = q * threadid + tt;
3439 e0 = s0 + q;
3440 V = s0 * STEP + N1;
3441 if (s0 >= e0) goto L2; else goto L0;
3443 e = e0 * STEP + N1;
3445 BODY;
3446 V += STEP;
3447 if (V cond e) goto L1;
3451 static void
3452 expand_omp_for_static_nochunk (struct omp_region *region,
3453 struct omp_for_data *fd,
3454 gimple *inner_stmt)
3456 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3457 tree type, itype, vmain, vback;
3458 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3459 basic_block body_bb, cont_bb, collapse_bb = NULL;
3460 basic_block fin_bb;
3461 gimple_stmt_iterator gsi;
3462 edge ep;
3463 bool broken_loop = region->cont == NULL;
3464 tree *counts = NULL;
3465 tree n1, n2, step;
3466 tree reductions = NULL_TREE;
3468 itype = type = TREE_TYPE (fd->loop.v);
3469 if (POINTER_TYPE_P (type))
3470 itype = signed_type_for (type);
3472 entry_bb = region->entry;
3473 cont_bb = region->cont;
3474 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3475 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3476 gcc_assert (broken_loop
3477 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3478 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3479 body_bb = single_succ (seq_start_bb);
3480 if (!broken_loop)
3482 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3483 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3484 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3486 exit_bb = region->exit;
3488 /* Iteration space partitioning goes in ENTRY_BB. */
3489 gsi = gsi_last_nondebug_bb (entry_bb);
3490 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3492 if (fd->collapse > 1)
3494 int first_zero_iter = -1, dummy = -1;
3495 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3497 counts = XALLOCAVEC (tree, fd->collapse);
3498 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3499 fin_bb, first_zero_iter,
3500 dummy_bb, dummy, l2_dom_bb);
3501 t = NULL_TREE;
3503 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3504 t = integer_one_node;
3505 else
3506 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3507 fold_convert (type, fd->loop.n1),
3508 fold_convert (type, fd->loop.n2));
3509 if (fd->collapse == 1
3510 && TYPE_UNSIGNED (type)
3511 && (t == NULL_TREE || !integer_onep (t)))
3513 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3514 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3515 true, GSI_SAME_STMT);
3516 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3517 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3518 true, GSI_SAME_STMT);
3519 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3520 NULL_TREE, NULL_TREE);
3521 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3522 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3523 expand_omp_regimplify_p, NULL, NULL)
3524 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3525 expand_omp_regimplify_p, NULL, NULL))
3527 gsi = gsi_for_stmt (cond_stmt);
3528 gimple_regimplify_operands (cond_stmt, &gsi);
3530 ep = split_block (entry_bb, cond_stmt);
3531 ep->flags = EDGE_TRUE_VALUE;
3532 entry_bb = ep->dest;
3533 ep->probability = profile_probability::very_likely ();
3534 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3535 ep->probability = profile_probability::very_unlikely ();
3536 if (gimple_in_ssa_p (cfun))
3538 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3539 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3540 !gsi_end_p (gpi); gsi_next (&gpi))
3542 gphi *phi = gpi.phi ();
3543 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3544 ep, UNKNOWN_LOCATION);
3547 gsi = gsi_last_bb (entry_bb);
3550 if (fd->have_reductemp)
3552 tree t1 = build_int_cst (long_integer_type_node, 0);
3553 tree t2 = build_int_cst (long_integer_type_node, 1);
3554 tree t3 = build_int_cstu (long_integer_type_node,
3555 (HOST_WIDE_INT_1U << 31) + 1);
3556 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3557 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3558 reductions = OMP_CLAUSE_DECL (clauses);
3559 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3560 gimple *g = SSA_NAME_DEF_STMT (reductions);
3561 reductions = gimple_assign_rhs1 (g);
3562 OMP_CLAUSE_DECL (clauses) = reductions;
3563 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3564 tree t
3565 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3566 9, t1, t2, t2, t3, t1, null_pointer_node,
3567 null_pointer_node, reductions, null_pointer_node);
3568 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3569 true, GSI_SAME_STMT);
3570 gsi_remove (&gsi2, true);
3571 release_ssa_name (gimple_assign_lhs (g));
3573 switch (gimple_omp_for_kind (fd->for_stmt))
3575 case GF_OMP_FOR_KIND_FOR:
3576 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3577 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3578 break;
3579 case GF_OMP_FOR_KIND_DISTRIBUTE:
3580 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3581 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3582 break;
3583 default:
3584 gcc_unreachable ();
3586 nthreads = build_call_expr (nthreads, 0);
3587 nthreads = fold_convert (itype, nthreads);
3588 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3589 true, GSI_SAME_STMT);
3590 threadid = build_call_expr (threadid, 0);
3591 threadid = fold_convert (itype, threadid);
3592 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3593 true, GSI_SAME_STMT);
3595 n1 = fd->loop.n1;
3596 n2 = fd->loop.n2;
3597 step = fd->loop.step;
3598 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3600 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3601 OMP_CLAUSE__LOOPTEMP_);
3602 gcc_assert (innerc);
3603 n1 = OMP_CLAUSE_DECL (innerc);
3604 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3605 OMP_CLAUSE__LOOPTEMP_);
3606 gcc_assert (innerc);
3607 n2 = OMP_CLAUSE_DECL (innerc);
3609 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3610 true, NULL_TREE, true, GSI_SAME_STMT);
3611 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3612 true, NULL_TREE, true, GSI_SAME_STMT);
3613 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3614 true, NULL_TREE, true, GSI_SAME_STMT);
3616 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3617 t = fold_build2 (PLUS_EXPR, itype, step, t);
3618 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3619 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3620 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3621 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3622 fold_build1 (NEGATE_EXPR, itype, t),
3623 fold_build1 (NEGATE_EXPR, itype, step));
3624 else
3625 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3626 t = fold_convert (itype, t);
3627 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3629 q = create_tmp_reg (itype, "q");
3630 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3631 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3632 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3634 tt = create_tmp_reg (itype, "tt");
3635 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3636 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3637 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3639 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3640 gcond *cond_stmt = gimple_build_cond_empty (t);
3641 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3643 second_bb = split_block (entry_bb, cond_stmt)->dest;
3644 gsi = gsi_last_nondebug_bb (second_bb);
3645 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3647 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3648 GSI_SAME_STMT);
3649 gassign *assign_stmt
3650 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3651 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3653 third_bb = split_block (second_bb, assign_stmt)->dest;
3654 gsi = gsi_last_nondebug_bb (third_bb);
3655 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3657 t = build2 (MULT_EXPR, itype, q, threadid);
3658 t = build2 (PLUS_EXPR, itype, t, tt);
3659 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3661 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3662 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3664 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3665 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3667 /* Remove the GIMPLE_OMP_FOR statement. */
3668 gsi_remove (&gsi, true);
3670 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3671 gsi = gsi_start_bb (seq_start_bb);
3673 tree startvar = fd->loop.v;
3674 tree endvar = NULL_TREE;
3676 if (gimple_omp_for_combined_p (fd->for_stmt))
3678 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3679 ? gimple_omp_parallel_clauses (inner_stmt)
3680 : gimple_omp_for_clauses (inner_stmt);
3681 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3682 gcc_assert (innerc);
3683 startvar = OMP_CLAUSE_DECL (innerc);
3684 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3685 OMP_CLAUSE__LOOPTEMP_);
3686 gcc_assert (innerc);
3687 endvar = OMP_CLAUSE_DECL (innerc);
3688 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3689 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3691 int i;
3692 for (i = 1; i < fd->collapse; i++)
3694 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3695 OMP_CLAUSE__LOOPTEMP_);
3696 gcc_assert (innerc);
3698 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3699 OMP_CLAUSE__LOOPTEMP_);
3700 if (innerc)
3702 /* If needed (distribute parallel for with lastprivate),
3703 propagate down the total number of iterations. */
3704 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3705 fd->loop.n2);
3706 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3707 GSI_CONTINUE_LINKING);
3708 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3709 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3713 t = fold_convert (itype, s0);
3714 t = fold_build2 (MULT_EXPR, itype, t, step);
3715 if (POINTER_TYPE_P (type))
3717 t = fold_build_pointer_plus (n1, t);
3718 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3719 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3720 t = fold_convert (signed_type_for (type), t);
3722 else
3723 t = fold_build2 (PLUS_EXPR, type, t, n1);
3724 t = fold_convert (TREE_TYPE (startvar), t);
3725 t = force_gimple_operand_gsi (&gsi, t,
3726 DECL_P (startvar)
3727 && TREE_ADDRESSABLE (startvar),
3728 NULL_TREE, false, GSI_CONTINUE_LINKING);
3729 assign_stmt = gimple_build_assign (startvar, t);
3730 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3732 t = fold_convert (itype, e0);
3733 t = fold_build2 (MULT_EXPR, itype, t, step);
3734 if (POINTER_TYPE_P (type))
3736 t = fold_build_pointer_plus (n1, t);
3737 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3738 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3739 t = fold_convert (signed_type_for (type), t);
3741 else
3742 t = fold_build2 (PLUS_EXPR, type, t, n1);
3743 t = fold_convert (TREE_TYPE (startvar), t);
3744 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3745 false, GSI_CONTINUE_LINKING);
3746 if (endvar)
3748 assign_stmt = gimple_build_assign (endvar, e);
3749 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3750 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3751 assign_stmt = gimple_build_assign (fd->loop.v, e);
3752 else
3753 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3754 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3756 /* Handle linear clause adjustments. */
3757 tree itercnt = NULL_TREE;
3758 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3759 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3760 c; c = OMP_CLAUSE_CHAIN (c))
3761 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3762 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3764 tree d = OMP_CLAUSE_DECL (c);
3765 bool is_ref = omp_is_reference (d);
3766 tree t = d, a, dest;
3767 if (is_ref)
3768 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3769 if (itercnt == NULL_TREE)
3771 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3773 itercnt = fold_build2 (MINUS_EXPR, itype,
3774 fold_convert (itype, n1),
3775 fold_convert (itype, fd->loop.n1));
3776 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3777 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3778 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3779 NULL_TREE, false,
3780 GSI_CONTINUE_LINKING);
3782 else
3783 itercnt = s0;
3785 tree type = TREE_TYPE (t);
3786 if (POINTER_TYPE_P (type))
3787 type = sizetype;
3788 a = fold_build2 (MULT_EXPR, type,
3789 fold_convert (type, itercnt),
3790 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3791 dest = unshare_expr (t);
3792 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3793 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3794 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3795 false, GSI_CONTINUE_LINKING);
3796 assign_stmt = gimple_build_assign (dest, t);
3797 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3799 if (fd->collapse > 1)
3800 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3802 if (!broken_loop)
3804 /* The code controlling the sequential loop replaces the
3805 GIMPLE_OMP_CONTINUE. */
3806 gsi = gsi_last_nondebug_bb (cont_bb);
3807 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3808 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3809 vmain = gimple_omp_continue_control_use (cont_stmt);
3810 vback = gimple_omp_continue_control_def (cont_stmt);
3812 if (!gimple_omp_for_combined_p (fd->for_stmt))
3814 if (POINTER_TYPE_P (type))
3815 t = fold_build_pointer_plus (vmain, step);
3816 else
3817 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3818 t = force_gimple_operand_gsi (&gsi, t,
3819 DECL_P (vback)
3820 && TREE_ADDRESSABLE (vback),
3821 NULL_TREE, true, GSI_SAME_STMT);
3822 assign_stmt = gimple_build_assign (vback, t);
3823 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3825 t = build2 (fd->loop.cond_code, boolean_type_node,
3826 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3827 ? t : vback, e);
3828 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3831 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3832 gsi_remove (&gsi, true);
3834 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3835 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3838 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3839 gsi = gsi_last_nondebug_bb (exit_bb);
3840 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3842 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3843 if (fd->have_reductemp)
3845 tree fn;
3846 if (t)
3847 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3848 else
3849 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3850 gcall *g = gimple_build_call (fn, 0);
3851 if (t)
3853 gimple_call_set_lhs (g, t);
3854 gsi_insert_after (&gsi, gimple_build_assign (reductions,
3855 NOP_EXPR, t),
3856 GSI_SAME_STMT);
3858 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3860 else
3861 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3863 gsi_remove (&gsi, true);
3865 /* Connect all the blocks. */
3866 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3867 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3868 ep = find_edge (entry_bb, second_bb);
3869 ep->flags = EDGE_TRUE_VALUE;
3870 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3871 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3872 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3874 if (!broken_loop)
3876 ep = find_edge (cont_bb, body_bb);
3877 if (ep == NULL)
3879 ep = BRANCH_EDGE (cont_bb);
3880 gcc_assert (single_succ (ep->dest) == body_bb);
3882 if (gimple_omp_for_combined_p (fd->for_stmt))
3884 remove_edge (ep);
3885 ep = NULL;
3887 else if (fd->collapse > 1)
3889 remove_edge (ep);
3890 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3892 else
3893 ep->flags = EDGE_TRUE_VALUE;
3894 find_edge (cont_bb, fin_bb)->flags
3895 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3898 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3899 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3900 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3902 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3903 recompute_dominator (CDI_DOMINATORS, body_bb));
3904 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3905 recompute_dominator (CDI_DOMINATORS, fin_bb));
3907 struct loop *loop = body_bb->loop_father;
3908 if (loop != entry_bb->loop_father)
3910 gcc_assert (broken_loop || loop->header == body_bb);
3911 gcc_assert (broken_loop
3912 || loop->latch == region->cont
3913 || single_pred (loop->latch) == region->cont);
3914 return;
3917 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3919 loop = alloc_loop ();
3920 loop->header = body_bb;
3921 if (collapse_bb == NULL)
3922 loop->latch = cont_bb;
3923 add_loop (loop, body_bb->loop_father);
3927 /* Return phi in E->DEST with ARG on edge E. */
3929 static gphi *
3930 find_phi_with_arg_on_edge (tree arg, edge e)
3932 basic_block bb = e->dest;
3934 for (gphi_iterator gpi = gsi_start_phis (bb);
3935 !gsi_end_p (gpi);
3936 gsi_next (&gpi))
3938 gphi *phi = gpi.phi ();
3939 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3940 return phi;
3943 return NULL;
3946 /* A subroutine of expand_omp_for. Generate code for a parallel
3947 loop with static schedule and a specified chunk size. Given
3948 parameters:
3950 for (V = N1; V cond N2; V += STEP) BODY;
3952 where COND is "<" or ">", we generate pseudocode
3954 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3955 if (cond is <)
3956 adj = STEP - 1;
3957 else
3958 adj = STEP + 1;
3959 if ((__typeof (V)) -1 > 0 && cond is >)
3960 n = -(adj + N2 - N1) / -STEP;
3961 else
3962 n = (adj + N2 - N1) / STEP;
3963 trip = 0;
3964 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3965 here so that V is defined
3966 if the loop is not entered
3968 s0 = (trip * nthreads + threadid) * CHUNK;
3969 e0 = min (s0 + CHUNK, n);
3970 if (s0 < n) goto L1; else goto L4;
3972 V = s0 * STEP + N1;
3973 e = e0 * STEP + N1;
3975 BODY;
3976 V += STEP;
3977 if (V cond e) goto L2; else goto L3;
3979 trip += 1;
3980 goto L0;
3984 static void
3985 expand_omp_for_static_chunk (struct omp_region *region,
3986 struct omp_for_data *fd, gimple *inner_stmt)
3988 tree n, s0, e0, e, t;
3989 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3990 tree type, itype, vmain, vback, vextra;
3991 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3992 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3993 gimple_stmt_iterator gsi;
3994 edge se;
3995 bool broken_loop = region->cont == NULL;
3996 tree *counts = NULL;
3997 tree n1, n2, step;
3998 tree reductions = NULL_TREE;
4000 itype = type = TREE_TYPE (fd->loop.v);
4001 if (POINTER_TYPE_P (type))
4002 itype = signed_type_for (type);
4004 entry_bb = region->entry;
4005 se = split_block (entry_bb, last_stmt (entry_bb));
4006 entry_bb = se->src;
4007 iter_part_bb = se->dest;
4008 cont_bb = region->cont;
4009 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4010 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4011 gcc_assert (broken_loop
4012 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4013 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4014 body_bb = single_succ (seq_start_bb);
4015 if (!broken_loop)
4017 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4018 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4019 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4020 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4022 exit_bb = region->exit;
4024 /* Trip and adjustment setup goes in ENTRY_BB. */
4025 gsi = gsi_last_nondebug_bb (entry_bb);
4026 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4028 if (fd->collapse > 1)
4030 int first_zero_iter = -1, dummy = -1;
4031 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4033 counts = XALLOCAVEC (tree, fd->collapse);
4034 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4035 fin_bb, first_zero_iter,
4036 dummy_bb, dummy, l2_dom_bb);
4037 t = NULL_TREE;
4039 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4040 t = integer_one_node;
4041 else
4042 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4043 fold_convert (type, fd->loop.n1),
4044 fold_convert (type, fd->loop.n2));
4045 if (fd->collapse == 1
4046 && TYPE_UNSIGNED (type)
4047 && (t == NULL_TREE || !integer_onep (t)))
4049 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4050 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4051 true, GSI_SAME_STMT);
4052 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4053 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4054 true, GSI_SAME_STMT);
4055 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4056 NULL_TREE, NULL_TREE);
4057 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4058 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4059 expand_omp_regimplify_p, NULL, NULL)
4060 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4061 expand_omp_regimplify_p, NULL, NULL))
4063 gsi = gsi_for_stmt (cond_stmt);
4064 gimple_regimplify_operands (cond_stmt, &gsi);
4066 se = split_block (entry_bb, cond_stmt);
4067 se->flags = EDGE_TRUE_VALUE;
4068 entry_bb = se->dest;
4069 se->probability = profile_probability::very_likely ();
4070 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4071 se->probability = profile_probability::very_unlikely ();
4072 if (gimple_in_ssa_p (cfun))
4074 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4075 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4076 !gsi_end_p (gpi); gsi_next (&gpi))
4078 gphi *phi = gpi.phi ();
4079 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4080 se, UNKNOWN_LOCATION);
4083 gsi = gsi_last_bb (entry_bb);
4086 if (fd->have_reductemp)
4088 tree t1 = build_int_cst (long_integer_type_node, 0);
4089 tree t2 = build_int_cst (long_integer_type_node, 1);
4090 tree t3 = build_int_cstu (long_integer_type_node,
4091 (HOST_WIDE_INT_1U << 31) + 1);
4092 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4093 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4094 reductions = OMP_CLAUSE_DECL (clauses);
4095 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4096 gimple *g = SSA_NAME_DEF_STMT (reductions);
4097 reductions = gimple_assign_rhs1 (g);
4098 OMP_CLAUSE_DECL (clauses) = reductions;
4099 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4100 tree t
4101 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4102 9, t1, t2, t2, t3, t1, null_pointer_node,
4103 null_pointer_node, reductions, null_pointer_node);
4104 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4105 true, GSI_SAME_STMT);
4106 gsi_remove (&gsi2, true);
4107 release_ssa_name (gimple_assign_lhs (g));
4109 switch (gimple_omp_for_kind (fd->for_stmt))
4111 case GF_OMP_FOR_KIND_FOR:
4112 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4113 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4114 break;
4115 case GF_OMP_FOR_KIND_DISTRIBUTE:
4116 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4117 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4118 break;
4119 default:
4120 gcc_unreachable ();
4122 nthreads = build_call_expr (nthreads, 0);
4123 nthreads = fold_convert (itype, nthreads);
4124 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4125 true, GSI_SAME_STMT);
4126 threadid = build_call_expr (threadid, 0);
4127 threadid = fold_convert (itype, threadid);
4128 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4129 true, GSI_SAME_STMT);
4131 n1 = fd->loop.n1;
4132 n2 = fd->loop.n2;
4133 step = fd->loop.step;
4134 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4136 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4137 OMP_CLAUSE__LOOPTEMP_);
4138 gcc_assert (innerc);
4139 n1 = OMP_CLAUSE_DECL (innerc);
4140 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4141 OMP_CLAUSE__LOOPTEMP_);
4142 gcc_assert (innerc);
4143 n2 = OMP_CLAUSE_DECL (innerc);
4145 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4146 true, NULL_TREE, true, GSI_SAME_STMT);
4147 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4148 true, NULL_TREE, true, GSI_SAME_STMT);
4149 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4150 true, NULL_TREE, true, GSI_SAME_STMT);
4151 tree chunk_size = fold_convert (itype, fd->chunk_size);
4152 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4153 chunk_size
4154 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4155 GSI_SAME_STMT);
4157 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4158 t = fold_build2 (PLUS_EXPR, itype, step, t);
4159 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4160 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4161 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4162 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4163 fold_build1 (NEGATE_EXPR, itype, t),
4164 fold_build1 (NEGATE_EXPR, itype, step));
4165 else
4166 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4167 t = fold_convert (itype, t);
4168 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4169 true, GSI_SAME_STMT);
4171 trip_var = create_tmp_reg (itype, ".trip");
4172 if (gimple_in_ssa_p (cfun))
4174 trip_init = make_ssa_name (trip_var);
4175 trip_main = make_ssa_name (trip_var);
4176 trip_back = make_ssa_name (trip_var);
4178 else
4180 trip_init = trip_var;
4181 trip_main = trip_var;
4182 trip_back = trip_var;
4185 gassign *assign_stmt
4186 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4187 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4189 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4190 t = fold_build2 (MULT_EXPR, itype, t, step);
4191 if (POINTER_TYPE_P (type))
4192 t = fold_build_pointer_plus (n1, t);
4193 else
4194 t = fold_build2 (PLUS_EXPR, type, t, n1);
4195 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4196 true, GSI_SAME_STMT);
4198 /* Remove the GIMPLE_OMP_FOR. */
4199 gsi_remove (&gsi, true);
4201 gimple_stmt_iterator gsif = gsi;
4203 /* Iteration space partitioning goes in ITER_PART_BB. */
4204 gsi = gsi_last_bb (iter_part_bb);
4206 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4207 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4208 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4209 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4210 false, GSI_CONTINUE_LINKING);
4212 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4213 t = fold_build2 (MIN_EXPR, itype, t, n);
4214 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4215 false, GSI_CONTINUE_LINKING);
4217 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4218 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4220 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4221 gsi = gsi_start_bb (seq_start_bb);
4223 tree startvar = fd->loop.v;
4224 tree endvar = NULL_TREE;
4226 if (gimple_omp_for_combined_p (fd->for_stmt))
4228 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4229 ? gimple_omp_parallel_clauses (inner_stmt)
4230 : gimple_omp_for_clauses (inner_stmt);
4231 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4232 gcc_assert (innerc);
4233 startvar = OMP_CLAUSE_DECL (innerc);
4234 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4235 OMP_CLAUSE__LOOPTEMP_);
4236 gcc_assert (innerc);
4237 endvar = OMP_CLAUSE_DECL (innerc);
4238 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4239 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4241 int i;
4242 for (i = 1; i < fd->collapse; i++)
4244 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4245 OMP_CLAUSE__LOOPTEMP_);
4246 gcc_assert (innerc);
4248 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4249 OMP_CLAUSE__LOOPTEMP_);
4250 if (innerc)
4252 /* If needed (distribute parallel for with lastprivate),
4253 propagate down the total number of iterations. */
4254 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4255 fd->loop.n2);
4256 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4257 GSI_CONTINUE_LINKING);
4258 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4259 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4264 t = fold_convert (itype, s0);
4265 t = fold_build2 (MULT_EXPR, itype, t, step);
4266 if (POINTER_TYPE_P (type))
4268 t = fold_build_pointer_plus (n1, t);
4269 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4270 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4271 t = fold_convert (signed_type_for (type), t);
4273 else
4274 t = fold_build2 (PLUS_EXPR, type, t, n1);
4275 t = fold_convert (TREE_TYPE (startvar), t);
4276 t = force_gimple_operand_gsi (&gsi, t,
4277 DECL_P (startvar)
4278 && TREE_ADDRESSABLE (startvar),
4279 NULL_TREE, false, GSI_CONTINUE_LINKING);
4280 assign_stmt = gimple_build_assign (startvar, t);
4281 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4283 t = fold_convert (itype, e0);
4284 t = fold_build2 (MULT_EXPR, itype, t, step);
4285 if (POINTER_TYPE_P (type))
4287 t = fold_build_pointer_plus (n1, t);
4288 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4289 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4290 t = fold_convert (signed_type_for (type), t);
4292 else
4293 t = fold_build2 (PLUS_EXPR, type, t, n1);
4294 t = fold_convert (TREE_TYPE (startvar), t);
4295 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4296 false, GSI_CONTINUE_LINKING);
4297 if (endvar)
4299 assign_stmt = gimple_build_assign (endvar, e);
4300 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4301 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4302 assign_stmt = gimple_build_assign (fd->loop.v, e);
4303 else
4304 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4305 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4307 /* Handle linear clause adjustments. */
4308 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4309 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4310 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4311 c; c = OMP_CLAUSE_CHAIN (c))
4312 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4313 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4315 tree d = OMP_CLAUSE_DECL (c);
4316 bool is_ref = omp_is_reference (d);
4317 tree t = d, a, dest;
4318 if (is_ref)
4319 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4320 tree type = TREE_TYPE (t);
4321 if (POINTER_TYPE_P (type))
4322 type = sizetype;
4323 dest = unshare_expr (t);
4324 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4325 expand_omp_build_assign (&gsif, v, t);
4326 if (itercnt == NULL_TREE)
4328 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4330 itercntbias
4331 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4332 fold_convert (itype, fd->loop.n1));
4333 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4334 itercntbias, step);
4335 itercntbias
4336 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4337 NULL_TREE, true,
4338 GSI_SAME_STMT);
4339 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4340 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4341 NULL_TREE, false,
4342 GSI_CONTINUE_LINKING);
4344 else
4345 itercnt = s0;
4347 a = fold_build2 (MULT_EXPR, type,
4348 fold_convert (type, itercnt),
4349 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4350 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4351 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4352 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4353 false, GSI_CONTINUE_LINKING);
4354 assign_stmt = gimple_build_assign (dest, t);
4355 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4357 if (fd->collapse > 1)
4358 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4360 if (!broken_loop)
4362 /* The code controlling the sequential loop goes in CONT_BB,
4363 replacing the GIMPLE_OMP_CONTINUE. */
4364 gsi = gsi_last_nondebug_bb (cont_bb);
4365 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4366 vmain = gimple_omp_continue_control_use (cont_stmt);
4367 vback = gimple_omp_continue_control_def (cont_stmt);
4369 if (!gimple_omp_for_combined_p (fd->for_stmt))
4371 if (POINTER_TYPE_P (type))
4372 t = fold_build_pointer_plus (vmain, step);
4373 else
4374 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4375 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4376 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4377 true, GSI_SAME_STMT);
4378 assign_stmt = gimple_build_assign (vback, t);
4379 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4381 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4382 t = build2 (EQ_EXPR, boolean_type_node,
4383 build_int_cst (itype, 0),
4384 build_int_cst (itype, 1));
4385 else
4386 t = build2 (fd->loop.cond_code, boolean_type_node,
4387 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4388 ? t : vback, e);
4389 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4392 /* Remove GIMPLE_OMP_CONTINUE. */
4393 gsi_remove (&gsi, true);
4395 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4396 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4398 /* Trip update code goes into TRIP_UPDATE_BB. */
4399 gsi = gsi_start_bb (trip_update_bb);
4401 t = build_int_cst (itype, 1);
4402 t = build2 (PLUS_EXPR, itype, trip_main, t);
4403 assign_stmt = gimple_build_assign (trip_back, t);
4404 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4407 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4408 gsi = gsi_last_nondebug_bb (exit_bb);
4409 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4411 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4412 if (fd->have_reductemp)
4414 tree fn;
4415 if (t)
4416 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4417 else
4418 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4419 gcall *g = gimple_build_call (fn, 0);
4420 if (t)
4422 gimple_call_set_lhs (g, t);
4423 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4424 NOP_EXPR, t),
4425 GSI_SAME_STMT);
4427 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4429 else
4430 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4432 gsi_remove (&gsi, true);
4434 /* Connect the new blocks. */
4435 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4436 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4438 if (!broken_loop)
4440 se = find_edge (cont_bb, body_bb);
4441 if (se == NULL)
4443 se = BRANCH_EDGE (cont_bb);
4444 gcc_assert (single_succ (se->dest) == body_bb);
4446 if (gimple_omp_for_combined_p (fd->for_stmt))
4448 remove_edge (se);
4449 se = NULL;
4451 else if (fd->collapse > 1)
4453 remove_edge (se);
4454 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4456 else
4457 se->flags = EDGE_TRUE_VALUE;
4458 find_edge (cont_bb, trip_update_bb)->flags
4459 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4461 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4462 iter_part_bb);
4465 if (gimple_in_ssa_p (cfun))
4467 gphi_iterator psi;
4468 gphi *phi;
4469 edge re, ene;
4470 edge_var_map *vm;
4471 size_t i;
4473 gcc_assert (fd->collapse == 1 && !broken_loop);
4475 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4476 remove arguments of the phi nodes in fin_bb. We need to create
4477 appropriate phi nodes in iter_part_bb instead. */
4478 se = find_edge (iter_part_bb, fin_bb);
4479 re = single_succ_edge (trip_update_bb);
4480 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4481 ene = single_succ_edge (entry_bb);
4483 psi = gsi_start_phis (fin_bb);
4484 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4485 gsi_next (&psi), ++i)
4487 gphi *nphi;
4488 location_t locus;
4490 phi = psi.phi ();
4491 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4492 redirect_edge_var_map_def (vm), 0))
4493 continue;
4495 t = gimple_phi_result (phi);
4496 gcc_assert (t == redirect_edge_var_map_result (vm));
4498 if (!single_pred_p (fin_bb))
4499 t = copy_ssa_name (t, phi);
4501 nphi = create_phi_node (t, iter_part_bb);
4503 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4504 locus = gimple_phi_arg_location_from_edge (phi, se);
4506 /* A special case -- fd->loop.v is not yet computed in
4507 iter_part_bb, we need to use vextra instead. */
4508 if (t == fd->loop.v)
4509 t = vextra;
4510 add_phi_arg (nphi, t, ene, locus);
4511 locus = redirect_edge_var_map_location (vm);
4512 tree back_arg = redirect_edge_var_map_def (vm);
4513 add_phi_arg (nphi, back_arg, re, locus);
4514 edge ce = find_edge (cont_bb, body_bb);
4515 if (ce == NULL)
4517 ce = BRANCH_EDGE (cont_bb);
4518 gcc_assert (single_succ (ce->dest) == body_bb);
4519 ce = single_succ_edge (ce->dest);
4521 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4522 gcc_assert (inner_loop_phi != NULL);
4523 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4524 find_edge (seq_start_bb, body_bb), locus);
4526 if (!single_pred_p (fin_bb))
4527 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4529 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4530 redirect_edge_var_map_clear (re);
4531 if (single_pred_p (fin_bb))
4532 while (1)
4534 psi = gsi_start_phis (fin_bb);
4535 if (gsi_end_p (psi))
4536 break;
4537 remove_phi_node (&psi, false);
4540 /* Make phi node for trip. */
4541 phi = create_phi_node (trip_main, iter_part_bb);
4542 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4543 UNKNOWN_LOCATION);
4544 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4545 UNKNOWN_LOCATION);
4548 if (!broken_loop)
4549 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4550 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4551 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4552 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4553 recompute_dominator (CDI_DOMINATORS, fin_bb));
4554 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4555 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4556 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4557 recompute_dominator (CDI_DOMINATORS, body_bb));
4559 if (!broken_loop)
4561 struct loop *loop = body_bb->loop_father;
4562 struct loop *trip_loop = alloc_loop ();
4563 trip_loop->header = iter_part_bb;
4564 trip_loop->latch = trip_update_bb;
4565 add_loop (trip_loop, iter_part_bb->loop_father);
4567 if (loop != entry_bb->loop_father)
4569 gcc_assert (loop->header == body_bb);
4570 gcc_assert (loop->latch == region->cont
4571 || single_pred (loop->latch) == region->cont);
4572 trip_loop->inner = loop;
4573 return;
4576 if (!gimple_omp_for_combined_p (fd->for_stmt))
4578 loop = alloc_loop ();
4579 loop->header = body_bb;
4580 if (collapse_bb == NULL)
4581 loop->latch = cont_bb;
4582 add_loop (loop, trip_loop);
4587 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4588 loop. Given parameters:
4590 for (V = N1; V cond N2; V += STEP) BODY;
4592 where COND is "<" or ">", we generate pseudocode
4594 V = N1;
4595 goto L1;
4597 BODY;
4598 V += STEP;
4600 if (V cond N2) goto L0; else goto L2;
4603 For collapsed loops, given parameters:
4604 collapse(3)
4605 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4606 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4607 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4608 BODY;
4610 we generate pseudocode
4612 if (cond3 is <)
4613 adj = STEP3 - 1;
4614 else
4615 adj = STEP3 + 1;
4616 count3 = (adj + N32 - N31) / STEP3;
4617 if (cond2 is <)
4618 adj = STEP2 - 1;
4619 else
4620 adj = STEP2 + 1;
4621 count2 = (adj + N22 - N21) / STEP2;
4622 if (cond1 is <)
4623 adj = STEP1 - 1;
4624 else
4625 adj = STEP1 + 1;
4626 count1 = (adj + N12 - N11) / STEP1;
4627 count = count1 * count2 * count3;
4628 V = 0;
4629 V1 = N11;
4630 V2 = N21;
4631 V3 = N31;
4632 goto L1;
4634 BODY;
4635 V += 1;
4636 V3 += STEP3;
4637 V2 += (V3 cond3 N32) ? 0 : STEP2;
4638 V3 = (V3 cond3 N32) ? V3 : N31;
4639 V1 += (V2 cond2 N22) ? 0 : STEP1;
4640 V2 = (V2 cond2 N22) ? V2 : N21;
4642 if (V < count) goto L0; else goto L2;
4647 static void
4648 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4650 tree type, t;
4651 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4652 gimple_stmt_iterator gsi;
4653 gimple *stmt;
4654 gcond *cond_stmt;
4655 bool broken_loop = region->cont == NULL;
4656 edge e, ne;
4657 tree *counts = NULL;
4658 int i;
4659 int safelen_int = INT_MAX;
4660 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4661 OMP_CLAUSE_SAFELEN);
4662 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4663 OMP_CLAUSE__SIMDUID_);
4664 tree n1, n2;
4666 if (safelen)
4668 poly_uint64 val;
4669 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4670 if (!poly_int_tree_p (safelen, &val))
4671 safelen_int = 0;
4672 else
4673 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4674 if (safelen_int == 1)
4675 safelen_int = 0;
4677 type = TREE_TYPE (fd->loop.v);
4678 entry_bb = region->entry;
4679 cont_bb = region->cont;
4680 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4681 gcc_assert (broken_loop
4682 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4683 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4684 if (!broken_loop)
4686 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4687 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4688 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4689 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4691 else
4693 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4694 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4695 l2_bb = single_succ (l1_bb);
4697 exit_bb = region->exit;
4698 l2_dom_bb = NULL;
4700 gsi = gsi_last_nondebug_bb (entry_bb);
4702 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4703 /* Not needed in SSA form right now. */
4704 gcc_assert (!gimple_in_ssa_p (cfun));
4705 if (fd->collapse > 1)
4707 int first_zero_iter = -1, dummy = -1;
4708 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4710 counts = XALLOCAVEC (tree, fd->collapse);
4711 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4712 zero_iter_bb, first_zero_iter,
4713 dummy_bb, dummy, l2_dom_bb);
4715 if (l2_dom_bb == NULL)
4716 l2_dom_bb = l1_bb;
4718 n1 = fd->loop.n1;
4719 n2 = fd->loop.n2;
4720 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4722 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4723 OMP_CLAUSE__LOOPTEMP_);
4724 gcc_assert (innerc);
4725 n1 = OMP_CLAUSE_DECL (innerc);
4726 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4727 OMP_CLAUSE__LOOPTEMP_);
4728 gcc_assert (innerc);
4729 n2 = OMP_CLAUSE_DECL (innerc);
4731 tree step = fd->loop.step;
4733 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4734 OMP_CLAUSE__SIMT_);
4735 if (is_simt)
4737 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4738 is_simt = safelen_int > 1;
4740 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4741 if (is_simt)
4743 simt_lane = create_tmp_var (unsigned_type_node);
4744 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4745 gimple_call_set_lhs (g, simt_lane);
4746 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4747 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4748 fold_convert (TREE_TYPE (step), simt_lane));
4749 n1 = fold_convert (type, n1);
4750 if (POINTER_TYPE_P (type))
4751 n1 = fold_build_pointer_plus (n1, offset);
4752 else
4753 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4755 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4756 if (fd->collapse > 1)
4757 simt_maxlane = build_one_cst (unsigned_type_node);
4758 else if (safelen_int < omp_max_simt_vf ())
4759 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4760 tree vf
4761 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4762 unsigned_type_node, 0);
4763 if (simt_maxlane)
4764 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4765 vf = fold_convert (TREE_TYPE (step), vf);
4766 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4769 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4770 if (fd->collapse > 1)
4772 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4774 gsi_prev (&gsi);
4775 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4776 gsi_next (&gsi);
4778 else
4779 for (i = 0; i < fd->collapse; i++)
4781 tree itype = TREE_TYPE (fd->loops[i].v);
4782 if (POINTER_TYPE_P (itype))
4783 itype = signed_type_for (itype);
4784 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4785 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4789 /* Remove the GIMPLE_OMP_FOR statement. */
4790 gsi_remove (&gsi, true);
4792 if (!broken_loop)
4794 /* Code to control the increment goes in the CONT_BB. */
4795 gsi = gsi_last_nondebug_bb (cont_bb);
4796 stmt = gsi_stmt (gsi);
4797 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4799 if (POINTER_TYPE_P (type))
4800 t = fold_build_pointer_plus (fd->loop.v, step);
4801 else
4802 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4803 expand_omp_build_assign (&gsi, fd->loop.v, t);
4805 if (fd->collapse > 1)
4807 i = fd->collapse - 1;
4808 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4810 t = fold_convert (sizetype, fd->loops[i].step);
4811 t = fold_build_pointer_plus (fd->loops[i].v, t);
4813 else
4815 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4816 fd->loops[i].step);
4817 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4818 fd->loops[i].v, t);
4820 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4822 for (i = fd->collapse - 1; i > 0; i--)
4824 tree itype = TREE_TYPE (fd->loops[i].v);
4825 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4826 if (POINTER_TYPE_P (itype2))
4827 itype2 = signed_type_for (itype2);
4828 t = fold_convert (itype2, fd->loops[i - 1].step);
4829 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4830 GSI_SAME_STMT);
4831 t = build3 (COND_EXPR, itype2,
4832 build2 (fd->loops[i].cond_code, boolean_type_node,
4833 fd->loops[i].v,
4834 fold_convert (itype, fd->loops[i].n2)),
4835 build_int_cst (itype2, 0), t);
4836 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4837 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4838 else
4839 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4840 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4842 t = fold_convert (itype, fd->loops[i].n1);
4843 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4844 GSI_SAME_STMT);
4845 t = build3 (COND_EXPR, itype,
4846 build2 (fd->loops[i].cond_code, boolean_type_node,
4847 fd->loops[i].v,
4848 fold_convert (itype, fd->loops[i].n2)),
4849 fd->loops[i].v, t);
4850 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4854 /* Remove GIMPLE_OMP_CONTINUE. */
4855 gsi_remove (&gsi, true);
4858 /* Emit the condition in L1_BB. */
4859 gsi = gsi_start_bb (l1_bb);
4861 t = fold_convert (type, n2);
4862 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4863 false, GSI_CONTINUE_LINKING);
4864 tree v = fd->loop.v;
4865 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4866 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4867 false, GSI_CONTINUE_LINKING);
4868 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4869 cond_stmt = gimple_build_cond_empty (t);
4870 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4871 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4872 NULL, NULL)
4873 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4874 NULL, NULL))
4876 gsi = gsi_for_stmt (cond_stmt);
4877 gimple_regimplify_operands (cond_stmt, &gsi);
4880 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4881 if (is_simt)
4883 gsi = gsi_start_bb (l2_bb);
4884 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4885 if (POINTER_TYPE_P (type))
4886 t = fold_build_pointer_plus (fd->loop.v, step);
4887 else
4888 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4889 expand_omp_build_assign (&gsi, fd->loop.v, t);
4892 /* Remove GIMPLE_OMP_RETURN. */
4893 gsi = gsi_last_nondebug_bb (exit_bb);
4894 gsi_remove (&gsi, true);
4896 /* Connect the new blocks. */
4897 remove_edge (FALLTHRU_EDGE (entry_bb));
4899 if (!broken_loop)
4901 remove_edge (BRANCH_EDGE (entry_bb));
4902 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4904 e = BRANCH_EDGE (l1_bb);
4905 ne = FALLTHRU_EDGE (l1_bb);
4906 e->flags = EDGE_TRUE_VALUE;
4908 else
4910 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4912 ne = single_succ_edge (l1_bb);
4913 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4916 ne->flags = EDGE_FALSE_VALUE;
4917 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4918 ne->probability = e->probability.invert ();
4920 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4921 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4923 if (simt_maxlane)
4925 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4926 NULL_TREE, NULL_TREE);
4927 gsi = gsi_last_bb (entry_bb);
4928 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4929 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4930 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4931 FALLTHRU_EDGE (entry_bb)->probability
4932 = profile_probability::guessed_always ().apply_scale (7, 8);
4933 BRANCH_EDGE (entry_bb)->probability
4934 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4935 l2_dom_bb = entry_bb;
4937 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4939 if (!broken_loop)
4941 struct loop *loop = alloc_loop ();
4942 loop->header = l1_bb;
4943 loop->latch = cont_bb;
4944 add_loop (loop, l1_bb->loop_father);
4945 loop->safelen = safelen_int;
4946 if (simduid)
4948 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4949 cfun->has_simduid_loops = true;
4951 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4952 the loop. */
4953 if ((flag_tree_loop_vectorize
4954 || !global_options_set.x_flag_tree_loop_vectorize)
4955 && flag_tree_loop_optimize
4956 && loop->safelen > 1)
4958 loop->force_vectorize = true;
4959 cfun->has_force_vectorize_loops = true;
4962 else if (simduid)
4963 cfun->has_simduid_loops = true;
4966 /* Taskloop construct is represented after gimplification with
4967 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4968 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4969 which should just compute all the needed loop temporaries
4970 for GIMPLE_OMP_TASK. */
4972 static void
4973 expand_omp_taskloop_for_outer (struct omp_region *region,
4974 struct omp_for_data *fd,
4975 gimple *inner_stmt)
4977 tree type, bias = NULL_TREE;
4978 basic_block entry_bb, cont_bb, exit_bb;
4979 gimple_stmt_iterator gsi;
4980 gassign *assign_stmt;
4981 tree *counts = NULL;
4982 int i;
4984 gcc_assert (inner_stmt);
4985 gcc_assert (region->cont);
4986 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4987 && gimple_omp_task_taskloop_p (inner_stmt));
4988 type = TREE_TYPE (fd->loop.v);
4990 /* See if we need to bias by LLONG_MIN. */
4991 if (fd->iter_type == long_long_unsigned_type_node
4992 && TREE_CODE (type) == INTEGER_TYPE
4993 && !TYPE_UNSIGNED (type))
4995 tree n1, n2;
4997 if (fd->loop.cond_code == LT_EXPR)
4999 n1 = fd->loop.n1;
5000 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5002 else
5004 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5005 n2 = fd->loop.n1;
5007 if (TREE_CODE (n1) != INTEGER_CST
5008 || TREE_CODE (n2) != INTEGER_CST
5009 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5010 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5013 entry_bb = region->entry;
5014 cont_bb = region->cont;
5015 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5016 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5017 exit_bb = region->exit;
5019 gsi = gsi_last_nondebug_bb (entry_bb);
5020 gimple *for_stmt = gsi_stmt (gsi);
5021 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5022 if (fd->collapse > 1)
5024 int first_zero_iter = -1, dummy = -1;
5025 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5027 counts = XALLOCAVEC (tree, fd->collapse);
5028 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5029 zero_iter_bb, first_zero_iter,
5030 dummy_bb, dummy, l2_dom_bb);
5032 if (zero_iter_bb)
5034 /* Some counts[i] vars might be uninitialized if
5035 some loop has zero iterations. But the body shouldn't
5036 be executed in that case, so just avoid uninit warnings. */
5037 for (i = first_zero_iter; i < fd->collapse; i++)
5038 if (SSA_VAR_P (counts[i]))
5039 TREE_NO_WARNING (counts[i]) = 1;
5040 gsi_prev (&gsi);
5041 edge e = split_block (entry_bb, gsi_stmt (gsi));
5042 entry_bb = e->dest;
5043 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5044 gsi = gsi_last_bb (entry_bb);
5045 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5046 get_immediate_dominator (CDI_DOMINATORS,
5047 zero_iter_bb));
5051 tree t0, t1;
5052 t1 = fd->loop.n2;
5053 t0 = fd->loop.n1;
5054 if (POINTER_TYPE_P (TREE_TYPE (t0))
5055 && TYPE_PRECISION (TREE_TYPE (t0))
5056 != TYPE_PRECISION (fd->iter_type))
5058 /* Avoid casting pointers to integer of a different size. */
5059 tree itype = signed_type_for (type);
5060 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5061 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5063 else
5065 t1 = fold_convert (fd->iter_type, t1);
5066 t0 = fold_convert (fd->iter_type, t0);
5068 if (bias)
5070 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5071 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5074 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5075 OMP_CLAUSE__LOOPTEMP_);
5076 gcc_assert (innerc);
5077 tree startvar = OMP_CLAUSE_DECL (innerc);
5078 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5079 gcc_assert (innerc);
5080 tree endvar = OMP_CLAUSE_DECL (innerc);
5081 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5083 gcc_assert (innerc);
5084 for (i = 1; i < fd->collapse; i++)
5086 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5087 OMP_CLAUSE__LOOPTEMP_);
5088 gcc_assert (innerc);
5090 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5091 OMP_CLAUSE__LOOPTEMP_);
5092 if (innerc)
5094 /* If needed (inner taskloop has lastprivate clause), propagate
5095 down the total number of iterations. */
5096 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5097 NULL_TREE, false,
5098 GSI_CONTINUE_LINKING);
5099 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5104 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5105 GSI_CONTINUE_LINKING);
5106 assign_stmt = gimple_build_assign (startvar, t0);
5107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5109 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5110 GSI_CONTINUE_LINKING);
5111 assign_stmt = gimple_build_assign (endvar, t1);
5112 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5113 if (fd->collapse > 1)
5114 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5116 /* Remove the GIMPLE_OMP_FOR statement. */
5117 gsi = gsi_for_stmt (for_stmt);
5118 gsi_remove (&gsi, true);
5120 gsi = gsi_last_nondebug_bb (cont_bb);
5121 gsi_remove (&gsi, true);
5123 gsi = gsi_last_nondebug_bb (exit_bb);
5124 gsi_remove (&gsi, true);
5126 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5127 remove_edge (BRANCH_EDGE (entry_bb));
5128 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5129 remove_edge (BRANCH_EDGE (cont_bb));
5130 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5131 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5132 recompute_dominator (CDI_DOMINATORS, region->entry));
5135 /* Taskloop construct is represented after gimplification with
5136 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5137 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5138 GOMP_taskloop{,_ull} function arranges for each task to be given just
5139 a single range of iterations. */
5141 static void
5142 expand_omp_taskloop_for_inner (struct omp_region *region,
5143 struct omp_for_data *fd,
5144 gimple *inner_stmt)
5146 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5147 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5148 basic_block fin_bb;
5149 gimple_stmt_iterator gsi;
5150 edge ep;
5151 bool broken_loop = region->cont == NULL;
5152 tree *counts = NULL;
5153 tree n1, n2, step;
5155 itype = type = TREE_TYPE (fd->loop.v);
5156 if (POINTER_TYPE_P (type))
5157 itype = signed_type_for (type);
5159 /* See if we need to bias by LLONG_MIN. */
5160 if (fd->iter_type == long_long_unsigned_type_node
5161 && TREE_CODE (type) == INTEGER_TYPE
5162 && !TYPE_UNSIGNED (type))
5164 tree n1, n2;
5166 if (fd->loop.cond_code == LT_EXPR)
5168 n1 = fd->loop.n1;
5169 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5171 else
5173 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5174 n2 = fd->loop.n1;
5176 if (TREE_CODE (n1) != INTEGER_CST
5177 || TREE_CODE (n2) != INTEGER_CST
5178 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5179 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5182 entry_bb = region->entry;
5183 cont_bb = region->cont;
5184 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5185 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5186 gcc_assert (broken_loop
5187 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5188 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5189 if (!broken_loop)
5191 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5192 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5194 exit_bb = region->exit;
5196 /* Iteration space partitioning goes in ENTRY_BB. */
5197 gsi = gsi_last_nondebug_bb (entry_bb);
5198 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5200 if (fd->collapse > 1)
5202 int first_zero_iter = -1, dummy = -1;
5203 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5205 counts = XALLOCAVEC (tree, fd->collapse);
5206 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5207 fin_bb, first_zero_iter,
5208 dummy_bb, dummy, l2_dom_bb);
5209 t = NULL_TREE;
5211 else
5212 t = integer_one_node;
5214 step = fd->loop.step;
5215 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5216 OMP_CLAUSE__LOOPTEMP_);
5217 gcc_assert (innerc);
5218 n1 = OMP_CLAUSE_DECL (innerc);
5219 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5220 gcc_assert (innerc);
5221 n2 = OMP_CLAUSE_DECL (innerc);
5222 if (bias)
5224 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5225 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5227 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5228 true, NULL_TREE, true, GSI_SAME_STMT);
5229 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5230 true, NULL_TREE, true, GSI_SAME_STMT);
5231 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5232 true, NULL_TREE, true, GSI_SAME_STMT);
5234 tree startvar = fd->loop.v;
5235 tree endvar = NULL_TREE;
5237 if (gimple_omp_for_combined_p (fd->for_stmt))
5239 tree clauses = gimple_omp_for_clauses (inner_stmt);
5240 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5241 gcc_assert (innerc);
5242 startvar = OMP_CLAUSE_DECL (innerc);
5243 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5244 OMP_CLAUSE__LOOPTEMP_);
5245 gcc_assert (innerc);
5246 endvar = OMP_CLAUSE_DECL (innerc);
5248 t = fold_convert (TREE_TYPE (startvar), n1);
5249 t = force_gimple_operand_gsi (&gsi, t,
5250 DECL_P (startvar)
5251 && TREE_ADDRESSABLE (startvar),
5252 NULL_TREE, false, GSI_CONTINUE_LINKING);
5253 gimple *assign_stmt = gimple_build_assign (startvar, t);
5254 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5256 t = fold_convert (TREE_TYPE (startvar), n2);
5257 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5258 false, GSI_CONTINUE_LINKING);
5259 if (endvar)
5261 assign_stmt = gimple_build_assign (endvar, e);
5262 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5263 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5264 assign_stmt = gimple_build_assign (fd->loop.v, e);
5265 else
5266 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5267 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5269 if (fd->collapse > 1)
5270 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5272 if (!broken_loop)
5274 /* The code controlling the sequential loop replaces the
5275 GIMPLE_OMP_CONTINUE. */
5276 gsi = gsi_last_nondebug_bb (cont_bb);
5277 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5278 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5279 vmain = gimple_omp_continue_control_use (cont_stmt);
5280 vback = gimple_omp_continue_control_def (cont_stmt);
5282 if (!gimple_omp_for_combined_p (fd->for_stmt))
5284 if (POINTER_TYPE_P (type))
5285 t = fold_build_pointer_plus (vmain, step);
5286 else
5287 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5288 t = force_gimple_operand_gsi (&gsi, t,
5289 DECL_P (vback)
5290 && TREE_ADDRESSABLE (vback),
5291 NULL_TREE, true, GSI_SAME_STMT);
5292 assign_stmt = gimple_build_assign (vback, t);
5293 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5295 t = build2 (fd->loop.cond_code, boolean_type_node,
5296 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5297 ? t : vback, e);
5298 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5301 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5302 gsi_remove (&gsi, true);
5304 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5305 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5308 /* Remove the GIMPLE_OMP_FOR statement. */
5309 gsi = gsi_for_stmt (fd->for_stmt);
5310 gsi_remove (&gsi, true);
5312 /* Remove the GIMPLE_OMP_RETURN statement. */
5313 gsi = gsi_last_nondebug_bb (exit_bb);
5314 gsi_remove (&gsi, true);
5316 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5317 if (!broken_loop)
5318 remove_edge (BRANCH_EDGE (entry_bb));
5319 else
5321 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5322 region->outer->cont = NULL;
5325 /* Connect all the blocks. */
5326 if (!broken_loop)
5328 ep = find_edge (cont_bb, body_bb);
5329 if (gimple_omp_for_combined_p (fd->for_stmt))
5331 remove_edge (ep);
5332 ep = NULL;
5334 else if (fd->collapse > 1)
5336 remove_edge (ep);
5337 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5339 else
5340 ep->flags = EDGE_TRUE_VALUE;
5341 find_edge (cont_bb, fin_bb)->flags
5342 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5345 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5346 recompute_dominator (CDI_DOMINATORS, body_bb));
5347 if (!broken_loop)
5348 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5349 recompute_dominator (CDI_DOMINATORS, fin_bb));
5351 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5353 struct loop *loop = alloc_loop ();
5354 loop->header = body_bb;
5355 if (collapse_bb == NULL)
5356 loop->latch = cont_bb;
5357 add_loop (loop, body_bb->loop_father);
5361 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5362 partitioned loop. The lowering here is abstracted, in that the
5363 loop parameters are passed through internal functions, which are
5364 further lowered by oacc_device_lower, once we get to the target
5365 compiler. The loop is of the form:
5367 for (V = B; V LTGT E; V += S) {BODY}
5369 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5370 (constant 0 for no chunking) and we will have a GWV partitioning
5371 mask, specifying dimensions over which the loop is to be
5372 partitioned (see note below). We generate code that looks like
5373 (this ignores tiling):
5375 <entry_bb> [incoming FALL->body, BRANCH->exit]
5376 typedef signedintify (typeof (V)) T; // underlying signed integral type
5377 T range = E - B;
5378 T chunk_no = 0;
5379 T DIR = LTGT == '<' ? +1 : -1;
5380 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5381 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5383 <head_bb> [created by splitting end of entry_bb]
5384 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5385 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5386 if (!(offset LTGT bound)) goto bottom_bb;
5388 <body_bb> [incoming]
5389 V = B + offset;
5390 {BODY}
5392 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5393 offset += step;
5394 if (offset LTGT bound) goto body_bb; [*]
5396 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5397 chunk_no++;
5398 if (chunk < chunk_max) goto head_bb;
5400 <exit_bb> [incoming]
5401 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5403 [*] Needed if V live at end of loop. */
5405 static void
5406 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5408 tree v = fd->loop.v;
5409 enum tree_code cond_code = fd->loop.cond_code;
5410 enum tree_code plus_code = PLUS_EXPR;
5412 tree chunk_size = integer_minus_one_node;
5413 tree gwv = integer_zero_node;
5414 tree iter_type = TREE_TYPE (v);
5415 tree diff_type = iter_type;
5416 tree plus_type = iter_type;
5417 struct oacc_collapse *counts = NULL;
5419 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5420 == GF_OMP_FOR_KIND_OACC_LOOP);
5421 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5422 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5424 if (POINTER_TYPE_P (iter_type))
5426 plus_code = POINTER_PLUS_EXPR;
5427 plus_type = sizetype;
5429 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5430 diff_type = signed_type_for (diff_type);
5431 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5432 diff_type = integer_type_node;
5434 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5435 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5436 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5437 basic_block bottom_bb = NULL;
5439 /* entry_bb has two sucessors; the branch edge is to the exit
5440 block, fallthrough edge to body. */
5441 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5442 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5444 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5445 body_bb, or to a block whose only successor is the body_bb. Its
5446 fallthrough successor is the final block (same as the branch
5447 successor of the entry_bb). */
5448 if (cont_bb)
5450 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5451 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5453 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5454 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5456 else
5457 gcc_assert (!gimple_in_ssa_p (cfun));
5459 /* The exit block only has entry_bb and cont_bb as predecessors. */
5460 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5462 tree chunk_no;
5463 tree chunk_max = NULL_TREE;
5464 tree bound, offset;
5465 tree step = create_tmp_var (diff_type, ".step");
5466 bool up = cond_code == LT_EXPR;
5467 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5468 bool chunking = !gimple_in_ssa_p (cfun);
5469 bool negating;
5471 /* Tiling vars. */
5472 tree tile_size = NULL_TREE;
5473 tree element_s = NULL_TREE;
5474 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5475 basic_block elem_body_bb = NULL;
5476 basic_block elem_cont_bb = NULL;
5478 /* SSA instances. */
5479 tree offset_incr = NULL_TREE;
5480 tree offset_init = NULL_TREE;
5482 gimple_stmt_iterator gsi;
5483 gassign *ass;
5484 gcall *call;
5485 gimple *stmt;
5486 tree expr;
5487 location_t loc;
5488 edge split, be, fte;
5490 /* Split the end of entry_bb to create head_bb. */
5491 split = split_block (entry_bb, last_stmt (entry_bb));
5492 basic_block head_bb = split->dest;
5493 entry_bb = split->src;
5495 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5496 gsi = gsi_last_nondebug_bb (entry_bb);
5497 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5498 loc = gimple_location (for_stmt);
5500 if (gimple_in_ssa_p (cfun))
5502 offset_init = gimple_omp_for_index (for_stmt, 0);
5503 gcc_assert (integer_zerop (fd->loop.n1));
5504 /* The SSA parallelizer does gang parallelism. */
5505 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5508 if (fd->collapse > 1 || fd->tiling)
5510 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5511 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5512 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5513 TREE_TYPE (fd->loop.n2), loc);
5515 if (SSA_VAR_P (fd->loop.n2))
5517 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5518 true, GSI_SAME_STMT);
5519 ass = gimple_build_assign (fd->loop.n2, total);
5520 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5524 tree b = fd->loop.n1;
5525 tree e = fd->loop.n2;
5526 tree s = fd->loop.step;
5528 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5529 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5531 /* Convert the step, avoiding possible unsigned->signed overflow. */
5532 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5533 if (negating)
5534 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5535 s = fold_convert (diff_type, s);
5536 if (negating)
5537 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5538 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5540 if (!chunking)
5541 chunk_size = integer_zero_node;
5542 expr = fold_convert (diff_type, chunk_size);
5543 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5544 NULL_TREE, true, GSI_SAME_STMT);
5546 if (fd->tiling)
5548 /* Determine the tile size and element step,
5549 modify the outer loop step size. */
5550 tile_size = create_tmp_var (diff_type, ".tile_size");
5551 expr = build_int_cst (diff_type, 1);
5552 for (int ix = 0; ix < fd->collapse; ix++)
5553 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5554 expr = force_gimple_operand_gsi (&gsi, expr, true,
5555 NULL_TREE, true, GSI_SAME_STMT);
5556 ass = gimple_build_assign (tile_size, expr);
5557 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5559 element_s = create_tmp_var (diff_type, ".element_s");
5560 ass = gimple_build_assign (element_s, s);
5561 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5563 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5564 s = force_gimple_operand_gsi (&gsi, expr, true,
5565 NULL_TREE, true, GSI_SAME_STMT);
5568 /* Determine the range, avoiding possible unsigned->signed overflow. */
5569 negating = !up && TYPE_UNSIGNED (iter_type);
5570 expr = fold_build2 (MINUS_EXPR, plus_type,
5571 fold_convert (plus_type, negating ? b : e),
5572 fold_convert (plus_type, negating ? e : b));
5573 expr = fold_convert (diff_type, expr);
5574 if (negating)
5575 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5576 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5577 NULL_TREE, true, GSI_SAME_STMT);
5579 chunk_no = build_int_cst (diff_type, 0);
5580 if (chunking)
5582 gcc_assert (!gimple_in_ssa_p (cfun));
5584 expr = chunk_no;
5585 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5586 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5588 ass = gimple_build_assign (chunk_no, expr);
5589 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5591 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5592 build_int_cst (integer_type_node,
5593 IFN_GOACC_LOOP_CHUNKS),
5594 dir, range, s, chunk_size, gwv);
5595 gimple_call_set_lhs (call, chunk_max);
5596 gimple_set_location (call, loc);
5597 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5599 else
5600 chunk_size = chunk_no;
5602 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5603 build_int_cst (integer_type_node,
5604 IFN_GOACC_LOOP_STEP),
5605 dir, range, s, chunk_size, gwv);
5606 gimple_call_set_lhs (call, step);
5607 gimple_set_location (call, loc);
5608 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5610 /* Remove the GIMPLE_OMP_FOR. */
5611 gsi_remove (&gsi, true);
5613 /* Fixup edges from head_bb. */
5614 be = BRANCH_EDGE (head_bb);
5615 fte = FALLTHRU_EDGE (head_bb);
5616 be->flags |= EDGE_FALSE_VALUE;
5617 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5619 basic_block body_bb = fte->dest;
5621 if (gimple_in_ssa_p (cfun))
5623 gsi = gsi_last_nondebug_bb (cont_bb);
5624 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5626 offset = gimple_omp_continue_control_use (cont_stmt);
5627 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5629 else
5631 offset = create_tmp_var (diff_type, ".offset");
5632 offset_init = offset_incr = offset;
5634 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5636 /* Loop offset & bound go into head_bb. */
5637 gsi = gsi_start_bb (head_bb);
5639 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5640 build_int_cst (integer_type_node,
5641 IFN_GOACC_LOOP_OFFSET),
5642 dir, range, s,
5643 chunk_size, gwv, chunk_no);
5644 gimple_call_set_lhs (call, offset_init);
5645 gimple_set_location (call, loc);
5646 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5648 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5649 build_int_cst (integer_type_node,
5650 IFN_GOACC_LOOP_BOUND),
5651 dir, range, s,
5652 chunk_size, gwv, offset_init);
5653 gimple_call_set_lhs (call, bound);
5654 gimple_set_location (call, loc);
5655 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5657 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5658 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5659 GSI_CONTINUE_LINKING);
5661 /* V assignment goes into body_bb. */
5662 if (!gimple_in_ssa_p (cfun))
5664 gsi = gsi_start_bb (body_bb);
5666 expr = build2 (plus_code, iter_type, b,
5667 fold_convert (plus_type, offset));
5668 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5669 true, GSI_SAME_STMT);
5670 ass = gimple_build_assign (v, expr);
5671 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5673 if (fd->collapse > 1 || fd->tiling)
5674 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5676 if (fd->tiling)
5678 /* Determine the range of the element loop -- usually simply
5679 the tile_size, but could be smaller if the final
5680 iteration of the outer loop is a partial tile. */
5681 tree e_range = create_tmp_var (diff_type, ".e_range");
5683 expr = build2 (MIN_EXPR, diff_type,
5684 build2 (MINUS_EXPR, diff_type, bound, offset),
5685 build2 (MULT_EXPR, diff_type, tile_size,
5686 element_s));
5687 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5688 true, GSI_SAME_STMT);
5689 ass = gimple_build_assign (e_range, expr);
5690 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5692 /* Determine bound, offset & step of inner loop. */
5693 e_bound = create_tmp_var (diff_type, ".e_bound");
5694 e_offset = create_tmp_var (diff_type, ".e_offset");
5695 e_step = create_tmp_var (diff_type, ".e_step");
5697 /* Mark these as element loops. */
5698 tree t, e_gwv = integer_minus_one_node;
5699 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5701 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5702 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5703 element_s, chunk, e_gwv, chunk);
5704 gimple_call_set_lhs (call, e_offset);
5705 gimple_set_location (call, loc);
5706 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5708 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5709 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5710 element_s, chunk, e_gwv, e_offset);
5711 gimple_call_set_lhs (call, e_bound);
5712 gimple_set_location (call, loc);
5713 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5715 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5716 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5717 element_s, chunk, e_gwv);
5718 gimple_call_set_lhs (call, e_step);
5719 gimple_set_location (call, loc);
5720 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5722 /* Add test and split block. */
5723 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5724 stmt = gimple_build_cond_empty (expr);
5725 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5726 split = split_block (body_bb, stmt);
5727 elem_body_bb = split->dest;
5728 if (cont_bb == body_bb)
5729 cont_bb = elem_body_bb;
5730 body_bb = split->src;
5732 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5734 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5735 if (cont_bb == NULL)
5737 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5738 e->probability = profile_probability::even ();
5739 split->probability = profile_probability::even ();
5742 /* Initialize the user's loop vars. */
5743 gsi = gsi_start_bb (elem_body_bb);
5744 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5748 /* Loop increment goes into cont_bb. If this is not a loop, we
5749 will have spawned threads as if it was, and each one will
5750 execute one iteration. The specification is not explicit about
5751 whether such constructs are ill-formed or not, and they can
5752 occur, especially when noreturn routines are involved. */
5753 if (cont_bb)
5755 gsi = gsi_last_nondebug_bb (cont_bb);
5756 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5757 loc = gimple_location (cont_stmt);
5759 if (fd->tiling)
5761 /* Insert element loop increment and test. */
5762 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5763 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5764 true, GSI_SAME_STMT);
5765 ass = gimple_build_assign (e_offset, expr);
5766 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5767 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5769 stmt = gimple_build_cond_empty (expr);
5770 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5771 split = split_block (cont_bb, stmt);
5772 elem_cont_bb = split->src;
5773 cont_bb = split->dest;
5775 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5776 split->probability = profile_probability::unlikely ().guessed ();
5777 edge latch_edge
5778 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5779 latch_edge->probability = profile_probability::likely ().guessed ();
5781 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5782 skip_edge->probability = profile_probability::unlikely ().guessed ();
5783 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5784 loop_entry_edge->probability
5785 = profile_probability::likely ().guessed ();
5787 gsi = gsi_for_stmt (cont_stmt);
5790 /* Increment offset. */
5791 if (gimple_in_ssa_p (cfun))
5792 expr = build2 (plus_code, iter_type, offset,
5793 fold_convert (plus_type, step));
5794 else
5795 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5796 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5797 true, GSI_SAME_STMT);
5798 ass = gimple_build_assign (offset_incr, expr);
5799 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5800 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5801 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5803 /* Remove the GIMPLE_OMP_CONTINUE. */
5804 gsi_remove (&gsi, true);
5806 /* Fixup edges from cont_bb. */
5807 be = BRANCH_EDGE (cont_bb);
5808 fte = FALLTHRU_EDGE (cont_bb);
5809 be->flags |= EDGE_TRUE_VALUE;
5810 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5812 if (chunking)
5814 /* Split the beginning of exit_bb to make bottom_bb. We
5815 need to insert a nop at the start, because splitting is
5816 after a stmt, not before. */
5817 gsi = gsi_start_bb (exit_bb);
5818 stmt = gimple_build_nop ();
5819 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5820 split = split_block (exit_bb, stmt);
5821 bottom_bb = split->src;
5822 exit_bb = split->dest;
5823 gsi = gsi_last_bb (bottom_bb);
5825 /* Chunk increment and test goes into bottom_bb. */
5826 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5827 build_int_cst (diff_type, 1));
5828 ass = gimple_build_assign (chunk_no, expr);
5829 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5831 /* Chunk test at end of bottom_bb. */
5832 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5833 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5834 GSI_CONTINUE_LINKING);
5836 /* Fixup edges from bottom_bb. */
5837 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5838 split->probability = profile_probability::unlikely ().guessed ();
5839 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5840 latch_edge->probability = profile_probability::likely ().guessed ();
5844 gsi = gsi_last_nondebug_bb (exit_bb);
5845 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5846 loc = gimple_location (gsi_stmt (gsi));
5848 if (!gimple_in_ssa_p (cfun))
5850 /* Insert the final value of V, in case it is live. This is the
5851 value for the only thread that survives past the join. */
5852 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5853 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5854 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5855 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5856 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5857 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5858 true, GSI_SAME_STMT);
5859 ass = gimple_build_assign (v, expr);
5860 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5863 /* Remove the OMP_RETURN. */
5864 gsi_remove (&gsi, true);
5866 if (cont_bb)
5868 /* We now have one, two or three nested loops. Update the loop
5869 structures. */
5870 struct loop *parent = entry_bb->loop_father;
5871 struct loop *body = body_bb->loop_father;
5873 if (chunking)
5875 struct loop *chunk_loop = alloc_loop ();
5876 chunk_loop->header = head_bb;
5877 chunk_loop->latch = bottom_bb;
5878 add_loop (chunk_loop, parent);
5879 parent = chunk_loop;
5881 else if (parent != body)
5883 gcc_assert (body->header == body_bb);
5884 gcc_assert (body->latch == cont_bb
5885 || single_pred (body->latch) == cont_bb);
5886 parent = NULL;
5889 if (parent)
5891 struct loop *body_loop = alloc_loop ();
5892 body_loop->header = body_bb;
5893 body_loop->latch = cont_bb;
5894 add_loop (body_loop, parent);
5896 if (fd->tiling)
5898 /* Insert tiling's element loop. */
5899 struct loop *inner_loop = alloc_loop ();
5900 inner_loop->header = elem_body_bb;
5901 inner_loop->latch = elem_cont_bb;
5902 add_loop (inner_loop, body_loop);
5908 /* Expand the OMP loop defined by REGION. */
5910 static void
5911 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5913 struct omp_for_data fd;
5914 struct omp_for_data_loop *loops;
5916 loops
5917 = (struct omp_for_data_loop *)
5918 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5919 * sizeof (struct omp_for_data_loop));
5920 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5921 &fd, loops);
5922 region->sched_kind = fd.sched_kind;
5923 region->sched_modifiers = fd.sched_modifiers;
5925 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5926 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5927 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5928 if (region->cont)
5930 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5931 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5932 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5934 else
5935 /* If there isn't a continue then this is a degerate case where
5936 the introduction of abnormal edges during lowering will prevent
5937 original loops from being detected. Fix that up. */
5938 loops_state_set (LOOPS_NEED_FIXUP);
5940 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5941 expand_omp_simd (region, &fd);
5942 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5944 gcc_assert (!inner_stmt);
5945 expand_oacc_for (region, &fd);
5947 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5949 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5950 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5951 else
5952 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5954 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5955 && !fd.have_ordered)
5957 if (fd.chunk_size == NULL)
5958 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5959 else
5960 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5962 else
5964 int fn_index, start_ix, next_ix;
5965 unsigned HOST_WIDE_INT sched = 0;
5966 tree sched_arg = NULL_TREE;
5968 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5969 == GF_OMP_FOR_KIND_FOR);
5970 if (fd.chunk_size == NULL
5971 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5972 fd.chunk_size = integer_zero_node;
5973 switch (fd.sched_kind)
5975 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5976 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5978 gcc_assert (!fd.have_ordered);
5979 fn_index = 6;
5980 sched = 4;
5982 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5983 && !fd.have_ordered)
5984 fn_index = 7;
5985 else
5987 fn_index = 3;
5988 sched = (HOST_WIDE_INT_1U << 31);
5990 break;
5991 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5992 case OMP_CLAUSE_SCHEDULE_GUIDED:
5993 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5994 && !fd.have_ordered)
5996 fn_index = 3 + fd.sched_kind;
5997 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
5998 break;
6000 fn_index = fd.sched_kind;
6001 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6002 sched += (HOST_WIDE_INT_1U << 31);
6003 break;
6004 case OMP_CLAUSE_SCHEDULE_STATIC:
6005 gcc_assert (fd.have_ordered);
6006 fn_index = 0;
6007 sched = (HOST_WIDE_INT_1U << 31) + 1;
6008 break;
6009 default:
6010 gcc_unreachable ();
6012 if (!fd.ordered)
6013 fn_index += fd.have_ordered * 8;
6014 if (fd.ordered)
6015 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6016 else
6017 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6018 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6019 if (fd.have_reductemp)
6021 if (fd.ordered)
6022 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6023 else if (fd.have_ordered)
6024 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6025 else
6026 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6027 sched_arg = build_int_cstu (long_integer_type_node, sched);
6028 if (!fd.chunk_size)
6029 fd.chunk_size = integer_zero_node;
6031 if (fd.iter_type == long_long_unsigned_type_node)
6033 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6034 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6035 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6036 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6038 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6039 (enum built_in_function) next_ix, sched_arg,
6040 inner_stmt);
6043 if (gimple_in_ssa_p (cfun))
6044 update_ssa (TODO_update_ssa_only_virtuals);
6047 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6049 v = GOMP_sections_start (n);
6051 switch (v)
6053 case 0:
6054 goto L2;
6055 case 1:
6056 section 1;
6057 goto L1;
6058 case 2:
6060 case n:
6062 default:
6063 abort ();
6066 v = GOMP_sections_next ();
6067 goto L0;
6069 reduction;
6071 If this is a combined parallel sections, replace the call to
6072 GOMP_sections_start with call to GOMP_sections_next. */
6074 static void
6075 expand_omp_sections (struct omp_region *region)
6077 tree t, u, vin = NULL, vmain, vnext, l2;
6078 unsigned len;
6079 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6080 gimple_stmt_iterator si, switch_si;
6081 gomp_sections *sections_stmt;
6082 gimple *stmt;
6083 gomp_continue *cont;
6084 edge_iterator ei;
6085 edge e;
6086 struct omp_region *inner;
6087 unsigned i, casei;
6088 bool exit_reachable = region->cont != NULL;
6090 gcc_assert (region->exit != NULL);
6091 entry_bb = region->entry;
6092 l0_bb = single_succ (entry_bb);
6093 l1_bb = region->cont;
6094 l2_bb = region->exit;
6095 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6096 l2 = gimple_block_label (l2_bb);
6097 else
6099 /* This can happen if there are reductions. */
6100 len = EDGE_COUNT (l0_bb->succs);
6101 gcc_assert (len > 0);
6102 e = EDGE_SUCC (l0_bb, len - 1);
6103 si = gsi_last_nondebug_bb (e->dest);
6104 l2 = NULL_TREE;
6105 if (gsi_end_p (si)
6106 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6107 l2 = gimple_block_label (e->dest);
6108 else
6109 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6111 si = gsi_last_nondebug_bb (e->dest);
6112 if (gsi_end_p (si)
6113 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6115 l2 = gimple_block_label (e->dest);
6116 break;
6120 if (exit_reachable)
6121 default_bb = create_empty_bb (l1_bb->prev_bb);
6122 else
6123 default_bb = create_empty_bb (l0_bb);
6125 /* We will build a switch() with enough cases for all the
6126 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6127 and a default case to abort if something goes wrong. */
6128 len = EDGE_COUNT (l0_bb->succs);
6130 /* Use vec::quick_push on label_vec throughout, since we know the size
6131 in advance. */
6132 auto_vec<tree> label_vec (len);
6134 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6135 GIMPLE_OMP_SECTIONS statement. */
6136 si = gsi_last_nondebug_bb (entry_bb);
6137 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6138 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6139 vin = gimple_omp_sections_control (sections_stmt);
6140 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6141 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6142 if (reductmp)
6144 tree reductions = OMP_CLAUSE_DECL (reductmp);
6145 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6146 gimple *g = SSA_NAME_DEF_STMT (reductions);
6147 reductions = gimple_assign_rhs1 (g);
6148 OMP_CLAUSE_DECL (reductmp) = reductions;
6149 gimple_stmt_iterator gsi = gsi_for_stmt (g);
6150 t = build_int_cst (unsigned_type_node, len - 1);
6151 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6152 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6153 gimple_call_set_lhs (stmt, vin);
6154 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6155 gsi_remove (&gsi, true);
6156 release_ssa_name (gimple_assign_lhs (g));
6158 else if (!is_combined_parallel (region))
6160 /* If we are not inside a combined parallel+sections region,
6161 call GOMP_sections_start. */
6162 t = build_int_cst (unsigned_type_node, len - 1);
6163 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6164 stmt = gimple_build_call (u, 1, t);
6166 else
6168 /* Otherwise, call GOMP_sections_next. */
6169 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6170 stmt = gimple_build_call (u, 0);
6172 if (!reductmp)
6174 gimple_call_set_lhs (stmt, vin);
6175 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6177 gsi_remove (&si, true);
6179 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6180 L0_BB. */
6181 switch_si = gsi_last_nondebug_bb (l0_bb);
6182 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6183 if (exit_reachable)
6185 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6186 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6187 vmain = gimple_omp_continue_control_use (cont);
6188 vnext = gimple_omp_continue_control_def (cont);
6190 else
6192 vmain = vin;
6193 vnext = NULL_TREE;
6196 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6197 label_vec.quick_push (t);
6198 i = 1;
6200 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6201 for (inner = region->inner, casei = 1;
6202 inner;
6203 inner = inner->next, i++, casei++)
6205 basic_block s_entry_bb, s_exit_bb;
6207 /* Skip optional reduction region. */
6208 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6210 --i;
6211 --casei;
6212 continue;
6215 s_entry_bb = inner->entry;
6216 s_exit_bb = inner->exit;
6218 t = gimple_block_label (s_entry_bb);
6219 u = build_int_cst (unsigned_type_node, casei);
6220 u = build_case_label (u, NULL, t);
6221 label_vec.quick_push (u);
6223 si = gsi_last_nondebug_bb (s_entry_bb);
6224 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6225 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6226 gsi_remove (&si, true);
6227 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6229 if (s_exit_bb == NULL)
6230 continue;
6232 si = gsi_last_nondebug_bb (s_exit_bb);
6233 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6234 gsi_remove (&si, true);
6236 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6239 /* Error handling code goes in DEFAULT_BB. */
6240 t = gimple_block_label (default_bb);
6241 u = build_case_label (NULL, NULL, t);
6242 make_edge (l0_bb, default_bb, 0);
6243 add_bb_to_loop (default_bb, current_loops->tree_root);
6245 stmt = gimple_build_switch (vmain, u, label_vec);
6246 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6247 gsi_remove (&switch_si, true);
6249 si = gsi_start_bb (default_bb);
6250 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6251 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6253 if (exit_reachable)
6255 tree bfn_decl;
6257 /* Code to get the next section goes in L1_BB. */
6258 si = gsi_last_nondebug_bb (l1_bb);
6259 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6261 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6262 stmt = gimple_build_call (bfn_decl, 0);
6263 gimple_call_set_lhs (stmt, vnext);
6264 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6265 gsi_remove (&si, true);
6267 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6270 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6271 si = gsi_last_nondebug_bb (l2_bb);
6272 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6273 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6274 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6275 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6276 else
6277 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6278 stmt = gimple_build_call (t, 0);
6279 if (gimple_omp_return_lhs (gsi_stmt (si)))
6280 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6281 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6282 gsi_remove (&si, true);
6284 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6287 /* Expand code for an OpenMP single directive. We've already expanded
6288 much of the code, here we simply place the GOMP_barrier call. */
6290 static void
6291 expand_omp_single (struct omp_region *region)
6293 basic_block entry_bb, exit_bb;
6294 gimple_stmt_iterator si;
6296 entry_bb = region->entry;
6297 exit_bb = region->exit;
6299 si = gsi_last_nondebug_bb (entry_bb);
6300 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6301 gsi_remove (&si, true);
6302 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6304 si = gsi_last_nondebug_bb (exit_bb);
6305 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6307 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6308 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6310 gsi_remove (&si, true);
6311 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6314 /* Generic expansion for OpenMP synchronization directives: master,
6315 ordered and critical. All we need to do here is remove the entry
6316 and exit markers for REGION. */
6318 static void
6319 expand_omp_synch (struct omp_region *region)
6321 basic_block entry_bb, exit_bb;
6322 gimple_stmt_iterator si;
6324 entry_bb = region->entry;
6325 exit_bb = region->exit;
6327 si = gsi_last_nondebug_bb (entry_bb);
6328 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6329 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6330 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6331 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6332 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6333 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6334 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6335 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6337 expand_omp_taskreg (region);
6338 return;
6340 gsi_remove (&si, true);
6341 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6343 if (exit_bb)
6345 si = gsi_last_nondebug_bb (exit_bb);
6346 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6347 gsi_remove (&si, true);
6348 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6352 /* Translate enum omp_memory_order to enum memmodel. The two enums
6353 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6354 is 0. */
6356 static enum memmodel
6357 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6359 switch (mo)
6361 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6362 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6363 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6364 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6365 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6366 default: gcc_unreachable ();
6370 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6371 operation as a normal volatile load. */
6373 static bool
6374 expand_omp_atomic_load (basic_block load_bb, tree addr,
6375 tree loaded_val, int index)
6377 enum built_in_function tmpbase;
6378 gimple_stmt_iterator gsi;
6379 basic_block store_bb;
6380 location_t loc;
6381 gimple *stmt;
6382 tree decl, call, type, itype;
6384 gsi = gsi_last_nondebug_bb (load_bb);
6385 stmt = gsi_stmt (gsi);
6386 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6387 loc = gimple_location (stmt);
6389 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6390 is smaller than word size, then expand_atomic_load assumes that the load
6391 is atomic. We could avoid the builtin entirely in this case. */
6393 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6394 decl = builtin_decl_explicit (tmpbase);
6395 if (decl == NULL_TREE)
6396 return false;
6398 type = TREE_TYPE (loaded_val);
6399 itype = TREE_TYPE (TREE_TYPE (decl));
6401 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6402 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6403 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6404 if (!useless_type_conversion_p (type, itype))
6405 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6406 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6408 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6409 gsi_remove (&gsi, true);
6411 store_bb = single_succ (load_bb);
6412 gsi = gsi_last_nondebug_bb (store_bb);
6413 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6414 gsi_remove (&gsi, true);
6416 if (gimple_in_ssa_p (cfun))
6417 update_ssa (TODO_update_ssa_no_phi);
6419 return true;
6422 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6423 operation as a normal volatile store. */
6425 static bool
6426 expand_omp_atomic_store (basic_block load_bb, tree addr,
6427 tree loaded_val, tree stored_val, int index)
6429 enum built_in_function tmpbase;
6430 gimple_stmt_iterator gsi;
6431 basic_block store_bb = single_succ (load_bb);
6432 location_t loc;
6433 gimple *stmt;
6434 tree decl, call, type, itype;
6435 machine_mode imode;
6436 bool exchange;
6438 gsi = gsi_last_nondebug_bb (load_bb);
6439 stmt = gsi_stmt (gsi);
6440 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6442 /* If the load value is needed, then this isn't a store but an exchange. */
6443 exchange = gimple_omp_atomic_need_value_p (stmt);
6445 gsi = gsi_last_nondebug_bb (store_bb);
6446 stmt = gsi_stmt (gsi);
6447 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6448 loc = gimple_location (stmt);
6450 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6451 is smaller than word size, then expand_atomic_store assumes that the store
6452 is atomic. We could avoid the builtin entirely in this case. */
6454 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6455 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6456 decl = builtin_decl_explicit (tmpbase);
6457 if (decl == NULL_TREE)
6458 return false;
6460 type = TREE_TYPE (stored_val);
6462 /* Dig out the type of the function's second argument. */
6463 itype = TREE_TYPE (decl);
6464 itype = TYPE_ARG_TYPES (itype);
6465 itype = TREE_CHAIN (itype);
6466 itype = TREE_VALUE (itype);
6467 imode = TYPE_MODE (itype);
6469 if (exchange && !can_atomic_exchange_p (imode, true))
6470 return false;
6472 if (!useless_type_conversion_p (itype, type))
6473 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6474 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6475 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6476 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6477 if (exchange)
6479 if (!useless_type_conversion_p (type, itype))
6480 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6481 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6484 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6485 gsi_remove (&gsi, true);
6487 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6488 gsi = gsi_last_nondebug_bb (load_bb);
6489 gsi_remove (&gsi, true);
6491 if (gimple_in_ssa_p (cfun))
6492 update_ssa (TODO_update_ssa_no_phi);
6494 return true;
6497 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6498 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6499 size of the data type, and thus usable to find the index of the builtin
6500 decl. Returns false if the expression is not of the proper form. */
6502 static bool
6503 expand_omp_atomic_fetch_op (basic_block load_bb,
6504 tree addr, tree loaded_val,
6505 tree stored_val, int index)
6507 enum built_in_function oldbase, newbase, tmpbase;
6508 tree decl, itype, call;
6509 tree lhs, rhs;
6510 basic_block store_bb = single_succ (load_bb);
6511 gimple_stmt_iterator gsi;
6512 gimple *stmt;
6513 location_t loc;
6514 enum tree_code code;
6515 bool need_old, need_new;
6516 machine_mode imode;
6518 /* We expect to find the following sequences:
6520 load_bb:
6521 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6523 store_bb:
6524 val = tmp OP something; (or: something OP tmp)
6525 GIMPLE_OMP_STORE (val)
6527 ???FIXME: Allow a more flexible sequence.
6528 Perhaps use data flow to pick the statements.
6532 gsi = gsi_after_labels (store_bb);
6533 stmt = gsi_stmt (gsi);
6534 if (is_gimple_debug (stmt))
6536 gsi_next_nondebug (&gsi);
6537 if (gsi_end_p (gsi))
6538 return false;
6539 stmt = gsi_stmt (gsi);
6541 loc = gimple_location (stmt);
6542 if (!is_gimple_assign (stmt))
6543 return false;
6544 gsi_next_nondebug (&gsi);
6545 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6546 return false;
6547 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6548 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6549 enum omp_memory_order omo
6550 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6551 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6552 gcc_checking_assert (!need_old || !need_new);
6554 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6555 return false;
6557 /* Check for one of the supported fetch-op operations. */
6558 code = gimple_assign_rhs_code (stmt);
6559 switch (code)
6561 case PLUS_EXPR:
6562 case POINTER_PLUS_EXPR:
6563 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6564 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6565 break;
6566 case MINUS_EXPR:
6567 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6568 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6569 break;
6570 case BIT_AND_EXPR:
6571 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6572 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6573 break;
6574 case BIT_IOR_EXPR:
6575 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6576 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6577 break;
6578 case BIT_XOR_EXPR:
6579 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6580 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6581 break;
6582 default:
6583 return false;
6586 /* Make sure the expression is of the proper form. */
6587 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6588 rhs = gimple_assign_rhs2 (stmt);
6589 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6590 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6591 rhs = gimple_assign_rhs1 (stmt);
6592 else
6593 return false;
6595 tmpbase = ((enum built_in_function)
6596 ((need_new ? newbase : oldbase) + index + 1));
6597 decl = builtin_decl_explicit (tmpbase);
6598 if (decl == NULL_TREE)
6599 return false;
6600 itype = TREE_TYPE (TREE_TYPE (decl));
6601 imode = TYPE_MODE (itype);
6603 /* We could test all of the various optabs involved, but the fact of the
6604 matter is that (with the exception of i486 vs i586 and xadd) all targets
6605 that support any atomic operaton optab also implements compare-and-swap.
6606 Let optabs.c take care of expanding any compare-and-swap loop. */
6607 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6608 return false;
6610 gsi = gsi_last_nondebug_bb (load_bb);
6611 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6613 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6614 It only requires that the operation happen atomically. Thus we can
6615 use the RELAXED memory model. */
6616 call = build_call_expr_loc (loc, decl, 3, addr,
6617 fold_convert_loc (loc, itype, rhs),
6618 build_int_cst (NULL, mo));
6620 if (need_old || need_new)
6622 lhs = need_old ? loaded_val : stored_val;
6623 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6624 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6626 else
6627 call = fold_convert_loc (loc, void_type_node, call);
6628 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6629 gsi_remove (&gsi, true);
6631 gsi = gsi_last_nondebug_bb (store_bb);
6632 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6633 gsi_remove (&gsi, true);
6634 gsi = gsi_last_nondebug_bb (store_bb);
6635 stmt = gsi_stmt (gsi);
6636 gsi_remove (&gsi, true);
6638 if (gimple_in_ssa_p (cfun))
6640 release_defs (stmt);
6641 update_ssa (TODO_update_ssa_no_phi);
6644 return true;
6647 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6649 oldval = *addr;
6650 repeat:
6651 newval = rhs; // with oldval replacing *addr in rhs
6652 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6653 if (oldval != newval)
6654 goto repeat;
6656 INDEX is log2 of the size of the data type, and thus usable to find the
6657 index of the builtin decl. */
6659 static bool
6660 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6661 tree addr, tree loaded_val, tree stored_val,
6662 int index)
6664 tree loadedi, storedi, initial, new_storedi, old_vali;
6665 tree type, itype, cmpxchg, iaddr, atype;
6666 gimple_stmt_iterator si;
6667 basic_block loop_header = single_succ (load_bb);
6668 gimple *phi, *stmt;
6669 edge e;
6670 enum built_in_function fncode;
6672 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6673 order to use the RELAXED memory model effectively. */
6674 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6675 + index + 1);
6676 cmpxchg = builtin_decl_explicit (fncode);
6677 if (cmpxchg == NULL_TREE)
6678 return false;
6679 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6680 atype = type;
6681 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6683 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6684 || !can_atomic_load_p (TYPE_MODE (itype)))
6685 return false;
6687 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6688 si = gsi_last_nondebug_bb (load_bb);
6689 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6691 /* For floating-point values, we'll need to view-convert them to integers
6692 so that we can perform the atomic compare and swap. Simplify the
6693 following code by always setting up the "i"ntegral variables. */
6694 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6696 tree iaddr_val;
6698 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6699 true));
6700 atype = itype;
6701 iaddr_val
6702 = force_gimple_operand_gsi (&si,
6703 fold_convert (TREE_TYPE (iaddr), addr),
6704 false, NULL_TREE, true, GSI_SAME_STMT);
6705 stmt = gimple_build_assign (iaddr, iaddr_val);
6706 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6707 loadedi = create_tmp_var (itype);
6708 if (gimple_in_ssa_p (cfun))
6709 loadedi = make_ssa_name (loadedi);
6711 else
6713 iaddr = addr;
6714 loadedi = loaded_val;
6717 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6718 tree loaddecl = builtin_decl_explicit (fncode);
6719 if (loaddecl)
6720 initial
6721 = fold_convert (atype,
6722 build_call_expr (loaddecl, 2, iaddr,
6723 build_int_cst (NULL_TREE,
6724 MEMMODEL_RELAXED)));
6725 else
6727 tree off
6728 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6729 true), 0);
6730 initial = build2 (MEM_REF, atype, iaddr, off);
6733 initial
6734 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6735 GSI_SAME_STMT);
6737 /* Move the value to the LOADEDI temporary. */
6738 if (gimple_in_ssa_p (cfun))
6740 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6741 phi = create_phi_node (loadedi, loop_header);
6742 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6743 initial);
6745 else
6746 gsi_insert_before (&si,
6747 gimple_build_assign (loadedi, initial),
6748 GSI_SAME_STMT);
6749 if (loadedi != loaded_val)
6751 gimple_stmt_iterator gsi2;
6752 tree x;
6754 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6755 gsi2 = gsi_start_bb (loop_header);
6756 if (gimple_in_ssa_p (cfun))
6758 gassign *stmt;
6759 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6760 true, GSI_SAME_STMT);
6761 stmt = gimple_build_assign (loaded_val, x);
6762 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6764 else
6766 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6767 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6768 true, GSI_SAME_STMT);
6771 gsi_remove (&si, true);
6773 si = gsi_last_nondebug_bb (store_bb);
6774 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6776 if (iaddr == addr)
6777 storedi = stored_val;
6778 else
6779 storedi
6780 = force_gimple_operand_gsi (&si,
6781 build1 (VIEW_CONVERT_EXPR, itype,
6782 stored_val), true, NULL_TREE, true,
6783 GSI_SAME_STMT);
6785 /* Build the compare&swap statement. */
6786 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6787 new_storedi = force_gimple_operand_gsi (&si,
6788 fold_convert (TREE_TYPE (loadedi),
6789 new_storedi),
6790 true, NULL_TREE,
6791 true, GSI_SAME_STMT);
6793 if (gimple_in_ssa_p (cfun))
6794 old_vali = loadedi;
6795 else
6797 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6798 stmt = gimple_build_assign (old_vali, loadedi);
6799 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6801 stmt = gimple_build_assign (loadedi, new_storedi);
6802 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6805 /* Note that we always perform the comparison as an integer, even for
6806 floating point. This allows the atomic operation to properly
6807 succeed even with NaNs and -0.0. */
6808 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6809 stmt = gimple_build_cond_empty (ne);
6810 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6812 /* Update cfg. */
6813 e = single_succ_edge (store_bb);
6814 e->flags &= ~EDGE_FALLTHRU;
6815 e->flags |= EDGE_FALSE_VALUE;
6816 /* Expect no looping. */
6817 e->probability = profile_probability::guessed_always ();
6819 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6820 e->probability = profile_probability::guessed_never ();
6822 /* Copy the new value to loadedi (we already did that before the condition
6823 if we are not in SSA). */
6824 if (gimple_in_ssa_p (cfun))
6826 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6827 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6830 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6831 gsi_remove (&si, true);
6833 struct loop *loop = alloc_loop ();
6834 loop->header = loop_header;
6835 loop->latch = store_bb;
6836 add_loop (loop, loop_header->loop_father);
6838 if (gimple_in_ssa_p (cfun))
6839 update_ssa (TODO_update_ssa_no_phi);
6841 return true;
6844 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6846 GOMP_atomic_start ();
6847 *addr = rhs;
6848 GOMP_atomic_end ();
6850 The result is not globally atomic, but works so long as all parallel
6851 references are within #pragma omp atomic directives. According to
6852 responses received from omp@openmp.org, appears to be within spec.
6853 Which makes sense, since that's how several other compilers handle
6854 this situation as well.
6855 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6856 expanding. STORED_VAL is the operand of the matching
6857 GIMPLE_OMP_ATOMIC_STORE.
6859 We replace
6860 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6861 loaded_val = *addr;
6863 and replace
6864 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6865 *addr = stored_val;
6868 static bool
6869 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6870 tree addr, tree loaded_val, tree stored_val)
6872 gimple_stmt_iterator si;
6873 gassign *stmt;
6874 tree t;
6876 si = gsi_last_nondebug_bb (load_bb);
6877 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6879 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6880 t = build_call_expr (t, 0);
6881 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6883 tree mem = build_simple_mem_ref (addr);
6884 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6885 TREE_OPERAND (mem, 1)
6886 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6887 true),
6888 TREE_OPERAND (mem, 1));
6889 stmt = gimple_build_assign (loaded_val, mem);
6890 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6891 gsi_remove (&si, true);
6893 si = gsi_last_nondebug_bb (store_bb);
6894 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6896 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6897 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6899 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6900 t = build_call_expr (t, 0);
6901 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6902 gsi_remove (&si, true);
6904 if (gimple_in_ssa_p (cfun))
6905 update_ssa (TODO_update_ssa_no_phi);
6906 return true;
6909 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6910 using expand_omp_atomic_fetch_op. If it failed, we try to
6911 call expand_omp_atomic_pipeline, and if it fails too, the
6912 ultimate fallback is wrapping the operation in a mutex
6913 (expand_omp_atomic_mutex). REGION is the atomic region built
6914 by build_omp_regions_1(). */
6916 static void
6917 expand_omp_atomic (struct omp_region *region)
6919 basic_block load_bb = region->entry, store_bb = region->exit;
6920 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6921 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6922 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6923 tree addr = gimple_omp_atomic_load_rhs (load);
6924 tree stored_val = gimple_omp_atomic_store_val (store);
6925 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6926 HOST_WIDE_INT index;
6928 /* Make sure the type is one of the supported sizes. */
6929 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6930 index = exact_log2 (index);
6931 if (index >= 0 && index <= 4)
6933 unsigned int align = TYPE_ALIGN_UNIT (type);
6935 /* __sync builtins require strict data alignment. */
6936 if (exact_log2 (align) >= index)
6938 /* Atomic load. */
6939 scalar_mode smode;
6940 if (loaded_val == stored_val
6941 && (is_int_mode (TYPE_MODE (type), &smode)
6942 || is_float_mode (TYPE_MODE (type), &smode))
6943 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6944 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6945 return;
6947 /* Atomic store. */
6948 if ((is_int_mode (TYPE_MODE (type), &smode)
6949 || is_float_mode (TYPE_MODE (type), &smode))
6950 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6951 && store_bb == single_succ (load_bb)
6952 && first_stmt (store_bb) == store
6953 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6954 stored_val, index))
6955 return;
6957 /* When possible, use specialized atomic update functions. */
6958 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6959 && store_bb == single_succ (load_bb)
6960 && expand_omp_atomic_fetch_op (load_bb, addr,
6961 loaded_val, stored_val, index))
6962 return;
6964 /* If we don't have specialized __sync builtins, try and implement
6965 as a compare and swap loop. */
6966 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6967 loaded_val, stored_val, index))
6968 return;
6972 /* The ultimate fallback is wrapping the operation in a mutex. */
6973 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6976 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6977 at REGION_EXIT. */
6979 static void
6980 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6981 basic_block region_exit)
6983 struct loop *outer = region_entry->loop_father;
6984 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6986 /* Don't parallelize the kernels region if it contains more than one outer
6987 loop. */
6988 unsigned int nr_outer_loops = 0;
6989 struct loop *single_outer = NULL;
6990 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6992 gcc_assert (loop_outer (loop) == outer);
6994 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6995 continue;
6997 if (region_exit != NULL
6998 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6999 continue;
7001 nr_outer_loops++;
7002 single_outer = loop;
7004 if (nr_outer_loops != 1)
7005 return;
7007 for (struct loop *loop = single_outer->inner;
7008 loop != NULL;
7009 loop = loop->inner)
7010 if (loop->next)
7011 return;
7013 /* Mark the loops in the region. */
7014 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7015 loop->in_oacc_kernels_region = true;
7018 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
7020 struct GTY(()) grid_launch_attributes_trees
7022 tree kernel_dim_array_type;
7023 tree kernel_lattrs_dimnum_decl;
7024 tree kernel_lattrs_grid_decl;
7025 tree kernel_lattrs_group_decl;
7026 tree kernel_launch_attributes_type;
7029 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7031 /* Create types used to pass kernel launch attributes to target. */
7033 static void
7034 grid_create_kernel_launch_attr_types (void)
7036 if (grid_attr_trees)
7037 return;
7038 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7040 tree dim_arr_index_type
7041 = build_index_type (build_int_cst (integer_type_node, 2));
7042 grid_attr_trees->kernel_dim_array_type
7043 = build_array_type (uint32_type_node, dim_arr_index_type);
7045 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7046 grid_attr_trees->kernel_lattrs_dimnum_decl
7047 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7048 uint32_type_node);
7049 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7051 grid_attr_trees->kernel_lattrs_grid_decl
7052 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7053 grid_attr_trees->kernel_dim_array_type);
7054 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7055 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7056 grid_attr_trees->kernel_lattrs_group_decl
7057 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7058 grid_attr_trees->kernel_dim_array_type);
7059 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7060 = grid_attr_trees->kernel_lattrs_grid_decl;
7061 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7062 "__gomp_kernel_launch_attributes",
7063 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7066 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7067 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7068 of type uint32_type_node. */
7070 static void
7071 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7072 tree fld_decl, int index, tree value)
7074 tree ref = build4 (ARRAY_REF, uint32_type_node,
7075 build3 (COMPONENT_REF,
7076 grid_attr_trees->kernel_dim_array_type,
7077 range_var, fld_decl, NULL_TREE),
7078 build_int_cst (integer_type_node, index),
7079 NULL_TREE, NULL_TREE);
7080 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7083 /* Return a tree representation of a pointer to a structure with grid and
7084 work-group size information. Statements filling that information will be
7085 inserted before GSI, TGT_STMT is the target statement which has the
7086 necessary information in it. */
7088 static tree
7089 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7090 gomp_target *tgt_stmt)
7092 grid_create_kernel_launch_attr_types ();
7093 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7094 "__kernel_launch_attrs");
7096 unsigned max_dim = 0;
7097 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7098 clause;
7099 clause = OMP_CLAUSE_CHAIN (clause))
7101 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7102 continue;
7104 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7105 max_dim = MAX (dim, max_dim);
7107 grid_insert_store_range_dim (gsi, lattrs,
7108 grid_attr_trees->kernel_lattrs_grid_decl,
7109 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7110 grid_insert_store_range_dim (gsi, lattrs,
7111 grid_attr_trees->kernel_lattrs_group_decl,
7112 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7115 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7116 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7117 gcc_checking_assert (max_dim <= 2);
7118 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7119 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7120 GSI_SAME_STMT);
7121 TREE_ADDRESSABLE (lattrs) = 1;
7122 return build_fold_addr_expr (lattrs);
7125 /* Build target argument identifier from the DEVICE identifier, value
7126 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7128 static tree
7129 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7131 tree t = build_int_cst (integer_type_node, device);
7132 if (subseqent_param)
7133 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7134 build_int_cst (integer_type_node,
7135 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7136 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7137 build_int_cst (integer_type_node, id));
7138 return t;
7141 /* Like above but return it in type that can be directly stored as an element
7142 of the argument array. */
7144 static tree
7145 get_target_argument_identifier (int device, bool subseqent_param, int id)
7147 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7148 return fold_convert (ptr_type_node, t);
7151 /* Return a target argument consisting of DEVICE identifier, value identifier
7152 ID, and the actual VALUE. */
7154 static tree
7155 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7156 tree value)
7158 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7159 fold_convert (integer_type_node, value),
7160 build_int_cst (unsigned_type_node,
7161 GOMP_TARGET_ARG_VALUE_SHIFT));
7162 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7163 get_target_argument_identifier_1 (device, false, id));
7164 t = fold_convert (ptr_type_node, t);
7165 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7168 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7169 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7170 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7171 arguments. */
7173 static void
7174 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7175 int id, tree value, vec <tree> *args)
7177 if (tree_fits_shwi_p (value)
7178 && tree_to_shwi (value) > -(1 << 15)
7179 && tree_to_shwi (value) < (1 << 15))
7180 args->quick_push (get_target_argument_value (gsi, device, id, value));
7181 else
7183 args->quick_push (get_target_argument_identifier (device, true, id));
7184 value = fold_convert (ptr_type_node, value);
7185 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7186 GSI_SAME_STMT);
7187 args->quick_push (value);
7191 /* Create an array of arguments that is then passed to GOMP_target. */
7193 static tree
7194 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7196 auto_vec <tree, 6> args;
7197 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7198 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7199 if (c)
7200 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7201 else
7202 t = integer_minus_one_node;
7203 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7204 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7206 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7207 if (c)
7208 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7209 else
7210 t = integer_minus_one_node;
7211 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7212 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7213 &args);
7215 /* Add HSA-specific grid sizes, if available. */
7216 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7217 OMP_CLAUSE__GRIDDIM_))
7219 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7220 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7221 args.quick_push (t);
7222 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7225 /* Produce more, perhaps device specific, arguments here. */
7227 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7228 args.length () + 1),
7229 ".omp_target_args");
7230 for (unsigned i = 0; i < args.length (); i++)
7232 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7233 build_int_cst (integer_type_node, i),
7234 NULL_TREE, NULL_TREE);
7235 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7236 GSI_SAME_STMT);
7238 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7239 build_int_cst (integer_type_node, args.length ()),
7240 NULL_TREE, NULL_TREE);
7241 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7242 GSI_SAME_STMT);
7243 TREE_ADDRESSABLE (argarray) = 1;
7244 return build_fold_addr_expr (argarray);
7247 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7249 static void
7250 expand_omp_target (struct omp_region *region)
7252 basic_block entry_bb, exit_bb, new_bb;
7253 struct function *child_cfun;
7254 tree child_fn, block, t;
7255 gimple_stmt_iterator gsi;
7256 gomp_target *entry_stmt;
7257 gimple *stmt;
7258 edge e;
7259 bool offloaded, data_region;
7261 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7262 new_bb = region->entry;
7264 offloaded = is_gimple_omp_offloaded (entry_stmt);
7265 switch (gimple_omp_target_kind (entry_stmt))
7267 case GF_OMP_TARGET_KIND_REGION:
7268 case GF_OMP_TARGET_KIND_UPDATE:
7269 case GF_OMP_TARGET_KIND_ENTER_DATA:
7270 case GF_OMP_TARGET_KIND_EXIT_DATA:
7271 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7272 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7273 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7274 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7275 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7276 data_region = false;
7277 break;
7278 case GF_OMP_TARGET_KIND_DATA:
7279 case GF_OMP_TARGET_KIND_OACC_DATA:
7280 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7281 data_region = true;
7282 break;
7283 default:
7284 gcc_unreachable ();
7287 child_fn = NULL_TREE;
7288 child_cfun = NULL;
7289 if (offloaded)
7291 child_fn = gimple_omp_target_child_fn (entry_stmt);
7292 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7295 /* Supported by expand_omp_taskreg, but not here. */
7296 if (child_cfun != NULL)
7297 gcc_checking_assert (!child_cfun->cfg);
7298 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7300 entry_bb = region->entry;
7301 exit_bb = region->exit;
7303 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7305 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7307 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7308 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7309 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7310 DECL_ATTRIBUTES (child_fn)
7311 = tree_cons (get_identifier ("oacc kernels"),
7312 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7315 if (offloaded)
7317 unsigned srcidx, dstidx, num;
7319 /* If the offloading region needs data sent from the parent
7320 function, then the very first statement (except possible
7321 tree profile counter updates) of the offloading body
7322 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7323 &.OMP_DATA_O is passed as an argument to the child function,
7324 we need to replace it with the argument as seen by the child
7325 function.
7327 In most cases, this will end up being the identity assignment
7328 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7329 a function call that has been inlined, the original PARM_DECL
7330 .OMP_DATA_I may have been converted into a different local
7331 variable. In which case, we need to keep the assignment. */
7332 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7333 if (data_arg)
7335 basic_block entry_succ_bb = single_succ (entry_bb);
7336 gimple_stmt_iterator gsi;
7337 tree arg;
7338 gimple *tgtcopy_stmt = NULL;
7339 tree sender = TREE_VEC_ELT (data_arg, 0);
7341 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7343 gcc_assert (!gsi_end_p (gsi));
7344 stmt = gsi_stmt (gsi);
7345 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7346 continue;
7348 if (gimple_num_ops (stmt) == 2)
7350 tree arg = gimple_assign_rhs1 (stmt);
7352 /* We're ignoring the subcode because we're
7353 effectively doing a STRIP_NOPS. */
7355 if (TREE_CODE (arg) == ADDR_EXPR
7356 && TREE_OPERAND (arg, 0) == sender)
7358 tgtcopy_stmt = stmt;
7359 break;
7364 gcc_assert (tgtcopy_stmt != NULL);
7365 arg = DECL_ARGUMENTS (child_fn);
7367 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7368 gsi_remove (&gsi, true);
7371 /* Declare local variables needed in CHILD_CFUN. */
7372 block = DECL_INITIAL (child_fn);
7373 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7374 /* The gimplifier could record temporaries in the offloading block
7375 rather than in containing function's local_decls chain,
7376 which would mean cgraph missed finalizing them. Do it now. */
7377 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7378 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7379 varpool_node::finalize_decl (t);
7380 DECL_SAVED_TREE (child_fn) = NULL;
7381 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7382 gimple_set_body (child_fn, NULL);
7383 TREE_USED (block) = 1;
7385 /* Reset DECL_CONTEXT on function arguments. */
7386 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7387 DECL_CONTEXT (t) = child_fn;
7389 /* Split ENTRY_BB at GIMPLE_*,
7390 so that it can be moved to the child function. */
7391 gsi = gsi_last_nondebug_bb (entry_bb);
7392 stmt = gsi_stmt (gsi);
7393 gcc_assert (stmt
7394 && gimple_code (stmt) == gimple_code (entry_stmt));
7395 e = split_block (entry_bb, stmt);
7396 gsi_remove (&gsi, true);
7397 entry_bb = e->dest;
7398 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7400 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7401 if (exit_bb)
7403 gsi = gsi_last_nondebug_bb (exit_bb);
7404 gcc_assert (!gsi_end_p (gsi)
7405 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7406 stmt = gimple_build_return (NULL);
7407 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7408 gsi_remove (&gsi, true);
7411 /* Move the offloading region into CHILD_CFUN. */
7413 block = gimple_block (entry_stmt);
7415 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7416 if (exit_bb)
7417 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7418 /* When the OMP expansion process cannot guarantee an up-to-date
7419 loop tree arrange for the child function to fixup loops. */
7420 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7421 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7423 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7424 num = vec_safe_length (child_cfun->local_decls);
7425 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7427 t = (*child_cfun->local_decls)[srcidx];
7428 if (DECL_CONTEXT (t) == cfun->decl)
7429 continue;
7430 if (srcidx != dstidx)
7431 (*child_cfun->local_decls)[dstidx] = t;
7432 dstidx++;
7434 if (dstidx != num)
7435 vec_safe_truncate (child_cfun->local_decls, dstidx);
7437 /* Inform the callgraph about the new function. */
7438 child_cfun->curr_properties = cfun->curr_properties;
7439 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7440 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7441 cgraph_node *node = cgraph_node::get_create (child_fn);
7442 node->parallelized_function = 1;
7443 cgraph_node::add_new_function (child_fn, true);
7445 /* Add the new function to the offload table. */
7446 if (ENABLE_OFFLOADING)
7448 if (in_lto_p)
7449 DECL_PRESERVE_P (child_fn) = 1;
7450 vec_safe_push (offload_funcs, child_fn);
7453 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7454 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7456 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7457 fixed in a following pass. */
7458 push_cfun (child_cfun);
7459 if (need_asm)
7460 assign_assembler_name_if_needed (child_fn);
7461 cgraph_edge::rebuild_edges ();
7463 /* Some EH regions might become dead, see PR34608. If
7464 pass_cleanup_cfg isn't the first pass to happen with the
7465 new child, these dead EH edges might cause problems.
7466 Clean them up now. */
7467 if (flag_exceptions)
7469 basic_block bb;
7470 bool changed = false;
7472 FOR_EACH_BB_FN (bb, cfun)
7473 changed |= gimple_purge_dead_eh_edges (bb);
7474 if (changed)
7475 cleanup_tree_cfg ();
7477 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7478 verify_loop_structure ();
7479 pop_cfun ();
7481 if (dump_file && !gimple_in_ssa_p (cfun))
7483 omp_any_child_fn_dumped = true;
7484 dump_function_header (dump_file, child_fn, dump_flags);
7485 dump_function_to_file (child_fn, dump_file, dump_flags);
7488 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7491 /* Emit a library call to launch the offloading region, or do data
7492 transfers. */
7493 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7494 enum built_in_function start_ix;
7495 location_t clause_loc;
7496 unsigned int flags_i = 0;
7498 switch (gimple_omp_target_kind (entry_stmt))
7500 case GF_OMP_TARGET_KIND_REGION:
7501 start_ix = BUILT_IN_GOMP_TARGET;
7502 break;
7503 case GF_OMP_TARGET_KIND_DATA:
7504 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7505 break;
7506 case GF_OMP_TARGET_KIND_UPDATE:
7507 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7508 break;
7509 case GF_OMP_TARGET_KIND_ENTER_DATA:
7510 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7511 break;
7512 case GF_OMP_TARGET_KIND_EXIT_DATA:
7513 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7514 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7515 break;
7516 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7517 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7518 start_ix = BUILT_IN_GOACC_PARALLEL;
7519 break;
7520 case GF_OMP_TARGET_KIND_OACC_DATA:
7521 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7522 start_ix = BUILT_IN_GOACC_DATA_START;
7523 break;
7524 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7525 start_ix = BUILT_IN_GOACC_UPDATE;
7526 break;
7527 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7528 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7529 break;
7530 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7531 start_ix = BUILT_IN_GOACC_DECLARE;
7532 break;
7533 default:
7534 gcc_unreachable ();
7537 clauses = gimple_omp_target_clauses (entry_stmt);
7539 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7540 library choose) and there is no conditional. */
7541 cond = NULL_TREE;
7542 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7544 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7545 if (c)
7546 cond = OMP_CLAUSE_IF_EXPR (c);
7548 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7549 if (c)
7551 /* Even if we pass it to all library function calls, it is currently only
7552 defined/used for the OpenMP target ones. */
7553 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7554 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7555 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7556 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7558 device = OMP_CLAUSE_DEVICE_ID (c);
7559 clause_loc = OMP_CLAUSE_LOCATION (c);
7561 else
7562 clause_loc = gimple_location (entry_stmt);
7564 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7565 if (c)
7566 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7568 /* Ensure 'device' is of the correct type. */
7569 device = fold_convert_loc (clause_loc, integer_type_node, device);
7571 /* If we found the clause 'if (cond)', build
7572 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7573 if (cond)
7575 cond = gimple_boolify (cond);
7577 basic_block cond_bb, then_bb, else_bb;
7578 edge e;
7579 tree tmp_var;
7581 tmp_var = create_tmp_var (TREE_TYPE (device));
7582 if (offloaded)
7583 e = split_block_after_labels (new_bb);
7584 else
7586 gsi = gsi_last_nondebug_bb (new_bb);
7587 gsi_prev (&gsi);
7588 e = split_block (new_bb, gsi_stmt (gsi));
7590 cond_bb = e->src;
7591 new_bb = e->dest;
7592 remove_edge (e);
7594 then_bb = create_empty_bb (cond_bb);
7595 else_bb = create_empty_bb (then_bb);
7596 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7597 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7599 stmt = gimple_build_cond_empty (cond);
7600 gsi = gsi_last_bb (cond_bb);
7601 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7603 gsi = gsi_start_bb (then_bb);
7604 stmt = gimple_build_assign (tmp_var, device);
7605 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7607 gsi = gsi_start_bb (else_bb);
7608 stmt = gimple_build_assign (tmp_var,
7609 build_int_cst (integer_type_node,
7610 GOMP_DEVICE_HOST_FALLBACK));
7611 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7613 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7614 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7615 add_bb_to_loop (then_bb, cond_bb->loop_father);
7616 add_bb_to_loop (else_bb, cond_bb->loop_father);
7617 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7618 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7620 device = tmp_var;
7621 gsi = gsi_last_nondebug_bb (new_bb);
7623 else
7625 gsi = gsi_last_nondebug_bb (new_bb);
7626 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7627 true, GSI_SAME_STMT);
7630 t = gimple_omp_target_data_arg (entry_stmt);
7631 if (t == NULL)
7633 t1 = size_zero_node;
7634 t2 = build_zero_cst (ptr_type_node);
7635 t3 = t2;
7636 t4 = t2;
7638 else
7640 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7641 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7642 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7643 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7644 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7647 gimple *g;
7648 bool tagging = false;
7649 /* The maximum number used by any start_ix, without varargs. */
7650 auto_vec<tree, 11> args;
7651 args.quick_push (device);
7652 if (offloaded)
7653 args.quick_push (build_fold_addr_expr (child_fn));
7654 args.quick_push (t1);
7655 args.quick_push (t2);
7656 args.quick_push (t3);
7657 args.quick_push (t4);
7658 switch (start_ix)
7660 case BUILT_IN_GOACC_DATA_START:
7661 case BUILT_IN_GOACC_DECLARE:
7662 case BUILT_IN_GOMP_TARGET_DATA:
7663 break;
7664 case BUILT_IN_GOMP_TARGET:
7665 case BUILT_IN_GOMP_TARGET_UPDATE:
7666 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7667 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7668 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7669 if (c)
7670 depend = OMP_CLAUSE_DECL (c);
7671 else
7672 depend = build_int_cst (ptr_type_node, 0);
7673 args.quick_push (depend);
7674 if (start_ix == BUILT_IN_GOMP_TARGET)
7675 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7676 break;
7677 case BUILT_IN_GOACC_PARALLEL:
7678 oacc_set_fn_attrib (child_fn, clauses, &args);
7679 tagging = true;
7680 /* FALLTHRU */
7681 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7682 case BUILT_IN_GOACC_UPDATE:
7684 tree t_async = NULL_TREE;
7686 /* If present, use the value specified by the respective
7687 clause, making sure that is of the correct type. */
7688 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7689 if (c)
7690 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7691 integer_type_node,
7692 OMP_CLAUSE_ASYNC_EXPR (c));
7693 else if (!tagging)
7694 /* Default values for t_async. */
7695 t_async = fold_convert_loc (gimple_location (entry_stmt),
7696 integer_type_node,
7697 build_int_cst (integer_type_node,
7698 GOMP_ASYNC_SYNC));
7699 if (tagging && t_async)
7701 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7703 if (TREE_CODE (t_async) == INTEGER_CST)
7705 /* See if we can pack the async arg in to the tag's
7706 operand. */
7707 i_async = TREE_INT_CST_LOW (t_async);
7708 if (i_async < GOMP_LAUNCH_OP_MAX)
7709 t_async = NULL_TREE;
7710 else
7711 i_async = GOMP_LAUNCH_OP_MAX;
7713 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7714 i_async));
7716 if (t_async)
7717 args.safe_push (t_async);
7719 /* Save the argument index, and ... */
7720 unsigned t_wait_idx = args.length ();
7721 unsigned num_waits = 0;
7722 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7723 if (!tagging || c)
7724 /* ... push a placeholder. */
7725 args.safe_push (integer_zero_node);
7727 for (; c; c = OMP_CLAUSE_CHAIN (c))
7728 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7730 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7731 integer_type_node,
7732 OMP_CLAUSE_WAIT_EXPR (c)));
7733 num_waits++;
7736 if (!tagging || num_waits)
7738 tree len;
7740 /* Now that we know the number, update the placeholder. */
7741 if (tagging)
7742 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7743 else
7744 len = build_int_cst (integer_type_node, num_waits);
7745 len = fold_convert_loc (gimple_location (entry_stmt),
7746 unsigned_type_node, len);
7747 args[t_wait_idx] = len;
7750 break;
7751 default:
7752 gcc_unreachable ();
7754 if (tagging)
7755 /* Push terminal marker - zero. */
7756 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7758 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7759 gimple_set_location (g, gimple_location (entry_stmt));
7760 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7761 if (!offloaded)
7763 g = gsi_stmt (gsi);
7764 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7765 gsi_remove (&gsi, true);
7767 if (data_region && region->exit)
7769 gsi = gsi_last_nondebug_bb (region->exit);
7770 g = gsi_stmt (gsi);
7771 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7772 gsi_remove (&gsi, true);
7776 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7777 iteration variable derived from the thread number. INTRA_GROUP means this
7778 is an expansion of a loop iterating over work-items within a separate
7779 iteration over groups. */
7781 static void
7782 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7784 gimple_stmt_iterator gsi;
7785 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7786 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7787 == GF_OMP_FOR_KIND_GRID_LOOP);
7788 size_t collapse = gimple_omp_for_collapse (for_stmt);
7789 struct omp_for_data_loop *loops
7790 = XALLOCAVEC (struct omp_for_data_loop,
7791 gimple_omp_for_collapse (for_stmt));
7792 struct omp_for_data fd;
7794 remove_edge (BRANCH_EDGE (kfor->entry));
7795 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7797 gcc_assert (kfor->cont);
7798 omp_extract_for_data (for_stmt, &fd, loops);
7800 gsi = gsi_start_bb (body_bb);
7802 for (size_t dim = 0; dim < collapse; dim++)
7804 tree type, itype;
7805 itype = type = TREE_TYPE (fd.loops[dim].v);
7806 if (POINTER_TYPE_P (type))
7807 itype = signed_type_for (type);
7809 tree n1 = fd.loops[dim].n1;
7810 tree step = fd.loops[dim].step;
7811 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7812 true, NULL_TREE, true, GSI_SAME_STMT);
7813 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7814 true, NULL_TREE, true, GSI_SAME_STMT);
7815 tree threadid;
7816 if (gimple_omp_for_grid_group_iter (for_stmt))
7818 gcc_checking_assert (!intra_group);
7819 threadid = build_call_expr (builtin_decl_explicit
7820 (BUILT_IN_HSA_WORKGROUPID), 1,
7821 build_int_cstu (unsigned_type_node, dim));
7823 else if (intra_group)
7824 threadid = build_call_expr (builtin_decl_explicit
7825 (BUILT_IN_HSA_WORKITEMID), 1,
7826 build_int_cstu (unsigned_type_node, dim));
7827 else
7828 threadid = build_call_expr (builtin_decl_explicit
7829 (BUILT_IN_HSA_WORKITEMABSID), 1,
7830 build_int_cstu (unsigned_type_node, dim));
7831 threadid = fold_convert (itype, threadid);
7832 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7833 true, GSI_SAME_STMT);
7835 tree startvar = fd.loops[dim].v;
7836 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7837 if (POINTER_TYPE_P (type))
7838 t = fold_build_pointer_plus (n1, t);
7839 else
7840 t = fold_build2 (PLUS_EXPR, type, t, n1);
7841 t = fold_convert (type, t);
7842 t = force_gimple_operand_gsi (&gsi, t,
7843 DECL_P (startvar)
7844 && TREE_ADDRESSABLE (startvar),
7845 NULL_TREE, true, GSI_SAME_STMT);
7846 gassign *assign_stmt = gimple_build_assign (startvar, t);
7847 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7849 /* Remove the omp for statement. */
7850 gsi = gsi_last_nondebug_bb (kfor->entry);
7851 gsi_remove (&gsi, true);
7853 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7854 gsi = gsi_last_nondebug_bb (kfor->cont);
7855 gcc_assert (!gsi_end_p (gsi)
7856 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7857 gsi_remove (&gsi, true);
7859 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7860 gsi = gsi_last_nondebug_bb (kfor->exit);
7861 gcc_assert (!gsi_end_p (gsi)
7862 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7863 if (intra_group)
7864 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7865 gsi_remove (&gsi, true);
7867 /* Fixup the much simpler CFG. */
7868 remove_edge (find_edge (kfor->cont, body_bb));
7870 if (kfor->cont != body_bb)
7871 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7872 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7875 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7876 argument_decls. */
7878 struct grid_arg_decl_map
7880 tree old_arg;
7881 tree new_arg;
7884 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7885 pertaining to kernel function. */
7887 static tree
7888 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7890 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7891 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7892 tree t = *tp;
7894 if (t == adm->old_arg)
7895 *tp = adm->new_arg;
7896 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7897 return NULL_TREE;
7900 /* If TARGET region contains a kernel body for loop, remove its region from the
7901 TARGET and expand it in HSA gridified kernel fashion. */
7903 static void
7904 grid_expand_target_grid_body (struct omp_region *target)
7906 if (!hsa_gen_requested_p ())
7907 return;
7909 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7910 struct omp_region **pp;
7912 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7913 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7914 break;
7916 struct omp_region *gpukernel = *pp;
7918 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7919 if (!gpukernel)
7921 /* HSA cannot handle OACC stuff. */
7922 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7923 return;
7924 gcc_checking_assert (orig_child_fndecl);
7925 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7926 OMP_CLAUSE__GRIDDIM_));
7927 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7929 hsa_register_kernel (n);
7930 return;
7933 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7934 OMP_CLAUSE__GRIDDIM_));
7935 tree inside_block
7936 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7937 *pp = gpukernel->next;
7938 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7939 if ((*pp)->type == GIMPLE_OMP_FOR)
7940 break;
7942 struct omp_region *kfor = *pp;
7943 gcc_assert (kfor);
7944 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7945 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7946 *pp = kfor->next;
7947 if (kfor->inner)
7949 if (gimple_omp_for_grid_group_iter (for_stmt))
7951 struct omp_region **next_pp;
7952 for (pp = &kfor->inner; *pp; pp = next_pp)
7954 next_pp = &(*pp)->next;
7955 if ((*pp)->type != GIMPLE_OMP_FOR)
7956 continue;
7957 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7958 gcc_assert (gimple_omp_for_kind (inner)
7959 == GF_OMP_FOR_KIND_GRID_LOOP);
7960 grid_expand_omp_for_loop (*pp, true);
7961 *pp = (*pp)->next;
7962 next_pp = pp;
7965 expand_omp (kfor->inner);
7967 if (gpukernel->inner)
7968 expand_omp (gpukernel->inner);
7970 tree kern_fndecl = copy_node (orig_child_fndecl);
7971 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
7972 "kernel");
7973 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7974 tree tgtblock = gimple_block (tgt_stmt);
7975 tree fniniblock = make_node (BLOCK);
7976 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
7977 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7978 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7979 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7980 DECL_INITIAL (kern_fndecl) = fniniblock;
7981 push_struct_function (kern_fndecl);
7982 cfun->function_end_locus = gimple_location (tgt_stmt);
7983 init_tree_ssa (cfun);
7984 pop_cfun ();
7986 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7987 gcc_assert (!DECL_CHAIN (old_parm_decl));
7988 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7989 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7990 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7991 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7992 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7993 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7994 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7995 kern_cfun->curr_properties = cfun->curr_properties;
7997 grid_expand_omp_for_loop (kfor, false);
7999 /* Remove the omp for statement. */
8000 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8001 gsi_remove (&gsi, true);
8002 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8003 return. */
8004 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8005 gcc_assert (!gsi_end_p (gsi)
8006 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8007 gimple *ret_stmt = gimple_build_return (NULL);
8008 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8009 gsi_remove (&gsi, true);
8011 /* Statements in the first BB in the target construct have been produced by
8012 target lowering and must be copied inside the GPUKERNEL, with the two
8013 exceptions of the first OMP statement and the OMP_DATA assignment
8014 statement. */
8015 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8016 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8017 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8018 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8019 !gsi_end_p (tsi); gsi_next (&tsi))
8021 gimple *stmt = gsi_stmt (tsi);
8022 if (is_gimple_omp (stmt))
8023 break;
8024 if (sender
8025 && is_gimple_assign (stmt)
8026 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8027 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8028 continue;
8029 gimple *copy = gimple_copy (stmt);
8030 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8031 gimple_set_block (copy, fniniblock);
8034 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8035 gpukernel->exit, inside_block);
8037 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8038 kcn->mark_force_output ();
8039 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8041 hsa_register_kernel (kcn, orig_child);
8043 cgraph_node::add_new_function (kern_fndecl, true);
8044 push_cfun (kern_cfun);
8045 cgraph_edge::rebuild_edges ();
8047 /* Re-map any mention of the PARM_DECL of the original function to the
8048 PARM_DECL of the new one.
8050 TODO: It would be great if lowering produced references into the GPU
8051 kernel decl straight away and we did not have to do this. */
8052 struct grid_arg_decl_map adm;
8053 adm.old_arg = old_parm_decl;
8054 adm.new_arg = new_parm_decl;
8055 basic_block bb;
8056 FOR_EACH_BB_FN (bb, kern_cfun)
8058 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8060 gimple *stmt = gsi_stmt (gsi);
8061 struct walk_stmt_info wi;
8062 memset (&wi, 0, sizeof (wi));
8063 wi.info = &adm;
8064 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8067 pop_cfun ();
8069 return;
8072 /* Expand the parallel region tree rooted at REGION. Expansion
8073 proceeds in depth-first order. Innermost regions are expanded
8074 first. This way, parallel regions that require a new function to
8075 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8076 internal dependencies in their body. */
8078 static void
8079 expand_omp (struct omp_region *region)
8081 omp_any_child_fn_dumped = false;
8082 while (region)
8084 location_t saved_location;
8085 gimple *inner_stmt = NULL;
8087 /* First, determine whether this is a combined parallel+workshare
8088 region. */
8089 if (region->type == GIMPLE_OMP_PARALLEL)
8090 determine_parallel_type (region);
8091 else if (region->type == GIMPLE_OMP_TARGET)
8092 grid_expand_target_grid_body (region);
8094 if (region->type == GIMPLE_OMP_FOR
8095 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8096 inner_stmt = last_stmt (region->inner->entry);
8098 if (region->inner)
8099 expand_omp (region->inner);
8101 saved_location = input_location;
8102 if (gimple_has_location (last_stmt (region->entry)))
8103 input_location = gimple_location (last_stmt (region->entry));
8105 switch (region->type)
8107 case GIMPLE_OMP_PARALLEL:
8108 case GIMPLE_OMP_TASK:
8109 expand_omp_taskreg (region);
8110 break;
8112 case GIMPLE_OMP_FOR:
8113 expand_omp_for (region, inner_stmt);
8114 break;
8116 case GIMPLE_OMP_SECTIONS:
8117 expand_omp_sections (region);
8118 break;
8120 case GIMPLE_OMP_SECTION:
8121 /* Individual omp sections are handled together with their
8122 parent GIMPLE_OMP_SECTIONS region. */
8123 break;
8125 case GIMPLE_OMP_SINGLE:
8126 expand_omp_single (region);
8127 break;
8129 case GIMPLE_OMP_ORDERED:
8131 gomp_ordered *ord_stmt
8132 = as_a <gomp_ordered *> (last_stmt (region->entry));
8133 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8134 OMP_CLAUSE_DEPEND))
8136 /* We'll expand these when expanding corresponding
8137 worksharing region with ordered(n) clause. */
8138 gcc_assert (region->outer
8139 && region->outer->type == GIMPLE_OMP_FOR);
8140 region->ord_stmt = ord_stmt;
8141 break;
8144 /* FALLTHRU */
8145 case GIMPLE_OMP_MASTER:
8146 case GIMPLE_OMP_TASKGROUP:
8147 case GIMPLE_OMP_CRITICAL:
8148 case GIMPLE_OMP_TEAMS:
8149 expand_omp_synch (region);
8150 break;
8152 case GIMPLE_OMP_ATOMIC_LOAD:
8153 expand_omp_atomic (region);
8154 break;
8156 case GIMPLE_OMP_TARGET:
8157 expand_omp_target (region);
8158 break;
8160 default:
8161 gcc_unreachable ();
8164 input_location = saved_location;
8165 region = region->next;
8167 if (omp_any_child_fn_dumped)
8169 if (dump_file)
8170 dump_function_header (dump_file, current_function_decl, dump_flags);
8171 omp_any_child_fn_dumped = false;
8175 /* Helper for build_omp_regions. Scan the dominator tree starting at
8176 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8177 true, the function ends once a single tree is built (otherwise, whole
8178 forest of OMP constructs may be built). */
8180 static void
8181 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8182 bool single_tree)
8184 gimple_stmt_iterator gsi;
8185 gimple *stmt;
8186 basic_block son;
8188 gsi = gsi_last_nondebug_bb (bb);
8189 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8191 struct omp_region *region;
8192 enum gimple_code code;
8194 stmt = gsi_stmt (gsi);
8195 code = gimple_code (stmt);
8196 if (code == GIMPLE_OMP_RETURN)
8198 /* STMT is the return point out of region PARENT. Mark it
8199 as the exit point and make PARENT the immediately
8200 enclosing region. */
8201 gcc_assert (parent);
8202 region = parent;
8203 region->exit = bb;
8204 parent = parent->outer;
8206 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8208 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8209 GIMPLE_OMP_RETURN, but matches with
8210 GIMPLE_OMP_ATOMIC_LOAD. */
8211 gcc_assert (parent);
8212 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8213 region = parent;
8214 region->exit = bb;
8215 parent = parent->outer;
8217 else if (code == GIMPLE_OMP_CONTINUE)
8219 gcc_assert (parent);
8220 parent->cont = bb;
8222 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8224 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8225 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8227 else
8229 region = new_omp_region (bb, code, parent);
8230 /* Otherwise... */
8231 if (code == GIMPLE_OMP_TARGET)
8233 switch (gimple_omp_target_kind (stmt))
8235 case GF_OMP_TARGET_KIND_REGION:
8236 case GF_OMP_TARGET_KIND_DATA:
8237 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8238 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8239 case GF_OMP_TARGET_KIND_OACC_DATA:
8240 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8241 break;
8242 case GF_OMP_TARGET_KIND_UPDATE:
8243 case GF_OMP_TARGET_KIND_ENTER_DATA:
8244 case GF_OMP_TARGET_KIND_EXIT_DATA:
8245 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8246 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8247 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8248 /* ..., other than for those stand-alone directives... */
8249 region = NULL;
8250 break;
8251 default:
8252 gcc_unreachable ();
8255 else if (code == GIMPLE_OMP_ORDERED
8256 && omp_find_clause (gimple_omp_ordered_clauses
8257 (as_a <gomp_ordered *> (stmt)),
8258 OMP_CLAUSE_DEPEND))
8259 /* #pragma omp ordered depend is also just a stand-alone
8260 directive. */
8261 region = NULL;
8262 else if (code == GIMPLE_OMP_TASK
8263 && gimple_omp_task_taskwait_p (stmt))
8264 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8265 region = NULL;
8266 /* ..., this directive becomes the parent for a new region. */
8267 if (region)
8268 parent = region;
8272 if (single_tree && !parent)
8273 return;
8275 for (son = first_dom_son (CDI_DOMINATORS, bb);
8276 son;
8277 son = next_dom_son (CDI_DOMINATORS, son))
8278 build_omp_regions_1 (son, parent, single_tree);
8281 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8282 root_omp_region. */
8284 static void
8285 build_omp_regions_root (basic_block root)
8287 gcc_assert (root_omp_region == NULL);
8288 build_omp_regions_1 (root, NULL, true);
8289 gcc_assert (root_omp_region != NULL);
8292 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8294 void
8295 omp_expand_local (basic_block head)
8297 build_omp_regions_root (head);
8298 if (dump_file && (dump_flags & TDF_DETAILS))
8300 fprintf (dump_file, "\nOMP region tree\n\n");
8301 dump_omp_region (dump_file, root_omp_region, 0);
8302 fprintf (dump_file, "\n");
8305 remove_exit_barriers (root_omp_region);
8306 expand_omp (root_omp_region);
8308 omp_free_regions ();
8311 /* Scan the CFG and build a tree of OMP regions. Return the root of
8312 the OMP region tree. */
8314 static void
8315 build_omp_regions (void)
8317 gcc_assert (root_omp_region == NULL);
8318 calculate_dominance_info (CDI_DOMINATORS);
8319 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8322 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8324 static unsigned int
8325 execute_expand_omp (void)
8327 build_omp_regions ();
8329 if (!root_omp_region)
8330 return 0;
8332 if (dump_file)
8334 fprintf (dump_file, "\nOMP region tree\n\n");
8335 dump_omp_region (dump_file, root_omp_region, 0);
8336 fprintf (dump_file, "\n");
8339 remove_exit_barriers (root_omp_region);
8341 expand_omp (root_omp_region);
8343 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8344 verify_loop_structure ();
8345 cleanup_tree_cfg ();
8347 omp_free_regions ();
8349 return 0;
8352 /* OMP expansion -- the default pass, run before creation of SSA form. */
8354 namespace {
8356 const pass_data pass_data_expand_omp =
8358 GIMPLE_PASS, /* type */
8359 "ompexp", /* name */
8360 OPTGROUP_OMP, /* optinfo_flags */
8361 TV_NONE, /* tv_id */
8362 PROP_gimple_any, /* properties_required */
8363 PROP_gimple_eomp, /* properties_provided */
8364 0, /* properties_destroyed */
8365 0, /* todo_flags_start */
8366 0, /* todo_flags_finish */
8369 class pass_expand_omp : public gimple_opt_pass
8371 public:
8372 pass_expand_omp (gcc::context *ctxt)
8373 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8376 /* opt_pass methods: */
8377 virtual unsigned int execute (function *)
8379 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8380 || flag_openmp_simd != 0)
8381 && !seen_error ());
8383 /* This pass always runs, to provide PROP_gimple_eomp.
8384 But often, there is nothing to do. */
8385 if (!gate)
8386 return 0;
8388 return execute_expand_omp ();
8391 }; // class pass_expand_omp
8393 } // anon namespace
8395 gimple_opt_pass *
8396 make_pass_expand_omp (gcc::context *ctxt)
8398 return new pass_expand_omp (ctxt);
8401 namespace {
8403 const pass_data pass_data_expand_omp_ssa =
8405 GIMPLE_PASS, /* type */
8406 "ompexpssa", /* name */
8407 OPTGROUP_OMP, /* optinfo_flags */
8408 TV_NONE, /* tv_id */
8409 PROP_cfg | PROP_ssa, /* properties_required */
8410 PROP_gimple_eomp, /* properties_provided */
8411 0, /* properties_destroyed */
8412 0, /* todo_flags_start */
8413 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8416 class pass_expand_omp_ssa : public gimple_opt_pass
8418 public:
8419 pass_expand_omp_ssa (gcc::context *ctxt)
8420 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8423 /* opt_pass methods: */
8424 virtual bool gate (function *fun)
8426 return !(fun->curr_properties & PROP_gimple_eomp);
8428 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8429 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8431 }; // class pass_expand_omp_ssa
8433 } // anon namespace
8435 gimple_opt_pass *
8436 make_pass_expand_omp_ssa (gcc::context *ctxt)
8438 return new pass_expand_omp_ssa (ctxt);
8441 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8442 GIMPLE_* codes. */
8444 bool
8445 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8446 int *region_idx)
8448 gimple *last = last_stmt (bb);
8449 enum gimple_code code = gimple_code (last);
8450 struct omp_region *cur_region = *region;
8451 bool fallthru = false;
8453 switch (code)
8455 case GIMPLE_OMP_PARALLEL:
8456 case GIMPLE_OMP_FOR:
8457 case GIMPLE_OMP_SINGLE:
8458 case GIMPLE_OMP_TEAMS:
8459 case GIMPLE_OMP_MASTER:
8460 case GIMPLE_OMP_TASKGROUP:
8461 case GIMPLE_OMP_CRITICAL:
8462 case GIMPLE_OMP_SECTION:
8463 case GIMPLE_OMP_GRID_BODY:
8464 cur_region = new_omp_region (bb, code, cur_region);
8465 fallthru = true;
8466 break;
8468 case GIMPLE_OMP_TASK:
8469 cur_region = new_omp_region (bb, code, cur_region);
8470 fallthru = true;
8471 if (gimple_omp_task_taskwait_p (last))
8472 cur_region = cur_region->outer;
8473 break;
8475 case GIMPLE_OMP_ORDERED:
8476 cur_region = new_omp_region (bb, code, cur_region);
8477 fallthru = true;
8478 if (omp_find_clause (gimple_omp_ordered_clauses
8479 (as_a <gomp_ordered *> (last)),
8480 OMP_CLAUSE_DEPEND))
8481 cur_region = cur_region->outer;
8482 break;
8484 case GIMPLE_OMP_TARGET:
8485 cur_region = new_omp_region (bb, code, cur_region);
8486 fallthru = true;
8487 switch (gimple_omp_target_kind (last))
8489 case GF_OMP_TARGET_KIND_REGION:
8490 case GF_OMP_TARGET_KIND_DATA:
8491 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8492 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8493 case GF_OMP_TARGET_KIND_OACC_DATA:
8494 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8495 break;
8496 case GF_OMP_TARGET_KIND_UPDATE:
8497 case GF_OMP_TARGET_KIND_ENTER_DATA:
8498 case GF_OMP_TARGET_KIND_EXIT_DATA:
8499 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8500 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8501 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8502 cur_region = cur_region->outer;
8503 break;
8504 default:
8505 gcc_unreachable ();
8507 break;
8509 case GIMPLE_OMP_SECTIONS:
8510 cur_region = new_omp_region (bb, code, cur_region);
8511 fallthru = true;
8512 break;
8514 case GIMPLE_OMP_SECTIONS_SWITCH:
8515 fallthru = false;
8516 break;
8518 case GIMPLE_OMP_ATOMIC_LOAD:
8519 case GIMPLE_OMP_ATOMIC_STORE:
8520 fallthru = true;
8521 break;
8523 case GIMPLE_OMP_RETURN:
8524 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8525 somewhere other than the next block. This will be
8526 created later. */
8527 cur_region->exit = bb;
8528 if (cur_region->type == GIMPLE_OMP_TASK)
8529 /* Add an edge corresponding to not scheduling the task
8530 immediately. */
8531 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8532 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8533 cur_region = cur_region->outer;
8534 break;
8536 case GIMPLE_OMP_CONTINUE:
8537 cur_region->cont = bb;
8538 switch (cur_region->type)
8540 case GIMPLE_OMP_FOR:
8541 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8542 succs edges as abnormal to prevent splitting
8543 them. */
8544 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8545 /* Make the loopback edge. */
8546 make_edge (bb, single_succ (cur_region->entry),
8547 EDGE_ABNORMAL);
8549 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8550 corresponds to the case that the body of the loop
8551 is not executed at all. */
8552 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8553 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8554 fallthru = false;
8555 break;
8557 case GIMPLE_OMP_SECTIONS:
8558 /* Wire up the edges into and out of the nested sections. */
8560 basic_block switch_bb = single_succ (cur_region->entry);
8562 struct omp_region *i;
8563 for (i = cur_region->inner; i ; i = i->next)
8565 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8566 make_edge (switch_bb, i->entry, 0);
8567 make_edge (i->exit, bb, EDGE_FALLTHRU);
8570 /* Make the loopback edge to the block with
8571 GIMPLE_OMP_SECTIONS_SWITCH. */
8572 make_edge (bb, switch_bb, 0);
8574 /* Make the edge from the switch to exit. */
8575 make_edge (switch_bb, bb->next_bb, 0);
8576 fallthru = false;
8578 break;
8580 case GIMPLE_OMP_TASK:
8581 fallthru = true;
8582 break;
8584 default:
8585 gcc_unreachable ();
8587 break;
8589 default:
8590 gcc_unreachable ();
8593 if (*region != cur_region)
8595 *region = cur_region;
8596 if (cur_region)
8597 *region_idx = cur_region->entry->index;
8598 else
8599 *region_idx = 0;
8602 return fallthru;
8605 #include "gt-omp-expand.h"