gcc/omp-expand.cc
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2023 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
130 is the immediate dominator of WS_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
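/* A worked example of the folding above (illustrative): with
   omp_max_vf () == 8 and chunk_size == 10, the two folds compute
   (10 + 7) & -8 == 16, i.e. the chunk size rounded up to the next
   multiple of the vectorization factor; a chunk_size that is already
   a multiple of vf, such as 16, is left unchanged.  */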
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
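/* Illustration (a sketch, not from the original sources): for

     #pragma omp parallel for schedule (guided, 4)
     for (j = 0; j < n; j++) ...

   the vector built above is { (long) 0, (long) n, (long) 1, (long) 4 },
   i.e. exactly the start/end/incr/chunk_size arguments that
   GOMP_parallel_loop_guided expects in addition to fn, data,
   num_threads and flags.  */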
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319 /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
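/* Example of a region this recognizes (sketch): in

     #pragma omp parallel
     #pragma omp for schedule (guided, 4)
     for (...) ...

   the parallel and the for are perfectly nested, so both regions are
   marked combined and the later expansion emits a single combined
   GOMP_parallel_loop_* call instead of GOMP_parallel plus a separate
   loop-start call in the child function.  */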
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
415 /* Create a new parallel region starting at STMT inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have explicit task region nested in parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct, it contains the list of additional arguments needed by
546 the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
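/* The index computed above selects among the GOMP_parallel_loop_*
   builtins, which follow BUILT_IN_GOMP_PARALLEL_LOOP_STATIC in the
   order static (0), dynamic (1), guided (2), monotonic runtime (3),
   nonmonotonic dynamic (4), nonmonotonic guided (5), nonmonotonic
   runtime (6) and maybe-nonmonotonic runtime (7); this ordering is
   assumed from omp-builtins.def for illustration.  */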
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
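/* Net effect (sketch, assuming no if, num_threads or proc_bind
   clauses): a plain "#pragma omp parallel" whose body was outlined
   into foo._omp_fn.0 becomes roughly

     __builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   where libgomp declares
     void GOMP_parallel (void (*fn) (void *), void *data,
                         unsigned num_threads, unsigned flags);
   and num_threads == 0 leaves the team size to the runtime.  */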
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
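/* For reference, the non-taskloop call built above targets libgomp's

     void GOMP_task (void (*fn) (void *), void *data,
                     void (*cpyfn) (void *, void *), long arg_size,
                     long arg_align, bool if_clause, unsigned flags,
                     void **depend, int priority_arg, void *detach);

   matching the ten arguments pushed: t1, t2, t3, arg_size, arg_align,
   cond, flags, depend, priority and detach.  */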
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
920 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
921 enum built_in_function f = (nowait
922 ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
923 : BUILT_IN_GOMP_TASKWAIT_DEPEND);
924 tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);
926 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
927 false, GSI_CONTINUE_LINKING);
930 /* Build the function call to GOMP_teams_reg to actually
931 generate the host teams operation. REGION is the teams region
932 being expanded. BB is the block where to insert the code. */
934 static void
935 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
937 tree clauses = gimple_omp_teams_clauses (entry_stmt);
938 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
939 if (num_teams == NULL_TREE)
940 num_teams = build_int_cst (unsigned_type_node, 0);
941 else
943 num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
944 num_teams = fold_convert (unsigned_type_node, num_teams);
946 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
947 if (thread_limit == NULL_TREE)
948 thread_limit = build_int_cst (unsigned_type_node, 0);
949 else
951 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
952 thread_limit = fold_convert (unsigned_type_node, thread_limit);
955 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
956 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
957 if (t == NULL)
958 t1 = null_pointer_node;
959 else
960 t1 = build_fold_addr_expr (t);
961 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
962 tree t2 = build_fold_addr_expr (child_fndecl);
964 vec<tree, va_gc> *args;
965 vec_alloc (args, 5);
966 args->quick_push (t2);
967 args->quick_push (t1);
968 args->quick_push (num_teams);
969 args->quick_push (thread_limit);
970 /* For future extensibility. */
971 args->quick_push (build_zero_cst (unsigned_type_node));
973 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
974 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
975 args);
977 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
978 false, GSI_CONTINUE_LINKING);
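/* For reference, the call built above targets libgomp's

     void GOMP_teams_reg (void (*fn) (void *), void *data,
                          unsigned num_teams, unsigned thread_limit,
                          unsigned flags);

   matching the five arguments pushed; the zero flags argument is the
   placeholder reserved above for future extensibility.  */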
981 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
983 static tree
984 vec2chain (vec<tree, va_gc> *v)
986 tree chain = NULL_TREE, t;
987 unsigned ix;
989 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
991 DECL_CHAIN (t) = chain;
992 chain = t;
995 return chain;
998 /* Remove barriers in REGION->EXIT's block. Note that this is only
999 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1000 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1001 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1002 removed. */
1004 static void
1005 remove_exit_barrier (struct omp_region *region)
1007 gimple_stmt_iterator gsi;
1008 basic_block exit_bb;
1009 edge_iterator ei;
1010 edge e;
1011 gimple *stmt;
1012 int any_addressable_vars = -1;
1014 exit_bb = region->exit;
1016 /* If the parallel region doesn't return, we don't have REGION->EXIT
1017 block at all. */
1018 if (! exit_bb)
1019 return;
1021 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1022 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1023 statements that can appear in between are extremely limited -- no
1024 memory operations at all. Here, we allow nothing at all, so the
1025 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1026 gsi = gsi_last_nondebug_bb (exit_bb);
1027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1028 gsi_prev_nondebug (&gsi);
1029 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1030 return;
1032 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1034 gsi = gsi_last_nondebug_bb (e->src);
1035 if (gsi_end_p (gsi))
1036 continue;
1037 stmt = gsi_stmt (gsi);
1038 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1039 && !gimple_omp_return_nowait_p (stmt))
1041 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1042 in many cases. If there could be tasks queued, the barrier
1043 might be needed to let the tasks run before some local
1044 variable of the parallel that the task uses as shared
1045 runs out of scope. The task can be spawned either
1046 from within current function (this would be easy to check)
1047 or from some function it calls and gets passed an address
1048 of such a variable. */
1049 if (any_addressable_vars < 0)
1051 gomp_parallel *parallel_stmt
1052 = as_a <gomp_parallel *> (last_stmt (region->entry));
1053 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1054 tree local_decls, block, decl;
1055 unsigned ix;
1057 any_addressable_vars = 0;
1058 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1059 if (TREE_ADDRESSABLE (decl))
1061 any_addressable_vars = 1;
1062 break;
1064 for (block = gimple_block (stmt);
1065 !any_addressable_vars
1066 && block
1067 && TREE_CODE (block) == BLOCK;
1068 block = BLOCK_SUPERCONTEXT (block))
1070 for (local_decls = BLOCK_VARS (block);
1071 local_decls;
1072 local_decls = DECL_CHAIN (local_decls))
1073 if (TREE_ADDRESSABLE (local_decls))
1075 any_addressable_vars = 1;
1076 break;
1078 if (block == gimple_block (parallel_stmt))
1079 break;
1082 if (!any_addressable_vars)
1083 gimple_omp_return_set_nowait (stmt);
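/* Illustration (sketch): in

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...   /* implicit barrier after the for */
     }                 /* implicit barrier of the parallel */

   the workshare barrier immediately precedes the parallel's own
   barrier, so it is redundant and can be made nowait, unless queued
   tasks might still need addressable locals of the parallel body to
   stay in scope, which is what the scan above checks.  */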
1088 static void
1089 remove_exit_barriers (struct omp_region *region)
1091 if (region->type == GIMPLE_OMP_PARALLEL)
1092 remove_exit_barrier (region);
1094 if (region->inner)
1096 region = region->inner;
1097 remove_exit_barriers (region);
1098 while (region->next)
1100 region = region->next;
1101 remove_exit_barriers (region);
1106 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1107 calls. These can't be declared as const functions, but
1108 within one parallel body they are constant, so they can be
1109 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1110 which are declared const. Similarly for task body, except
1111 that in untied task omp_get_thread_num () can change at any task
1112 scheduling point. */
1114 static void
1115 optimize_omp_library_calls (gimple *entry_stmt)
1117 basic_block bb;
1118 gimple_stmt_iterator gsi;
1119 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1120 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1121 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1122 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1123 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1124 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1125 OMP_CLAUSE_UNTIED) != NULL);
1127 FOR_EACH_BB_FN (bb, cfun)
1128 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1130 gimple *call = gsi_stmt (gsi);
1131 tree decl;
1133 if (is_gimple_call (call)
1134 && (decl = gimple_call_fndecl (call))
1135 && DECL_EXTERNAL (decl)
1136 && TREE_PUBLIC (decl)
1137 && DECL_INITIAL (decl) == NULL)
1139 tree built_in;
1141 if (DECL_NAME (decl) == thr_num_id)
1143 /* In #pragma omp task untied omp_get_thread_num () can change
1144 during the execution of the task region. */
1145 if (untied_task)
1146 continue;
1147 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1149 else if (DECL_NAME (decl) == num_thr_id)
1150 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1151 else
1152 continue;
1154 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1155 || gimple_call_num_args (call) != 0)
1156 continue;
1158 if (flag_exceptions && !TREE_NOTHROW (decl))
1159 continue;
1161 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1162 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1163 TREE_TYPE (TREE_TYPE (built_in))))
1164 continue;
1166 gimple_call_set_fndecl (call, built_in);
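/* Illustration (sketch): within one outlined parallel body,

     i = omp_get_thread_num ();
     ...
     j = omp_get_thread_num ();

   both calls are redirected to the const __builtin_omp_get_thread_num,
   allowing later passes to CSE them into a single read.  The
   untied-task check above is what prevents this rewrite where the
   value may change between task scheduling points.  */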
1171 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1172 regimplified. */
1174 static tree
1175 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1177 tree t = *tp;
1179 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1180 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1181 return t;
1183 if (TREE_CODE (t) == ADDR_EXPR)
1184 recompute_tree_invariant_for_addr_expr (t);
1186 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1187 return NULL_TREE;
1190 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1192 static void
1193 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1194 bool after)
1196 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1197 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1198 !after, after ? GSI_CONTINUE_LINKING
1199 : GSI_SAME_STMT);
1200 gimple *stmt = gimple_build_assign (to, from);
1201 if (after)
1202 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1203 else
1204 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1205 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1206 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1208 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1209 gimple_regimplify_operands (stmt, &gsi);
1213 /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1215 static gcond *
1216 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1217 tree lhs, tree rhs, bool after = false)
1219 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1220 if (after)
1221 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1222 else
1223 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1224 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 NULL, NULL)
1226 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1227 NULL, NULL))
1229 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1230 gimple_regimplify_operands (cond_stmt, &gsi);
1232 return cond_stmt;
1235 /* Expand the OpenMP parallel or task directive starting at REGION. */
1237 static void
1238 expand_omp_taskreg (struct omp_region *region)
1240 basic_block entry_bb, exit_bb, new_bb;
1241 struct function *child_cfun;
1242 tree child_fn, block, t;
1243 gimple_stmt_iterator gsi;
1244 gimple *entry_stmt, *stmt;
1245 edge e;
1246 vec<tree, va_gc> *ws_args;
1248 entry_stmt = last_stmt (region->entry);
1249 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1250 && gimple_omp_task_taskwait_p (entry_stmt))
1252 new_bb = region->entry;
1253 gsi = gsi_last_nondebug_bb (region->entry);
1254 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1255 gsi_remove (&gsi, true);
1256 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1257 return;
1260 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1261 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1263 entry_bb = region->entry;
1264 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1265 exit_bb = region->cont;
1266 else
1267 exit_bb = region->exit;
1269 if (is_combined_parallel (region))
1270 ws_args = region->ws_args;
1271 else
1272 ws_args = NULL;
1274 if (child_cfun->cfg)
1276 /* Due to inlining, it may happen that we have already outlined
1277 the region, in which case all we need to do is make the
1278 sub-graph unreachable and emit the parallel call. */
1279 edge entry_succ_e, exit_succ_e;
1281 entry_succ_e = single_succ_edge (entry_bb);
1283 gsi = gsi_last_nondebug_bb (entry_bb);
1284 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1285 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1286 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1287 gsi_remove (&gsi, true);
1289 new_bb = entry_bb;
1290 if (exit_bb)
1292 exit_succ_e = single_succ_edge (exit_bb);
1293 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1295 remove_edge_and_dominated_blocks (entry_succ_e);
1297 else
1299 unsigned srcidx, dstidx, num;
1301 /* If the parallel region needs data sent from the parent
1302 function, then the very first statement (except possible
1303 tree profile counter updates) of the parallel body
1304 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1305 &.OMP_DATA_O is passed as an argument to the child function,
1306 we need to replace it with the argument as seen by the child
1307 function.
1309 In most cases, this will end up being the identity assignment
1310 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1311 a function call that has been inlined, the original PARM_DECL
1312 .OMP_DATA_I may have been converted into a different local
1313 variable. In which case, we need to keep the assignment. */
1314 if (gimple_omp_taskreg_data_arg (entry_stmt))
1316 basic_block entry_succ_bb
1317 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1318 : FALLTHRU_EDGE (entry_bb)->dest;
1319 tree arg;
1320 gimple *parcopy_stmt = NULL;
1322 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1324 gimple *stmt;
1326 gcc_assert (!gsi_end_p (gsi));
1327 stmt = gsi_stmt (gsi);
1328 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1329 continue;
1331 if (gimple_num_ops (stmt) == 2)
1333 tree arg = gimple_assign_rhs1 (stmt);
1335 /* We ignore the subcode because we're
1336 effectively doing a STRIP_NOPS. */
1338 if (TREE_CODE (arg) == ADDR_EXPR
1339 && (TREE_OPERAND (arg, 0)
1340 == gimple_omp_taskreg_data_arg (entry_stmt)))
1342 parcopy_stmt = stmt;
1343 break;
1348 gcc_assert (parcopy_stmt != NULL);
1349 arg = DECL_ARGUMENTS (child_fn);
1351 if (!gimple_in_ssa_p (cfun))
1353 if (gimple_assign_lhs (parcopy_stmt) == arg)
1354 gsi_remove (&gsi, true);
1355 else
1357 /* ?? Is setting the subcode really necessary ?? */
1358 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1362 else
1364 tree lhs = gimple_assign_lhs (parcopy_stmt);
1365 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1366 /* We'd like to set the rhs to the default def in the child_fn,
1367 but it's too early to create ssa names in the child_fn.
1368 Instead, we set the rhs to the parm. In
1369 move_sese_region_to_fn, we introduce a default def for the
1370 parm, map the parm to its default def, and once we encounter
1371 this stmt, replace the parm with the default def. */
1372 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1373 update_stmt (parcopy_stmt);
1377 /* Declare local variables needed in CHILD_CFUN. */
1378 block = DECL_INITIAL (child_fn);
1379 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1380 /* The gimplifier could record temporaries in parallel/task block
1381 rather than in containing function's local_decls chain,
1382 which would mean cgraph missed finalizing them. Do it now. */
1383 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1384 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1385 varpool_node::finalize_decl (t);
1386 DECL_SAVED_TREE (child_fn) = NULL;
1387 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1388 gimple_set_body (child_fn, NULL);
1389 TREE_USED (block) = 1;
1391 /* Reset DECL_CONTEXT on function arguments. */
1392 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1393 DECL_CONTEXT (t) = child_fn;
1395 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1396 so that it can be moved to the child function. */
1397 gsi = gsi_last_nondebug_bb (entry_bb);
1398 stmt = gsi_stmt (gsi);
1399 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1400 || gimple_code (stmt) == GIMPLE_OMP_TASK
1401 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1402 e = split_block (entry_bb, stmt);
1403 gsi_remove (&gsi, true);
1404 entry_bb = e->dest;
1405 edge e2 = NULL;
1406 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1407 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1408 else
1410 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1411 gcc_assert (e2->dest == region->exit);
1412 remove_edge (BRANCH_EDGE (entry_bb));
1413 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1414 gsi = gsi_last_nondebug_bb (region->exit);
1415 gcc_assert (!gsi_end_p (gsi)
1416 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1417 gsi_remove (&gsi, true);
1420 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1421 if (exit_bb)
1423 gsi = gsi_last_nondebug_bb (exit_bb);
1424 gcc_assert (!gsi_end_p (gsi)
1425 && (gimple_code (gsi_stmt (gsi))
1426 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1427 stmt = gimple_build_return (NULL);
1428 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1429 gsi_remove (&gsi, true);
1432 /* Move the parallel region into CHILD_CFUN. */
1434 if (gimple_in_ssa_p (cfun))
1436 init_tree_ssa (child_cfun);
1437 init_ssa_operands (child_cfun);
1438 child_cfun->gimple_df->in_ssa_p = true;
1439 block = NULL_TREE;
1441 else
1442 block = gimple_block (entry_stmt);
1444 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1445 if (exit_bb)
1446 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1447 if (e2)
1449 basic_block dest_bb = e2->dest;
1450 if (!exit_bb)
1451 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1452 remove_edge (e2);
1453 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1455 /* When the OMP expansion process cannot guarantee an up-to-date
1456 loop tree, arrange for the child function to fix up loops. */
1457 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1458 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1460 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1461 num = vec_safe_length (child_cfun->local_decls);
1462 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1464 t = (*child_cfun->local_decls)[srcidx];
1465 if (DECL_CONTEXT (t) == cfun->decl)
1466 continue;
1467 if (srcidx != dstidx)
1468 (*child_cfun->local_decls)[dstidx] = t;
1469 dstidx++;
1471 if (dstidx != num)
1472 vec_safe_truncate (child_cfun->local_decls, dstidx);
1474 /* Inform the callgraph about the new function. */
1475 child_cfun->curr_properties = cfun->curr_properties;
1476 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1477 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1478 cgraph_node *node = cgraph_node::get_create (child_fn);
1479 node->parallelized_function = 1;
1480 cgraph_node::add_new_function (child_fn, true);
1482 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1483 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1485 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1486 fixed in a following pass. */
1487 push_cfun (child_cfun);
1488 if (need_asm)
1489 assign_assembler_name_if_needed (child_fn);
1491 if (optimize)
1492 optimize_omp_library_calls (entry_stmt);
1493 update_max_bb_count ();
1494 cgraph_edge::rebuild_edges ();
1496 /* Some EH regions might become dead, see PR34608. If
1497 pass_cleanup_cfg isn't the first pass to happen with the
1498 new child, these dead EH edges might cause problems.
1499 Clean them up now. */
1500 if (flag_exceptions)
1502 basic_block bb;
1503 bool changed = false;
1505 FOR_EACH_BB_FN (bb, cfun)
1506 changed |= gimple_purge_dead_eh_edges (bb);
1507 if (changed)
1508 cleanup_tree_cfg ();
1510 if (gimple_in_ssa_p (cfun))
1511 update_ssa (TODO_update_ssa);
1512 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1513 verify_loop_structure ();
1514 pop_cfun ();
1516 if (dump_file && !gimple_in_ssa_p (cfun))
1518 omp_any_child_fn_dumped = true;
1519 dump_function_header (dump_file, child_fn, dump_flags);
1520 dump_function_to_file (child_fn, dump_file, dump_flags);
1524 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1526 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1527 expand_parallel_call (region, new_bb,
1528 as_a <gomp_parallel *> (entry_stmt), ws_args);
1529 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1530 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1531 else
1532 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1535 /* Information about members of an OpenACC collapsed loop nest. */
1537 struct oacc_collapse
1539 tree base; /* Base value. */
1540 tree iters; /* Number of steps. */
1541 tree step; /* Step size. */
1542 tree tile; /* Tile increment (if tiled). */
1543 tree outer; /* Tile iterator var. */
1546 /* Helper for expand_oacc_for. Determine collapsed loop information.
1547 Fill in COUNTS array. Emit any initialization code before GSI.
1548 Return the calculated outer loop bound of BOUND_TYPE. */
1550 static tree
1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552 gimple_stmt_iterator *gsi,
1553 oacc_collapse *counts, tree diff_type,
1554 tree bound_type, location_t loc)
1556 tree tiling = fd->tiling;
1557 tree total = build_int_cst (bound_type, 1);
1558 int ix;
1560 gcc_assert (integer_onep (fd->loop.step));
1561 gcc_assert (integer_zerop (fd->loop.n1));
1563 /* When tiling, the first operand of the tile clause applies to the
1564 innermost loop, and we work outwards from there. Seems
1565 backwards, but whatever. */
1566 for (ix = fd->collapse; ix--;)
1568 const omp_for_data_loop *loop = &fd->loops[ix];
1570 tree iter_type = TREE_TYPE (loop->v);
1571 tree plus_type = iter_type;
1573 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1575 if (POINTER_TYPE_P (iter_type))
1576 plus_type = sizetype;
1578 if (tiling)
1580 tree num = build_int_cst (integer_type_node, fd->collapse);
1581 tree loop_no = build_int_cst (integer_type_node, ix);
1582 tree tile = TREE_VALUE (tiling);
1583 gcall *call
1584 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585 /* gwv-outer=*/integer_zero_node,
1586 /* gwv-inner=*/integer_zero_node);
1588 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590 gimple_call_set_lhs (call, counts[ix].tile);
1591 gimple_set_location (call, loc);
1592 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1594 tiling = TREE_CHAIN (tiling);
1596 else
1598 counts[ix].tile = NULL;
1599 counts[ix].outer = loop->v;
1602 tree b = loop->n1;
1603 tree e = loop->n2;
1604 tree s = loop->step;
1605 bool up = loop->cond_code == LT_EXPR;
1606 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607 bool negating;
1608 tree expr;
1610 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611 true, GSI_SAME_STMT);
1612 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 /* Convert the step, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617 if (negating)
1618 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619 s = fold_convert (diff_type, s);
1620 if (negating)
1621 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623 true, GSI_SAME_STMT);
1625 /* Determine the range, avoiding possible unsigned->signed overflow. */
1626 negating = !up && TYPE_UNSIGNED (iter_type);
1627 expr = fold_build2 (MINUS_EXPR, plus_type,
1628 fold_convert (plus_type, negating ? b : e),
1629 fold_convert (plus_type, negating ? e : b));
1630 expr = fold_convert (diff_type, expr);
1631 if (negating)
1632 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633 tree range = force_gimple_operand_gsi
1634 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1636 /* Determine number of iterations. */
1637 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1641 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642 true, GSI_SAME_STMT);
1644 counts[ix].base = b;
1645 counts[ix].iters = iters;
1646 counts[ix].step = s;
1648 total = fold_build2 (MULT_EXPR, bound_type, total,
1649 fold_convert (bound_type, iters));
1652 return total;
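/* Worked example (illustrative): for a collapse(2) nest

     for (i = 0; i < 4; i++)
       for (j = 8; j > 0; j--)

   each member computes iters = (range - dir + step) / step, giving
   (4 - 1 + 1) / 1 == 4 and (-8 - (-1) + (-1)) / (-1) == 8, and the
   returned outer bound is total == 4 * 8 == 32.  */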
1655 /* Emit initializers for collapsed loop members. INNER is true if
1656 this is for the element loop of a TILE. IVAR is the outer
1657 loop iteration variable, from which collapsed loop iteration values
1658 are calculated. COUNTS array has been initialized by
1659 expand_oacc_collapse_init. */
1661 static void
1662 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663 gimple_stmt_iterator *gsi,
1664 const oacc_collapse *counts, tree ivar,
1665 tree diff_type)
1667 tree ivar_type = TREE_TYPE (ivar);
1669 /* The most rapidly changing iteration variable is the innermost
1670 one. */
1671 for (int ix = fd->collapse; ix--;)
1673 const omp_for_data_loop *loop = &fd->loops[ix];
1674 const oacc_collapse *collapse = &counts[ix];
1675 tree v = inner ? loop->v : collapse->outer;
1676 tree iter_type = TREE_TYPE (v);
1677 tree plus_type = iter_type;
1678 enum tree_code plus_code = PLUS_EXPR;
1679 tree expr;
1681 if (POINTER_TYPE_P (iter_type))
1683 plus_code = POINTER_PLUS_EXPR;
1684 plus_type = sizetype;
1687 expr = ivar;
1688 if (ix)
1690 tree mod = fold_convert (ivar_type, collapse->iters);
1691 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694 true, GSI_SAME_STMT);
1697 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698 fold_convert (diff_type, collapse->step));
1699 expr = fold_build2 (plus_code, iter_type,
1700 inner ? collapse->outer : collapse->base,
1701 fold_convert (plus_type, expr));
1702 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 gassign *ass = gimple_build_assign (v, expr);
1705 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
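/* Sketch of the recovery above for the collapse(2) example with an
   inner iteration count of 8: given the flattened index IVAR,

     inner offset = ivar % 8,  outer offset = ivar / 8,

   and each V is then set to base + offset * step, using
   POINTER_PLUS_EXPR when the iteration variable has pointer type.  */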
1709 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1710 of the combined collapse > 1 loop constructs, generate code like:
1711 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712 if (cond3 is <)
1713 adj = STEP3 - 1;
1714 else
1715 adj = STEP3 + 1;
1716 count3 = (adj + N32 - N31) / STEP3;
1717 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718 if (cond2 is <)
1719 adj = STEP2 - 1;
1720 else
1721 adj = STEP2 + 1;
1722 count2 = (adj + N22 - N21) / STEP2;
1723 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724 if (cond1 is <)
1725 adj = STEP1 - 1;
1726 else
1727 adj = STEP1 + 1;
1728 count1 = (adj + N12 - N11) / STEP1;
1729 count = count1 * count2 * count3;
1730 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731 count = 0;
1732 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1733 of the combined loop constructs, just initialize COUNTS array
1734 from the _looptemp_ clauses. For loop nests with non-rectangular
1735 loops, do this only for the rectangular loops. Then pick
1736 the loops which reference outer vars in their bound expressions
1737 and the loops which they refer to and for this sub-nest compute
1738 number of iterations. For triangular loops use Faulhaber's formula;
1739 otherwise, as a fallback, compute by iterating the loops.
1740 If e.g. the sub-nest is
1741 for (I = N11; I COND1 N12; I += STEP1)
1742 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1745 COUNT = 0;
1746 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747 for (tmpj = M21 * tmpi + N21;
1748 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1750 int tmpk1 = M31 * tmpj + N31;
1751 int tmpk2 = M32 * tmpj + N32;
1752 if (tmpk1 COND3 tmpk2)
1754 if (COND3 is <)
1755 adj = STEP3 - 1;
1756 else
1757 adj = STEP3 + 1;
1758 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1761 and finally multiply the counts of the rectangular loops not
1762 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1763 store number of iterations of the loops from fd->first_nonrect
1764 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765 by the counts of rectangular loops not referenced in any non-rectangular
1766 loops sandwiched in between those. */
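/* Example of the triangular case (illustrative): for

     for (i = 0; i < n; i++)
       for (j = 0; j < i; j++)

   the inner loop runs 0 + 1 + ... + (n - 1) times, so the closed-form
   count is n * (n - 1) / 2; that is the kind of result the
   Faulhaber-style computation produces without iterating the loops.  */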
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769 creating one larger BB with all the computation and the unexpected
1770 jump at the end. I.e.
1772 bool zero3, zero2, zero1, zero;
1774 zero3 = N32 c3 N31;
1775 count3 = (N32 - N31) /[cl] STEP3;
1776 zero2 = N22 c2 N21;
1777 count2 = (N22 - N21) /[cl] STEP2;
1778 zero1 = N12 c1 N11;
1779 count1 = (N12 - N11) /[cl] STEP1;
1780 zero = zero3 || zero2 || zero1;
1781 count = count1 * count2 * count3;
1782 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1784 After all, we expect the zero=false, and thus we expect to have to
1785 evaluate all of the comparison expressions, so short-circuiting
1786 oughtn't be a win. Since the condition isn't protecting a
1787 denominator, we're not concerned about divide-by-zero, so we can
1788 fully evaluate count even if a numerator turned out to be wrong.
1790 It seems like putting this all together would create much better
1791 scheduling opportunities, and less pressure on the chip's branch
1792 predictor. */
1794 static void
1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 basic_block &entry_bb, tree *counts,
1797 basic_block &zero_iter1_bb, int &first_zero_iter1,
1798 basic_block &zero_iter2_bb, int &first_zero_iter2,
1799 basic_block &l2_dom_bb)
1801 tree t, type = TREE_TYPE (fd->loop.v);
1802 edge e, ne;
1803 int i;
1805 /* Collapsed loops need work for expansion into SSA form. */
1806 gcc_assert (!gimple_in_ssa_p (cfun));
1808 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1811 gcc_assert (fd->ordered == 0);
1812 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813 isn't supposed to be handled, as the inner loop doesn't
1814 use it. */
1815 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816 OMP_CLAUSE__LOOPTEMP_);
1817 gcc_assert (innerc);
1818 for (i = 0; i < fd->collapse; i++)
1820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821 OMP_CLAUSE__LOOPTEMP_);
1822 gcc_assert (innerc);
1823 if (i)
1824 counts[i] = OMP_CLAUSE_DECL (innerc);
1825 else
1826 counts[0] = NULL_TREE;
1828 if (fd->non_rect
1829 && fd->last_nonrect == fd->first_nonrect + 1
1830 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1832 tree c[4];
1833 for (i = 0; i < 4; i++)
1835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836 OMP_CLAUSE__LOOPTEMP_);
1837 gcc_assert (innerc);
1838 c[i] = OMP_CLAUSE_DECL (innerc);
1840 counts[0] = c[0];
1841 fd->first_inner_iterations = c[1];
1842 fd->factor = c[2];
1843 fd->adjn1 = c[3];
1845 return;
1848 for (i = fd->collapse; i < fd->ordered; i++)
1850 tree itype = TREE_TYPE (fd->loops[i].v);
1851 counts[i] = NULL_TREE;
1852 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853 fold_convert (itype, fd->loops[i].n1),
1854 fold_convert (itype, fd->loops[i].n2));
1855 if (t && integer_zerop (t))
1857 for (i = fd->collapse; i < fd->ordered; i++)
1858 counts[i] = build_int_cst (type, 0);
1859 break;
1862 bool rect_count_seen = false;
1863 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1865 tree itype = TREE_TYPE (fd->loops[i].v);
1867 if (i >= fd->collapse && counts[i])
1868 continue;
1869 if (fd->non_rect)
1871 /* Skip loops that use outer iterators in their expressions
1872 during this phase. */
1873 if (fd->loops[i].m1 || fd->loops[i].m2)
1875 counts[i] = build_zero_cst (type);
1876 continue;
1879 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881 fold_convert (itype, fd->loops[i].n1),
1882 fold_convert (itype, fd->loops[i].n2)))
1883 == NULL_TREE || !integer_onep (t)))
1885 gcond *cond_stmt;
1886 tree n1, n2;
1887 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889 true, GSI_SAME_STMT);
1890 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892 true, GSI_SAME_STMT);
1893 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894 n1, n2);
1895 e = split_block (entry_bb, cond_stmt);
1896 basic_block &zero_iter_bb
1897 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898 int &first_zero_iter
1899 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900 if (zero_iter_bb == NULL)
1902 gassign *assign_stmt;
1903 first_zero_iter = i;
1904 zero_iter_bb = create_empty_bb (entry_bb);
1905 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906 *gsi = gsi_after_labels (zero_iter_bb);
1907 if (i < fd->collapse)
1908 assign_stmt = gimple_build_assign (fd->loop.n2,
1909 build_zero_cst (type));
1910 else
1912 counts[i] = create_tmp_reg (type, ".count");
1913 assign_stmt
1914 = gimple_build_assign (counts[i], build_zero_cst (type));
1916 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918 entry_bb);
1920 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921 ne->probability = profile_probability::very_unlikely ();
1922 e->flags = EDGE_TRUE_VALUE;
1923 e->probability = ne->probability.invert ();
1924 if (l2_dom_bb == NULL)
1925 l2_dom_bb = entry_bb;
1926 entry_bb = e->dest;
1927 *gsi = gsi_last_nondebug_bb (entry_bb);
1930 if (POINTER_TYPE_P (itype))
1931 itype = signed_type_for (itype);
1932 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933 ? -1 : 1));
1934 t = fold_build2 (PLUS_EXPR, itype,
1935 fold_convert (itype, fd->loops[i].step), t);
1936 t = fold_build2 (PLUS_EXPR, itype, t,
1937 fold_convert (itype, fd->loops[i].n2));
1938 t = fold_build2 (MINUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n1));
1940 /* ?? We could probably use CEIL_DIV_EXPR instead of
1941 TRUNC_DIV_EXPR and adjust by hand, unless we can't
1942 generate the same code in the end because generically we
1943 don't know that the values involved must be negative for
1944 GT. ?? */
1945 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947 fold_build1 (NEGATE_EXPR, itype, t),
1948 fold_build1 (NEGATE_EXPR, itype,
1949 fold_convert (itype,
1950 fd->loops[i].step)));
1951 else
1952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953 fold_convert (itype, fd->loops[i].step));
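/* Editor's note: when ITYPE is unsigned and the condition is GT, both
   the numerator and STEP are negative values represented in unsigned
   arithmetic, so both are negated before the truncating division to
   divide two non-negative values; e.g. N1 = 10, N2 = 0, STEP = -2
   gives -(-2 + 1 + 0 - 10) / -(-2) = 11 / 2 = 5 iterations.  */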
1954 t = fold_convert (type, t);
1955 if (TREE_CODE (t) == INTEGER_CST)
1956 counts[i] = t;
1957 else
1959 if (i < fd->collapse || i != first_zero_iter2)
1960 counts[i] = create_tmp_reg (type, ".count");
1961 expand_omp_build_assign (gsi, counts[i], t);
1963 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1965 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966 continue;
1967 if (!rect_count_seen)
1969 t = counts[i];
1970 rect_count_seen = true;
1972 else
1973 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974 expand_omp_build_assign (gsi, fd->loop.n2, t);
1977 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1979 gcc_assert (fd->last_nonrect != -1);
1981 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983 build_zero_cst (type));
1984 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985 if (fd->loops[i].m1
1986 || fd->loops[i].m2
1987 || fd->loops[i].non_rect_referenced)
1988 break;
1989 if (i == fd->last_nonrect
1990 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1994 int o = fd->first_nonrect;
1995 tree itype = TREE_TYPE (fd->loops[o].v);
1996 tree n1o = create_tmp_reg (itype, ".n1o");
1997 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998 expand_omp_build_assign (gsi, n1o, t);
1999 tree n2o = create_tmp_reg (itype, ".n2o");
2000 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001 expand_omp_build_assign (gsi, n2o, t);
2002 if (fd->loops[i].m1 && fd->loops[i].m2)
2003 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004 unshare_expr (fd->loops[i].m1));
2005 else if (fd->loops[i].m1)
2006 t = fold_build1 (NEGATE_EXPR, itype,
2007 unshare_expr (fd->loops[i].m1));
2008 else
2009 t = unshare_expr (fd->loops[i].m2);
2010 tree m2minusm1
2011 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2014 gimple_stmt_iterator gsi2 = *gsi;
2015 gsi_prev (&gsi2);
2016 e = split_block (entry_bb, gsi_stmt (gsi2));
2017 e = split_block (e->dest, (gimple *) NULL);
2018 basic_block bb1 = e->src;
2019 entry_bb = e->dest;
2020 *gsi = gsi_after_labels (entry_bb);
2022 gsi2 = gsi_after_labels (bb1);
2023 tree ostep = fold_convert (itype, fd->loops[o].step);
2024 t = build_int_cst (itype, (fd->loops[o].cond_code
2025 == LT_EXPR ? -1 : 1));
2026 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029 if (TYPE_UNSIGNED (itype)
2030 && fd->loops[o].cond_code == GT_EXPR)
2031 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032 fold_build1 (NEGATE_EXPR, itype, t),
2033 fold_build1 (NEGATE_EXPR, itype, ostep));
2034 else
2035 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036 tree outer_niters
2037 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038 true, GSI_SAME_STMT);
2039 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040 build_one_cst (itype));
2041 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044 true, GSI_SAME_STMT);
2045 tree n1, n2, n1e, n2e;
2046 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047 if (fd->loops[i].m1)
2049 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2053 else
2054 n1 = t;
2055 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056 true, GSI_SAME_STMT);
2057 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058 if (fd->loops[i].m2)
2060 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2064 else
2065 n2 = t;
2066 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067 true, GSI_SAME_STMT);
2068 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069 if (fd->loops[i].m1)
2071 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2075 else
2076 n1e = t;
2077 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078 true, GSI_SAME_STMT);
2079 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080 if (fd->loops[i].m2)
2082 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2086 else
2087 n2e = t;
2088 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089 true, GSI_SAME_STMT);
2090 gcond *cond_stmt
2091 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092 n1, n2);
2093 e = split_block (bb1, cond_stmt);
2094 e->flags = EDGE_TRUE_VALUE;
2095 e->probability = profile_probability::likely ().guessed ();
2096 basic_block bb2 = e->dest;
2097 gsi2 = gsi_after_labels (bb2);
2099 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100 n1e, n2e);
2101 e = split_block (bb2, cond_stmt);
2102 e->flags = EDGE_TRUE_VALUE;
2103 e->probability = profile_probability::likely ().guessed ();
2104 gsi2 = gsi_after_labels (e->dest);
2106 tree step = fold_convert (itype, fd->loops[i].step);
2107 t = build_int_cst (itype, (fd->loops[i].cond_code
2108 == LT_EXPR ? -1 : 1));
2109 t = fold_build2 (PLUS_EXPR, itype, step, t);
2110 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112 if (TYPE_UNSIGNED (itype)
2113 && fd->loops[i].cond_code == GT_EXPR)
2114 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 fold_build1 (NEGATE_EXPR, itype, t),
2116 fold_build1 (NEGATE_EXPR, itype, step));
2117 else
2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 tree first_inner_iterations
2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 true, GSI_SAME_STMT);
2122 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123 if (TYPE_UNSIGNED (itype)
2124 && fd->loops[i].cond_code == GT_EXPR)
2125 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126 fold_build1 (NEGATE_EXPR, itype, t),
2127 fold_build1 (NEGATE_EXPR, itype, step));
2128 else
2129 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130 tree factor
2131 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132 true, GSI_SAME_STMT);
2133 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134 build_one_cst (itype));
2135 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137 t = fold_build2 (MULT_EXPR, itype, factor, t);
2138 t = fold_build2 (PLUS_EXPR, itype,
2139 fold_build2 (MULT_EXPR, itype, outer_niters,
2140 first_inner_iterations), t);
2141 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142 fold_convert (type, t));
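/* Editor's note (derivation): the K-th outer iteration runs
   FIRST_INNER_ITERATIONS + K * FACTOR inner iterations, so over
   N = OUTER_NITERS outer iterations the total is
     sum_{K=0}^{N-1} (FIRST_INNER_ITERATIONS + K * FACTOR)
     = N * FIRST_INNER_ITERATIONS + FACTOR * N * (N - 1) / 2,
   which is exactly the expression assigned to counts[fd->last_nonrect]
   above; the RSHIFT_EXPR by one implements the division by 2.  */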
2144 basic_block bb3 = create_empty_bb (bb1);
2145 add_bb_to_loop (bb3, bb1->loop_father);
2147 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148 e->probability = profile_probability::unlikely ().guessed ();
2150 gsi2 = gsi_after_labels (bb3);
2151 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152 n1e, n2e);
2153 e = split_block (bb3, cond_stmt);
2154 e->flags = EDGE_TRUE_VALUE;
2155 e->probability = profile_probability::likely ().guessed ();
2156 basic_block bb4 = e->dest;
2158 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159 ne->probability = e->probability.invert ();
2161 basic_block bb5 = create_empty_bb (bb2);
2162 add_bb_to_loop (bb5, bb2->loop_father);
2164 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165 ne->probability = profile_probability::unlikely ().guessed ();
2167 for (int j = 0; j < 2; j++)
2169 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170 t = fold_build2 (MINUS_EXPR, itype,
2171 unshare_expr (fd->loops[i].n1),
2172 unshare_expr (fd->loops[i].n2));
2173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174 tree tem
2175 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 true, GSI_SAME_STMT);
2177 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181 true, GSI_SAME_STMT);
2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183 if (fd->loops[i].m1)
2185 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2189 else
2190 n1 = t;
2191 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192 true, GSI_SAME_STMT);
2193 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194 if (fd->loops[i].m2)
2196 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2200 else
2201 n2 = t;
2202 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203 true, GSI_SAME_STMT);
2204 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2206 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207 n1, n2);
2208 e = split_block (gsi_bb (gsi2), cond_stmt);
2209 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210 e->probability = profile_probability::unlikely ().guessed ();
2211 ne = make_edge (e->src, bb1,
2212 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213 ne->probability = e->probability.invert ();
2214 gsi2 = gsi_after_labels (e->dest);
2216 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2219 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2222 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2226 if (fd->first_nonrect + 1 == fd->last_nonrect)
2228 fd->first_inner_iterations = first_inner_iterations;
2229 fd->factor = factor;
2230 fd->adjn1 = n1o;
2233 else
2235 /* Fallback implementation. Evaluate the loops with m1/m2
2236 non-NULL as well as their outer loops at runtime using temporaries
2237 instead of the original iteration variables, and in the
2238 body just bump the counter. */
2239 gimple_stmt_iterator gsi2 = *gsi;
2240 gsi_prev (&gsi2);
2241 e = split_block (entry_bb, gsi_stmt (gsi2));
2242 e = split_block (e->dest, (gimple *) NULL);
2243 basic_block cur_bb = e->src;
2244 basic_block next_bb = e->dest;
2245 entry_bb = e->dest;
2246 *gsi = gsi_after_labels (entry_bb);
2248 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2251 for (i = 0; i <= fd->last_nonrect; i++)
2253 if (fd->loops[i].m1 == NULL_TREE
2254 && fd->loops[i].m2 == NULL_TREE
2255 && !fd->loops[i].non_rect_referenced)
2256 continue;
2258 tree itype = TREE_TYPE (fd->loops[i].v);
2260 gsi2 = gsi_after_labels (cur_bb);
2261 tree n1, n2;
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263 if (fd->loops[i].m1 == NULL_TREE)
2264 n1 = t;
2265 else if (POINTER_TYPE_P (itype))
2267 gcc_assert (integer_onep (fd->loops[i].m1));
2268 t = unshare_expr (fd->loops[i].n1);
2269 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271 else
2273 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2274 n1 = fold_build2 (MULT_EXPR, itype,
2275 vs[i - fd->loops[i].outer], n1);
2276 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2279 true, GSI_SAME_STMT);
2280 if (i < fd->last_nonrect)
2282 vs[i] = create_tmp_reg (itype, ".it");
2283 expand_omp_build_assign (&gsi2, vs[i], n1);
2285 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2286 if (fd->loops[i].m2 == NULL_TREE)
2287 n2 = t;
2288 else if (POINTER_TYPE_P (itype))
2290 gcc_assert (integer_onep (fd->loops[i].m2));
2291 t = unshare_expr (fd->loops[i].n2);
2292 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2294 else
2296 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2297 n2 = fold_build2 (MULT_EXPR, itype,
2298 vs[i - fd->loops[i].outer], n2);
2299 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2301 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2302 true, GSI_SAME_STMT);
2303 if (POINTER_TYPE_P (itype))
2304 itype = signed_type_for (itype);
2305 if (i == fd->last_nonrect)
2307 gcond *cond_stmt
2308 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2309 n1, n2);
2310 e = split_block (cur_bb, cond_stmt);
2311 e->flags = EDGE_TRUE_VALUE;
2312 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2313 e->probability = profile_probability::likely ().guessed ();
2314 ne->probability = e->probability.invert ();
2315 gsi2 = gsi_after_labels (e->dest);
2317 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2318 ? -1 : 1));
2319 t = fold_build2 (PLUS_EXPR, itype,
2320 fold_convert (itype, fd->loops[i].step), t);
2321 t = fold_build2 (PLUS_EXPR, itype, t,
2322 fold_convert (itype, n2));
2323 t = fold_build2 (MINUS_EXPR, itype, t,
2324 fold_convert (itype, n1));
2325 tree step = fold_convert (itype, fd->loops[i].step);
2326 if (TYPE_UNSIGNED (itype)
2327 && fd->loops[i].cond_code == GT_EXPR)
2328 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2329 fold_build1 (NEGATE_EXPR, itype, t),
2330 fold_build1 (NEGATE_EXPR, itype, step));
2331 else
2332 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2333 t = fold_convert (type, t);
2334 t = fold_build2 (PLUS_EXPR, type,
2335 counts[fd->last_nonrect], t);
2336 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2337 true, GSI_SAME_STMT);
2338 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2339 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2340 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2341 break;
2343 e = split_block (cur_bb, last_stmt (cur_bb));
2345 basic_block new_cur_bb = create_empty_bb (cur_bb);
2346 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2348 gsi2 = gsi_after_labels (e->dest);
2349 tree step = fold_convert (itype,
2350 unshare_expr (fd->loops[i].step));
2351 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2352 t = fold_build_pointer_plus (vs[i], step);
2353 else
2354 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2355 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2356 true, GSI_SAME_STMT);
2357 expand_omp_build_assign (&gsi2, vs[i], t);
2359 ne = split_block (e->dest, last_stmt (e->dest));
2360 gsi2 = gsi_after_labels (ne->dest);
2362 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2363 edge e3, e4;
2364 if (next_bb == entry_bb)
2366 e3 = find_edge (ne->dest, next_bb);
2367 e3->flags = EDGE_FALSE_VALUE;
2369 else
2370 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2371 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2372 e4->probability = profile_probability::likely ().guessed ();
2373 e3->probability = e4->probability.invert ();
2374 basic_block esrc = e->src;
2375 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2376 cur_bb = new_cur_bb;
2377 basic_block latch_bb = next_bb;
2378 next_bb = e->dest;
2379 remove_edge (e);
2380 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2381 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2382 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2385 t = NULL_TREE;
2386 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2387 if (!fd->loops[i].non_rect_referenced
2388 && fd->loops[i].m1 == NULL_TREE
2389 && fd->loops[i].m2 == NULL_TREE)
2391 if (t == NULL_TREE)
2392 t = counts[i];
2393 else
2394 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2396 if (t)
2398 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2399 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2401 if (!rect_count_seen)
2402 t = counts[fd->last_nonrect];
2403 else
2404 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2405 counts[fd->last_nonrect]);
2406 expand_omp_build_assign (gsi, fd->loop.n2, t);
2408 else if (fd->non_rect)
2410 tree t = fd->loop.n2;
2411 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2412 int non_rect_referenced = 0, non_rect = 0;
2413 for (i = 0; i < fd->collapse; i++)
2415 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2416 && !integer_zerop (counts[i]))
2417 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2418 if (fd->loops[i].non_rect_referenced)
2419 non_rect_referenced++;
2420 if (fd->loops[i].m1 || fd->loops[i].m2)
2421 non_rect++;
2423 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2424 counts[fd->last_nonrect] = t;
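/* Editor's sketch (not part of GCC): with a compile-time constant
   total iteration count, the count of the non-rectangular sub-nest
   falls out by division.  Conceptually, under hypothetical names:

     unsigned long nonrect_count = total;	// == fd->loop.n2
     for (int i = 0; i < collapse; i++)
       if ((i < first_nonrect || i > last_nonrect) && count[i] != 0)
	 nonrect_count /= count[i];		// peel off rect loops

   which is what the TRUNC_DIV_EXPR folding above performs.  */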
2428 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2429 T = V;
2430 V3 = N31 + (T % count3) * STEP3;
2431 T = T / count3;
2432 V2 = N21 + (T % count2) * STEP2;
2433 T = T / count2;
2434 V1 = N11 + T * STEP1;
2435 if this loop doesn't have an inner loop construct combined with it.
2436 If it does have an inner loop construct combined with it and the
2437 iteration count isn't known constant, store values from counts array
2438 into its _looptemp_ temporaries instead.
2439 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2440 inclusive), use the count of all those loops together, and either
2441 find quadratic etc. equation roots, or as a fallback, do:
2442 COUNT = 0;
2443 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2444 for (tmpj = M21 * tmpi + N21;
2445 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2447 int tmpk1 = M31 * tmpj + N31;
2448 int tmpk2 = M32 * tmpj + N32;
2449 if (tmpk1 COND3 tmpk2)
2451 if (COND3 is <)
2452 adj = STEP3 - 1;
2453 else
2454 adj = STEP3 + 1;
2455 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2456 if (COUNT + temp > T)
2458 V1 = tmpi;
2459 V2 = tmpj;
2460 V3 = tmpk1 + (T - COUNT) * STEP3;
2461 goto done;
2463 else
2464 COUNT += temp;
2467 done:;
2468 but for optional innermost or outermost rectangular loops that aren't
2469 referenced by other loop expressions keep doing the division/modulo. */
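/* Editor's illustrative sketch (not part of GCC): the
   division/modulo decomposition above for a rectangular collapse(3)
   nest, in plain C with hypothetical names.

     static void
     sketch_decode (unsigned long t, long v[3], const long n1[3],
		    const long step[3], const unsigned long count[3])
     {
       // Peel indices off the flattened index T, innermost first.
       v[2] = n1[2] + (long) (t % count[2]) * step[2];
       t /= count[2];
       v[1] = n1[1] + (long) (t % count[1]) * step[1];
       t /= count[1];
       v[0] = n1[0] + (long) t * step[0];
     }
*/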
2471 static void
2472 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2473 tree *counts, tree *nonrect_bounds,
2474 gimple *inner_stmt, tree startvar)
2476 int i;
2477 if (gimple_omp_for_combined_p (fd->for_stmt))
2479 /* If fd->loop.n2 is constant, then no propagation of the counts
2480 is needed, they are constant. */
2481 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2482 return;
2484 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2485 ? gimple_omp_taskreg_clauses (inner_stmt)
2486 : gimple_omp_for_clauses (inner_stmt);
2487 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2488 isn't supposed to be handled, as the inner loop doesn't
2489 use it. */
2490 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2491 gcc_assert (innerc);
2492 int count = 0;
2493 if (fd->non_rect
2494 && fd->last_nonrect == fd->first_nonrect + 1
2495 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2496 count = 4;
2497 for (i = 0; i < fd->collapse + count; i++)
2499 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2500 OMP_CLAUSE__LOOPTEMP_);
2501 gcc_assert (innerc);
2502 if (i)
2504 tree tem = OMP_CLAUSE_DECL (innerc);
2505 tree t;
2506 if (i < fd->collapse)
2507 t = counts[i];
2508 else
2509 switch (i - fd->collapse)
2511 case 0: t = counts[0]; break;
2512 case 1: t = fd->first_inner_iterations; break;
2513 case 2: t = fd->factor; break;
2514 case 3: t = fd->adjn1; break;
2515 default: gcc_unreachable ();
2517 t = fold_convert (TREE_TYPE (tem), t);
2518 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2519 false, GSI_CONTINUE_LINKING);
2520 gassign *stmt = gimple_build_assign (tem, t);
2521 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2524 return;
2527 tree type = TREE_TYPE (fd->loop.v);
2528 tree tem = create_tmp_reg (type, ".tem");
2529 gassign *stmt = gimple_build_assign (tem, startvar);
2530 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2532 for (i = fd->collapse - 1; i >= 0; i--)
2534 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2535 itype = vtype;
2536 if (POINTER_TYPE_P (vtype))
2537 itype = signed_type_for (vtype);
2538 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2539 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2540 else
2541 t = tem;
2542 if (i == fd->last_nonrect)
2544 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2545 false, GSI_CONTINUE_LINKING);
2546 tree stopval = t;
2547 tree idx = create_tmp_reg (type, ".count");
2548 expand_omp_build_assign (gsi, idx,
2549 build_zero_cst (type), true);
2550 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2551 if (fd->first_nonrect + 1 == fd->last_nonrect
2552 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2553 || fd->first_inner_iterations)
2554 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2555 != CODE_FOR_nothing)
2556 && !integer_zerop (fd->loop.n2))
2558 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2559 tree itype = TREE_TYPE (fd->loops[i].v);
2560 tree first_inner_iterations = fd->first_inner_iterations;
2561 tree factor = fd->factor;
2562 gcond *cond_stmt
2563 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2564 build_zero_cst (TREE_TYPE (factor)));
2565 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2566 basic_block bb0 = e->src;
2567 e->flags = EDGE_TRUE_VALUE;
2568 e->probability = profile_probability::likely ();
2569 bb_triang_dom = bb0;
2570 *gsi = gsi_after_labels (e->dest);
2571 tree slltype = long_long_integer_type_node;
2572 tree ulltype = long_long_unsigned_type_node;
2573 tree stopvalull = fold_convert (ulltype, stopval);
2574 stopvalull
2575 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2576 false, GSI_CONTINUE_LINKING);
2577 first_inner_iterations
2578 = fold_convert (slltype, first_inner_iterations);
2579 first_inner_iterations
2580 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2581 NULL_TREE, false,
2582 GSI_CONTINUE_LINKING);
2583 factor = fold_convert (slltype, factor);
2584 factor
2585 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2586 false, GSI_CONTINUE_LINKING);
2587 tree first_inner_iterationsd
2588 = fold_build1 (FLOAT_EXPR, double_type_node,
2589 first_inner_iterations);
2590 first_inner_iterationsd
2591 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2592 NULL_TREE, false,
2593 GSI_CONTINUE_LINKING);
2594 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2595 factor);
2596 factord = force_gimple_operand_gsi (gsi, factord, true,
2597 NULL_TREE, false,
2598 GSI_CONTINUE_LINKING);
2599 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2600 stopvalull);
2601 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2602 NULL_TREE, false,
2603 GSI_CONTINUE_LINKING);
2604 /* Temporarily disable flag_rounding_math; the values will be
2605 decimal numbers divided by 2, and worst-case imprecision
2606 due to too-large values ought to be caught later by the
2607 fallback checks. */
2608 int save_flag_rounding_math = flag_rounding_math;
2609 flag_rounding_math = 0;
2610 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2611 build_real (double_type_node, dconst2));
2612 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2613 first_inner_iterationsd, t);
2614 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2615 GSI_CONTINUE_LINKING);
2616 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2617 build_real (double_type_node, dconst2));
2618 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2619 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2620 fold_build2 (MULT_EXPR, double_type_node,
2621 t3, t3));
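/* Editor's note (derivation): after C outer iterations the cumulative
   inner count is D(C) = C * FIRST_INNER_ITERATIONS
   + FACTOR * C * (C - 1) / 2.  Solving D(C) = STOPVAL as a quadratic
   in C with T3 = FIRST_INNER_ITERATIONS - FACTOR / 2 yields
     C = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3) / FACTOR;
   T computed above is the discriminant T3 * T3 + 2 * FACTOR * STOPVAL
   and the IFN_SQRT call below finishes the formula.  */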
2622 flag_rounding_math = save_flag_rounding_math;
2623 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2624 GSI_CONTINUE_LINKING);
2625 if (flag_exceptions
2626 && cfun->can_throw_non_call_exceptions
2627 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2629 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2630 build_zero_cst (double_type_node));
2631 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2632 false, GSI_CONTINUE_LINKING);
2633 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2634 boolean_false_node,
2635 NULL_TREE, NULL_TREE);
2637 else
2638 cond_stmt
2639 = gimple_build_cond (LT_EXPR, t,
2640 build_zero_cst (double_type_node),
2641 NULL_TREE, NULL_TREE);
2642 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2643 e = split_block (gsi_bb (*gsi), cond_stmt);
2644 basic_block bb1 = e->src;
2645 e->flags = EDGE_FALSE_VALUE;
2646 e->probability = profile_probability::very_likely ();
2647 *gsi = gsi_after_labels (e->dest);
2648 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2649 tree sqrtr = create_tmp_var (double_type_node);
2650 gimple_call_set_lhs (call, sqrtr);
2651 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2652 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2653 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2654 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2655 tree c = create_tmp_var (ulltype);
2656 tree d = create_tmp_var (ulltype);
2657 expand_omp_build_assign (gsi, c, t, true);
2658 t = fold_build2 (MINUS_EXPR, ulltype, c,
2659 build_one_cst (ulltype));
2660 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2661 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2662 t = fold_build2 (MULT_EXPR, ulltype,
2663 fold_convert (ulltype, fd->factor), t);
2664 tree t2
2665 = fold_build2 (MULT_EXPR, ulltype, c,
2666 fold_convert (ulltype,
2667 fd->first_inner_iterations));
2668 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2669 expand_omp_build_assign (gsi, d, t, true);
2670 t = fold_build2 (MULT_EXPR, ulltype,
2671 fold_convert (ulltype, fd->factor), c);
2672 t = fold_build2 (PLUS_EXPR, ulltype,
2673 t, fold_convert (ulltype,
2674 fd->first_inner_iterations));
2675 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 GSI_CONTINUE_LINKING);
2677 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2678 NULL_TREE, NULL_TREE);
2679 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2680 e = split_block (gsi_bb (*gsi), cond_stmt);
2681 basic_block bb2 = e->src;
2682 e->flags = EDGE_TRUE_VALUE;
2683 e->probability = profile_probability::very_likely ();
2684 *gsi = gsi_after_labels (e->dest);
2685 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2686 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2687 GSI_CONTINUE_LINKING);
2688 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2689 NULL_TREE, NULL_TREE);
2690 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2691 e = split_block (gsi_bb (*gsi), cond_stmt);
2692 basic_block bb3 = e->src;
2693 e->flags = EDGE_FALSE_VALUE;
2694 e->probability = profile_probability::very_likely ();
2695 *gsi = gsi_after_labels (e->dest);
2696 t = fold_convert (itype, c);
2697 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2698 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2699 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2700 GSI_CONTINUE_LINKING);
2701 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2702 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2703 t2 = fold_convert (itype, t2);
2704 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2705 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2706 if (fd->loops[i].m1)
2708 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2709 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2711 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2712 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2713 bb_triang = e->src;
2714 *gsi = gsi_after_labels (e->dest);
2715 remove_edge (e);
2716 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2717 e->probability = profile_probability::very_unlikely ();
2718 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2719 e->probability = profile_probability::very_unlikely ();
2720 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721 e->probability = profile_probability::very_unlikely ();
2723 basic_block bb4 = create_empty_bb (bb0);
2724 add_bb_to_loop (bb4, bb0->loop_father);
2725 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2726 e->probability = profile_probability::unlikely ();
2727 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2728 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2729 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2730 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2731 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2732 counts[i], counts[i - 1]);
2733 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2734 GSI_CONTINUE_LINKING);
2735 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2736 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2737 t = fold_convert (itype, t);
2738 t2 = fold_convert (itype, t2);
2739 t = fold_build2 (MULT_EXPR, itype, t,
2740 fold_convert (itype, fd->loops[i].step));
2741 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2742 t2 = fold_build2 (MULT_EXPR, itype, t2,
2743 fold_convert (itype, fd->loops[i - 1].step));
2744 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2745 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2746 false, GSI_CONTINUE_LINKING);
2747 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2748 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2749 if (fd->loops[i].m1)
2751 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2752 fd->loops[i - 1].v);
2753 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2755 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2756 false, GSI_CONTINUE_LINKING);
2757 stmt = gimple_build_assign (fd->loops[i].v, t);
2758 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2760 /* Fallback implementation. Evaluate the loops between
2761 fd->first_nonrect and fd->last_nonrect (inclusive) at
2762 runtime using temporaries instead of the original iteration
2763 variables; in the body just bump the counter and compare
2764 with the desired value. */
2765 gimple_stmt_iterator gsi2 = *gsi;
2766 basic_block entry_bb = gsi_bb (gsi2);
2767 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2768 e = split_block (e->dest, (gimple *) NULL);
2769 basic_block dom_bb = NULL;
2770 basic_block cur_bb = e->src;
2771 basic_block next_bb = e->dest;
2772 entry_bb = e->dest;
2773 *gsi = gsi_after_labels (entry_bb);
2775 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2776 tree n1 = NULL_TREE, n2 = NULL_TREE;
2777 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2779 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2781 tree itype = TREE_TYPE (fd->loops[j].v);
2782 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2783 && fd->loops[j].m2 == NULL_TREE
2784 && !fd->loops[j].non_rect_referenced);
2785 gsi2 = gsi_after_labels (cur_bb);
2786 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2787 if (fd->loops[j].m1 == NULL_TREE)
2788 n1 = rect_p ? build_zero_cst (type) : t;
2789 else if (POINTER_TYPE_P (itype))
2791 gcc_assert (integer_onep (fd->loops[j].m1));
2792 t = unshare_expr (fd->loops[j].n1);
2793 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2795 else
2797 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2798 n1 = fold_build2 (MULT_EXPR, itype,
2799 vs[j - fd->loops[j].outer], n1);
2800 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2802 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2803 true, GSI_SAME_STMT);
2804 if (j < fd->last_nonrect)
2806 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2807 expand_omp_build_assign (&gsi2, vs[j], n1);
2809 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2810 if (fd->loops[j].m2 == NULL_TREE)
2811 n2 = rect_p ? counts[j] : t;
2812 else if (POINTER_TYPE_P (itype))
2814 gcc_assert (integer_onep (fd->loops[j].m2));
2815 t = unshare_expr (fd->loops[j].n2);
2816 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2818 else
2820 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2821 n2 = fold_build2 (MULT_EXPR, itype,
2822 vs[j - fd->loops[j].outer], n2);
2823 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2825 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2826 true, GSI_SAME_STMT);
2827 if (POINTER_TYPE_P (itype))
2828 itype = signed_type_for (itype);
2829 if (j == fd->last_nonrect)
2831 gcond *cond_stmt
2832 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2833 n1, n2);
2834 e = split_block (cur_bb, cond_stmt);
2835 e->flags = EDGE_TRUE_VALUE;
2836 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2837 e->probability = profile_probability::likely ().guessed ();
2838 ne->probability = e->probability.invert ();
2839 gsi2 = gsi_after_labels (e->dest);
2841 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2842 ? -1 : 1));
2843 t = fold_build2 (PLUS_EXPR, itype,
2844 fold_convert (itype, fd->loops[j].step), t);
2845 t = fold_build2 (PLUS_EXPR, itype, t,
2846 fold_convert (itype, n2));
2847 t = fold_build2 (MINUS_EXPR, itype, t,
2848 fold_convert (itype, n1));
2849 tree step = fold_convert (itype, fd->loops[j].step);
2850 if (TYPE_UNSIGNED (itype)
2851 && fd->loops[j].cond_code == GT_EXPR)
2852 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2853 fold_build1 (NEGATE_EXPR, itype, t),
2854 fold_build1 (NEGATE_EXPR, itype, step));
2855 else
2856 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2857 t = fold_convert (type, t);
2858 t = fold_build2 (PLUS_EXPR, type, idx, t);
2859 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2860 true, GSI_SAME_STMT);
2861 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2862 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2863 cond_stmt
2864 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2865 NULL_TREE);
2866 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2867 e = split_block (gsi_bb (gsi2), cond_stmt);
2868 e->flags = EDGE_TRUE_VALUE;
2869 e->probability = profile_probability::likely ().guessed ();
2870 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2871 ne->probability = e->probability.invert ();
2872 gsi2 = gsi_after_labels (e->dest);
2873 expand_omp_build_assign (&gsi2, idx, t);
2874 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2875 break;
2877 e = split_block (cur_bb, last_stmt (cur_bb));
2879 basic_block new_cur_bb = create_empty_bb (cur_bb);
2880 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2882 gsi2 = gsi_after_labels (e->dest);
2883 if (rect_p)
2884 t = fold_build2 (PLUS_EXPR, type, vs[j],
2885 build_one_cst (type));
2886 else
2888 tree step
2889 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2890 if (POINTER_TYPE_P (vtype))
2891 t = fold_build_pointer_plus (vs[j], step);
2892 else
2893 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2895 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2896 true, GSI_SAME_STMT);
2897 expand_omp_build_assign (&gsi2, vs[j], t);
2899 edge ne = split_block (e->dest, last_stmt (e->dest));
2900 gsi2 = gsi_after_labels (ne->dest);
2902 gcond *cond_stmt;
2903 if (next_bb == entry_bb)
2904 /* No need to actually check the outermost condition. */
2905 cond_stmt
2906 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2907 boolean_true_node,
2908 NULL_TREE, NULL_TREE);
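/* Editor's note: the always-true comparison presumably keeps the CFG
   shape identical to the general case, so the edge bookkeeping below
   need not special-case the outermost loop of the sub-nest.  */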
2909 else
2910 cond_stmt
2911 = gimple_build_cond (rect_p ? LT_EXPR
2912 : fd->loops[j].cond_code,
2913 vs[j], n2, NULL_TREE, NULL_TREE);
2914 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2915 edge e3, e4;
2916 if (next_bb == entry_bb)
2918 e3 = find_edge (ne->dest, next_bb);
2919 e3->flags = EDGE_FALSE_VALUE;
2920 dom_bb = ne->dest;
2922 else
2923 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2924 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2925 e4->probability = profile_probability::likely ().guessed ();
2926 e3->probability = e4->probability.invert ();
2927 basic_block esrc = e->src;
2928 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2929 cur_bb = new_cur_bb;
2930 basic_block latch_bb = next_bb;
2931 next_bb = e->dest;
2932 remove_edge (e);
2933 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2934 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2935 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2937 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2939 tree vtype = TREE_TYPE (fd->loops[j].v);
2940 tree itype = vtype;
2941 if (POINTER_TYPE_P (itype))
2942 itype = signed_type_for (itype);
2943 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2944 && fd->loops[j].m2 == NULL_TREE
2945 && !fd->loops[j].non_rect_referenced);
2946 if (j == fd->last_nonrect)
2948 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2949 t = fold_convert (itype, t);
2950 tree t2
2951 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2952 t = fold_build2 (MULT_EXPR, itype, t, t2);
2953 if (POINTER_TYPE_P (vtype))
2954 t = fold_build_pointer_plus (n1, t);
2955 else
2956 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2958 else if (rect_p)
2960 t = fold_convert (itype, vs[j]);
2961 t = fold_build2 (MULT_EXPR, itype, t,
2962 fold_convert (itype, fd->loops[j].step));
2963 if (POINTER_TYPE_P (vtype))
2964 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2965 else
2966 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2968 else
2969 t = vs[j];
2970 t = force_gimple_operand_gsi (gsi, t, false,
2971 NULL_TREE, true,
2972 GSI_SAME_STMT);
2973 stmt = gimple_build_assign (fd->loops[j].v, t);
2974 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2976 if (gsi_end_p (*gsi))
2977 *gsi = gsi_last_bb (gsi_bb (*gsi));
2978 else
2979 gsi_prev (gsi);
2980 if (bb_triang)
2982 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2983 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2984 *gsi = gsi_after_labels (e->dest);
2985 if (!gsi_end_p (*gsi))
2986 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2987 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2990 else
2992 t = fold_convert (itype, t);
2993 t = fold_build2 (MULT_EXPR, itype, t,
2994 fold_convert (itype, fd->loops[i].step));
2995 if (POINTER_TYPE_P (vtype))
2996 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2997 else
2998 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2999 t = force_gimple_operand_gsi (gsi, t,
3000 DECL_P (fd->loops[i].v)
3001 && TREE_ADDRESSABLE (fd->loops[i].v),
3002 NULL_TREE, false,
3003 GSI_CONTINUE_LINKING);
3004 stmt = gimple_build_assign (fd->loops[i].v, t);
3005 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3007 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3009 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3010 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3011 false, GSI_CONTINUE_LINKING);
3012 stmt = gimple_build_assign (tem, t);
3013 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015 if (i == fd->last_nonrect)
3016 i = fd->first_nonrect;
3018 if (fd->non_rect)
3019 for (i = 0; i <= fd->last_nonrect; i++)
3020 if (fd->loops[i].m2)
3022 tree itype = TREE_TYPE (fd->loops[i].v);
3024 tree t;
3025 if (POINTER_TYPE_P (itype))
3027 gcc_assert (integer_onep (fd->loops[i].m2));
3028 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3029 unshare_expr (fd->loops[i].n2));
3031 else
3033 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3034 t = fold_build2 (MULT_EXPR, itype,
3035 fd->loops[i - fd->loops[i].outer].v, t);
3036 t = fold_build2 (PLUS_EXPR, itype, t,
3037 fold_convert (itype,
3038 unshare_expr (fd->loops[i].n2)));
3040 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3041 t = force_gimple_operand_gsi (gsi, t, false,
3042 NULL_TREE, false,
3043 GSI_CONTINUE_LINKING);
3044 stmt = gimple_build_assign (nonrect_bounds[i], t);
3045 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3049 /* Helper function for expand_omp_for_*. Generate code like:
3050 L10:
3051 V3 += STEP3;
3052 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3053 L11:
3054 V3 = N31;
3055 V2 += STEP2;
3056 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3057 L12:
3058 V2 = N21;
3059 V1 += STEP1;
3060 goto BODY_BB;
3061 For non-rectangular loops, use temporaries stored in nonrect_bounds
3062 for the upper bounds if M?2 multiplier is present. Given e.g.
3063 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3064 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3065 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3066 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3068 L10:
3069 V4 += STEP4;
3070 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3071 L11:
3072 V4 = N41 + M41 * V2; // This can be left out if the loop
3073 // refers to the immediate parent loop
3074 V3 += STEP3;
3075 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3076 L12:
3077 V3 = N31;
3078 V2 += STEP2;
3079 if (V2 cond2 N22) goto L120; else goto L13;
3080 L120:
3081 V4 = N41 + M41 * V2;
3082 NONRECT_BOUND4 = N42 + M42 * V2;
3083 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3084 L13:
3085 V2 = N21;
3086 V1 += STEP1;
3087 goto L120; */
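/* Editor's illustrative sketch (not part of GCC): for the purely
   rectangular case the update above is an odometer increment; names
   are hypothetical and the conditions are assumed to all be <.

     static void
     sketch_advance (long v[3], const long n1[3], const long n2[3],
		     const long step[3])
     {
       for (int i = 2; i > 0; i--)
	 {
	   v[i] += step[i];
	   if (v[i] < n2[i])	// still iterating: goto BODY_BB
	     return;
	   v[i] = n1[i];	// wrapped: reset, carry outwards
	 }
       v[0] += step[0];		// outermost cond is checked by caller
     }
*/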
3089 static basic_block
3090 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3091 basic_block cont_bb, basic_block body_bb)
3093 basic_block last_bb, bb, collapse_bb = NULL;
3094 int i;
3095 gimple_stmt_iterator gsi;
3096 edge e;
3097 tree t;
3098 gimple *stmt;
3100 last_bb = cont_bb;
3101 for (i = fd->collapse - 1; i >= 0; i--)
3103 tree vtype = TREE_TYPE (fd->loops[i].v);
3105 bb = create_empty_bb (last_bb);
3106 add_bb_to_loop (bb, last_bb->loop_father);
3107 gsi = gsi_start_bb (bb);
3109 if (i < fd->collapse - 1)
3111 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3112 e->probability = profile_probability::guessed_always () / 8;
3114 struct omp_for_data_loop *l = &fd->loops[i + 1];
3115 if (l->m1 == NULL_TREE || l->outer != 1)
3117 t = l->n1;
3118 if (l->m1)
3120 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3121 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v, t);
3123 else
3125 tree t2
3126 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3127 fd->loops[i + 1 - l->outer].v, l->m1);
3128 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3131 t = force_gimple_operand_gsi (&gsi, t,
3132 DECL_P (l->v)
3133 && TREE_ADDRESSABLE (l->v),
3134 NULL_TREE, false,
3135 GSI_CONTINUE_LINKING);
3136 stmt = gimple_build_assign (l->v, t);
3137 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3140 else
3141 collapse_bb = bb;
3143 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3145 if (POINTER_TYPE_P (vtype))
3146 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3147 else
3148 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3149 t = force_gimple_operand_gsi (&gsi, t,
3150 DECL_P (fd->loops[i].v)
3151 && TREE_ADDRESSABLE (fd->loops[i].v),
3152 NULL_TREE, false, GSI_CONTINUE_LINKING);
3153 stmt = gimple_build_assign (fd->loops[i].v, t);
3154 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3156 if (fd->loops[i].non_rect_referenced)
3158 basic_block update_bb = NULL, prev_bb = NULL;
3159 for (int j = i + 1; j <= fd->last_nonrect; j++)
3160 if (j - fd->loops[j].outer == i)
3162 tree n1, n2;
3163 struct omp_for_data_loop *l = &fd->loops[j];
3164 basic_block this_bb = create_empty_bb (last_bb);
3165 add_bb_to_loop (this_bb, last_bb->loop_father);
3166 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3167 if (prev_bb)
3169 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3170 e->probability
3171 = profile_probability::guessed_always ().apply_scale (7, 8);
3173 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3175 if (l->m1)
3177 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3178 t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3179 else
3181 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3182 fd->loops[i].v);
3183 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3184 t, l->n1);
3186 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3187 false,
3188 GSI_CONTINUE_LINKING);
3189 stmt = gimple_build_assign (l->v, n1);
3190 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3191 n1 = l->v;
3193 else
3194 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3195 NULL_TREE, false,
3196 GSI_CONTINUE_LINKING);
3197 if (l->m2)
3199 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3200 t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3201 else
3203 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3204 fd->loops[i].v);
3205 t = fold_build2 (PLUS_EXPR,
3206 TREE_TYPE (nonrect_bounds[j]),
3207 t, unshare_expr (l->n2));
3209 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3210 false,
3211 GSI_CONTINUE_LINKING);
3212 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3213 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3214 n2 = nonrect_bounds[j];
3216 else
3217 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3218 true, NULL_TREE, false,
3219 GSI_CONTINUE_LINKING);
3220 gcond *cond_stmt
3221 = gimple_build_cond (l->cond_code, n1, n2,
3222 NULL_TREE, NULL_TREE);
3223 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3224 if (update_bb == NULL)
3225 update_bb = this_bb;
3226 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3227 e->probability = profile_probability::guessed_always () / 8;
3228 if (prev_bb == NULL)
3229 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3230 prev_bb = this_bb;
3232 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3233 e->probability
3234 = profile_probability::guessed_always ().apply_scale (7, 8);
3235 body_bb = update_bb;
3238 if (i > 0)
3240 if (fd->loops[i].m2)
3241 t = nonrect_bounds[i];
3242 else
3243 t = unshare_expr (fd->loops[i].n2);
3244 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3245 false, GSI_CONTINUE_LINKING);
3246 tree v = fd->loops[i].v;
3247 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3248 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3249 false, GSI_CONTINUE_LINKING);
3250 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3251 stmt = gimple_build_cond_empty (t);
3252 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3253 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3254 expand_omp_regimplify_p, NULL, NULL)
3255 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3256 expand_omp_regimplify_p, NULL, NULL))
3257 gimple_regimplify_operands (stmt, &gsi);
3258 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3259 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3261 else
3262 make_edge (bb, body_bb, EDGE_FALLTHRU);
3263 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3264 last_bb = bb;
3267 return collapse_bb;
3270 /* Expand #pragma omp ordered depend(source). */
3272 static void
3273 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3274 tree *counts, location_t loc)
3276 enum built_in_function source_ix
3277 = fd->iter_type == long_integer_type_node
3278 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3279 gimple *g
3280 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3281 build_fold_addr_expr (counts[fd->ordered]));
3282 gimple_set_location (g, loc);
3283 gsi_insert_before (gsi, g, GSI_SAME_STMT);
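/* Editor's note: for the long iterator type this expands to a call to
   GOMP_doacross_post (&counts[fd->ordered]) and for unsigned long long
   to GOMP_doacross_ull_post; counts[fd->ordered] holds the iteration
   vector the libgomp runtime publishes to sink waiters.  */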
3286 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3288 static void
3289 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3290 tree *counts, tree c, location_t loc,
3291 basic_block cont_bb)
3293 auto_vec<tree, 10> args;
3294 enum built_in_function sink_ix
3295 = fd->iter_type == long_integer_type_node
3296 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3297 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3298 int i;
3299 gimple_stmt_iterator gsi2 = *gsi;
3300 bool warned_step = false;
3302 if (deps == NULL)
3304 /* Handle doacross(sink: omp_cur_iteration - 1). */
3305 gsi_prev (&gsi2);
3306 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3307 edge e2 = split_block_after_labels (e1->dest);
3308 gsi2 = gsi_after_labels (e1->dest);
3309 *gsi = gsi_last_bb (e1->src);
3310 gimple_stmt_iterator gsi3 = *gsi;
3312 if (counts[fd->collapse - 1])
3314 gcc_assert (fd->collapse == 1);
3315 t = counts[fd->collapse - 1];
3317 else if (fd->collapse > 1)
3318 t = fd->loop.v;
3319 else
3321 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3322 fd->loops[0].v, fd->loops[0].n1);
3323 t = fold_convert (fd->iter_type, t);
3326 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
3327 false, GSI_CONTINUE_LINKING);
3328 gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
3329 build_zero_cst (TREE_TYPE (t)),
3330 NULL_TREE, NULL_TREE),
3331 GSI_NEW_STMT);
3333 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3334 build_minus_one_cst (TREE_TYPE (t)));
3335 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3336 true, GSI_SAME_STMT);
3337 args.safe_push (t);
3338 for (i = fd->collapse; i < fd->ordered; i++)
3340 t = counts[fd->ordered + 2 + (i - fd->collapse)];
3341 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3342 build_minus_one_cst (TREE_TYPE (t)));
3343 t = fold_convert (fd->iter_type, t);
3344 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3345 true, GSI_SAME_STMT);
3346 args.safe_push (t);
3349 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
3350 args);
3351 gimple_set_location (g, loc);
3352 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3354 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3355 e3->probability = profile_probability::guessed_always () / 8;
3356 e1->probability = e3->probability.invert ();
3357 e1->flags = EDGE_TRUE_VALUE;
3358 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3360 if (fd->ordered > fd->collapse && cont_bb)
3362 if (counts[fd->ordered + 1] == NULL_TREE)
3363 counts[fd->ordered + 1]
3364 = create_tmp_var (boolean_type_node, ".first");
3366 edge e4;
3367 if (gsi_end_p (gsi3))
3368 e4 = split_block_after_labels (e1->src);
3369 else
3371 gsi_prev (&gsi3);
3372 e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
3374 gsi3 = gsi_last_bb (e4->src);
3376 gsi_insert_after (&gsi3,
3377 gimple_build_cond (NE_EXPR,
3378 counts[fd->ordered + 1],
3379 boolean_false_node,
3380 NULL_TREE, NULL_TREE),
3381 GSI_NEW_STMT);
3383 edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
3384 e4->probability = profile_probability::guessed_always () / 8;
3385 e5->probability = e4->probability.invert ();
3386 e4->flags = EDGE_TRUE_VALUE;
3387 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
3390 *gsi = gsi_after_labels (e2->dest);
3391 return;
3393 for (i = 0; i < fd->ordered; i++)
3395 tree step = NULL_TREE;
3396 off = TREE_PURPOSE (deps);
3397 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3399 step = TREE_OPERAND (off, 1);
3400 off = TREE_OPERAND (off, 0);
3402 if (!integer_zerop (off))
3404 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3405 || fd->loops[i].cond_code == GT_EXPR);
3406 bool forward = fd->loops[i].cond_code == LT_EXPR;
3407 if (step)
3409 /* Non-simple Fortran DO loops. If step is variable,
3410 we don't know even the direction at compile time, so we
3411 can't warn. */
3412 if (TREE_CODE (step) != INTEGER_CST)
3413 break;
3414 forward = tree_int_cst_sgn (step) != -1;
3416 if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3417 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3418 "waiting for lexically later iteration",
3419 OMP_CLAUSE_DOACROSS_DEPEND (c)
3420 ? "depend" : "doacross");
3421 break;
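/* Editor's note: an offset in the direction the loop advances names
   an iteration that cannot have run yet, so such a sink can never be
   waited on meaningfully; hence the warning and the early break.  */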
3423 deps = TREE_CHAIN (deps);
3425 /* If all offsets corresponding to the collapsed loops are zero,
3426 this depend clause can be ignored. FIXME: but there is still a
3427 flush needed. We need to emit one __sync_synchronize () for it
3428 though (perhaps conditionally)? Solve this together with the
3429 conservative dependence folding optimization.
3430 if (i >= fd->collapse)
3431 return; */
3433 deps = OMP_CLAUSE_DECL (c);
3434 gsi_prev (&gsi2);
3435 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3436 edge e2 = split_block_after_labels (e1->dest);
3438 gsi2 = gsi_after_labels (e1->dest);
3439 *gsi = gsi_last_bb (e1->src);
3440 for (i = 0; i < fd->ordered; i++)
3442 tree itype = TREE_TYPE (fd->loops[i].v);
3443 tree step = NULL_TREE;
3444 tree orig_off = NULL_TREE;
3445 if (POINTER_TYPE_P (itype))
3446 itype = sizetype;
3447 if (i)
3448 deps = TREE_CHAIN (deps);
3449 off = TREE_PURPOSE (deps);
3450 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3452 step = TREE_OPERAND (off, 1);
3453 off = TREE_OPERAND (off, 0);
3454 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3455 && integer_onep (fd->loops[i].step)
3456 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3458 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3459 if (step)
3461 off = fold_convert_loc (loc, itype, off);
3462 orig_off = off;
3463 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3466 if (integer_zerop (off))
3467 t = boolean_true_node;
3468 else
3470 tree a;
3471 tree co = fold_convert_loc (loc, itype, off);
3472 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3474 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3475 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3476 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3477 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3478 co);
3480 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3481 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3482 fd->loops[i].v, co);
3483 else
3484 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3485 fd->loops[i].v, co);
3486 if (step)
3488 tree t1, t2;
3489 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3490 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3491 fd->loops[i].n1);
3492 else
3493 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3494 fd->loops[i].n2);
3495 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3496 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3497 fd->loops[i].n2);
3498 else
3499 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3500 fd->loops[i].n1);
3501 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3502 step, build_int_cst (TREE_TYPE (step), 0));
3503 if (TREE_CODE (step) != INTEGER_CST)
3505 t1 = unshare_expr (t1);
3506 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3507 false, GSI_CONTINUE_LINKING);
3508 t2 = unshare_expr (t2);
3509 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3510 false, GSI_CONTINUE_LINKING);
3512 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3513 t, t2, t1);
3515 else if (fd->loops[i].cond_code == LT_EXPR)
3517 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3518 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3519 fd->loops[i].n1);
3520 else
3521 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3522 fd->loops[i].n2);
3524 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3525 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3526 fd->loops[i].n2);
3527 else
3528 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3529 fd->loops[i].n1);
3531 if (cond)
3532 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3533 else
3534 cond = t;
3536 off = fold_convert_loc (loc, itype, off);
3538 if (step
3539 || (fd->loops[i].cond_code == LT_EXPR
3540 ? !integer_onep (fd->loops[i].step)
3541 : !integer_minus_onep (fd->loops[i].step)))
3543 if (step == NULL_TREE
3544 && TYPE_UNSIGNED (itype)
3545 && fd->loops[i].cond_code == GT_EXPR)
3546 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3547 fold_build1_loc (loc, NEGATE_EXPR, itype,
3548 s));
3549 else
3550 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3551 orig_off ? orig_off : off, s);
3552 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3553 build_int_cst (itype, 0));
3554 if (integer_zerop (t) && !warned_step)
3556 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3557 "refers to iteration never in the iteration "
3558 "space",
3559 OMP_CLAUSE_DOACROSS_DEPEND (c)
3560 ? "depend" : "doacross");
3561 warned_step = true;
3563 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3564 cond, t);
3567 if (i <= fd->collapse - 1 && fd->collapse > 1)
3568 t = fd->loop.v;
3569 else if (counts[i])
3570 t = counts[i];
3571 else
3573 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3574 fd->loops[i].v, fd->loops[i].n1);
3575 t = fold_convert_loc (loc, fd->iter_type, t);
3577 if (step)
3578 /* We have divided off by step already earlier. */;
3579 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3580 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3581 fold_build1_loc (loc, NEGATE_EXPR, itype,
3582 s));
3583 else
3584 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3585 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3586 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3587 off = fold_convert_loc (loc, fd->iter_type, off);
3588 if (i <= fd->collapse - 1 && fd->collapse > 1)
3590 if (i)
3591 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3592 off);
3593 if (i < fd->collapse - 1)
3595 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3596 counts[i]);
3597 continue;
3600 off = unshare_expr (off);
3601 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3602 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3603 true, GSI_SAME_STMT);
3604 args.safe_push (t);
3606 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3607 gimple_set_location (g, loc);
3608 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3610 cond = unshare_expr (cond);
3611 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3612 GSI_CONTINUE_LINKING);
3613 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3614 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3615 e3->probability = profile_probability::guessed_always () / 8;
3616 e1->probability = e3->probability.invert ();
3617 e1->flags = EDGE_TRUE_VALUE;
3618 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3620 *gsi = gsi_after_labels (e2->dest);
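/* A sketch of what the above generates for e.g.
   #pragma omp for ordered(1) with a depend(sink: i - 2) inside,
   lower bound N1 and step STEP (an assumption for illustration, not
   verbatim output):

     if (i - 2 >= N1 && (STEP == 1 || 2 % STEP == 0))
       GOMP_doacross_wait ((i - 2 - N1) / STEP);

   i.e. the wait is skipped when the sink iteration lies outside the
   iteration space or off the step grid, with the checks folded at
   compile time where possible (that folding is what triggers the
   "never in the iteration space" warning above).  */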
3623 /* Expand all #pragma omp ordered depend(source) and
3624 #pragma omp ordered depend(sink:...) constructs in the current
3625 #pragma omp for ordered(n) region. */
3627 static void
3628 expand_omp_ordered_source_sink (struct omp_region *region,
3629 struct omp_for_data *fd, tree *counts,
3630 basic_block cont_bb)
3632 struct omp_region *inner;
3633 int i;
3634 for (i = fd->collapse - 1; i < fd->ordered; i++)
3635 if (i == fd->collapse - 1 && fd->collapse > 1)
3636 counts[i] = NULL_TREE;
3637 else if (i >= fd->collapse && !cont_bb)
3638 counts[i] = build_zero_cst (fd->iter_type);
3639 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3640 && integer_onep (fd->loops[i].step))
3641 counts[i] = NULL_TREE;
3642 else
3643 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3644 tree atype
3645 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3646 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3647 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3648 counts[fd->ordered + 1] = NULL_TREE;
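/* To summarize the assignments above (an editorial note): for
   fd->collapse - 1 <= i < fd->ordered, counts[i] is now NULL_TREE for
   simple unit-step iterators, a zero constant when there is no
   continue block, or a fresh .orditer temporary; counts[fd->ordered]
   is the addressable .orditera array handed to the runtime, and
   counts[fd->ordered + 1] is reserved for the .first flag created
   lazily in expand_omp_ordered_sink.  */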
3650 for (inner = region->inner; inner; inner = inner->next)
3651 if (inner->type == GIMPLE_OMP_ORDERED)
3653 gomp_ordered *ord_stmt = inner->ord_stmt;
3654 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3655 location_t loc = gimple_location (ord_stmt);
3656 tree c;
3657 for (c = gimple_omp_ordered_clauses (ord_stmt);
3658 c; c = OMP_CLAUSE_CHAIN (c))
3659 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3660 break;
3661 if (c)
3662 expand_omp_ordered_source (&gsi, fd, counts, loc);
3663 for (c = gimple_omp_ordered_clauses (ord_stmt);
3664 c; c = OMP_CLAUSE_CHAIN (c))
3665 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3666 expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3667 gsi_remove (&gsi, true);
3671 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3672 collapsed. */
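/* E.g. for ordered(2) collapse(1) this wraps BODY roughly as (a
   sketch in the style of the other pseudocode comments, not verbatim
   output):

     V2 = N21;
     .orditera[1] = 0;
   L1:
     BODY;
     V2 += STEP2;
     .orditera[1] = V2 - N21;
     if (V2 cond2 N22) goto L1;

   using an .orditer counter instead of V2 - N21 for non-unit steps,
   and toggling the .first flag around the inner loops when
   doacross(sink: omp_cur_iteration - 1) needs it.  */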
3674 static basic_block
3675 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3676 basic_block cont_bb, basic_block body_bb,
3677 bool ordered_lastprivate)
3679 if (fd->ordered == fd->collapse)
3680 return cont_bb;
3682 if (!cont_bb)
3684 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3685 for (int i = fd->collapse; i < fd->ordered; i++)
3687 tree type = TREE_TYPE (fd->loops[i].v);
3688 tree n1 = fold_convert (type, fd->loops[i].n1);
3689 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3690 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3691 size_int (i - fd->collapse + 1),
3692 NULL_TREE, NULL_TREE);
3693 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3695 return NULL;
3698 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3700 tree t, type = TREE_TYPE (fd->loops[i].v);
3701 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3702 if (counts[fd->ordered + 1] && i == fd->collapse)
3703 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3704 boolean_true_node);
3705 expand_omp_build_assign (&gsi, fd->loops[i].v,
3706 fold_convert (type, fd->loops[i].n1));
3707 if (counts[i])
3708 expand_omp_build_assign (&gsi, counts[i],
3709 build_zero_cst (fd->iter_type));
3710 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3711 size_int (i - fd->collapse + 1),
3712 NULL_TREE, NULL_TREE);
3713 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3714 if (!gsi_end_p (gsi))
3715 gsi_prev (&gsi);
3716 else
3717 gsi = gsi_last_bb (body_bb);
3718 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3719 basic_block new_body = e1->dest;
3720 if (body_bb == cont_bb)
3721 cont_bb = new_body;
3722 edge e2 = NULL;
3723 basic_block new_header;
3724 if (EDGE_COUNT (cont_bb->preds) > 0)
3726 gsi = gsi_last_bb (cont_bb);
3727 if (POINTER_TYPE_P (type))
3728 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3729 else
3730 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3731 fold_convert (type, fd->loops[i].step));
3732 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3733 if (counts[i])
3735 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3736 build_int_cst (fd->iter_type, 1));
3737 expand_omp_build_assign (&gsi, counts[i], t);
3738 t = counts[i];
3740 else
3742 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3743 fd->loops[i].v, fd->loops[i].n1);
3744 t = fold_convert (fd->iter_type, t);
3745 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3746 true, GSI_SAME_STMT);
3748 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3749 size_int (i - fd->collapse + 1),
3750 NULL_TREE, NULL_TREE);
3751 expand_omp_build_assign (&gsi, aref, t);
3752 if (counts[fd->ordered + 1] && i == fd->ordered - 1)
3753 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3754 boolean_false_node);
3755 gsi_prev (&gsi);
3756 e2 = split_block (cont_bb, gsi_stmt (gsi));
3757 new_header = e2->dest;
3759 else
3760 new_header = cont_bb;
3761 gsi = gsi_after_labels (new_header);
3762 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3763 true, GSI_SAME_STMT);
3764 tree n2
3765 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3766 true, NULL_TREE, true, GSI_SAME_STMT);
3767 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3768 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3769 edge e3 = split_block (new_header, gsi_stmt (gsi));
3770 cont_bb = e3->dest;
3771 remove_edge (e1);
3772 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3773 e3->flags = EDGE_FALSE_VALUE;
3774 e3->probability = profile_probability::guessed_always () / 8;
3775 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3776 e1->probability = e3->probability.invert ();
3778 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3779 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3781 if (e2)
3783 class loop *loop = alloc_loop ();
3784 loop->header = new_header;
3785 loop->latch = e2->src;
3786 add_loop (loop, body_bb->loop_father);
3790 /* If there are any lastprivate clauses and it is possible some loops
3791 might have zero iterations, ensure all the decls are initialized,
3792 otherwise we could crash evaluating C++ class iterators with lastprivate
3793 clauses. */
3794 bool need_inits = false;
3795 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3796 if (need_inits)
3798 tree type = TREE_TYPE (fd->loops[i].v);
3799 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3800 expand_omp_build_assign (&gsi, fd->loops[i].v,
3801 fold_convert (type, fd->loops[i].n1));
3803 else
3805 tree type = TREE_TYPE (fd->loops[i].v);
3806 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3807 boolean_type_node,
3808 fold_convert (type, fd->loops[i].n1),
3809 fold_convert (type, fd->loops[i].n2));
3810 if (!integer_onep (this_cond))
3811 need_inits = true;
3814 return cont_bb;
3817 /* A subroutine of expand_omp_for. Generate code for a parallel
3818 loop with any schedule. Given parameters:
3820 for (V = N1; V cond N2; V += STEP) BODY;
3822 where COND is "<" or ">", we generate pseudocode
3824 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3825 if (more) goto L0; else goto L3;
3826 L0:
3827 V = istart0;
3828 iend = iend0;
3829 L1:
3830 BODY;
3831 V += STEP;
3832 if (V cond iend) goto L1; else goto L2;
3833 L2:
3834 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3835 L3:
3837 If this is a combined omp parallel loop, instead of the call to
3838 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3839 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3840 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3841 inner GIMPLE_OMP_FOR and V += STEP; and
3842 if (V cond iend) goto L1; else goto L2; are removed.
3844 For collapsed loops, given parameters:
3845 collapse(3)
3846 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3847 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3848 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3849 BODY;
3851 we generate pseudocode
3853 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3854 if (cond3 is <)
3855 adj = STEP3 - 1;
3856 else
3857 adj = STEP3 + 1;
3858 count3 = (adj + N32 - N31) / STEP3;
3859 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3860 if (cond2 is <)
3861 adj = STEP2 - 1;
3862 else
3863 adj = STEP2 + 1;
3864 count2 = (adj + N22 - N21) / STEP2;
3865 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3866 if (cond1 is <)
3867 adj = STEP1 - 1;
3868 else
3869 adj = STEP1 + 1;
3870 count1 = (adj + N12 - N11) / STEP1;
3871 count = count1 * count2 * count3;
3872 goto Z1;
3873 Z0:
3874 count = 0;
3875 Z1:
3876 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3877 if (more) goto L0; else goto L3;
3878 L0:
3879 V = istart0;
3880 T = V;
3881 V3 = N31 + (T % count3) * STEP3;
3882 T = T / count3;
3883 V2 = N21 + (T % count2) * STEP2;
3884 T = T / count2;
3885 V1 = N11 + T * STEP1;
3886 iend = iend0;
3887 L1:
3888 BODY;
3889 V += 1;
3890 if (V < iend) goto L10; else goto L2;
3891 L10:
3892 V3 += STEP3;
3893 if (V3 cond3 N32) goto L1; else goto L11;
3894 L11:
3895 V3 = N31;
3896 V2 += STEP2;
3897 if (V2 cond2 N22) goto L1; else goto L12;
3898 L12:
3899 V2 = N21;
3900 V1 += STEP1;
3901 goto L1;
3902 L2:
3903 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3904 L3:
3908 static void
3909 expand_omp_for_generic (struct omp_region *region,
3910 struct omp_for_data *fd,
3911 enum built_in_function start_fn,
3912 enum built_in_function next_fn,
3913 tree sched_arg,
3914 gimple *inner_stmt)
3916 tree type, istart0, iend0, iend;
3917 tree t, vmain, vback, bias = NULL_TREE;
3918 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3919 basic_block l2_bb = NULL, l3_bb = NULL;
3920 gimple_stmt_iterator gsi;
3921 gassign *assign_stmt;
3922 bool in_combined_parallel = is_combined_parallel (region);
3923 bool broken_loop = region->cont == NULL;
3924 edge e, ne;
3925 tree *counts = NULL;
3926 int i;
3927 bool ordered_lastprivate = false;
3929 gcc_assert (!broken_loop || !in_combined_parallel);
3930 gcc_assert (fd->iter_type == long_integer_type_node
3931 || !in_combined_parallel);
3933 entry_bb = region->entry;
3934 cont_bb = region->cont;
3935 collapse_bb = NULL;
3936 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3937 gcc_assert (broken_loop
3938 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3939 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3940 l1_bb = single_succ (l0_bb);
3941 if (!broken_loop)
3943 l2_bb = create_empty_bb (cont_bb);
3944 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3945 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3946 == l1_bb));
3947 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3949 else
3950 l2_bb = NULL;
3951 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3952 exit_bb = region->exit;
3954 gsi = gsi_last_nondebug_bb (entry_bb);
3956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3957 if (fd->ordered
3958 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3959 OMP_CLAUSE_LASTPRIVATE))
3960 ordered_lastprivate = true;
3961 tree reductions = NULL_TREE;
3962 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3963 tree memv = NULL_TREE;
3964 if (fd->lastprivate_conditional)
3966 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3967 OMP_CLAUSE__CONDTEMP_);
3968 if (fd->have_pointer_condtemp)
3969 condtemp = OMP_CLAUSE_DECL (c);
3970 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3971 cond_var = OMP_CLAUSE_DECL (c);
3973 if (sched_arg)
3975 if (fd->have_reductemp)
3977 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3978 OMP_CLAUSE__REDUCTEMP_);
3979 reductions = OMP_CLAUSE_DECL (c);
3980 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3981 gimple *g = SSA_NAME_DEF_STMT (reductions);
3982 reductions = gimple_assign_rhs1 (g);
3983 OMP_CLAUSE_DECL (c) = reductions;
3984 entry_bb = gimple_bb (g);
3985 edge e = split_block (entry_bb, g);
3986 if (region->entry == entry_bb)
3987 region->entry = e->dest;
3988 gsi = gsi_last_bb (entry_bb);
3990 else
3991 reductions = null_pointer_node;
3992 if (fd->have_pointer_condtemp)
3994 tree type = TREE_TYPE (condtemp);
3995 memv = create_tmp_var (type);
3996 TREE_ADDRESSABLE (memv) = 1;
3997 unsigned HOST_WIDE_INT sz
3998 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3999 sz *= fd->lastprivate_conditional;
4000 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4001 false);
4002 mem = build_fold_addr_expr (memv);
4004 else
4005 mem = null_pointer_node;
4007 if (fd->collapse > 1 || fd->ordered)
4009 int first_zero_iter1 = -1, first_zero_iter2 = -1;
4010 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4012 counts = XALLOCAVEC (tree, fd->ordered
4013 ? fd->ordered + 2
4014 + (fd->ordered - fd->collapse)
4015 : fd->collapse);
4016 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4017 zero_iter1_bb, first_zero_iter1,
4018 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4020 if (zero_iter1_bb)
4022 /* Some counts[i] vars might be uninitialized if
4023 some loop has zero iterations. But the body shouldn't
4024 be executed in that case, so just avoid uninit warnings. */
4025 for (i = first_zero_iter1;
4026 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4027 if (SSA_VAR_P (counts[i]))
4028 suppress_warning (counts[i], OPT_Wuninitialized);
4029 gsi_prev (&gsi);
4030 e = split_block (entry_bb, gsi_stmt (gsi));
4031 entry_bb = e->dest;
4032 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4033 gsi = gsi_last_nondebug_bb (entry_bb);
4034 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4035 get_immediate_dominator (CDI_DOMINATORS,
4036 zero_iter1_bb));
4038 if (zero_iter2_bb)
4040 /* Some counts[i] vars might be uninitialized if
4041 some loop has zero iterations. But the body shouldn't
4042 be executed in that case, so just avoid uninit warnings. */
4043 for (i = first_zero_iter2; i < fd->ordered; i++)
4044 if (SSA_VAR_P (counts[i]))
4045 suppress_warning (counts[i], OPT_Wuninitialized);
4046 if (zero_iter1_bb)
4047 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4048 else
4050 gsi_prev (&gsi);
4051 e = split_block (entry_bb, gsi_stmt (gsi));
4052 entry_bb = e->dest;
4053 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4054 gsi = gsi_last_nondebug_bb (entry_bb);
4055 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4056 get_immediate_dominator
4057 (CDI_DOMINATORS, zero_iter2_bb));
4060 if (fd->collapse == 1)
4062 counts[0] = fd->loop.n2;
4063 fd->loop = fd->loops[0];
4067 type = TREE_TYPE (fd->loop.v);
4068 istart0 = create_tmp_var (fd->iter_type, ".istart0");
4069 iend0 = create_tmp_var (fd->iter_type, ".iend0");
4070 TREE_ADDRESSABLE (istart0) = 1;
4071 TREE_ADDRESSABLE (iend0) = 1;
4073 /* See if we need to bias by LLONG_MIN. */
4074 if (fd->iter_type == long_long_unsigned_type_node
4075 && TREE_CODE (type) == INTEGER_TYPE
4076 && !TYPE_UNSIGNED (type)
4077 && fd->ordered == 0)
4079 tree n1, n2;
4081 if (fd->loop.cond_code == LT_EXPR)
4083 n1 = fd->loop.n1;
4084 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4086 else
4088 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4089 n2 = fd->loop.n1;
4091 if (TREE_CODE (n1) != INTEGER_CST
4092 || TREE_CODE (n2) != INTEGER_CST
4093 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4094 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
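/* E.g. (a sketch) for a signed long long V in [-10, 10):
   bias == 0x8000000000000000, so the runtime sees the unsigned range
   [2^63 - 10, 2^63 + 10); adding the bias maps the signed range
   monotonically onto the unsigned one, keeping libgomp's unsigned
   iter_type comparisons correct.  */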
4097 gimple_stmt_iterator gsif = gsi;
4098 gsi_prev (&gsif);
4100 tree arr = NULL_TREE;
4101 if (in_combined_parallel)
4103 gcc_assert (fd->ordered == 0);
4104 /* In a combined parallel loop, emit a call to
4105 GOMP_loop_foo_next. */
4106 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4107 build_fold_addr_expr (istart0),
4108 build_fold_addr_expr (iend0));
4110 else
4112 tree t0, t1, t2, t3, t4;
4113 /* If this is not a combined parallel loop, emit a call to
4114 GOMP_loop_foo_start in ENTRY_BB. */
4115 t4 = build_fold_addr_expr (iend0);
4116 t3 = build_fold_addr_expr (istart0);
4117 if (fd->ordered)
4119 t0 = build_int_cst (unsigned_type_node,
4120 fd->ordered - fd->collapse + 1);
4121 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4122 fd->ordered
4123 - fd->collapse + 1),
4124 ".omp_counts");
4125 DECL_NAMELESS (arr) = 1;
4126 TREE_ADDRESSABLE (arr) = 1;
4127 TREE_STATIC (arr) = 1;
4128 vec<constructor_elt, va_gc> *v;
4129 vec_alloc (v, fd->ordered - fd->collapse + 1);
4130 int idx;
4132 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4134 tree c;
4135 if (idx == 0 && fd->collapse > 1)
4136 c = fd->loop.n2;
4137 else
4138 c = counts[idx + fd->collapse - 1];
4139 tree purpose = size_int (idx);
4140 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4141 if (TREE_CODE (c) != INTEGER_CST)
4142 TREE_STATIC (arr) = 0;
4145 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4146 if (!TREE_STATIC (arr))
4147 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4148 void_type_node, arr),
4149 true, NULL_TREE, true, GSI_SAME_STMT);
4150 t1 = build_fold_addr_expr (arr);
4151 t2 = NULL_TREE;
4153 else
4155 t2 = fold_convert (fd->iter_type, fd->loop.step);
4156 t1 = fd->loop.n2;
4157 t0 = fd->loop.n1;
4158 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4160 tree innerc
4161 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4162 OMP_CLAUSE__LOOPTEMP_);
4163 gcc_assert (innerc);
4164 t0 = OMP_CLAUSE_DECL (innerc);
4165 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4166 OMP_CLAUSE__LOOPTEMP_);
4167 gcc_assert (innerc);
4168 t1 = OMP_CLAUSE_DECL (innerc);
4170 if (POINTER_TYPE_P (TREE_TYPE (t0))
4171 && TYPE_PRECISION (TREE_TYPE (t0))
4172 != TYPE_PRECISION (fd->iter_type))
4174 /* Avoid casting pointers to integer of a different size. */
4175 tree itype = signed_type_for (type);
4176 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4177 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4179 else
4181 t1 = fold_convert (fd->iter_type, t1);
4182 t0 = fold_convert (fd->iter_type, t0);
4184 if (bias)
4186 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4187 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4190 if (fd->iter_type == long_integer_type_node || fd->ordered)
4192 if (fd->chunk_size)
4194 t = fold_convert (fd->iter_type, fd->chunk_size);
4195 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4196 if (sched_arg)
4198 if (fd->ordered)
4199 t = build_call_expr (builtin_decl_explicit (start_fn),
4200 8, t0, t1, sched_arg, t, t3, t4,
4201 reductions, mem);
4202 else
4203 t = build_call_expr (builtin_decl_explicit (start_fn),
4204 9, t0, t1, t2, sched_arg, t, t3, t4,
4205 reductions, mem);
4207 else if (fd->ordered)
4208 t = build_call_expr (builtin_decl_explicit (start_fn),
4209 5, t0, t1, t, t3, t4);
4210 else
4211 t = build_call_expr (builtin_decl_explicit (start_fn),
4212 6, t0, t1, t2, t, t3, t4);
4214 else if (fd->ordered)
4215 t = build_call_expr (builtin_decl_explicit (start_fn),
4216 4, t0, t1, t3, t4);
4217 else
4218 t = build_call_expr (builtin_decl_explicit (start_fn),
4219 5, t0, t1, t2, t3, t4);
4221 else
4223 tree t5;
4224 tree c_bool_type;
4225 tree bfn_decl;
4227 /* The GOMP_loop_ull_*start functions have an additional boolean
4228 argument, true for < loops and false for > loops.
4229 In Fortran, the C bool type can be different from
4230 boolean_type_node. */
4231 bfn_decl = builtin_decl_explicit (start_fn);
4232 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4233 t5 = build_int_cst (c_bool_type,
4234 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4235 if (fd->chunk_size)
4237 tree bfn_decl = builtin_decl_explicit (start_fn);
4238 t = fold_convert (fd->iter_type, fd->chunk_size);
4239 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4240 if (sched_arg)
4241 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4242 t, t3, t4, reductions, mem);
4243 else
4244 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4246 else
4247 t = build_call_expr (builtin_decl_explicit (start_fn),
4248 6, t5, t0, t1, t2, t3, t4);
4251 if (TREE_TYPE (t) != boolean_type_node)
4252 t = fold_build2 (NE_EXPR, boolean_type_node,
4253 t, build_int_cst (TREE_TYPE (t), 0));
4254 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4255 true, GSI_SAME_STMT);
4256 if (arr && !TREE_STATIC (arr))
4258 tree clobber = build_clobber (TREE_TYPE (arr));
4259 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4260 GSI_SAME_STMT);
4262 if (fd->have_pointer_condtemp)
4263 expand_omp_build_assign (&gsi, condtemp, memv, false);
4264 if (fd->have_reductemp)
4266 gimple *g = gsi_stmt (gsi);
4267 gsi_remove (&gsi, true);
4268 release_ssa_name (gimple_assign_lhs (g));
4270 entry_bb = region->entry;
4271 gsi = gsi_last_nondebug_bb (entry_bb);
4273 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4275 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4277 /* Remove the GIMPLE_OMP_FOR statement. */
4278 gsi_remove (&gsi, true);
4280 if (gsi_end_p (gsif))
4281 gsif = gsi_after_labels (gsi_bb (gsif));
4282 gsi_next (&gsif);
4284 /* Iteration setup for sequential loop goes in L0_BB. */
4285 tree startvar = fd->loop.v;
4286 tree endvar = NULL_TREE;
4288 if (gimple_omp_for_combined_p (fd->for_stmt))
4290 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4291 && gimple_omp_for_kind (inner_stmt)
4292 == GF_OMP_FOR_KIND_SIMD);
4293 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4294 OMP_CLAUSE__LOOPTEMP_);
4295 gcc_assert (innerc);
4296 startvar = OMP_CLAUSE_DECL (innerc);
4297 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4298 OMP_CLAUSE__LOOPTEMP_);
4299 gcc_assert (innerc);
4300 endvar = OMP_CLAUSE_DECL (innerc);
4303 gsi = gsi_start_bb (l0_bb);
4304 t = istart0;
4305 if (fd->ordered && fd->collapse == 1)
4306 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4307 fold_convert (fd->iter_type, fd->loop.step));
4308 else if (bias)
4309 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4310 if (fd->ordered && fd->collapse == 1)
4312 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4313 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4314 fd->loop.n1, fold_convert (sizetype, t));
4315 else
4317 t = fold_convert (TREE_TYPE (startvar), t);
4318 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4319 fd->loop.n1, t);
4322 else
4324 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4325 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4326 t = fold_convert (TREE_TYPE (startvar), t);
4328 t = force_gimple_operand_gsi (&gsi, t,
4329 DECL_P (startvar)
4330 && TREE_ADDRESSABLE (startvar),
4331 NULL_TREE, false, GSI_CONTINUE_LINKING);
4332 assign_stmt = gimple_build_assign (startvar, t);
4333 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4334 if (cond_var)
4336 tree itype = TREE_TYPE (cond_var);
4337 /* For lastprivate(conditional:) itervar, we need an iteration
4338 counter that starts at a non-zero unsigned value and increases.
4339 Prefer as few IVs as possible, so if we can use startvar
4340 itself, use that, or startvar + constant (those would be
4341 incremented with step), and as a last resort use s0 + 1,
4342 incremented by 1.  */
4343 if ((fd->ordered && fd->collapse == 1)
4344 || bias
4345 || POINTER_TYPE_P (type)
4346 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4347 || fd->loop.cond_code != LT_EXPR)
4348 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4349 build_int_cst (itype, 1));
4350 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4351 t = fold_convert (itype, t);
4352 else
4354 tree c = fold_convert (itype, fd->loop.n1);
4355 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4356 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4358 t = force_gimple_operand_gsi (&gsi, t, false,
4359 NULL_TREE, false, GSI_CONTINUE_LINKING);
4360 assign_stmt = gimple_build_assign (cond_var, t);
4361 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
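/* E.g. (a sketch) for (V = -3; V < N2; V++): fd->loop.n1 is -3, so
   c == 1 - (-3) == 4 and cond_var starts at V + 4, i.e. at 1 for the
   first iteration, satisfying the non-zero requirement described
   above.  */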
4364 t = iend0;
4365 if (fd->ordered && fd->collapse == 1)
4366 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4367 fold_convert (fd->iter_type, fd->loop.step));
4368 else if (bias)
4369 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4370 if (fd->ordered && fd->collapse == 1)
4372 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4373 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4374 fd->loop.n1, fold_convert (sizetype, t));
4375 else
4377 t = fold_convert (TREE_TYPE (startvar), t);
4378 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4379 fd->loop.n1, t);
4382 else
4384 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4385 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4386 t = fold_convert (TREE_TYPE (startvar), t);
4388 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4389 false, GSI_CONTINUE_LINKING);
4390 if (endvar)
4392 assign_stmt = gimple_build_assign (endvar, iend);
4393 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4394 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4395 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4396 else
4397 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4398 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4400 /* Handle linear clause adjustments. */
4401 tree itercnt = NULL_TREE;
4402 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4403 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4404 c; c = OMP_CLAUSE_CHAIN (c))
4405 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4406 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4408 tree d = OMP_CLAUSE_DECL (c);
4409 tree t = d, a, dest;
4410 if (omp_privatize_by_reference (t))
4411 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4412 tree type = TREE_TYPE (t);
4413 if (POINTER_TYPE_P (type))
4414 type = sizetype;
4415 dest = unshare_expr (t);
4416 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4417 expand_omp_build_assign (&gsif, v, t);
4418 if (itercnt == NULL_TREE)
4420 itercnt = startvar;
4421 tree n1 = fd->loop.n1;
4422 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4424 itercnt
4425 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4426 itercnt);
4427 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4429 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4430 itercnt, n1);
4431 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4432 itercnt, fd->loop.step);
4433 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4434 NULL_TREE, false,
4435 GSI_CONTINUE_LINKING);
4437 a = fold_build2 (MULT_EXPR, type,
4438 fold_convert (type, itercnt),
4439 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4440 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4441 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4442 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4443 false, GSI_CONTINUE_LINKING);
4444 expand_omp_build_assign (&gsi, dest, t, true);
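/* E.g. (a sketch) for linear(l:2) on a loop from N1 with step STEP,
   the code above computes

     l = l.orig + ((startvar - N1) / STEP) * 2

   so each thread enters its assigned chunk with the value l would
   have had in a sequential execution.  */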
4446 if (fd->collapse > 1)
4447 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4449 if (fd->ordered)
4451 /* Until now, the counts array contained the number of iterations
4452 (or a variable containing it) for the ith loop.  From now on, we
4453 usually need those counts only for collapsed loops, and only from
4454 the second to the last collapsed one.  Move those one element
4455 earlier; we'll use counts[fd->collapse - 1] for the first
4456 source/sink iteration counter and so on, and counts[fd->ordered]
4457 as the array holding the current counter values for
4458 depend(source).  For doacross(sink:omp_cur_iteration - 1) we need
4459 the counts from fd->collapse to fd->ordered - 1; make a copy of
4460 those to counts[fd->ordered + 2] and onwards.
4461 counts[fd->ordered + 1] can be a flag whether it is the first
4462 iteration with a new collapsed counter (used only if
4463 fd->ordered > fd->collapse).  */
4464 if (fd->ordered > fd->collapse)
4465 memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4466 (fd->ordered - fd->collapse) * sizeof (counts[0]));
4467 if (fd->collapse > 1)
4468 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4469 if (broken_loop)
4471 int i;
4472 for (i = fd->collapse; i < fd->ordered; i++)
4474 tree type = TREE_TYPE (fd->loops[i].v);
4475 tree this_cond
4476 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4477 fold_convert (type, fd->loops[i].n1),
4478 fold_convert (type, fd->loops[i].n2));
4479 if (!integer_onep (this_cond))
4480 break;
4482 if (i < fd->ordered)
4484 cont_bb
4485 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4486 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4487 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4488 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4489 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4490 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4491 make_edge (cont_bb, l1_bb, 0);
4492 l2_bb = create_empty_bb (cont_bb);
4493 broken_loop = false;
4496 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4497 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4498 ordered_lastprivate);
4499 if (counts[fd->collapse - 1])
4501 gcc_assert (fd->collapse == 1);
4502 gsi = gsi_last_bb (l0_bb);
4503 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4504 istart0, true);
4505 if (cont_bb)
4507 gsi = gsi_last_bb (cont_bb);
4508 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4509 counts[fd->collapse - 1],
4510 build_int_cst (fd->iter_type, 1));
4511 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4512 tree aref = build4 (ARRAY_REF, fd->iter_type,
4513 counts[fd->ordered], size_zero_node,
4514 NULL_TREE, NULL_TREE);
4515 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4517 t = counts[fd->collapse - 1];
4519 else if (fd->collapse > 1)
4520 t = fd->loop.v;
4521 else
4523 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4524 fd->loops[0].v, fd->loops[0].n1);
4525 t = fold_convert (fd->iter_type, t);
4527 gsi = gsi_last_bb (l0_bb);
4528 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4529 size_zero_node, NULL_TREE, NULL_TREE);
4530 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4531 false, GSI_CONTINUE_LINKING);
4532 expand_omp_build_assign (&gsi, aref, t, true);
4535 if (!broken_loop)
4537 /* Code to control the increment and predicate for the sequential
4538 loop goes in the CONT_BB. */
4539 gsi = gsi_last_nondebug_bb (cont_bb);
4540 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4541 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4542 vmain = gimple_omp_continue_control_use (cont_stmt);
4543 vback = gimple_omp_continue_control_def (cont_stmt);
4545 if (cond_var)
4547 tree itype = TREE_TYPE (cond_var);
4548 tree t2;
4549 if ((fd->ordered && fd->collapse == 1)
4550 || bias
4551 || POINTER_TYPE_P (type)
4552 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4553 || fd->loop.cond_code != LT_EXPR)
4554 t2 = build_int_cst (itype, 1);
4555 else
4556 t2 = fold_convert (itype, fd->loop.step);
4557 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4558 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4559 NULL_TREE, true, GSI_SAME_STMT);
4560 assign_stmt = gimple_build_assign (cond_var, t2);
4561 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4564 if (!gimple_omp_for_combined_p (fd->for_stmt))
4566 if (POINTER_TYPE_P (type))
4567 t = fold_build_pointer_plus (vmain, fd->loop.step);
4568 else
4569 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4570 t = force_gimple_operand_gsi (&gsi, t,
4571 DECL_P (vback)
4572 && TREE_ADDRESSABLE (vback),
4573 NULL_TREE, true, GSI_SAME_STMT);
4574 assign_stmt = gimple_build_assign (vback, t);
4575 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4577 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4579 tree tem;
4580 if (fd->collapse > 1)
4581 tem = fd->loop.v;
4582 else
4584 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4585 fd->loops[0].v, fd->loops[0].n1);
4586 tem = fold_convert (fd->iter_type, tem);
4588 tree aref = build4 (ARRAY_REF, fd->iter_type,
4589 counts[fd->ordered], size_zero_node,
4590 NULL_TREE, NULL_TREE);
4591 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4592 true, GSI_SAME_STMT);
4593 expand_omp_build_assign (&gsi, aref, tem);
4596 t = build2 (fd->loop.cond_code, boolean_type_node,
4597 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4598 iend);
4599 gcond *cond_stmt = gimple_build_cond_empty (t);
4600 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4603 /* Remove GIMPLE_OMP_CONTINUE. */
4604 gsi_remove (&gsi, true);
4606 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4607 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4609 /* Emit code to get the next parallel iteration in L2_BB. */
4610 gsi = gsi_start_bb (l2_bb);
4612 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4613 build_fold_addr_expr (istart0),
4614 build_fold_addr_expr (iend0));
4615 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4616 false, GSI_CONTINUE_LINKING);
4617 if (TREE_TYPE (t) != boolean_type_node)
4618 t = fold_build2 (NE_EXPR, boolean_type_node,
4619 t, build_int_cst (TREE_TYPE (t), 0));
4620 gcond *cond_stmt = gimple_build_cond_empty (t);
4621 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4624 /* Add the loop cleanup function. */
4625 gsi = gsi_last_nondebug_bb (exit_bb);
4626 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4627 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4628 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4629 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4630 else
4631 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4632 gcall *call_stmt = gimple_build_call (t, 0);
4633 if (fd->ordered)
4635 tree arr = counts[fd->ordered];
4636 tree clobber = build_clobber (TREE_TYPE (arr));
4637 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4638 GSI_SAME_STMT);
4640 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4642 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4643 if (fd->have_reductemp)
4645 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4646 gimple_call_lhs (call_stmt));
4647 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4650 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4651 gsi_remove (&gsi, true);
4653 /* Connect the new blocks. */
4654 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4655 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4657 if (!broken_loop)
4659 gimple_seq phis;
4661 e = find_edge (cont_bb, l3_bb);
4662 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4664 phis = phi_nodes (l3_bb);
4665 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4667 gimple *phi = gsi_stmt (gsi);
4668 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4669 PHI_ARG_DEF_FROM_EDGE (phi, e));
4671 remove_edge (e);
4673 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4674 e = find_edge (cont_bb, l1_bb);
4675 if (e == NULL)
4677 e = BRANCH_EDGE (cont_bb);
4678 gcc_assert (single_succ (e->dest) == l1_bb);
4680 if (gimple_omp_for_combined_p (fd->for_stmt))
4682 remove_edge (e);
4683 e = NULL;
4685 else if (fd->collapse > 1)
4687 remove_edge (e);
4688 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4690 else
4691 e->flags = EDGE_TRUE_VALUE;
4692 if (e)
4694 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4695 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4697 else
4699 e = find_edge (cont_bb, l2_bb);
4700 e->flags = EDGE_FALLTHRU;
4702 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4704 if (gimple_in_ssa_p (cfun))
4706 /* Add phis to the outer loop that connect to the phis in the inner,
4707 original loop, and move the loop entry value of the inner phi to
4708 the loop entry value of the outer phi. */
4709 gphi_iterator psi;
4710 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4712 location_t locus;
4713 gphi *nphi;
4714 gphi *exit_phi = psi.phi ();
4716 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4717 continue;
4719 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4720 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4722 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4723 edge latch_to_l1 = find_edge (latch, l1_bb);
4724 gphi *inner_phi
4725 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4727 tree t = gimple_phi_result (exit_phi);
4728 tree new_res = copy_ssa_name (t, NULL);
4729 nphi = create_phi_node (new_res, l0_bb);
4731 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4732 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4733 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4734 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4735 add_phi_arg (nphi, t, entry_to_l0, locus);
4737 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4738 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4740 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4744 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4745 recompute_dominator (CDI_DOMINATORS, l2_bb));
4746 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4747 recompute_dominator (CDI_DOMINATORS, l3_bb));
4748 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4749 recompute_dominator (CDI_DOMINATORS, l0_bb));
4750 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4751 recompute_dominator (CDI_DOMINATORS, l1_bb));
4753 /* We enter expand_omp_for_generic with a loop. This original loop may
4754 have its own loop struct, or it may be part of an outer loop struct
4755 (which may be the fake loop). */
4756 class loop *outer_loop = entry_bb->loop_father;
4757 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4759 add_bb_to_loop (l2_bb, outer_loop);
4761 /* We've added a new loop around the original loop. Allocate the
4762 corresponding loop struct. */
4763 class loop *new_loop = alloc_loop ();
4764 new_loop->header = l0_bb;
4765 new_loop->latch = l2_bb;
4766 add_loop (new_loop, outer_loop);
4768 /* Allocate a loop structure for the original loop unless we already
4769 had one. */
4770 if (!orig_loop_has_loop_struct
4771 && !gimple_omp_for_combined_p (fd->for_stmt))
4773 class loop *orig_loop = alloc_loop ();
4774 orig_loop->header = l1_bb;
4775 /* The loop may have multiple latches. */
4776 add_loop (orig_loop, new_loop);
4781 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
4782 compute the needed allocation size: for team allocations if !ALLOC,
4783 for thread allocations if ALLOC.  SZ is the initial needed size for
4784 other purposes, ALLOC_ALIGN the guaranteed alignment of the allocation
4785 in bytes, CNT the number of elements of each array: for !ALLOC this is
4786 omp_get_num_threads (), for ALLOC the number of iterations handled by
4787 the current thread.  If PTR is non-NULL, it is the start of the
4788 allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of
4789 those _scantemp_ clauses pointers to the corresponding arrays.  */
4791 static tree
4792 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4793 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4794 gimple_stmt_iterator *gsi, bool alloc)
4796 tree eltsz = NULL_TREE;
4797 unsigned HOST_WIDE_INT preval = 0;
4798 if (ptr && sz)
4799 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4800 ptr, size_int (sz));
4801 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4802 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4803 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4804 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4806 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4807 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4808 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4810 unsigned HOST_WIDE_INT szl
4811 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4812 szl = least_bit_hwi (szl);
4813 if (szl)
4814 al = MIN (al, szl);
4816 if (ptr == NULL_TREE)
4818 if (eltsz == NULL_TREE)
4819 eltsz = TYPE_SIZE_UNIT (pointee_type);
4820 else
4821 eltsz = size_binop (PLUS_EXPR, eltsz,
4822 TYPE_SIZE_UNIT (pointee_type));
4824 if (preval == 0 && al <= alloc_align)
4826 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4827 sz += diff;
4828 if (diff && ptr)
4829 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4830 ptr, size_int (diff));
4832 else if (al > preval)
4834 if (ptr)
4836 ptr = fold_convert (pointer_sized_int_node, ptr);
4837 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4838 build_int_cst (pointer_sized_int_node,
4839 al - 1));
4840 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4841 build_int_cst (pointer_sized_int_node,
4842 -(HOST_WIDE_INT) al));
4843 ptr = fold_convert (ptr_type_node, ptr);
4845 else
4846 sz += al - 1;
4848 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4849 preval = al;
4850 else
4851 preval = 1;
4852 if (ptr)
4854 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4855 ptr = OMP_CLAUSE_DECL (c);
4856 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4857 size_binop (MULT_EXPR, cnt,
4858 TYPE_SIZE_UNIT (pointee_type)));
4862 if (ptr == NULL_TREE)
4864 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4865 if (sz)
4866 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4867 return eltsz;
4869 else
4870 return ptr;
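/* This helper is used in two passes (an editorial sketch): first with
   PTR == NULL_TREE to compute how many bytes to allocate, e.g.

     size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
				       alloc_align, nthreads, NULL,
				       false);

   and then again with PTR pointing at the obtained memory, so that
   each matching _scantemp_ clause decl is assigned its carved-out,
   suitably aligned sub-array.  */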
4873 /* Return the last _looptemp_ clause if one has been created for
4874 lastprivate on distribute parallel for{, simd} or taskloop.
4875 FD is the loop data and INNERC should be the second _looptemp_
4876 clause (the one holding the end of the range).
4877 This is followed by collapse - 1 _looptemp_ clauses for counts[1]
4878 and up, and for triangular loops by 4 further _looptemp_ clauses
4879 (one for counts[0], one for first_inner_iterations, one for factor
4880 and one for adjn1).  After this there is optionally one
4881 _looptemp_ clause that this function returns.  */
4883 static tree
4884 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4886 gcc_assert (innerc);
4887 int count = fd->collapse - 1;
4888 if (fd->non_rect
4889 && fd->last_nonrect == fd->first_nonrect + 1
4890 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4891 count += 4;
4892 for (int i = 0; i < count; i++)
4894 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4895 OMP_CLAUSE__LOOPTEMP_);
4896 gcc_assert (innerc);
4898 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4899 OMP_CLAUSE__LOOPTEMP_);
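/* E.g. (a sketch) for collapse(3) with a triangular (non-rectangular)
   pair of inner loops, count == 2 + 4 == 6, so the loop above walks
   six _looptemp_ clauses past INNERC before looking for the optional
   lastprivate one.  */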
4902 /* A subroutine of expand_omp_for. Generate code for a parallel
4903 loop with static schedule and no specified chunk size. Given
4904 parameters:
4906 for (V = N1; V cond N2; V += STEP) BODY;
4908 where COND is "<" or ">", we generate pseudocode
4910 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4911 if (cond is <)
4912 adj = STEP - 1;
4913 else
4914 adj = STEP + 1;
4915 if ((__typeof (V)) -1 > 0 && cond is >)
4916 n = -(adj + N2 - N1) / -STEP;
4917 else
4918 n = (adj + N2 - N1) / STEP;
4919 q = n / nthreads;
4920 tt = n % nthreads;
4921 if (threadid < tt) goto L3; else goto L4;
4922 L3:
4923 tt = 0;
4924 q = q + 1;
4925 L4:
4926 s0 = q * threadid + tt;
4927 e0 = s0 + q;
4928 V = s0 * STEP + N1;
4929 if (s0 >= e0) goto L2; else goto L0;
4930 L0:
4931 e = e0 * STEP + N1;
4932 L1:
4933 BODY;
4934 V += STEP;
4935 if (V cond e) goto L1;
4936 L2:
4939 static void
4940 expand_omp_for_static_nochunk (struct omp_region *region,
4941 struct omp_for_data *fd,
4942 gimple *inner_stmt)
4944 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4945 tree type, itype, vmain, vback;
4946 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4947 basic_block body_bb, cont_bb, collapse_bb = NULL;
4948 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4949 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4950 gimple_stmt_iterator gsi, gsip;
4951 edge ep;
4952 bool broken_loop = region->cont == NULL;
4953 tree *counts = NULL;
4954 tree n1, n2, step;
4955 tree reductions = NULL_TREE;
4956 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4958 itype = type = TREE_TYPE (fd->loop.v);
4959 if (POINTER_TYPE_P (type))
4960 itype = signed_type_for (type);
4962 entry_bb = region->entry;
4963 cont_bb = region->cont;
4964 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4965 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4966 gcc_assert (broken_loop
4967 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4968 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4969 body_bb = single_succ (seq_start_bb);
4970 if (!broken_loop)
4972 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4973 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4974 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4976 exit_bb = region->exit;
4978 /* Iteration space partitioning goes in ENTRY_BB. */
4979 gsi = gsi_last_nondebug_bb (entry_bb);
4980 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4981 gsip = gsi;
4982 gsi_prev (&gsip);
4984 if (fd->collapse > 1)
4986 int first_zero_iter = -1, dummy = -1;
4987 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4989 counts = XALLOCAVEC (tree, fd->collapse);
4990 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4991 fin_bb, first_zero_iter,
4992 dummy_bb, dummy, l2_dom_bb);
4993 t = NULL_TREE;
4995 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4996 t = integer_one_node;
4997 else
4998 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4999 fold_convert (type, fd->loop.n1),
5000 fold_convert (type, fd->loop.n2));
5001 if (fd->collapse == 1
5002 && TYPE_UNSIGNED (type)
5003 && (t == NULL_TREE || !integer_onep (t)))
5005 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5006 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5007 true, GSI_SAME_STMT);
5008 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5009 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5010 true, GSI_SAME_STMT);
5011 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5012 n1, n2);
5013 ep = split_block (entry_bb, cond_stmt);
5014 ep->flags = EDGE_TRUE_VALUE;
5015 entry_bb = ep->dest;
5016 ep->probability = profile_probability::very_likely ();
5017 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
5018 ep->probability = profile_probability::very_unlikely ();
5019 if (gimple_in_ssa_p (cfun))
5021 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
5022 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5023 !gsi_end_p (gpi); gsi_next (&gpi))
5025 gphi *phi = gpi.phi ();
5026 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5027 ep, UNKNOWN_LOCATION);
5030 gsi = gsi_last_bb (entry_bb);
5033 if (fd->lastprivate_conditional)
5035 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5036 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5037 if (fd->have_pointer_condtemp)
5038 condtemp = OMP_CLAUSE_DECL (c);
5039 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5040 cond_var = OMP_CLAUSE_DECL (c);
5042 if (fd->have_reductemp
5043 /* For scan, we don't want to reinitialize condtemp before the
5044 second loop. */
5045 || (fd->have_pointer_condtemp && !fd->have_scantemp)
5046 || fd->have_nonctrl_scantemp)
5048 tree t1 = build_int_cst (long_integer_type_node, 0);
5049 tree t2 = build_int_cst (long_integer_type_node, 1);
5050 tree t3 = build_int_cstu (long_integer_type_node,
5051 (HOST_WIDE_INT_1U << 31) + 1);
5052 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5053 gimple_stmt_iterator gsi2 = gsi_none ();
5054 gimple *g = NULL;
5055 tree mem = null_pointer_node, memv = NULL_TREE;
5056 unsigned HOST_WIDE_INT condtemp_sz = 0;
5057 unsigned HOST_WIDE_INT alloc_align = 0;
5058 if (fd->have_reductemp)
5060 gcc_assert (!fd->have_nonctrl_scantemp);
5061 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5062 reductions = OMP_CLAUSE_DECL (c);
5063 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5064 g = SSA_NAME_DEF_STMT (reductions);
5065 reductions = gimple_assign_rhs1 (g);
5066 OMP_CLAUSE_DECL (c) = reductions;
5067 gsi2 = gsi_for_stmt (g);
5069 else
5071 if (gsi_end_p (gsip))
5072 gsi2 = gsi_after_labels (region->entry);
5073 else
5074 gsi2 = gsip;
5075 reductions = null_pointer_node;
5077 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
5079 tree type;
5080 if (fd->have_pointer_condtemp)
5081 type = TREE_TYPE (condtemp);
5082 else
5083 type = ptr_type_node;
5084 memv = create_tmp_var (type);
5085 TREE_ADDRESSABLE (memv) = 1;
5086 unsigned HOST_WIDE_INT sz = 0;
5087 tree size = NULL_TREE;
5088 if (fd->have_pointer_condtemp)
5090 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5091 sz *= fd->lastprivate_conditional;
5092 condtemp_sz = sz;
5094 if (fd->have_nonctrl_scantemp)
5096 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5097 gimple *g = gimple_build_call (nthreads, 0);
5098 nthreads = create_tmp_var (integer_type_node);
5099 gimple_call_set_lhs (g, nthreads);
5100 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5101 nthreads = fold_convert (sizetype, nthreads);
5102 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5103 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5104 alloc_align, nthreads, NULL,
5105 false);
5106 size = fold_convert (type, size);
5108 else
5109 size = build_int_cst (type, sz);
5110 expand_omp_build_assign (&gsi2, memv, size, false);
5111 mem = build_fold_addr_expr (memv);
5113 tree t
5114 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5115 9, t1, t2, t2, t3, t1, null_pointer_node,
5116 null_pointer_node, reductions, mem);
5117 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5118 true, GSI_SAME_STMT);
5119 if (fd->have_pointer_condtemp)
5120 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5121 if (fd->have_nonctrl_scantemp)
5123 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5124 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5125 alloc_align, nthreads, &gsi2, false);
5127 if (fd->have_reductemp)
5129 gsi_remove (&gsi2, true);
5130 release_ssa_name (gimple_assign_lhs (g));
5133 switch (gimple_omp_for_kind (fd->for_stmt))
5135 case GF_OMP_FOR_KIND_FOR:
5136 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5137 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5138 break;
5139 case GF_OMP_FOR_KIND_DISTRIBUTE:
5140 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5141 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5142 break;
5143 default:
5144 gcc_unreachable ();
5146 nthreads = build_call_expr (nthreads, 0);
5147 nthreads = fold_convert (itype, nthreads);
5148 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5149 true, GSI_SAME_STMT);
5150 threadid = build_call_expr (threadid, 0);
5151 threadid = fold_convert (itype, threadid);
5152 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5153 true, GSI_SAME_STMT);
5155 n1 = fd->loop.n1;
5156 n2 = fd->loop.n2;
5157 step = fd->loop.step;
5158 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5160 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5161 OMP_CLAUSE__LOOPTEMP_);
5162 gcc_assert (innerc);
5163 n1 = OMP_CLAUSE_DECL (innerc);
5164 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5165 OMP_CLAUSE__LOOPTEMP_);
5166 gcc_assert (innerc);
5167 n2 = OMP_CLAUSE_DECL (innerc);
5169 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5170 true, NULL_TREE, true, GSI_SAME_STMT);
5171 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5172 true, NULL_TREE, true, GSI_SAME_STMT);
5173 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5174 true, NULL_TREE, true, GSI_SAME_STMT);
5176 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5177 t = fold_build2 (PLUS_EXPR, itype, step, t);
5178 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5179 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5180 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5181 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5182 fold_build1 (NEGATE_EXPR, itype, t),
5183 fold_build1 (NEGATE_EXPR, itype, step));
5184 else
5185 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5186 t = fold_convert (itype, t);
5187 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
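/* A worked instance of the count computation above (illustration
   only): for a loop like for (V = 0; V < 10; V += 3), cond_code is
   LT_EXPR, so adj = STEP - 1 = 2 and
     n = (adj + N2 - N1) / STEP = (2 + 10 - 0) / 3 = 4,
   matching the four iterations V = 0, 3, 6, 9.  The NEGATE_EXPR
   branch handles the unsigned GT_EXPR case by negating both the
   numerator and STEP, so the truncating division sees two
   non-negative values.  */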
5189 q = create_tmp_reg (itype, "q");
5190 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5191 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5192 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5194 tt = create_tmp_reg (itype, "tt");
5195 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5196 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5197 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5199 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5200 gcond *cond_stmt = gimple_build_cond_empty (t);
5201 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5203 second_bb = split_block (entry_bb, cond_stmt)->dest;
5204 gsi = gsi_last_nondebug_bb (second_bb);
5205 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5207 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5208 GSI_SAME_STMT);
5209 gassign *assign_stmt
5210 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5211 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5213 third_bb = split_block (second_bb, assign_stmt)->dest;
5214 gsi = gsi_last_nondebug_bb (third_bb);
5215 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5217 if (fd->have_nonctrl_scantemp)
5219 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5220 tree controlp = NULL_TREE, controlb = NULL_TREE;
5221 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5222 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5223 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5225 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5226 controlb = OMP_CLAUSE_DECL (c);
5227 else
5228 controlp = OMP_CLAUSE_DECL (c);
5229 if (controlb && controlp)
5230 break;
5232 gcc_assert (controlp && controlb);
5233 tree cnt = create_tmp_var (sizetype);
5234 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5235 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5236 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5237 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5238 alloc_align, cnt, NULL, true);
5239 tree size = create_tmp_var (sizetype);
5240 expand_omp_build_assign (&gsi, size, sz, false);
5241 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5242 size, size_int (16384));
5243 expand_omp_build_assign (&gsi, controlb, cmp);
5244 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5245 NULL_TREE, NULL_TREE);
5246 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5247 fourth_bb = split_block (third_bb, g)->dest;
5248 gsi = gsi_last_nondebug_bb (fourth_bb);
5249 /* FIXME: Once we have allocators, this should use the allocator.  */
5250 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5251 gimple_call_set_lhs (g, controlp);
5252 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5253 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5254 &gsi, true);
5255 gsi_prev (&gsi);
5256 g = gsi_stmt (gsi);
5257 fifth_bb = split_block (fourth_bb, g)->dest;
5258 gsi = gsi_last_nondebug_bb (fifth_bb);
5260 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5261 gimple_call_set_lhs (g, controlp);
5262 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5263 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5264 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5265 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5266 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5268 tree tmp = create_tmp_var (sizetype);
5269 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5270 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5271 TYPE_SIZE_UNIT (pointee_type));
5272 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5273 g = gimple_build_call (alloca_decl, 2, tmp,
5274 size_int (TYPE_ALIGN (pointee_type)));
5275 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5276 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5279 sixth_bb = split_block (fifth_bb, g)->dest;
5280 gsi = gsi_last_nondebug_bb (sixth_bb);
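/* In effect, the blocks built above implement (a C-like sketch of the
   emitted control flow, not literal GIMPLE):

     controlb = (size > 16384);
     if (controlb)
       controlp = malloc (size);   /* scan temporaries carved from it  */
     else
       {
	 controlp = __builtin_stack_save ();
	 /* each scan temporary = alloca (cnt * elt_size), aligned  */
       }

   CONTROLB records which strategy was chosen so that the exit code
   can pair it with free or __builtin_stack_restore on CONTROLP.  */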
5283 t = build2 (MULT_EXPR, itype, q, threadid);
5284 t = build2 (PLUS_EXPR, itype, t, tt);
5285 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5287 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5288 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
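/* A worked example of the partitioning above (illustration only):
   with n = 10 and nthreads = 4, q = 2 and tt = 2; the earlier branch
   gives threads 0 and 1 the values q = 3, tt = 0.  Then
     thread 0: s0 = 0, e0 = 3	thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 2*2 + 2 = 6, e0 = 8
     thread 3: s0 = 2*3 + 2 = 8, e0 = 10
   so the half-open ranges [s0, e0) tile [0, n) contiguously, with the
   remainder iterations going to the first tt threads.  */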
5290 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5291 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5293 /* Remove the GIMPLE_OMP_FOR statement. */
5294 gsi_remove (&gsi, true);
5296 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5297 gsi = gsi_start_bb (seq_start_bb);
5299 tree startvar = fd->loop.v;
5300 tree endvar = NULL_TREE;
5302 if (gimple_omp_for_combined_p (fd->for_stmt))
5304 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5305 ? gimple_omp_parallel_clauses (inner_stmt)
5306 : gimple_omp_for_clauses (inner_stmt);
5307 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5308 gcc_assert (innerc);
5309 startvar = OMP_CLAUSE_DECL (innerc);
5310 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5311 OMP_CLAUSE__LOOPTEMP_);
5312 gcc_assert (innerc);
5313 endvar = OMP_CLAUSE_DECL (innerc);
5314 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5315 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5317 innerc = find_lastprivate_looptemp (fd, innerc);
5318 if (innerc)
5320 /* If needed (distribute parallel for with lastprivate),
5321 propagate down the total number of iterations. */
5322 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5323 fd->loop.n2);
5324 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5325 GSI_CONTINUE_LINKING);
5326 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5327 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5331 t = fold_convert (itype, s0);
5332 t = fold_build2 (MULT_EXPR, itype, t, step);
5333 if (POINTER_TYPE_P (type))
5335 t = fold_build_pointer_plus (n1, t);
5336 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5337 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5338 t = fold_convert (signed_type_for (type), t);
5340 else
5341 t = fold_build2 (PLUS_EXPR, type, t, n1);
5342 t = fold_convert (TREE_TYPE (startvar), t);
5343 t = force_gimple_operand_gsi (&gsi, t,
5344 DECL_P (startvar)
5345 && TREE_ADDRESSABLE (startvar),
5346 NULL_TREE, false, GSI_CONTINUE_LINKING);
5347 assign_stmt = gimple_build_assign (startvar, t);
5348 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5349 if (cond_var)
5351 tree itype = TREE_TYPE (cond_var);
5352 /* For the lastprivate(conditional:) itervar, we need an iteration
5353 counter that starts at an unsigned non-zero value and increases.
5354 Prefer as few IVs as possible, so if startvar itself can be used,
5355 use that, or startvar + constant (both are incremented by step);
5356 as a last resort use s0 + 1, itself incremented by 1 each
5357 iteration.  */
5358 if (POINTER_TYPE_P (type)
5359 || TREE_CODE (n1) != INTEGER_CST
5360 || fd->loop.cond_code != LT_EXPR)
5361 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5362 build_int_cst (itype, 1));
5363 else if (tree_int_cst_sgn (n1) == 1)
5364 t = fold_convert (itype, t);
5365 else
5367 tree c = fold_convert (itype, n1);
5368 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5369 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5371 t = force_gimple_operand_gsi (&gsi, t, false,
5372 NULL_TREE, false, GSI_CONTINUE_LINKING);
5373 assign_stmt = gimple_build_assign (cond_var, t);
5374 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
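/* For instance (illustrative values only): with a loop like
   for (i = 0; i < n; i++), n1 is the constant 0, so the last branch
   computes c = 1 - 0 and the counter starts at startvar + 1, i.e. 1
   for the first logical iteration, which satisfies the non-zero,
   increasing requirement without introducing a separate IV.  */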
5377 t = fold_convert (itype, e0);
5378 t = fold_build2 (MULT_EXPR, itype, t, step);
5379 if (POINTER_TYPE_P (type))
5381 t = fold_build_pointer_plus (n1, t);
5382 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5383 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5384 t = fold_convert (signed_type_for (type), t);
5386 else
5387 t = fold_build2 (PLUS_EXPR, type, t, n1);
5388 t = fold_convert (TREE_TYPE (startvar), t);
5389 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5390 false, GSI_CONTINUE_LINKING);
5391 if (endvar)
5393 assign_stmt = gimple_build_assign (endvar, e);
5394 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5395 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5396 assign_stmt = gimple_build_assign (fd->loop.v, e);
5397 else
5398 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5399 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5401 /* Handle linear clause adjustments. */
5402 tree itercnt = NULL_TREE;
5403 tree *nonrect_bounds = NULL;
5404 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5405 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5406 c; c = OMP_CLAUSE_CHAIN (c))
5407 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5408 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5410 tree d = OMP_CLAUSE_DECL (c);
5411 tree t = d, a, dest;
5412 if (omp_privatize_by_reference (t))
5413 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5414 if (itercnt == NULL_TREE)
5416 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5418 itercnt = fold_build2 (MINUS_EXPR, itype,
5419 fold_convert (itype, n1),
5420 fold_convert (itype, fd->loop.n1));
5421 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5422 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5423 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5424 NULL_TREE, false,
5425 GSI_CONTINUE_LINKING);
5427 else
5428 itercnt = s0;
5430 tree type = TREE_TYPE (t);
5431 if (POINTER_TYPE_P (type))
5432 type = sizetype;
5433 a = fold_build2 (MULT_EXPR, type,
5434 fold_convert (type, itercnt),
5435 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5436 dest = unshare_expr (t);
5437 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5438 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5439 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5440 false, GSI_CONTINUE_LINKING);
5441 expand_omp_build_assign (&gsi, dest, t, true);
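/* As an illustration (hypothetical clause, not from this code): for
   linear(x:2) on a worksharing loop, ITERCNT is the number of logical
   iterations preceding this thread's subrange, so the thread enters
   with x set to x + itercnt * 2, as if each of the preceding
   iterations had added the linear step.  */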
5443 if (fd->collapse > 1)
5445 if (fd->non_rect)
5447 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5448 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5450 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5451 startvar);
5454 if (!broken_loop)
5456 /* The code controlling the sequential loop replaces the
5457 GIMPLE_OMP_CONTINUE. */
5458 gsi = gsi_last_nondebug_bb (cont_bb);
5459 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5460 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5461 vmain = gimple_omp_continue_control_use (cont_stmt);
5462 vback = gimple_omp_continue_control_def (cont_stmt);
5464 if (cond_var)
5466 tree itype = TREE_TYPE (cond_var);
5467 tree t2;
5468 if (POINTER_TYPE_P (type)
5469 || TREE_CODE (n1) != INTEGER_CST
5470 || fd->loop.cond_code != LT_EXPR)
5471 t2 = build_int_cst (itype, 1);
5472 else
5473 t2 = fold_convert (itype, step);
5474 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5475 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5476 NULL_TREE, true, GSI_SAME_STMT);
5477 assign_stmt = gimple_build_assign (cond_var, t2);
5478 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5481 if (!gimple_omp_for_combined_p (fd->for_stmt))
5483 if (POINTER_TYPE_P (type))
5484 t = fold_build_pointer_plus (vmain, step);
5485 else
5486 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5487 t = force_gimple_operand_gsi (&gsi, t,
5488 DECL_P (vback)
5489 && TREE_ADDRESSABLE (vback),
5490 NULL_TREE, true, GSI_SAME_STMT);
5491 assign_stmt = gimple_build_assign (vback, t);
5492 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5494 t = build2 (fd->loop.cond_code, boolean_type_node,
5495 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5496 ? t : vback, e);
5497 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5500 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5501 gsi_remove (&gsi, true);
5503 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5504 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5505 cont_bb, body_bb);
5508 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5509 gsi = gsi_last_nondebug_bb (exit_bb);
5510 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5512 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5513 if (fd->have_reductemp
5514 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5515 && !fd->have_nonctrl_scantemp))
5517 tree fn;
5518 if (t)
5519 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5520 else
5521 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5522 gcall *g = gimple_build_call (fn, 0);
5523 if (t)
5525 gimple_call_set_lhs (g, t);
5526 if (fd->have_reductemp)
5527 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5528 NOP_EXPR, t),
5529 GSI_SAME_STMT);
5531 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5533 else
5534 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5536 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5537 && !fd->have_nonctrl_scantemp)
5539 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5540 gcall *g = gimple_build_call (fn, 0);
5541 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5543 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5545 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5546 tree controlp = NULL_TREE, controlb = NULL_TREE;
5547 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5548 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5549 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5551 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5552 controlb = OMP_CLAUSE_DECL (c);
5553 else
5554 controlp = OMP_CLAUSE_DECL (c);
5555 if (controlb && controlp)
5556 break;
5558 gcc_assert (controlp && controlb);
5559 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5560 NULL_TREE, NULL_TREE);
5561 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5562 exit1_bb = split_block (exit_bb, g)->dest;
5563 gsi = gsi_after_labels (exit1_bb);
5564 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5565 controlp);
5566 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5567 exit2_bb = split_block (exit1_bb, g)->dest;
5568 gsi = gsi_after_labels (exit2_bb);
5569 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5570 controlp);
5571 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5572 exit3_bb = split_block (exit2_bb, g)->dest;
5573 gsi = gsi_after_labels (exit3_bb);
5575 gsi_remove (&gsi, true);
5577 /* Connect all the blocks. */
5578 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5579 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5580 ep = find_edge (entry_bb, second_bb);
5581 ep->flags = EDGE_TRUE_VALUE;
5582 ep->probability = profile_probability::guessed_always () / 4;
5583 if (fourth_bb)
5585 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5586 ep->probability = profile_probability::guessed_always () / 2;
5587 ep = find_edge (third_bb, fourth_bb);
5588 ep->flags = EDGE_TRUE_VALUE;
5589 ep->probability = profile_probability::guessed_always () / 2;
5590 ep = find_edge (fourth_bb, fifth_bb);
5591 redirect_edge_and_branch (ep, sixth_bb);
5593 else
5594 sixth_bb = third_bb;
5595 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5596 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5597 if (exit1_bb)
5599 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5600 ep->probability = profile_probability::guessed_always () / 2;
5601 ep = find_edge (exit_bb, exit1_bb);
5602 ep->flags = EDGE_TRUE_VALUE;
5603 ep->probability = profile_probability::guessed_always () / 2;
5604 ep = find_edge (exit1_bb, exit2_bb);
5605 redirect_edge_and_branch (ep, exit3_bb);
5608 if (!broken_loop)
5610 ep = find_edge (cont_bb, body_bb);
5611 if (ep == NULL)
5613 ep = BRANCH_EDGE (cont_bb);
5614 gcc_assert (single_succ (ep->dest) == body_bb);
5616 if (gimple_omp_for_combined_p (fd->for_stmt))
5618 remove_edge (ep);
5619 ep = NULL;
5621 else if (fd->collapse > 1)
5623 remove_edge (ep);
5624 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5626 else
5627 ep->flags = EDGE_TRUE_VALUE;
5628 find_edge (cont_bb, fin_bb)->flags
5629 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5632 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5633 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5634 if (fourth_bb)
5636 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5637 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5639 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5641 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5642 recompute_dominator (CDI_DOMINATORS, body_bb));
5643 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5644 recompute_dominator (CDI_DOMINATORS, fin_bb));
5645 if (exit1_bb)
5647 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5648 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5651 class loop *loop = body_bb->loop_father;
5652 if (loop != entry_bb->loop_father)
5654 gcc_assert (broken_loop || loop->header == body_bb);
5655 gcc_assert (broken_loop
5656 || loop->latch == region->cont
5657 || single_pred (loop->latch) == region->cont);
5658 return;
5661 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5663 loop = alloc_loop ();
5664 loop->header = body_bb;
5665 if (collapse_bb == NULL)
5666 loop->latch = cont_bb;
5667 add_loop (loop, body_bb->loop_father);
5671 /* Return the phi in E->DEST that has ARG as its argument on edge E.  */
5673 static gphi *
5674 find_phi_with_arg_on_edge (tree arg, edge e)
5676 basic_block bb = e->dest;
5678 for (gphi_iterator gpi = gsi_start_phis (bb);
5679 !gsi_end_p (gpi);
5680 gsi_next (&gpi))
5682 gphi *phi = gpi.phi ();
5683 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5684 return phi;
5687 return NULL;
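/* A usage sketch (hypothetical SSA names): if E->DEST contains
     x_3 = PHI <x_1 (E), x_2 (other edge)>
   then find_phi_with_arg_on_edge (x_1, E) returns that phi, and NULL
   is returned when no phi in E->DEST takes ARG from E.  */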
5690 /* A subroutine of expand_omp_for. Generate code for a parallel
5691 loop with static schedule and a specified chunk size. Given
5692 parameters:
5694 for (V = N1; V cond N2; V += STEP) BODY;
5696 where COND is "<" or ">", we generate pseudocode
5698 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5699 if (cond is <)
5700 adj = STEP - 1;
5701 else
5702 adj = STEP + 1;
5703 if ((__typeof (V)) -1 > 0 && cond is >)
5704 n = -(adj + N2 - N1) / -STEP;
5705 else
5706 n = (adj + N2 - N1) / STEP;
5707 trip = 0;
5708 V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
5709 here so that V is defined
5710 if the loop is not entered
5711 L0:
5712 s0 = (trip * nthreads + threadid) * CHUNK;
5713 e0 = min (s0 + CHUNK, n);
5714 if (s0 < n) goto L1; else goto L4;
5715 L1:
5716 V = s0 * STEP + N1;
5717 e = e0 * STEP + N1;
5718 L2:
5719 BODY;
5720 V += STEP;
5721 if (V cond e) goto L2; else goto L3;
5722 L3:
5723 trip += 1;
5724 goto L0;
5725 L4:
5727 */
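/* A worked example of the schedule above (illustration only): with
   n = 10, nthreads = 2 and CHUNK = 3,
   s0 = (trip * nthreads + threadid) * CHUNK gives
     trip 0:  thread 0 -> [0, 3)   thread 1 -> [3, 6)
     trip 1:  thread 0 -> [6, 9)   thread 1 -> [9, 10)  (e0 clamped)
     trip 2:  s0 >= n for both threads, so both exit at L4.  */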
5728 static void
5729 expand_omp_for_static_chunk (struct omp_region *region,
5730 struct omp_for_data *fd, gimple *inner_stmt)
5732 tree n, s0, e0, e, t;
5733 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5734 tree type, itype, vmain, vback, vextra;
5735 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5736 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5737 gimple_stmt_iterator gsi, gsip;
5738 edge se;
5739 bool broken_loop = region->cont == NULL;
5740 tree *counts = NULL;
5741 tree n1, n2, step;
5742 tree reductions = NULL_TREE;
5743 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5745 itype = type = TREE_TYPE (fd->loop.v);
5746 if (POINTER_TYPE_P (type))
5747 itype = signed_type_for (type);
5749 entry_bb = region->entry;
5750 se = split_block (entry_bb, last_stmt (entry_bb));
5751 entry_bb = se->src;
5752 iter_part_bb = se->dest;
5753 cont_bb = region->cont;
5754 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5755 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5756 gcc_assert (broken_loop
5757 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5758 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5759 body_bb = single_succ (seq_start_bb);
5760 if (!broken_loop)
5762 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5763 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5764 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5765 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5767 exit_bb = region->exit;
5769 /* Trip and adjustment setup goes in ENTRY_BB. */
5770 gsi = gsi_last_nondebug_bb (entry_bb);
5771 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5772 gsip = gsi;
5773 gsi_prev (&gsip);
5775 if (fd->collapse > 1)
5777 int first_zero_iter = -1, dummy = -1;
5778 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5780 counts = XALLOCAVEC (tree, fd->collapse);
5781 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5782 fin_bb, first_zero_iter,
5783 dummy_bb, dummy, l2_dom_bb);
5784 t = NULL_TREE;
5786 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5787 t = integer_one_node;
5788 else
5789 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5790 fold_convert (type, fd->loop.n1),
5791 fold_convert (type, fd->loop.n2));
5792 if (fd->collapse == 1
5793 && TYPE_UNSIGNED (type)
5794 && (t == NULL_TREE || !integer_onep (t)))
5796 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5797 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5798 true, GSI_SAME_STMT);
5799 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5800 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5801 true, GSI_SAME_STMT);
5802 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5803 n1, n2);
5804 se = split_block (entry_bb, cond_stmt);
5805 se->flags = EDGE_TRUE_VALUE;
5806 entry_bb = se->dest;
5807 se->probability = profile_probability::very_likely ();
5808 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5809 se->probability = profile_probability::very_unlikely ();
5810 if (gimple_in_ssa_p (cfun))
5812 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
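/* The new zero-iteration edge into FIN_BB must supply an argument for
   each phi there; reuse the value each phi already receives on the
   ITER_PART_BB -> FIN_BB edge (index DEST_IDX), since the bypass
   corresponds to the loop making no trips at all.  */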
5813 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5814 !gsi_end_p (gpi); gsi_next (&gpi))
5816 gphi *phi = gpi.phi ();
5817 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5818 se, UNKNOWN_LOCATION);
5821 gsi = gsi_last_bb (entry_bb);
5824 if (fd->lastprivate_conditional)
5826 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5827 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5828 if (fd->have_pointer_condtemp)
5829 condtemp = OMP_CLAUSE_DECL (c);
5830 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5831 cond_var = OMP_CLAUSE_DECL (c);
5833 if (fd->have_reductemp || fd->have_pointer_condtemp)
5835 tree t1 = build_int_cst (long_integer_type_node, 0);
5836 tree t2 = build_int_cst (long_integer_type_node, 1);
5837 tree t3 = build_int_cstu (long_integer_type_node,
5838 (HOST_WIDE_INT_1U << 31) + 1);
5839 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5840 gimple_stmt_iterator gsi2 = gsi_none ();
5841 gimple *g = NULL;
5842 tree mem = null_pointer_node, memv = NULL_TREE;
5843 if (fd->have_reductemp)
5845 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5846 reductions = OMP_CLAUSE_DECL (c);
5847 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5848 g = SSA_NAME_DEF_STMT (reductions);
5849 reductions = gimple_assign_rhs1 (g);
5850 OMP_CLAUSE_DECL (c) = reductions;
5851 gsi2 = gsi_for_stmt (g);
5853 else
5855 if (gsi_end_p (gsip))
5856 gsi2 = gsi_after_labels (region->entry);
5857 else
5858 gsi2 = gsip;
5859 reductions = null_pointer_node;
5861 if (fd->have_pointer_condtemp)
5863 tree type = TREE_TYPE (condtemp);
5864 memv = create_tmp_var (type);
5865 TREE_ADDRESSABLE (memv) = 1;
5866 unsigned HOST_WIDE_INT sz
5867 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5868 sz *= fd->lastprivate_conditional;
5869 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5870 false);
5871 mem = build_fold_addr_expr (memv);
5873 tree t
5874 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5875 9, t1, t2, t2, t3, t1, null_pointer_node,
5876 null_pointer_node, reductions, mem);
5877 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5878 true, GSI_SAME_STMT);
5879 if (fd->have_pointer_condtemp)
5880 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5881 if (fd->have_reductemp)
5883 gsi_remove (&gsi2, true);
5884 release_ssa_name (gimple_assign_lhs (g));
5887 switch (gimple_omp_for_kind (fd->for_stmt))
5889 case GF_OMP_FOR_KIND_FOR:
5890 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5891 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5892 break;
5893 case GF_OMP_FOR_KIND_DISTRIBUTE:
5894 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5895 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5896 break;
5897 default:
5898 gcc_unreachable ();
5900 nthreads = build_call_expr (nthreads, 0);
5901 nthreads = fold_convert (itype, nthreads);
5902 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5903 true, GSI_SAME_STMT);
5904 threadid = build_call_expr (threadid, 0);
5905 threadid = fold_convert (itype, threadid);
5906 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5907 true, GSI_SAME_STMT);
5909 n1 = fd->loop.n1;
5910 n2 = fd->loop.n2;
5911 step = fd->loop.step;
5912 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5914 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5915 OMP_CLAUSE__LOOPTEMP_);
5916 gcc_assert (innerc);
5917 n1 = OMP_CLAUSE_DECL (innerc);
5918 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5919 OMP_CLAUSE__LOOPTEMP_);
5920 gcc_assert (innerc);
5921 n2 = OMP_CLAUSE_DECL (innerc);
5923 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5924 true, NULL_TREE, true, GSI_SAME_STMT);
5925 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5926 true, NULL_TREE, true, GSI_SAME_STMT);
5927 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5928 true, NULL_TREE, true, GSI_SAME_STMT);
5929 tree chunk_size = fold_convert (itype, fd->chunk_size);
5930 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5931 chunk_size
5932 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5933 GSI_SAME_STMT);
5935 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5936 t = fold_build2 (PLUS_EXPR, itype, step, t);
5937 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5938 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5939 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5940 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5941 fold_build1 (NEGATE_EXPR, itype, t),
5942 fold_build1 (NEGATE_EXPR, itype, step));
5943 else
5944 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5945 t = fold_convert (itype, t);
5946 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5947 true, GSI_SAME_STMT);
5949 trip_var = create_tmp_reg (itype, ".trip");
5950 if (gimple_in_ssa_p (cfun))
5952 trip_init = make_ssa_name (trip_var);
5953 trip_main = make_ssa_name (trip_var);
5954 trip_back = make_ssa_name (trip_var);
5956 else
5958 trip_init = trip_var;
5959 trip_main = trip_var;
5960 trip_back = trip_var;
5963 gassign *assign_stmt
5964 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5965 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5967 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5968 t = fold_build2 (MULT_EXPR, itype, t, step);
5969 if (POINTER_TYPE_P (type))
5970 t = fold_build_pointer_plus (n1, t);
5971 else
5972 t = fold_build2 (PLUS_EXPR, type, t, n1);
5973 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5974 true, GSI_SAME_STMT);
5976 /* Remove the GIMPLE_OMP_FOR. */
5977 gsi_remove (&gsi, true);
5979 gimple_stmt_iterator gsif = gsi;
5981 /* Iteration space partitioning goes in ITER_PART_BB. */
5982 gsi = gsi_last_bb (iter_part_bb);
5984 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5985 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5986 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5987 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5988 false, GSI_CONTINUE_LINKING);
5990 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5991 t = fold_build2 (MIN_EXPR, itype, t, n);
5992 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5993 false, GSI_CONTINUE_LINKING);
5995 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5996 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5998 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5999 gsi = gsi_start_bb (seq_start_bb);
6001 tree startvar = fd->loop.v;
6002 tree endvar = NULL_TREE;
6004 if (gimple_omp_for_combined_p (fd->for_stmt))
6006 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
6007 ? gimple_omp_parallel_clauses (inner_stmt)
6008 : gimple_omp_for_clauses (inner_stmt);
6009 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6010 gcc_assert (innerc);
6011 startvar = OMP_CLAUSE_DECL (innerc);
6012 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6013 OMP_CLAUSE__LOOPTEMP_);
6014 gcc_assert (innerc);
6015 endvar = OMP_CLAUSE_DECL (innerc);
6016 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
6017 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
6019 innerc = find_lastprivate_looptemp (fd, innerc);
6020 if (innerc)
6022 /* If needed (distribute parallel for with lastprivate),
6023 propagate down the total number of iterations. */
6024 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
6025 fd->loop.n2);
6026 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
6027 GSI_CONTINUE_LINKING);
6028 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6029 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6034 t = fold_convert (itype, s0);
6035 t = fold_build2 (MULT_EXPR, itype, t, step);
6036 if (POINTER_TYPE_P (type))
6038 t = fold_build_pointer_plus (n1, t);
6039 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6040 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6041 t = fold_convert (signed_type_for (type), t);
6043 else
6044 t = fold_build2 (PLUS_EXPR, type, t, n1);
6045 t = fold_convert (TREE_TYPE (startvar), t);
6046 t = force_gimple_operand_gsi (&gsi, t,
6047 DECL_P (startvar)
6048 && TREE_ADDRESSABLE (startvar),
6049 NULL_TREE, false, GSI_CONTINUE_LINKING);
6050 assign_stmt = gimple_build_assign (startvar, t);
6051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6052 if (cond_var)
6054 tree itype = TREE_TYPE (cond_var);
6055 /* For the lastprivate(conditional:) itervar, we need an iteration
6056 counter that starts at an unsigned non-zero value and increases.
6057 Prefer as few IVs as possible, so if startvar itself can be used,
6058 use that, or startvar + constant (both are incremented by step);
6059 as a last resort use s0 + 1, itself incremented by 1 each
6060 iteration.  */
6061 if (POINTER_TYPE_P (type)
6062 || TREE_CODE (n1) != INTEGER_CST
6063 || fd->loop.cond_code != LT_EXPR)
6064 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
6065 build_int_cst (itype, 1));
6066 else if (tree_int_cst_sgn (n1) == 1)
6067 t = fold_convert (itype, t);
6068 else
6070 tree c = fold_convert (itype, n1);
6071 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
6072 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
6074 t = force_gimple_operand_gsi (&gsi, t, false,
6075 NULL_TREE, false, GSI_CONTINUE_LINKING);
6076 assign_stmt = gimple_build_assign (cond_var, t);
6077 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6080 t = fold_convert (itype, e0);
6081 t = fold_build2 (MULT_EXPR, itype, t, step);
6082 if (POINTER_TYPE_P (type))
6084 t = fold_build_pointer_plus (n1, t);
6085 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6086 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6087 t = fold_convert (signed_type_for (type), t);
6089 else
6090 t = fold_build2 (PLUS_EXPR, type, t, n1);
6091 t = fold_convert (TREE_TYPE (startvar), t);
6092 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6093 false, GSI_CONTINUE_LINKING);
6094 if (endvar)
6096 assign_stmt = gimple_build_assign (endvar, e);
6097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6098 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6099 assign_stmt = gimple_build_assign (fd->loop.v, e);
6100 else
6101 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6102 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6104 /* Handle linear clause adjustments. */
6105 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6106 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6107 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6108 c; c = OMP_CLAUSE_CHAIN (c))
6109 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6110 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6112 tree d = OMP_CLAUSE_DECL (c);
6113 tree t = d, a, dest;
6114 if (omp_privatize_by_reference (t))
6115 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6116 tree type = TREE_TYPE (t);
6117 if (POINTER_TYPE_P (type))
6118 type = sizetype;
6119 dest = unshare_expr (t);
6120 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6121 expand_omp_build_assign (&gsif, v, t);
6122 if (itercnt == NULL_TREE)
6124 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6126 itercntbias
6127 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6128 fold_convert (itype, fd->loop.n1));
6129 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6130 itercntbias, step);
6131 itercntbias
6132 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6133 NULL_TREE, true,
6134 GSI_SAME_STMT);
6135 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6136 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6137 NULL_TREE, false,
6138 GSI_CONTINUE_LINKING);
6140 else
6141 itercnt = s0;
6143 a = fold_build2 (MULT_EXPR, type,
6144 fold_convert (type, itercnt),
6145 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6146 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6147 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6148 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6149 false, GSI_CONTINUE_LINKING);
6150 expand_omp_build_assign (&gsi, dest, t, true);
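/* Unlike the static (no chunk) case, SEQ_START_BB is re-entered once
   per trip here, so the loop above snapshots the incoming value of
   each linear variable into V at GSIF, before the trip loop, and
   recomputes dest = v + itercnt * step from that snapshot on every
   trip instead of updating the variable cumulatively; ITERCNTBIAS,
   the trip-invariant part of the count, is likewise computed once at
   GSIF.  */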
6152 if (fd->collapse > 1)
6153 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6155 if (!broken_loop)
6157 /* The code controlling the sequential loop goes in CONT_BB,
6158 replacing the GIMPLE_OMP_CONTINUE. */
6159 gsi = gsi_last_nondebug_bb (cont_bb);
6160 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6161 vmain = gimple_omp_continue_control_use (cont_stmt);
6162 vback = gimple_omp_continue_control_def (cont_stmt);
6164 if (cond_var)
6166 tree itype = TREE_TYPE (cond_var);
6167 tree t2;
6168 if (POINTER_TYPE_P (type)
6169 || TREE_CODE (n1) != INTEGER_CST
6170 || fd->loop.cond_code != LT_EXPR)
6171 t2 = build_int_cst (itype, 1);
6172 else
6173 t2 = fold_convert (itype, step);
6174 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6175 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6176 NULL_TREE, true, GSI_SAME_STMT);
6177 assign_stmt = gimple_build_assign (cond_var, t2);
6178 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6181 if (!gimple_omp_for_combined_p (fd->for_stmt))
6183 if (POINTER_TYPE_P (type))
6184 t = fold_build_pointer_plus (vmain, step);
6185 else
6186 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6187 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6188 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6189 true, GSI_SAME_STMT);
6190 assign_stmt = gimple_build_assign (vback, t);
6191 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6193 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6194 t = build2 (EQ_EXPR, boolean_type_node,
6195 build_int_cst (itype, 0),
6196 build_int_cst (itype, 1));
6197 else
6198 t = build2 (fd->loop.cond_code, boolean_type_node,
6199 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6200 ? t : vback, e);
6201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
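/* Note the constant-false 0 == 1 condition above: with a chunk size
   of 1, each thread runs exactly one logical iteration per trip, so
   the sequential loop never latches back and control always falls
   through to the trip update.  */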
6204 /* Remove GIMPLE_OMP_CONTINUE. */
6205 gsi_remove (&gsi, true);
6207 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6208 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6210 /* Trip update code goes into TRIP_UPDATE_BB. */
6211 gsi = gsi_start_bb (trip_update_bb);
6213 t = build_int_cst (itype, 1);
6214 t = build2 (PLUS_EXPR, itype, trip_main, t);
6215 assign_stmt = gimple_build_assign (trip_back, t);
6216 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6219 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6220 gsi = gsi_last_nondebug_bb (exit_bb);
6221 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6223 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6224 if (fd->have_reductemp || fd->have_pointer_condtemp)
6226 tree fn;
6227 if (t)
6228 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6229 else
6230 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6231 gcall *g = gimple_build_call (fn, 0);
6232 if (t)
6234 gimple_call_set_lhs (g, t);
6235 if (fd->have_reductemp)
6236 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6237 NOP_EXPR, t),
6238 GSI_SAME_STMT);
6240 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6242 else
6243 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6245 else if (fd->have_pointer_condtemp)
6247 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6248 gcall *g = gimple_build_call (fn, 0);
6249 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6251 gsi_remove (&gsi, true);
6253 /* Connect the new blocks. */
6254 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6255 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6257 if (!broken_loop)
6259 se = find_edge (cont_bb, body_bb);
6260 if (se == NULL)
6262 se = BRANCH_EDGE (cont_bb);
6263 gcc_assert (single_succ (se->dest) == body_bb);
6265 if (gimple_omp_for_combined_p (fd->for_stmt))
6267 remove_edge (se);
6268 se = NULL;
6270 else if (fd->collapse > 1)
6272 remove_edge (se);
6273 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6275 else
6276 se->flags = EDGE_TRUE_VALUE;
6277 find_edge (cont_bb, trip_update_bb)->flags
6278 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6280 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6281 iter_part_bb);
6284 if (gimple_in_ssa_p (cfun))
6286 gphi_iterator psi;
6287 gphi *phi;
6288 edge re, ene;
6289 edge_var_map *vm;
6290 size_t i;
6292 gcc_assert (fd->collapse == 1 && !broken_loop);
6294 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6295 remove arguments of the phi nodes in fin_bb. We need to create
6296 appropriate phi nodes in iter_part_bb instead. */
6297 se = find_edge (iter_part_bb, fin_bb);
6298 re = single_succ_edge (trip_update_bb);
6299 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6300 ene = single_succ_edge (entry_bb);
6302 psi = gsi_start_phis (fin_bb);
6303 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6304 gsi_next (&psi), ++i)
6306 gphi *nphi;
6307 location_t locus;
6309 phi = psi.phi ();
6310 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6311 redirect_edge_var_map_def (vm), 0))
6312 continue;
6314 t = gimple_phi_result (phi);
6315 gcc_assert (t == redirect_edge_var_map_result (vm));
6317 if (!single_pred_p (fin_bb))
6318 t = copy_ssa_name (t, phi);
6320 nphi = create_phi_node (t, iter_part_bb);
6322 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6323 locus = gimple_phi_arg_location_from_edge (phi, se);
6325 /* A special case -- fd->loop.v is not yet computed in
6326 iter_part_bb, so we need to use vextra instead.  */
6327 if (t == fd->loop.v)
6328 t = vextra;
6329 add_phi_arg (nphi, t, ene, locus);
6330 locus = redirect_edge_var_map_location (vm);
6331 tree back_arg = redirect_edge_var_map_def (vm);
6332 add_phi_arg (nphi, back_arg, re, locus);
6333 edge ce = find_edge (cont_bb, body_bb);
6334 if (ce == NULL)
6336 ce = BRANCH_EDGE (cont_bb);
6337 gcc_assert (single_succ (ce->dest) == body_bb);
6338 ce = single_succ_edge (ce->dest);
6340 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6341 gcc_assert (inner_loop_phi != NULL);
6342 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6343 find_edge (seq_start_bb, body_bb), locus);
6345 if (!single_pred_p (fin_bb))
6346 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6348 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6349 redirect_edge_var_map_clear (re);
6350 if (single_pred_p (fin_bb))
6351 while (1)
6353 psi = gsi_start_phis (fin_bb);
6354 if (gsi_end_p (psi))
6355 break;
6356 remove_phi_node (&psi, false);
6359 /* Make phi node for trip. */
6360 phi = create_phi_node (trip_main, iter_part_bb);
6361 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6362 UNKNOWN_LOCATION);
6363 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6364 UNKNOWN_LOCATION);
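/* I.e. the trip counter now forms the canonical loop phi (a sketch
   using the SSA names created earlier):
     iter_part_bb:  trip_main = PHI <trip_init (entry_bb),
				     trip_back (trip_update_bb)>
   so each pass through TRIP_UPDATE_BB advances the chunk index.  */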
6367 if (!broken_loop)
6368 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6369 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6370 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6371 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6372 recompute_dominator (CDI_DOMINATORS, fin_bb));
6373 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6374 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6375 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6376 recompute_dominator (CDI_DOMINATORS, body_bb));
6378 if (!broken_loop)
6380 class loop *loop = body_bb->loop_father;
6381 class loop *trip_loop = alloc_loop ();
6382 trip_loop->header = iter_part_bb;
6383 trip_loop->latch = trip_update_bb;
6384 add_loop (trip_loop, iter_part_bb->loop_father);
6386 if (loop != entry_bb->loop_father)
6388 gcc_assert (loop->header == body_bb);
6389 gcc_assert (loop->latch == region->cont
6390 || single_pred (loop->latch) == region->cont);
6391 trip_loop->inner = loop;
6392 return;
6395 if (!gimple_omp_for_combined_p (fd->for_stmt))
6397 loop = alloc_loop ();
6398 loop->header = body_bb;
6399 if (collapse_bb == NULL)
6400 loop->latch = cont_bb;
6401 add_loop (loop, trip_loop);
6406 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6407 loop. Given parameters:
6409 for (V = N1; V cond N2; V += STEP) BODY;
6411 where COND is "<" or ">", we generate pseudocode
6413 V = N1;
6414 goto L1;
6415 L0:
6416 BODY;
6417 V += STEP;
6418 L1:
6419 if (V cond N2) goto L0; else goto L2;
6420 L2:
6422 For collapsed loops, emit the outer loops as scalar
6423 and only try to vectorize the innermost loop. */
6425 static void
6426 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6428 tree type, t;
6429 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6430 gimple_stmt_iterator gsi;
6431 gimple *stmt;
6432 gcond *cond_stmt;
6433 bool broken_loop = region->cont == NULL;
6434 edge e, ne;
6435 tree *counts = NULL;
6436 int i;
6437 int safelen_int = INT_MAX;
6438 bool dont_vectorize = false;
6439 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6440 OMP_CLAUSE_SAFELEN);
6441 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6442 OMP_CLAUSE__SIMDUID_);
6443 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6444 OMP_CLAUSE_IF);
6445 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6446 OMP_CLAUSE_SIMDLEN);
6447 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6448 OMP_CLAUSE__CONDTEMP_);
6449 tree n1, n2;
6450 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6452 if (safelen)
6454 poly_uint64 val;
6455 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6456 if (!poly_int_tree_p (safelen, &val))
6457 safelen_int = 0;
6458 else
6459 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6460 if (safelen_int == 1)
6461 safelen_int = 0;
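/* Example (hypothetical pragma): '#pragma omp simd safelen(8)' yields
   safelen_int == 8 here.  A SAFELEN that is not a (poly) integer
   constant, safelen(1), and the if(0)/simdlen(1) cases below all
   degrade to safelen_int == 0, i.e. no useful vectorization width.  */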
6463 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6464 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6466 safelen_int = 0;
6467 dont_vectorize = true;
6469 type = TREE_TYPE (fd->loop.v);
6470 entry_bb = region->entry;
6471 cont_bb = region->cont;
6472 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6473 gcc_assert (broken_loop
6474 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6475 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6476 if (!broken_loop)
6478 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6479 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6480 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6481 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6483 else
6485 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6486 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6487 l2_bb = single_succ (l1_bb);
6489 exit_bb = region->exit;
6490 l2_dom_bb = NULL;
6492 gsi = gsi_last_nondebug_bb (entry_bb);
6494 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6495 /* Not needed in SSA form right now. */
6496 gcc_assert (!gimple_in_ssa_p (cfun));
6497 if (fd->collapse > 1
6498 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6499 || broken_loop))
6501 int first_zero_iter = -1, dummy = -1;
6502 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6504 counts = XALLOCAVEC (tree, fd->collapse);
6505 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6506 zero_iter_bb, first_zero_iter,
6507 dummy_bb, dummy, l2_dom_bb);
6509 if (l2_dom_bb == NULL)
6510 l2_dom_bb = l1_bb;
6512 n1 = fd->loop.n1;
6513 n2 = fd->loop.n2;
6514 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6516 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6517 OMP_CLAUSE__LOOPTEMP_);
6518 gcc_assert (innerc);
6519 n1 = OMP_CLAUSE_DECL (innerc);
6520 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6521 OMP_CLAUSE__LOOPTEMP_);
6522 gcc_assert (innerc);
6523 n2 = OMP_CLAUSE_DECL (innerc);
6525 tree step = fd->loop.step;
6526 tree orig_step = step; /* STEP may be scaled below when is_simt; keep the original.  */
6528 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6529 OMP_CLAUSE__SIMT_);
6530 if (is_simt)
6532 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6533 is_simt = safelen_int > 1;
6535 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6536 if (is_simt)
6538 simt_lane = create_tmp_var (unsigned_type_node);
6539 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6540 gimple_call_set_lhs (g, simt_lane);
6541 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6542 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6543 fold_convert (TREE_TYPE (step), simt_lane));
6544 n1 = fold_convert (type, n1);
6545 if (POINTER_TYPE_P (type))
6546 n1 = fold_build_pointer_plus (n1, offset);
6547 else
6548 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6550 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6551 if (fd->collapse > 1)
6552 simt_maxlane = build_one_cst (unsigned_type_node);
6553 else if (safelen_int < omp_max_simt_vf ())
6554 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6555 tree vf
6556 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6557 unsigned_type_node, 0);
6558 if (simt_maxlane)
6559 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6560 vf = fold_convert (TREE_TYPE (step), vf);
6561 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
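/* A worked example (illustrative numbers): with STEP = 1 and a SIMT
   VF of 4, lane L starts at N1 + L (the offset added to N1 above) and
   now strides by 4, so lane 0 executes iterations 0, 4, 8, ... and
   lane 1 executes 1, 5, 9, ..., interleaving the iteration space
   across the lanes.  */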
6564 tree n2var = NULL_TREE;
6565 tree n2v = NULL_TREE;
6566 tree *nonrect_bounds = NULL;
6567 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6568 if (fd->collapse > 1)
6570 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6572 if (fd->non_rect)
6574 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6575 memset (nonrect_bounds, 0,
6576 sizeof (tree) * (fd->last_nonrect + 1));
6578 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6579 gcc_assert (entry_bb == gsi_bb (gsi));
6580 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6581 gsi_prev (&gsi);
6582 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6583 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6584 NULL, n1);
6585 gsi = gsi_for_stmt (fd->for_stmt);
6587 if (broken_loop)
6589 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6591 /* Compute in n2var the limit for the first innermost loop,
6592 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6593 where cnt is how many iterations the loop would have if
6594 all further iterations were assigned to the current task.  */
6595 n2var = create_tmp_var (type);
6596 i = fd->collapse - 1;
6597 tree itype = TREE_TYPE (fd->loops[i].v);
6598 if (POINTER_TYPE_P (itype))
6599 itype = signed_type_for (itype);
6600 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6601 ? -1 : 1));
6602 t = fold_build2 (PLUS_EXPR, itype,
6603 fold_convert (itype, fd->loops[i].step), t);
6604 t = fold_build2 (PLUS_EXPR, itype, t,
6605 fold_convert (itype, fd->loops[i].n2));
6606 if (fd->loops[i].m2)
6608 tree t2 = fold_convert (itype,
6609 fd->loops[i - fd->loops[i].outer].v);
6610 tree t3 = fold_convert (itype, fd->loops[i].m2);
6611 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6612 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6614 t = fold_build2 (MINUS_EXPR, itype, t,
6615 fold_convert (itype, fd->loops[i].v));
6616 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6617 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6618 fold_build1 (NEGATE_EXPR, itype, t),
6619 fold_build1 (NEGATE_EXPR, itype,
6620 fold_convert (itype,
6621 fd->loops[i].step)));
6622 else
6623 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6624 fold_convert (itype, fd->loops[i].step));
6625 t = fold_convert (type, t);
6626 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6627 min_arg1 = create_tmp_var (type);
6628 expand_omp_build_assign (&gsi, min_arg1, t2);
6629 min_arg2 = create_tmp_var (type);
6630 expand_omp_build_assign (&gsi, min_arg2, t);
6632 else
6634 if (TREE_CODE (n2) == INTEGER_CST)
6636 /* Indicate for lastprivate handling that at least one iteration
6637 has been performed, without wasting runtime. */
6638 if (integer_nonzerop (n2))
6639 expand_omp_build_assign (&gsi, fd->loop.v,
6640 fold_convert (type, n2));
6641 else
6642 /* Indicate that no iteration has been performed. */
6643 expand_omp_build_assign (&gsi, fd->loop.v,
6644 build_one_cst (type));
6646 else
6648 expand_omp_build_assign (&gsi, fd->loop.v,
6649 build_zero_cst (type));
6650 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6652 for (i = 0; i < fd->collapse; i++)
6654 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6655 if (fd->loops[i].m1)
6657 tree t2
6658 = fold_convert (TREE_TYPE (t),
6659 fd->loops[i - fd->loops[i].outer].v);
6660 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6661 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6662 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6664 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6665 /* For normal non-combined collapsed loops just initialize
6666 the outermost iterator in the entry_bb. */
6667 if (!broken_loop)
6668 break;
6672 else
6673 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6674 tree altv = NULL_TREE, altn2 = NULL_TREE;
6675 if (fd->collapse == 1
6676 && !broken_loop
6677 && TREE_CODE (orig_step) != INTEGER_CST)
6679 /* The vectorizer currently punts on loops with a non-constant step
6680 for the main IV (it cannot compute the number of iterations and
6681 gives up because of that).  Since for OpenMP loops it is always
6682 possible to compute the number of iterations upfront, use an
6683 alternate IV as the loop iterator:
6684 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6685 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6686 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6687 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6688 tree itype = TREE_TYPE (fd->loop.v);
6689 if (POINTER_TYPE_P (itype))
6690 itype = signed_type_for (itype);
6691 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6692 t = fold_build2 (PLUS_EXPR, itype,
6693 fold_convert (itype, step), t);
6694 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6695 t = fold_build2 (MINUS_EXPR, itype, t,
6696 fold_convert (itype, fd->loop.v));
6697 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6698 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6699 fold_build1 (NEGATE_EXPR, itype, t),
6700 fold_build1 (NEGATE_EXPR, itype,
6701 fold_convert (itype, step)));
6702 else
6703 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6704 fold_convert (itype, step));
6705 t = fold_convert (TREE_TYPE (altv), t);
6706 altn2 = create_tmp_var (TREE_TYPE (altv));
6707 expand_omp_build_assign (&gsi, altn2, t);
6708 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6709 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6710 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6711 true, GSI_SAME_STMT);
6712 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6713 build_zero_cst (TREE_TYPE (altv)));
6714 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
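/* A worked instance (illustration only): for (i = 0; i < 10; i += s)
   with runtime s == 3, the code above computes
     altn2 = (3 - 1 + 10 - 0) / 3 = 4
   (zeroed by the COND_EXPR when the loop would not run at all), and
   the loop then iterates altv = 0..3 while i takes 0, 3, 6, 9, giving
   the vectorizer a simple counted IV.  */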
6716 else if (fd->collapse > 1
6717 && !broken_loop
6718 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6719 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6721 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6722 altn2 = create_tmp_var (TREE_TYPE (altv));
6724 if (cond_var)
6726 if (POINTER_TYPE_P (type)
6727 || TREE_CODE (n1) != INTEGER_CST
6728 || fd->loop.cond_code != LT_EXPR
6729 || tree_int_cst_sgn (n1) != 1)
6730 expand_omp_build_assign (&gsi, cond_var,
6731 build_one_cst (TREE_TYPE (cond_var)));
6732 else
6733 expand_omp_build_assign (&gsi, cond_var,
6734 fold_convert (TREE_TYPE (cond_var), n1));
6737 /* Remove the GIMPLE_OMP_FOR statement. */
6738 gsi_remove (&gsi, true);
6740 if (!broken_loop)
6742 /* Code to control the increment goes in the CONT_BB. */
6743 gsi = gsi_last_nondebug_bb (cont_bb);
6744 stmt = gsi_stmt (gsi);
6745 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6747 if (fd->collapse == 1
6748 || gimple_omp_for_combined_into_p (fd->for_stmt))
6750 if (POINTER_TYPE_P (type))
6751 t = fold_build_pointer_plus (fd->loop.v, step);
6752 else
6753 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6754 expand_omp_build_assign (&gsi, fd->loop.v, t);
6756 else if (TREE_CODE (n2) != INTEGER_CST)
6757 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6758 if (altv)
6760 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6761 build_one_cst (TREE_TYPE (altv)));
6762 expand_omp_build_assign (&gsi, altv, t);
6765 if (fd->collapse > 1)
6767 i = fd->collapse - 1;
6768 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6769 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6770 else
6772 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6773 fd->loops[i].step);
6774 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6775 fd->loops[i].v, t);
6777 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6779 if (cond_var)
6781 if (POINTER_TYPE_P (type)
6782 || TREE_CODE (n1) != INTEGER_CST
6783 || fd->loop.cond_code != LT_EXPR
6784 || tree_int_cst_sgn (n1) != 1)
6785 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6786 build_one_cst (TREE_TYPE (cond_var)));
6787 else
6788 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6789 fold_convert (TREE_TYPE (cond_var), step));
6790 expand_omp_build_assign (&gsi, cond_var, t);
6793 /* Remove GIMPLE_OMP_CONTINUE. */
6794 gsi_remove (&gsi, true);
6797 /* Emit the condition in L1_BB. */
6798 gsi = gsi_start_bb (l1_bb);
6800 if (altv)
6801 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6802 else if (fd->collapse > 1
6803 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6804 && !broken_loop)
6806 i = fd->collapse - 1;
6807 tree itype = TREE_TYPE (fd->loops[i].v);
6808 if (fd->loops[i].m2)
6809 t = n2v = create_tmp_var (itype);
6810 else
6811 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6812 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6813 false, GSI_CONTINUE_LINKING);
6814 tree v = fd->loops[i].v;
6815 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6816 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6817 false, GSI_CONTINUE_LINKING);
6818 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6820 else
6822 if (fd->collapse > 1 && !broken_loop)
6823 t = n2var;
6824 else
6825 t = fold_convert (type, unshare_expr (n2));
6826 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6827 false, GSI_CONTINUE_LINKING);
6828 tree v = fd->loop.v;
6829 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6830 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6831 false, GSI_CONTINUE_LINKING);
6832 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6834 cond_stmt = gimple_build_cond_empty (t);
6835 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6836 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6837 NULL, NULL)
6838 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6839 NULL, NULL))
6841 gsi = gsi_for_stmt (cond_stmt);
6842 gimple_regimplify_operands (cond_stmt, &gsi);
6845 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6846 if (is_simt)
6848 gsi = gsi_start_bb (l2_bb);
6849 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6850 if (POINTER_TYPE_P (type))
6851 t = fold_build_pointer_plus (fd->loop.v, step);
6852 else
6853 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6854 expand_omp_build_assign (&gsi, fd->loop.v, t);
6857 /* Remove GIMPLE_OMP_RETURN. */
6858 gsi = gsi_last_nondebug_bb (exit_bb);
6859 gsi_remove (&gsi, true);
6861 /* Connect the new blocks. */
6862 remove_edge (FALLTHRU_EDGE (entry_bb));
6864 if (!broken_loop)
6866 remove_edge (BRANCH_EDGE (entry_bb));
6867 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6869 e = BRANCH_EDGE (l1_bb);
6870 ne = FALLTHRU_EDGE (l1_bb);
6871 e->flags = EDGE_TRUE_VALUE;
6873 else
6875 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6877 ne = single_succ_edge (l1_bb);
6878 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6881 ne->flags = EDGE_FALSE_VALUE;
6882 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6883 ne->probability = e->probability.invert ();
6885 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6886 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6888 if (simt_maxlane)
6890 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6891 NULL_TREE, NULL_TREE);
6892 gsi = gsi_last_bb (entry_bb);
6893 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6894 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6895 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6896 FALLTHRU_EDGE (entry_bb)->probability
6897 = profile_probability::guessed_always ().apply_scale (7, 8);
6898 BRANCH_EDGE (entry_bb)->probability
6899 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6900 l2_dom_bb = entry_bb;
6902 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6904 if (!broken_loop && fd->collapse > 1)
6906 basic_block last_bb = l1_bb;
6907 basic_block init_bb = NULL;
6908 for (i = fd->collapse - 2; i >= 0; i--)
6910 tree nextn2v = NULL_TREE;
6911 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6912 e = EDGE_SUCC (last_bb, 0);
6913 else
6914 e = EDGE_SUCC (last_bb, 1);
6915 basic_block bb = split_edge (e);
6916 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6917 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6918 else
6920 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6921 fd->loops[i].step);
6922 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6923 fd->loops[i].v, t);
6925 gsi = gsi_after_labels (bb);
6926 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6928 bb = split_block (bb, last_stmt (bb))->dest;
6929 gsi = gsi_start_bb (bb);
6930 tree itype = TREE_TYPE (fd->loops[i].v);
6931 if (fd->loops[i].m2)
6932 t = nextn2v = create_tmp_var (itype);
6933 else
6934 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6935 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6936 false, GSI_CONTINUE_LINKING);
6937 tree v = fd->loops[i].v;
6938 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6939 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6940 false, GSI_CONTINUE_LINKING);
6941 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6942 cond_stmt = gimple_build_cond_empty (t);
6943 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6944 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6945 expand_omp_regimplify_p, NULL, NULL)
6946 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6947 expand_omp_regimplify_p, NULL, NULL))
6949 gsi = gsi_for_stmt (cond_stmt);
6950 gimple_regimplify_operands (cond_stmt, &gsi);
6952 ne = single_succ_edge (bb);
6953 ne->flags = EDGE_FALSE_VALUE;
6955 init_bb = create_empty_bb (bb);
6956 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6957 add_bb_to_loop (init_bb, bb->loop_father);
6958 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6959 e->probability
6960 = profile_probability::guessed_always ().apply_scale (7, 8);
6961 ne->probability = e->probability.invert ();
6963 gsi = gsi_after_labels (init_bb);
6964 if (fd->loops[i + 1].m1)
6966 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6967 fd->loops[i + 1
6968 - fd->loops[i + 1].outer].v);
6969 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6970 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6971 else
6973 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6974 fd->loops[i + 1].n1);
6975 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6976 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6977 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6980 else
6981 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6982 fd->loops[i + 1].n1);
6983 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6984 if (fd->loops[i + 1].m2)
6986 if (i + 2 == fd->collapse && (n2var || altv))
6988 gcc_assert (n2v == NULL_TREE);
6989 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6991 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6992 fd->loops[i + 1
6993 - fd->loops[i + 1].outer].v);
6994 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6995 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
6996 else
6998 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6999 fd->loops[i + 1].n2);
7000 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
7001 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
7002 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
7004 expand_omp_build_assign (&gsi, n2v, t);
7006 if (i + 2 == fd->collapse && n2var)
7008 /* For composite simd, n2 is the first iteration the current
7009 task shouldn't already handle, so we effectively want to use
7010 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
7011 as the vectorized loop. Except the vectorizer will not
7012 vectorize that, so instead compute N2VAR as
7013 N2VAR = V + MIN (N2 - V, COUNTS3) and use
7014 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
7015 as the loop to vectorize. */
7016 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
7017 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
7019 tree itype = TREE_TYPE (fd->loops[i].v);
7020 if (POINTER_TYPE_P (itype))
7021 itype = signed_type_for (itype);
7022 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
7023 == LT_EXPR ? -1 : 1));
7024 t = fold_build2 (PLUS_EXPR, itype,
7025 fold_convert (itype,
7026 fd->loops[i + 1].step), t);
7027 if (fd->loops[i + 1].m2 == NULL_TREE)
7028 t = fold_build2 (PLUS_EXPR, itype, t,
7029 fold_convert (itype,
7030 fd->loops[i + 1].n2));
7031 else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
7033 t = fold_build_pointer_plus (n2v, t);
7034 t = fold_convert (itype, t);
7036 else
7037 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
7038 t = fold_build2 (MINUS_EXPR, itype, t,
7039 fold_convert (itype, fd->loops[i + 1].v));
7040 tree step = fold_convert (itype, fd->loops[i + 1].step);
7041 if (TYPE_UNSIGNED (itype)
7042 && fd->loops[i + 1].cond_code == GT_EXPR)
7043 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7044 fold_build1 (NEGATE_EXPR, itype, t),
7045 fold_build1 (NEGATE_EXPR, itype, step));
7046 else
7047 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7048 t = fold_convert (type, t);
7050 else
7051 t = counts[i + 1];
7052 expand_omp_build_assign (&gsi, min_arg1, t2);
7053 expand_omp_build_assign (&gsi, min_arg2, t);
7054 e = split_block (init_bb, last_stmt (init_bb));
7055 gsi = gsi_after_labels (e->dest);
7056 init_bb = e->dest;
7057 remove_edge (FALLTHRU_EDGE (entry_bb));
7058 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
7059 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
7060 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
7061 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
7062 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
7063 expand_omp_build_assign (&gsi, n2var, t);
7065 if (i + 2 == fd->collapse && altv)
7067 /* The vectorizer currently punts on loops with non-constant
7068 steps for the main IV (can't compute number of iterations
7069 and gives up because of that). As for OpenMP loops it is
7070 always possible to compute the number of iterations upfront,
7071 use an alternate IV as the loop iterator. */
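/* Schematically (a sketch, not the exact GIMPLE produced here), instead
   of iterating
   for (V = N1; V cond N2; V += STEP)   // STEP not a compile-time constant
   the generated loop is driven as
   for (ALTV = 0; ALTV < ALTN2; ALTV++)
   with V still advanced by STEP each iteration, so the vectorizer sees
   a unit-stride IV whose trip count ALTN2 is computed just below.  */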
7072 expand_omp_build_assign (&gsi, altv,
7073 build_zero_cst (TREE_TYPE (altv)));
7074 tree itype = TREE_TYPE (fd->loops[i + 1].v);
7075 if (POINTER_TYPE_P (itype))
7076 itype = signed_type_for (itype);
7077 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
7078 ? -1 : 1));
7079 t = fold_build2 (PLUS_EXPR, itype,
7080 fold_convert (itype, fd->loops[i + 1].step), t);
7081 t = fold_build2 (PLUS_EXPR, itype, t,
7082 fold_convert (itype,
7083 fd->loops[i + 1].m2
7084 ? n2v : fd->loops[i + 1].n2));
7085 t = fold_build2 (MINUS_EXPR, itype, t,
7086 fold_convert (itype, fd->loops[i + 1].v));
7087 tree step = fold_convert (itype, fd->loops[i + 1].step);
7088 if (TYPE_UNSIGNED (itype)
7089 && fd->loops[i + 1].cond_code == GT_EXPR)
7090 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7091 fold_build1 (NEGATE_EXPR, itype, t),
7092 fold_build1 (NEGATE_EXPR, itype, step));
7093 else
7094 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7095 t = fold_convert (TREE_TYPE (altv), t);
7096 expand_omp_build_assign (&gsi, altn2, t);
7097 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7098 fd->loops[i + 1].m2
7099 ? n2v : fd->loops[i + 1].n2);
7100 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7101 fd->loops[i + 1].v, t2);
7102 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7103 true, GSI_SAME_STMT);
7104 gassign *g
7105 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7106 build_zero_cst (TREE_TYPE (altv)));
7107 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7109 n2v = nextn2v;
7111 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7112 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7114 e = find_edge (entry_bb, last_bb);
7115 redirect_edge_succ (e, bb);
7116 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7117 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7120 last_bb = bb;
7123 if (!broken_loop)
7125 class loop *loop = alloc_loop ();
7126 loop->header = l1_bb;
7127 loop->latch = cont_bb;
7128 add_loop (loop, l1_bb->loop_father);
7129 loop->safelen = safelen_int;
7130 if (simduid)
7132 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7133 cfun->has_simduid_loops = true;
7135 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7136 the loop. */
7137 if ((flag_tree_loop_vectorize
7138 || !OPTION_SET_P (flag_tree_loop_vectorize))
7139 && flag_tree_loop_optimize
7140 && loop->safelen > 1)
7142 loop->force_vectorize = true;
7143 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7145 unsigned HOST_WIDE_INT v
7146 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7147 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7148 loop->simdlen = v;
7150 cfun->has_force_vectorize_loops = true;
7152 else if (dont_vectorize)
7153 loop->dont_vectorize = true;
7155 else if (simduid)
7156 cfun->has_simduid_loops = true;
7159 /* Taskloop construct is represented after gimplification with
7160 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7161 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7162 which should just compute all the needed loop temporaries
7163 for GIMPLE_OMP_TASK. */
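/* For illustration, a user construct such as (hypothetical source)

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body;

   is represented after gimplification roughly as

     GIMPLE_OMP_FOR          <-- outer; expanded here, computes the
       GIMPLE_OMP_TASK           loop temporaries only
         GIMPLE_OMP_FOR      <-- inner; performs the actual iteration
           body;

   with this routine handling only the outermost node.  */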
7165 static void
7166 expand_omp_taskloop_for_outer (struct omp_region *region,
7167 struct omp_for_data *fd,
7168 gimple *inner_stmt)
7170 tree type, bias = NULL_TREE;
7171 basic_block entry_bb, cont_bb, exit_bb;
7172 gimple_stmt_iterator gsi;
7173 gassign *assign_stmt;
7174 tree *counts = NULL;
7175 int i;
7177 gcc_assert (inner_stmt);
7178 gcc_assert (region->cont);
7179 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7180 && gimple_omp_task_taskloop_p (inner_stmt));
7181 type = TREE_TYPE (fd->loop.v);
7183 /* See if we need to bias by LLONG_MIN. */
7184 if (fd->iter_type == long_long_unsigned_type_node
7185 && TREE_CODE (type) == INTEGER_TYPE
7186 && !TYPE_UNSIGNED (type))
7188 tree n1, n2;
7190 if (fd->loop.cond_code == LT_EXPR)
7192 n1 = fd->loop.n1;
7193 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7195 else
7197 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7198 n2 = fd->loop.n1;
7200 if (TREE_CODE (n1) != INTEGER_CST
7201 || TREE_CODE (n2) != INTEGER_CST
7202 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7203 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
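/* Illustrative instance of the bias (values chosen for exposition): for
   for (long long v = -8; v < 8; v++)
   with fd->iter_type unsigned long long, the bounds straddle zero, so
   BIAS = LLONG_MIN is added to both ends.  Modulo 2^64 that maps -8 to
   0x7ffffffffffffff8 and 8 to 0x8000000000000008, turning the signed
   ordering of the bounds into the unsigned ordering the ULL runtime
   entry points compare with.  */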
7206 entry_bb = region->entry;
7207 cont_bb = region->cont;
7208 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7209 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7210 exit_bb = region->exit;
7212 gsi = gsi_last_nondebug_bb (entry_bb);
7213 gimple *for_stmt = gsi_stmt (gsi);
7214 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7215 if (fd->collapse > 1)
7217 int first_zero_iter = -1, dummy = -1;
7218 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7220 counts = XALLOCAVEC (tree, fd->collapse);
7221 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7222 zero_iter_bb, first_zero_iter,
7223 dummy_bb, dummy, l2_dom_bb);
7225 if (zero_iter_bb)
7227 /* Some counts[i] vars might be uninitialized if
7228 some loop has zero iterations. But the body shouldn't
7229 be executed in that case, so just avoid uninit warnings. */
7230 for (i = first_zero_iter; i < fd->collapse; i++)
7231 if (SSA_VAR_P (counts[i]))
7232 suppress_warning (counts[i], OPT_Wuninitialized);
7233 gsi_prev (&gsi);
7234 edge e = split_block (entry_bb, gsi_stmt (gsi));
7235 entry_bb = e->dest;
7236 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7237 gsi = gsi_last_bb (entry_bb);
7238 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7239 get_immediate_dominator (CDI_DOMINATORS,
7240 zero_iter_bb));
7244 tree t0, t1;
7245 t1 = fd->loop.n2;
7246 t0 = fd->loop.n1;
7247 if (POINTER_TYPE_P (TREE_TYPE (t0))
7248 && TYPE_PRECISION (TREE_TYPE (t0))
7249 != TYPE_PRECISION (fd->iter_type))
7251 /* Avoid casting pointers to integer of a different size. */
7252 tree itype = signed_type_for (type);
7253 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7254 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7256 else
7258 t1 = fold_convert (fd->iter_type, t1);
7259 t0 = fold_convert (fd->iter_type, t0);
7261 if (bias)
7263 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7264 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7267 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7268 OMP_CLAUSE__LOOPTEMP_);
7269 gcc_assert (innerc);
7270 tree startvar = OMP_CLAUSE_DECL (innerc);
7271 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7272 gcc_assert (innerc);
7273 tree endvar = OMP_CLAUSE_DECL (innerc);
7274 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7276 innerc = find_lastprivate_looptemp (fd, innerc);
7277 if (innerc)
7279 /* If needed (inner taskloop has lastprivate clause), propagate
7280 down the total number of iterations. */
7281 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7282 NULL_TREE, false,
7283 GSI_CONTINUE_LINKING);
7284 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7285 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7289 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7290 GSI_CONTINUE_LINKING);
7291 assign_stmt = gimple_build_assign (startvar, t0);
7292 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7294 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7295 GSI_CONTINUE_LINKING);
7296 assign_stmt = gimple_build_assign (endvar, t1);
7297 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7298 if (fd->collapse > 1)
7299 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7301 /* Remove the GIMPLE_OMP_FOR statement. */
7302 gsi = gsi_for_stmt (for_stmt);
7303 gsi_remove (&gsi, true);
7305 gsi = gsi_last_nondebug_bb (cont_bb);
7306 gsi_remove (&gsi, true);
7308 gsi = gsi_last_nondebug_bb (exit_bb);
7309 gsi_remove (&gsi, true);
7311 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7312 remove_edge (BRANCH_EDGE (entry_bb));
7313 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7314 remove_edge (BRANCH_EDGE (cont_bb));
7315 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7316 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7317 recompute_dominator (CDI_DOMINATORS, region->entry));
7320 /* Taskloop construct is represented after gimplification with
7321 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7322 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7323 GOMP_taskloop{,_ull} function arranges for each task to be given just
7324 a single range of iterations. */
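/* A sketch of the per-task expansion (illustrative; START and END stand
   for the two _LOOPTEMP_ temporaries the runtime fills in with this
   task's sub-range):

     for (V = START; V cond END; V += STEP)
       body;

   If an LLONG_MIN bias was applied when calling the runtime, START and
   END arrive in the biased domain; since LLONG_MIN is its own additive
   inverse modulo 2^64, adding the bias once more below recovers the
   original values.  */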
7326 static void
7327 expand_omp_taskloop_for_inner (struct omp_region *region,
7328 struct omp_for_data *fd,
7329 gimple *inner_stmt)
7331 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7332 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7333 basic_block fin_bb;
7334 gimple_stmt_iterator gsi;
7335 edge ep;
7336 bool broken_loop = region->cont == NULL;
7337 tree *counts = NULL;
7338 tree n1, n2, step;
7340 itype = type = TREE_TYPE (fd->loop.v);
7341 if (POINTER_TYPE_P (type))
7342 itype = signed_type_for (type);
7344 /* See if we need to bias by LLONG_MIN. */
7345 if (fd->iter_type == long_long_unsigned_type_node
7346 && TREE_CODE (type) == INTEGER_TYPE
7347 && !TYPE_UNSIGNED (type))
7349 tree n1, n2;
7351 if (fd->loop.cond_code == LT_EXPR)
7353 n1 = fd->loop.n1;
7354 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7356 else
7358 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7359 n2 = fd->loop.n1;
7361 if (TREE_CODE (n1) != INTEGER_CST
7362 || TREE_CODE (n2) != INTEGER_CST
7363 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7364 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7367 entry_bb = region->entry;
7368 cont_bb = region->cont;
7369 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7370 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7371 gcc_assert (broken_loop
7372 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7373 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7374 if (!broken_loop)
7376 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7377 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7379 exit_bb = region->exit;
7381 /* Iteration space partitioning goes in ENTRY_BB. */
7382 gsi = gsi_last_nondebug_bb (entry_bb);
7383 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7385 if (fd->collapse > 1)
7387 int first_zero_iter = -1, dummy = -1;
7388 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7390 counts = XALLOCAVEC (tree, fd->collapse);
7391 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7392 fin_bb, first_zero_iter,
7393 dummy_bb, dummy, l2_dom_bb);
7394 t = NULL_TREE;
7396 else
7397 t = integer_one_node;
7399 step = fd->loop.step;
7400 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7401 OMP_CLAUSE__LOOPTEMP_);
7402 gcc_assert (innerc);
7403 n1 = OMP_CLAUSE_DECL (innerc);
7404 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7405 gcc_assert (innerc);
7406 n2 = OMP_CLAUSE_DECL (innerc);
7407 if (bias)
7409 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7410 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7412 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7413 true, NULL_TREE, true, GSI_SAME_STMT);
7414 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7415 true, NULL_TREE, true, GSI_SAME_STMT);
7416 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7417 true, NULL_TREE, true, GSI_SAME_STMT);
7419 tree startvar = fd->loop.v;
7420 tree endvar = NULL_TREE;
7422 if (gimple_omp_for_combined_p (fd->for_stmt))
7424 tree clauses = gimple_omp_for_clauses (inner_stmt);
7425 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7426 gcc_assert (innerc);
7427 startvar = OMP_CLAUSE_DECL (innerc);
7428 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7429 OMP_CLAUSE__LOOPTEMP_);
7430 gcc_assert (innerc);
7431 endvar = OMP_CLAUSE_DECL (innerc);
7433 t = fold_convert (TREE_TYPE (startvar), n1);
7434 t = force_gimple_operand_gsi (&gsi, t,
7435 DECL_P (startvar)
7436 && TREE_ADDRESSABLE (startvar),
7437 NULL_TREE, false, GSI_CONTINUE_LINKING);
7438 gimple *assign_stmt = gimple_build_assign (startvar, t);
7439 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7441 t = fold_convert (TREE_TYPE (startvar), n2);
7442 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7443 false, GSI_CONTINUE_LINKING);
7444 if (endvar)
7446 assign_stmt = gimple_build_assign (endvar, e);
7447 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7448 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7449 assign_stmt = gimple_build_assign (fd->loop.v, e);
7450 else
7451 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7452 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7455 tree *nonrect_bounds = NULL;
7456 if (fd->collapse > 1)
7458 if (fd->non_rect)
7460 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7461 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7463 gcc_assert (gsi_bb (gsi) == entry_bb);
7464 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7465 startvar);
7466 entry_bb = gsi_bb (gsi);
7469 if (!broken_loop)
7471 /* The code controlling the sequential loop replaces the
7472 GIMPLE_OMP_CONTINUE. */
7473 gsi = gsi_last_nondebug_bb (cont_bb);
7474 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7475 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7476 vmain = gimple_omp_continue_control_use (cont_stmt);
7477 vback = gimple_omp_continue_control_def (cont_stmt);
7479 if (!gimple_omp_for_combined_p (fd->for_stmt))
7481 if (POINTER_TYPE_P (type))
7482 t = fold_build_pointer_plus (vmain, step);
7483 else
7484 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7485 t = force_gimple_operand_gsi (&gsi, t,
7486 DECL_P (vback)
7487 && TREE_ADDRESSABLE (vback),
7488 NULL_TREE, true, GSI_SAME_STMT);
7489 assign_stmt = gimple_build_assign (vback, t);
7490 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7492 t = build2 (fd->loop.cond_code, boolean_type_node,
7493 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7494 ? t : vback, e);
7495 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7498 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7499 gsi_remove (&gsi, true);
7501 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7502 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7503 cont_bb, body_bb);
7506 /* Remove the GIMPLE_OMP_FOR statement. */
7507 gsi = gsi_for_stmt (fd->for_stmt);
7508 gsi_remove (&gsi, true);
7510 /* Remove the GIMPLE_OMP_RETURN statement. */
7511 gsi = gsi_last_nondebug_bb (exit_bb);
7512 gsi_remove (&gsi, true);
7514 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7515 if (!broken_loop)
7516 remove_edge (BRANCH_EDGE (entry_bb));
7517 else
7519 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7520 region->outer->cont = NULL;
7523 /* Connect all the blocks. */
7524 if (!broken_loop)
7526 ep = find_edge (cont_bb, body_bb);
7527 if (gimple_omp_for_combined_p (fd->for_stmt))
7529 remove_edge (ep);
7530 ep = NULL;
7532 else if (fd->collapse > 1)
7534 remove_edge (ep);
7535 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7537 else
7538 ep->flags = EDGE_TRUE_VALUE;
7539 find_edge (cont_bb, fin_bb)->flags
7540 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7543 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7544 recompute_dominator (CDI_DOMINATORS, body_bb));
7545 if (!broken_loop)
7546 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7547 recompute_dominator (CDI_DOMINATORS, fin_bb));
7549 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7551 class loop *loop = alloc_loop ();
7552 loop->header = body_bb;
7553 if (collapse_bb == NULL)
7554 loop->latch = cont_bb;
7555 add_loop (loop, body_bb->loop_father);
7559 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7560 partitioned loop. The lowering here is abstracted, in that the
7561 loop parameters are passed through internal functions, which are
7562 further lowered by oacc_device_lower, once we get to the target
7563 compiler. The loop is of the form:
7565 for (V = B; V LTGT E; V += S) {BODY}
7567 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7568 (constant 0 for no chunking) and we will have a GWV partitioning
7569 mask, specifying dimensions over which the loop is to be
7570 partitioned (see note below). We generate code that looks like
7571 (this ignores tiling):
7573 <entry_bb> [incoming FALL->body, BRANCH->exit]
7574 typedef signedintify (typeof (V)) T; // underlying signed integral type
7575 T range = E - B;
7576 T chunk_no = 0;
7577 T DIR = LTGT == '<' ? +1 : -1;
7578 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7579 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7581 <head_bb> [created by splitting end of entry_bb]
7582 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7583 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7584 if (!(offset LTGT bound)) goto bottom_bb;
7586 <body_bb> [incoming]
7587 V = B + offset;
7588 {BODY}
7590 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7591 offset += step;
7592 if (offset LTGT bound) goto body_bb; [*]
7594 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7595 chunk_no++;
7596 if (chunk_no < chunk_max) goto head_bb;
7598 <exit_bb> [incoming]
7599 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7601 [*] Needed if V live at end of loop. */
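/* As one illustrative instance (the real partitioning is chosen later,
   in oacc_device_lower, once the target's launch dimensions are known):
   with CHUNK_SIZE 0 and gang-only partitioning, GOACC_LOOP_CHUNK yields
   a single chunk and each gang executes one contiguous [offset, bound)
   slice of the iteration range.  */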
7603 static void
7604 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7606 bool is_oacc_kernels_parallelized
7607 = (lookup_attribute ("oacc kernels parallelized",
7608 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7610 bool is_oacc_kernels
7611 = (lookup_attribute ("oacc kernels",
7612 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7613 if (is_oacc_kernels_parallelized)
7614 gcc_checking_assert (is_oacc_kernels);
7616 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7617 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7618 for SSA specifics, and some are for 'parloops' OpenACC
7619 'kernels'-parallelized specifics. */
7621 tree v = fd->loop.v;
7622 enum tree_code cond_code = fd->loop.cond_code;
7623 enum tree_code plus_code = PLUS_EXPR;
7625 tree chunk_size = integer_minus_one_node;
7626 tree gwv = integer_zero_node;
7627 tree iter_type = TREE_TYPE (v);
7628 tree diff_type = iter_type;
7629 tree plus_type = iter_type;
7630 struct oacc_collapse *counts = NULL;
7632 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7633 == GF_OMP_FOR_KIND_OACC_LOOP);
7634 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7635 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7637 if (POINTER_TYPE_P (iter_type))
7639 plus_code = POINTER_PLUS_EXPR;
7640 plus_type = sizetype;
7642 for (int ix = fd->collapse; ix--;)
7644 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7645 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7646 diff_type = diff_type2;
7648 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7649 diff_type = signed_type_for (diff_type);
7650 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7651 diff_type = integer_type_node;
7653 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7654 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7655 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7656 basic_block bottom_bb = NULL;
7658 /* entry_bb has two successors; the branch edge is to the exit
7659 block, fallthrough edge to body. */
7660 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7661 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7663 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
7664 body_bb, or to a block whose only successor is the body_bb. Its
7665 fallthrough successor is the final block (same as the branch
7666 successor of the entry_bb). */
7667 if (cont_bb)
7669 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7670 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7672 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7673 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7675 else
7676 gcc_assert (!gimple_in_ssa_p (cfun));
7678 /* The exit block only has entry_bb and cont_bb as predecessors. */
7679 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7681 tree chunk_no;
7682 tree chunk_max = NULL_TREE;
7683 tree bound, offset;
7684 tree step = create_tmp_var (diff_type, ".step");
7685 bool up = cond_code == LT_EXPR;
7686 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7687 bool chunking = !gimple_in_ssa_p (cfun);
7688 bool negating;
7690 /* Tiling vars. */
7691 tree tile_size = NULL_TREE;
7692 tree element_s = NULL_TREE;
7693 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7694 basic_block elem_body_bb = NULL;
7695 basic_block elem_cont_bb = NULL;
7697 /* SSA instances. */
7698 tree offset_incr = NULL_TREE;
7699 tree offset_init = NULL_TREE;
7701 gimple_stmt_iterator gsi;
7702 gassign *ass;
7703 gcall *call;
7704 gimple *stmt;
7705 tree expr;
7706 location_t loc;
7707 edge split, be, fte;
7709 /* Split the end of entry_bb to create head_bb. */
7710 split = split_block (entry_bb, last_stmt (entry_bb));
7711 basic_block head_bb = split->dest;
7712 entry_bb = split->src;
7714 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7715 gsi = gsi_last_nondebug_bb (entry_bb);
7716 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7717 loc = gimple_location (for_stmt);
7719 if (gimple_in_ssa_p (cfun))
7721 offset_init = gimple_omp_for_index (for_stmt, 0);
7722 gcc_assert (integer_zerop (fd->loop.n1));
7723 /* The SSA parallelizer does gang parallelism. */
7724 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7727 if (fd->collapse > 1 || fd->tiling)
7729 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7730 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7731 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7732 TREE_TYPE (fd->loop.n2), loc);
7734 if (SSA_VAR_P (fd->loop.n2))
7736 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7737 true, GSI_SAME_STMT);
7738 ass = gimple_build_assign (fd->loop.n2, total);
7739 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7743 tree b = fd->loop.n1;
7744 tree e = fd->loop.n2;
7745 tree s = fd->loop.step;
7747 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7748 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7750 /* Convert the step, avoiding possible unsigned->signed overflow. */
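/* E.g. (illustrative numbers): a 32-bit unsigned step of 0xfffffffc in
   a downward loop really means -4; converting that to a wider signed
   diff_type directly would produce 4294967292, so negate first (giving
   4), convert, and negate again to arrive at -4.  */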
7751 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7752 if (negating)
7753 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7754 s = fold_convert (diff_type, s);
7755 if (negating)
7756 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7757 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7759 if (!chunking)
7760 chunk_size = integer_zero_node;
7761 expr = fold_convert (diff_type, chunk_size);
7762 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7763 NULL_TREE, true, GSI_SAME_STMT);
7765 if (fd->tiling)
7767 /* Determine the tile size and element step,
7768 modify the outer loop step size. */
7769 tile_size = create_tmp_var (diff_type, ".tile_size");
7770 expr = build_int_cst (diff_type, 1);
7771 for (int ix = 0; ix < fd->collapse; ix++)
7772 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7773 expr = force_gimple_operand_gsi (&gsi, expr, true,
7774 NULL_TREE, true, GSI_SAME_STMT);
7775 ass = gimple_build_assign (tile_size, expr);
7776 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7778 element_s = create_tmp_var (diff_type, ".element_s");
7779 ass = gimple_build_assign (element_s, s);
7780 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7782 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7783 s = force_gimple_operand_gsi (&gsi, expr, true,
7784 NULL_TREE, true, GSI_SAME_STMT);
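/* For instance (an illustrative configuration): a collapse(2) loop with
   tile(2,3) gives tile_size = 2 * 3 = 6; element_s keeps the original
   step S for the element loop inside a tile, while the outer loop step
   becomes 6 * S, advancing one whole tile at a time.  */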
7787 /* Determine the range, avoiding possible unsigned->signed overflow. */
7788 negating = !up && TYPE_UNSIGNED (iter_type);
7789 expr = fold_build2 (MINUS_EXPR, plus_type,
7790 fold_convert (plus_type, negating ? b : e),
7791 fold_convert (plus_type, negating ? e : b));
7792 expr = fold_convert (diff_type, expr);
7793 if (negating)
7794 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7795 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7796 NULL_TREE, true, GSI_SAME_STMT);
7798 chunk_no = build_int_cst (diff_type, 0);
7799 if (chunking)
7801 gcc_assert (!gimple_in_ssa_p (cfun));
7803 expr = chunk_no;
7804 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7805 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7807 ass = gimple_build_assign (chunk_no, expr);
7808 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7810 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7811 build_int_cst (integer_type_node,
7812 IFN_GOACC_LOOP_CHUNKS),
7813 dir, range, s, chunk_size, gwv);
7814 gimple_call_set_lhs (call, chunk_max);
7815 gimple_set_location (call, loc);
7816 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7818 else
7819 chunk_size = chunk_no;
7821 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7822 build_int_cst (integer_type_node,
7823 IFN_GOACC_LOOP_STEP),
7824 dir, range, s, chunk_size, gwv);
7825 gimple_call_set_lhs (call, step);
7826 gimple_set_location (call, loc);
7827 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7829 /* Remove the GIMPLE_OMP_FOR. */
7830 gsi_remove (&gsi, true);
7832 /* Fixup edges from head_bb. */
7833 be = BRANCH_EDGE (head_bb);
7834 fte = FALLTHRU_EDGE (head_bb);
7835 be->flags |= EDGE_FALSE_VALUE;
7836 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7838 basic_block body_bb = fte->dest;
7840 if (gimple_in_ssa_p (cfun))
7842 gsi = gsi_last_nondebug_bb (cont_bb);
7843 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7845 offset = gimple_omp_continue_control_use (cont_stmt);
7846 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7848 else
7850 offset = create_tmp_var (diff_type, ".offset");
7851 offset_init = offset_incr = offset;
7853 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7855 /* Loop offset & bound go into head_bb. */
7856 gsi = gsi_start_bb (head_bb);
7858 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7859 build_int_cst (integer_type_node,
7860 IFN_GOACC_LOOP_OFFSET),
7861 dir, range, s,
7862 chunk_size, gwv, chunk_no);
7863 gimple_call_set_lhs (call, offset_init);
7864 gimple_set_location (call, loc);
7865 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7867 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7868 build_int_cst (integer_type_node,
7869 IFN_GOACC_LOOP_BOUND),
7870 dir, range, s,
7871 chunk_size, gwv, offset_init);
7872 gimple_call_set_lhs (call, bound);
7873 gimple_set_location (call, loc);
7874 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7876 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7877 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7878 GSI_CONTINUE_LINKING);
7880 /* V assignment goes into body_bb. */
7881 if (!gimple_in_ssa_p (cfun))
7883 gsi = gsi_start_bb (body_bb);
7885 expr = build2 (plus_code, iter_type, b,
7886 fold_convert (plus_type, offset));
7887 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7888 true, GSI_SAME_STMT);
7889 ass = gimple_build_assign (v, expr);
7890 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7892 if (fd->collapse > 1 || fd->tiling)
7893 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7895 if (fd->tiling)
7897 /* Determine the range of the element loop -- usually simply
7898 the tile_size, but could be smaller if the final
7899 iteration of the outer loop is a partial tile. */
7900 tree e_range = create_tmp_var (diff_type, ".e_range");
7902 expr = build2 (MIN_EXPR, diff_type,
7903 build2 (MINUS_EXPR, diff_type, bound, offset),
7904 build2 (MULT_EXPR, diff_type, tile_size,
7905 element_s));
7906 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7907 true, GSI_SAME_STMT);
7908 ass = gimple_build_assign (e_range, expr);
7909 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7911 /* Determine bound, offset & step of inner loop. */
7912 e_bound = create_tmp_var (diff_type, ".e_bound");
7913 e_offset = create_tmp_var (diff_type, ".e_offset");
7914 e_step = create_tmp_var (diff_type, ".e_step");
7916 /* Mark these as element loops. */
7917 tree t, e_gwv = integer_minus_one_node;
7918 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7920 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7921 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7922 element_s, chunk, e_gwv, chunk);
7923 gimple_call_set_lhs (call, e_offset);
7924 gimple_set_location (call, loc);
7925 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7927 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7928 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7929 element_s, chunk, e_gwv, e_offset);
7930 gimple_call_set_lhs (call, e_bound);
7931 gimple_set_location (call, loc);
7932 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7934 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7935 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7936 element_s, chunk, e_gwv);
7937 gimple_call_set_lhs (call, e_step);
7938 gimple_set_location (call, loc);
7939 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7941 /* Add test and split block. */
7942 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7943 stmt = gimple_build_cond_empty (expr);
7944 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7945 split = split_block (body_bb, stmt);
7946 elem_body_bb = split->dest;
7947 if (cont_bb == body_bb)
7948 cont_bb = elem_body_bb;
7949 body_bb = split->src;
7951 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7953 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7954 if (cont_bb == NULL)
7956 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7957 e->probability = profile_probability::even ();
7958 split->probability = profile_probability::even ();
7961 /* Initialize the user's loop vars. */
7962 gsi = gsi_start_bb (elem_body_bb);
7963 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7964 diff_type);
7968 /* Loop increment goes into cont_bb. If this is not a loop, we
7969 will have spawned threads as if it was, and each one will
7970 execute one iteration. The specification is not explicit about
7971 whether such constructs are ill-formed or not, and they can
7972 occur, especially when noreturn routines are involved. */
7973 if (cont_bb)
7975 gsi = gsi_last_nondebug_bb (cont_bb);
7976 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7977 loc = gimple_location (cont_stmt);
7979 if (fd->tiling)
7981 /* Insert element loop increment and test. */
7982 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7983 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7984 true, GSI_SAME_STMT);
7985 ass = gimple_build_assign (e_offset, expr);
7986 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7987 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7989 stmt = gimple_build_cond_empty (expr);
7990 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7991 split = split_block (cont_bb, stmt);
7992 elem_cont_bb = split->src;
7993 cont_bb = split->dest;
7995 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7996 split->probability = profile_probability::unlikely ().guessed ();
7997 edge latch_edge
7998 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7999 latch_edge->probability = profile_probability::likely ().guessed ();
8001 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
8002 skip_edge->probability = profile_probability::unlikely ().guessed ();
8003 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
8004 loop_entry_edge->probability
8005 = profile_probability::likely ().guessed ();
8007 gsi = gsi_for_stmt (cont_stmt);
8010 /* Increment offset. */
8011 if (gimple_in_ssa_p (cfun))
8012 expr = build2 (plus_code, iter_type, offset,
8013 fold_convert (plus_type, step));
8014 else
8015 expr = build2 (PLUS_EXPR, diff_type, offset, step);
8016 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8017 true, GSI_SAME_STMT);
8018 ass = gimple_build_assign (offset_incr, expr);
8019 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8020 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
8021 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
8023 /* Remove the GIMPLE_OMP_CONTINUE. */
8024 gsi_remove (&gsi, true);
8026 /* Fixup edges from cont_bb. */
8027 be = BRANCH_EDGE (cont_bb);
8028 fte = FALLTHRU_EDGE (cont_bb);
8029 be->flags |= EDGE_TRUE_VALUE;
8030 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8032 if (chunking)
8034 /* Split the beginning of exit_bb to make bottom_bb. We
8035 need to insert a nop at the start, because splitting is
8036 after a stmt, not before. */
8037 gsi = gsi_start_bb (exit_bb);
8038 stmt = gimple_build_nop ();
8039 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8040 split = split_block (exit_bb, stmt);
8041 bottom_bb = split->src;
8042 exit_bb = split->dest;
8043 gsi = gsi_last_bb (bottom_bb);
8045 /* Chunk increment and test goes into bottom_bb. */
8046 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
8047 build_int_cst (diff_type, 1));
8048 ass = gimple_build_assign (chunk_no, expr);
8049 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
8051 /* Chunk test at end of bottom_bb. */
8052 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
8053 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
8054 GSI_CONTINUE_LINKING);
8056 /* Fixup edges from bottom_bb. */
8057 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8058 split->probability = profile_probability::unlikely ().guessed ();
8059 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
8060 latch_edge->probability = profile_probability::likely ().guessed ();
8064 gsi = gsi_last_nondebug_bb (exit_bb);
8065 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8066 loc = gimple_location (gsi_stmt (gsi));
8068 if (!gimple_in_ssa_p (cfun))
8070 /* Insert the final value of V, in case it is live. This is the
8071 value for the only thread that survives past the join. */
8072 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
8073 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
8074 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
8075 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
8076 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
8077 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8078 true, GSI_SAME_STMT);
8079 ass = gimple_build_assign (v, expr);
8080 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
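/* Worked example (illustrative values): for (V = 0; V < 10; V += 3) has
   range = 10, dir = +1, S = 3, so
   V = B + ((10 - 1 + 3) / 3) * 3 = 0 + 4 * 3 = 12,
   exactly the value V holds once the last iteration (V = 9) has
   stepped.  */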
8083 /* Remove the OMP_RETURN. */
8084 gsi_remove (&gsi, true);
8086 if (cont_bb)
8088 /* We now have one, two or three nested loops. Update the loop
8089 structures. */
8090 class loop *parent = entry_bb->loop_father;
8091 class loop *body = body_bb->loop_father;
8093 if (chunking)
8095 class loop *chunk_loop = alloc_loop ();
8096 chunk_loop->header = head_bb;
8097 chunk_loop->latch = bottom_bb;
8098 add_loop (chunk_loop, parent);
8099 parent = chunk_loop;
8101 else if (parent != body)
8103 gcc_assert (body->header == body_bb);
8104 gcc_assert (body->latch == cont_bb
8105 || single_pred (body->latch) == cont_bb);
8106 parent = NULL;
8109 if (parent)
8111 class loop *body_loop = alloc_loop ();
8112 body_loop->header = body_bb;
8113 body_loop->latch = cont_bb;
8114 add_loop (body_loop, parent);
8116 if (fd->tiling)
8118 /* Insert tiling's element loop. */
8119 class loop *inner_loop = alloc_loop ();
8120 inner_loop->header = elem_body_bb;
8121 inner_loop->latch = elem_cont_bb;
8122 add_loop (inner_loop, body_loop);
8128 /* Expand the OMP loop defined by REGION. */
8130 static void
8131 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8133 struct omp_for_data fd;
8134 struct omp_for_data_loop *loops;
8136 loops = XALLOCAVEC (struct omp_for_data_loop,
8137 gimple_omp_for_collapse (last_stmt (region->entry)));
8138 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8139 &fd, loops);
8140 region->sched_kind = fd.sched_kind;
8141 region->sched_modifiers = fd.sched_modifiers;
8142 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8143 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8145 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8146 if ((loops[i].m1 || loops[i].m2)
8147 && (loops[i].m1 == NULL_TREE
8148 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8149 && (loops[i].m2 == NULL_TREE
8150 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8151 && TREE_CODE (loops[i].step) == INTEGER_CST
8152 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8154 tree t;
8155 tree itype = TREE_TYPE (loops[i].v);
8156 if (loops[i].m1 && loops[i].m2)
8157 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8158 else if (loops[i].m1)
8159 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8160 else
8161 t = loops[i].m2;
8162 t = fold_build2 (MULT_EXPR, itype, t,
8163 fold_convert (itype,
8164 loops[i - loops[i].outer].step));
8165 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8166 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8167 fold_build1 (NEGATE_EXPR, itype, t),
8168 fold_build1 (NEGATE_EXPR, itype,
8169 fold_convert (itype,
8170 loops[i].step)));
8171 else
8172 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8173 fold_convert (itype, loops[i].step));
8174 if (integer_nonzerop (t))
8175 error_at (gimple_location (fd.for_stmt),
8176 "invalid OpenMP non-rectangular loop step; "
8177 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8178 "step %qE",
8179 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8180 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8181 loops[i - loops[i].outer].step, i + 1,
8182 loops[i].step);
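/* An example that trips this diagnostic (hypothetical source):

     #pragma omp for collapse(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < i * 3; j += 2)
	 body;

   Here m1 is absent and m2 is 3, so (3 - 0) * 1 = 3 is checked against
   the inner step 2; 3 % 2 != 0, hence the error above.  */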
8186 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8187 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8188 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8189 if (region->cont)
8191 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8192 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8193 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8195 else
8196 /* If there isn't a continue then this is a degenerate case where
8197 the introduction of abnormal edges during lowering will prevent
8198 original loops from being detected. Fix that up. */
8199 loops_state_set (LOOPS_NEED_FIXUP);
8201 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8202 expand_omp_simd (region, &fd);
8203 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8205 gcc_assert (!inner_stmt && !fd.non_rect);
8206 expand_oacc_for (region, &fd);
8208 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8210 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8211 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8212 else
8213 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8215 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8216 && !fd.have_ordered)
8218 if (fd.chunk_size == NULL)
8219 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8220 else
8221 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8223 else
8225 int fn_index, start_ix, next_ix;
8226 unsigned HOST_WIDE_INT sched = 0;
8227 tree sched_arg = NULL_TREE;
8229 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8230 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8231 if (fd.chunk_size == NULL
8232 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8233 fd.chunk_size = integer_zero_node;
8234 switch (fd.sched_kind)
8236 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8237 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8238 && fd.lastprivate_conditional == 0)
8240 gcc_assert (!fd.have_ordered);
8241 fn_index = 6;
8242 sched = 4;
8244 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8245 && !fd.have_ordered
8246 && fd.lastprivate_conditional == 0)
8247 fn_index = 7;
8248 else
8250 fn_index = 3;
8251 sched = (HOST_WIDE_INT_1U << 31);
8253 break;
8254 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8255 case OMP_CLAUSE_SCHEDULE_GUIDED:
8256 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8257 && !fd.have_ordered
8258 && fd.lastprivate_conditional == 0)
8260 fn_index = 3 + fd.sched_kind;
8261 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8262 break;
8264 fn_index = fd.sched_kind;
8265 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8266 sched += (HOST_WIDE_INT_1U << 31);
8267 break;
8268 case OMP_CLAUSE_SCHEDULE_STATIC:
8269 gcc_assert (fd.have_ordered);
8270 fn_index = 0;
8271 sched = (HOST_WIDE_INT_1U << 31) + 1;
8272 break;
8273 default:
8274 gcc_unreachable ();
8276 if (!fd.ordered)
8277 fn_index += fd.have_ordered * 8;
8278 if (fd.ordered)
8279 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8280 else
8281 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8282 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8283 if (fd.have_reductemp || fd.have_pointer_condtemp)
8285 if (fd.ordered)
8286 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8287 else if (fd.have_ordered)
8288 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8289 else
8290 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8291 sched_arg = build_int_cstu (long_integer_type_node, sched);
8292 if (!fd.chunk_size)
8293 fd.chunk_size = integer_zero_node;
8295 if (fd.iter_type == long_long_unsigned_type_node)
8297 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8298 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8299 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8300 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8302 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8303 (enum built_in_function) next_ix, sched_arg,
8304 inner_stmt);
8308 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8310 v = GOMP_sections_start (n);
8311 L0:
8312 switch (v)
8313 {
8314 case 0:
8315 goto L2;
8316 case 1:
8317 section 1;
8318 goto L1;
8319 case 2:
8320 ...
8321 case n:
8322 ...
8323 default:
8324 abort ();
8325 }
8326 L1:
8327 v = GOMP_sections_next ();
8328 goto L0;
8329 L2:
8330 reduction;
8332 If this is a combined parallel sections, replace the call to
8333 GOMP_sections_start with call to GOMP_sections_next. */
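/* For instance (illustrative user code):

     #pragma omp sections
     {
       #pragma omp section
	 a ();
       #pragma omp section
	 b ();
     }

   becomes a switch over v in {0, 1, 2}: case 1 runs a (), case 2 runs
   b (), and case 0 branches to L2 once GOMP_sections_next reports that
   no work remains.  */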
8335 static void
8336 expand_omp_sections (struct omp_region *region)
8338 tree t, u, vin = NULL, vmain, vnext, l2;
8339 unsigned len;
8340 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8341 gimple_stmt_iterator si, switch_si;
8342 gomp_sections *sections_stmt;
8343 gimple *stmt;
8344 gomp_continue *cont;
8345 edge_iterator ei;
8346 edge e;
8347 struct omp_region *inner;
8348 unsigned i, casei;
8349 bool exit_reachable = region->cont != NULL;
8351 gcc_assert (region->exit != NULL);
8352 entry_bb = region->entry;
8353 l0_bb = single_succ (entry_bb);
8354 l1_bb = region->cont;
8355 l2_bb = region->exit;
8356 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8357 l2 = gimple_block_label (l2_bb);
8358 else
8360 /* This can happen if there are reductions. */
8361 len = EDGE_COUNT (l0_bb->succs);
8362 gcc_assert (len > 0);
8363 e = EDGE_SUCC (l0_bb, len - 1);
8364 si = gsi_last_nondebug_bb (e->dest);
8365 l2 = NULL_TREE;
8366 if (gsi_end_p (si)
8367 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8368 l2 = gimple_block_label (e->dest);
8369 else
8370 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8372 si = gsi_last_nondebug_bb (e->dest);
8373 if (gsi_end_p (si)
8374 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8376 l2 = gimple_block_label (e->dest);
8377 break;
8381 if (exit_reachable)
8382 default_bb = create_empty_bb (l1_bb->prev_bb);
8383 else
8384 default_bb = create_empty_bb (l0_bb);
8386 /* We will build a switch() with enough cases for all the
8387 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8388 and a default case to abort if something goes wrong. */
8389 len = EDGE_COUNT (l0_bb->succs);
8391 /* Use vec::quick_push on label_vec throughout, since we know the size
8392 in advance. */
8393 auto_vec<tree> label_vec (len);
8395 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8396 GIMPLE_OMP_SECTIONS statement. */
8397 si = gsi_last_nondebug_bb (entry_bb);
8398 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8399 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8400 vin = gimple_omp_sections_control (sections_stmt);
8401 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8402 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8403 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8404 tree cond_var = NULL_TREE;
8405 if (reductmp || condtmp)
8407 tree reductions = null_pointer_node, mem = null_pointer_node;
8408 tree memv = NULL_TREE, condtemp = NULL_TREE;
8409 gimple_stmt_iterator gsi = gsi_none ();
8410 gimple *g = NULL;
8411 if (reductmp)
8413 reductions = OMP_CLAUSE_DECL (reductmp);
8414 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8415 g = SSA_NAME_DEF_STMT (reductions);
8416 reductions = gimple_assign_rhs1 (g);
8417 OMP_CLAUSE_DECL (reductmp) = reductions;
8418 gsi = gsi_for_stmt (g);
8420 else
8421 gsi = si;
8422 if (condtmp)
8424 condtemp = OMP_CLAUSE_DECL (condtmp);
8425 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8426 OMP_CLAUSE__CONDTEMP_);
8427 cond_var = OMP_CLAUSE_DECL (c);
8428 tree type = TREE_TYPE (condtemp);
8429 memv = create_tmp_var (type);
8430 TREE_ADDRESSABLE (memv) = 1;
8431 unsigned cnt = 0;
8432 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8433 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8434 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8435 ++cnt;
8436 unsigned HOST_WIDE_INT sz
8437 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8438 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8439 false);
8440 mem = build_fold_addr_expr (memv);
8442 t = build_int_cst (unsigned_type_node, len - 1);
8443 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8444 stmt = gimple_build_call (u, 3, t, reductions, mem);
8445 gimple_call_set_lhs (stmt, vin);
8446 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8447 if (condtmp)
8449 expand_omp_build_assign (&gsi, condtemp, memv, false);
8450 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8451 vin, build_one_cst (TREE_TYPE (cond_var)));
8452 expand_omp_build_assign (&gsi, cond_var, t, false);
8454 if (reductmp)
8456 gsi_remove (&gsi, true);
8457 release_ssa_name (gimple_assign_lhs (g));
8460 else if (!is_combined_parallel (region))
8462 /* If we are not inside a combined parallel+sections region,
8463 call GOMP_sections_start. */
8464 t = build_int_cst (unsigned_type_node, len - 1);
8465 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8466 stmt = gimple_build_call (u, 1, t);
8468 else
8470 /* Otherwise, call GOMP_sections_next. */
8471 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8472 stmt = gimple_build_call (u, 0);
8474 if (!reductmp && !condtmp)
8476 gimple_call_set_lhs (stmt, vin);
8477 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8479 gsi_remove (&si, true);
8481 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8482 L0_BB. */
8483 switch_si = gsi_last_nondebug_bb (l0_bb);
8484 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8485 if (exit_reachable)
8487 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8488 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8489 vmain = gimple_omp_continue_control_use (cont);
8490 vnext = gimple_omp_continue_control_def (cont);
8492 else
8494 vmain = vin;
8495 vnext = NULL_TREE;
8498 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8499 label_vec.quick_push (t);
8500 i = 1;
8502 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8503 for (inner = region->inner, casei = 1;
8504 inner;
8505 inner = inner->next, i++, casei++)
8507 basic_block s_entry_bb, s_exit_bb;
8509 /* Skip optional reduction region. */
8510 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8512 --i;
8513 --casei;
8514 continue;
8517 s_entry_bb = inner->entry;
8518 s_exit_bb = inner->exit;
8520 t = gimple_block_label (s_entry_bb);
8521 u = build_int_cst (unsigned_type_node, casei);
8522 u = build_case_label (u, NULL, t);
8523 label_vec.quick_push (u);
8525 si = gsi_last_nondebug_bb (s_entry_bb);
8526 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8527 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8528 gsi_remove (&si, true);
8529 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8531 if (s_exit_bb == NULL)
8532 continue;
8534 si = gsi_last_nondebug_bb (s_exit_bb);
8535 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8536 gsi_remove (&si, true);
8538 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8541 /* Error handling code goes in DEFAULT_BB. */
8542 t = gimple_block_label (default_bb);
8543 u = build_case_label (NULL, NULL, t);
8544 make_edge (l0_bb, default_bb, 0);
8545 add_bb_to_loop (default_bb, current_loops->tree_root);
8547 stmt = gimple_build_switch (vmain, u, label_vec);
8548 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8549 gsi_remove (&switch_si, true);
8551 si = gsi_start_bb (default_bb);
8552 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8553 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8555 if (exit_reachable)
8557 tree bfn_decl;
8559 /* Code to get the next section goes in L1_BB. */
8560 si = gsi_last_nondebug_bb (l1_bb);
8561 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8563 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8564 stmt = gimple_build_call (bfn_decl, 0);
8565 gimple_call_set_lhs (stmt, vnext);
8566 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8567 if (cond_var)
8569 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8570 vnext, build_one_cst (TREE_TYPE (cond_var)));
8571 expand_omp_build_assign (&si, cond_var, t, false);
8573 gsi_remove (&si, true);
8575 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8578 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8579 si = gsi_last_nondebug_bb (l2_bb);
8580 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8581 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8582 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8583 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8584 else
8585 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8586 stmt = gimple_build_call (t, 0);
8587 if (gimple_omp_return_lhs (gsi_stmt (si)))
8588 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8589 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8590 gsi_remove (&si, true);
8592 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8595 /* Expand code for an OpenMP single or scope directive. We've already expanded
8596 much of the code; here we simply place the GOMP_barrier call. */
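/* As a sketch (assuming no 'copyprivate' clause), by this point

     #pragma omp single
     body;

   has been lowered along the lines of

     if (GOMP_single_start ())
       body;
     GOMP_barrier ();    (omitted below when 'nowait' is present)

   and only the barrier at the region's exit is placed here. */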
8598 static void
8599 expand_omp_single (struct omp_region *region)
8601 basic_block entry_bb, exit_bb;
8602 gimple_stmt_iterator si;
8604 entry_bb = region->entry;
8605 exit_bb = region->exit;
8607 si = gsi_last_nondebug_bb (entry_bb);
8608 enum gimple_code code = gimple_code (gsi_stmt (si));
8609 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8610 gsi_remove (&si, true);
8611 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8613 if (exit_bb == NULL)
8615 gcc_assert (code == GIMPLE_OMP_SCOPE);
8616 return;
8619 si = gsi_last_nondebug_bb (exit_bb);
8620 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8622 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8623 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8625 gsi_remove (&si, true);
8626 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8629 /* Generic expansion for OpenMP synchronization directives: master,
8630 masked, taskgroup, ordered and critical. All we need to do here is
8631 remove the entry and exit markers for REGION. */
8633 static void
8634 expand_omp_synch (struct omp_region *region)
8636 basic_block entry_bb, exit_bb;
8637 gimple_stmt_iterator si;
8639 entry_bb = region->entry;
8640 exit_bb = region->exit;
8642 si = gsi_last_nondebug_bb (entry_bb);
8643 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8644 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8645 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8646 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8647 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8648 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8649 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8650 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8651 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8653 expand_omp_taskreg (region);
8654 return;
8656 gsi_remove (&si, true);
8657 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8659 if (exit_bb)
8661 si = gsi_last_nondebug_bb (exit_bb);
8662 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8663 gsi_remove (&si, true);
8664 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8668 /* Translate enum omp_memory_order to enum memmodel for the fail
8669 clause embedded in it. */
8671 static enum memmodel
8672 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8674 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8676 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8677 switch (mo & OMP_MEMORY_ORDER_MASK)
8679 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8680 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8681 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8682 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8683 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8684 default: break;
8686 gcc_unreachable ();
8687 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8688 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8689 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8690 default: gcc_unreachable ();
8694 /* Translate enum omp_memory_order to enum memmodel. The two enums
8695 use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8696 is 0 and omp_memory_order has the fail mode encoded in it too. */
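/* As a worked example: for '#pragma omp atomic compare seq_cst
   fail(acquire)', the single omp_memory_order value encodes both
   OMP_MEMORY_ORDER_SEQ_CST and OMP_FAIL_MEMORY_ORDER_ACQUIRE; the
   function below yields MEMMODEL_SEQ_CST for the success path, while
   the function above yields MEMMODEL_ACQUIRE for the failure path. */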
8698 static enum memmodel
8699 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8701 enum memmodel ret, fail_ret;
8702 switch (mo & OMP_MEMORY_ORDER_MASK)
8704 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8705 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8706 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8707 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8708 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8709 default: gcc_unreachable ();
8711 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8712 we can just return ret here unconditionally. Otherwise, work around
8713 it here and make sure fail memmodel is not stronger. */
8714 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8715 return ret;
8716 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8717 if (fail_ret > ret)
8718 return fail_ret;
8719 return ret;
8722 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8723 operation as a normal volatile load. */
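/* For example (a sketch, assuming a 4-byte int X and relaxed
   ordering), '#pragma omp atomic read' on 'v = x' becomes

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   via the BUILT_IN_ATOMIC_LOAD_N builtin selected by INDEX. */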
8725 static bool
8726 expand_omp_atomic_load (basic_block load_bb, tree addr,
8727 tree loaded_val, int index)
8729 enum built_in_function tmpbase;
8730 gimple_stmt_iterator gsi;
8731 basic_block store_bb;
8732 location_t loc;
8733 gimple *stmt;
8734 tree decl, type, itype;
8736 gsi = gsi_last_nondebug_bb (load_bb);
8737 stmt = gsi_stmt (gsi);
8738 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8739 loc = gimple_location (stmt);
8741 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8742 is smaller than word size, then expand_atomic_load assumes that the load
8743 is atomic. We could avoid the builtin entirely in this case. */
8745 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8746 decl = builtin_decl_explicit (tmpbase);
8747 if (decl == NULL_TREE)
8748 return false;
8750 type = TREE_TYPE (loaded_val);
8751 itype = TREE_TYPE (TREE_TYPE (decl));
8753 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8754 tree mo = build_int_cst (integer_type_node,
8755 omp_memory_order_to_memmodel (omo));
8756 gcall *call = gimple_build_call (decl, 2, addr, mo);
8757 gimple_set_location (call, loc);
8758 gimple_set_vuse (call, gimple_vuse (stmt));
8759 gimple *repl;
8760 if (!useless_type_conversion_p (type, itype))
8762 tree lhs = make_ssa_name (itype);
8763 gimple_call_set_lhs (call, lhs);
8764 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8765 repl = gimple_build_assign (loaded_val,
8766 build1 (VIEW_CONVERT_EXPR, type, lhs));
8767 gimple_set_location (repl, loc);
8769 else
8771 gimple_call_set_lhs (call, loaded_val);
8772 repl = call;
8774 gsi_replace (&gsi, repl, true);
8776 store_bb = single_succ (load_bb);
8777 gsi = gsi_last_nondebug_bb (store_bb);
8778 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8779 gsi_remove (&gsi, true);
8781 return true;
8784 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8785 operation as a normal volatile store. */
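/* For example (a sketch, for a 4-byte int X), '#pragma omp atomic
   write' on 'x = expr' becomes

     __atomic_store_4 (&x, expr, mo);

   or, when the previous value is also needed, an exchange:

     v = __atomic_exchange_4 (&x, expr, mo);  */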
8787 static bool
8788 expand_omp_atomic_store (basic_block load_bb, tree addr,
8789 tree loaded_val, tree stored_val, int index)
8791 enum built_in_function tmpbase;
8792 gimple_stmt_iterator gsi;
8793 basic_block store_bb = single_succ (load_bb);
8794 location_t loc;
8795 gimple *stmt;
8796 tree decl, type, itype;
8797 machine_mode imode;
8798 bool exchange;
8800 gsi = gsi_last_nondebug_bb (load_bb);
8801 stmt = gsi_stmt (gsi);
8802 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8804 /* If the load value is needed, then this isn't a store but an exchange. */
8805 exchange = gimple_omp_atomic_need_value_p (stmt);
8807 gsi = gsi_last_nondebug_bb (store_bb);
8808 stmt = gsi_stmt (gsi);
8809 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8810 loc = gimple_location (stmt);
8812 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8813 is smaller than word size, then expand_atomic_store assumes that the store
8814 is atomic. We could avoid the builtin entirely in this case. */
8816 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8817 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8818 decl = builtin_decl_explicit (tmpbase);
8819 if (decl == NULL_TREE)
8820 return false;
8822 type = TREE_TYPE (stored_val);
8824 /* Dig out the type of the function's second argument. */
8825 itype = TREE_TYPE (decl);
8826 itype = TYPE_ARG_TYPES (itype);
8827 itype = TREE_CHAIN (itype);
8828 itype = TREE_VALUE (itype);
8829 imode = TYPE_MODE (itype);
8831 if (exchange && !can_atomic_exchange_p (imode, true))
8832 return false;
8834 if (!useless_type_conversion_p (itype, type))
8835 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8836 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8837 tree mo = build_int_cst (integer_type_node,
8838 omp_memory_order_to_memmodel (omo));
8839 stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
8840 true, GSI_SAME_STMT);
8841 gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
8842 gimple_set_location (call, loc);
8843 gimple_set_vuse (call, gimple_vuse (stmt));
8844 gimple_set_vdef (call, gimple_vdef (stmt));
8846 gimple *repl = call;
8847 if (exchange)
8849 if (!useless_type_conversion_p (type, itype))
8851 tree lhs = make_ssa_name (itype);
8852 gimple_call_set_lhs (call, lhs);
8853 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8854 repl = gimple_build_assign (loaded_val,
8855 build1 (VIEW_CONVERT_EXPR, type, lhs));
8856 gimple_set_location (repl, loc);
8858 else
8859 gimple_call_set_lhs (call, loaded_val);
8861 gsi_replace (&gsi, repl, true);
8863 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8864 gsi = gsi_last_nondebug_bb (load_bb);
8865 gsi_remove (&gsi, true);
8867 return true;
8870 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8871 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8872 size of the data type, and thus usable to find the index of the builtin
8873 decl. Returns false if the expression is not of the proper form. */
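/* For example (a sketch, for a 4-byte int X), '#pragma omp atomic'
   on 'x += n' matches the PLUS_EXPR case below and becomes

     __atomic_fetch_add_4 (&x, n, mo);

   with the call's result consumed only when a 'capture' form needs
   the old or new value. */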
8875 static bool
8876 expand_omp_atomic_fetch_op (basic_block load_bb,
8877 tree addr, tree loaded_val,
8878 tree stored_val, int index)
8880 enum built_in_function oldbase, newbase, tmpbase;
8881 tree decl, itype, call;
8882 tree lhs, rhs;
8883 basic_block store_bb = single_succ (load_bb);
8884 gimple_stmt_iterator gsi;
8885 gimple *stmt;
8886 location_t loc;
8887 enum tree_code code;
8888 bool need_old, need_new;
8889 machine_mode imode;
8891 /* We expect to find the following sequences:
8893 load_bb:
8894 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8896 store_bb:
8897 val = tmp OP something; (or: something OP tmp)
8898 GIMPLE_OMP_STORE (val)
8900 ??? FIXME: Allow a more flexible sequence.
8901 Perhaps use data flow to pick the statements.
8905 gsi = gsi_after_labels (store_bb);
8906 stmt = gsi_stmt (gsi);
8907 if (is_gimple_debug (stmt))
8909 gsi_next_nondebug (&gsi);
8910 if (gsi_end_p (gsi))
8911 return false;
8912 stmt = gsi_stmt (gsi);
8914 loc = gimple_location (stmt);
8915 if (!is_gimple_assign (stmt))
8916 return false;
8917 gsi_next_nondebug (&gsi);
8918 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8919 return false;
8920 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8921 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8922 enum omp_memory_order omo
8923 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8924 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8925 gcc_checking_assert (!need_old || !need_new);
8927 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8928 return false;
8930 /* Check for one of the supported fetch-op operations. */
8931 code = gimple_assign_rhs_code (stmt);
8932 switch (code)
8934 case PLUS_EXPR:
8935 case POINTER_PLUS_EXPR:
8936 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8937 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8938 break;
8939 case MINUS_EXPR:
8940 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8941 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8942 break;
8943 case BIT_AND_EXPR:
8944 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8945 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8946 break;
8947 case BIT_IOR_EXPR:
8948 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8949 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8950 break;
8951 case BIT_XOR_EXPR:
8952 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8953 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8954 break;
8955 default:
8956 return false;
8959 /* Make sure the expression is of the proper form. */
8960 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8961 rhs = gimple_assign_rhs2 (stmt);
8962 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8963 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8964 rhs = gimple_assign_rhs1 (stmt);
8965 else
8966 return false;
8968 tmpbase = ((enum built_in_function)
8969 ((need_new ? newbase : oldbase) + index + 1));
8970 decl = builtin_decl_explicit (tmpbase);
8971 if (decl == NULL_TREE)
8972 return false;
8973 itype = TREE_TYPE (TREE_TYPE (decl));
8974 imode = TYPE_MODE (itype);
8976 /* We could test all of the various optabs involved, but the fact of the
8977 matter is that (with the exception of i486 vs i586 and xadd) all targets
8978 that support any atomic operation optab also implement compare-and-swap.
8979 Let optabs.cc take care of expanding any compare-and-swap loop. */
8980 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8981 return false;
8983 gsi = gsi_last_nondebug_bb (load_bb);
8984 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8986 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8987 It only requires that the operation happen atomically; we pass down
8988 the memory model derived from the directive's memory-order clause. */
8989 call = build_call_expr_loc (loc, decl, 3, addr,
8990 fold_convert_loc (loc, itype, rhs),
8991 build_int_cst (NULL, mo));
8993 if (need_old || need_new)
8995 lhs = need_old ? loaded_val : stored_val;
8996 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8997 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8999 else
9000 call = fold_convert_loc (loc, void_type_node, call);
9001 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
9002 gsi_remove (&gsi, true);
9004 gsi = gsi_last_nondebug_bb (store_bb);
9005 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
9006 gsi_remove (&gsi, true);
9007 gsi = gsi_last_nondebug_bb (store_bb);
9008 stmt = gsi_stmt (gsi);
9009 gsi_remove (&gsi, true);
9011 if (gimple_in_ssa_p (cfun))
9012 release_defs (stmt);
9014 return true;
9017 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
9018 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
9019 Returns false if the expression is not of the proper form. */
9021 static bool
9022 expand_omp_atomic_cas (basic_block load_bb, tree addr,
9023 tree loaded_val, tree stored_val, int index)
9025 /* We expect to find the following sequences:
9027 load_bb:
9028 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
9030 store_bb:
9031 val = tmp == e ? d : tmp;
9032 GIMPLE_OMP_ATOMIC_STORE (val)
9034 or in store_bb instead:
9035 tmp2 = tmp == e;
9036 val = tmp2 ? d : tmp;
9037 GIMPLE_OMP_ATOMIC_STORE (val)
9039 or:
9040 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
9041 val = e == tmp3 ? d : tmp;
9042 GIMPLE_OMP_ATOMIC_STORE (val)
9044 etc. */
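/* As a sketch of a source-level form that matches these sequences:

     #pragma omp atomic compare
     x = x == e ? d : x;

   which is expanded here into a single IFN_ATOMIC_COMPARE_EXCHANGE
   call instead of the compare-and-swap loop built by
   expand_omp_atomic_pipeline. */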
9047 basic_block store_bb = single_succ (load_bb);
9048 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
9049 gimple *store_stmt = gsi_stmt (gsi);
9050 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
9051 return false;
9052 gsi_prev_nondebug (&gsi);
9053 if (gsi_end_p (gsi))
9054 return false;
9055 gimple *condexpr_stmt = gsi_stmt (gsi);
9056 if (!is_gimple_assign (condexpr_stmt)
9057 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
9058 return false;
9059 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
9060 return false;
9061 gimple *cond_stmt = NULL;
9062 gimple *vce_stmt = NULL;
9063 gsi_prev_nondebug (&gsi);
9064 if (!gsi_end_p (gsi))
9066 cond_stmt = gsi_stmt (gsi);
9067 if (!is_gimple_assign (cond_stmt))
9068 return false;
9069 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
9071 gsi_prev_nondebug (&gsi);
9072 if (!gsi_end_p (gsi))
9074 vce_stmt = gsi_stmt (gsi);
9075 if (!is_gimple_assign (vce_stmt)
9076 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
9077 return false;
9080 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
9081 std::swap (vce_stmt, cond_stmt);
9082 else
9083 return false;
9084 if (vce_stmt)
9086 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
9087 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
9088 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
9089 return false;
9090 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
9091 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
9092 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
9093 TYPE_SIZE (TREE_TYPE (loaded_val))))
9094 return false;
9095 gsi_prev_nondebug (&gsi);
9096 if (!gsi_end_p (gsi))
9097 return false;
9100 tree cond = gimple_assign_rhs1 (condexpr_stmt);
9101 tree cond_op1, cond_op2;
9102 if (cond_stmt)
9104 /* We should now always get a separate cond_stmt. */
9105 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9106 return false;
9107 cond_op1 = gimple_assign_rhs1 (cond_stmt);
9108 cond_op2 = gimple_assign_rhs2 (cond_stmt);
9110 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9111 return false;
9112 else
9114 cond_op1 = TREE_OPERAND (cond, 0);
9115 cond_op2 = TREE_OPERAND (cond, 1);
9117 tree d;
9118 if (TREE_CODE (cond) == NE_EXPR)
9120 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9121 return false;
9122 d = gimple_assign_rhs3 (condexpr_stmt);
9124 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9125 return false;
9126 else
9127 d = gimple_assign_rhs2 (condexpr_stmt);
9128 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9129 if (operand_equal_p (e, cond_op1))
9130 e = cond_op2;
9131 else if (operand_equal_p (e, cond_op2))
9132 e = cond_op1;
9133 else
9134 return false;
9136 location_t loc = gimple_location (store_stmt);
9137 gimple *load_stmt = last_stmt (load_bb);
9138 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9139 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9140 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9141 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9142 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9143 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9144 gcc_checking_assert (!need_old || !need_new);
9146 enum built_in_function fncode
9147 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9148 + index + 1);
9149 tree cmpxchg = builtin_decl_explicit (fncode);
9150 if (cmpxchg == NULL_TREE)
9151 return false;
9152 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9154 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9155 || !can_atomic_load_p (TYPE_MODE (itype)))
9156 return false;
9158 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9159 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9160 return false;
9162 gsi = gsi_for_stmt (store_stmt);
9163 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9165 tree ne = create_tmp_reg (itype);
9166 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9167 gimple_set_location (g, loc);
9168 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9169 e = ne;
9171 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9173 tree nd = create_tmp_reg (itype);
9174 enum tree_code code;
9175 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9177 code = VIEW_CONVERT_EXPR;
9178 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9180 else
9181 code = NOP_EXPR;
9182 gimple *g = gimple_build_assign (nd, code, d);
9183 gimple_set_location (g, loc);
9184 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9185 d = nd;
9188 tree ctype = build_complex_type (itype);
9189 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9190 gimple *g
9191 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9192 build_int_cst (integer_type_node, flag),
9193 mo, fmo);
9194 tree cres = create_tmp_reg (ctype);
9195 gimple_call_set_lhs (g, cres);
9196 gimple_set_location (g, loc);
9197 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9199 if (cond_stmt || need_old || need_new)
9201 tree im = create_tmp_reg (itype);
9202 g = gimple_build_assign (im, IMAGPART_EXPR,
9203 build1 (IMAGPART_EXPR, itype, cres));
9204 gimple_set_location (g, loc);
9205 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9207 tree re = NULL_TREE;
9208 if (need_old || need_new)
9210 re = create_tmp_reg (itype);
9211 g = gimple_build_assign (re, REALPART_EXPR,
9212 build1 (REALPART_EXPR, itype, cres));
9213 gimple_set_location (g, loc);
9214 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9217 if (cond_stmt)
9219 g = gimple_build_assign (cond, NOP_EXPR, im);
9220 gimple_set_location (g, loc);
9221 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9224 if (need_new)
9226 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9227 cond_stmt
9228 ? cond : build2 (NE_EXPR, boolean_type_node,
9229 im, build_zero_cst (itype)),
9230 d, re);
9231 gimple_set_location (g, loc);
9232 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9233 re = gimple_assign_lhs (g);
9236 if (need_old || need_new)
9238 tree v = need_old ? loaded_val : stored_val;
9239 enum tree_code code;
9240 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9242 code = VIEW_CONVERT_EXPR;
9243 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9245 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9246 code = NOP_EXPR;
9247 else
9248 code = TREE_CODE (re);
9249 g = gimple_build_assign (v, code, re);
9250 gimple_set_location (g, loc);
9251 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9255 gsi_remove (&gsi, true);
9256 gsi = gsi_for_stmt (load_stmt);
9257 gsi_remove (&gsi, true);
9258 gsi = gsi_for_stmt (condexpr_stmt);
9259 gsi_remove (&gsi, true);
9260 if (cond_stmt)
9262 gsi = gsi_for_stmt (cond_stmt);
9263 gsi_remove (&gsi, true);
9265 if (vce_stmt)
9267 gsi = gsi_for_stmt (vce_stmt);
9268 gsi_remove (&gsi, true);
9271 return true;
9274 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9276 oldval = *addr;
9277 repeat:
9278 newval = rhs; // with oldval replacing *addr in rhs
9279 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9280 if (oldval != newval)
9281 goto repeat;
9283 INDEX is log2 of the size of the data type, and thus usable to find the
9284 index of the builtin decl. */
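/* For example (a sketch, for a 4-byte int X), 'x *= 3' under
   '#pragma omp atomic' has no matching fetch-op builtin, so it is
   expanded into the loop above with

     newval = oldval * 3;

   as the recomputed value on each iteration. */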
9286 static bool
9287 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9288 tree addr, tree loaded_val, tree stored_val,
9289 int index)
9291 tree loadedi, storedi, initial, new_storedi, old_vali;
9292 tree type, itype, cmpxchg, iaddr, atype;
9293 gimple_stmt_iterator si;
9294 basic_block loop_header = single_succ (load_bb);
9295 gimple *phi, *stmt;
9296 edge e;
9297 enum built_in_function fncode;
9299 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9300 + index + 1);
9301 cmpxchg = builtin_decl_explicit (fncode);
9302 if (cmpxchg == NULL_TREE)
9303 return false;
9304 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9305 atype = type;
9306 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9308 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9309 || !can_atomic_load_p (TYPE_MODE (itype)))
9310 return false;
9312 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9313 si = gsi_last_nondebug_bb (load_bb);
9314 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9315 location_t loc = gimple_location (gsi_stmt (si));
9316 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9317 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9318 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9320 /* For floating-point values, we'll need to view-convert them to integers
9321 so that we can perform the atomic compare and swap. Simplify the
9322 following code by always setting up the "i"ntegral variables. */
9323 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9325 tree iaddr_val;
9327 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9328 true));
9329 atype = itype;
9330 iaddr_val
9331 = force_gimple_operand_gsi (&si,
9332 fold_convert (TREE_TYPE (iaddr), addr),
9333 false, NULL_TREE, true, GSI_SAME_STMT);
9334 stmt = gimple_build_assign (iaddr, iaddr_val);
9335 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9336 loadedi = create_tmp_var (itype);
9337 if (gimple_in_ssa_p (cfun))
9338 loadedi = make_ssa_name (loadedi);
9340 else
9342 iaddr = addr;
9343 loadedi = loaded_val;
9346 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9347 tree loaddecl = builtin_decl_explicit (fncode);
9348 if (loaddecl)
9349 initial
9350 = fold_convert (atype,
9351 build_call_expr (loaddecl, 2, iaddr,
9352 build_int_cst (NULL_TREE,
9353 MEMMODEL_RELAXED)));
9354 else
9356 tree off
9357 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9358 true), 0);
9359 initial = build2 (MEM_REF, atype, iaddr, off);
9362 initial
9363 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9364 GSI_SAME_STMT);
9366 /* Move the value to the LOADEDI temporary. */
9367 if (gimple_in_ssa_p (cfun))
9369 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9370 phi = create_phi_node (loadedi, loop_header);
9371 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9372 initial);
9374 else
9375 gsi_insert_before (&si,
9376 gimple_build_assign (loadedi, initial),
9377 GSI_SAME_STMT);
9378 if (loadedi != loaded_val)
9380 gimple_stmt_iterator gsi2;
9381 tree x;
9383 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9384 gsi2 = gsi_start_bb (loop_header);
9385 if (gimple_in_ssa_p (cfun))
9387 gassign *stmt;
9388 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9389 true, GSI_SAME_STMT);
9390 stmt = gimple_build_assign (loaded_val, x);
9391 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9393 else
9395 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9396 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9397 true, GSI_SAME_STMT);
9400 gsi_remove (&si, true);
9402 si = gsi_last_nondebug_bb (store_bb);
9403 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9405 if (iaddr == addr)
9406 storedi = stored_val;
9407 else
9408 storedi
9409 = force_gimple_operand_gsi (&si,
9410 build1 (VIEW_CONVERT_EXPR, itype,
9411 stored_val), true, NULL_TREE, true,
9412 GSI_SAME_STMT);
9414 /* Build the compare&swap statement. */
9415 tree ctype = build_complex_type (itype);
9416 int flag = int_size_in_bytes (itype);
9417 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9418 ctype, 6, iaddr, loadedi,
9419 storedi,
9420 build_int_cst (integer_type_node,
9421 flag),
9422 mo, fmo);
9423 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9424 new_storedi = force_gimple_operand_gsi (&si,
9425 fold_convert (TREE_TYPE (loadedi),
9426 new_storedi),
9427 true, NULL_TREE,
9428 true, GSI_SAME_STMT);
9430 if (gimple_in_ssa_p (cfun))
9431 old_vali = loadedi;
9432 else
9434 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9435 stmt = gimple_build_assign (old_vali, loadedi);
9436 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9438 stmt = gimple_build_assign (loadedi, new_storedi);
9439 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9442 /* Note that we always perform the comparison as an integer, even for
9443 floating point. This allows the atomic operation to properly
9444 succeed even with NaNs and -0.0. */
9445 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9446 stmt = gimple_build_cond_empty (ne);
9447 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9449 /* Update cfg. */
9450 e = single_succ_edge (store_bb);
9451 e->flags &= ~EDGE_FALLTHRU;
9452 e->flags |= EDGE_FALSE_VALUE;
9453 /* Expect no looping. */
9454 e->probability = profile_probability::guessed_always ();
9456 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9457 e->probability = profile_probability::guessed_never ();
9459 /* Copy the new value to loadedi (we already did that before the condition
9460 if we are not in SSA). */
9461 if (gimple_in_ssa_p (cfun))
9463 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9464 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9467 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9468 stmt = gsi_stmt (si);
9469 gsi_remove (&si, true);
9470 if (gimple_in_ssa_p (cfun))
9471 release_defs (stmt);
9473 class loop *loop = alloc_loop ();
9474 loop->header = loop_header;
9475 loop->latch = store_bb;
9476 add_loop (loop, loop_header->loop_father);
9478 return true;
9481 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9483 GOMP_atomic_start ();
9484 *addr = rhs;
9485 GOMP_atomic_end ();
9487 The result is not globally atomic, but works so long as all parallel
9488 references are within #pragma omp atomic directives. According to
9489 responses received from omp@openmp.org, this appears to be within
9490 spec, which makes sense, since that's how several other compilers
9491 handle this situation as well.
9492 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9493 expanding. STORED_VAL is the operand of the matching
9494 GIMPLE_OMP_ATOMIC_STORE.
9496 We replace
9497 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9498 loaded_val = *addr;
9500 and replace
9501 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9502 *addr = stored_val; */
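/* For example (a sketch), a 'long double' atomic update on a target
   lacking a wide-enough compare-and-swap ends up bracketed as

     GOMP_atomic_start ();
     loaded_val = *addr;
     ... compute stored_val ...
     *addr = stored_val;
     GOMP_atomic_end ();  */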
9505 static bool
9506 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9507 tree addr, tree loaded_val, tree stored_val)
9509 gimple_stmt_iterator si;
9510 gassign *stmt;
9511 tree t;
9513 si = gsi_last_nondebug_bb (load_bb);
9514 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9516 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9517 t = build_call_expr (t, 0);
9518 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9520 tree mem = build_simple_mem_ref (addr);
9521 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9522 TREE_OPERAND (mem, 1)
9523 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9524 true),
9525 TREE_OPERAND (mem, 1));
9526 stmt = gimple_build_assign (loaded_val, mem);
9527 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9528 gsi_remove (&si, true);
9530 si = gsi_last_nondebug_bb (store_bb);
9531 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9533 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9534 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (si)));
9535 gimple_set_vdef (stmt, gimple_vdef (gsi_stmt (si)));
9536 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9538 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9539 t = build_call_expr (t, 0);
9540 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9541 gsi_remove (&si, true);
9542 return true;
9545 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
9546 using expand_omp_atomic_fetch_op. If that fails, we try to
9547 call expand_omp_atomic_pipeline, and if that fails too, the
9548 ultimate fallback is wrapping the operation in a mutex
9549 (expand_omp_atomic_mutex). REGION is the atomic region built
9550 by build_omp_regions_1(). */
9552 static void
9553 expand_omp_atomic (struct omp_region *region)
9555 basic_block load_bb = region->entry, store_bb = region->exit;
9556 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9557 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9558 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9559 tree addr = gimple_omp_atomic_load_rhs (load);
9560 tree stored_val = gimple_omp_atomic_store_val (store);
9561 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9562 HOST_WIDE_INT index;
9564 /* Make sure the type is one of the supported sizes. */
9565 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9566 index = exact_log2 (index);
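/* INDEX is now the log2 of the operand size; values 0 through 4 cover
   the 1-, 2-, 4-, 8- and 16-byte operands the __atomic and __sync
   builtins support. */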
9567 if (index >= 0 && index <= 4)
9569 unsigned int align = TYPE_ALIGN_UNIT (type);
9571 /* __sync builtins require strict data alignment. */
9572 if (exact_log2 (align) >= index)
9574 /* Atomic load. */
9575 scalar_mode smode;
9576 if (loaded_val == stored_val
9577 && (is_int_mode (TYPE_MODE (type), &smode)
9578 || is_float_mode (TYPE_MODE (type), &smode))
9579 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9580 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9581 return;
9583 /* Atomic store. */
9584 if ((is_int_mode (TYPE_MODE (type), &smode)
9585 || is_float_mode (TYPE_MODE (type), &smode))
9586 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9587 && store_bb == single_succ (load_bb)
9588 && first_stmt (store_bb) == store
9589 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9590 stored_val, index))
9591 return;
9593 /* When possible, use specialized atomic update functions. */
9594 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9595 && store_bb == single_succ (load_bb)
9596 && expand_omp_atomic_fetch_op (load_bb, addr,
9597 loaded_val, stored_val, index))
9598 return;
9600 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9601 if (store_bb == single_succ (load_bb)
9602 && !gimple_in_ssa_p (cfun)
9603 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9604 index))
9605 return;
9607 /* If we don't have specialized __sync builtins, try to implement
9608 it as a compare-and-swap loop. */
9609 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9610 loaded_val, stored_val, index))
9611 return;
9615 /* The ultimate fallback is wrapping the operation in a mutex. */
9616 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9619 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9620 at REGION_EXIT. */
9622 static void
9623 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9624 basic_block region_exit)
9626 class loop *outer = region_entry->loop_father;
9627 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9629 /* Don't parallelize the kernels region if it contains more than one outer
9630 loop. */
9631 unsigned int nr_outer_loops = 0;
9632 class loop *single_outer = NULL;
9633 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9635 gcc_assert (loop_outer (loop) == outer);
9637 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9638 continue;
9640 if (region_exit != NULL
9641 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9642 continue;
9644 nr_outer_loops++;
9645 single_outer = loop;
9647 if (nr_outer_loops != 1)
9648 return;
9650 for (class loop *loop = single_outer->inner;
9651 loop != NULL;
9652 loop = loop->inner)
9653 if (loop->next)
9654 return;
9656 /* Mark the loops in the region. */
9657 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9658 loop->in_oacc_kernels_region = true;
9661 /* Build a target argument identifier from the DEVICE identifier, value
9662 identifier ID, and whether the element also has a SUBSEQUENT_PARAM. */
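/* As a sketch of the encoding (see the GOMP_TARGET_ARG_* constants in
   gomp-constants.h): the low bits hold the device number, the value
   identifier ID is or'd in above them, and the
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM bit marks that the actual value
   follows as a separate array element. */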
9664 static tree
9665 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9667 tree t = build_int_cst (integer_type_node, device);
9668 if (subsequent_param)
9669 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9670 build_int_cst (integer_type_node,
9671 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9672 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9673 build_int_cst (integer_type_node, id));
9674 return t;
9677 /* Like above but return it in a type that can be directly stored as an
9678 element of the argument array. */
9680 static tree
9681 get_target_argument_identifier (int device, bool subsequent_param, int id)
9683 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9684 return fold_convert (ptr_type_node, t);
9687 /* Return a target argument consisting of DEVICE identifier, value identifier
9688 ID, and the actual VALUE. */
9690 static tree
9691 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9692 tree value)
9694 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9695 fold_convert (integer_type_node, value),
9696 build_int_cst (unsigned_type_node,
9697 GOMP_TARGET_ARG_VALUE_SHIFT));
9698 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9699 get_target_argument_identifier_1 (device, false, id));
9700 t = fold_convert (ptr_type_node, t);
9701 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9704 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9705 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9706 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9707 separate arguments. */
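/* For example (a sketch): 'num_teams (4)' fits into 15 bits, so it is
   pushed as a single encoded word; 'num_teams (n)' with a run-time N
   is pushed as an identifier word with GOMP_TARGET_ARG_SUBSEQUENT_PARAM
   set, followed by the value itself as a second element. */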
9709 static void
9710 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9711 int id, tree value, vec <tree> *args)
9713 if (tree_fits_shwi_p (value)
9714 && tree_to_shwi (value) > -(1 << 15)
9715 && tree_to_shwi (value) < (1 << 15))
9716 args->quick_push (get_target_argument_value (gsi, device, id, value));
9717 else
9719 args->quick_push (get_target_argument_identifier (device, true, id));
9720 value = fold_convert (ptr_type_node, value);
9721 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9722 GSI_SAME_STMT);
9723 args->quick_push (value);
9727 /* Create an array of arguments that is then passed to GOMP_target. */
9729 static tree
9730 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9732 auto_vec <tree, 6> args;
9733 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9734 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9735 if (c)
9736 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9737 else
9738 t = integer_minus_one_node;
9739 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9740 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9742 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9743 if (c)
9744 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9745 else
9746 t = integer_minus_one_node;
9747 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9748 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9749 &args);
9751 /* Produce more, perhaps device-specific, arguments here. */
9753 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9754 args.length () + 1),
9755 ".omp_target_args");
9756 for (unsigned i = 0; i < args.length (); i++)
9758 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9759 build_int_cst (integer_type_node, i),
9760 NULL_TREE, NULL_TREE);
9761 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9762 GSI_SAME_STMT);
9764 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9765 build_int_cst (integer_type_node, args.length ()),
9766 NULL_TREE, NULL_TREE);
9767 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9768 GSI_SAME_STMT);
9769 TREE_ADDRESSABLE (argarray) = 1;
9770 return build_fold_addr_expr (argarray);
9773 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9775 static void
9776 expand_omp_target (struct omp_region *region)
9778 basic_block entry_bb, exit_bb, new_bb;
9779 struct function *child_cfun;
9780 tree child_fn, child_fn2, block, t, c;
9781 gimple_stmt_iterator gsi;
9782 gomp_target *entry_stmt;
9783 gimple *stmt;
9784 edge e;
9785 bool offloaded;
9786 int target_kind;
9788 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9789 target_kind = gimple_omp_target_kind (entry_stmt);
9790 new_bb = region->entry;
9792 offloaded = is_gimple_omp_offloaded (entry_stmt);
9793 switch (target_kind)
9795 case GF_OMP_TARGET_KIND_REGION:
9796 case GF_OMP_TARGET_KIND_UPDATE:
9797 case GF_OMP_TARGET_KIND_ENTER_DATA:
9798 case GF_OMP_TARGET_KIND_EXIT_DATA:
9799 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9800 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9801 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9802 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9803 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9804 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9805 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9806 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9807 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9808 case GF_OMP_TARGET_KIND_DATA:
9809 case GF_OMP_TARGET_KIND_OACC_DATA:
9810 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9811 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9812 break;
9813 default:
9814 gcc_unreachable ();
9817 tree clauses = gimple_omp_target_clauses (entry_stmt);
9819 bool is_ancestor = false;
9820 child_fn = child_fn2 = NULL_TREE;
9821 child_cfun = NULL;
9822 if (offloaded)
9824 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9825 if (ENABLE_OFFLOADING && c)
9826 is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
9827 child_fn = gimple_omp_target_child_fn (entry_stmt);
9828 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9831 /* Supported by expand_omp_taskreg, but not here. */
9832 if (child_cfun != NULL)
9833 gcc_checking_assert (!child_cfun->cfg);
9834 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9836 entry_bb = region->entry;
9837 exit_bb = region->exit;
9839 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9840 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9842 /* From here on, all OpenACC compute constructs are mapped to
9843 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9844 To distinguish between them, we attach attributes. */
9845 switch (target_kind)
9847 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9848 DECL_ATTRIBUTES (child_fn)
9849 = tree_cons (get_identifier ("oacc parallel"),
9850 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9851 break;
9852 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9853 DECL_ATTRIBUTES (child_fn)
9854 = tree_cons (get_identifier ("oacc kernels"),
9855 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9856 break;
9857 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9858 DECL_ATTRIBUTES (child_fn)
9859 = tree_cons (get_identifier ("oacc serial"),
9860 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9861 break;
9862 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9863 DECL_ATTRIBUTES (child_fn)
9864 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9865 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9866 break;
9867 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9868 DECL_ATTRIBUTES (child_fn)
9869 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9870 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9871 break;
9872 default:
9873 /* Make sure we don't miss any. */
9874 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9875 && is_gimple_omp_offloaded (entry_stmt)));
9876 break;
9879 if (offloaded)
9881 unsigned srcidx, dstidx, num;
9883 /* If the offloading region needs data sent from the parent
9884 function, then the very first statement (except possible
9885 tree profile counter updates) of the offloading body
9886 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9887 &.OMP_DATA_O is passed as an argument to the child function,
9888 we need to replace it with the argument as seen by the child
9889 function.
9891 In most cases, this will end up being the identity assignment
9892 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9893 a function call that has been inlined, the original PARM_DECL
9894 .OMP_DATA_I may have been converted into a different local
9895 variable, in which case we need to keep the assignment. */
9896 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9897 if (data_arg)
9899 basic_block entry_succ_bb = single_succ (entry_bb);
9900 gimple_stmt_iterator gsi;
9901 tree arg;
9902 gimple *tgtcopy_stmt = NULL;
9903 tree sender = TREE_VEC_ELT (data_arg, 0);
9905 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9907 gcc_assert (!gsi_end_p (gsi));
9908 stmt = gsi_stmt (gsi);
9909 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9910 continue;
9912 if (gimple_num_ops (stmt) == 2)
9914 tree arg = gimple_assign_rhs1 (stmt);
9916 /* We're ignoring the subcode because we're
9917 effectively doing a STRIP_NOPS. */
9919 if (TREE_CODE (arg) == ADDR_EXPR
9920 && TREE_OPERAND (arg, 0) == sender)
9922 tgtcopy_stmt = stmt;
9923 break;
9928 gcc_assert (tgtcopy_stmt != NULL);
9929 arg = DECL_ARGUMENTS (child_fn);
9931 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9932 gsi_remove (&gsi, true);
9935 /* Declare local variables needed in CHILD_CFUN. */
9936 block = DECL_INITIAL (child_fn);
9937 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9938 /* The gimplifier could record temporaries in the offloading block
9939 rather than in the containing function's local_decls chain,
9940 which would mean cgraph missed finalizing them. Do it now. */
9941 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9942 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9943 varpool_node::finalize_decl (t);
9944 DECL_SAVED_TREE (child_fn) = NULL;
9945 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9946 gimple_set_body (child_fn, NULL);
9947 TREE_USED (block) = 1;
9949 /* Reset DECL_CONTEXT on function arguments. */
9950 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9951 DECL_CONTEXT (t) = child_fn;
9953 /* Split ENTRY_BB at GIMPLE_*,
9954 so that it can be moved to the child function. */
9955 gsi = gsi_last_nondebug_bb (entry_bb);
9956 stmt = gsi_stmt (gsi);
9957 gcc_assert (stmt
9958 && gimple_code (stmt) == gimple_code (entry_stmt));
9959 e = split_block (entry_bb, stmt);
9960 gsi_remove (&gsi, true);
9961 entry_bb = e->dest;
9962 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9964 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9965 if (exit_bb)
9967 gsi = gsi_last_nondebug_bb (exit_bb);
9968 gcc_assert (!gsi_end_p (gsi)
9969 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9970 stmt = gimple_build_return (NULL);
9971 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9972 gsi_remove (&gsi, true);
9975 /* Move the offloading region into CHILD_CFUN. */
9977 block = gimple_block (entry_stmt);
9979 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9980 if (exit_bb)
9981 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9982 /* When the OMP expansion process cannot guarantee an up-to-date
9983 loop tree, arrange for the child function to fix up loops. */
9984 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9985 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9987 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9988 num = vec_safe_length (child_cfun->local_decls);
9989 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9991 t = (*child_cfun->local_decls)[srcidx];
9992 if (DECL_CONTEXT (t) == cfun->decl)
9993 continue;
9994 if (srcidx != dstidx)
9995 (*child_cfun->local_decls)[dstidx] = t;
9996 dstidx++;
9998 if (dstidx != num)
9999 vec_safe_truncate (child_cfun->local_decls, dstidx);
10001 /* Inform the callgraph about the new function. */
10002 child_cfun->curr_properties = cfun->curr_properties;
10003 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
10004 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
10005 cgraph_node *node = cgraph_node::get_create (child_fn);
10006 node->parallelized_function = 1;
10007 cgraph_node::add_new_function (child_fn, true);
10009 /* Add the new function to the offload table. */
10010 if (ENABLE_OFFLOADING)
10012 if (in_lto_p)
10013 DECL_PRESERVE_P (child_fn) = 1;
10014 if (!is_ancestor)
10015 vec_safe_push (offload_funcs, child_fn);
10018 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
10019 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
10021 /* Fix the callgraph edges for child_cfun. Those for cfun will be
10022 fixed in a following pass. */
10023 push_cfun (child_cfun);
10024 if (need_asm)
10025 assign_assembler_name_if_needed (child_fn);
10026 cgraph_edge::rebuild_edges ();
10028 /* Some EH regions might become dead, see PR34608. If
10029 pass_cleanup_cfg isn't the first pass to happen with the
10030 new child, these dead EH edges might cause problems.
10031 Clean them up now. */
10032 if (flag_exceptions)
10034 basic_block bb;
10035 bool changed = false;
10037 FOR_EACH_BB_FN (bb, cfun)
10038 changed |= gimple_purge_dead_eh_edges (bb);
10039 if (changed)
10040 cleanup_tree_cfg ();
10042 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10043 verify_loop_structure ();
10044 pop_cfun ();
10046 if (dump_file && !gimple_in_ssa_p (cfun))
10048 omp_any_child_fn_dumped = true;
10049 dump_function_header (dump_file, child_fn, dump_flags);
10050 dump_function_to_file (child_fn, dump_file, dump_flags);
10053 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
10055 /* Handle the case where an inner ancestor:1 target is called by an outer
10056 target region. */
10057 if (is_ancestor)
10059 cgraph_node *fn2_node;
10060 child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
10061 FUNCTION_DECL,
10062 clone_function_name (child_fn, "nohost"),
10063 TREE_TYPE (child_fn));
10064 if (in_lto_p)
10065 DECL_PRESERVE_P (child_fn2) = 1;
10066 TREE_STATIC (child_fn2) = 1;
10067 DECL_ARTIFICIAL (child_fn2) = 1;
10068 DECL_IGNORED_P (child_fn2) = 0;
10069 TREE_PUBLIC (child_fn2) = 0;
10070 DECL_UNINLINABLE (child_fn2) = 1;
10071 DECL_EXTERNAL (child_fn2) = 0;
10072 DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
10073 DECL_INITIAL (child_fn2) = make_node (BLOCK);
10074 BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
10075 DECL_ATTRIBUTES (child_fn)
10076 = remove_attribute ("omp target entrypoint",
10077 DECL_ATTRIBUTES (child_fn));
10078 DECL_ATTRIBUTES (child_fn2)
10079 = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
10080 NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
10081 DECL_ATTRIBUTES (child_fn)
10082 = tree_cons (get_identifier ("omp target device_ancestor_host"),
10083 NULL_TREE, DECL_ATTRIBUTES (child_fn));
10084 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
10085 = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
10086 DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
10087 = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
10088 DECL_FUNCTION_VERSIONED (child_fn2)
10089 = DECL_FUNCTION_VERSIONED (current_function_decl);
10091 fn2_node = cgraph_node::get_create (child_fn2);
10092 fn2_node->offloadable = 1;
10093 fn2_node->force_output = 1;
10094 node->offloadable = 0;
10096 /* Enable pass_omp_device_lower pass. */
10097 fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
10098 fn2_node->calls_declare_variant_alt = 1;
10100 t = build_decl (DECL_SOURCE_LOCATION (child_fn),
10101 RESULT_DECL, NULL_TREE, void_type_node);
10102 DECL_ARTIFICIAL (t) = 1;
10103 DECL_IGNORED_P (t) = 1;
10104 DECL_CONTEXT (t) = child_fn2;
10105 DECL_RESULT (child_fn2) = t;
10106 DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
10107 void_type_node, NULL);
10108 tree tmp = DECL_ARGUMENTS (child_fn);
10109 t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
10110 DECL_NAME (tmp), TREE_TYPE (tmp));
10111 DECL_ARTIFICIAL (t) = 1;
10112 DECL_NAMELESS (t) = 1;
10113 DECL_ARG_TYPE (t) = ptr_type_node;
10114 DECL_CONTEXT (t) = current_function_decl;
10115 TREE_USED (t) = 1;
10116 TREE_READONLY (t) = 1;
10117 DECL_ARGUMENTS (child_fn2) = t;
10118 gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
10120 gimplify_function_tree (child_fn2);
10121 cgraph_node::add_new_function (child_fn2, true);
10123 vec_safe_push (offload_funcs, child_fn2);
10124 if (dump_file && !gimple_in_ssa_p (cfun))
10126 dump_function_header (dump_file, child_fn2, dump_flags);
10127 dump_function_to_file (child_fn2, dump_file, dump_flags);
10132 /* Emit a library call to launch the offloading region, or do data
10133 transfers. */
10134 tree t1, t2, t3, t4, depend;
10135 enum built_in_function start_ix;
10136 unsigned int flags_i = 0;
10138 switch (gimple_omp_target_kind (entry_stmt))
10140 case GF_OMP_TARGET_KIND_REGION:
10141 start_ix = BUILT_IN_GOMP_TARGET;
10142 break;
10143 case GF_OMP_TARGET_KIND_DATA:
10144 start_ix = BUILT_IN_GOMP_TARGET_DATA;
10145 break;
10146 case GF_OMP_TARGET_KIND_UPDATE:
10147 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
10148 break;
10149 case GF_OMP_TARGET_KIND_ENTER_DATA:
10150 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10151 break;
10152 case GF_OMP_TARGET_KIND_EXIT_DATA:
10153 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10154 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
10155 break;
10156 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10157 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10158 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10159 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10160 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10161 start_ix = BUILT_IN_GOACC_PARALLEL;
10162 break;
10163 case GF_OMP_TARGET_KIND_OACC_DATA:
10164 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10165 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10166 start_ix = BUILT_IN_GOACC_DATA_START;
10167 break;
10168 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10169 start_ix = BUILT_IN_GOACC_UPDATE;
10170 break;
10171 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10172 start_ix = BUILT_IN_GOACC_ENTER_DATA;
10173 break;
10174 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10175 start_ix = BUILT_IN_GOACC_EXIT_DATA;
10176 break;
10177 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10178 start_ix = BUILT_IN_GOACC_DECLARE;
10179 break;
10180 default:
10181 gcc_unreachable ();
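/* Summary of the dispatch above (added for orientation; derived from
   the switch itself, not a separate table in the sources):

     #pragma omp target             -> BUILT_IN_GOMP_TARGET
     #pragma omp target data        -> BUILT_IN_GOMP_TARGET_DATA
     #pragma omp target update      -> BUILT_IN_GOMP_TARGET_UPDATE
     #pragma omp target enter data  -> BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
     #pragma omp target exit data   -> BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
                                       plus GOMP_TARGET_FLAG_EXIT_DATA
     #pragma acc parallel/kernels/serial -> BUILT_IN_GOACC_PARALLEL  */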
10184 tree device = NULL_TREE;
10185 location_t device_loc = UNKNOWN_LOCATION;
10186 tree goacc_flags = NULL_TREE;
10187 bool need_device_adjustment = false;
10188 gimple_stmt_iterator adj_gsi;
10189 if (is_gimple_omp_oacc (entry_stmt))
10191 /* By default, no GOACC_FLAGs are set. */
10192 goacc_flags = integer_zero_node;
10194 else
10196 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10197 if (c)
10199 device = OMP_CLAUSE_DEVICE_ID (c);
10200 /* Ensure 'device' is of the correct type. */
10201 device = fold_convert_loc (device_loc, integer_type_node, device);
10202 if (TREE_CODE (device) == INTEGER_CST)
10204 if (wi::to_wide (device) == GOMP_DEVICE_ICV)
10205 device = build_int_cst (integer_type_node,
10206 GOMP_DEVICE_HOST_FALLBACK);
10207 else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10208 device = build_int_cst (integer_type_node,
10209 GOMP_DEVICE_HOST_FALLBACK - 1);
10211 else
10212 need_device_adjustment = true;
10213 device_loc = OMP_CLAUSE_LOCATION (c);
10214 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10215 device = build_int_cst (integer_type_node,
10216 GOMP_DEVICE_HOST_FALLBACK);
10218 else
10220 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
10221 library choose). */
10222 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10223 device_loc = gimple_location (entry_stmt);
10226 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10227 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10228 nowait doesn't appear. */
10229 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10230 c = NULL;
10231 if (c)
10232 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
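/* Illustrative effect of the clause handling above (an added sketch):

     #pragma omp target device (2) nowait
       -> device = 2, flags_i |= GOMP_TARGET_FLAG_NOWAIT
     #pragma omp target
       -> device = GOMP_DEVICE_ICV, i.e. defer to the runtime's
          default-device ICV; no nowait flag set.  */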
10235 /* By default, there is no conditional. */
10236 tree cond = NULL_TREE;
10237 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10238 if (c)
10239 cond = OMP_CLAUSE_IF_EXPR (c);
10240 /* If we found the clause 'if (cond)', build:
10241 OpenACC: goacc_flags = (cond ? goacc_flags
10242 : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10243 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10244 if (cond)
10246 tree *tp;
10247 if (is_gimple_omp_oacc (entry_stmt))
10248 tp = &goacc_flags;
10249 else
10250 tp = &device;
10252 cond = gimple_boolify (cond);
10254 basic_block cond_bb, then_bb, else_bb;
10255 edge e;
10256 tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10257 if (offloaded)
10258 e = split_block_after_labels (new_bb);
10259 else
10261 gsi = gsi_last_nondebug_bb (new_bb);
10262 gsi_prev (&gsi);
10263 e = split_block (new_bb, gsi_stmt (gsi));
10265 cond_bb = e->src;
10266 new_bb = e->dest;
10267 remove_edge (e);
10269 then_bb = create_empty_bb (cond_bb);
10270 else_bb = create_empty_bb (then_bb);
10271 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10272 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10274 stmt = gimple_build_cond_empty (cond);
10275 gsi = gsi_last_bb (cond_bb);
10276 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10278 gsi = gsi_start_bb (then_bb);
10279 stmt = gimple_build_assign (tmp_var, *tp);
10280 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10281 adj_gsi = gsi;
10283 gsi = gsi_start_bb (else_bb);
10284 if (is_gimple_omp_oacc (entry_stmt))
10285 stmt = gimple_build_assign (tmp_var,
10286 BIT_IOR_EXPR,
10287 *tp,
10288 build_int_cst (integer_type_node,
10289 GOACC_FLAG_HOST_FALLBACK));
10290 else
10291 stmt = gimple_build_assign (tmp_var,
10292 build_int_cst (integer_type_node,
10293 GOMP_DEVICE_HOST_FALLBACK));
10294 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10296 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10297 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10298 add_bb_to_loop (then_bb, cond_bb->loop_father);
10299 add_bb_to_loop (else_bb, cond_bb->loop_father);
10300 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10301 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10303 *tp = tmp_var;
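/* At this point the emitted GIMPLE looks roughly like this sketch
   (OpenMP variant; the OpenACC path instead ORs in
   GOACC_FLAG_HOST_FALLBACK on the else arm):

     cond_bb:  if (cond) goto then_bb; else goto else_bb;
     then_bb:  tmp_var = device;
     else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
     new_bb:   ... tmp_var feeds the device/flags argument ...  */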
10305 gsi = gsi_last_nondebug_bb (new_bb);
10307 else
10309 gsi = gsi_last_nondebug_bb (new_bb);
10311 if (device != NULL_TREE)
10312 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10313 true, GSI_SAME_STMT);
10314 if (need_device_adjustment)
10316 tree tmp_var = create_tmp_var (TREE_TYPE (device));
10317 stmt = gimple_build_assign (tmp_var, device);
10318 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10319 adj_gsi = gsi_for_stmt (stmt);
10320 device = tmp_var;
10324 if (need_device_adjustment)
10326 tree uns = fold_convert (unsigned_type_node, device);
10327 uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10328 false, GSI_CONTINUE_LINKING);
10329 edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10330 basic_block cond_bb = e->src;
10331 basic_block else_bb = e->dest;
10332 if (gsi_bb (adj_gsi) == new_bb)
10334 new_bb = else_bb;
10335 gsi = gsi_last_nondebug_bb (new_bb);
10338 basic_block then_bb = create_empty_bb (cond_bb);
10339 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10341 cond = build2 (GT_EXPR, boolean_type_node, uns,
10342 build_int_cst (unsigned_type_node,
10343 GOMP_DEVICE_HOST_FALLBACK - 1));
10344 stmt = gimple_build_cond_empty (cond);
10345 adj_gsi = gsi_last_bb (cond_bb);
10346 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10348 adj_gsi = gsi_start_bb (then_bb);
10349 tree add = build2 (PLUS_EXPR, integer_type_node, device,
10350 build_int_cst (integer_type_node, -1));
10351 stmt = gimple_build_assign (device, add);
10352 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10354 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10355 e->flags = EDGE_FALSE_VALUE;
10356 add_bb_to_loop (then_bb, cond_bb->loop_father);
10357 make_edge (then_bb, else_bb, EDGE_FALLTHRU);
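/* The run-time check just built is, in effect (a sketch assuming the
   usual gomp-constants.h encoding):

     if ((unsigned) device > (unsigned) (GOMP_DEVICE_HOST_FALLBACK - 1))
       device = device - 1;

   giving a non-constant device expression the same remapping of
   GOMP_DEVICE_ICV / GOMP_DEVICE_HOST_FALLBACK that constant device
   values receive at compile time further above.  */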
10360 t = gimple_omp_target_data_arg (entry_stmt);
10361 if (t == NULL)
10363 t1 = size_zero_node;
10364 t2 = build_zero_cst (ptr_type_node);
10365 t3 = t2;
10366 t4 = t2;
10368 else
10370 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10371 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10372 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10373 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10374 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
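/* Added note for orientation: here T is the 3-element TREE_VEC recorded
   by gimple_omp_target_data_arg -- the mapped-address array, the sizes
   array and the map-kinds array.  t1 is the element count, taken as the
   sizes array's maximum domain index plus one, while t2/t3/t4 are the
   host addresses of the three arrays as handed to the runtime.  */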
10377 gimple *g;
10378 bool tagging = false;
10379 /* The maximum number of arguments used by any start_ix, without varargs. */
10380 auto_vec<tree, 11> args;
10381 if (is_gimple_omp_oacc (entry_stmt))
10383 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10384 TREE_TYPE (goacc_flags), goacc_flags);
10385 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10386 NULL_TREE, true,
10387 GSI_SAME_STMT);
10388 args.quick_push (goacc_flags_m);
10390 else
10391 args.quick_push (device);
10392 if (offloaded)
10393 args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
10394 args.quick_push (t1);
10395 args.quick_push (t2);
10396 args.quick_push (t3);
10397 args.quick_push (t4);
10398 switch (start_ix)
10400 case BUILT_IN_GOACC_DATA_START:
10401 case BUILT_IN_GOACC_DECLARE:
10402 case BUILT_IN_GOMP_TARGET_DATA:
10403 break;
10404 case BUILT_IN_GOMP_TARGET:
10405 case BUILT_IN_GOMP_TARGET_UPDATE:
10406 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10407 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10408 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10409 if (c)
10410 depend = OMP_CLAUSE_DECL (c);
10411 else
10412 depend = build_int_cst (ptr_type_node, 0);
10413 args.quick_push (depend);
10414 if (start_ix == BUILT_IN_GOMP_TARGET)
10415 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10416 break;
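/* The argument vector assembled for BUILT_IN_GOMP_TARGET now lines up
   with the libgomp entry point, roughly (a sketch, not a verbatim
   prototype):

     GOMP_target_ext (device, fn, num_maps, hostaddrs, sizes, kinds,
                      flags, depend, args);  */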
10417 case BUILT_IN_GOACC_PARALLEL:
10418 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10420 tree dims = NULL_TREE;
10421 unsigned int ix;
10423 /* For serial constructs we set all dimensions to 1. */
10424 for (ix = GOMP_DIM_MAX; ix--;)
10425 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10426 oacc_replace_fn_attrib (child_fn, dims);
10428 else
10429 oacc_set_fn_attrib (child_fn, clauses, &args);
10430 tagging = true;
10431 /* FALLTHRU */
10432 case BUILT_IN_GOACC_ENTER_DATA:
10433 case BUILT_IN_GOACC_EXIT_DATA:
10434 case BUILT_IN_GOACC_UPDATE:
10436 tree t_async = NULL_TREE;
10438 /* If present, use the value specified by the respective
10439 clause, making sure that it is of the correct type. */
10440 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10441 if (c)
10442 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10443 integer_type_node,
10444 OMP_CLAUSE_ASYNC_EXPR (c));
10445 else if (!tagging)
10446 /* Default values for t_async. */
10447 t_async = fold_convert_loc (gimple_location (entry_stmt),
10448 integer_type_node,
10449 build_int_cst (integer_type_node,
10450 GOMP_ASYNC_SYNC));
10451 if (tagging && t_async)
10453 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10455 if (TREE_CODE (t_async) == INTEGER_CST)
10457 /* See if we can pack the async arg into the tag's
10458 operand. */
10459 i_async = TREE_INT_CST_LOW (t_async);
10460 if (i_async < GOMP_LAUNCH_OP_MAX)
10461 t_async = NULL_TREE;
10462 else
10463 i_async = GOMP_LAUNCH_OP_MAX;
10465 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10466 i_async));
10468 if (t_async)
10469 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10470 NULL_TREE, true,
10471 GSI_SAME_STMT));
10473 /* Save the argument index, and ... */
10474 unsigned t_wait_idx = args.length ();
10475 unsigned num_waits = 0;
10476 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10477 if (!tagging || c)
10478 /* ... push a placeholder. */
10479 args.safe_push (integer_zero_node);
10481 for (; c; c = OMP_CLAUSE_CHAIN (c))
10482 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10484 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10485 integer_type_node,
10486 OMP_CLAUSE_WAIT_EXPR (c));
10487 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10488 GSI_SAME_STMT);
10489 args.safe_push (arg);
10490 num_waits++;
10493 if (!tagging || num_waits)
10495 tree len;
10497 /* Now that we know the number, update the placeholder. */
10498 if (tagging)
10499 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10500 else
10501 len = build_int_cst (integer_type_node, num_waits);
10502 len = fold_convert_loc (gimple_location (entry_stmt),
10503 unsigned_type_node, len);
10504 args[t_wait_idx] = len;
10507 break;
10508 default:
10509 gcc_unreachable ();
10511 if (tagging)
10512 /* Push terminal marker - zero. */
10513 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
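/* For a tagged OpenACC launch the trailing arguments thus form a list
   of packed GOMP_LAUNCH_* operands, roughly (an added sketch):

     GOACC_parallel_keyed (goacc_flags, fn, num_maps, hostaddrs, sizes,
                           kinds, <GOMP_LAUNCH_ASYNC tag>,
                           <GOMP_LAUNCH_WAIT tag>, wait1, ..., waitN, 0);

   where the final zero is the terminal marker pushed just above.  */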
10515 if (child_fn2)
10517 g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
10518 build_fold_addr_expr (child_fn));
10519 gimple_set_location (g, gimple_location (entry_stmt));
10520 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10523 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10524 gimple_set_location (g, gimple_location (entry_stmt));
10525 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10526 if (!offloaded)
10528 g = gsi_stmt (gsi);
10529 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10530 gsi_remove (&gsi, true);
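/* Added note: for the non-offloaded kinds (data/update/enter/exit data
   and their OpenACC equivalents) there is no child function, so the
   GIMPLE_OMP_TARGET statement itself is removed here and only the
   library call built above remains in the IL.  */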
10534 /* Expand the parallel region tree rooted at REGION. Expansion
10535 proceeds in depth-first order. Innermost regions are expanded
10536 first. This way, parallel regions that require a new function to
10537 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10538 internal dependencies in their body. */
10540 static void
10541 expand_omp (struct omp_region *region)
10543 omp_any_child_fn_dumped = false;
10544 while (region)
10546 location_t saved_location;
10547 gimple *inner_stmt = NULL;
10549 /* First, determine whether this is a combined parallel+workshare
10550 region. */
10551 if (region->type == GIMPLE_OMP_PARALLEL)
10552 determine_parallel_type (region);
10554 if (region->type == GIMPLE_OMP_FOR
10555 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10556 inner_stmt = last_stmt (region->inner->entry);
10558 if (region->inner)
10559 expand_omp (region->inner);
10561 saved_location = input_location;
10562 if (gimple_has_location (last_stmt (region->entry)))
10563 input_location = gimple_location (last_stmt (region->entry));
10565 switch (region->type)
10567 case GIMPLE_OMP_PARALLEL:
10568 case GIMPLE_OMP_TASK:
10569 expand_omp_taskreg (region);
10570 break;
10572 case GIMPLE_OMP_FOR:
10573 expand_omp_for (region, inner_stmt);
10574 break;
10576 case GIMPLE_OMP_SECTIONS:
10577 expand_omp_sections (region);
10578 break;
10580 case GIMPLE_OMP_SECTION:
10581 /* Individual omp sections are handled together with their
10582 parent GIMPLE_OMP_SECTIONS region. */
10583 break;
10585 case GIMPLE_OMP_SINGLE:
10586 case GIMPLE_OMP_SCOPE:
10587 expand_omp_single (region);
10588 break;
10590 case GIMPLE_OMP_ORDERED:
10592 gomp_ordered *ord_stmt
10593 = as_a <gomp_ordered *> (last_stmt (region->entry));
10594 if (gimple_omp_ordered_standalone_p (ord_stmt))
10596 /* We'll expand these when expanding the corresponding
10597 worksharing region with an ordered(n) clause. */
10598 gcc_assert (region->outer
10599 && region->outer->type == GIMPLE_OMP_FOR);
10600 region->ord_stmt = ord_stmt;
10601 break;
10604 /* FALLTHRU */
10605 case GIMPLE_OMP_MASTER:
10606 case GIMPLE_OMP_MASKED:
10607 case GIMPLE_OMP_TASKGROUP:
10608 case GIMPLE_OMP_CRITICAL:
10609 case GIMPLE_OMP_TEAMS:
10610 expand_omp_synch (region);
10611 break;
10613 case GIMPLE_OMP_ATOMIC_LOAD:
10614 expand_omp_atomic (region);
10615 break;
10617 case GIMPLE_OMP_TARGET:
10618 expand_omp_target (region);
10619 break;
10621 default:
10622 gcc_unreachable ();
10625 input_location = saved_location;
10626 region = region->next;
10628 if (omp_any_child_fn_dumped)
10630 if (dump_file)
10631 dump_function_header (dump_file, current_function_decl, dump_flags);
10632 omp_any_child_fn_dumped = false;
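/* Worked example for expand_omp (an added sketch): given

       #pragma omp parallel
       #pragma omp for
       for (i = 0; i < n; i++) ...

   the GIMPLE_OMP_FOR region is the inner child of the GIMPLE_OMP_PARALLEL
   region, so the loop is expanded first; outlining the parallel body
   afterwards therefore sees no remaining unexpanded OMP statements.  */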
10636 /* Helper for build_omp_regions. Scan the dominator tree starting at
10637 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10638 true, the function ends once a single tree is built (otherwise, a whole
10639 forest of OMP constructs may be built). */
10641 static void
10642 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10643 bool single_tree)
10645 gimple_stmt_iterator gsi;
10646 gimple *stmt;
10647 basic_block son;
10649 gsi = gsi_last_nondebug_bb (bb);
10650 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10652 struct omp_region *region;
10653 enum gimple_code code;
10655 stmt = gsi_stmt (gsi);
10656 code = gimple_code (stmt);
10657 if (code == GIMPLE_OMP_RETURN)
10659 /* STMT is the return point out of region PARENT. Mark it
10660 as the exit point and make PARENT the immediately
10661 enclosing region. */
10662 gcc_assert (parent);
10663 region = parent;
10664 region->exit = bb;
10665 parent = parent->outer;
10667 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10669 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10670 GIMPLE_OMP_RETURN, but matches with
10671 GIMPLE_OMP_ATOMIC_LOAD. */
10672 gcc_assert (parent);
10673 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10674 region = parent;
10675 region->exit = bb;
10676 parent = parent->outer;
10678 else if (code == GIMPLE_OMP_CONTINUE)
10680 gcc_assert (parent);
10681 parent->cont = bb;
10683 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10685 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10686 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10688 else
10690 region = new_omp_region (bb, code, parent);
10691 /* Otherwise... */
10692 if (code == GIMPLE_OMP_TARGET)
10694 switch (gimple_omp_target_kind (stmt))
10696 case GF_OMP_TARGET_KIND_REGION:
10697 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10698 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10699 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10700 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10701 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10702 break;
10703 case GF_OMP_TARGET_KIND_UPDATE:
10704 case GF_OMP_TARGET_KIND_ENTER_DATA:
10705 case GF_OMP_TARGET_KIND_EXIT_DATA:
10706 case GF_OMP_TARGET_KIND_DATA:
10707 case GF_OMP_TARGET_KIND_OACC_DATA:
10708 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10709 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10710 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10711 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10712 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10713 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10714 /* ..., other than for those stand-alone directives...
10715 To be precise, target data isn't stand-alone, but the
10716 gimplifier puts the end API call into a try/finally block
10717 for it, so OMP expansion can treat it as such. */
10718 region = NULL;
10719 break;
10720 default:
10721 gcc_unreachable ();
10724 else if (code == GIMPLE_OMP_ORDERED
10725 && gimple_omp_ordered_standalone_p (stmt))
10726 /* #pragma omp ordered depend is also just a stand-alone
10727 directive. */
10728 region = NULL;
10729 else if (code == GIMPLE_OMP_TASK
10730 && gimple_omp_task_taskwait_p (stmt))
10731 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10732 region = NULL;
10733 else if (code == GIMPLE_OMP_TASKGROUP)
10734 /* #pragma omp taskgroup isn't a stand-alone directive, but
10735 the gimplifier puts the end API call into a try/finally block
10736 for it, so OMP expansion can treat it as such. */
10737 region = NULL;
10738 /* ..., this directive becomes the parent for a new region. */
10739 if (region)
10740 parent = region;
10744 if (single_tree && !parent)
10745 return;
10747 for (son = first_dom_son (CDI_DOMINATORS, bb);
10748 son;
10749 son = next_dom_son (CDI_DOMINATORS, son))
10750 build_omp_regions_1 (son, parent, single_tree);
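/* Illustrative shape of the result (an added sketch): for

       #pragma omp parallel
       {
         #pragma omp single
         ...
       }

   the PARALLEL region's ->inner points at the SINGLE region, and each
   region's ->exit is the block whose last statement is the matching
   GIMPLE_OMP_RETURN.  */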
10753 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10754 root_omp_region. */
10756 static void
10757 build_omp_regions_root (basic_block root)
10759 gcc_assert (root_omp_region == NULL);
10760 build_omp_regions_1 (root, NULL, true);
10761 gcc_assert (root_omp_region != NULL);
10764 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10766 void
10767 omp_expand_local (basic_block head)
10769 build_omp_regions_root (head);
10770 if (dump_file && (dump_flags & TDF_DETAILS))
10772 fprintf (dump_file, "\nOMP region tree\n\n");
10773 dump_omp_region (dump_file, root_omp_region, 0);
10774 fprintf (dump_file, "\n");
10777 remove_exit_barriers (root_omp_region);
10778 expand_omp (root_omp_region);
10780 omp_free_regions ();
10783 /* Scan the CFG and build a tree of OMP regions, storing its root in
10784 root_omp_region. */
10786 static void
10787 build_omp_regions (void)
10789 gcc_assert (root_omp_region == NULL);
10790 calculate_dominance_info (CDI_DOMINATORS);
10791 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10794 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10796 static unsigned int
10797 execute_expand_omp (void)
10799 build_omp_regions ();
10801 if (!root_omp_region)
10802 return 0;
10804 if (dump_file)
10806 fprintf (dump_file, "\nOMP region tree\n\n");
10807 dump_omp_region (dump_file, root_omp_region, 0);
10808 fprintf (dump_file, "\n");
10811 remove_exit_barriers (root_omp_region);
10813 expand_omp (root_omp_region);
10815 omp_free_regions ();
10817 return (TODO_cleanup_cfg
10818 | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
10821 /* OMP expansion -- the default pass, run before creation of SSA form. */
10823 namespace {
10825 const pass_data pass_data_expand_omp =
10827 GIMPLE_PASS, /* type */
10828 "ompexp", /* name */
10829 OPTGROUP_OMP, /* optinfo_flags */
10830 TV_NONE, /* tv_id */
10831 PROP_gimple_any, /* properties_required */
10832 PROP_gimple_eomp, /* properties_provided */
10833 0, /* properties_destroyed */
10834 0, /* todo_flags_start */
10835 0, /* todo_flags_finish */
10838 class pass_expand_omp : public gimple_opt_pass
10840 public:
10841 pass_expand_omp (gcc::context *ctxt)
10842 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10845 /* opt_pass methods: */
10846 unsigned int execute (function *) final override
10848 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10849 || flag_openmp_simd != 0)
10850 && !seen_error ());
10852 /* This pass always runs, to provide PROP_gimple_eomp.
10853 But often, there is nothing to do. */
10854 if (!gate)
10855 return 0;
10857 return execute_expand_omp ();
10860 }; // class pass_expand_omp
10862 } // anon namespace
10864 gimple_opt_pass *
10865 make_pass_expand_omp (gcc::context *ctxt)
10867 return new pass_expand_omp (ctxt);
10870 namespace {
10872 const pass_data pass_data_expand_omp_ssa =
10874 GIMPLE_PASS, /* type */
10875 "ompexpssa", /* name */
10876 OPTGROUP_OMP, /* optinfo_flags */
10877 TV_NONE, /* tv_id */
10878 PROP_cfg | PROP_ssa, /* properties_required */
10879 PROP_gimple_eomp, /* properties_provided */
10880 0, /* properties_destroyed */
10881 0, /* todo_flags_start */
10882 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10885 class pass_expand_omp_ssa : public gimple_opt_pass
10887 public:
10888 pass_expand_omp_ssa (gcc::context *ctxt)
10889 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10892 /* opt_pass methods: */
10893 bool gate (function *fun) final override
10895 return !(fun->curr_properties & PROP_gimple_eomp);
10897 unsigned int execute (function *) final override
10899 return execute_expand_omp ();
10901 opt_pass * clone () final override
10903 return new pass_expand_omp_ssa (m_ctxt);
10906 }; // class pass_expand_omp_ssa
10908 } // anon namespace
10910 gimple_opt_pass *
10911 make_pass_expand_omp_ssa (gcc::context *ctxt)
10913 return new pass_expand_omp_ssa (ctxt);
10916 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10917 GIMPLE_* codes. */
10919 bool
10920 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10921 int *region_idx)
10923 gimple *last = last_stmt (bb);
10924 enum gimple_code code = gimple_code (last);
10925 struct omp_region *cur_region = *region;
10926 bool fallthru = false;
10928 switch (code)
10930 case GIMPLE_OMP_PARALLEL:
10931 case GIMPLE_OMP_FOR:
10932 case GIMPLE_OMP_SINGLE:
10933 case GIMPLE_OMP_TEAMS:
10934 case GIMPLE_OMP_MASTER:
10935 case GIMPLE_OMP_MASKED:
10936 case GIMPLE_OMP_SCOPE:
10937 case GIMPLE_OMP_CRITICAL:
10938 case GIMPLE_OMP_SECTION:
10939 cur_region = new_omp_region (bb, code, cur_region);
10940 fallthru = true;
10941 break;
10943 case GIMPLE_OMP_TASKGROUP:
10944 cur_region = new_omp_region (bb, code, cur_region);
10945 fallthru = true;
10946 cur_region = cur_region->outer;
10947 break;
10949 case GIMPLE_OMP_TASK:
10950 cur_region = new_omp_region (bb, code, cur_region);
10951 fallthru = true;
10952 if (gimple_omp_task_taskwait_p (last))
10953 cur_region = cur_region->outer;
10954 break;
10956 case GIMPLE_OMP_ORDERED:
10957 cur_region = new_omp_region (bb, code, cur_region);
10958 fallthru = true;
10959 if (gimple_omp_ordered_standalone_p (last))
10960 cur_region = cur_region->outer;
10961 break;
10963 case GIMPLE_OMP_TARGET:
10964 cur_region = new_omp_region (bb, code, cur_region);
10965 fallthru = true;
10966 switch (gimple_omp_target_kind (last))
10968 case GF_OMP_TARGET_KIND_REGION:
10969 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10970 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10971 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10972 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10973 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10974 break;
10975 case GF_OMP_TARGET_KIND_UPDATE:
10976 case GF_OMP_TARGET_KIND_ENTER_DATA:
10977 case GF_OMP_TARGET_KIND_EXIT_DATA:
10978 case GF_OMP_TARGET_KIND_DATA:
10979 case GF_OMP_TARGET_KIND_OACC_DATA:
10980 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10981 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10982 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10983 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10984 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10985 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10986 cur_region = cur_region->outer;
10987 break;
10988 default:
10989 gcc_unreachable ();
10991 break;
10993 case GIMPLE_OMP_SECTIONS:
10994 cur_region = new_omp_region (bb, code, cur_region);
10995 fallthru = true;
10996 break;
10998 case GIMPLE_OMP_SECTIONS_SWITCH:
10999 fallthru = false;
11000 break;
11002 case GIMPLE_OMP_ATOMIC_LOAD:
11003 case GIMPLE_OMP_ATOMIC_STORE:
11004 fallthru = true;
11005 break;
11007 case GIMPLE_OMP_RETURN:
11008 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
11009 somewhere other than the next block. This will be
11010 created later. */
11011 cur_region->exit = bb;
11012 if (cur_region->type == GIMPLE_OMP_TASK)
11013 /* Add an edge corresponding to not scheduling the task
11014 immediately. */
11015 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
11016 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
11017 cur_region = cur_region->outer;
11018 break;
11020 case GIMPLE_OMP_CONTINUE:
11021 cur_region->cont = bb;
11022 switch (cur_region->type)
11024 case GIMPLE_OMP_FOR:
11025 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
11026 successor edges as abnormal to prevent splitting
11027 them. */
11028 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
11029 /* Make the loopback edge. */
11030 make_edge (bb, single_succ (cur_region->entry),
11031 EDGE_ABNORMAL);
11033 /* Create an edge from GIMPLE_OMP_FOR to exit, which
11034 corresponds to the case that the body of the loop
11035 is not executed at all. */
11036 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
11037 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
11038 fallthru = false;
11039 break;
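/* Sketch of the edges created for a GIMPLE_OMP_FOR above (added for
   orientation):

     entry (OMP_FOR)     -> body head       marked EDGE_ABNORMAL
     cont (OMP_CONTINUE) -> body head       loopback, EDGE_ABNORMAL
     entry               -> block after cont   zero-iteration path
     cont                -> block after cont   EDGE_FALLTHRU | EDGE_ABNORMAL  */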
11041 case GIMPLE_OMP_SECTIONS:
11042 /* Wire up the edges into and out of the nested sections. */
11044 basic_block switch_bb = single_succ (cur_region->entry);
11046 struct omp_region *i;
11047 for (i = cur_region->inner; i ; i = i->next)
11049 gcc_assert (i->type == GIMPLE_OMP_SECTION);
11050 make_edge (switch_bb, i->entry, 0);
11051 make_edge (i->exit, bb, EDGE_FALLTHRU);
11054 /* Make the loopback edge to the block with
11055 GIMPLE_OMP_SECTIONS_SWITCH. */
11056 make_edge (bb, switch_bb, 0);
11058 /* Make the edge from the switch to exit. */
11059 make_edge (switch_bb, bb->next_bb, 0);
11060 fallthru = false;
11062 break;
11064 case GIMPLE_OMP_TASK:
11065 fallthru = true;
11066 break;
11068 default:
11069 gcc_unreachable ();
11071 break;
11073 default:
11074 gcc_unreachable ();
11077 if (*region != cur_region)
11079 *region = cur_region;
11080 if (cur_region)
11081 *region_idx = cur_region->entry->index;
11082 else
11083 *region_idx = 0;
11086 return fallthru;