/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}
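
/* Illustrative note (added in editing, not part of the original
   source): assuming a constant vectorization factor VF (a power of
   two), the two folds above compute

     (chunk_size + VF - 1) & -VF

   i.e. CHUNK_SIZE rounded up to the next multiple of VF.  For
   example, with VF = 8 and chunk_size = 10, (10 + 7) & -8 == 16.  */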

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
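
/* Illustrative note (added in editing, not part of the original
   source): given perfectly nested directives such as

     #pragma omp parallel
     #pragma omp for schedule (dynamic)
     for (i = 0; i < n; i++)
       ...

   both regions are marked is_combined_parallel here, and
   expand_parallel_call later emits a single combined
   GOMP_parallel_loop_* call instead of separate parallel and
   workshare library calls.  */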

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at basic block BB inside
   region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to the decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region,
     when expanding the task region current_function_decl will be the
     original source function, but we actually want to use as context
     the child function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
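
/* Illustrative note (added in editing, not part of the original
   source): for a plain #pragma omp parallel with no clauses the call
   built above amounts to

     __builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   where the two zeros mean "choose the number of threads at run
   time" and empty flags; the child function and data record names
   are invented for the example.  */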

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
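
/* Illustrative note (added in editing, not part of the original
   source): the ten arguments built above correspond to

     GOMP_task (fn, data, cpyfn, arg_size, arg_align, if_cond, flags,
		depend, priority, detach);

   a taskloop instead calls GOMP_taskloop{,_ull} (fn, data, cpyfn,
   arg_size, arg_align, flags, num_tasks, priority, start, end,
   step).  */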

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  enum built_in_function f = (nowait
			      ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
			      : BUILT_IN_GOMP_TASKWAIT_DEPEND);
  tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
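
/* Illustrative note (added in editing, not part of the original
   source): e.g. #pragma omp taskwait depend(in: x) reaches this
   point as a bodyless GIMPLE_OMP_TASK and becomes a single call to
   GOMP_taskwait_depend (or GOMP_taskwait_depend_nowait when the
   nowait clause is present), passing the address of the depend
   array.  */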

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  ENTRY_STMT is the
   GIMPLE_OMP_TEAMS statement being expanded.  BB is the block where
   to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
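
/* Illustrative note (added in editing, not part of the original
   source): the five arguments pushed above correspond to

     GOMP_teams_reg (fn, data, num_teams, thread_limit, flags);

   with num_teams and thread_limit zero when the respective clause is
   absent, and flags currently always zero.  */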

/* Chain all the DECLs in vector V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
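
/* Illustrative note (added in editing, not part of the original
   source): the addressable-vars check above guards cases like

     #pragma omp parallel
     {
       int x = 0;
       #pragma omp task shared(x)
	 x = 1;
       #pragma omp for
       for (...) ...
     }

   The for's trailing barrier looks redundant next to the parallel's
   implicit barrier, but because the queued task references the
   addressable local X, dropping the barrier could let X go out of
   scope before the task runs, so the GIMPLE_OMP_RETURN is left
   alone.  */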

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for a task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
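
/* Illustrative note (added in editing, not part of the original
   source): inside an outlined parallel body a call such as

     i = omp_get_thread_num ();

   is redirected to __builtin_omp_get_thread_num (), which is declared
   const, so later passes can CSE repeated calls within the body.  */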

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
		       tree lhs, tree rhs, bool after = false)
{
  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
  if (after)
    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }
  return cond_stmt;
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
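
/* Illustrative note (added in editing, not part of the original
   source): for an upward-counting loop the computation above is

     iters = (range - dir + step) / step

   e.g. for (i = 0; i < n; i++) has range == n, dir == 1 and
   step == 1, giving iters == (n - 1 + 1) / 1 == n.  */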

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
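
/* Illustrative note (added in editing, not part of the original
   source): for collapse(2) with counts {iters0, iters1} the loop
   above effectively decomposes the linear index IVAR as

     v1 = base1 + (ivar % iters1) * step1;
     v0 = base0 + (ivar / iters1) * step0;

   recovering each collapsed loop's iteration variable.  */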
1709 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1710 of the combined collapse > 1 loop constructs, generate code like:
1711 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712 if (cond3 is <)
1713 adj = STEP3 - 1;
1714 else
1715 adj = STEP3 + 1;
1716 count3 = (adj + N32 - N31) / STEP3;
1717 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718 if (cond2 is <)
1719 adj = STEP2 - 1;
1720 else
1721 adj = STEP2 + 1;
1722 count2 = (adj + N22 - N21) / STEP2;
1723 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724 if (cond1 is <)
1725 adj = STEP1 - 1;
1726 else
1727 adj = STEP1 + 1;
1728 count1 = (adj + N12 - N11) / STEP1;
1729 count = count1 * count2 * count3;
1730 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731 count = 0;
1732 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1733 of the combined loop constructs, just initialize COUNTS array
1734 from the _looptemp_ clauses. For loop nests with non-rectangular
1735 loops, do this only for the rectangular loops. Then pick
1736 the loops which reference outer vars in their bound expressions
1737 and the loops which they refer to and for this sub-nest compute
1738 number of iterations. For triangular loops use Faulhaber's formula,
1739 otherwise as a fallback, compute by iterating the loops.
1740 If e.g. the sub-nest is
1741 for (I = N11; I COND1 N12; I += STEP1)
1742 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1745 COUNT = 0;
1746 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747 for (tmpj = M21 * tmpi + N21;
1748 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1750 int tmpk1 = M31 * tmpj + N31;
1751 int tmpk2 = M32 * tmpj + N32;
1752 if (tmpk1 COND3 tmpk2)
1754 if (COND3 is <)
1755 adj = STEP3 - 1;
1756 else
1757 adj = STEP3 + 1;
1758 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1761 and finally multiply the counts of the rectangular loops not
1762 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1763 store number of iterations of the loops from fd->first_nonrect
1764 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765 by the counts of rectangular loops not referenced in any non-rectangular
1766 loops sandwitched in between those. */
1768 /* NOTE: It *could* be better to moosh all of the BBs together,
1769 creating one larger BB with all the computation and the unexpected
1770 jump at the end. I.e.
1772 bool zero3, zero2, zero1, zero;
1774 zero3 = N32 c3 N31;
1775 count3 = (N32 - N31) /[cl] STEP3;
1776 zero2 = N22 c2 N21;
1777 count2 = (N22 - N21) /[cl] STEP2;
1778 zero1 = N12 c1 N11;
1779 count1 = (N12 - N11) /[cl] STEP1;
1780 zero = zero3 || zero2 || zero1;
1781 count = count1 * count2 * count3;
1782 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1784 After all, we expect the zero=false, and thus we expect to have to
1785 evaluate all of the comparison expressions, so short-circuiting
1786 oughtn't be a win. Since the condition isn't protecting a
1787 denominator, we're not concerned about divide-by-zero, so we can
1788 fully evaluate count even if a numerator turned out to be wrong.
1790 It seems like putting this all together would create much better
1791 scheduling opportunities, and put less pressure on the chip's branch
1792 predictor. */
1794 static void
1795 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 basic_block &entry_bb, tree *counts,
1797 basic_block &zero_iter1_bb, int &first_zero_iter1,
1798 basic_block &zero_iter2_bb, int &first_zero_iter2,
1799 basic_block &l2_dom_bb)
1801 tree t, type = TREE_TYPE (fd->loop.v);
1802 edge e, ne;
1803 int i;
1805 /* Collapsed loops need work for expansion into SSA form. */
1806 gcc_assert (!gimple_in_ssa_p (cfun));
1808 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1811 gcc_assert (fd->ordered == 0);
1812 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813 isn't supposed to be handled, as the inner loop doesn't
1814 use it. */
1815 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816 OMP_CLAUSE__LOOPTEMP_);
1817 gcc_assert (innerc);
1818 for (i = 0; i < fd->collapse; i++)
1820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821 OMP_CLAUSE__LOOPTEMP_);
1822 gcc_assert (innerc);
1823 if (i)
1824 counts[i] = OMP_CLAUSE_DECL (innerc);
1825 else
1826 counts[0] = NULL_TREE;
1828 if (fd->non_rect
1829 && fd->last_nonrect == fd->first_nonrect + 1
1830 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1832 tree c[4];
1833 for (i = 0; i < 4; i++)
1835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836 OMP_CLAUSE__LOOPTEMP_);
1837 gcc_assert (innerc);
1838 c[i] = OMP_CLAUSE_DECL (innerc);
1840 counts[0] = c[0];
1841 fd->first_inner_iterations = c[1];
1842 fd->factor = c[2];
1843 fd->adjn1 = c[3];
1845 return;
1848 for (i = fd->collapse; i < fd->ordered; i++)
1850 tree itype = TREE_TYPE (fd->loops[i].v);
1851 counts[i] = NULL_TREE;
1852 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853 fold_convert (itype, fd->loops[i].n1),
1854 fold_convert (itype, fd->loops[i].n2));
1855 if (t && integer_zerop (t))
1857 for (i = fd->collapse; i < fd->ordered; i++)
1858 counts[i] = build_int_cst (type, 0);
1859 break;
1862 bool rect_count_seen = false;
1863 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1865 tree itype = TREE_TYPE (fd->loops[i].v);
1867 if (i >= fd->collapse && counts[i])
1868 continue;
1869 if (fd->non_rect)
1871 /* Skip loops that use outer iterators in their expressions
1872 during this phase. */
1873 if (fd->loops[i].m1 || fd->loops[i].m2)
1875 counts[i] = build_zero_cst (type);
1876 continue;
1879 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881 fold_convert (itype, fd->loops[i].n1),
1882 fold_convert (itype, fd->loops[i].n2)))
1883 == NULL_TREE || !integer_onep (t)))
1885 gcond *cond_stmt;
1886 tree n1, n2;
1887 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889 true, GSI_SAME_STMT);
1890 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892 true, GSI_SAME_STMT);
1893 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894 n1, n2);
1895 e = split_block (entry_bb, cond_stmt);
1896 basic_block &zero_iter_bb
1897 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898 int &first_zero_iter
1899 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900 if (zero_iter_bb == NULL)
1902 gassign *assign_stmt;
1903 first_zero_iter = i;
1904 zero_iter_bb = create_empty_bb (entry_bb);
1905 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906 *gsi = gsi_after_labels (zero_iter_bb);
1907 if (i < fd->collapse)
1908 assign_stmt = gimple_build_assign (fd->loop.n2,
1909 build_zero_cst (type));
1910 else
1912 counts[i] = create_tmp_reg (type, ".count");
1913 assign_stmt
1914 = gimple_build_assign (counts[i], build_zero_cst (type));
1916 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918 entry_bb);
1920 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921 ne->probability = profile_probability::very_unlikely ();
1922 e->flags = EDGE_TRUE_VALUE;
1923 e->probability = ne->probability.invert ();
1924 if (l2_dom_bb == NULL)
1925 l2_dom_bb = entry_bb;
1926 entry_bb = e->dest;
1927 *gsi = gsi_last_nondebug_bb (entry_bb);
1930 if (POINTER_TYPE_P (itype))
1931 itype = signed_type_for (itype);
1932 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933 ? -1 : 1));
1934 t = fold_build2 (PLUS_EXPR, itype,
1935 fold_convert (itype, fd->loops[i].step), t);
1936 t = fold_build2 (PLUS_EXPR, itype, t,
1937 fold_convert (itype, fd->loops[i].n2));
1938 t = fold_build2 (MINUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n1));
1940 /* ?? We could probably use CEIL_DIV_EXPR instead of
1941 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1942 generate the same code in the end because generically we
1943 don't know that the values involved must be negative for
1944 GT?? */
1945 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947 fold_build1 (NEGATE_EXPR, itype, t),
1948 fold_build1 (NEGATE_EXPR, itype,
1949 fold_convert (itype,
1950 fd->loops[i].step)));
1951 else
1952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953 fold_convert (itype, fd->loops[i].step));
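/* Editorial worked example of the count just computed: with n1 = 0,
   n2 = 10, step = 3 and cond_code LT_EXPR, the adjustment is
   step - 1 = 2, so count = (2 + 10 - 0) / 3 = 4, i.e. the iterations
   i = 0, 3, 6, 9.  */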
1954 t = fold_convert (type, t);
1955 if (TREE_CODE (t) == INTEGER_CST)
1956 counts[i] = t;
1957 else
1959 if (i < fd->collapse || i != first_zero_iter2)
1960 counts[i] = create_tmp_reg (type, ".count");
1961 expand_omp_build_assign (gsi, counts[i], t);
1963 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1965 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966 continue;
1967 if (!rect_count_seen)
1969 t = counts[i];
1970 rect_count_seen = true;
1972 else
1973 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974 expand_omp_build_assign (gsi, fd->loop.n2, t);
1977 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1979 gcc_assert (fd->last_nonrect != -1);
1981 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983 build_zero_cst (type));
1984 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985 if (fd->loops[i].m1
1986 || fd->loops[i].m2
1987 || fd->loops[i].non_rect_referenced)
1988 break;
1989 if (i == fd->last_nonrect
1990 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1994 int o = fd->first_nonrect;
1995 tree itype = TREE_TYPE (fd->loops[o].v);
1996 tree n1o = create_tmp_reg (itype, ".n1o");
1997 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998 expand_omp_build_assign (gsi, n1o, t);
1999 tree n2o = create_tmp_reg (itype, ".n2o");
2000 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001 expand_omp_build_assign (gsi, n2o, t);
2002 if (fd->loops[i].m1 && fd->loops[i].m2)
2003 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004 unshare_expr (fd->loops[i].m1));
2005 else if (fd->loops[i].m1)
2006 t = fold_build1 (NEGATE_EXPR, itype,
2007 unshare_expr (fd->loops[i].m1));
2008 else
2009 t = unshare_expr (fd->loops[i].m2);
2010 tree m2minusm1
2011 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2014 gimple_stmt_iterator gsi2 = *gsi;
2015 gsi_prev (&gsi2);
2016 e = split_block (entry_bb, gsi_stmt (gsi2));
2017 e = split_block (e->dest, (gimple *) NULL);
2018 basic_block bb1 = e->src;
2019 entry_bb = e->dest;
2020 *gsi = gsi_after_labels (entry_bb);
2022 gsi2 = gsi_after_labels (bb1);
2023 tree ostep = fold_convert (itype, fd->loops[o].step);
2024 t = build_int_cst (itype, (fd->loops[o].cond_code
2025 == LT_EXPR ? -1 : 1));
2026 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029 if (TYPE_UNSIGNED (itype)
2030 && fd->loops[o].cond_code == GT_EXPR)
2031 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032 fold_build1 (NEGATE_EXPR, itype, t),
2033 fold_build1 (NEGATE_EXPR, itype, ostep));
2034 else
2035 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036 tree outer_niters
2037 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038 true, GSI_SAME_STMT);
2039 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040 build_one_cst (itype));
2041 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044 true, GSI_SAME_STMT);
2045 tree n1, n2, n1e, n2e;
2046 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047 if (fd->loops[i].m1)
2049 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2053 else
2054 n1 = t;
2055 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056 true, GSI_SAME_STMT);
2057 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058 if (fd->loops[i].m2)
2060 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2064 else
2065 n2 = t;
2066 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067 true, GSI_SAME_STMT);
2068 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069 if (fd->loops[i].m1)
2071 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2075 else
2076 n1e = t;
2077 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078 true, GSI_SAME_STMT);
2079 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080 if (fd->loops[i].m2)
2082 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2086 else
2087 n2e = t;
2088 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089 true, GSI_SAME_STMT);
2090 gcond *cond_stmt
2091 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092 n1, n2);
2093 e = split_block (bb1, cond_stmt);
2094 e->flags = EDGE_TRUE_VALUE;
2095 e->probability = profile_probability::likely ().guessed ();
2096 basic_block bb2 = e->dest;
2097 gsi2 = gsi_after_labels (bb2);
2099 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100 n1e, n2e);
2101 e = split_block (bb2, cond_stmt);
2102 e->flags = EDGE_TRUE_VALUE;
2103 e->probability = profile_probability::likely ().guessed ();
2104 gsi2 = gsi_after_labels (e->dest);
2106 tree step = fold_convert (itype, fd->loops[i].step);
2107 t = build_int_cst (itype, (fd->loops[i].cond_code
2108 == LT_EXPR ? -1 : 1));
2109 t = fold_build2 (PLUS_EXPR, itype, step, t);
2110 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112 if (TYPE_UNSIGNED (itype)
2113 && fd->loops[i].cond_code == GT_EXPR)
2114 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115 fold_build1 (NEGATE_EXPR, itype, t),
2116 fold_build1 (NEGATE_EXPR, itype, step));
2117 else
2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119 tree first_inner_iterations
2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121 true, GSI_SAME_STMT);
2122 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123 if (TYPE_UNSIGNED (itype)
2124 && fd->loops[i].cond_code == GT_EXPR)
2125 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126 fold_build1 (NEGATE_EXPR, itype, t),
2127 fold_build1 (NEGATE_EXPR, itype, step));
2128 else
2129 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130 tree factor
2131 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132 true, GSI_SAME_STMT);
2133 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134 build_one_cst (itype));
2135 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137 t = fold_build2 (MULT_EXPR, itype, factor, t);
2138 t = fold_build2 (PLUS_EXPR, itype,
2139 fold_build2 (MULT_EXPR, itype, outer_niters,
2140 first_inner_iterations), t);
2141 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142 fold_convert (type, t));
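/* The assignment above uses the closed form
     count = outer_niters * first_inner_iterations
	     + factor * (outer_niters - 1) * outer_niters / 2
   i.e. the sum of first_inner_iterations + k * factor over
   k = 0 .. outer_niters - 1, Faulhaber's formula for a linearly
   growing inner iteration count.  */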
2144 basic_block bb3 = create_empty_bb (bb1);
2145 add_bb_to_loop (bb3, bb1->loop_father);
2147 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148 e->probability = profile_probability::unlikely ().guessed ();
2150 gsi2 = gsi_after_labels (bb3);
2151 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152 n1e, n2e);
2153 e = split_block (bb3, cond_stmt);
2154 e->flags = EDGE_TRUE_VALUE;
2155 e->probability = profile_probability::likely ().guessed ();
2156 basic_block bb4 = e->dest;
2158 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159 ne->probability = e->probability.invert ();
2161 basic_block bb5 = create_empty_bb (bb2);
2162 add_bb_to_loop (bb5, bb2->loop_father);
2164 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165 ne->probability = profile_probability::unlikely ().guessed ();
2167 for (int j = 0; j < 2; j++)
2169 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170 t = fold_build2 (MINUS_EXPR, itype,
2171 unshare_expr (fd->loops[i].n1),
2172 unshare_expr (fd->loops[i].n2));
2173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174 tree tem
2175 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 true, GSI_SAME_STMT);
2177 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181 true, GSI_SAME_STMT);
2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183 if (fd->loops[i].m1)
2185 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2189 else
2190 n1 = t;
2191 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192 true, GSI_SAME_STMT);
2193 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194 if (fd->loops[i].m2)
2196 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2200 else
2201 n2 = t;
2202 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203 true, GSI_SAME_STMT);
2204 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2206 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207 n1, n2);
2208 e = split_block (gsi_bb (gsi2), cond_stmt);
2209 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210 e->probability = profile_probability::unlikely ().guessed ();
2211 ne = make_edge (e->src, bb1,
2212 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213 ne->probability = e->probability.invert ();
2214 gsi2 = gsi_after_labels (e->dest);
2216 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2219 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2222 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2226 if (fd->first_nonrect + 1 == fd->last_nonrect)
2228 fd->first_inner_iterations = first_inner_iterations;
2229 fd->factor = factor;
2230 fd->adjn1 = n1o;
2233 else
2235 /* Fallback implementation. Evaluate the loops with m1/m2
2236 non-NULL as well as their outer loops at runtime using temporaries
2237 instead of the original iteration variables, and in the
2238 body just bump the counter. */
2239 gimple_stmt_iterator gsi2 = *gsi;
2240 gsi_prev (&gsi2);
2241 e = split_block (entry_bb, gsi_stmt (gsi2));
2242 e = split_block (e->dest, (gimple *) NULL);
2243 basic_block cur_bb = e->src;
2244 basic_block next_bb = e->dest;
2245 entry_bb = e->dest;
2246 *gsi = gsi_after_labels (entry_bb);
2248 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2251 for (i = 0; i <= fd->last_nonrect; i++)
2253 if (fd->loops[i].m1 == NULL_TREE
2254 && fd->loops[i].m2 == NULL_TREE
2255 && !fd->loops[i].non_rect_referenced)
2256 continue;
2258 tree itype = TREE_TYPE (fd->loops[i].v);
2260 gsi2 = gsi_after_labels (cur_bb);
2261 tree n1, n2;
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263 if (fd->loops[i].m1 == NULL_TREE)
2264 n1 = t;
2265 else if (POINTER_TYPE_P (itype))
2267 gcc_assert (integer_onep (fd->loops[i].m1));
2268 t = unshare_expr (fd->loops[i].n1);
2269 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271 else
2273 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2274 n1 = fold_build2 (MULT_EXPR, itype,
2275 vs[i - fd->loops[i].outer], n1);
2276 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2279 true, GSI_SAME_STMT);
2280 if (i < fd->last_nonrect)
2282 vs[i] = create_tmp_reg (itype, ".it");
2283 expand_omp_build_assign (&gsi2, vs[i], n1);
2285 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2286 if (fd->loops[i].m2 == NULL_TREE)
2287 n2 = t;
2288 else if (POINTER_TYPE_P (itype))
2290 gcc_assert (integer_onep (fd->loops[i].m2));
2291 t = unshare_expr (fd->loops[i].n2);
2292 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2294 else
2296 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2297 n2 = fold_build2 (MULT_EXPR, itype,
2298 vs[i - fd->loops[i].outer], n2);
2299 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2301 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2302 true, GSI_SAME_STMT);
2303 if (POINTER_TYPE_P (itype))
2304 itype = signed_type_for (itype);
2305 if (i == fd->last_nonrect)
2307 gcond *cond_stmt
2308 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2309 n1, n2);
2310 e = split_block (cur_bb, cond_stmt);
2311 e->flags = EDGE_TRUE_VALUE;
2312 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2313 e->probability = profile_probability::likely ().guessed ();
2314 ne->probability = e->probability.invert ();
2315 gsi2 = gsi_after_labels (e->dest);
2317 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2318 ? -1 : 1));
2319 t = fold_build2 (PLUS_EXPR, itype,
2320 fold_convert (itype, fd->loops[i].step), t);
2321 t = fold_build2 (PLUS_EXPR, itype, t,
2322 fold_convert (itype, n2));
2323 t = fold_build2 (MINUS_EXPR, itype, t,
2324 fold_convert (itype, n1));
2325 tree step = fold_convert (itype, fd->loops[i].step);
2326 if (TYPE_UNSIGNED (itype)
2327 && fd->loops[i].cond_code == GT_EXPR)
2328 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2329 fold_build1 (NEGATE_EXPR, itype, t),
2330 fold_build1 (NEGATE_EXPR, itype, step));
2331 else
2332 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2333 t = fold_convert (type, t);
2334 t = fold_build2 (PLUS_EXPR, type,
2335 counts[fd->last_nonrect], t);
2336 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2337 true, GSI_SAME_STMT);
2338 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2339 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2340 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2341 break;
2343 e = split_block (cur_bb, last_stmt (cur_bb));
2345 basic_block new_cur_bb = create_empty_bb (cur_bb);
2346 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2348 gsi2 = gsi_after_labels (e->dest);
2349 tree step = fold_convert (itype,
2350 unshare_expr (fd->loops[i].step));
2351 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2352 t = fold_build_pointer_plus (vs[i], step);
2353 else
2354 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2355 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2356 true, GSI_SAME_STMT);
2357 expand_omp_build_assign (&gsi2, vs[i], t);
2359 ne = split_block (e->dest, last_stmt (e->dest));
2360 gsi2 = gsi_after_labels (ne->dest);
2362 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2363 edge e3, e4;
2364 if (next_bb == entry_bb)
2366 e3 = find_edge (ne->dest, next_bb);
2367 e3->flags = EDGE_FALSE_VALUE;
2369 else
2370 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2371 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2372 e4->probability = profile_probability::likely ().guessed ();
2373 e3->probability = e4->probability.invert ();
2374 basic_block esrc = e->src;
2375 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2376 cur_bb = new_cur_bb;
2377 basic_block latch_bb = next_bb;
2378 next_bb = e->dest;
2379 remove_edge (e);
2380 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2381 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2382 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2385 t = NULL_TREE;
2386 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2387 if (!fd->loops[i].non_rect_referenced
2388 && fd->loops[i].m1 == NULL_TREE
2389 && fd->loops[i].m2 == NULL_TREE)
2391 if (t == NULL_TREE)
2392 t = counts[i];
2393 else
2394 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2396 if (t)
2398 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2399 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2401 if (!rect_count_seen)
2402 t = counts[fd->last_nonrect];
2403 else
2404 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2405 counts[fd->last_nonrect]);
2406 expand_omp_build_assign (gsi, fd->loop.n2, t);
2408 else if (fd->non_rect)
2410 tree t = fd->loop.n2;
2411 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2412 int non_rect_referenced = 0, non_rect = 0;
2413 for (i = 0; i < fd->collapse; i++)
2415 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2416 && !integer_zerop (counts[i]))
2417 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2418 if (fd->loops[i].non_rect_referenced)
2419 non_rect_referenced++;
2420 if (fd->loops[i].m1 || fd->loops[i].m2)
2421 non_rect++;
2423 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2424 counts[fd->last_nonrect] = t;
2428 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2429 T = V;
2430 V3 = N31 + (T % count3) * STEP3;
2431 T = T / count3;
2432 V2 = N21 + (T % count2) * STEP2;
2433 T = T / count2;
2434 V1 = N11 + T * STEP1;
2435 if this loop doesn't have an inner loop construct combined with it.
2436 If it does have an inner loop construct combined with it and the
2437 iteration count isn't known constant, store values from counts array
2438 into its _looptemp_ temporaries instead.
2439 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2440 inclusive), use the count of all those loops together, and either
2441 find quadratic etc. equation roots, or as a fallback, do:
2442 COUNT = 0;
2443 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2444 for (tmpj = M21 * tmpi + N21;
2445 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2447 int tmpk1 = M31 * tmpj + N31;
2448 int tmpk2 = M32 * tmpj + N32;
2449 if (tmpk1 COND3 tmpk2)
2451 if (COND3 is <)
2452 adj = STEP3 - 1;
2453 else
2454 adj = STEP3 + 1;
2455 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2456 if (COUNT + temp > T)
2458 V1 = tmpi;
2459 V2 = tmpj;
2460 V3 = tmpk1 + (T - COUNT) * STEP3;
2461 goto done;
2463 else
2464 COUNT += temp;
2467 done:;
2468 but for optional innermost or outermost rectangular loops that aren't
2469 referenced by other loop expressions, keep doing the division/modulo. */
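/* Editorial sketch (hypothetical helper, not compiler code) of the
   division/modulo decomposition described above, for a rectangular
   collapse(3) nest:

     static void
     decompose (unsigned long long t,	    // logical iteration number
		unsigned long long count2, // iterations of middle loop
		unsigned long long count3, // iterations of inner loop
		unsigned long long *i1, unsigned long long *i2,
		unsigned long long *i3)
     {
       *i3 = t % count3; t /= count3;
       *i2 = t % count2; t /= count2;
       *i1 = t;
     }

   V3 is then N31 + *i3 * STEP3 and similarly for V2 and V1; the code
   below only departs from this scheme for the non-rectangular
   sub-nest.  */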
2471 static void
2472 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2473 tree *counts, tree *nonrect_bounds,
2474 gimple *inner_stmt, tree startvar)
2476 int i;
2477 if (gimple_omp_for_combined_p (fd->for_stmt))
2479 /* If fd->loop.n2 is constant, then no propagation of the counts
2480 is needed, they are constant. */
2481 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2482 return;
2484 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2485 ? gimple_omp_taskreg_clauses (inner_stmt)
2486 : gimple_omp_for_clauses (inner_stmt);
2487 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2488 isn't supposed to be handled, as the inner loop doesn't
2489 use it. */
2490 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2491 gcc_assert (innerc);
2492 int count = 0;
2493 if (fd->non_rect
2494 && fd->last_nonrect == fd->first_nonrect + 1
2495 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2496 count = 4;
2497 for (i = 0; i < fd->collapse + count; i++)
2499 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2500 OMP_CLAUSE__LOOPTEMP_);
2501 gcc_assert (innerc);
2502 if (i)
2504 tree tem = OMP_CLAUSE_DECL (innerc);
2505 tree t;
2506 if (i < fd->collapse)
2507 t = counts[i];
2508 else
2509 switch (i - fd->collapse)
2511 case 0: t = counts[0]; break;
2512 case 1: t = fd->first_inner_iterations; break;
2513 case 2: t = fd->factor; break;
2514 case 3: t = fd->adjn1; break;
2515 default: gcc_unreachable ();
2517 t = fold_convert (TREE_TYPE (tem), t);
2518 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2519 false, GSI_CONTINUE_LINKING);
2520 gassign *stmt = gimple_build_assign (tem, t);
2521 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2524 return;
2527 tree type = TREE_TYPE (fd->loop.v);
2528 tree tem = create_tmp_reg (type, ".tem");
2529 gassign *stmt = gimple_build_assign (tem, startvar);
2530 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2532 for (i = fd->collapse - 1; i >= 0; i--)
2534 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2535 itype = vtype;
2536 if (POINTER_TYPE_P (vtype))
2537 itype = signed_type_for (vtype);
2538 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2539 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2540 else
2541 t = tem;
2542 if (i == fd->last_nonrect)
2544 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2545 false, GSI_CONTINUE_LINKING);
2546 tree stopval = t;
2547 tree idx = create_tmp_reg (type, ".count");
2548 expand_omp_build_assign (gsi, idx,
2549 build_zero_cst (type), true);
2550 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2551 if (fd->first_nonrect + 1 == fd->last_nonrect
2552 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2553 || fd->first_inner_iterations)
2554 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2555 != CODE_FOR_nothing)
2556 && !integer_zerop (fd->loop.n2))
2558 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2559 tree itype = TREE_TYPE (fd->loops[i].v);
2560 tree first_inner_iterations = fd->first_inner_iterations;
2561 tree factor = fd->factor;
2562 gcond *cond_stmt
2563 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2564 build_zero_cst (TREE_TYPE (factor)));
2565 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2566 basic_block bb0 = e->src;
2567 e->flags = EDGE_TRUE_VALUE;
2568 e->probability = profile_probability::likely ();
2569 bb_triang_dom = bb0;
2570 *gsi = gsi_after_labels (e->dest);
2571 tree slltype = long_long_integer_type_node;
2572 tree ulltype = long_long_unsigned_type_node;
2573 tree stopvalull = fold_convert (ulltype, stopval);
2574 stopvalull
2575 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2576 false, GSI_CONTINUE_LINKING);
2577 first_inner_iterations
2578 = fold_convert (slltype, first_inner_iterations);
2579 first_inner_iterations
2580 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2581 NULL_TREE, false,
2582 GSI_CONTINUE_LINKING);
2583 factor = fold_convert (slltype, factor);
2584 factor
2585 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2586 false, GSI_CONTINUE_LINKING);
2587 tree first_inner_iterationsd
2588 = fold_build1 (FLOAT_EXPR, double_type_node,
2589 first_inner_iterations);
2590 first_inner_iterationsd
2591 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2592 NULL_TREE, false,
2593 GSI_CONTINUE_LINKING);
2594 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2595 factor);
2596 factord = force_gimple_operand_gsi (gsi, factord, true,
2597 NULL_TREE, false,
2598 GSI_CONTINUE_LINKING);
2599 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2600 stopvalull);
2601 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2602 NULL_TREE, false,
2603 GSI_CONTINUE_LINKING);
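/* Editorial derivation of what the floating point code below solves:
   after C outer iterations the cumulative inner iteration count of
   the triangular sub-nest is
     g(C) = C * first_inner_iterations + factor * C * (C - 1) / 2.
   Locating logical iteration stopval means solving
     (factor / 2) * C^2 + (first_inner_iterations - factor / 2) * C
       - stopval = 0
   whose positive root, with t3 = first_inner_iterations - factor / 2,
   is
     C = (sqrt (t3 * t3 + 2 * factor * stopval) - t3) / factor.
   This is exactly the value computed into c via IFN_SQRT below and
   then re-verified against d in exact integer arithmetic, because
   the double computation may have rounded.  */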
2604 /* Temporarily disable flag_rounding_math; the values are just
2605 integers divided by 2, and worst case imprecisions due to too
2606 large values ought to be caught later by the checks for
2607 fallback. */
2608 int save_flag_rounding_math = flag_rounding_math;
2609 flag_rounding_math = 0;
2610 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2611 build_real (double_type_node, dconst2));
2612 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2613 first_inner_iterationsd, t);
2614 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2615 GSI_CONTINUE_LINKING);
2616 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2617 build_real (double_type_node, dconst2));
2618 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2619 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2620 fold_build2 (MULT_EXPR, double_type_node,
2621 t3, t3));
2622 flag_rounding_math = save_flag_rounding_math;
2623 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2624 GSI_CONTINUE_LINKING);
2625 if (flag_exceptions
2626 && cfun->can_throw_non_call_exceptions
2627 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2629 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2630 build_zero_cst (double_type_node));
2631 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2632 false, GSI_CONTINUE_LINKING);
2633 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2634 boolean_false_node,
2635 NULL_TREE, NULL_TREE);
2637 else
2638 cond_stmt
2639 = gimple_build_cond (LT_EXPR, t,
2640 build_zero_cst (double_type_node),
2641 NULL_TREE, NULL_TREE);
2642 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2643 e = split_block (gsi_bb (*gsi), cond_stmt);
2644 basic_block bb1 = e->src;
2645 e->flags = EDGE_FALSE_VALUE;
2646 e->probability = profile_probability::very_likely ();
2647 *gsi = gsi_after_labels (e->dest);
2648 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2649 tree sqrtr = create_tmp_var (double_type_node);
2650 gimple_call_set_lhs (call, sqrtr);
2651 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2652 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2653 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2654 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2655 tree c = create_tmp_var (ulltype);
2656 tree d = create_tmp_var (ulltype);
2657 expand_omp_build_assign (gsi, c, t, true);
2658 t = fold_build2 (MINUS_EXPR, ulltype, c,
2659 build_one_cst (ulltype));
2660 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2661 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2662 t = fold_build2 (MULT_EXPR, ulltype,
2663 fold_convert (ulltype, fd->factor), t);
2664 tree t2
2665 = fold_build2 (MULT_EXPR, ulltype, c,
2666 fold_convert (ulltype,
2667 fd->first_inner_iterations));
2668 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2669 expand_omp_build_assign (gsi, d, t, true);
2670 t = fold_build2 (MULT_EXPR, ulltype,
2671 fold_convert (ulltype, fd->factor), c);
2672 t = fold_build2 (PLUS_EXPR, ulltype,
2673 t, fold_convert (ulltype,
2674 fd->first_inner_iterations));
2675 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 GSI_CONTINUE_LINKING);
2677 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2678 NULL_TREE, NULL_TREE);
2679 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2680 e = split_block (gsi_bb (*gsi), cond_stmt);
2681 basic_block bb2 = e->src;
2682 e->flags = EDGE_TRUE_VALUE;
2683 e->probability = profile_probability::very_likely ();
2684 *gsi = gsi_after_labels (e->dest);
2685 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2686 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2687 GSI_CONTINUE_LINKING);
2688 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2689 NULL_TREE, NULL_TREE);
2690 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2691 e = split_block (gsi_bb (*gsi), cond_stmt);
2692 basic_block bb3 = e->src;
2693 e->flags = EDGE_FALSE_VALUE;
2694 e->probability = profile_probability::very_likely ();
2695 *gsi = gsi_after_labels (e->dest);
2696 t = fold_convert (itype, c);
2697 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2698 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2699 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2700 GSI_CONTINUE_LINKING);
2701 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2702 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2703 t2 = fold_convert (itype, t2);
2704 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2705 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2706 if (fd->loops[i].m1)
2708 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2709 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2711 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2712 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2713 bb_triang = e->src;
2714 *gsi = gsi_after_labels (e->dest);
2715 remove_edge (e);
2716 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2717 e->probability = profile_probability::very_unlikely ();
2718 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2719 e->probability = profile_probability::very_unlikely ();
2720 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2721 e->probability = profile_probability::very_unlikely ();
2723 basic_block bb4 = create_empty_bb (bb0);
2724 add_bb_to_loop (bb4, bb0->loop_father);
2725 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2726 e->probability = profile_probability::unlikely ();
2727 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2728 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2729 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2730 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2731 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2732 counts[i], counts[i - 1]);
2733 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2734 GSI_CONTINUE_LINKING);
2735 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2736 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2737 t = fold_convert (itype, t);
2738 t2 = fold_convert (itype, t2);
2739 t = fold_build2 (MULT_EXPR, itype, t,
2740 fold_convert (itype, fd->loops[i].step));
2741 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2742 t2 = fold_build2 (MULT_EXPR, itype, t2,
2743 fold_convert (itype, fd->loops[i - 1].step));
2744 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2745 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2746 false, GSI_CONTINUE_LINKING);
2747 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2748 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2749 if (fd->loops[i].m1)
2751 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2752 fd->loops[i - 1].v);
2753 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2755 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2756 false, GSI_CONTINUE_LINKING);
2757 stmt = gimple_build_assign (fd->loops[i].v, t);
2758 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2760 /* Fallback implementation. Evaluate the loops between
2761 fd->first_nonrect and fd->last_nonrect (inclusive) at
2762 runtime using temporaries instead of the original iteration
2763 variables; in the body just bump the counter and compare
2764 with the desired value. */
2765 gimple_stmt_iterator gsi2 = *gsi;
2766 basic_block entry_bb = gsi_bb (gsi2);
2767 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2768 e = split_block (e->dest, (gimple *) NULL);
2769 basic_block dom_bb = NULL;
2770 basic_block cur_bb = e->src;
2771 basic_block next_bb = e->dest;
2772 entry_bb = e->dest;
2773 *gsi = gsi_after_labels (entry_bb);
2775 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2776 tree n1 = NULL_TREE, n2 = NULL_TREE;
2777 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2779 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2781 tree itype = TREE_TYPE (fd->loops[j].v);
2782 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2783 && fd->loops[j].m2 == NULL_TREE
2784 && !fd->loops[j].non_rect_referenced);
2785 gsi2 = gsi_after_labels (cur_bb);
2786 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2787 if (fd->loops[j].m1 == NULL_TREE)
2788 n1 = rect_p ? build_zero_cst (type) : t;
2789 else if (POINTER_TYPE_P (itype))
2791 gcc_assert (integer_onep (fd->loops[j].m1));
2792 t = unshare_expr (fd->loops[j].n1);
2793 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2795 else
2797 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2798 n1 = fold_build2 (MULT_EXPR, itype,
2799 vs[j - fd->loops[j].outer], n1);
2800 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2802 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2803 true, GSI_SAME_STMT);
2804 if (j < fd->last_nonrect)
2806 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2807 expand_omp_build_assign (&gsi2, vs[j], n1);
2809 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2810 if (fd->loops[j].m2 == NULL_TREE)
2811 n2 = rect_p ? counts[j] : t;
2812 else if (POINTER_TYPE_P (itype))
2814 gcc_assert (integer_onep (fd->loops[j].m2));
2815 t = unshare_expr (fd->loops[j].n2);
2816 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2818 else
2820 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2821 n2 = fold_build2 (MULT_EXPR, itype,
2822 vs[j - fd->loops[j].outer], n2);
2823 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2825 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2826 true, GSI_SAME_STMT);
2827 if (POINTER_TYPE_P (itype))
2828 itype = signed_type_for (itype);
2829 if (j == fd->last_nonrect)
2831 gcond *cond_stmt
2832 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2833 n1, n2);
2834 e = split_block (cur_bb, cond_stmt);
2835 e->flags = EDGE_TRUE_VALUE;
2836 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2837 e->probability = profile_probability::likely ().guessed ();
2838 ne->probability = e->probability.invert ();
2839 gsi2 = gsi_after_labels (e->dest);
2841 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2842 ? -1 : 1));
2843 t = fold_build2 (PLUS_EXPR, itype,
2844 fold_convert (itype, fd->loops[j].step), t);
2845 t = fold_build2 (PLUS_EXPR, itype, t,
2846 fold_convert (itype, n2));
2847 t = fold_build2 (MINUS_EXPR, itype, t,
2848 fold_convert (itype, n1));
2849 tree step = fold_convert (itype, fd->loops[j].step);
2850 if (TYPE_UNSIGNED (itype)
2851 && fd->loops[j].cond_code == GT_EXPR)
2852 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2853 fold_build1 (NEGATE_EXPR, itype, t),
2854 fold_build1 (NEGATE_EXPR, itype, step));
2855 else
2856 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2857 t = fold_convert (type, t);
2858 t = fold_build2 (PLUS_EXPR, type, idx, t);
2859 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2860 true, GSI_SAME_STMT);
2861 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2862 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2863 cond_stmt
2864 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2865 NULL_TREE);
2866 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2867 e = split_block (gsi_bb (gsi2), cond_stmt);
2868 e->flags = EDGE_TRUE_VALUE;
2869 e->probability = profile_probability::likely ().guessed ();
2870 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2871 ne->probability = e->probability.invert ();
2872 gsi2 = gsi_after_labels (e->dest);
2873 expand_omp_build_assign (&gsi2, idx, t);
2874 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2875 break;
2877 e = split_block (cur_bb, last_stmt (cur_bb));
2879 basic_block new_cur_bb = create_empty_bb (cur_bb);
2880 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2882 gsi2 = gsi_after_labels (e->dest);
2883 if (rect_p)
2884 t = fold_build2 (PLUS_EXPR, type, vs[j],
2885 build_one_cst (type));
2886 else
2888 tree step
2889 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2890 if (POINTER_TYPE_P (vtype))
2891 t = fold_build_pointer_plus (vs[j], step);
2892 else
2893 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2895 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2896 true, GSI_SAME_STMT);
2897 expand_omp_build_assign (&gsi2, vs[j], t);
2899 edge ne = split_block (e->dest, last_stmt (e->dest));
2900 gsi2 = gsi_after_labels (ne->dest);
2902 gcond *cond_stmt;
2903 if (next_bb == entry_bb)
2904 /* No need to actually check the outermost condition. */
2905 cond_stmt
2906 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2907 boolean_true_node,
2908 NULL_TREE, NULL_TREE);
2909 else
2910 cond_stmt
2911 = gimple_build_cond (rect_p ? LT_EXPR
2912 : fd->loops[j].cond_code,
2913 vs[j], n2, NULL_TREE, NULL_TREE);
2914 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2915 edge e3, e4;
2916 if (next_bb == entry_bb)
2918 e3 = find_edge (ne->dest, next_bb);
2919 e3->flags = EDGE_FALSE_VALUE;
2920 dom_bb = ne->dest;
2922 else
2923 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2924 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2925 e4->probability = profile_probability::likely ().guessed ();
2926 e3->probability = e4->probability.invert ();
2927 basic_block esrc = e->src;
2928 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2929 cur_bb = new_cur_bb;
2930 basic_block latch_bb = next_bb;
2931 next_bb = e->dest;
2932 remove_edge (e);
2933 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2934 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2935 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2937 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2939 tree vtype = TREE_TYPE (fd->loops[j].v);
2940 tree itype = vtype;
2941 if (POINTER_TYPE_P (itype))
2942 itype = signed_type_for (itype);
2943 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2944 && fd->loops[j].m2 == NULL_TREE
2945 && !fd->loops[j].non_rect_referenced);
2946 if (j == fd->last_nonrect)
2948 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2949 t = fold_convert (itype, t);
2950 tree t2
2951 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2952 t = fold_build2 (MULT_EXPR, itype, t, t2);
2953 if (POINTER_TYPE_P (vtype))
2954 t = fold_build_pointer_plus (n1, t);
2955 else
2956 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2958 else if (rect_p)
2960 t = fold_convert (itype, vs[j]);
2961 t = fold_build2 (MULT_EXPR, itype, t,
2962 fold_convert (itype, fd->loops[j].step));
2963 if (POINTER_TYPE_P (vtype))
2964 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2965 else
2966 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2968 else
2969 t = vs[j];
2970 t = force_gimple_operand_gsi (gsi, t, false,
2971 NULL_TREE, true,
2972 GSI_SAME_STMT);
2973 stmt = gimple_build_assign (fd->loops[j].v, t);
2974 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2976 if (gsi_end_p (*gsi))
2977 *gsi = gsi_last_bb (gsi_bb (*gsi));
2978 else
2979 gsi_prev (gsi);
2980 if (bb_triang)
2982 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2983 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2984 *gsi = gsi_after_labels (e->dest);
2985 if (!gsi_end_p (*gsi))
2986 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2987 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2990 else
2992 t = fold_convert (itype, t);
2993 t = fold_build2 (MULT_EXPR, itype, t,
2994 fold_convert (itype, fd->loops[i].step));
2995 if (POINTER_TYPE_P (vtype))
2996 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2997 else
2998 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2999 t = force_gimple_operand_gsi (gsi, t,
3000 DECL_P (fd->loops[i].v)
3001 && TREE_ADDRESSABLE (fd->loops[i].v),
3002 NULL_TREE, false,
3003 GSI_CONTINUE_LINKING);
3004 stmt = gimple_build_assign (fd->loops[i].v, t);
3005 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3007 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3009 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3010 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3011 false, GSI_CONTINUE_LINKING);
3012 stmt = gimple_build_assign (tem, t);
3013 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3015 if (i == fd->last_nonrect)
3016 i = fd->first_nonrect;
3018 if (fd->non_rect)
3019 for (i = 0; i <= fd->last_nonrect; i++)
3020 if (fd->loops[i].m2)
3022 tree itype = TREE_TYPE (fd->loops[i].v);
3024 tree t;
3025 if (POINTER_TYPE_P (itype))
3027 gcc_assert (integer_onep (fd->loops[i].m2));
3028 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3029 unshare_expr (fd->loops[i].n2));
3031 else
3033 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3034 t = fold_build2 (MULT_EXPR, itype,
3035 fd->loops[i - fd->loops[i].outer].v, t);
3036 t = fold_build2 (PLUS_EXPR, itype, t,
3037 fold_convert (itype,
3038 unshare_expr (fd->loops[i].n2)));
3040 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3041 t = force_gimple_operand_gsi (gsi, t, false,
3042 NULL_TREE, false,
3043 GSI_CONTINUE_LINKING);
3044 stmt = gimple_build_assign (nonrect_bounds[i], t);
3045 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3049 /* Helper function for expand_omp_for_*. Generate code like:
3050 L10:
3051 V3 += STEP3;
3052 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3053 L11:
3054 V3 = N31;
3055 V2 += STEP2;
3056 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3057 L12:
3058 V2 = N21;
3059 V1 += STEP1;
3060 goto BODY_BB;
3061 For non-rectangular loops, use temporaries stored in nonrect_bounds
3062 for the upper bounds if an M?2 multiplier is present. Given e.g.
3063 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3064 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3065 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3066 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3068 L10:
3069 V4 += STEP4;
3070 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3071 L11:
3072 V4 = N41 + M41 * V2; // This can be left out if the loop
3073 // refers to the immediate parent loop
3074 V3 += STEP3;
3075 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3076 L12:
3077 V3 = N31;
3078 V2 += STEP2;
3079 if (V2 cond2 N22) goto L120; else goto L13;
3080 L120:
3081 V4 = N41 + M41 * V2;
3082 NONRECT_BOUND4 = N42 + M42 * V2;
3083 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3084 L13:
3085 V2 = N21;
3086 V1 += STEP1;
3087 goto L120; */
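/* Editorial sketch of the rectangular cascade above as plain C; the
   hypothetical advance() is never emitted, the real code builds the
   equivalent basic blocks and conditional edges instead:

     static int
     advance (long long *v, const long long *n1, const long long *n2,
	      const long long *step, int depth)
     {
       // Bump the innermost variable; when it runs off its bound,
       // reset it and carry into the next outer loop, as the
       // L10/L11/L12 blocks do.  Assumes every cond is <.
       for (int i = depth - 1; i > 0; i--)
	 {
	   v[i] += step[i];
	   if (v[i] < n2[i])
	     return 1;	      // goto BODY_BB
	   v[i] = n1[i];      // reset, carry into loop i - 1
	 }
       v[0] += step[0];	      // outermost bound is tested elsewhere
       return 1;	      // goto BODY_BB
     }
*/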
3089 static basic_block
3090 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3091 basic_block cont_bb, basic_block body_bb)
3093 basic_block last_bb, bb, collapse_bb = NULL;
3094 int i;
3095 gimple_stmt_iterator gsi;
3096 edge e;
3097 tree t;
3098 gimple *stmt;
3100 last_bb = cont_bb;
3101 for (i = fd->collapse - 1; i >= 0; i--)
3103 tree vtype = TREE_TYPE (fd->loops[i].v);
3105 bb = create_empty_bb (last_bb);
3106 add_bb_to_loop (bb, last_bb->loop_father);
3107 gsi = gsi_start_bb (bb);
3109 if (i < fd->collapse - 1)
3111 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3112 e->probability = profile_probability::guessed_always () / 8;
3114 struct omp_for_data_loop *l = &fd->loops[i + 1];
3115 if (l->m1 == NULL_TREE || l->outer != 1)
3117 t = l->n1;
3118 if (l->m1)
3120 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3121 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3122 t);
3123 else
3125 tree t2
3126 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3127 fd->loops[i + 1 - l->outer].v, l->m1);
3128 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3131 t = force_gimple_operand_gsi (&gsi, t,
3132 DECL_P (l->v)
3133 && TREE_ADDRESSABLE (l->v),
3134 NULL_TREE, false,
3135 GSI_CONTINUE_LINKING);
3136 stmt = gimple_build_assign (l->v, t);
3137 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3140 else
3141 collapse_bb = bb;
3143 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3145 if (POINTER_TYPE_P (vtype))
3146 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3147 else
3148 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3149 t = force_gimple_operand_gsi (&gsi, t,
3150 DECL_P (fd->loops[i].v)
3151 && TREE_ADDRESSABLE (fd->loops[i].v),
3152 NULL_TREE, false, GSI_CONTINUE_LINKING);
3153 stmt = gimple_build_assign (fd->loops[i].v, t);
3154 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3156 if (fd->loops[i].non_rect_referenced)
3158 basic_block update_bb = NULL, prev_bb = NULL;
3159 for (int j = i + 1; j <= fd->last_nonrect; j++)
3160 if (j - fd->loops[j].outer == i)
3162 tree n1, n2;
3163 struct omp_for_data_loop *l = &fd->loops[j];
3164 basic_block this_bb = create_empty_bb (last_bb);
3165 add_bb_to_loop (this_bb, last_bb->loop_father);
3166 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3167 if (prev_bb)
3169 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3170 e->probability
3171 = profile_probability::guessed_always ().apply_scale (7,
3172 8);
3173 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3175 if (l->m1)
3177 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3178 t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3179 else
3181 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3182 fd->loops[i].v);
3183 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3184 t, l->n1);
3186 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3187 false,
3188 GSI_CONTINUE_LINKING);
3189 stmt = gimple_build_assign (l->v, n1);
3190 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3191 n1 = l->v;
3193 else
3194 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3195 NULL_TREE, false,
3196 GSI_CONTINUE_LINKING);
3197 if (l->m2)
3199 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3200 t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3201 else
3203 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3204 fd->loops[i].v);
3205 t = fold_build2 (PLUS_EXPR,
3206 TREE_TYPE (nonrect_bounds[j]),
3207 t, unshare_expr (l->n2));
3209 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3210 false,
3211 GSI_CONTINUE_LINKING);
3212 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3213 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3214 n2 = nonrect_bounds[j];
3216 else
3217 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3218 true, NULL_TREE, false,
3219 GSI_CONTINUE_LINKING);
3220 gcond *cond_stmt
3221 = gimple_build_cond (l->cond_code, n1, n2,
3222 NULL_TREE, NULL_TREE);
3223 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3224 if (update_bb == NULL)
3225 update_bb = this_bb;
3226 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3227 e->probability = profile_probability::guessed_always () / 8;
3228 if (prev_bb == NULL)
3229 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3230 prev_bb = this_bb;
3232 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3233 e->probability
3234 = profile_probability::guessed_always ().apply_scale (7, 8);
3235 body_bb = update_bb;
3238 if (i > 0)
3240 if (fd->loops[i].m2)
3241 t = nonrect_bounds[i];
3242 else
3243 t = unshare_expr (fd->loops[i].n2);
3244 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3245 false, GSI_CONTINUE_LINKING);
3246 tree v = fd->loops[i].v;
3247 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3248 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3249 false, GSI_CONTINUE_LINKING);
3250 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3251 stmt = gimple_build_cond_empty (t);
3252 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3253 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3254 expand_omp_regimplify_p, NULL, NULL)
3255 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3256 expand_omp_regimplify_p, NULL, NULL))
3257 gimple_regimplify_operands (stmt, &gsi);
3258 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3259 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3261 else
3262 make_edge (bb, body_bb, EDGE_FALLTHRU);
3263 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3264 last_bb = bb;
3267 return collapse_bb;
3270 /* Expand #pragma omp ordered depend(source). */
3272 static void
3273 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3274 tree *counts, location_t loc)
3276 enum built_in_function source_ix
3277 = fd->iter_type == long_integer_type_node
3278 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3279 gimple *g
3280 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3281 build_fold_addr_expr (counts[fd->ordered]));
3282 gimple_set_location (g, loc);
3283 gsi_insert_before (gsi, g, GSI_SAME_STMT);
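/* For instance (illustrative; assumes the usual libgomp entry points),
   within
     #pragma omp for ordered(2)
   a
     #pragma omp ordered depend(source)
   in the loop body becomes essentially
     GOMP_doacross_post (&counts[fd->ordered]);
   where counts[fd->ordered] is the compiler-built variable holding
   the current iteration vector.  */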
3286 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3288 static void
3289 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3290 tree *counts, tree c, location_t loc,
3291 basic_block cont_bb)
3293 auto_vec<tree, 10> args;
3294 enum built_in_function sink_ix
3295 = fd->iter_type == long_integer_type_node
3296 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3297 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3298 int i;
3299 gimple_stmt_iterator gsi2 = *gsi;
3300 bool warned_step = false;
3302 if (deps == NULL)
3304 /* Handle doacross(sink: omp_cur_iteration - 1). */
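/* E.g. (illustrative source form):
     #pragma omp ordered doacross(sink: omp_cur_iteration - 1)
   waits for the immediately preceding logical iteration; the first
   iteration skips the wait via the NE_EXPR guard built below.  */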
3305 gsi_prev (&gsi2);
3306 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3307 edge e2 = split_block_after_labels (e1->dest);
3308 gsi2 = gsi_after_labels (e1->dest);
3309 *gsi = gsi_last_bb (e1->src);
3310 gimple_stmt_iterator gsi3 = *gsi;
3312 if (counts[fd->collapse - 1])
3314 gcc_assert (fd->collapse == 1);
3315 t = counts[fd->collapse - 1];
3317 else if (fd->collapse > 1)
3318 t = fd->loop.v;
3319 else
3321 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3322 fd->loops[0].v, fd->loops[0].n1);
3323 t = fold_convert (fd->iter_type, t);
3326 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
3327 false, GSI_CONTINUE_LINKING);
3328 gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
3329 build_zero_cst (TREE_TYPE (t)),
3330 NULL_TREE, NULL_TREE),
3331 GSI_NEW_STMT);
3333 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3334 build_minus_one_cst (TREE_TYPE (t)));
3335 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3336 true, GSI_SAME_STMT);
3337 args.safe_push (t);
3338 for (i = fd->collapse; i < fd->ordered; i++)
3340 t = counts[fd->ordered + 2 + (i - fd->collapse)];
3341 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3342 build_minus_one_cst (TREE_TYPE (t)));
3343 t = fold_convert (fd->iter_type, t);
3344 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3345 true, GSI_SAME_STMT);
3346 args.safe_push (t);
3349 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
3350 args);
3351 gimple_set_location (g, loc);
3352 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3354 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3355 e3->probability = profile_probability::guessed_always () / 8;
3356 e1->probability = e3->probability.invert ();
3357 e1->flags = EDGE_TRUE_VALUE;
3358 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3360 if (fd->ordered > fd->collapse && cont_bb)
3362 if (counts[fd->ordered + 1] == NULL_TREE)
3363 counts[fd->ordered + 1]
3364 = create_tmp_var (boolean_type_node, ".first");
3366 edge e4;
3367 if (gsi_end_p (gsi3))
3368 e4 = split_block_after_labels (e1->src);
3369 else
3371 gsi_prev (&gsi3);
3372 e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
3374 gsi3 = gsi_last_bb (e4->src);
3376 gsi_insert_after (&gsi3,
3377 gimple_build_cond (NE_EXPR,
3378 counts[fd->ordered + 1],
3379 boolean_false_node,
3380 NULL_TREE, NULL_TREE),
3381 GSI_NEW_STMT);
3383 edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
3384 e4->probability = profile_probability::guessed_always () / 8;
3385 e5->probability = e4->probability.invert ();
3386 e4->flags = EDGE_TRUE_VALUE;
3387 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
3390 *gsi = gsi_after_labels (e2->dest);
3391 return;
3393 for (i = 0; i < fd->ordered; i++)
3395 tree step = NULL_TREE;
3396 off = TREE_PURPOSE (deps);
3397 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3399 step = TREE_OPERAND (off, 1);
3400 off = TREE_OPERAND (off, 0);
3402 if (!integer_zerop (off))
3404 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3405 || fd->loops[i].cond_code == GT_EXPR);
3406 bool forward = fd->loops[i].cond_code == LT_EXPR;
3407 if (step)
3409 /* Non-simple Fortran DO loops. If step is variable,
3410 we don't know even the direction at compile time, so we
3411 can't warn. */
3412 if (TREE_CODE (step) != INTEGER_CST)
3413 break;
3414 forward = tree_int_cst_sgn (step) != -1;
3416 if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3417 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3418 "waiting for lexically later iteration",
3419 OMP_CLAUSE_DOACROSS_DEPEND (c)
3420 ? "depend" : "doacross");
3421 break;
3423 deps = TREE_CHAIN (deps);
3425 /* If all offsets corresponding to the collapsed loops are zero,
3426 this depend clause can be ignored. FIXME: a flush is still
3427 needed, though; we should emit one __sync_synchronize () for it
3428 (perhaps conditionally). Solve this together with the
3429 conservative dependence folding optimization.
3430 if (i >= fd->collapse)
3431 return; */
3433 deps = OMP_CLAUSE_DECL (c);
3434 gsi_prev (&gsi2);
3435 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3436 edge e2 = split_block_after_labels (e1->dest);
3438 gsi2 = gsi_after_labels (e1->dest);
3439 *gsi = gsi_last_bb (e1->src);
3440 for (i = 0; i < fd->ordered; i++)
3442 tree itype = TREE_TYPE (fd->loops[i].v);
3443 tree step = NULL_TREE;
3444 tree orig_off = NULL_TREE;
3445 if (POINTER_TYPE_P (itype))
3446 itype = sizetype;
3447 if (i)
3448 deps = TREE_CHAIN (deps);
3449 off = TREE_PURPOSE (deps);
3450 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3452 step = TREE_OPERAND (off, 1);
3453 off = TREE_OPERAND (off, 0);
3454 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3455 && integer_onep (fd->loops[i].step)
3456 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3458 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3459 if (step)
3461 off = fold_convert_loc (loc, itype, off);
3462 orig_off = off;
3463 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3466 if (integer_zerop (off))
3467 t = boolean_true_node;
3468 else
3470 tree a;
3471 tree co = fold_convert_loc (loc, itype, off);
3472 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3474 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3475 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3476 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3477 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3478 co);
3480 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3481 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3482 fd->loops[i].v, co);
3483 else
3484 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3485 fd->loops[i].v, co);
3486 if (step)
3488 tree t1, t2;
3489 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3490 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3491 fd->loops[i].n1);
3492 else
3493 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3494 fd->loops[i].n2);
3495 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3496 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3497 fd->loops[i].n2);
3498 else
3499 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3500 fd->loops[i].n1);
3501 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3502 step, build_int_cst (TREE_TYPE (step), 0));
3503 if (TREE_CODE (step) != INTEGER_CST)
3505 t1 = unshare_expr (t1);
3506 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3507 false, GSI_CONTINUE_LINKING);
3508 t2 = unshare_expr (t2);
3509 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3510 false, GSI_CONTINUE_LINKING);
3512 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3513 t, t2, t1);
3515 else if (fd->loops[i].cond_code == LT_EXPR)
3517 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3518 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3519 fd->loops[i].n1);
3520 else
3521 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3522 fd->loops[i].n2);
3524 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3525 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3526 fd->loops[i].n2);
3527 else
3528 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3529 fd->loops[i].n1);
3531 if (cond)
3532 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3533 else
3534 cond = t;
3536 off = fold_convert_loc (loc, itype, off);
3538 if (step
3539 || (fd->loops[i].cond_code == LT_EXPR
3540 ? !integer_onep (fd->loops[i].step)
3541 : !integer_minus_onep (fd->loops[i].step)))
3543 if (step == NULL_TREE
3544 && TYPE_UNSIGNED (itype)
3545 && fd->loops[i].cond_code == GT_EXPR)
3546 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3547 fold_build1_loc (loc, NEGATE_EXPR, itype,
3548 s));
3549 else
3550 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3551 orig_off ? orig_off : off, s);
3552 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3553 build_int_cst (itype, 0));
3554 if (integer_zerop (t) && !warned_step)
3556 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3557 "refers to iteration never in the iteration "
3558 "space",
3559 OMP_CLAUSE_DOACROSS_DEPEND (c)
3560 ? "depend" : "doacross");
3561 warned_step = true;
3563 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3564 cond, t);
3567 if (i <= fd->collapse - 1 && fd->collapse > 1)
3568 t = fd->loop.v;
3569 else if (counts[i])
3570 t = counts[i];
3571 else
3573 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3574 fd->loops[i].v, fd->loops[i].n1);
3575 t = fold_convert_loc (loc, fd->iter_type, t);
3577 if (step)
3578 /* We have divided off by step already earlier. */;
3579 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3580 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3581 fold_build1_loc (loc, NEGATE_EXPR, itype,
3582 s));
3583 else
3584 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3585 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3586 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3587 off = fold_convert_loc (loc, fd->iter_type, off);
3588 if (i <= fd->collapse - 1 && fd->collapse > 1)
3590 if (i)
3591 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3592 off);
3593 if (i < fd->collapse - 1)
3595 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3596 counts[i]);
3597 continue;
3600 off = unshare_expr (off);
3601 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3602 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3603 true, GSI_SAME_STMT);
3604 args.safe_push (t);
3606 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3607 gimple_set_location (g, loc);
3608 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3610 cond = unshare_expr (cond);
3611 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3612 GSI_CONTINUE_LINKING);
3613 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3614 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3615 e3->probability = profile_probability::guessed_always () / 8;
3616 e1->probability = e3->probability.invert ();
3617 e1->flags = EDGE_TRUE_VALUE;
3618 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3620 *gsi = gsi_after_labels (e2->dest);
3623 /* Expand all #pragma omp ordered depend(source) and
3624 #pragma omp ordered depend(sink:...) constructs in the current
3625 #pragma omp for ordered(n) region. */
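/* As an illustrative sketch (not the verbatim expansion), a doacross
   loop such as

     #pragma omp for ordered(1)
     for (i = 1; i < N; i++)
       {
	 #pragma omp ordered depend(sink: i - 1)
	 a[i] += a[i - 1];
	 #pragma omp ordered depend(source)
       }

   has each depend(sink: ...) lowered to a GOMP_doacross_wait call on the
   requested iteration and each depend(source) lowered to a
   GOMP_doacross_post call publishing the current iteration counts.  */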
3627 static void
3628 expand_omp_ordered_source_sink (struct omp_region *region,
3629 struct omp_for_data *fd, tree *counts,
3630 basic_block cont_bb)
3632 struct omp_region *inner;
3633 int i;
3634 for (i = fd->collapse - 1; i < fd->ordered; i++)
3635 if (i == fd->collapse - 1 && fd->collapse > 1)
3636 counts[i] = NULL_TREE;
3637 else if (i >= fd->collapse && !cont_bb)
3638 counts[i] = build_zero_cst (fd->iter_type);
3639 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3640 && integer_onep (fd->loops[i].step))
3641 counts[i] = NULL_TREE;
3642 else
3643 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3644 tree atype
3645 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3646 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3647 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3648 counts[fd->ordered + 1] = NULL_TREE;
3650 for (inner = region->inner; inner; inner = inner->next)
3651 if (inner->type == GIMPLE_OMP_ORDERED)
3653 gomp_ordered *ord_stmt = inner->ord_stmt;
3654 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3655 location_t loc = gimple_location (ord_stmt);
3656 tree c;
3657 for (c = gimple_omp_ordered_clauses (ord_stmt);
3658 c; c = OMP_CLAUSE_CHAIN (c))
3659 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3660 break;
3661 if (c)
3662 expand_omp_ordered_source (&gsi, fd, counts, loc);
3663 for (c = gimple_omp_ordered_clauses (ord_stmt);
3664 c; c = OMP_CLAUSE_CHAIN (c))
3665 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3666 expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3667 gsi_remove (&gsi, true);
3671 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3672 collapsed. */
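/* E.g. (sketch), for

     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
	 ...

   only the i loop is workshared (fd->collapse == 1, fd->ordered == 2),
   so this routine emits the sequential j loop around the body,
   maintaining the .orditer/.orditera counters used by the source/sink
   expansion above.  */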
3674 static basic_block
3675 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3676 basic_block cont_bb, basic_block body_bb,
3677 basic_block l0_bb, bool ordered_lastprivate)
3679 if (fd->ordered == fd->collapse)
3680 return cont_bb;
3682 if (!cont_bb)
3684 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3685 for (int i = fd->collapse; i < fd->ordered; i++)
3687 tree type = TREE_TYPE (fd->loops[i].v);
3688 tree n1 = fold_convert (type, fd->loops[i].n1);
3689 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3690 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3691 size_int (i - fd->collapse + 1),
3692 NULL_TREE, NULL_TREE);
3693 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3695 return NULL;
3698 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3700 tree t, type = TREE_TYPE (fd->loops[i].v);
3701 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3702 if (counts[fd->ordered + 1] && i == fd->collapse)
3703 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3704 boolean_true_node);
3705 expand_omp_build_assign (&gsi, fd->loops[i].v,
3706 fold_convert (type, fd->loops[i].n1));
3707 if (counts[i])
3708 expand_omp_build_assign (&gsi, counts[i],
3709 build_zero_cst (fd->iter_type));
3710 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3711 size_int (i - fd->collapse + 1),
3712 NULL_TREE, NULL_TREE);
3713 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3714 if (!gsi_end_p (gsi))
3715 gsi_prev (&gsi);
3716 else
3717 gsi = gsi_last_bb (body_bb);
3718 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3719 basic_block new_body = e1->dest;
3720 if (body_bb == cont_bb)
3721 cont_bb = new_body;
3722 edge e2 = NULL;
3723 basic_block new_header;
3724 if (EDGE_COUNT (cont_bb->preds) > 0)
3726 gsi = gsi_last_bb (cont_bb);
3727 if (POINTER_TYPE_P (type))
3728 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3729 else
3730 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3731 fold_convert (type, fd->loops[i].step));
3732 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3733 if (counts[i])
3735 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3736 build_int_cst (fd->iter_type, 1));
3737 expand_omp_build_assign (&gsi, counts[i], t);
3738 t = counts[i];
3740 else
3742 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3743 fd->loops[i].v, fd->loops[i].n1);
3744 t = fold_convert (fd->iter_type, t);
3745 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3746 true, GSI_SAME_STMT);
3748 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3749 size_int (i - fd->collapse + 1),
3750 NULL_TREE, NULL_TREE);
3751 expand_omp_build_assign (&gsi, aref, t);
3752 if (counts[fd->ordered + 1] && i == fd->ordered - 1)
3753 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3754 boolean_false_node);
3755 gsi_prev (&gsi);
3756 e2 = split_block (cont_bb, gsi_stmt (gsi));
3757 new_header = e2->dest;
3759 else
3760 new_header = cont_bb;
3761 gsi = gsi_after_labels (new_header);
3762 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3763 true, GSI_SAME_STMT);
3764 tree n2
3765 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3766 true, NULL_TREE, true, GSI_SAME_STMT);
3767 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3768 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3769 edge e3 = split_block (new_header, gsi_stmt (gsi));
3770 cont_bb = e3->dest;
3771 remove_edge (e1);
3772 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3773 e3->flags = EDGE_FALSE_VALUE;
3774 e3->probability = profile_probability::guessed_always () / 8;
3775 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3776 e1->probability = e3->probability.invert ();
3778 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3779 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3781 if (e2)
3783 class loop *loop = alloc_loop ();
3784 loop->header = new_header;
3785 loop->latch = e2->src;
3786 add_loop (loop, l0_bb->loop_father);
3790 /* If there are any lastprivate clauses and it is possible some loops
3791 might have zero iterations, ensure all the decls are initialized,
3792 otherwise we could crash evaluating C++ class iterators with lastprivate
3793 clauses. */
3794 bool need_inits = false;
3795 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3796 if (need_inits)
3798 tree type = TREE_TYPE (fd->loops[i].v);
3799 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3800 expand_omp_build_assign (&gsi, fd->loops[i].v,
3801 fold_convert (type, fd->loops[i].n1));
3803 else
3805 tree type = TREE_TYPE (fd->loops[i].v);
3806 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3807 boolean_type_node,
3808 fold_convert (type, fd->loops[i].n1),
3809 fold_convert (type, fd->loops[i].n2));
3810 if (!integer_onep (this_cond))
3811 need_inits = true;
3814 return cont_bb;
3817 /* A subroutine of expand_omp_for. Generate code for a parallel
3818 loop with any schedule. Given parameters:
3820 for (V = N1; V cond N2; V += STEP) BODY;
3822 where COND is "<" or ">", we generate pseudocode
3824 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3825 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3825 if (more) goto L0; else goto L3;
3826 L0:
3827 V = istart0;
3828 iend = iend0;
3829 L1:
3830 BODY;
3831 V += STEP;
3832 if (V cond iend) goto L1; else goto L2;
3833 L2:
3834 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3835 L3:
3837 If this is a combined omp parallel loop, instead of the call to
3838 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3839 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3840 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3841 inner GIMPLE_OMP_FOR and V += STEP; and
3842 if (V cond iend) goto L1; else goto L2; are removed.
3844 For collapsed loops, given parameters:
3845 collapse(3)
3846 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3847 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3848 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3849 BODY;
3851 we generate pseudocode
3853 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3854 if (cond3 is <)
3855 adj = STEP3 - 1;
3856 else
3857 adj = STEP3 + 1;
3858 count3 = (adj + N32 - N31) / STEP3;
3859 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3860 if (cond2 is <)
3861 adj = STEP2 - 1;
3862 else
3863 adj = STEP2 + 1;
3864 count2 = (adj + N22 - N21) / STEP2;
3865 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3866 if (cond1 is <)
3867 adj = STEP1 - 1;
3868 else
3869 adj = STEP1 + 1;
3870 count1 = (adj + N12 - N11) / STEP1;
3871 count = count1 * count2 * count3;
3872 goto Z1;
3873 Z0:
3874 count = 0;
3875 Z1:
3876 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3877 if (more) goto L0; else goto L3;
3878 L0:
3879 V = istart0;
3880 T = V;
3881 V3 = N31 + (T % count3) * STEP3;
3882 T = T / count3;
3883 V2 = N21 + (T % count2) * STEP2;
3884 T = T / count2;
3885 V1 = N11 + T * STEP1;
3886 iend = iend0;
3887 L1:
3888 BODY;
3889 V += 1;
3890 if (V < iend) goto L10; else goto L2;
3891 L10:
3892 V3 += STEP3;
3893 if (V3 cond3 N32) goto L1; else goto L11;
3894 L11:
3895 V3 = N31;
3896 V2 += STEP2;
3897 if (V2 cond2 N22) goto L1; else goto L12;
3898 L12:
3899 V2 = N21;
3900 V1 += STEP1;
3901 goto L1;
3902 L2:
3903 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3904 L3:
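/* As a concrete hand-written sketch (assuming schedule(dynamic, CHUNK),
   so "foo" above is "dynamic" and COND is "<"), the generated control
   flow corresponds roughly to:

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0))
       do
	 for (V = istart0; V < iend0; V += STEP)
	   BODY;
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */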
3908 static void
3909 expand_omp_for_generic (struct omp_region *region,
3910 struct omp_for_data *fd,
3911 enum built_in_function start_fn,
3912 enum built_in_function next_fn,
3913 tree sched_arg,
3914 gimple *inner_stmt)
3916 tree type, istart0, iend0, iend;
3917 tree t, vmain, vback, bias = NULL_TREE;
3918 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3919 basic_block l2_bb = NULL, l3_bb = NULL;
3920 gimple_stmt_iterator gsi;
3921 gassign *assign_stmt;
3922 bool in_combined_parallel = is_combined_parallel (region);
3923 bool broken_loop = region->cont == NULL;
3924 edge e, ne;
3925 tree *counts = NULL;
3926 int i;
3927 bool ordered_lastprivate = false;
3929 gcc_assert (!broken_loop || !in_combined_parallel);
3930 gcc_assert (fd->iter_type == long_integer_type_node
3931 || !in_combined_parallel);
3933 entry_bb = region->entry;
3934 cont_bb = region->cont;
3935 collapse_bb = NULL;
3936 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3937 gcc_assert (broken_loop
3938 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3939 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3940 l1_bb = single_succ (l0_bb);
3941 if (!broken_loop)
3943 l2_bb = create_empty_bb (cont_bb);
3944 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3945 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3946 == l1_bb));
3947 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3949 else
3950 l2_bb = NULL;
3951 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3952 exit_bb = region->exit;
3954 gsi = gsi_last_nondebug_bb (entry_bb);
3956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3957 if (fd->ordered
3958 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3959 OMP_CLAUSE_LASTPRIVATE))
3960 ordered_lastprivate = true;
3961 tree reductions = NULL_TREE;
3962 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3963 tree memv = NULL_TREE;
3964 if (fd->lastprivate_conditional)
3966 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3967 OMP_CLAUSE__CONDTEMP_);
3968 if (fd->have_pointer_condtemp)
3969 condtemp = OMP_CLAUSE_DECL (c);
3970 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3971 cond_var = OMP_CLAUSE_DECL (c);
3973 if (sched_arg)
3975 if (fd->have_reductemp)
3977 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3978 OMP_CLAUSE__REDUCTEMP_);
3979 reductions = OMP_CLAUSE_DECL (c);
3980 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3981 gimple *g = SSA_NAME_DEF_STMT (reductions);
3982 reductions = gimple_assign_rhs1 (g);
3983 OMP_CLAUSE_DECL (c) = reductions;
3984 entry_bb = gimple_bb (g);
3985 edge e = split_block (entry_bb, g);
3986 if (region->entry == entry_bb)
3987 region->entry = e->dest;
3988 gsi = gsi_last_bb (entry_bb);
3990 else
3991 reductions = null_pointer_node;
3992 if (fd->have_pointer_condtemp)
3994 tree type = TREE_TYPE (condtemp);
3995 memv = create_tmp_var (type);
3996 TREE_ADDRESSABLE (memv) = 1;
3997 unsigned HOST_WIDE_INT sz
3998 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3999 sz *= fd->lastprivate_conditional;
4000 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4001 false);
4002 mem = build_fold_addr_expr (memv);
4004 else
4005 mem = null_pointer_node;
4007 if (fd->collapse > 1 || fd->ordered)
4009 int first_zero_iter1 = -1, first_zero_iter2 = -1;
4010 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4012 counts = XALLOCAVEC (tree, fd->ordered
4013 ? fd->ordered + 2
4014 + (fd->ordered - fd->collapse)
4015 : fd->collapse);
4016 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4017 zero_iter1_bb, first_zero_iter1,
4018 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4020 if (zero_iter1_bb)
4022 /* Some counts[i] vars might be uninitialized if
4023 some loop has zero iterations. But the body shouldn't
4024 be executed in that case, so just avoid uninit warnings. */
4025 for (i = first_zero_iter1;
4026 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4027 if (SSA_VAR_P (counts[i]))
4028 suppress_warning (counts[i], OPT_Wuninitialized);
4029 gsi_prev (&gsi);
4030 e = split_block (entry_bb, gsi_stmt (gsi));
4031 entry_bb = e->dest;
4032 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4033 gsi = gsi_last_nondebug_bb (entry_bb);
4034 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4035 get_immediate_dominator (CDI_DOMINATORS,
4036 zero_iter1_bb));
4038 if (zero_iter2_bb)
4040 /* Some counts[i] vars might be uninitialized if
4041 some loop has zero iterations. But the body shouldn't
4042 be executed in that case, so just avoid uninit warnings. */
4043 for (i = first_zero_iter2; i < fd->ordered; i++)
4044 if (SSA_VAR_P (counts[i]))
4045 suppress_warning (counts[i], OPT_Wuninitialized);
4046 if (zero_iter1_bb)
4047 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4048 else
4050 gsi_prev (&gsi);
4051 e = split_block (entry_bb, gsi_stmt (gsi));
4052 entry_bb = e->dest;
4053 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4054 gsi = gsi_last_nondebug_bb (entry_bb);
4055 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4056 get_immediate_dominator
4057 (CDI_DOMINATORS, zero_iter2_bb));
4060 if (fd->collapse == 1)
4062 counts[0] = fd->loop.n2;
4063 fd->loop = fd->loops[0];
4067 type = TREE_TYPE (fd->loop.v);
4068 istart0 = create_tmp_var (fd->iter_type, ".istart0");
4069 iend0 = create_tmp_var (fd->iter_type, ".iend0");
4070 TREE_ADDRESSABLE (istart0) = 1;
4071 TREE_ADDRESSABLE (iend0) = 1;
4073 /* See if we need to bias by LLONG_MIN. */
4074 if (fd->iter_type == long_long_unsigned_type_node
4075 && TREE_CODE (type) == INTEGER_TYPE
4076 && !TYPE_UNSIGNED (type)
4077 && fd->ordered == 0)
4079 tree n1, n2;
4081 if (fd->loop.cond_code == LT_EXPR)
4083 n1 = fd->loop.n1;
4084 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4086 else
4088 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4089 n2 = fd->loop.n1;
4091 if (TREE_CODE (n1) != INTEGER_CST
4092 || TREE_CODE (n2) != INTEGER_CST
4093 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4094 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
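	  /* E.g. (illustrative): for a signed long long loop whose bounds
	     may straddle zero, adding bias == LLONG_MIN
	     (0x8000000000000000) maps the signed range
	     order-preservingly onto the unsigned iterator domain; the
	     bias is subtracted again below when computing V from
	     istart0/iend0.  */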
4097 gimple_stmt_iterator gsif = gsi;
4098 gsi_prev (&gsif);
4100 tree arr = NULL_TREE;
4101 if (in_combined_parallel)
4103 gcc_assert (fd->ordered == 0);
4104 /* In a combined parallel loop, emit a call to
4105 GOMP_loop_foo_next. */
4106 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4107 build_fold_addr_expr (istart0),
4108 build_fold_addr_expr (iend0));
4110 else
4112 tree t0, t1, t2, t3, t4;
4113 /* If this is not a combined parallel loop, emit a call to
4114 GOMP_loop_foo_start in ENTRY_BB. */
4115 t4 = build_fold_addr_expr (iend0);
4116 t3 = build_fold_addr_expr (istart0);
4117 if (fd->ordered)
4119 t0 = build_int_cst (unsigned_type_node,
4120 fd->ordered - fd->collapse + 1);
4121 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4122 fd->ordered
4123 - fd->collapse + 1),
4124 ".omp_counts");
4125 DECL_NAMELESS (arr) = 1;
4126 TREE_ADDRESSABLE (arr) = 1;
4127 TREE_STATIC (arr) = 1;
4128 vec<constructor_elt, va_gc> *v;
4129 vec_alloc (v, fd->ordered - fd->collapse + 1);
4130 int idx;
4132 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4134 tree c;
4135 if (idx == 0 && fd->collapse > 1)
4136 c = fd->loop.n2;
4137 else
4138 c = counts[idx + fd->collapse - 1];
4139 tree purpose = size_int (idx);
4140 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4141 if (TREE_CODE (c) != INTEGER_CST)
4142 TREE_STATIC (arr) = 0;
4145 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4146 if (!TREE_STATIC (arr))
4147 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4148 void_type_node, arr),
4149 true, NULL_TREE, true, GSI_SAME_STMT);
4150 t1 = build_fold_addr_expr (arr);
4151 t2 = NULL_TREE;
4153 else
4155 t2 = fold_convert (fd->iter_type, fd->loop.step);
4156 t1 = fd->loop.n2;
4157 t0 = fd->loop.n1;
4158 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4160 tree innerc
4161 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4162 OMP_CLAUSE__LOOPTEMP_);
4163 gcc_assert (innerc);
4164 t0 = OMP_CLAUSE_DECL (innerc);
4165 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4166 OMP_CLAUSE__LOOPTEMP_);
4167 gcc_assert (innerc);
4168 t1 = OMP_CLAUSE_DECL (innerc);
4170 if (POINTER_TYPE_P (TREE_TYPE (t0))
4171 && TYPE_PRECISION (TREE_TYPE (t0))
4172 != TYPE_PRECISION (fd->iter_type))
4174 /* Avoid casting pointers to integer of a different size. */
4175 tree itype = signed_type_for (type);
4176 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4177 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4179 else
4181 t1 = fold_convert (fd->iter_type, t1);
4182 t0 = fold_convert (fd->iter_type, t0);
4184 if (bias)
4186 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4187 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4190 if (fd->iter_type == long_integer_type_node || fd->ordered)
4192 if (fd->chunk_size)
4194 t = fold_convert (fd->iter_type, fd->chunk_size);
4195 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4196 if (sched_arg)
4198 if (fd->ordered)
4199 t = build_call_expr (builtin_decl_explicit (start_fn),
4200 8, t0, t1, sched_arg, t, t3, t4,
4201 reductions, mem);
4202 else
4203 t = build_call_expr (builtin_decl_explicit (start_fn),
4204 9, t0, t1, t2, sched_arg, t, t3, t4,
4205 reductions, mem);
4207 else if (fd->ordered)
4208 t = build_call_expr (builtin_decl_explicit (start_fn),
4209 5, t0, t1, t, t3, t4);
4210 else
4211 t = build_call_expr (builtin_decl_explicit (start_fn),
4212 6, t0, t1, t2, t, t3, t4);
4214 else if (fd->ordered)
4215 t = build_call_expr (builtin_decl_explicit (start_fn),
4216 4, t0, t1, t3, t4);
4217 else
4218 t = build_call_expr (builtin_decl_explicit (start_fn),
4219 5, t0, t1, t2, t3, t4);
4221 else
4223 tree t5;
4224 tree c_bool_type;
4225 tree bfn_decl;
4227 /* The GOMP_loop_ull_*start functions have an additional boolean
4228 argument, true for < loops and false for > loops.
4229 In Fortran, the C bool type can be different from
4230 boolean_type_node. */
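	 /* (E.g., a sketch of the corresponding libgomp entry point, with
	    UP being this T5 flag:
	      bool GOMP_loop_ull_dynamic_start (bool up,
						unsigned long long start,
						unsigned long long end,
						unsigned long long incr,
						unsigned long long chunk,
						unsigned long long *istart,
						unsigned long long *iend);)  */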
4231 bfn_decl = builtin_decl_explicit (start_fn);
4232 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4233 t5 = build_int_cst (c_bool_type,
4234 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4235 if (fd->chunk_size)
4237 tree bfn_decl = builtin_decl_explicit (start_fn);
4238 t = fold_convert (fd->iter_type, fd->chunk_size);
4239 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4240 if (sched_arg)
4241 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4242 t, t3, t4, reductions, mem);
4243 else
4244 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4246 else
4247 t = build_call_expr (builtin_decl_explicit (start_fn),
4248 6, t5, t0, t1, t2, t3, t4);
4251 if (TREE_TYPE (t) != boolean_type_node)
4252 t = fold_build2 (NE_EXPR, boolean_type_node,
4253 t, build_int_cst (TREE_TYPE (t), 0));
4254 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4255 true, GSI_SAME_STMT);
4256 if (arr && !TREE_STATIC (arr))
4258 tree clobber = build_clobber (TREE_TYPE (arr));
4259 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4260 GSI_SAME_STMT);
4262 if (fd->have_pointer_condtemp)
4263 expand_omp_build_assign (&gsi, condtemp, memv, false);
4264 if (fd->have_reductemp)
4266 gimple *g = gsi_stmt (gsi);
4267 gsi_remove (&gsi, true);
4268 release_ssa_name (gimple_assign_lhs (g));
4270 entry_bb = region->entry;
4271 gsi = gsi_last_nondebug_bb (entry_bb);
4273 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4275 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4277 /* Remove the GIMPLE_OMP_FOR statement. */
4278 gsi_remove (&gsi, true);
4280 if (gsi_end_p (gsif))
4281 gsif = gsi_after_labels (gsi_bb (gsif));
4282 gsi_next (&gsif);
4284 /* Iteration setup for sequential loop goes in L0_BB. */
4285 tree startvar = fd->loop.v;
4286 tree endvar = NULL_TREE;
4288 if (gimple_omp_for_combined_p (fd->for_stmt))
4290 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4291 && gimple_omp_for_kind (inner_stmt)
4292 == GF_OMP_FOR_KIND_SIMD);
4293 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4294 OMP_CLAUSE__LOOPTEMP_);
4295 gcc_assert (innerc);
4296 startvar = OMP_CLAUSE_DECL (innerc);
4297 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4298 OMP_CLAUSE__LOOPTEMP_);
4299 gcc_assert (innerc);
4300 endvar = OMP_CLAUSE_DECL (innerc);
4303 gsi = gsi_start_bb (l0_bb);
4304 t = istart0;
4305 if (fd->ordered && fd->collapse == 1)
4306 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4307 fold_convert (fd->iter_type, fd->loop.step));
4308 else if (bias)
4309 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4310 if (fd->ordered && fd->collapse == 1)
4312 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4313 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4314 fd->loop.n1, fold_convert (sizetype, t));
4315 else
4317 t = fold_convert (TREE_TYPE (startvar), t);
4318 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4319 fd->loop.n1, t);
4322 else
4324 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4325 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4326 t = fold_convert (TREE_TYPE (startvar), t);
4328 t = force_gimple_operand_gsi (&gsi, t,
4329 DECL_P (startvar)
4330 && TREE_ADDRESSABLE (startvar),
4331 NULL_TREE, false, GSI_CONTINUE_LINKING);
4332 assign_stmt = gimple_build_assign (startvar, t);
4333 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4334 if (cond_var)
4336 tree itype = TREE_TYPE (cond_var);
4337 /* For the lastprivate(conditional:) itervar, we need an iteration
4338 counter that starts at a non-zero unsigned value and increases.
4339 Prefer as few IVs as possible, so if we can use startvar
4340 itself, use that, or startvar + constant (those would be
4341 incremented with step), and as a last resort use s0 + 1,
4342 incremented by 1. */
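      /* E.g. (sketch): for "for (i = 5; i < n; i++)" cond_var can be i
	 itself, since it starts at 5 > 0; for "for (i = 0; i < n; i++)"
	 it becomes i + 1 via the (1 - N1) adjustment below.  */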
4343 if ((fd->ordered && fd->collapse == 1)
4344 || bias
4345 || POINTER_TYPE_P (type)
4346 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4347 || fd->loop.cond_code != LT_EXPR)
4348 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4349 build_int_cst (itype, 1));
4350 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4351 t = fold_convert (itype, t);
4352 else
4354 tree c = fold_convert (itype, fd->loop.n1);
4355 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4356 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4358 t = force_gimple_operand_gsi (&gsi, t, false,
4359 NULL_TREE, false, GSI_CONTINUE_LINKING);
4360 assign_stmt = gimple_build_assign (cond_var, t);
4361 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4364 t = iend0;
4365 if (fd->ordered && fd->collapse == 1)
4366 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4367 fold_convert (fd->iter_type, fd->loop.step));
4368 else if (bias)
4369 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4370 if (fd->ordered && fd->collapse == 1)
4372 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4373 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4374 fd->loop.n1, fold_convert (sizetype, t));
4375 else
4377 t = fold_convert (TREE_TYPE (startvar), t);
4378 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4379 fd->loop.n1, t);
4382 else
4384 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4385 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4386 t = fold_convert (TREE_TYPE (startvar), t);
4388 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4389 false, GSI_CONTINUE_LINKING);
4390 if (endvar)
4392 assign_stmt = gimple_build_assign (endvar, iend);
4393 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4394 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4395 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4396 else
4397 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4398 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4400 /* Handle linear clause adjustments. */
4401 tree itercnt = NULL_TREE;
4402 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4403 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4404 c; c = OMP_CLAUSE_CHAIN (c))
4405 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4406 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4408 tree d = OMP_CLAUSE_DECL (c);
4409 tree t = d, a, dest;
4410 if (omp_privatize_by_reference (t))
4411 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4412 tree type = TREE_TYPE (t);
4413 if (POINTER_TYPE_P (type))
4414 type = sizetype;
4415 dest = unshare_expr (t);
4416 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4417 expand_omp_build_assign (&gsif, v, t);
4418 if (itercnt == NULL_TREE)
4420 itercnt = startvar;
4421 tree n1 = fd->loop.n1;
4422 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4424 itercnt
4425 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4426 itercnt);
4427 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4429 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4430 itercnt, n1);
4431 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4432 itercnt, fd->loop.step);
4433 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4434 NULL_TREE, false,
4435 GSI_CONTINUE_LINKING);
4437 a = fold_build2 (MULT_EXPR, type,
4438 fold_convert (type, itercnt),
4439 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4440 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4441 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4442 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4443 false, GSI_CONTINUE_LINKING);
4444 expand_omp_build_assign (&gsi, dest, t, true);
4446 if (fd->collapse > 1)
4447 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4449 if (fd->ordered)
4451 /* Until now, the counts array contained the number of iterations
4452 (or a variable holding it) for the ith loop. From now on, we
4453 usually need those counts only for the collapsed loops, and only
4454 for the 2nd through the last collapsed one. Move those one
4455 element earlier; we'll use counts[fd->collapse - 1] for the first
4456 source/sink iteration counter and so on, and counts[fd->ordered]
4457 as the array holding the current counter values for
4458 depend(source). For doacross(sink:omp_cur_iteration - 1) we need
4459 the counts from fd->collapse to fd->ordered - 1; make a copy of
4460 those to counts[fd->ordered + 2] and onwards.
4461 counts[fd->ordered + 1] can be a flag for whether it is the first
4462 iteration with a new collapsed counter (used only if
4463 fd->ordered > fd->collapse). */
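      /* (Illustrative layout, assuming collapse(2) ordered(4), i.e.
	 fd->ordered + 2 + (fd->ordered - fd->collapse) == 8 elements:
	   counts[0]     count of the 2nd collapsed loop (moved down),
	   counts[1..3]  source/sink iteration counters,
	   counts[4]     the .orditera array for depend(source),
	   counts[5]     the "first iteration" flag,
	   counts[6..7]  copies of the original counts[2..3].)  */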
4464 if (fd->ordered > fd->collapse)
4465 memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4466 (fd->ordered - fd->collapse) * sizeof (counts[0]));
4467 if (fd->collapse > 1)
4468 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4469 if (broken_loop)
4471 int i;
4472 for (i = fd->collapse; i < fd->ordered; i++)
4474 tree type = TREE_TYPE (fd->loops[i].v);
4475 tree this_cond
4476 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4477 fold_convert (type, fd->loops[i].n1),
4478 fold_convert (type, fd->loops[i].n2));
4479 if (!integer_onep (this_cond))
4480 break;
4482 if (i < fd->ordered)
4484 if (entry_bb->loop_father != l0_bb->loop_father)
4486 remove_bb_from_loops (l0_bb);
4487 add_bb_to_loop (l0_bb, entry_bb->loop_father);
4488 gcc_assert (single_succ (l0_bb) == l1_bb);
4490 cont_bb
4491 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4492 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4493 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4494 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4495 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4496 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4497 make_edge (cont_bb, l1_bb, 0);
4498 l2_bb = create_empty_bb (cont_bb);
4499 broken_loop = false;
4502 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4503 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4504 l0_bb, ordered_lastprivate);
4505 if (counts[fd->collapse - 1])
4507 gcc_assert (fd->collapse == 1);
4508 gsi = gsi_last_bb (l0_bb);
4509 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4510 istart0, true);
4511 if (cont_bb)
4513 gsi = gsi_last_bb (cont_bb);
4514 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4515 counts[fd->collapse - 1],
4516 build_int_cst (fd->iter_type, 1));
4517 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4518 tree aref = build4 (ARRAY_REF, fd->iter_type,
4519 counts[fd->ordered], size_zero_node,
4520 NULL_TREE, NULL_TREE);
4521 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4523 t = counts[fd->collapse - 1];
4525 else if (fd->collapse > 1)
4526 t = fd->loop.v;
4527 else
4529 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4530 fd->loops[0].v, fd->loops[0].n1);
4531 t = fold_convert (fd->iter_type, t);
4533 gsi = gsi_last_bb (l0_bb);
4534 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4535 size_zero_node, NULL_TREE, NULL_TREE);
4536 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4537 false, GSI_CONTINUE_LINKING);
4538 expand_omp_build_assign (&gsi, aref, t, true);
4541 if (!broken_loop)
4543 /* Code to control the increment and predicate for the sequential
4544 loop goes in the CONT_BB. */
4545 gsi = gsi_last_nondebug_bb (cont_bb);
4546 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4547 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4548 vmain = gimple_omp_continue_control_use (cont_stmt);
4549 vback = gimple_omp_continue_control_def (cont_stmt);
4551 if (cond_var)
4553 tree itype = TREE_TYPE (cond_var);
4554 tree t2;
4555 if ((fd->ordered && fd->collapse == 1)
4556 || bias
4557 || POINTER_TYPE_P (type)
4558 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4559 || fd->loop.cond_code != LT_EXPR)
4560 t2 = build_int_cst (itype, 1);
4561 else
4562 t2 = fold_convert (itype, fd->loop.step);
4563 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4564 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4565 NULL_TREE, true, GSI_SAME_STMT);
4566 assign_stmt = gimple_build_assign (cond_var, t2);
4567 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4570 if (!gimple_omp_for_combined_p (fd->for_stmt))
4572 if (POINTER_TYPE_P (type))
4573 t = fold_build_pointer_plus (vmain, fd->loop.step);
4574 else
4575 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4576 t = force_gimple_operand_gsi (&gsi, t,
4577 DECL_P (vback)
4578 && TREE_ADDRESSABLE (vback),
4579 NULL_TREE, true, GSI_SAME_STMT);
4580 assign_stmt = gimple_build_assign (vback, t);
4581 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4583 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4585 tree tem;
4586 if (fd->collapse > 1)
4587 tem = fd->loop.v;
4588 else
4590 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4591 fd->loops[0].v, fd->loops[0].n1);
4592 tem = fold_convert (fd->iter_type, tem);
4594 tree aref = build4 (ARRAY_REF, fd->iter_type,
4595 counts[fd->ordered], size_zero_node,
4596 NULL_TREE, NULL_TREE);
4597 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4598 true, GSI_SAME_STMT);
4599 expand_omp_build_assign (&gsi, aref, tem);
4602 t = build2 (fd->loop.cond_code, boolean_type_node,
4603 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4604 iend);
4605 gcond *cond_stmt = gimple_build_cond_empty (t);
4606 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4609 /* Remove GIMPLE_OMP_CONTINUE. */
4610 gsi_remove (&gsi, true);
4612 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4613 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4615 /* Emit code to get the next parallel iteration in L2_BB. */
4616 gsi = gsi_start_bb (l2_bb);
4618 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4619 build_fold_addr_expr (istart0),
4620 build_fold_addr_expr (iend0));
4621 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4622 false, GSI_CONTINUE_LINKING);
4623 if (TREE_TYPE (t) != boolean_type_node)
4624 t = fold_build2 (NE_EXPR, boolean_type_node,
4625 t, build_int_cst (TREE_TYPE (t), 0));
4626 gcond *cond_stmt = gimple_build_cond_empty (t);
4627 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4630 /* Add the loop cleanup function. */
4631 gsi = gsi_last_nondebug_bb (exit_bb);
4632 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4633 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4634 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4635 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4636 else
4637 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4638 gcall *call_stmt = gimple_build_call (t, 0);
4639 if (fd->ordered)
4641 tree arr = counts[fd->ordered];
4642 tree clobber = build_clobber (TREE_TYPE (arr));
4643 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4644 GSI_SAME_STMT);
4646 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4648 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4649 if (fd->have_reductemp)
4651 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4652 gimple_call_lhs (call_stmt));
4653 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4656 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4657 gsi_remove (&gsi, true);
4659 /* Connect the new blocks. */
4660 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4661 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4663 if (!broken_loop)
4665 gimple_seq phis;
4667 e = find_edge (cont_bb, l3_bb);
4668 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4670 phis = phi_nodes (l3_bb);
4671 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4673 gimple *phi = gsi_stmt (gsi);
4674 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4675 PHI_ARG_DEF_FROM_EDGE (phi, e));
4677 remove_edge (e);
4679 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4680 e = find_edge (cont_bb, l1_bb);
4681 if (e == NULL)
4683 e = BRANCH_EDGE (cont_bb);
4684 gcc_assert (single_succ (e->dest) == l1_bb);
4686 if (gimple_omp_for_combined_p (fd->for_stmt))
4688 remove_edge (e);
4689 e = NULL;
4691 else if (fd->collapse > 1)
4693 remove_edge (e);
4694 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4696 else
4697 e->flags = EDGE_TRUE_VALUE;
4698 if (e)
4700 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4701 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4703 else
4705 e = find_edge (cont_bb, l2_bb);
4706 e->flags = EDGE_FALLTHRU;
4708 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4710 if (gimple_in_ssa_p (cfun))
4712 /* Add phis to the outer loop that connect to the phis in the inner,
4713 original loop, and move the loop entry value of the inner phi to
4714 the loop entry value of the outer phi. */
4715 gphi_iterator psi;
4716 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4718 location_t locus;
4719 gphi *nphi;
4720 gphi *exit_phi = psi.phi ();
4722 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4723 continue;
4725 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4726 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4728 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4729 edge latch_to_l1 = find_edge (latch, l1_bb);
4730 gphi *inner_phi
4731 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4733 tree t = gimple_phi_result (exit_phi);
4734 tree new_res = copy_ssa_name (t, NULL);
4735 nphi = create_phi_node (new_res, l0_bb);
4737 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4738 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4739 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4740 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4741 add_phi_arg (nphi, t, entry_to_l0, locus);
4743 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4744 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4746 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4750 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4751 recompute_dominator (CDI_DOMINATORS, l2_bb));
4752 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4753 recompute_dominator (CDI_DOMINATORS, l3_bb));
4754 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4755 recompute_dominator (CDI_DOMINATORS, l0_bb));
4756 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4757 recompute_dominator (CDI_DOMINATORS, l1_bb));
4759 /* We enter expand_omp_for_generic with a loop. This original loop may
4760 have its own loop struct, or it may be part of an outer loop struct
4761 (which may be the fake loop). */
4762 class loop *outer_loop = entry_bb->loop_father;
4763 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4765 add_bb_to_loop (l2_bb, outer_loop);
4767 /* We've added a new loop around the original loop. Allocate the
4768 corresponding loop struct. */
4769 class loop *new_loop = alloc_loop ();
4770 new_loop->header = l0_bb;
4771 new_loop->latch = l2_bb;
4772 add_loop (new_loop, outer_loop);
4774 /* Allocate a loop structure for the original loop unless we already
4775 had one. */
4776 if (!orig_loop_has_loop_struct
4777 && !gimple_omp_for_combined_p (fd->for_stmt))
4779 class loop *orig_loop = alloc_loop ();
4780 orig_loop->header = l1_bb;
4781 /* The loop may have multiple latches. */
4782 add_loop (orig_loop, new_loop);
4787 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4788 compute the needed allocation size; this handles team allocations
4789 if !ALLOC and thread allocations if ALLOC. SZ is the initial size
4790 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of
4791 the allocation in bytes, CNT the number of elements of each array:
4792 for !ALLOC this is omp_get_num_threads (), for ALLOC the number of
4793 iterations handled by the current thread. If PTR is non-NULL, it is
4794 the start of the allocation and this routine shall assign to
4795 OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers to the corresponding arrays. */
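/* (Usage sketch; cf. expand_omp_for_static_nochunk below: the routine is
   first called with PTR == NULL_TREE to compute the size to allocate,
   and once the allocation has been emitted it is called again with PTR
   pointing at the allocated block to carve out and assign the
   individual per-clause arrays.)  */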
4797 static tree
4798 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4799 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4800 gimple_stmt_iterator *gsi, bool alloc)
4802 tree eltsz = NULL_TREE;
4803 unsigned HOST_WIDE_INT preval = 0;
4804 if (ptr && sz)
4805 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4806 ptr, size_int (sz));
4807 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4808 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4809 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4810 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4812 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4813 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4814 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4816 unsigned HOST_WIDE_INT szl
4817 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4818 szl = least_bit_hwi (szl);
4819 if (szl)
4820 al = MIN (al, szl);
4822 if (ptr == NULL_TREE)
4824 if (eltsz == NULL_TREE)
4825 eltsz = TYPE_SIZE_UNIT (pointee_type);
4826 else
4827 eltsz = size_binop (PLUS_EXPR, eltsz,
4828 TYPE_SIZE_UNIT (pointee_type));
4830 if (preval == 0 && al <= alloc_align)
4832 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4833 sz += diff;
4834 if (diff && ptr)
4835 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4836 ptr, size_int (diff));
4838 else if (al > preval)
4840 if (ptr)
4842 ptr = fold_convert (pointer_sized_int_node, ptr);
4843 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4844 build_int_cst (pointer_sized_int_node,
4845 al - 1));
4846 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4847 build_int_cst (pointer_sized_int_node,
4848 -(HOST_WIDE_INT) al));
4849 ptr = fold_convert (ptr_type_node, ptr);
4851 else
4852 sz += al - 1;
4854 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4855 preval = al;
4856 else
4857 preval = 1;
4858 if (ptr)
4860 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4861 ptr = OMP_CLAUSE_DECL (c);
4862 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4863 size_binop (MULT_EXPR, cnt,
4864 TYPE_SIZE_UNIT (pointee_type)));
4868 if (ptr == NULL_TREE)
4870 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4871 if (sz)
4872 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4873 return eltsz;
4875 else
4876 return ptr;
4879 /* Return the last _looptemp_ clause if one has been created for
4880 lastprivate on distribute parallel for{, simd} or taskloop.
4881 FD is the loop data and INNERC should be the second _looptemp_
4882 clause (the one holding the end of the range).
4883 This is followed by collapse - 1 _looptemp_ clauses for
4884 counts[1] and up, and for triangular loops by 4 further
4885 _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4886 one for factor and one for adjn1). After these there is
4887 optionally one _looptemp_ clause that this function returns. */
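/* E.g. (sketch): for a triangular collapse(3) loop this skips
   collapse - 1 + 4 == 6 further _looptemp_ clauses past INNERC before
   returning the optional lastprivate one.  */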
4889 static tree
4890 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4892 gcc_assert (innerc);
4893 int count = fd->collapse - 1;
4894 if (fd->non_rect
4895 && fd->last_nonrect == fd->first_nonrect + 1
4896 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4897 count += 4;
4898 for (int i = 0; i < count; i++)
4900 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4901 OMP_CLAUSE__LOOPTEMP_);
4902 gcc_assert (innerc);
4904 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4905 OMP_CLAUSE__LOOPTEMP_);
4908 /* A subroutine of expand_omp_for. Generate code for a parallel
4909 loop with static schedule and no specified chunk size. Given
4910 parameters:
4912 for (V = N1; V cond N2; V += STEP) BODY;
4914 where COND is "<" or ">", we generate pseudocode
4916 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4917 if (cond is <)
4918 adj = STEP - 1;
4919 else
4920 adj = STEP + 1;
4921 if ((__typeof (V)) -1 > 0 && cond is >)
4922 n = -(adj + N2 - N1) / -STEP;
4923 else
4924 n = (adj + N2 - N1) / STEP;
4925 q = n / nthreads;
4926 tt = n % nthreads;
4927 if (threadid < tt) goto L3; else goto L4;
4928 L3:
4929 tt = 0;
4930 q = q + 1;
4931 L4:
4932 s0 = q * threadid + tt;
4933 e0 = s0 + q;
4934 V = s0 * STEP + N1;
4935 if (s0 >= e0) goto L2; else goto L0;
4936 L0:
4937 e = e0 * STEP + N1;
4938 L1:
4939 BODY;
4940 V += STEP;
4941 if (V cond e) goto L1;
4942 L2:
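/* Worked example (sketch): n == 10 iterations over nthreads == 4 gives
   q == 2, tt == 2, so threads 0 and 1 take q + 1 == 3 iterations each
   ([0,3) and [3,6)) while threads 2 and 3 take 2 each ([6,8) and
   [8,10)), covering the iteration space exactly.  */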
4945 static void
4946 expand_omp_for_static_nochunk (struct omp_region *region,
4947 struct omp_for_data *fd,
4948 gimple *inner_stmt)
4950 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4951 tree type, itype, vmain, vback;
4952 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4953 basic_block body_bb, cont_bb, collapse_bb = NULL;
4954 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4955 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4956 gimple_stmt_iterator gsi, gsip;
4957 edge ep;
4958 bool broken_loop = region->cont == NULL;
4959 tree *counts = NULL;
4960 tree n1, n2, step;
4961 tree reductions = NULL_TREE;
4962 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4964 itype = type = TREE_TYPE (fd->loop.v);
4965 if (POINTER_TYPE_P (type))
4966 itype = signed_type_for (type);
4968 entry_bb = region->entry;
4969 cont_bb = region->cont;
4970 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4971 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4972 gcc_assert (broken_loop
4973 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4974 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4975 body_bb = single_succ (seq_start_bb);
4976 if (!broken_loop)
4978 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4979 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4980 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4982 exit_bb = region->exit;
4984 /* Iteration space partitioning goes in ENTRY_BB. */
4985 gsi = gsi_last_nondebug_bb (entry_bb);
4986 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4987 gsip = gsi;
4988 gsi_prev (&gsip);
4990 if (fd->collapse > 1)
4992 int first_zero_iter = -1, dummy = -1;
4993 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4995 counts = XALLOCAVEC (tree, fd->collapse);
4996 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4997 fin_bb, first_zero_iter,
4998 dummy_bb, dummy, l2_dom_bb);
4999 t = NULL_TREE;
5001 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5002 t = integer_one_node;
5003 else
5004 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5005 fold_convert (type, fd->loop.n1),
5006 fold_convert (type, fd->loop.n2));
5007 if (fd->collapse == 1
5008 && TYPE_UNSIGNED (type)
5009 && (t == NULL_TREE || !integer_onep (t)))
5011 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5012 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5013 true, GSI_SAME_STMT);
5014 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5015 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5016 true, GSI_SAME_STMT);
5017 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5018 n1, n2);
5019 ep = split_block (entry_bb, cond_stmt);
5020 ep->flags = EDGE_TRUE_VALUE;
5021 entry_bb = ep->dest;
5022 ep->probability = profile_probability::very_likely ();
5023 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
5024 ep->probability = profile_probability::very_unlikely ();
5025 if (gimple_in_ssa_p (cfun))
5027 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
5028 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5029 !gsi_end_p (gpi); gsi_next (&gpi))
5031 gphi *phi = gpi.phi ();
5032 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5033 ep, UNKNOWN_LOCATION);
5036 gsi = gsi_last_bb (entry_bb);
5039 if (fd->lastprivate_conditional)
5041 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5042 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5043 if (fd->have_pointer_condtemp)
5044 condtemp = OMP_CLAUSE_DECL (c);
5045 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5046 cond_var = OMP_CLAUSE_DECL (c);
5048 if (fd->have_reductemp
5049 /* For scan, we don't want to reinitialize condtemp before the
5050 second loop. */
5051 || (fd->have_pointer_condtemp && !fd->have_scantemp)
5052 || fd->have_nonctrl_scantemp)
5054 tree t1 = build_int_cst (long_integer_type_node, 0);
5055 tree t2 = build_int_cst (long_integer_type_node, 1);
5056 tree t3 = build_int_cstu (long_integer_type_node,
5057 (HOST_WIDE_INT_1U << 31) + 1);
5058 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5059 gimple_stmt_iterator gsi2 = gsi_none ();
5060 gimple *g = NULL;
5061 tree mem = null_pointer_node, memv = NULL_TREE;
5062 unsigned HOST_WIDE_INT condtemp_sz = 0;
5063 unsigned HOST_WIDE_INT alloc_align = 0;
5064 if (fd->have_reductemp)
5066 gcc_assert (!fd->have_nonctrl_scantemp);
5067 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5068 reductions = OMP_CLAUSE_DECL (c);
5069 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5070 g = SSA_NAME_DEF_STMT (reductions);
5071 reductions = gimple_assign_rhs1 (g);
5072 OMP_CLAUSE_DECL (c) = reductions;
5073 gsi2 = gsi_for_stmt (g);
5075 else
5077 if (gsi_end_p (gsip))
5078 gsi2 = gsi_after_labels (region->entry);
5079 else
5080 gsi2 = gsip;
5081 reductions = null_pointer_node;
5083 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
5085 tree type;
5086 if (fd->have_pointer_condtemp)
5087 type = TREE_TYPE (condtemp);
5088 else
5089 type = ptr_type_node;
5090 memv = create_tmp_var (type);
5091 TREE_ADDRESSABLE (memv) = 1;
5092 unsigned HOST_WIDE_INT sz = 0;
5093 tree size = NULL_TREE;
5094 if (fd->have_pointer_condtemp)
5096 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5097 sz *= fd->lastprivate_conditional;
5098 condtemp_sz = sz;
5100 if (fd->have_nonctrl_scantemp)
5102 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5103 gimple *g = gimple_build_call (nthreads, 0);
5104 nthreads = create_tmp_var (integer_type_node);
5105 gimple_call_set_lhs (g, nthreads);
5106 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5107 nthreads = fold_convert (sizetype, nthreads);
5108 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5109 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5110 alloc_align, nthreads, NULL,
5111 false);
5112 size = fold_convert (type, size);
5114 else
5115 size = build_int_cst (type, sz);
5116 expand_omp_build_assign (&gsi2, memv, size, false);
5117 mem = build_fold_addr_expr (memv);
5119 tree t
5120 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5121 9, t1, t2, t2, t3, t1, null_pointer_node,
5122 null_pointer_node, reductions, mem);
5123 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5124 true, GSI_SAME_STMT);
5125 if (fd->have_pointer_condtemp)
5126 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5127 if (fd->have_nonctrl_scantemp)
5129 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5130 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5131 alloc_align, nthreads, &gsi2, false);
5133 if (fd->have_reductemp)
5135 gsi_remove (&gsi2, true);
5136 release_ssa_name (gimple_assign_lhs (g));
5139 switch (gimple_omp_for_kind (fd->for_stmt))
5141 case GF_OMP_FOR_KIND_FOR:
5142 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5143 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5144 break;
5145 case GF_OMP_FOR_KIND_DISTRIBUTE:
5146 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5147 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5148 break;
5149 default:
5150 gcc_unreachable ();
5152 nthreads = build_call_expr (nthreads, 0);
5153 nthreads = fold_convert (itype, nthreads);
5154 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5155 true, GSI_SAME_STMT);
5156 threadid = build_call_expr (threadid, 0);
5157 threadid = fold_convert (itype, threadid);
5158 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5159 true, GSI_SAME_STMT);
5161 n1 = fd->loop.n1;
5162 n2 = fd->loop.n2;
5163 step = fd->loop.step;
5164 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5166 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5167 OMP_CLAUSE__LOOPTEMP_);
5168 gcc_assert (innerc);
5169 n1 = OMP_CLAUSE_DECL (innerc);
5170 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5171 OMP_CLAUSE__LOOPTEMP_);
5172 gcc_assert (innerc);
5173 n2 = OMP_CLAUSE_DECL (innerc);
5175 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5176 true, NULL_TREE, true, GSI_SAME_STMT);
5177 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5178 true, NULL_TREE, true, GSI_SAME_STMT);
5179 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5180 true, NULL_TREE, true, GSI_SAME_STMT);
5182 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5183 t = fold_build2 (PLUS_EXPR, itype, step, t);
5184 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5185 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5186 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5187 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5188 fold_build1 (NEGATE_EXPR, itype, t),
5189 fold_build1 (NEGATE_EXPR, itype, step));
5190 else
5191 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5192 t = fold_convert (itype, t);
5193 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
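/* A worked example of the iteration-count computation above, for
   cond_code == LT_EXPR:  for (V = 0; V < 10; V += 3) gives
   t = step - 1 + n2 - n1 = 3 - 1 + 10 - 0 = 12 and
   n = t / step = 12 / 3 = 4 iterations (V = 0, 3, 6, 9).  The
   NEGATE_EXPR branch handles unsigned downward-counting loops, where
   dividing the wrapped-around operands directly would be wrong.  */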
5195 q = create_tmp_reg (itype, "q");
5196 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5197 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5198 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5200 tt = create_tmp_reg (itype, "tt");
5201 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5202 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5203 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5205 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5206 gcond *cond_stmt = gimple_build_cond_empty (t);
5207 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5209 second_bb = split_block (entry_bb, cond_stmt)->dest;
5210 gsi = gsi_last_nondebug_bb (second_bb);
5211 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5213 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5214 GSI_SAME_STMT);
5215 gassign *assign_stmt
5216 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5217 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5219 third_bb = split_block (second_bb, assign_stmt)->dest;
5220 gsi = gsi_last_nondebug_bb (third_bb);
5221 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5223 if (fd->have_nonctrl_scantemp)
5225 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5226 tree controlp = NULL_TREE, controlb = NULL_TREE;
5227 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5228 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5229 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5231 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5232 controlb = OMP_CLAUSE_DECL (c);
5233 else
5234 controlp = OMP_CLAUSE_DECL (c);
5235 if (controlb && controlp)
5236 break;
5238 gcc_assert (controlp && controlb);
5239 tree cnt = create_tmp_var (sizetype);
5240 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5241 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5242 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5243 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5244 alloc_align, cnt, NULL, true);
5245 tree size = create_tmp_var (sizetype);
5246 expand_omp_build_assign (&gsi, size, sz, false);
5247 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5248 size, size_int (16384));
5249 expand_omp_build_assign (&gsi, controlb, cmp);
5250 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5251 NULL_TREE, NULL_TREE);
5252 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5253 fourth_bb = split_block (third_bb, g)->dest;
5254 gsi = gsi_last_nondebug_bb (fourth_bb);
5255 /* FIXME: Once we have allocators, this should use the allocator. */
5256 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5257 gimple_call_set_lhs (g, controlp);
5258 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5259 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5260 &gsi, true);
5261 gsi_prev (&gsi);
5262 g = gsi_stmt (gsi);
5263 fifth_bb = split_block (fourth_bb, g)->dest;
5264 gsi = gsi_last_nondebug_bb (fifth_bb);
5266 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5267 gimple_call_set_lhs (g, controlp);
5268 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5269 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5270 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5271 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5272 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5274 tree tmp = create_tmp_var (sizetype);
5275 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5276 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5277 TYPE_SIZE_UNIT (pointee_type));
5278 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5279 g = gimple_build_call (alloca_decl, 2, tmp,
5280 size_int (TYPE_ALIGN (pointee_type)));
5281 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5282 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5285 sixth_bb = split_block (fifth_bb, g)->dest;
5286 gsi = gsi_last_nondebug_bb (sixth_bb);
5289 t = build2 (MULT_EXPR, itype, q, threadid);
5290 t = build2 (PLUS_EXPR, itype, t, tt);
5291 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5293 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5294 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5296 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5297 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
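/* A worked example of the partitioning above: with n = 10 and
   nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2.  Threads 0 and 1
   satisfy threadid < tt, so on their path tt is zeroed and q becomes
   3; threads 2 and 3 keep q = 2, tt = 2.  Thus s0/e0 come out as
   [0,3), [3,6), [6,8) and [8,10) -- one contiguous block per thread,
   with the remainder spread over the first tt threads.  A thread
   whose block is empty (s0 >= e0) branches straight to FIN_BB.  */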
5299 /* Remove the GIMPLE_OMP_FOR statement. */
5300 gsi_remove (&gsi, true);
5302 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5303 gsi = gsi_start_bb (seq_start_bb);
5305 tree startvar = fd->loop.v;
5306 tree endvar = NULL_TREE;
5308 if (gimple_omp_for_combined_p (fd->for_stmt))
5310 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5311 ? gimple_omp_parallel_clauses (inner_stmt)
5312 : gimple_omp_for_clauses (inner_stmt);
5313 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5314 gcc_assert (innerc);
5315 startvar = OMP_CLAUSE_DECL (innerc);
5316 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5317 OMP_CLAUSE__LOOPTEMP_);
5318 gcc_assert (innerc);
5319 endvar = OMP_CLAUSE_DECL (innerc);
5320 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5321 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5323 innerc = find_lastprivate_looptemp (fd, innerc);
5324 if (innerc)
5326 /* If needed (distribute parallel for with lastprivate),
5327 propagate down the total number of iterations. */
5328 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5329 fd->loop.n2);
5330 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5331 GSI_CONTINUE_LINKING);
5332 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5333 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5337 t = fold_convert (itype, s0);
5338 t = fold_build2 (MULT_EXPR, itype, t, step);
5339 if (POINTER_TYPE_P (type))
5341 t = fold_build_pointer_plus (n1, t);
5342 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5343 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5344 t = fold_convert (signed_type_for (type), t);
5346 else
5347 t = fold_build2 (PLUS_EXPR, type, t, n1);
5348 t = fold_convert (TREE_TYPE (startvar), t);
5349 t = force_gimple_operand_gsi (&gsi, t,
5350 DECL_P (startvar)
5351 && TREE_ADDRESSABLE (startvar),
5352 NULL_TREE, false, GSI_CONTINUE_LINKING);
5353 assign_stmt = gimple_build_assign (startvar, t);
5354 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5355 if (cond_var)
5357 tree itype = TREE_TYPE (cond_var);
5358 /* For the lastprivate(conditional:) iteration variable, we need an
5359 iteration counter that starts at a non-zero unsigned value and
5360 increases. Prefer as few IVs as possible, so if we can use startvar
5361 itself, use that, or startvar + constant (those would be
5362 incremented by step), and as a last resort use s0 + 1,
5363 incremented by 1 each iteration. */
5364 if (POINTER_TYPE_P (type)
5365 || TREE_CODE (n1) != INTEGER_CST
5366 || fd->loop.cond_code != LT_EXPR)
5367 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5368 build_int_cst (itype, 1));
5369 else if (tree_int_cst_sgn (n1) == 1)
5370 t = fold_convert (itype, t);
5371 else
5373 tree c = fold_convert (itype, n1);
5374 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5375 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5377 t = force_gimple_operand_gsi (&gsi, t, false,
5378 NULL_TREE, false, GSI_CONTINUE_LINKING);
5379 assign_stmt = gimple_build_assign (cond_var, t);
5380 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
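/* E.g. with n1 = -5 and step = 2, the bias is c = 1 - (-5) = 6, so
   cond_var = (s0 * 2 + -5) + 6 = s0 * 2 + 1, which is non-zero for
   every s0 -- exactly the non-zero increasing counter the comment
   above asks for, without needing a separate IV.  */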
5383 t = fold_convert (itype, e0);
5384 t = fold_build2 (MULT_EXPR, itype, t, step);
5385 if (POINTER_TYPE_P (type))
5387 t = fold_build_pointer_plus (n1, t);
5388 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5389 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5390 t = fold_convert (signed_type_for (type), t);
5392 else
5393 t = fold_build2 (PLUS_EXPR, type, t, n1);
5394 t = fold_convert (TREE_TYPE (startvar), t);
5395 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5396 false, GSI_CONTINUE_LINKING);
5397 if (endvar)
5399 assign_stmt = gimple_build_assign (endvar, e);
5400 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5401 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5402 assign_stmt = gimple_build_assign (fd->loop.v, e);
5403 else
5404 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5405 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5407 /* Handle linear clause adjustments. */
5408 tree itercnt = NULL_TREE;
5409 tree *nonrect_bounds = NULL;
5410 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5411 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5412 c; c = OMP_CLAUSE_CHAIN (c))
5413 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5414 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5416 tree d = OMP_CLAUSE_DECL (c);
5417 tree t = d, a, dest;
5418 if (omp_privatize_by_reference (t))
5419 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5420 if (itercnt == NULL_TREE)
5422 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5424 itercnt = fold_build2 (MINUS_EXPR, itype,
5425 fold_convert (itype, n1),
5426 fold_convert (itype, fd->loop.n1));
5427 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5428 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5429 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5430 NULL_TREE, false,
5431 GSI_CONTINUE_LINKING);
5433 else
5434 itercnt = s0;
5436 tree type = TREE_TYPE (t);
5437 if (POINTER_TYPE_P (type))
5438 type = sizetype;
5439 a = fold_build2 (MULT_EXPR, type,
5440 fold_convert (type, itercnt),
5441 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5442 dest = unshare_expr (t);
5443 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5444 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5445 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5446 false, GSI_CONTINUE_LINKING);
5447 expand_omp_build_assign (&gsi, dest, t, true);
5449 if (fd->collapse > 1)
5451 if (fd->non_rect)
5453 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5454 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5456 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5457 startvar);
5460 if (!broken_loop)
5462 /* The code controlling the sequential loop replaces the
5463 GIMPLE_OMP_CONTINUE. */
5464 gsi = gsi_last_nondebug_bb (cont_bb);
5465 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5466 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5467 vmain = gimple_omp_continue_control_use (cont_stmt);
5468 vback = gimple_omp_continue_control_def (cont_stmt);
5470 if (cond_var)
5472 tree itype = TREE_TYPE (cond_var);
5473 tree t2;
5474 if (POINTER_TYPE_P (type)
5475 || TREE_CODE (n1) != INTEGER_CST
5476 || fd->loop.cond_code != LT_EXPR)
5477 t2 = build_int_cst (itype, 1);
5478 else
5479 t2 = fold_convert (itype, step);
5480 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5481 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5482 NULL_TREE, true, GSI_SAME_STMT);
5483 assign_stmt = gimple_build_assign (cond_var, t2);
5484 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5487 if (!gimple_omp_for_combined_p (fd->for_stmt))
5489 if (POINTER_TYPE_P (type))
5490 t = fold_build_pointer_plus (vmain, step);
5491 else
5492 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5493 t = force_gimple_operand_gsi (&gsi, t,
5494 DECL_P (vback)
5495 && TREE_ADDRESSABLE (vback),
5496 NULL_TREE, true, GSI_SAME_STMT);
5497 assign_stmt = gimple_build_assign (vback, t);
5498 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5500 t = build2 (fd->loop.cond_code, boolean_type_node,
5501 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5502 ? t : vback, e);
5503 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5506 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5507 gsi_remove (&gsi, true);
5509 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5510 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5511 cont_bb, body_bb);
5514 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5515 gsi = gsi_last_nondebug_bb (exit_bb);
5516 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5518 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5519 if (fd->have_reductemp
5520 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5521 && !fd->have_nonctrl_scantemp))
5523 tree fn;
5524 if (t)
5525 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5526 else
5527 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5528 gcall *g = gimple_build_call (fn, 0);
5529 if (t)
5531 gimple_call_set_lhs (g, t);
5532 if (fd->have_reductemp)
5533 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5534 NOP_EXPR, t),
5535 GSI_SAME_STMT);
5537 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5539 else
5540 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
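/* Our reading of the branch above: T, the lhs of the
   GIMPLE_OMP_RETURN, is only present for cancellable constructs, so
   GOMP_loop_end_cancel (whose return value reports whether the loop
   was cancelled) is used instead of GOMP_loop_end, and the plain
   barrier is only emitted when no end-of-loop bookkeeping is
   needed.  */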
5542 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5543 && !fd->have_nonctrl_scantemp)
5545 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5546 gcall *g = gimple_build_call (fn, 0);
5547 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5549 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5551 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5552 tree controlp = NULL_TREE, controlb = NULL_TREE;
5553 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5554 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5555 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5557 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5558 controlb = OMP_CLAUSE_DECL (c);
5559 else
5560 controlp = OMP_CLAUSE_DECL (c);
5561 if (controlb && controlp)
5562 break;
5564 gcc_assert (controlp && controlb);
5565 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5566 NULL_TREE, NULL_TREE);
5567 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5568 exit1_bb = split_block (exit_bb, g)->dest;
5569 gsi = gsi_after_labels (exit1_bb);
5570 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5571 controlp);
5572 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5573 exit2_bb = split_block (exit1_bb, g)->dest;
5574 gsi = gsi_after_labels (exit2_bb);
5575 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5576 controlp);
5577 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5578 exit3_bb = split_block (exit2_bb, g)->dest;
5579 gsi = gsi_after_labels (exit3_bb);
5581 gsi_remove (&gsi, true);
5583 /* Connect all the blocks. */
5584 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5585 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5586 ep = find_edge (entry_bb, second_bb);
5587 ep->flags = EDGE_TRUE_VALUE;
5588 ep->probability = profile_probability::guessed_always () / 4;
5589 if (fourth_bb)
5591 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5592 ep->probability = profile_probability::guessed_always () / 2;
5593 ep = find_edge (third_bb, fourth_bb);
5594 ep->flags = EDGE_TRUE_VALUE;
5595 ep->probability = profile_probability::guessed_always () / 2;
5596 ep = find_edge (fourth_bb, fifth_bb);
5597 redirect_edge_and_branch (ep, sixth_bb);
5599 else
5600 sixth_bb = third_bb;
5601 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5602 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5603 if (exit1_bb)
5605 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5606 ep->probability = profile_probability::guessed_always () / 2;
5607 ep = find_edge (exit_bb, exit1_bb);
5608 ep->flags = EDGE_TRUE_VALUE;
5609 ep->probability = profile_probability::guessed_always () / 2;
5610 ep = find_edge (exit1_bb, exit2_bb);
5611 redirect_edge_and_branch (ep, exit3_bb);
5614 if (!broken_loop)
5616 ep = find_edge (cont_bb, body_bb);
5617 if (ep == NULL)
5619 ep = BRANCH_EDGE (cont_bb);
5620 gcc_assert (single_succ (ep->dest) == body_bb);
5622 if (gimple_omp_for_combined_p (fd->for_stmt))
5624 remove_edge (ep);
5625 ep = NULL;
5627 else if (fd->collapse > 1)
5629 remove_edge (ep);
5630 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5632 else
5633 ep->flags = EDGE_TRUE_VALUE;
5634 find_edge (cont_bb, fin_bb)->flags
5635 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5638 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5639 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5640 if (fourth_bb)
5642 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5643 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5645 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5647 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5648 recompute_dominator (CDI_DOMINATORS, body_bb));
5649 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5650 recompute_dominator (CDI_DOMINATORS, fin_bb));
5651 if (exit1_bb)
5653 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5654 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5657 class loop *loop = body_bb->loop_father;
5658 if (loop != entry_bb->loop_father)
5660 gcc_assert (broken_loop || loop->header == body_bb);
5661 gcc_assert (broken_loop
5662 || loop->latch == region->cont
5663 || single_pred (loop->latch) == region->cont);
5664 return;
5667 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5669 loop = alloc_loop ();
5670 loop->header = body_bb;
5671 if (collapse_bb == NULL)
5672 loop->latch = cont_bb;
5673 add_loop (loop, body_bb->loop_father);
5677 /* Return the PHI node in E->DEST whose argument on edge E is ARG, or NULL if there is none. */
5679 static gphi *
5680 find_phi_with_arg_on_edge (tree arg, edge e)
5682 basic_block bb = e->dest;
5684 for (gphi_iterator gpi = gsi_start_phis (bb);
5685 !gsi_end_p (gpi);
5686 gsi_next (&gpi))
5688 gphi *phi = gpi.phi ();
5689 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5690 return phi;
5693 return NULL;
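/* This helper is used by expand_omp_for_static_chunk below when
   rewiring the trip-count back edge in SSA form: given the value
   carried over the back edge, it recovers the inner loop's PHI node
   so a matching argument can be added on the seq_start_bb -> body_bb
   edge.  */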
5696 /* A subroutine of expand_omp_for. Generate code for a parallel
5697 loop with static schedule and a specified chunk size. Given
5698 parameters:
5700 for (V = N1; V cond N2; V += STEP) BODY;
5702 where COND is "<" or ">", we generate pseudocode
5704 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
5705 if (cond is <)
5706 adj = STEP - 1;
5707 else
5708 adj = STEP + 1;
5709 if ((__typeof (V)) -1 > 0 && cond is >)
5710 n = -(adj + N2 - N1) / -STEP;
5711 else
5712 n = (adj + N2 - N1) / STEP;
5713 trip = 0;
5714 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5715 here so that V is defined
5716 if the loop is not entered
5717 L0:
5718 s0 = (trip * nthreads + threadid) * CHUNK;
5719 e0 = min (s0 + CHUNK, n);
5720 if (s0 < n) goto L1; else goto L4;
5721 L1:
5722 V = s0 * STEP + N1;
5723 e = e0 * STEP + N1;
5724 L2:
5725 BODY;
5726 V += STEP;
5727 if (V cond e) goto L2; else goto L3;
5728 L3:
5729 trip += 1;
5730 goto L0;
5731 L4:
5732 */
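/* A worked example of the chunked schedule: with n = 10, CHUNK = 2
   and nthreads = 2, thread 0 computes s0 = (trip * 2 + 0) * 2 = 0, 4,
   8 on successive trips and thread 1 computes s0 = 2, 6, 10, each
   with e0 = min (s0 + 2, 10).  Thread 1's third trip fails s0 < n,
   so chunks [0,2) [4,6) [8,10) go to thread 0 and [2,4) [6,8) to
   thread 1 -- a round-robin assignment rather than the contiguous
   blocks of the no-chunk schedule.  */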
5734 static void
5735 expand_omp_for_static_chunk (struct omp_region *region,
5736 struct omp_for_data *fd, gimple *inner_stmt)
5738 tree n, s0, e0, e, t;
5739 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5740 tree type, itype, vmain, vback, vextra;
5741 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5742 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5743 gimple_stmt_iterator gsi, gsip;
5744 edge se;
5745 bool broken_loop = region->cont == NULL;
5746 tree *counts = NULL;
5747 tree n1, n2, step;
5748 tree reductions = NULL_TREE;
5749 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5751 itype = type = TREE_TYPE (fd->loop.v);
5752 if (POINTER_TYPE_P (type))
5753 itype = signed_type_for (type);
5755 entry_bb = region->entry;
5756 se = split_block (entry_bb, last_stmt (entry_bb));
5757 entry_bb = se->src;
5758 iter_part_bb = se->dest;
5759 cont_bb = region->cont;
5760 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5761 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5762 gcc_assert (broken_loop
5763 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5764 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5765 body_bb = single_succ (seq_start_bb);
5766 if (!broken_loop)
5768 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5769 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5770 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5771 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5773 exit_bb = region->exit;
5775 /* Trip and adjustment setup goes in ENTRY_BB. */
5776 gsi = gsi_last_nondebug_bb (entry_bb);
5777 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5778 gsip = gsi;
5779 gsi_prev (&gsip);
5781 if (fd->collapse > 1)
5783 int first_zero_iter = -1, dummy = -1;
5784 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5786 counts = XALLOCAVEC (tree, fd->collapse);
5787 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5788 fin_bb, first_zero_iter,
5789 dummy_bb, dummy, l2_dom_bb);
5790 t = NULL_TREE;
5792 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5793 t = integer_one_node;
5794 else
5795 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5796 fold_convert (type, fd->loop.n1),
5797 fold_convert (type, fd->loop.n2));
5798 if (fd->collapse == 1
5799 && TYPE_UNSIGNED (type)
5800 && (t == NULL_TREE || !integer_onep (t)))
5802 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5803 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5804 true, GSI_SAME_STMT);
5805 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5806 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5807 true, GSI_SAME_STMT);
5808 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5809 n1, n2);
5810 se = split_block (entry_bb, cond_stmt);
5811 se->flags = EDGE_TRUE_VALUE;
5812 entry_bb = se->dest;
5813 se->probability = profile_probability::very_likely ();
5814 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5815 se->probability = profile_probability::very_unlikely ();
5816 if (gimple_in_ssa_p (cfun))
5818 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5819 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5820 !gsi_end_p (gpi); gsi_next (&gpi))
5822 gphi *phi = gpi.phi ();
5823 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5824 se, UNKNOWN_LOCATION);
5827 gsi = gsi_last_bb (entry_bb);
5830 if (fd->lastprivate_conditional)
5832 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5833 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5834 if (fd->have_pointer_condtemp)
5835 condtemp = OMP_CLAUSE_DECL (c);
5836 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5837 cond_var = OMP_CLAUSE_DECL (c);
5839 if (fd->have_reductemp || fd->have_pointer_condtemp)
5841 tree t1 = build_int_cst (long_integer_type_node, 0);
5842 tree t2 = build_int_cst (long_integer_type_node, 1);
5843 tree t3 = build_int_cstu (long_integer_type_node,
5844 (HOST_WIDE_INT_1U << 31) + 1);
5845 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5846 gimple_stmt_iterator gsi2 = gsi_none ();
5847 gimple *g = NULL;
5848 tree mem = null_pointer_node, memv = NULL_TREE;
5849 if (fd->have_reductemp)
5851 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5852 reductions = OMP_CLAUSE_DECL (c);
5853 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5854 g = SSA_NAME_DEF_STMT (reductions);
5855 reductions = gimple_assign_rhs1 (g);
5856 OMP_CLAUSE_DECL (c) = reductions;
5857 gsi2 = gsi_for_stmt (g);
5859 else
5861 if (gsi_end_p (gsip))
5862 gsi2 = gsi_after_labels (region->entry);
5863 else
5864 gsi2 = gsip;
5865 reductions = null_pointer_node;
5867 if (fd->have_pointer_condtemp)
5869 tree type = TREE_TYPE (condtemp);
5870 memv = create_tmp_var (type);
5871 TREE_ADDRESSABLE (memv) = 1;
5872 unsigned HOST_WIDE_INT sz
5873 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5874 sz *= fd->lastprivate_conditional;
5875 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5876 false);
5877 mem = build_fold_addr_expr (memv);
5879 tree t
5880 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5881 9, t1, t2, t2, t3, t1, null_pointer_node,
5882 null_pointer_node, reductions, mem);
5883 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5884 true, GSI_SAME_STMT);
5885 if (fd->have_pointer_condtemp)
5886 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5887 if (fd->have_reductemp)
5889 gsi_remove (&gsi2, true);
5890 release_ssa_name (gimple_assign_lhs (g));
5893 switch (gimple_omp_for_kind (fd->for_stmt))
5895 case GF_OMP_FOR_KIND_FOR:
5896 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5897 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5898 break;
5899 case GF_OMP_FOR_KIND_DISTRIBUTE:
5900 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5901 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5902 break;
5903 default:
5904 gcc_unreachable ();
5906 nthreads = build_call_expr (nthreads, 0);
5907 nthreads = fold_convert (itype, nthreads);
5908 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5909 true, GSI_SAME_STMT);
5910 threadid = build_call_expr (threadid, 0);
5911 threadid = fold_convert (itype, threadid);
5912 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5913 true, GSI_SAME_STMT);
5915 n1 = fd->loop.n1;
5916 n2 = fd->loop.n2;
5917 step = fd->loop.step;
5918 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5920 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5921 OMP_CLAUSE__LOOPTEMP_);
5922 gcc_assert (innerc);
5923 n1 = OMP_CLAUSE_DECL (innerc);
5924 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5925 OMP_CLAUSE__LOOPTEMP_);
5926 gcc_assert (innerc);
5927 n2 = OMP_CLAUSE_DECL (innerc);
5929 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5930 true, NULL_TREE, true, GSI_SAME_STMT);
5931 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5932 true, NULL_TREE, true, GSI_SAME_STMT);
5933 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5934 true, NULL_TREE, true, GSI_SAME_STMT);
5935 tree chunk_size = fold_convert (itype, fd->chunk_size);
5936 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5937 chunk_size
5938 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5939 GSI_SAME_STMT);
5941 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5942 t = fold_build2 (PLUS_EXPR, itype, step, t);
5943 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5944 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5945 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5946 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5947 fold_build1 (NEGATE_EXPR, itype, t),
5948 fold_build1 (NEGATE_EXPR, itype, step));
5949 else
5950 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5951 t = fold_convert (itype, t);
5952 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5953 true, GSI_SAME_STMT);
5955 trip_var = create_tmp_reg (itype, ".trip");
5956 if (gimple_in_ssa_p (cfun))
5958 trip_init = make_ssa_name (trip_var);
5959 trip_main = make_ssa_name (trip_var);
5960 trip_back = make_ssa_name (trip_var);
5962 else
5964 trip_init = trip_var;
5965 trip_main = trip_var;
5966 trip_back = trip_var;
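/* In SSA form the trip counter needs three names: TRIP_INIT for the
   zero stored in ENTRY_BB, TRIP_MAIN for the PHI result live in
   ITER_PART_BB, and TRIP_BACK for the incremented value on the back
   edge from TRIP_UPDATE_BB.  The PHI node tying them together is only
   created near the end of this function, once the edges exist.  */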
5969 gassign *assign_stmt
5970 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5971 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5973 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5974 t = fold_build2 (MULT_EXPR, itype, t, step);
5975 if (POINTER_TYPE_P (type))
5976 t = fold_build_pointer_plus (n1, t);
5977 else
5978 t = fold_build2 (PLUS_EXPR, type, t, n1);
5979 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5980 true, GSI_SAME_STMT);
5982 /* Remove the GIMPLE_OMP_FOR. */
5983 gsi_remove (&gsi, true);
5985 gimple_stmt_iterator gsif = gsi;
5987 /* Iteration space partitioning goes in ITER_PART_BB. */
5988 gsi = gsi_last_bb (iter_part_bb);
5990 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5991 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5992 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5993 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5994 false, GSI_CONTINUE_LINKING);
5996 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5997 t = fold_build2 (MIN_EXPR, itype, t, n);
5998 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5999 false, GSI_CONTINUE_LINKING);
6001 t = build2 (LT_EXPR, boolean_type_node, s0, n);
6002 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
6004 /* Setup code for sequential iteration goes in SEQ_START_BB. */
6005 gsi = gsi_start_bb (seq_start_bb);
6007 tree startvar = fd->loop.v;
6008 tree endvar = NULL_TREE;
6010 if (gimple_omp_for_combined_p (fd->for_stmt))
6012 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
6013 ? gimple_omp_parallel_clauses (inner_stmt)
6014 : gimple_omp_for_clauses (inner_stmt);
6015 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6016 gcc_assert (innerc);
6017 startvar = OMP_CLAUSE_DECL (innerc);
6018 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6019 OMP_CLAUSE__LOOPTEMP_);
6020 gcc_assert (innerc);
6021 endvar = OMP_CLAUSE_DECL (innerc);
6022 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
6023 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
6025 innerc = find_lastprivate_looptemp (fd, innerc);
6026 if (innerc)
6028 /* If needed (distribute parallel for with lastprivate),
6029 propagate down the total number of iterations. */
6030 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
6031 fd->loop.n2);
6032 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
6033 GSI_CONTINUE_LINKING);
6034 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6035 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6040 t = fold_convert (itype, s0);
6041 t = fold_build2 (MULT_EXPR, itype, t, step);
6042 if (POINTER_TYPE_P (type))
6044 t = fold_build_pointer_plus (n1, t);
6045 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6046 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6047 t = fold_convert (signed_type_for (type), t);
6049 else
6050 t = fold_build2 (PLUS_EXPR, type, t, n1);
6051 t = fold_convert (TREE_TYPE (startvar), t);
6052 t = force_gimple_operand_gsi (&gsi, t,
6053 DECL_P (startvar)
6054 && TREE_ADDRESSABLE (startvar),
6055 NULL_TREE, false, GSI_CONTINUE_LINKING);
6056 assign_stmt = gimple_build_assign (startvar, t);
6057 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6058 if (cond_var)
6060 tree itype = TREE_TYPE (cond_var);
6061 /* For the lastprivate(conditional:) iteration variable, we need an
6062 iteration counter that starts at a non-zero unsigned value and
6063 increases. Prefer as few IVs as possible, so if we can use startvar
6064 itself, use that, or startvar + constant (those would be
6065 incremented by step), and as a last resort use s0 + 1,
6066 incremented by 1 each iteration. */
6067 if (POINTER_TYPE_P (type)
6068 || TREE_CODE (n1) != INTEGER_CST
6069 || fd->loop.cond_code != LT_EXPR)
6070 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
6071 build_int_cst (itype, 1));
6072 else if (tree_int_cst_sgn (n1) == 1)
6073 t = fold_convert (itype, t);
6074 else
6076 tree c = fold_convert (itype, n1);
6077 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
6078 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
6080 t = force_gimple_operand_gsi (&gsi, t, false,
6081 NULL_TREE, false, GSI_CONTINUE_LINKING);
6082 assign_stmt = gimple_build_assign (cond_var, t);
6083 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6086 t = fold_convert (itype, e0);
6087 t = fold_build2 (MULT_EXPR, itype, t, step);
6088 if (POINTER_TYPE_P (type))
6090 t = fold_build_pointer_plus (n1, t);
6091 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6092 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6093 t = fold_convert (signed_type_for (type), t);
6095 else
6096 t = fold_build2 (PLUS_EXPR, type, t, n1);
6097 t = fold_convert (TREE_TYPE (startvar), t);
6098 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6099 false, GSI_CONTINUE_LINKING);
6100 if (endvar)
6102 assign_stmt = gimple_build_assign (endvar, e);
6103 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6104 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6105 assign_stmt = gimple_build_assign (fd->loop.v, e);
6106 else
6107 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6108 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6110 /* Handle linear clause adjustments. */
6111 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6112 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6113 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6114 c; c = OMP_CLAUSE_CHAIN (c))
6115 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6116 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6118 tree d = OMP_CLAUSE_DECL (c);
6119 tree t = d, a, dest;
6120 if (omp_privatize_by_reference (t))
6121 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6122 tree type = TREE_TYPE (t);
6123 if (POINTER_TYPE_P (type))
6124 type = sizetype;
6125 dest = unshare_expr (t);
6126 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6127 expand_omp_build_assign (&gsif, v, t);
6128 if (itercnt == NULL_TREE)
6130 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6132 itercntbias
6133 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6134 fold_convert (itype, fd->loop.n1));
6135 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6136 itercntbias, step);
6137 itercntbias
6138 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6139 NULL_TREE, true,
6140 GSI_SAME_STMT);
6141 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6142 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6143 NULL_TREE, false,
6144 GSI_CONTINUE_LINKING);
6146 else
6147 itercnt = s0;
6149 a = fold_build2 (MULT_EXPR, type,
6150 fold_convert (type, itercnt),
6151 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6152 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6153 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6154 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6155 false, GSI_CONTINUE_LINKING);
6156 expand_omp_build_assign (&gsi, dest, t, true);
6158 if (fd->collapse > 1)
6159 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6161 if (!broken_loop)
6163 /* The code controlling the sequential loop goes in CONT_BB,
6164 replacing the GIMPLE_OMP_CONTINUE. */
6165 gsi = gsi_last_nondebug_bb (cont_bb);
6166 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6167 vmain = gimple_omp_continue_control_use (cont_stmt);
6168 vback = gimple_omp_continue_control_def (cont_stmt);
6170 if (cond_var)
6172 tree itype = TREE_TYPE (cond_var);
6173 tree t2;
6174 if (POINTER_TYPE_P (type)
6175 || TREE_CODE (n1) != INTEGER_CST
6176 || fd->loop.cond_code != LT_EXPR)
6177 t2 = build_int_cst (itype, 1);
6178 else
6179 t2 = fold_convert (itype, step);
6180 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6181 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6182 NULL_TREE, true, GSI_SAME_STMT);
6183 assign_stmt = gimple_build_assign (cond_var, t2);
6184 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6187 if (!gimple_omp_for_combined_p (fd->for_stmt))
6189 if (POINTER_TYPE_P (type))
6190 t = fold_build_pointer_plus (vmain, step);
6191 else
6192 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6193 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6194 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6195 true, GSI_SAME_STMT);
6196 assign_stmt = gimple_build_assign (vback, t);
6197 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6199 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6200 t = build2 (EQ_EXPR, boolean_type_node,
6201 build_int_cst (itype, 0),
6202 build_int_cst (itype, 1));
6203 else
6204 t = build2 (fd->loop.cond_code, boolean_type_node,
6205 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6206 ? t : vback, e);
6207 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
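/* Note the chunk_size == 1 special case above: each thread then runs
   exactly one iteration per trip, so the continue test of the
   sequential loop can never succeed and is emitted as the
   always-false condition 0 == 1, making the body run exactly once
   per chunk.  */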
6210 /* Remove GIMPLE_OMP_CONTINUE. */
6211 gsi_remove (&gsi, true);
6213 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6214 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6216 /* Trip update code goes into TRIP_UPDATE_BB. */
6217 gsi = gsi_start_bb (trip_update_bb);
6219 t = build_int_cst (itype, 1);
6220 t = build2 (PLUS_EXPR, itype, trip_main, t);
6221 assign_stmt = gimple_build_assign (trip_back, t);
6222 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6225 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6226 gsi = gsi_last_nondebug_bb (exit_bb);
6227 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6229 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6230 if (fd->have_reductemp || fd->have_pointer_condtemp)
6232 tree fn;
6233 if (t)
6234 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6235 else
6236 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6237 gcall *g = gimple_build_call (fn, 0);
6238 if (t)
6240 gimple_call_set_lhs (g, t);
6241 if (fd->have_reductemp)
6242 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6243 NOP_EXPR, t),
6244 GSI_SAME_STMT);
6246 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6248 else
6249 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6251 else if (fd->have_pointer_condtemp)
6253 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6254 gcall *g = gimple_build_call (fn, 0);
6255 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6257 gsi_remove (&gsi, true);
6259 /* Connect the new blocks. */
6260 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6261 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6263 if (!broken_loop)
6265 se = find_edge (cont_bb, body_bb);
6266 if (se == NULL)
6268 se = BRANCH_EDGE (cont_bb);
6269 gcc_assert (single_succ (se->dest) == body_bb);
6271 if (gimple_omp_for_combined_p (fd->for_stmt))
6273 remove_edge (se);
6274 se = NULL;
6276 else if (fd->collapse > 1)
6278 remove_edge (se);
6279 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6281 else
6282 se->flags = EDGE_TRUE_VALUE;
6283 find_edge (cont_bb, trip_update_bb)->flags
6284 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6286 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6287 iter_part_bb);
6290 if (gimple_in_ssa_p (cfun))
6292 gphi_iterator psi;
6293 gphi *phi;
6294 edge re, ene;
6295 edge_var_map *vm;
6296 size_t i;
6298 gcc_assert (fd->collapse == 1 && !broken_loop);
6300 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6301 remove arguments of the phi nodes in fin_bb. We need to create
6302 appropriate phi nodes in iter_part_bb instead. */
6303 se = find_edge (iter_part_bb, fin_bb);
6304 re = single_succ_edge (trip_update_bb);
6305 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6306 ene = single_succ_edge (entry_bb);
6308 psi = gsi_start_phis (fin_bb);
6309 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6310 gsi_next (&psi), ++i)
6312 gphi *nphi;
6313 location_t locus;
6315 phi = psi.phi ();
6316 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6317 redirect_edge_var_map_def (vm), 0))
6318 continue;
6320 t = gimple_phi_result (phi);
6321 gcc_assert (t == redirect_edge_var_map_result (vm));
6323 if (!single_pred_p (fin_bb))
6324 t = copy_ssa_name (t, phi);
6326 nphi = create_phi_node (t, iter_part_bb);
6328 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6329 locus = gimple_phi_arg_location_from_edge (phi, se);
6331 /* A special case -- fd->loop.v is not yet computed in
6332 iter_part_bb, so we need to use vextra instead. */
6333 if (t == fd->loop.v)
6334 t = vextra;
6335 add_phi_arg (nphi, t, ene, locus);
6336 locus = redirect_edge_var_map_location (vm);
6337 tree back_arg = redirect_edge_var_map_def (vm);
6338 add_phi_arg (nphi, back_arg, re, locus);
6339 edge ce = find_edge (cont_bb, body_bb);
6340 if (ce == NULL)
6342 ce = BRANCH_EDGE (cont_bb);
6343 gcc_assert (single_succ (ce->dest) == body_bb);
6344 ce = single_succ_edge (ce->dest);
6346 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6347 gcc_assert (inner_loop_phi != NULL);
6348 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6349 find_edge (seq_start_bb, body_bb), locus);
6351 if (!single_pred_p (fin_bb))
6352 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6354 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6355 redirect_edge_var_map_clear (re);
6356 if (single_pred_p (fin_bb))
6357 while (1)
6359 psi = gsi_start_phis (fin_bb);
6360 if (gsi_end_p (psi))
6361 break;
6362 remove_phi_node (&psi, false);
6365 /* Make phi node for trip. */
6366 phi = create_phi_node (trip_main, iter_part_bb);
6367 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6368 UNKNOWN_LOCATION);
6369 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6370 UNKNOWN_LOCATION);
6373 if (!broken_loop)
6374 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6375 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6376 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6377 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6378 recompute_dominator (CDI_DOMINATORS, fin_bb));
6379 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6380 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6381 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6382 recompute_dominator (CDI_DOMINATORS, body_bb));
6384 if (!broken_loop)
6386 class loop *loop = body_bb->loop_father;
6387 class loop *trip_loop = alloc_loop ();
6388 trip_loop->header = iter_part_bb;
6389 trip_loop->latch = trip_update_bb;
6390 add_loop (trip_loop, iter_part_bb->loop_father);
6392 if (loop != entry_bb->loop_father)
6394 gcc_assert (loop->header == body_bb);
6395 gcc_assert (loop->latch == region->cont
6396 || single_pred (loop->latch) == region->cont);
6397 trip_loop->inner = loop;
6398 return;
6401 if (!gimple_omp_for_combined_p (fd->for_stmt))
6403 loop = alloc_loop ();
6404 loop->header = body_bb;
6405 if (collapse_bb == NULL)
6406 loop->latch = cont_bb;
6407 add_loop (loop, trip_loop);
6412 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6413 loop. Given parameters:
6415 for (V = N1; V cond N2; V += STEP) BODY;
6417 where COND is "<" or ">", we generate pseudocode
6419 V = N1;
6420 goto L1;
6421 L0:
6422 BODY;
6423 V += STEP;
6424 L1:
6425 if (V cond N2) goto L0; else goto L2;
6426 L2:
6428 For collapsed loops, emit the outer loops as scalar
6429 and only try to vectorize the innermost loop. */
6431 static void
6432 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6434 tree type, t;
6435 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6436 gimple_stmt_iterator gsi;
6437 gimple *stmt;
6438 gcond *cond_stmt;
6439 bool broken_loop = region->cont == NULL;
6440 edge e, ne;
6441 tree *counts = NULL;
6442 int i;
6443 int safelen_int = INT_MAX;
6444 bool dont_vectorize = false;
6445 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6446 OMP_CLAUSE_SAFELEN);
6447 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6448 OMP_CLAUSE__SIMDUID_);
6449 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6450 OMP_CLAUSE_IF);
6451 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6452 OMP_CLAUSE_SIMDLEN);
6453 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6454 OMP_CLAUSE__CONDTEMP_);
6455 tree n1, n2;
6456 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6458 if (safelen)
6460 poly_uint64 val;
6461 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6462 if (!poly_int_tree_p (safelen, &val))
6463 safelen_int = 0;
6464 else
6465 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6466 if (safelen_int == 1)
6467 safelen_int = 0;
6469 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6470 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6472 safelen_int = 0;
6473 dont_vectorize = true;
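/* E.g. safelen(4) yields safelen_int == 4 here, while safelen(1) is
   canonicalized to 0, since a single-lane loop has nothing to
   vectorize; an if(0) or simdlen(1) clause likewise zeroes
   safelen_int and additionally sets dont_vectorize.  A safelen
   expression that is not a known constant is treated conservatively
   as 0.  */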
6475 type = TREE_TYPE (fd->loop.v);
6476 entry_bb = region->entry;
6477 cont_bb = region->cont;
6478 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6479 gcc_assert (broken_loop
6480 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6481 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6482 if (!broken_loop)
6484 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6485 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6486 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6487 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6489 else
6491 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6492 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6493 l2_bb = single_succ (l1_bb);
6495 exit_bb = region->exit;
6496 l2_dom_bb = NULL;
6498 gsi = gsi_last_nondebug_bb (entry_bb);
6500 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6501 /* Not needed in SSA form right now. */
6502 gcc_assert (!gimple_in_ssa_p (cfun));
6503 if (fd->collapse > 1
6504 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6505 || broken_loop))
6507 int first_zero_iter = -1, dummy = -1;
6508 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6510 counts = XALLOCAVEC (tree, fd->collapse);
6511 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6512 zero_iter_bb, first_zero_iter,
6513 dummy_bb, dummy, l2_dom_bb);
6515 if (l2_dom_bb == NULL)
6516 l2_dom_bb = l1_bb;
6518 n1 = fd->loop.n1;
6519 n2 = fd->loop.n2;
6520 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6522 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6523 OMP_CLAUSE__LOOPTEMP_);
6524 gcc_assert (innerc);
6525 n1 = OMP_CLAUSE_DECL (innerc);
6526 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6527 OMP_CLAUSE__LOOPTEMP_);
6528 gcc_assert (innerc);
6529 n2 = OMP_CLAUSE_DECL (innerc);
6531 tree step = fd->loop.step;
6532 tree orig_step = step; /* STEP may be scaled below when is_simt; keep the original here. */
6534 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6535 OMP_CLAUSE__SIMT_);
6536 if (is_simt)
6538 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6539 is_simt = safelen_int > 1;
6541 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6542 if (is_simt)
6544 simt_lane = create_tmp_var (unsigned_type_node);
6545 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6546 gimple_call_set_lhs (g, simt_lane);
6547 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6548 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6549 fold_convert (TREE_TYPE (step), simt_lane));
6550 n1 = fold_convert (type, n1);
6551 if (POINTER_TYPE_P (type))
6552 n1 = fold_build_pointer_plus (n1, offset);
6553 else
6554 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6556 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6557 if (fd->collapse > 1)
6558 simt_maxlane = build_one_cst (unsigned_type_node);
6559 else if (safelen_int < omp_max_simt_vf ())
6560 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6561 tree vf
6562 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6563 unsigned_type_node, 0);
6564 if (simt_maxlane)
6565 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6566 vf = fold_convert (TREE_TYPE (step), vf);
6567 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
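/* After the adjustment above, lane L starts at N1 + STEP * L and
   advances by STEP * VF, so the VF lanes interleave consecutive
   iterations.  SIMT_MAXLANE, when set, both caps VF here and (see the
   end of this function) gates which lanes enter the loop at all,
   since safelen or a collapsed nest permits fewer concurrent lanes.
   The 'V -= STEP * (SIMT_VF - 1)' fixup after the loop restores the
   scalar view of V.  */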
6570 tree n2var = NULL_TREE;
6571 tree n2v = NULL_TREE;
6572 tree *nonrect_bounds = NULL;
6573 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6574 if (fd->collapse > 1)
6576 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6578 if (fd->non_rect)
6580 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6581 memset (nonrect_bounds, 0,
6582 sizeof (tree) * (fd->last_nonrect + 1));
6584 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6585 gcc_assert (entry_bb == gsi_bb (gsi));
6586 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6587 gsi_prev (&gsi);
6588 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6589 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6590 NULL, n1);
6591 gsi = gsi_for_stmt (fd->for_stmt);
6593 if (broken_loop)
6595 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6597 /* Compute in n2var the limit for the first innermost loop,
6598 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6599 where cnt is how many iterations the loop would have if
6600 all further iterations were assigned to the current task. */
6601 n2var = create_tmp_var (type);
6602 i = fd->collapse - 1;
6603 tree itype = TREE_TYPE (fd->loops[i].v);
6604 if (POINTER_TYPE_P (itype))
6605 itype = signed_type_for (itype);
6606 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6607 ? -1 : 1));
6608 t = fold_build2 (PLUS_EXPR, itype,
6609 fold_convert (itype, fd->loops[i].step), t);
6610 t = fold_build2 (PLUS_EXPR, itype, t,
6611 fold_convert (itype, fd->loops[i].n2));
6612 if (fd->loops[i].m2)
6614 tree t2 = fold_convert (itype,
6615 fd->loops[i - fd->loops[i].outer].v);
6616 tree t3 = fold_convert (itype, fd->loops[i].m2);
6617 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6618 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6620 t = fold_build2 (MINUS_EXPR, itype, t,
6621 fold_convert (itype, fd->loops[i].v));
6622 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6623 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6624 fold_build1 (NEGATE_EXPR, itype, t),
6625 fold_build1 (NEGATE_EXPR, itype,
6626 fold_convert (itype,
6627 fd->loops[i].step)));
6628 else
6629 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6630 fold_convert (itype, fd->loops[i].step));
6631 t = fold_convert (type, t);
6632 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6633 min_arg1 = create_tmp_var (type);
6634 expand_omp_build_assign (&gsi, min_arg1, t2);
6635 min_arg2 = create_tmp_var (type);
6636 expand_omp_build_assign (&gsi, min_arg2, t);
6638 else
6640 if (TREE_CODE (n2) == INTEGER_CST)
6642 /* Indicate for lastprivate handling that at least one iteration
6643 has been performed, without wasting runtime. */
6644 if (integer_nonzerop (n2))
6645 expand_omp_build_assign (&gsi, fd->loop.v,
6646 fold_convert (type, n2));
6647 else
6648 /* Indicate that no iteration has been performed. */
6649 expand_omp_build_assign (&gsi, fd->loop.v,
6650 build_one_cst (type));
6652 else
6654 expand_omp_build_assign (&gsi, fd->loop.v,
6655 build_zero_cst (type));
6656 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6658 for (i = 0; i < fd->collapse; i++)
6660 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6661 if (fd->loops[i].m1)
6663 tree t2
6664 = fold_convert (TREE_TYPE (t),
6665 fd->loops[i - fd->loops[i].outer].v);
6666 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6667 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6668 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6670 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6671 /* For normal non-combined collapsed loops just initialize
6672 the outermost iterator in the entry_bb. */
6673 if (!broken_loop)
6674 break;
6678 else
6679 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6680 tree altv = NULL_TREE, altn2 = NULL_TREE;
6681 if (fd->collapse == 1
6682 && !broken_loop
6683 && TREE_CODE (orig_step) != INTEGER_CST)
6685 /* The vectorizer currently punts on loops with a non-constant step
6686 for the main IV (it can't compute the number of iterations and gives
6687 up because of that). Since for OpenMP loops it is always possible
6688 to compute the number of iterations upfront, use an alternate IV
6689 as the loop iterator:
6690 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6691 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6692 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6693 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6694 tree itype = TREE_TYPE (fd->loop.v);
6695 if (POINTER_TYPE_P (itype))
6696 itype = signed_type_for (itype);
6697 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6698 t = fold_build2 (PLUS_EXPR, itype,
6699 fold_convert (itype, step), t);
6700 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6701 t = fold_build2 (MINUS_EXPR, itype, t,
6702 fold_convert (itype, fd->loop.v));
6703 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6704 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6705 fold_build1 (NEGATE_EXPR, itype, t),
6706 fold_build1 (NEGATE_EXPR, itype,
6707 fold_convert (itype, step)));
6708 else
6709 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6710 fold_convert (itype, step));
6711 t = fold_convert (TREE_TYPE (altv), t);
6712 altn2 = create_tmp_var (TREE_TYPE (altv));
6713 expand_omp_build_assign (&gsi, altn2, t);
6714 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6715 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6716 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6717 true, GSI_SAME_STMT);
6718 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6719 build_zero_cst (TREE_TYPE (altv)));
6720 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
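/* E.g. for (V = 0; V < 10; V += s) with a run-time s, the code above
   computes altn2 = (s - 1 + 10 - 0) / s, and the COND_EXPR resets
   altn2 to 0 when V < n2 does not hold on entry, so the alternate IV
   loop for (altv = 0; altv < altn2; altv++) runs exactly as many
   times as the original.  */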
6722 else if (fd->collapse > 1
6723 && !broken_loop
6724 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6725 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6727 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6728 altn2 = create_tmp_var (TREE_TYPE (altv));
6730 if (cond_var)
6732 if (POINTER_TYPE_P (type)
6733 || TREE_CODE (n1) != INTEGER_CST
6734 || fd->loop.cond_code != LT_EXPR
6735 || tree_int_cst_sgn (n1) != 1)
6736 expand_omp_build_assign (&gsi, cond_var,
6737 build_one_cst (TREE_TYPE (cond_var)));
6738 else
6739 expand_omp_build_assign (&gsi, cond_var,
6740 fold_convert (TREE_TYPE (cond_var), n1));
6743 /* Remove the GIMPLE_OMP_FOR statement. */
6744 gsi_remove (&gsi, true);
6746 if (!broken_loop)
6748 /* Code to control the increment goes in the CONT_BB. */
6749 gsi = gsi_last_nondebug_bb (cont_bb);
6750 stmt = gsi_stmt (gsi);
6751 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6753 if (fd->collapse == 1
6754 || gimple_omp_for_combined_into_p (fd->for_stmt))
6756 if (POINTER_TYPE_P (type))
6757 t = fold_build_pointer_plus (fd->loop.v, step);
6758 else
6759 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6760 expand_omp_build_assign (&gsi, fd->loop.v, t);
6762 else if (TREE_CODE (n2) != INTEGER_CST)
6763 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6764 if (altv)
6766 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6767 build_one_cst (TREE_TYPE (altv)));
6768 expand_omp_build_assign (&gsi, altv, t);
6771 if (fd->collapse > 1)
6773 i = fd->collapse - 1;
6774 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6775 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6776 else
6778 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6779 fd->loops[i].step);
6780 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6781 fd->loops[i].v, t);
6783 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6785 if (cond_var)
6787 if (POINTER_TYPE_P (type)
6788 || TREE_CODE (n1) != INTEGER_CST
6789 || fd->loop.cond_code != LT_EXPR
6790 || tree_int_cst_sgn (n1) != 1)
6791 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6792 build_one_cst (TREE_TYPE (cond_var)));
6793 else
6794 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6795 fold_convert (TREE_TYPE (cond_var), step));
6796 expand_omp_build_assign (&gsi, cond_var, t);
6799 /* Remove GIMPLE_OMP_CONTINUE. */
6800 gsi_remove (&gsi, true);
6803 /* Emit the condition in L1_BB. */
6804 gsi = gsi_start_bb (l1_bb);
6806 if (altv)
6807 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6808 else if (fd->collapse > 1
6809 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6810 && !broken_loop)
6812 i = fd->collapse - 1;
6813 tree itype = TREE_TYPE (fd->loops[i].v);
6814 if (fd->loops[i].m2)
6815 t = n2v = create_tmp_var (itype);
6816 else
6817 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6818 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6819 false, GSI_CONTINUE_LINKING);
6820 tree v = fd->loops[i].v;
6821 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6822 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6823 false, GSI_CONTINUE_LINKING);
6824 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6826 else
6828 if (fd->collapse > 1 && !broken_loop)
6829 t = n2var;
6830 else
6831 t = fold_convert (type, unshare_expr (n2));
6832 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6833 false, GSI_CONTINUE_LINKING);
6834 tree v = fd->loop.v;
6835 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6836 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6837 false, GSI_CONTINUE_LINKING);
6838 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6840 cond_stmt = gimple_build_cond_empty (t);
6841 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6842 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6843 NULL, NULL)
6844 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6845 NULL, NULL))
6847 gsi = gsi_for_stmt (cond_stmt);
6848 gimple_regimplify_operands (cond_stmt, &gsi);
6851 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6852 if (is_simt)
6854 gsi = gsi_start_bb (l2_bb);
6855 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6856 if (POINTER_TYPE_P (type))
6857 t = fold_build_pointer_plus (fd->loop.v, step);
6858 else
6859 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6860 expand_omp_build_assign (&gsi, fd->loop.v, t);
6863 /* Remove GIMPLE_OMP_RETURN. */
6864 gsi = gsi_last_nondebug_bb (exit_bb);
6865 gsi_remove (&gsi, true);
6867 /* Connect the new blocks. */
6868 remove_edge (FALLTHRU_EDGE (entry_bb));
6870 if (!broken_loop)
6872 remove_edge (BRANCH_EDGE (entry_bb));
6873 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6875 e = BRANCH_EDGE (l1_bb);
6876 ne = FALLTHRU_EDGE (l1_bb);
6877 e->flags = EDGE_TRUE_VALUE;
6879 else
6881 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6883 ne = single_succ_edge (l1_bb);
6884 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6887 ne->flags = EDGE_FALSE_VALUE;
6888 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6889 ne->probability = e->probability.invert ();
6891 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6892 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6894 if (simt_maxlane)
6896 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6897 NULL_TREE, NULL_TREE);
6898 gsi = gsi_last_bb (entry_bb);
6899 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6900 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6901 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6902 FALLTHRU_EDGE (entry_bb)->probability
6903 = profile_probability::guessed_always ().apply_scale (7, 8);
6904 BRANCH_EDGE (entry_bb)->probability
6905 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6906 l2_dom_bb = entry_bb;
6908 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6910 if (!broken_loop && fd->collapse > 1)
6912 basic_block last_bb = l1_bb;
6913 basic_block init_bb = NULL;
6914 for (i = fd->collapse - 2; i >= 0; i--)
6916 tree nextn2v = NULL_TREE;
6917 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6918 e = EDGE_SUCC (last_bb, 0);
6919 else
6920 e = EDGE_SUCC (last_bb, 1);
6921 basic_block bb = split_edge (e);
6922 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6923 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6924 else
6926 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6927 fd->loops[i].step);
6928 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6929 fd->loops[i].v, t);
6931 gsi = gsi_after_labels (bb);
6932 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6934 bb = split_block (bb, last_stmt (bb))->dest;
6935 gsi = gsi_start_bb (bb);
6936 tree itype = TREE_TYPE (fd->loops[i].v);
6937 if (fd->loops[i].m2)
6938 t = nextn2v = create_tmp_var (itype);
6939 else
6940 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6942 false, GSI_CONTINUE_LINKING);
6943 tree v = fd->loops[i].v;
6944 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6945 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6946 false, GSI_CONTINUE_LINKING);
6947 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6948 cond_stmt = gimple_build_cond_empty (t);
6949 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6950 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6951 expand_omp_regimplify_p, NULL, NULL)
6952 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6953 expand_omp_regimplify_p, NULL, NULL))
6955 gsi = gsi_for_stmt (cond_stmt);
6956 gimple_regimplify_operands (cond_stmt, &gsi);
6958 ne = single_succ_edge (bb);
6959 ne->flags = EDGE_FALSE_VALUE;
6961 init_bb = create_empty_bb (bb);
6962 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6963 add_bb_to_loop (init_bb, bb->loop_father);
6964 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6965 e->probability
6966 = profile_probability::guessed_always ().apply_scale (7, 8);
6967 ne->probability = e->probability.invert ();
6969 gsi = gsi_after_labels (init_bb);
6970 if (fd->loops[i + 1].m1)
6972 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6973 fd->loops[i + 1
6974 - fd->loops[i + 1].outer].v);
6975 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6976 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6977 else
6979 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6980 fd->loops[i + 1].n1);
6981 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6982 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6983 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6986 else
6987 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6988 fd->loops[i + 1].n1);
6989 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6990 if (fd->loops[i + 1].m2)
6992 if (i + 2 == fd->collapse && (n2var || altv))
6994 gcc_assert (n2v == NULL_TREE);
6995 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6997 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6998 fd->loops[i + 1
6999 - fd->loops[i + 1].outer].v);
7000 if (POINTER_TYPE_P (TREE_TYPE (t2)))
7001 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
7002 else
7004 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7005 fd->loops[i + 1].n2);
7006 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
7007 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
7008 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
7010 expand_omp_build_assign (&gsi, n2v, t);
7012 if (i + 2 == fd->collapse && n2var)
7014 /* For composite simd, n2 is the first iteration the current
7015 task shouldn't already handle, so we effectively want to use
7016 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
7017 as the vectorized loop. Except the vectorizer will not
7018 vectorize that, so instead compute N2VAR as
7019 N2VAR = V + MIN (N2 - V, COUNTS3) and use
7020 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
7021 as the loop to vectorize. */
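/* E.g. (illustrative numbers): if this task was assigned the logical
   iterations [100, 164) and the innermost loop has 40 iterations left
   (COUNTS3 == 40), then N2VAR = 100 + MIN (64, 40) == 140, so the
   vectorized loop stops at the inner-loop boundary.  */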
7022 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
7023 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
7025 tree itype = TREE_TYPE (fd->loops[i].v);
7026 if (POINTER_TYPE_P (itype))
7027 itype = signed_type_for (itype);
7028 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
7029 == LT_EXPR ? -1 : 1));
7030 t = fold_build2 (PLUS_EXPR, itype,
7031 fold_convert (itype,
7032 fd->loops[i + 1].step), t);
7033 if (fd->loops[i + 1].m2 == NULL_TREE)
7034 t = fold_build2 (PLUS_EXPR, itype, t,
7035 fold_convert (itype,
7036 fd->loops[i + 1].n2));
7037 else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
7039 t = fold_build_pointer_plus (n2v, t);
7040 t = fold_convert (itype, t);
7042 else
7043 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
7044 t = fold_build2 (MINUS_EXPR, itype, t,
7045 fold_convert (itype, fd->loops[i + 1].v));
7046 tree step = fold_convert (itype, fd->loops[i + 1].step);
7047 if (TYPE_UNSIGNED (itype)
7048 && fd->loops[i + 1].cond_code == GT_EXPR)
7049 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7050 fold_build1 (NEGATE_EXPR, itype, t),
7051 fold_build1 (NEGATE_EXPR, itype, step));
7052 else
7053 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7054 t = fold_convert (type, t);
7056 else
7057 t = counts[i + 1];
7058 expand_omp_build_assign (&gsi, min_arg1, t2);
7059 expand_omp_build_assign (&gsi, min_arg2, t);
7060 e = split_block (init_bb, last_stmt (init_bb));
7061 gsi = gsi_after_labels (e->dest);
7062 init_bb = e->dest;
7063 remove_edge (FALLTHRU_EDGE (entry_bb));
7064 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
7065 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
7066 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
7067 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
7068 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
7069 expand_omp_build_assign (&gsi, n2var, t);
7071 if (i + 2 == fd->collapse && altv)
7073 /* The vectorizer currently punts on loops with non-constant
7074 steps for the main IV (can't compute number of iterations
7075 and gives up because of that). As for OpenMP loops it is
7076 always possible to compute the number of iterations upfront,
7077 use an alternate IV as the loop iterator. */
7078 expand_omp_build_assign (&gsi, altv,
7079 build_zero_cst (TREE_TYPE (altv)));
7080 tree itype = TREE_TYPE (fd->loops[i + 1].v);
7081 if (POINTER_TYPE_P (itype))
7082 itype = signed_type_for (itype);
7083 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
7084 ? -1 : 1));
7085 t = fold_build2 (PLUS_EXPR, itype,
7086 fold_convert (itype, fd->loops[i + 1].step), t);
7087 t = fold_build2 (PLUS_EXPR, itype, t,
7088 fold_convert (itype,
7089 fd->loops[i + 1].m2
7090 ? n2v : fd->loops[i + 1].n2));
7091 t = fold_build2 (MINUS_EXPR, itype, t,
7092 fold_convert (itype, fd->loops[i + 1].v));
7093 tree step = fold_convert (itype, fd->loops[i + 1].step);
7094 if (TYPE_UNSIGNED (itype)
7095 && fd->loops[i + 1].cond_code == GT_EXPR)
7096 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7097 fold_build1 (NEGATE_EXPR, itype, t),
7098 fold_build1 (NEGATE_EXPR, itype, step));
7099 else
7100 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7101 t = fold_convert (TREE_TYPE (altv), t);
7102 expand_omp_build_assign (&gsi, altn2, t);
7103 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7104 fd->loops[i + 1].m2
7105 ? n2v : fd->loops[i + 1].n2);
7106 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7107 fd->loops[i + 1].v, t2);
7108 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7109 true, GSI_SAME_STMT);
7110 gassign *g
7111 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7112 build_zero_cst (TREE_TYPE (altv)));
7113 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7115 n2v = nextn2v;
7117 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7118 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7120 e = find_edge (entry_bb, last_bb);
7121 redirect_edge_succ (e, bb);
7122 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7123 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7126 last_bb = bb;
7129 if (!broken_loop)
7131 class loop *loop = alloc_loop ();
7132 loop->header = l1_bb;
7133 loop->latch = cont_bb;
7134 add_loop (loop, l1_bb->loop_father);
7135 loop->safelen = safelen_int;
7136 if (simduid)
7138 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7139 cfun->has_simduid_loops = true;
7141 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7142 the loop. */
7143 if ((flag_tree_loop_vectorize
7144 || !OPTION_SET_P (flag_tree_loop_vectorize))
7145 && flag_tree_loop_optimize
7146 && loop->safelen > 1)
7148 loop->force_vectorize = true;
7149 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7151 unsigned HOST_WIDE_INT v
7152 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7153 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7154 loop->simdlen = v;
7156 cfun->has_force_vectorize_loops = true;
7158 else if (dont_vectorize)
7159 loop->dont_vectorize = true;
7161 else if (simduid)
7162 cfun->has_simduid_loops = true;
7165 /* Taskloop construct is represented after gimplification with
7166 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7167 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7168 which should just compute all the needed loop temporaries
7169 for GIMPLE_OMP_TASK. */
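/* As a rough sketch (illustrative; the details vary with the clauses),

     #pragma omp taskloop
     for (i = N1; i < N2; i += STEP) BODY;

   is represented as

     GIMPLE_OMP_FOR      <-- expanded here; computes the _looptemp_
       GIMPLE_OMP_TASK       bounds handed to the GOMP_taskloop API
         GIMPLE_OMP_FOR  <-- expanded by expand_omp_taskloop_for_inner
           BODY  */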
7171 static void
7172 expand_omp_taskloop_for_outer (struct omp_region *region,
7173 struct omp_for_data *fd,
7174 gimple *inner_stmt)
7176 tree type, bias = NULL_TREE;
7177 basic_block entry_bb, cont_bb, exit_bb;
7178 gimple_stmt_iterator gsi;
7179 gassign *assign_stmt;
7180 tree *counts = NULL;
7181 int i;
7183 gcc_assert (inner_stmt);
7184 gcc_assert (region->cont);
7185 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7186 && gimple_omp_task_taskloop_p (inner_stmt));
7187 type = TREE_TYPE (fd->loop.v);
7189 /* See if we need to bias by LLONG_MIN. */
7190 if (fd->iter_type == long_long_unsigned_type_node
7191 && TREE_CODE (type) == INTEGER_TYPE
7192 && !TYPE_UNSIGNED (type))
7194 tree n1, n2;
7196 if (fd->loop.cond_code == LT_EXPR)
7198 n1 = fd->loop.n1;
7199 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7201 else
7203 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7204 n2 = fd->loop.n1;
7206 if (TREE_CODE (n1) != INTEGER_CST
7207 || TREE_CODE (n2) != INTEGER_CST
7208 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7209 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
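/* E.g. (illustrative): for a signed 64-bit IV driven through the
   unsigned long long runtime entry points, adding LLONG_MIN to both
   bounds maps [LLONG_MIN, LLONG_MAX] onto [0, ULLONG_MAX], so the
   runtime's unsigned comparisons still order the iterations
   correctly.  The bias is skipped when both bounds are constants of
   the same sign, as unsigned ordering is already correct then.  */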
7212 entry_bb = region->entry;
7213 cont_bb = region->cont;
7214 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7215 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7216 exit_bb = region->exit;
7218 gsi = gsi_last_nondebug_bb (entry_bb);
7219 gimple *for_stmt = gsi_stmt (gsi);
7220 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7221 if (fd->collapse > 1)
7223 int first_zero_iter = -1, dummy = -1;
7224 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7226 counts = XALLOCAVEC (tree, fd->collapse);
7227 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7228 zero_iter_bb, first_zero_iter,
7229 dummy_bb, dummy, l2_dom_bb);
7231 if (zero_iter_bb)
7233 /* Some counts[i] vars might be uninitialized if
7234 some loop has zero iterations. But the body shouldn't
7235 be executed in that case, so just avoid uninit warnings. */
7236 for (i = first_zero_iter; i < fd->collapse; i++)
7237 if (SSA_VAR_P (counts[i]))
7238 suppress_warning (counts[i], OPT_Wuninitialized);
7239 gsi_prev (&gsi);
7240 edge e = split_block (entry_bb, gsi_stmt (gsi));
7241 entry_bb = e->dest;
7242 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7243 gsi = gsi_last_bb (entry_bb);
7244 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7245 get_immediate_dominator (CDI_DOMINATORS,
7246 zero_iter_bb));
7250 tree t0, t1;
7251 t1 = fd->loop.n2;
7252 t0 = fd->loop.n1;
7253 if (POINTER_TYPE_P (TREE_TYPE (t0))
7254 && TYPE_PRECISION (TREE_TYPE (t0))
7255 != TYPE_PRECISION (fd->iter_type))
7257 /* Avoid casting pointers to an integer of a different size. */
7258 tree itype = signed_type_for (type);
7259 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7260 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7262 else
7264 t1 = fold_convert (fd->iter_type, t1);
7265 t0 = fold_convert (fd->iter_type, t0);
7267 if (bias)
7269 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7270 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7273 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7274 OMP_CLAUSE__LOOPTEMP_);
7275 gcc_assert (innerc);
7276 tree startvar = OMP_CLAUSE_DECL (innerc);
7277 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7278 gcc_assert (innerc);
7279 tree endvar = OMP_CLAUSE_DECL (innerc);
7280 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7282 innerc = find_lastprivate_looptemp (fd, innerc);
7283 if (innerc)
7285 /* If needed (inner taskloop has lastprivate clause), propagate
7286 down the total number of iterations. */
7287 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7288 NULL_TREE, false,
7289 GSI_CONTINUE_LINKING);
7290 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7291 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7295 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7296 GSI_CONTINUE_LINKING);
7297 assign_stmt = gimple_build_assign (startvar, t0);
7298 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7300 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7301 GSI_CONTINUE_LINKING);
7302 assign_stmt = gimple_build_assign (endvar, t1);
7303 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7304 if (fd->collapse > 1)
7305 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7307 /* Remove the GIMPLE_OMP_FOR statement. */
7308 gsi = gsi_for_stmt (for_stmt);
7309 gsi_remove (&gsi, true);
7311 gsi = gsi_last_nondebug_bb (cont_bb);
7312 gsi_remove (&gsi, true);
7314 gsi = gsi_last_nondebug_bb (exit_bb);
7315 gsi_remove (&gsi, true);
7317 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7318 remove_edge (BRANCH_EDGE (entry_bb));
7319 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7320 remove_edge (BRANCH_EDGE (cont_bb));
7321 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7322 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7323 recompute_dominator (CDI_DOMINATORS, region->entry));
7326 /* Taskloop construct is represented after gimplification with
7327 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7328 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7329 GOMP_taskloop{,_ull} function arranges for each task to be given just
7330 a single range of iterations. */
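/* Sketch of the result (an illustrative approximation): within each
   task the inner GIMPLE_OMP_FOR becomes roughly

     V = _looptemp_1;                  // start of this task's range
     if (V cond _looptemp_2)           // end of this task's range
       do { BODY; V += STEP; } while (V cond _looptemp_2);  */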
7332 static void
7333 expand_omp_taskloop_for_inner (struct omp_region *region,
7334 struct omp_for_data *fd,
7335 gimple *inner_stmt)
7337 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7338 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7339 basic_block fin_bb;
7340 gimple_stmt_iterator gsi;
7341 edge ep;
7342 bool broken_loop = region->cont == NULL;
7343 tree *counts = NULL;
7344 tree n1, n2, step;
7346 itype = type = TREE_TYPE (fd->loop.v);
7347 if (POINTER_TYPE_P (type))
7348 itype = signed_type_for (type);
7350 /* See if we need to bias by LLONG_MIN. */
7351 if (fd->iter_type == long_long_unsigned_type_node
7352 && TREE_CODE (type) == INTEGER_TYPE
7353 && !TYPE_UNSIGNED (type))
7355 tree n1, n2;
7357 if (fd->loop.cond_code == LT_EXPR)
7359 n1 = fd->loop.n1;
7360 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7362 else
7364 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7365 n2 = fd->loop.n1;
7367 if (TREE_CODE (n1) != INTEGER_CST
7368 || TREE_CODE (n2) != INTEGER_CST
7369 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7370 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7373 entry_bb = region->entry;
7374 cont_bb = region->cont;
7375 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7376 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7377 gcc_assert (broken_loop
7378 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7379 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7380 if (!broken_loop)
7382 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7383 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7385 exit_bb = region->exit;
7387 /* Iteration space partitioning goes in ENTRY_BB. */
7388 gsi = gsi_last_nondebug_bb (entry_bb);
7389 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7391 if (fd->collapse > 1)
7393 int first_zero_iter = -1, dummy = -1;
7394 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7396 counts = XALLOCAVEC (tree, fd->collapse);
7397 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7398 fin_bb, first_zero_iter,
7399 dummy_bb, dummy, l2_dom_bb);
7400 t = NULL_TREE;
7402 else
7403 t = integer_one_node;
7405 step = fd->loop.step;
7406 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7407 OMP_CLAUSE__LOOPTEMP_);
7408 gcc_assert (innerc);
7409 n1 = OMP_CLAUSE_DECL (innerc);
7410 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7411 gcc_assert (innerc);
7412 n2 = OMP_CLAUSE_DECL (innerc);
7413 if (bias)
7415 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7416 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7418 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7419 true, NULL_TREE, true, GSI_SAME_STMT);
7420 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7421 true, NULL_TREE, true, GSI_SAME_STMT);
7422 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7423 true, NULL_TREE, true, GSI_SAME_STMT);
7425 tree startvar = fd->loop.v;
7426 tree endvar = NULL_TREE;
7428 if (gimple_omp_for_combined_p (fd->for_stmt))
7430 tree clauses = gimple_omp_for_clauses (inner_stmt);
7431 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7432 gcc_assert (innerc);
7433 startvar = OMP_CLAUSE_DECL (innerc);
7434 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7435 OMP_CLAUSE__LOOPTEMP_);
7436 gcc_assert (innerc);
7437 endvar = OMP_CLAUSE_DECL (innerc);
7439 t = fold_convert (TREE_TYPE (startvar), n1);
7440 t = force_gimple_operand_gsi (&gsi, t,
7441 DECL_P (startvar)
7442 && TREE_ADDRESSABLE (startvar),
7443 NULL_TREE, false, GSI_CONTINUE_LINKING);
7444 gimple *assign_stmt = gimple_build_assign (startvar, t);
7445 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7447 t = fold_convert (TREE_TYPE (startvar), n2);
7448 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7449 false, GSI_CONTINUE_LINKING);
7450 if (endvar)
7452 assign_stmt = gimple_build_assign (endvar, e);
7453 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7454 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7455 assign_stmt = gimple_build_assign (fd->loop.v, e);
7456 else
7457 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7458 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7461 tree *nonrect_bounds = NULL;
7462 if (fd->collapse > 1)
7464 if (fd->non_rect)
7466 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7467 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7469 gcc_assert (gsi_bb (gsi) == entry_bb);
7470 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7471 startvar);
7472 entry_bb = gsi_bb (gsi);
7475 if (!broken_loop)
7477 /* The code controlling the sequential loop replaces the
7478 GIMPLE_OMP_CONTINUE. */
7479 gsi = gsi_last_nondebug_bb (cont_bb);
7480 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7481 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7482 vmain = gimple_omp_continue_control_use (cont_stmt);
7483 vback = gimple_omp_continue_control_def (cont_stmt);
7485 if (!gimple_omp_for_combined_p (fd->for_stmt))
7487 if (POINTER_TYPE_P (type))
7488 t = fold_build_pointer_plus (vmain, step);
7489 else
7490 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7491 t = force_gimple_operand_gsi (&gsi, t,
7492 DECL_P (vback)
7493 && TREE_ADDRESSABLE (vback),
7494 NULL_TREE, true, GSI_SAME_STMT);
7495 assign_stmt = gimple_build_assign (vback, t);
7496 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7498 t = build2 (fd->loop.cond_code, boolean_type_node,
7499 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7500 ? t : vback, e);
7501 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7504 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7505 gsi_remove (&gsi, true);
7507 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7508 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7509 cont_bb, body_bb);
7512 /* Remove the GIMPLE_OMP_FOR statement. */
7513 gsi = gsi_for_stmt (fd->for_stmt);
7514 gsi_remove (&gsi, true);
7516 /* Remove the GIMPLE_OMP_RETURN statement. */
7517 gsi = gsi_last_nondebug_bb (exit_bb);
7518 gsi_remove (&gsi, true);
7520 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7521 if (!broken_loop)
7522 remove_edge (BRANCH_EDGE (entry_bb));
7523 else
7525 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7526 region->outer->cont = NULL;
7529 /* Connect all the blocks. */
7530 if (!broken_loop)
7532 ep = find_edge (cont_bb, body_bb);
7533 if (gimple_omp_for_combined_p (fd->for_stmt))
7535 remove_edge (ep);
7536 ep = NULL;
7538 else if (fd->collapse > 1)
7540 remove_edge (ep);
7541 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7543 else
7544 ep->flags = EDGE_TRUE_VALUE;
7545 find_edge (cont_bb, fin_bb)->flags
7546 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7549 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7550 recompute_dominator (CDI_DOMINATORS, body_bb));
7551 if (!broken_loop)
7552 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7553 recompute_dominator (CDI_DOMINATORS, fin_bb));
7555 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7557 class loop *loop = alloc_loop ();
7558 loop->header = body_bb;
7559 if (collapse_bb == NULL)
7560 loop->latch = cont_bb;
7561 add_loop (loop, body_bb->loop_father);
7565 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7566 partitioned loop. The lowering here is abstracted, in that the
7567 loop parameters are passed through internal functions, which are
7568 further lowered by oacc_device_lower, once we get to the target
7569 compiler. The loop is of the form:
7571 for (V = B; V LTGT E; V += S) {BODY}
7573 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7574 (constant 0 for no chunking), and we will have a GWV partitioning
7575 mask, specifying dimensions over which the loop is to be
7576 partitioned (see note below). We generate code that looks like
7577 (this ignores tiling):
7579 <entry_bb> [incoming FALL->body, BRANCH->exit]
7580 typedef signedintify (typeof (V)) T; // underlying signed integral type
7581 T range = E - B;
7582 T chunk_no = 0;
7583 T dir = LTGT == '<' ? +1 : -1;
7584 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7585 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7587 <head_bb> [created by splitting end of entry_bb]
7588 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7589 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7590 if (!(offset LTGT bound)) goto bottom_bb;
7592 <body_bb> [incoming]
7593 V = B + offset;
7594 {BODY}
7596 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7597 offset += step;
7598 if (offset LTGT bound) goto body_bb; [*]
7600 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7601 chunk_no++;
7602 if (chunk_no < chunk_max) goto head_bb;
7604 <exit_bb> [incoming]
7605 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7607 [*] Needed if V live at end of loop. */
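/* E.g. (illustrative): a GWV mask of
     GOMP_DIM_MASK (GOMP_DIM_GANG) | GOMP_DIM_MASK (GOMP_DIM_VECTOR)
   asks oacc_device_lower to partition the loop across gangs and
   vector lanes, with no partitioning over workers.  */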
7609 static void
7610 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7612 bool is_oacc_kernels_parallelized
7613 = (lookup_attribute ("oacc kernels parallelized",
7614 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7616 bool is_oacc_kernels
7617 = (lookup_attribute ("oacc kernels",
7618 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7619 if (is_oacc_kernels_parallelized)
7620 gcc_checking_assert (is_oacc_kernels);
7622 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7623 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7624 for SSA specifics, and some are for 'parloops' OpenACC
7625 'kernels'-parallelized specifics. */
7627 tree v = fd->loop.v;
7628 enum tree_code cond_code = fd->loop.cond_code;
7629 enum tree_code plus_code = PLUS_EXPR;
7631 tree chunk_size = integer_minus_one_node;
7632 tree gwv = integer_zero_node;
7633 tree iter_type = TREE_TYPE (v);
7634 tree diff_type = iter_type;
7635 tree plus_type = iter_type;
7636 struct oacc_collapse *counts = NULL;
7638 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7639 == GF_OMP_FOR_KIND_OACC_LOOP);
7640 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7641 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7643 if (POINTER_TYPE_P (iter_type))
7645 plus_code = POINTER_PLUS_EXPR;
7646 plus_type = sizetype;
7648 for (int ix = fd->collapse; ix--;)
7650 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7651 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7652 diff_type = diff_type2;
7654 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7655 diff_type = signed_type_for (diff_type);
7656 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7657 diff_type = integer_type_node;
7659 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7660 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7661 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7662 basic_block bottom_bb = NULL;
7664 /* entry_bb has two successors; the branch edge is to the exit
7665 block, fallthrough edge to body. */
7666 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7667 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7669 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7670 body_bb, or to a block whose only successor is the body_bb. Its
7671 fallthrough successor is the final block (same as the branch
7672 successor of the entry_bb). */
7673 if (cont_bb)
7675 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7676 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7678 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7679 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7681 else
7682 gcc_assert (!gimple_in_ssa_p (cfun));
7684 /* The exit block only has entry_bb and cont_bb as predecessors. */
7685 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7687 tree chunk_no;
7688 tree chunk_max = NULL_TREE;
7689 tree bound, offset;
7690 tree step = create_tmp_var (diff_type, ".step");
7691 bool up = cond_code == LT_EXPR;
7692 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7693 bool chunking = !gimple_in_ssa_p (cfun);
7694 bool negating;
7696 /* Tiling vars. */
7697 tree tile_size = NULL_TREE;
7698 tree element_s = NULL_TREE;
7699 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7700 basic_block elem_body_bb = NULL;
7701 basic_block elem_cont_bb = NULL;
7703 /* SSA instances. */
7704 tree offset_incr = NULL_TREE;
7705 tree offset_init = NULL_TREE;
7707 gimple_stmt_iterator gsi;
7708 gassign *ass;
7709 gcall *call;
7710 gimple *stmt;
7711 tree expr;
7712 location_t loc;
7713 edge split, be, fte;
7715 /* Split the end of entry_bb to create head_bb. */
7716 split = split_block (entry_bb, last_stmt (entry_bb));
7717 basic_block head_bb = split->dest;
7718 entry_bb = split->src;
7720 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7721 gsi = gsi_last_nondebug_bb (entry_bb);
7722 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7723 loc = gimple_location (for_stmt);
7725 if (gimple_in_ssa_p (cfun))
7727 offset_init = gimple_omp_for_index (for_stmt, 0);
7728 gcc_assert (integer_zerop (fd->loop.n1));
7729 /* The SSA parallelizer does gang parallelism. */
7730 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7733 if (fd->collapse > 1 || fd->tiling)
7735 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7736 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7737 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7738 TREE_TYPE (fd->loop.n2), loc);
7740 if (SSA_VAR_P (fd->loop.n2))
7742 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7743 true, GSI_SAME_STMT);
7744 ass = gimple_build_assign (fd->loop.n2, total);
7745 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7749 tree b = fd->loop.n1;
7750 tree e = fd->loop.n2;
7751 tree s = fd->loop.step;
7753 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7754 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7756 /* Convert the step, avoiding possible unsigned->signed overflow. */
7757 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7758 if (negating)
7759 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7760 s = fold_convert (diff_type, s);
7761 if (negating)
7762 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7763 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
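/* (Illustrative rationale: in a downward loop an unsigned step holds
   the two's-complement image of a negative value, i.e. 2^N - |S|.
   Negating first yields |S|, which converts to the signed diff_type
   without overflow, and the second negation restores -|S|.)  */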
7765 if (!chunking)
7766 chunk_size = integer_zero_node;
7767 expr = fold_convert (diff_type, chunk_size);
7768 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7769 NULL_TREE, true, GSI_SAME_STMT);
7771 if (fd->tiling)
7773 /* Determine the tile size and element step,
7774 modify the outer loop step size. */
7775 tile_size = create_tmp_var (diff_type, ".tile_size");
7776 expr = build_int_cst (diff_type, 1);
7777 for (int ix = 0; ix < fd->collapse; ix++)
7778 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7779 expr = force_gimple_operand_gsi (&gsi, expr, true,
7780 NULL_TREE, true, GSI_SAME_STMT);
7781 ass = gimple_build_assign (tile_size, expr);
7782 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7784 element_s = create_tmp_var (diff_type, ".element_s");
7785 ass = gimple_build_assign (element_s, s);
7786 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7788 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7789 s = force_gimple_operand_gsi (&gsi, expr, true,
7790 NULL_TREE, true, GSI_SAME_STMT);
7793 /* Determine the range, avoiding possible unsigned->signed overflow. */
7794 negating = !up && TYPE_UNSIGNED (iter_type);
7795 expr = fold_build2 (MINUS_EXPR, plus_type,
7796 fold_convert (plus_type, negating ? b : e),
7797 fold_convert (plus_type, negating ? e : b));
7798 expr = fold_convert (diff_type, expr);
7799 if (negating)
7800 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7801 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7802 NULL_TREE, true, GSI_SAME_STMT);
7804 chunk_no = build_int_cst (diff_type, 0);
7805 if (chunking)
7807 gcc_assert (!gimple_in_ssa_p (cfun));
7809 expr = chunk_no;
7810 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7811 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7813 ass = gimple_build_assign (chunk_no, expr);
7814 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7816 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7817 build_int_cst (integer_type_node,
7818 IFN_GOACC_LOOP_CHUNKS),
7819 dir, range, s, chunk_size, gwv);
7820 gimple_call_set_lhs (call, chunk_max);
7821 gimple_set_location (call, loc);
7822 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7824 else
7825 chunk_size = chunk_no;
7827 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7828 build_int_cst (integer_type_node,
7829 IFN_GOACC_LOOP_STEP),
7830 dir, range, s, chunk_size, gwv);
7831 gimple_call_set_lhs (call, step);
7832 gimple_set_location (call, loc);
7833 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7835 /* Remove the GIMPLE_OMP_FOR. */
7836 gsi_remove (&gsi, true);
7838 /* Fixup edges from head_bb. */
7839 be = BRANCH_EDGE (head_bb);
7840 fte = FALLTHRU_EDGE (head_bb);
7841 be->flags |= EDGE_FALSE_VALUE;
7842 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7844 basic_block body_bb = fte->dest;
7846 if (gimple_in_ssa_p (cfun))
7848 gsi = gsi_last_nondebug_bb (cont_bb);
7849 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7851 offset = gimple_omp_continue_control_use (cont_stmt);
7852 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7854 else
7856 offset = create_tmp_var (diff_type, ".offset");
7857 offset_init = offset_incr = offset;
7859 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7861 /* Loop offset & bound go into head_bb. */
7862 gsi = gsi_start_bb (head_bb);
7864 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7865 build_int_cst (integer_type_node,
7866 IFN_GOACC_LOOP_OFFSET),
7867 dir, range, s,
7868 chunk_size, gwv, chunk_no);
7869 gimple_call_set_lhs (call, offset_init);
7870 gimple_set_location (call, loc);
7871 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7873 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7874 build_int_cst (integer_type_node,
7875 IFN_GOACC_LOOP_BOUND),
7876 dir, range, s,
7877 chunk_size, gwv, offset_init);
7878 gimple_call_set_lhs (call, bound);
7879 gimple_set_location (call, loc);
7880 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7882 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7883 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7884 GSI_CONTINUE_LINKING);
7886 /* V assignment goes into body_bb. */
7887 if (!gimple_in_ssa_p (cfun))
7889 gsi = gsi_start_bb (body_bb);
7891 expr = build2 (plus_code, iter_type, b,
7892 fold_convert (plus_type, offset));
7893 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7894 true, GSI_SAME_STMT);
7895 ass = gimple_build_assign (v, expr);
7896 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7898 if (fd->collapse > 1 || fd->tiling)
7899 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7901 if (fd->tiling)
7903 /* Determine the range of the element loop -- usually simply
7904 the tile_size, but could be smaller if the final
7905 iteration of the outer loop is a partial tile. */
7906 tree e_range = create_tmp_var (diff_type, ".e_range");
7908 expr = build2 (MIN_EXPR, diff_type,
7909 build2 (MINUS_EXPR, diff_type, bound, offset),
7910 build2 (MULT_EXPR, diff_type, tile_size,
7911 element_s));
7912 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7913 true, GSI_SAME_STMT);
7914 ass = gimple_build_assign (e_range, expr);
7915 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
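/* E.g. (illustrative): with tile_size == 32 but only 20 * element_s
   iterations left in this chunk (bound - offset), e_range covers just
   the 20-element partial tile rather than a full tile.  */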
7917 /* Determine bound, offset & step of inner loop. */
7918 e_bound = create_tmp_var (diff_type, ".e_bound");
7919 e_offset = create_tmp_var (diff_type, ".e_offset");
7920 e_step = create_tmp_var (diff_type, ".e_step");
7922 /* Mark these as element loops. */
7923 tree t, e_gwv = integer_minus_one_node;
7924 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7926 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7927 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7928 element_s, chunk, e_gwv, chunk);
7929 gimple_call_set_lhs (call, e_offset);
7930 gimple_set_location (call, loc);
7931 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7933 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7934 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7935 element_s, chunk, e_gwv, e_offset);
7936 gimple_call_set_lhs (call, e_bound);
7937 gimple_set_location (call, loc);
7938 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7940 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7941 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7942 element_s, chunk, e_gwv);
7943 gimple_call_set_lhs (call, e_step);
7944 gimple_set_location (call, loc);
7945 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7947 /* Add test and split block. */
7948 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7949 stmt = gimple_build_cond_empty (expr);
7950 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7951 split = split_block (body_bb, stmt);
7952 elem_body_bb = split->dest;
7953 if (cont_bb == body_bb)
7954 cont_bb = elem_body_bb;
7955 body_bb = split->src;
7957 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7959 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7960 if (cont_bb == NULL)
7962 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7963 e->probability = profile_probability::even ();
7964 split->probability = profile_probability::even ();
7967 /* Initialize the user's loop vars. */
7968 gsi = gsi_start_bb (elem_body_bb);
7969 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7970 diff_type);
7974 /* Loop increment goes into cont_bb. If this is not a loop, we
7975 will have spawned threads as if it was, and each one will
7976 execute one iteration. The specification is not explicit about
7977 whether such constructs are ill-formed or not, and they can
7978 occur, especially when noreturn routines are involved. */
7979 if (cont_bb)
7981 gsi = gsi_last_nondebug_bb (cont_bb);
7982 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7983 loc = gimple_location (cont_stmt);
7985 if (fd->tiling)
7987 /* Insert element loop increment and test. */
7988 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7989 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7990 true, GSI_SAME_STMT);
7991 ass = gimple_build_assign (e_offset, expr);
7992 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7993 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7995 stmt = gimple_build_cond_empty (expr);
7996 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7997 split = split_block (cont_bb, stmt);
7998 elem_cont_bb = split->src;
7999 cont_bb = split->dest;
8001 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8002 split->probability = profile_probability::unlikely ().guessed ();
8003 edge latch_edge
8004 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
8005 latch_edge->probability = profile_probability::likely ().guessed ();
8007 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
8008 skip_edge->probability = profile_probability::unlikely ().guessed ();
8009 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
8010 loop_entry_edge->probability
8011 = profile_probability::likely ().guessed ();
8013 gsi = gsi_for_stmt (cont_stmt);
8016 /* Increment offset. */
8017 if (gimple_in_ssa_p (cfun))
8018 expr = build2 (plus_code, iter_type, offset,
8019 fold_convert (plus_type, step));
8020 else
8021 expr = build2 (PLUS_EXPR, diff_type, offset, step);
8022 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8023 true, GSI_SAME_STMT);
8024 ass = gimple_build_assign (offset_incr, expr);
8025 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8026 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
8027 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
8029 /* Remove the GIMPLE_OMP_CONTINUE. */
8030 gsi_remove (&gsi, true);
8032 /* Fixup edges from cont_bb. */
8033 be = BRANCH_EDGE (cont_bb);
8034 fte = FALLTHRU_EDGE (cont_bb);
8035 be->flags |= EDGE_TRUE_VALUE;
8036 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8038 if (chunking)
8040 /* Split the beginning of exit_bb to make bottom_bb. We
8041 need to insert a nop at the start, because splitting is
8042 after a stmt, not before. */
8043 gsi = gsi_start_bb (exit_bb);
8044 stmt = gimple_build_nop ();
8045 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8046 split = split_block (exit_bb, stmt);
8047 bottom_bb = split->src;
8048 exit_bb = split->dest;
8049 gsi = gsi_last_bb (bottom_bb);
8051 /* Chunk increment and test goes into bottom_bb. */
8052 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
8053 build_int_cst (diff_type, 1));
8054 ass = gimple_build_assign (chunk_no, expr);
8055 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
8057 /* Chunk test at end of bottom_bb. */
8058 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
8059 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
8060 GSI_CONTINUE_LINKING);
8062 /* Fixup edges from bottom_bb. */
8063 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8064 split->probability = profile_probability::unlikely ().guessed ();
8065 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
8066 latch_edge->probability = profile_probability::likely ().guessed ();
8070 gsi = gsi_last_nondebug_bb (exit_bb);
8071 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8072 loc = gimple_location (gsi_stmt (gsi));
8074 if (!gimple_in_ssa_p (cfun))
8076 /* Insert the final value of V, in case it is live. This is the
8077 value for the only thread that survives past the join. */
8078 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
8079 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
8080 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
8081 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
8082 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
8083 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8084 true, GSI_SAME_STMT);
8085 ass = gimple_build_assign (v, expr);
8086 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8089 /* Remove the OMP_RETURN. */
8090 gsi_remove (&gsi, true);
8092 if (cont_bb)
8094 /* We now have one, two or three nested loops. Update the loop
8095 structures. */
8096 class loop *parent = entry_bb->loop_father;
8097 class loop *body = body_bb->loop_father;
8099 if (chunking)
8101 class loop *chunk_loop = alloc_loop ();
8102 chunk_loop->header = head_bb;
8103 chunk_loop->latch = bottom_bb;
8104 add_loop (chunk_loop, parent);
8105 parent = chunk_loop;
8107 else if (parent != body)
8109 gcc_assert (body->header == body_bb);
8110 gcc_assert (body->latch == cont_bb
8111 || single_pred (body->latch) == cont_bb);
8112 parent = NULL;
8115 if (parent)
8117 class loop *body_loop = alloc_loop ();
8118 body_loop->header = body_bb;
8119 body_loop->latch = cont_bb;
8120 add_loop (body_loop, parent);
8122 if (fd->tiling)
8124 /* Insert tiling's element loop. */
8125 class loop *inner_loop = alloc_loop ();
8126 inner_loop->header = elem_body_bb;
8127 inner_loop->latch = elem_cont_bb;
8128 add_loop (inner_loop, body_loop);
8134 /* Expand the OMP loop defined by REGION. */
8136 static void
8137 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8139 struct omp_for_data fd;
8140 struct omp_for_data_loop *loops;
8142 loops = XALLOCAVEC (struct omp_for_data_loop,
8143 gimple_omp_for_collapse (last_stmt (region->entry)));
8144 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8145 &fd, loops);
8146 region->sched_kind = fd.sched_kind;
8147 region->sched_modifiers = fd.sched_modifiers;
8148 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8149 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8151 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8152 if ((loops[i].m1 || loops[i].m2)
8153 && (loops[i].m1 == NULL_TREE
8154 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8155 && (loops[i].m2 == NULL_TREE
8156 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8157 && TREE_CODE (loops[i].step) == INTEGER_CST
8158 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8160 tree t;
8161 tree itype = TREE_TYPE (loops[i].v);
8162 if (loops[i].m1 && loops[i].m2)
8163 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8164 else if (loops[i].m1)
8165 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8166 else
8167 t = loops[i].m2;
8168 t = fold_build2 (MULT_EXPR, itype, t,
8169 fold_convert (itype,
8170 loops[i - loops[i].outer].step));
8171 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8172 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8173 fold_build1 (NEGATE_EXPR, itype, t),
8174 fold_build1 (NEGATE_EXPR, itype,
8175 fold_convert (itype,
8176 loops[i].step)));
8177 else
8178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8179 fold_convert (itype, loops[i].step));
8180 if (integer_nonzerop (t))
8181 error_at (gimple_location (fd.for_stmt),
8182 "invalid OpenMP non-rectangular loop step; "
8183 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8184 "step %qE",
8185 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8186 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8187 loops[i - loops[i].outer].step, i + 1,
8188 loops[i].step);
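/* E.g. (illustrative):
     for (i = 0; i < n; i += 2)
       for (j = 0; j < 3 * i; j += 4)
   is rejected here: (3 - 0) * 2 == 6 is not a multiple of the inner
   step 4, so the diagnostic above is emitted.  */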
8192 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8193 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8194 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8195 if (region->cont)
8197 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8198 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8199 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8201 else
8202 /* If there isn't a continue then this is a degenerate case where
8203 the introduction of abnormal edges during lowering will prevent
8204 original loops from being detected. Fix that up. */
8205 loops_state_set (LOOPS_NEED_FIXUP);
8207 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8208 expand_omp_simd (region, &fd);
8209 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8211 gcc_assert (!inner_stmt && !fd.non_rect);
8212 expand_oacc_for (region, &fd);
8214 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8216 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8217 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8218 else
8219 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8221 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8222 && !fd.have_ordered)
8224 if (fd.chunk_size == NULL)
8225 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8226 else
8227 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8229 else
8231 int fn_index, start_ix, next_ix;
8232 unsigned HOST_WIDE_INT sched = 0;
8233 tree sched_arg = NULL_TREE;
8235 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8236 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8237 if (fd.chunk_size == NULL
8238 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8239 fd.chunk_size = integer_zero_node;
8240 switch (fd.sched_kind)
8242 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8243 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8244 && fd.lastprivate_conditional == 0)
8246 gcc_assert (!fd.have_ordered);
8247 fn_index = 6;
8248 sched = 4;
8250 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8251 && !fd.have_ordered
8252 && fd.lastprivate_conditional == 0)
8253 fn_index = 7;
8254 else
8256 fn_index = 3;
8257 sched = (HOST_WIDE_INT_1U << 31);
8259 break;
8260 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8261 case OMP_CLAUSE_SCHEDULE_GUIDED:
8262 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8263 && !fd.have_ordered
8264 && fd.lastprivate_conditional == 0)
8266 fn_index = 3 + fd.sched_kind;
8267 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8268 break;
8270 fn_index = fd.sched_kind;
8271 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8272 sched += (HOST_WIDE_INT_1U << 31);
8273 break;
8274 case OMP_CLAUSE_SCHEDULE_STATIC:
8275 gcc_assert (fd.have_ordered);
8276 fn_index = 0;
8277 sched = (HOST_WIDE_INT_1U << 31) + 1;
8278 break;
8279 default:
8280 gcc_unreachable ();
8282 if (!fd.ordered)
8283 fn_index += fd.have_ordered * 8;
8284 if (fd.ordered)
8285 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8286 else
8287 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8288 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8289 if (fd.have_reductemp || fd.have_pointer_condtemp)
8291 if (fd.ordered)
8292 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8293 else if (fd.have_ordered)
8294 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8295 else
8296 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8297 sched_arg = build_int_cstu (long_integer_type_node, sched);
8298 if (!fd.chunk_size)
8299 fd.chunk_size = integer_zero_node;
8301 if (fd.iter_type == long_long_unsigned_type_node)
8303 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8304 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8305 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8306 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8308 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8309 (enum built_in_function) next_ix, sched_arg,
8310 inner_stmt);
8314 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8316 v = GOMP_sections_start (n);
8317 L0:
8318 switch (v)
8319 {
8320 case 0:
8321 goto L2;
8322 case 1:
8323 section 1;
8324 goto L1;
8325 case 2:
8326 ...
8327 case n:
8328 ...
8329 default:
8330 abort ();
8331 }
8332 L1:
8333 v = GOMP_sections_next ();
8334 goto L0;
8335 L2:
8336 reduction;
8338 If this is a combined parallel sections, replace the call to
8339 GOMP_sections_start with call to GOMP_sections_next. */
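/* E.g. (illustrative):
     #pragma omp sections
     {
       #pragma omp section
         stmt1;
       #pragma omp section
         stmt2;
     }
   produces a switch with cases 1 and 2 for the two sections, plus
   the 0 case taken once no section remains.  */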
8341 static void
8342 expand_omp_sections (struct omp_region *region)
8344 tree t, u, vin = NULL, vmain, vnext, l2;
8345 unsigned len;
8346 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8347 gimple_stmt_iterator si, switch_si;
8348 gomp_sections *sections_stmt;
8349 gimple *stmt;
8350 gomp_continue *cont;
8351 edge_iterator ei;
8352 edge e;
8353 struct omp_region *inner;
8354 unsigned i, casei;
8355 bool exit_reachable = region->cont != NULL;
8357 gcc_assert (region->exit != NULL);
8358 entry_bb = region->entry;
8359 l0_bb = single_succ (entry_bb);
8360 l1_bb = region->cont;
8361 l2_bb = region->exit;
8362 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8363 l2 = gimple_block_label (l2_bb);
8364 else
8366 /* This can happen if there are reductions. */
8367 len = EDGE_COUNT (l0_bb->succs);
8368 gcc_assert (len > 0);
8369 e = EDGE_SUCC (l0_bb, len - 1);
8370 si = gsi_last_nondebug_bb (e->dest);
8371 l2 = NULL_TREE;
8372 if (gsi_end_p (si)
8373 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8374 l2 = gimple_block_label (e->dest);
8375 else
8376 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8378 si = gsi_last_nondebug_bb (e->dest);
8379 if (gsi_end_p (si)
8380 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8382 l2 = gimple_block_label (e->dest);
8383 break;
8387 if (exit_reachable)
8388 default_bb = create_empty_bb (l1_bb->prev_bb);
8389 else
8390 default_bb = create_empty_bb (l0_bb);
8392 /* We will build a switch() with enough cases for all the
8393 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of
8394 work and a default case to abort if something goes wrong. */
8395 len = EDGE_COUNT (l0_bb->succs);
8397 /* Use vec::quick_push on label_vec throughout, since we know the size
8398 in advance. */
8399 auto_vec<tree> label_vec (len);
8401 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8402 GIMPLE_OMP_SECTIONS statement. */
8403 si = gsi_last_nondebug_bb (entry_bb);
8404 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8405 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8406 vin = gimple_omp_sections_control (sections_stmt);
8407 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8408 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8409 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8410 tree cond_var = NULL_TREE;
8411 if (reductmp || condtmp)
8413 tree reductions = null_pointer_node, mem = null_pointer_node;
8414 tree memv = NULL_TREE, condtemp = NULL_TREE;
8415 gimple_stmt_iterator gsi = gsi_none ();
8416 gimple *g = NULL;
8417 if (reductmp)
8419 reductions = OMP_CLAUSE_DECL (reductmp);
8420 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8421 g = SSA_NAME_DEF_STMT (reductions);
8422 reductions = gimple_assign_rhs1 (g);
8423 OMP_CLAUSE_DECL (reductmp) = reductions;
8424 gsi = gsi_for_stmt (g);
8426 else
8427 gsi = si;
8428 if (condtmp)
8430 condtemp = OMP_CLAUSE_DECL (condtmp);
8431 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8432 OMP_CLAUSE__CONDTEMP_);
8433 cond_var = OMP_CLAUSE_DECL (c);
8434 tree type = TREE_TYPE (condtemp);
8435 memv = create_tmp_var (type);
8436 TREE_ADDRESSABLE (memv) = 1;
8437 unsigned cnt = 0;
8438 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8439 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8440 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8441 ++cnt;
8442 unsigned HOST_WIDE_INT sz
8443 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8444 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8445 false);
8446 mem = build_fold_addr_expr (memv);
8448 t = build_int_cst (unsigned_type_node, len - 1);
8449 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8450 stmt = gimple_build_call (u, 3, t, reductions, mem);
8451 gimple_call_set_lhs (stmt, vin);
8452 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8453 if (condtmp)
8455 expand_omp_build_assign (&gsi, condtemp, memv, false);
8456 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8457 vin, build_one_cst (TREE_TYPE (cond_var)));
8458 expand_omp_build_assign (&gsi, cond_var, t, false);
8460 if (reductmp)
8462 gsi_remove (&gsi, true);
8463 release_ssa_name (gimple_assign_lhs (g));
8466 else if (!is_combined_parallel (region))
8468 /* If we are not inside a combined parallel+sections region,
8469 call GOMP_sections_start. */
8470 t = build_int_cst (unsigned_type_node, len - 1);
8471 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8472 stmt = gimple_build_call (u, 1, t);
8474 else
8476 /* Otherwise, call GOMP_sections_next. */
8477 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8478 stmt = gimple_build_call (u, 0);
8480 if (!reductmp && !condtmp)
8482 gimple_call_set_lhs (stmt, vin);
8483 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8485 gsi_remove (&si, true);
8487 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8488 L0_BB. */
8489 switch_si = gsi_last_nondebug_bb (l0_bb);
8490 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8491 if (exit_reachable)
8493 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8494 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8495 vmain = gimple_omp_continue_control_use (cont);
8496 vnext = gimple_omp_continue_control_def (cont);
8498 else
8500 vmain = vin;
8501 vnext = NULL_TREE;
8504 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8505 label_vec.quick_push (t);
8506 i = 1;
8508 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8509 for (inner = region->inner, casei = 1;
8510 inner;
8511 inner = inner->next, i++, casei++)
8513 basic_block s_entry_bb, s_exit_bb;
8515 /* Skip optional reduction region. */
8516 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8518 --i;
8519 --casei;
8520 continue;
8523 s_entry_bb = inner->entry;
8524 s_exit_bb = inner->exit;
8526 t = gimple_block_label (s_entry_bb);
8527 u = build_int_cst (unsigned_type_node, casei);
8528 u = build_case_label (u, NULL, t);
8529 label_vec.quick_push (u);
8531 si = gsi_last_nondebug_bb (s_entry_bb);
8532 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8533 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8534 gsi_remove (&si, true);
8535 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8537 if (s_exit_bb == NULL)
8538 continue;
8540 si = gsi_last_nondebug_bb (s_exit_bb);
8541 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8542 gsi_remove (&si, true);
8544 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8547 /* Error handling code goes in DEFAULT_BB. */
8548 t = gimple_block_label (default_bb);
8549 u = build_case_label (NULL, NULL, t);
8550 make_edge (l0_bb, default_bb, 0);
8551 add_bb_to_loop (default_bb, current_loops->tree_root);
8553 stmt = gimple_build_switch (vmain, u, label_vec);
8554 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8555 gsi_remove (&switch_si, true);
8557 si = gsi_start_bb (default_bb);
8558 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8559 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8561 if (exit_reachable)
8563 tree bfn_decl;
8565 /* Code to get the next section goes in L1_BB. */
8566 si = gsi_last_nondebug_bb (l1_bb);
8567 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8569 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8570 stmt = gimple_build_call (bfn_decl, 0);
8571 gimple_call_set_lhs (stmt, vnext);
8572 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8573 if (cond_var)
8575 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8576 vnext, build_one_cst (TREE_TYPE (cond_var)));
8577 expand_omp_build_assign (&si, cond_var, t, false);
8579 gsi_remove (&si, true);
8581 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8584 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8585 si = gsi_last_nondebug_bb (l2_bb);
8586 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8587 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8588 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8589 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8590 else
8591 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8592 stmt = gimple_build_call (t, 0);
8593 if (gimple_omp_return_lhs (gsi_stmt (si)))
8594 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8595 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8596 gsi_remove (&si, true);
8598 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8601 /* Expand code for an OpenMP single or scope directive. We've already
8602 expanded much of the code; here we simply place the GOMP_barrier call. */
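/* E.g. for an (illustrative) construct

     #pragma omp single
       body;

   the body has already been expanded; only the implicit barrier at the
   exit remains to be emitted here, and even that is skipped for
   'nowait'.  */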
8604 static void
8605 expand_omp_single (struct omp_region *region)
8607 basic_block entry_bb, exit_bb;
8608 gimple_stmt_iterator si;
8610 entry_bb = region->entry;
8611 exit_bb = region->exit;
8613 si = gsi_last_nondebug_bb (entry_bb);
8614 enum gimple_code code = gimple_code (gsi_stmt (si));
8615 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8616 gsi_remove (&si, true);
8617 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8619 if (exit_bb == NULL)
8621 gcc_assert (code == GIMPLE_OMP_SCOPE);
8622 return;
8625 si = gsi_last_nondebug_bb (exit_bb);
8626 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8628 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8629 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8631 gsi_remove (&si, true);
8632 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8635 /* Generic expansion for OpenMP synchronization directives: master,
8636 ordered and critical. All we need to do here is remove the entry
8637 and exit markers for REGION. */
8639 static void
8640 expand_omp_synch (struct omp_region *region)
8642 basic_block entry_bb, exit_bb;
8643 gimple_stmt_iterator si;
8645 entry_bb = region->entry;
8646 exit_bb = region->exit;
8648 si = gsi_last_nondebug_bb (entry_bb);
8649 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8650 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8651 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8652 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8653 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8654 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8655 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8656 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8657 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8659 expand_omp_taskreg (region);
8660 return;
8662 gsi_remove (&si, true);
8663 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8665 if (exit_bb)
8667 si = gsi_last_nondebug_bb (exit_bb);
8668 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8669 gsi_remove (&si, true);
8670 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8674 /* Translate enum omp_memory_order to enum memmodel for the fail
8675 clause embedded in it. */
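/* For example, '#pragma omp atomic compare seq_cst fail(relaxed)' yields
   MEMMODEL_RELAXED here, while a plain 'acq_rel' without a fail clause
   falls back to MEMMODEL_ACQUIRE, release not being a valid failure
   ordering.  */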
8677 static enum memmodel
8678 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8680 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8682 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8683 switch (mo & OMP_MEMORY_ORDER_MASK)
8685 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8686 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8687 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8688 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8689 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8690 default: break;
8692 gcc_unreachable ();
8693 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8694 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8695 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8696 default: gcc_unreachable ();
8700 /* Translate enum omp_memory_order to enum memmodel. The two enums
8701 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED is 0
8702 and omp_memory_order also has the fail ordering encoded in it. */
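/* For example, a 'relaxed' compare with 'fail(acquire)' comes back as
   MEMMODEL_ACQUIRE, so that the failure ordering never ends up stronger
   than the success ordering handed to the builtin.  */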
8704 static enum memmodel
8705 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8707 enum memmodel ret, fail_ret;
8708 switch (mo & OMP_MEMORY_ORDER_MASK)
8710 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8711 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8712 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8713 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8714 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8715 default: gcc_unreachable ();
8717 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8718 we can just return ret here unconditionally. Otherwise, work around
8719 it here and make sure fail memmodel is not stronger. */
8720 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8721 return ret;
8722 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8723 if (fail_ret > ret)
8724 return fail_ret;
8725 return ret;
8728 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8729 operation as a normal volatile load. */
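/* E.g. (illustrative), for a naturally aligned 4-byte int x,

     #pragma omp atomic read acquire
     v = x;

   becomes a call to __atomic_load_4 (&x, MEMMODEL_ACQUIRE) whose result
   is assigned to v.  */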
8731 static bool
8732 expand_omp_atomic_load (basic_block load_bb, tree addr,
8733 tree loaded_val, int index)
8735 enum built_in_function tmpbase;
8736 gimple_stmt_iterator gsi;
8737 basic_block store_bb;
8738 location_t loc;
8739 gimple *stmt;
8740 tree decl, type, itype;
8742 gsi = gsi_last_nondebug_bb (load_bb);
8743 stmt = gsi_stmt (gsi);
8744 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8745 loc = gimple_location (stmt);
8747 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8748 is smaller than word size, then expand_atomic_load assumes that the load
8749 is atomic. We could avoid the builtin entirely in this case. */
8751 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8752 decl = builtin_decl_explicit (tmpbase);
8753 if (decl == NULL_TREE)
8754 return false;
8756 type = TREE_TYPE (loaded_val);
8757 itype = TREE_TYPE (TREE_TYPE (decl));
8759 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8760 tree mo = build_int_cst (integer_type_node,
8761 omp_memory_order_to_memmodel (omo));
8762 gcall *call = gimple_build_call (decl, 2, addr, mo);
8763 gimple_set_location (call, loc);
8764 gimple_set_vuse (call, gimple_vuse (stmt));
8765 gimple *repl;
8766 if (!useless_type_conversion_p (type, itype))
8768 tree lhs = make_ssa_name (itype);
8769 gimple_call_set_lhs (call, lhs);
8770 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8771 repl = gimple_build_assign (loaded_val,
8772 build1 (VIEW_CONVERT_EXPR, type, lhs));
8773 gimple_set_location (repl, loc);
8775 else
8777 gimple_call_set_lhs (call, loaded_val);
8778 repl = call;
8780 gsi_replace (&gsi, repl, true);
8782 store_bb = single_succ (load_bb);
8783 gsi = gsi_last_nondebug_bb (store_bb);
8784 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8785 gsi_remove (&gsi, true);
8787 return true;
8790 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8791 operation as a normal volatile store. */
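/* E.g. (illustrative), for a 4-byte type,

     #pragma omp atomic write
     x = expr;

   becomes __atomic_store_4 (&x, expr, mo); if the previous value is
   needed as well, __atomic_exchange_4 is emitted instead.  */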
8793 static bool
8794 expand_omp_atomic_store (basic_block load_bb, tree addr,
8795 tree loaded_val, tree stored_val, int index)
8797 enum built_in_function tmpbase;
8798 gimple_stmt_iterator gsi;
8799 basic_block store_bb = single_succ (load_bb);
8800 location_t loc;
8801 gimple *stmt;
8802 tree decl, type, itype;
8803 machine_mode imode;
8804 bool exchange;
8806 gsi = gsi_last_nondebug_bb (load_bb);
8807 stmt = gsi_stmt (gsi);
8808 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8810 /* If the load value is needed, then this isn't a store but an exchange. */
8811 exchange = gimple_omp_atomic_need_value_p (stmt);
8813 gsi = gsi_last_nondebug_bb (store_bb);
8814 stmt = gsi_stmt (gsi);
8815 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8816 loc = gimple_location (stmt);
8818 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8819 is smaller than word size, then expand_atomic_store assumes that the store
8820 is atomic. We could avoid the builtin entirely in this case. */
8822 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8823 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8824 decl = builtin_decl_explicit (tmpbase);
8825 if (decl == NULL_TREE)
8826 return false;
8828 type = TREE_TYPE (stored_val);
8830 /* Dig out the type of the function's second argument. */
8831 itype = TREE_TYPE (decl);
8832 itype = TYPE_ARG_TYPES (itype);
8833 itype = TREE_CHAIN (itype);
8834 itype = TREE_VALUE (itype);
8835 imode = TYPE_MODE (itype);
8837 if (exchange && !can_atomic_exchange_p (imode, true))
8838 return false;
8840 if (!useless_type_conversion_p (itype, type))
8841 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8842 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8843 tree mo = build_int_cst (integer_type_node,
8844 omp_memory_order_to_memmodel (omo));
8845 stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
8846 true, GSI_SAME_STMT);
8847 gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
8848 gimple_set_location (call, loc);
8849 gimple_set_vuse (call, gimple_vuse (stmt));
8850 gimple_set_vdef (call, gimple_vdef (stmt));
8852 gimple *repl = call;
8853 if (exchange)
8855 if (!useless_type_conversion_p (type, itype))
8857 tree lhs = make_ssa_name (itype);
8858 gimple_call_set_lhs (call, lhs);
8859 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8860 repl = gimple_build_assign (loaded_val,
8861 build1 (VIEW_CONVERT_EXPR, type, lhs));
8862 gimple_set_location (repl, loc);
8864 else
8865 gimple_call_set_lhs (call, loaded_val);
8867 gsi_replace (&gsi, repl, true);
8869 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8870 gsi = gsi_last_nondebug_bb (load_bb);
8871 gsi_remove (&gsi, true);
8873 return true;
8876 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8877 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8878 size of the data type, and thus usable to find the index of the builtin
8879 decl. Returns false if the expression is not of the proper form. */
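/* For instance (illustrative), with a 4-byte int x,

     #pragma omp atomic update
     x = x + 42;

   is expanded to __atomic_fetch_add_4 (&x, 42, mo); when the updated
   value is captured instead, the __atomic_add_fetch_4 variant is
   chosen.  */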
8881 static bool
8882 expand_omp_atomic_fetch_op (basic_block load_bb,
8883 tree addr, tree loaded_val,
8884 tree stored_val, int index)
8886 enum built_in_function oldbase, newbase, tmpbase;
8887 tree decl, itype, call;
8888 tree lhs, rhs;
8889 basic_block store_bb = single_succ (load_bb);
8890 gimple_stmt_iterator gsi;
8891 gimple *stmt;
8892 location_t loc;
8893 enum tree_code code;
8894 bool need_old, need_new;
8895 machine_mode imode;
8897 /* We expect to find the following sequences:
8899 load_bb:
8900 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8902 store_bb:
8903 val = tmp OP something; (or: something OP tmp)
8904 GIMPLE_OMP_ATOMIC_STORE (val)
8906 ??? FIXME: Allow a more flexible sequence.
8907 Perhaps use data flow to pick the statements. */
8911 gsi = gsi_after_labels (store_bb);
8912 stmt = gsi_stmt (gsi);
8913 if (is_gimple_debug (stmt))
8915 gsi_next_nondebug (&gsi);
8916 if (gsi_end_p (gsi))
8917 return false;
8918 stmt = gsi_stmt (gsi);
8920 loc = gimple_location (stmt);
8921 if (!is_gimple_assign (stmt))
8922 return false;
8923 gsi_next_nondebug (&gsi);
8924 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8925 return false;
8926 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8927 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8928 enum omp_memory_order omo
8929 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8930 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8931 gcc_checking_assert (!need_old || !need_new);
8933 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8934 return false;
8936 /* Check for one of the supported fetch-op operations. */
8937 code = gimple_assign_rhs_code (stmt);
8938 switch (code)
8940 case PLUS_EXPR:
8941 case POINTER_PLUS_EXPR:
8942 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8943 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8944 break;
8945 case MINUS_EXPR:
8946 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8947 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8948 break;
8949 case BIT_AND_EXPR:
8950 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8951 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8952 break;
8953 case BIT_IOR_EXPR:
8954 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8955 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8956 break;
8957 case BIT_XOR_EXPR:
8958 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8959 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8960 break;
8961 default:
8962 return false;
8965 /* Make sure the expression is of the proper form. */
8966 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8967 rhs = gimple_assign_rhs2 (stmt);
8968 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8969 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8970 rhs = gimple_assign_rhs1 (stmt);
8971 else
8972 return false;
8974 tmpbase = ((enum built_in_function)
8975 ((need_new ? newbase : oldbase) + index + 1));
8976 decl = builtin_decl_explicit (tmpbase);
8977 if (decl == NULL_TREE)
8978 return false;
8979 itype = TREE_TYPE (TREE_TYPE (decl));
8980 imode = TYPE_MODE (itype);
8982 /* We could test all of the various optabs involved, but the fact of the
8983 matter is that (with the exception of i486 vs i586 and xadd) all targets
8984 that support any atomic operation optab also implement compare-and-swap.
8985 Let optabs.cc take care of expanding any compare-and-swap loop. */
8986 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8987 return false;
8989 gsi = gsi_last_nondebug_bb (load_bb);
8990 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8992 /* OpenMP does not imply any barrier-like semantics on its atomic ops
8993 beyond the requested memory order; pass the memory model derived from
8994 the directive down to the builtin (MEMMODEL_RELAXED by default). */
8995 call = build_call_expr_loc (loc, decl, 3, addr,
8996 fold_convert_loc (loc, itype, rhs),
8997 build_int_cst (NULL, mo));
8999 if (need_old || need_new)
9001 lhs = need_old ? loaded_val : stored_val;
9002 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
9003 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
9005 else
9006 call = fold_convert_loc (loc, void_type_node, call);
9007 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
9008 gsi_remove (&gsi, true);
9010 gsi = gsi_last_nondebug_bb (store_bb);
9011 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
9012 gsi_remove (&gsi, true);
9013 gsi = gsi_last_nondebug_bb (store_bb);
9014 stmt = gsi_stmt (gsi);
9015 gsi_remove (&gsi, true);
9017 if (gimple_in_ssa_p (cfun))
9018 release_defs (stmt);
9020 return true;
9023 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
9024 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
9025 Returns false if the expression is not of the proper form. */
9027 static bool
9028 expand_omp_atomic_cas (basic_block load_bb, tree addr,
9029 tree loaded_val, tree stored_val, int index)
9031 /* We expect to find the following sequences:
9033 load_bb:
9034 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
9036 store_bb:
9037 val = tmp == e ? d : tmp;
9038 GIMPLE_OMP_ATOMIC_STORE (val)
9040 or in store_bb instead:
9041 tmp2 = tmp == e;
9042 val = tmp2 ? d : tmp;
9043 GIMPLE_OMP_ATOMIC_STORE (val)
9046 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
9047 val = e == tmp3 ? d : tmp;
9048 GIMPLE_OMP_ATOMIC_STORE (val)
9050 etc. */
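/* As an illustrative sketch, a recognized sequence is rewritten into

     cres = .ATOMIC_COMPARE_EXCHANGE (addr, e, d, flag, mo, fmo);

   where the REALPART_EXPR of the complex result carries the old value
   and the IMAGPART_EXPR the success flag.  */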
9053 basic_block store_bb = single_succ (load_bb);
9054 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
9055 gimple *store_stmt = gsi_stmt (gsi);
9056 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
9057 return false;
9058 gsi_prev_nondebug (&gsi);
9059 if (gsi_end_p (gsi))
9060 return false;
9061 gimple *condexpr_stmt = gsi_stmt (gsi);
9062 if (!is_gimple_assign (condexpr_stmt)
9063 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
9064 return false;
9065 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
9066 return false;
9067 gimple *cond_stmt = NULL;
9068 gimple *vce_stmt = NULL;
9069 gsi_prev_nondebug (&gsi);
9070 if (!gsi_end_p (gsi))
9072 cond_stmt = gsi_stmt (gsi);
9073 if (!is_gimple_assign (cond_stmt))
9074 return false;
9075 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
9077 gsi_prev_nondebug (&gsi);
9078 if (!gsi_end_p (gsi))
9080 vce_stmt = gsi_stmt (gsi);
9081 if (!is_gimple_assign (vce_stmt)
9082 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
9083 return false;
9086 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
9087 std::swap (vce_stmt, cond_stmt);
9088 else
9089 return false;
9090 if (vce_stmt)
9092 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
9093 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
9094 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
9095 return false;
9096 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
9097 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
9098 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
9099 TYPE_SIZE (TREE_TYPE (loaded_val))))
9100 return false;
9101 gsi_prev_nondebug (&gsi);
9102 if (!gsi_end_p (gsi))
9103 return false;
9106 tree cond = gimple_assign_rhs1 (condexpr_stmt);
9107 tree cond_op1, cond_op2;
9108 if (cond_stmt)
9110 /* We should now always get a separate cond_stmt. */
9111 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9112 return false;
9113 cond_op1 = gimple_assign_rhs1 (cond_stmt);
9114 cond_op2 = gimple_assign_rhs2 (cond_stmt);
9116 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9117 return false;
9118 else
9120 cond_op1 = TREE_OPERAND (cond, 0);
9121 cond_op2 = TREE_OPERAND (cond, 1);
9123 tree d;
9124 if (TREE_CODE (cond) == NE_EXPR)
9126 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9127 return false;
9128 d = gimple_assign_rhs3 (condexpr_stmt);
9130 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9131 return false;
9132 else
9133 d = gimple_assign_rhs2 (condexpr_stmt);
9134 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9135 if (operand_equal_p (e, cond_op1))
9136 e = cond_op2;
9137 else if (operand_equal_p (e, cond_op2))
9138 e = cond_op1;
9139 else
9140 return false;
9142 location_t loc = gimple_location (store_stmt);
9143 gimple *load_stmt = last_stmt (load_bb);
9144 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9145 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9146 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9147 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9148 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9149 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9150 gcc_checking_assert (!need_old || !need_new);
9152 enum built_in_function fncode
9153 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9154 + index + 1);
9155 tree cmpxchg = builtin_decl_explicit (fncode);
9156 if (cmpxchg == NULL_TREE)
9157 return false;
9158 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9160 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9161 || !can_atomic_load_p (TYPE_MODE (itype)))
9162 return false;
9164 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9165 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9166 return false;
9168 gsi = gsi_for_stmt (store_stmt);
9169 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9171 tree ne = create_tmp_reg (itype);
9172 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9173 gimple_set_location (g, loc);
9174 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9175 e = ne;
9177 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9179 tree nd = create_tmp_reg (itype);
9180 enum tree_code code;
9181 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9183 code = VIEW_CONVERT_EXPR;
9184 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9186 else
9187 code = NOP_EXPR;
9188 gimple *g = gimple_build_assign (nd, code, d);
9189 gimple_set_location (g, loc);
9190 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9191 d = nd;
9194 tree ctype = build_complex_type (itype);
9195 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9196 gimple *g
9197 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9198 build_int_cst (integer_type_node, flag),
9199 mo, fmo);
9200 tree cres = create_tmp_reg (ctype);
9201 gimple_call_set_lhs (g, cres);
9202 gimple_set_location (g, loc);
9203 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9205 if (cond_stmt || need_old || need_new)
9207 tree im = create_tmp_reg (itype);
9208 g = gimple_build_assign (im, IMAGPART_EXPR,
9209 build1 (IMAGPART_EXPR, itype, cres));
9210 gimple_set_location (g, loc);
9211 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9213 tree re = NULL_TREE;
9214 if (need_old || need_new)
9216 re = create_tmp_reg (itype);
9217 g = gimple_build_assign (re, REALPART_EXPR,
9218 build1 (REALPART_EXPR, itype, cres));
9219 gimple_set_location (g, loc);
9220 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9223 if (cond_stmt)
9225 g = gimple_build_assign (cond, NOP_EXPR, im);
9226 gimple_set_location (g, loc);
9227 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9230 if (need_new)
9232 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9233 cond_stmt
9234 ? cond : build2 (NE_EXPR, boolean_type_node,
9235 im, build_zero_cst (itype)),
9236 d, re);
9237 gimple_set_location (g, loc);
9238 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9239 re = gimple_assign_lhs (g);
9242 if (need_old || need_new)
9244 tree v = need_old ? loaded_val : stored_val;
9245 enum tree_code code;
9246 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9248 code = VIEW_CONVERT_EXPR;
9249 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9251 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9252 code = NOP_EXPR;
9253 else
9254 code = TREE_CODE (re);
9255 g = gimple_build_assign (v, code, re);
9256 gimple_set_location (g, loc);
9257 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9261 gsi_remove (&gsi, true);
9262 gsi = gsi_for_stmt (load_stmt);
9263 gsi_remove (&gsi, true);
9264 gsi = gsi_for_stmt (condexpr_stmt);
9265 gsi_remove (&gsi, true);
9266 if (cond_stmt)
9268 gsi = gsi_for_stmt (cond_stmt);
9269 gsi_remove (&gsi, true);
9271 if (vce_stmt)
9273 gsi = gsi_for_stmt (vce_stmt);
9274 gsi_remove (&gsi, true);
9277 return true;
9280 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9282 oldval = *addr;
9283 repeat:
9284 newval = rhs; // with oldval replacing *addr in rhs
9285 curval = __sync_val_compare_and_swap (addr, oldval, newval);
9286 if (curval != oldval)
9287 { oldval = curval; goto repeat; }
9289 INDEX is log2 of the size of the data type, and thus usable to find the
9290 index of the builtin decl. */
9292 static bool
9293 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9294 tree addr, tree loaded_val, tree stored_val,
9295 int index)
9297 tree loadedi, storedi, initial, new_storedi, old_vali;
9298 tree type, itype, cmpxchg, iaddr, atype;
9299 gimple_stmt_iterator si;
9300 basic_block loop_header = single_succ (load_bb);
9301 gimple *phi, *stmt;
9302 edge e;
9303 enum built_in_function fncode;
9305 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9306 + index + 1);
9307 cmpxchg = builtin_decl_explicit (fncode);
9308 if (cmpxchg == NULL_TREE)
9309 return false;
9310 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9311 atype = type;
9312 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9314 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9315 || !can_atomic_load_p (TYPE_MODE (itype)))
9316 return false;
9318 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9319 si = gsi_last_nondebug_bb (load_bb);
9320 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9321 location_t loc = gimple_location (gsi_stmt (si));
9322 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9323 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9324 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9326 /* For floating-point values, we'll need to view-convert them to integers
9327 so that we can perform the atomic compare and swap. Simplify the
9328 following code by always setting up the "i"ntegral variables. */
9329 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9331 tree iaddr_val;
9333 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9334 true));
9335 atype = itype;
9336 iaddr_val
9337 = force_gimple_operand_gsi (&si,
9338 fold_convert (TREE_TYPE (iaddr), addr),
9339 false, NULL_TREE, true, GSI_SAME_STMT);
9340 stmt = gimple_build_assign (iaddr, iaddr_val);
9341 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9342 loadedi = create_tmp_var (itype);
9343 if (gimple_in_ssa_p (cfun))
9344 loadedi = make_ssa_name (loadedi);
9346 else
9348 iaddr = addr;
9349 loadedi = loaded_val;
9352 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9353 tree loaddecl = builtin_decl_explicit (fncode);
9354 if (loaddecl)
9355 initial
9356 = fold_convert (atype,
9357 build_call_expr (loaddecl, 2, iaddr,
9358 build_int_cst (NULL_TREE,
9359 MEMMODEL_RELAXED)));
9360 else
9362 tree off
9363 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9364 true), 0);
9365 initial = build2 (MEM_REF, atype, iaddr, off);
9368 initial
9369 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9370 GSI_SAME_STMT);
9372 /* Move the value to the LOADEDI temporary. */
9373 if (gimple_in_ssa_p (cfun))
9375 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9376 phi = create_phi_node (loadedi, loop_header);
9377 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9378 initial);
9380 else
9381 gsi_insert_before (&si,
9382 gimple_build_assign (loadedi, initial),
9383 GSI_SAME_STMT);
9384 if (loadedi != loaded_val)
9386 gimple_stmt_iterator gsi2;
9387 tree x;
9389 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9390 gsi2 = gsi_start_bb (loop_header);
9391 if (gimple_in_ssa_p (cfun))
9393 gassign *stmt;
9394 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9395 true, GSI_SAME_STMT);
9396 stmt = gimple_build_assign (loaded_val, x);
9397 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9399 else
9401 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9402 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9403 true, GSI_SAME_STMT);
9406 gsi_remove (&si, true);
9408 si = gsi_last_nondebug_bb (store_bb);
9409 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9411 if (iaddr == addr)
9412 storedi = stored_val;
9413 else
9414 storedi
9415 = force_gimple_operand_gsi (&si,
9416 build1 (VIEW_CONVERT_EXPR, itype,
9417 stored_val), true, NULL_TREE, true,
9418 GSI_SAME_STMT);
9420 /* Build the compare&swap statement. */
9421 tree ctype = build_complex_type (itype);
9422 int flag = int_size_in_bytes (itype);
9423 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9424 ctype, 6, iaddr, loadedi,
9425 storedi,
9426 build_int_cst (integer_type_node,
9427 flag),
9428 mo, fmo);
9429 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9430 new_storedi = force_gimple_operand_gsi (&si,
9431 fold_convert (TREE_TYPE (loadedi),
9432 new_storedi),
9433 true, NULL_TREE,
9434 true, GSI_SAME_STMT);
9436 if (gimple_in_ssa_p (cfun))
9437 old_vali = loadedi;
9438 else
9440 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9441 stmt = gimple_build_assign (old_vali, loadedi);
9442 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9444 stmt = gimple_build_assign (loadedi, new_storedi);
9445 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9448 /* Note that we always perform the comparison as an integer, even for
9449 floating point. This allows the atomic operation to properly
9450 succeed even with NaNs and -0.0. */
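/* E.g. a floating-point NaN never compares equal to itself, which would
   make the retry loop spin forever, while 0.0 == -0.0 compares equal
   despite differing bit patterns, which could end the loop after a
   failed compare-and-swap.  */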
9451 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9452 stmt = gimple_build_cond_empty (ne);
9453 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9455 /* Update cfg. */
9456 e = single_succ_edge (store_bb);
9457 e->flags &= ~EDGE_FALLTHRU;
9458 e->flags |= EDGE_FALSE_VALUE;
9459 /* Expect no looping. */
9460 e->probability = profile_probability::guessed_always ();
9462 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9463 e->probability = profile_probability::guessed_never ();
9465 /* Copy the new value to loadedi (we already did that before the condition
9466 if we are not in SSA). */
9467 if (gimple_in_ssa_p (cfun))
9469 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9470 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9473 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9474 stmt = gsi_stmt (si);
9475 gsi_remove (&si, true);
9476 if (gimple_in_ssa_p (cfun))
9477 release_defs (stmt);
9479 class loop *loop = alloc_loop ();
9480 loop->header = loop_header;
9481 loop->latch = store_bb;
9482 add_loop (loop, loop_header->loop_father);
9484 return true;
9487 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9489 GOMP_atomic_start ();
9490 *addr = rhs;
9491 GOMP_atomic_end ();
9493 The result is not globally atomic, but works so long as all parallel
9494 references are within #pragma omp atomic directives. According to
9495 responses received from omp@openmp.org, this appears to be within
9496 spec, which makes sense given that several other compilers handle
9497 this situation the same way.
9498 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9499 expanding. STORED_VAL is the operand of the matching
9500 GIMPLE_OMP_ATOMIC_STORE.
9502 We replace
9503 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9504 loaded_val = *addr;
9506 and replace
9507 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9508 *addr = stored_val; */
9511 static bool
9512 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9513 tree addr, tree loaded_val, tree stored_val)
9515 gimple_stmt_iterator si;
9516 gassign *stmt;
9517 tree t;
9519 si = gsi_last_nondebug_bb (load_bb);
9520 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9522 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9523 t = build_call_expr (t, 0);
9524 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9526 tree mem = build_simple_mem_ref (addr);
9527 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9528 TREE_OPERAND (mem, 1)
9529 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9530 true),
9531 TREE_OPERAND (mem, 1));
9532 stmt = gimple_build_assign (loaded_val, mem);
9533 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9534 gsi_remove (&si, true);
9536 si = gsi_last_nondebug_bb (store_bb);
9537 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9539 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9540 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (si)));
9541 gimple_set_vdef (stmt, gimple_vdef (gsi_stmt (si)));
9542 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9544 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9545 t = build_call_expr (t, 0);
9546 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9547 gsi_remove (&si, true);
9548 return true;
9551 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9552 using expand_omp_atomic_fetch_op. If that fails, we try to
9553 call expand_omp_atomic_pipeline, and if that fails too, the
9554 ultimate fallback is wrapping the operation in a mutex
9555 (expand_omp_atomic_mutex). REGION is the atomic region built
9556 by build_omp_regions_1(). */
9558 static void
9559 expand_omp_atomic (struct omp_region *region)
9561 basic_block load_bb = region->entry, store_bb = region->exit;
9562 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9563 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9564 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9565 tree addr = gimple_omp_atomic_load_rhs (load);
9566 tree stored_val = gimple_omp_atomic_store_val (store);
9567 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9568 HOST_WIDE_INT index;
9570 /* Make sure the type is one of the supported sizes. */
9571 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9572 index = exact_log2 (index);
9573 if (index >= 0 && index <= 4)
9575 unsigned int align = TYPE_ALIGN_UNIT (type);
9577 /* __sync builtins require strict data alignment. */
9578 if (exact_log2 (align) >= index)
9580 /* Atomic load. */
9581 scalar_mode smode;
9582 if (loaded_val == stored_val
9583 && (is_int_mode (TYPE_MODE (type), &smode)
9584 || is_float_mode (TYPE_MODE (type), &smode))
9585 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9586 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9587 return;
9589 /* Atomic store. */
9590 if ((is_int_mode (TYPE_MODE (type), &smode)
9591 || is_float_mode (TYPE_MODE (type), &smode))
9592 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9593 && store_bb == single_succ (load_bb)
9594 && first_stmt (store_bb) == store
9595 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9596 stored_val, index))
9597 return;
9599 /* When possible, use specialized atomic update functions. */
9600 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9601 && store_bb == single_succ (load_bb)
9602 && expand_omp_atomic_fetch_op (load_bb, addr,
9603 loaded_val, stored_val, index))
9604 return;
9606 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9607 if (store_bb == single_succ (load_bb)
9608 && !gimple_in_ssa_p (cfun)
9609 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9610 index))
9611 return;
9613 /* If we don't have specialized __sync builtins, try and implement
9614 as a compare and swap loop. */
9615 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9616 loaded_val, stored_val, index))
9617 return;
9621 /* The ultimate fallback is wrapping the operation in a mutex. */
9622 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9625 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9626 at REGION_EXIT. */
9628 static void
9629 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9630 basic_block region_exit)
9632 class loop *outer = region_entry->loop_father;
9633 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9635 /* Don't parallelize the kernels region if it contains more than one outer
9636 loop. */
9637 unsigned int nr_outer_loops = 0;
9638 class loop *single_outer = NULL;
9639 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9641 gcc_assert (loop_outer (loop) == outer);
9643 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9644 continue;
9646 if (region_exit != NULL
9647 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9648 continue;
9650 nr_outer_loops++;
9651 single_outer = loop;
9653 if (nr_outer_loops != 1)
9654 return;
9656 for (class loop *loop = single_outer->inner;
9657 loop != NULL;
9658 loop = loop->inner)
9659 if (loop->next)
9660 return;
9662 /* Mark the loops in the region. */
9663 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9664 loop->in_oacc_kernels_region = true;
9667 /* Build a target argument identifier from the DEVICE identifier, value
9668 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9670 static tree
9671 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9673 tree t = build_int_cst (integer_type_node, device);
9674 if (subsequent_param)
9675 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9676 build_int_cst (integer_type_node,
9677 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9678 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9679 build_int_cst (integer_type_node, id));
9680 return t;
9683 /* Like above but return it in a type that can be directly stored as
9684 an element of the argument array. */
9686 static tree
9687 get_target_argument_identifier (int device, bool subsequent_param, int id)
9689 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9690 return fold_convert (ptr_type_node, t);
9693 /* Return a target argument consisting of DEVICE identifier, value identifier
9694 ID, and the actual VALUE. */
9696 static tree
9697 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9698 tree value)
9700 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9701 fold_convert (integer_type_node, value),
9702 build_int_cst (unsigned_type_node,
9703 GOMP_TARGET_ARG_VALUE_SHIFT));
9704 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9705 get_target_argument_identifier_1 (device, false, id));
9706 t = fold_convert (ptr_type_node, t);
9707 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9710 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9711 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9712 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9713 arguments. */
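/* E.g. (illustrative) a literal num_teams value of 4 fits into the
   +/-2^15 range and is encoded into a single element as

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   cast to a pointer.  */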
9715 static void
9716 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9717 int id, tree value, vec <tree> *args)
9719 if (tree_fits_shwi_p (value)
9720 && tree_to_shwi (value) > -(1 << 15)
9721 && tree_to_shwi (value) < (1 << 15))
9722 args->quick_push (get_target_argument_value (gsi, device, id, value));
9723 else
9725 args->quick_push (get_target_argument_identifier (device, true, id));
9726 value = fold_convert (ptr_type_node, value);
9727 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9728 GSI_SAME_STMT);
9729 args->quick_push (value);
9733 /* Create an array of arguments that is then passed to GOMP_target. */
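/* The resulting array is NULL-terminated, e.g. (illustrative)

     .omp_target_args = { <num_teams elt>, <thread_limit elt>, NULL };

   and it is the address of this array that the runtime call receives.  */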
9735 static tree
9736 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9738 auto_vec <tree, 6> args;
9739 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9740 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9741 if (c)
9742 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9743 else
9744 t = integer_minus_one_node;
9745 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9746 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9748 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9749 if (c)
9750 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9751 else
9752 t = integer_minus_one_node;
9753 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9754 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9755 &args);
9757 /* Produce more, perhaps device specific, arguments here. */
9759 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9760 args.length () + 1),
9761 ".omp_target_args");
9762 for (unsigned i = 0; i < args.length (); i++)
9764 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9765 build_int_cst (integer_type_node, i),
9766 NULL_TREE, NULL_TREE);
9767 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9768 GSI_SAME_STMT);
9770 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9771 build_int_cst (integer_type_node, args.length ()),
9772 NULL_TREE, NULL_TREE);
9773 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9774 GSI_SAME_STMT);
9775 TREE_ADDRESSABLE (argarray) = 1;
9776 return build_fold_addr_expr (argarray);
9779 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9781 static void
9782 expand_omp_target (struct omp_region *region)
9784 basic_block entry_bb, exit_bb, new_bb;
9785 struct function *child_cfun;
9786 tree child_fn, child_fn2, block, t, c;
9787 gimple_stmt_iterator gsi;
9788 gomp_target *entry_stmt;
9789 gimple *stmt;
9790 edge e;
9791 bool offloaded;
9792 int target_kind;
9794 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9795 target_kind = gimple_omp_target_kind (entry_stmt);
9796 new_bb = region->entry;
9798 offloaded = is_gimple_omp_offloaded (entry_stmt);
9799 switch (target_kind)
9801 case GF_OMP_TARGET_KIND_REGION:
9802 case GF_OMP_TARGET_KIND_UPDATE:
9803 case GF_OMP_TARGET_KIND_ENTER_DATA:
9804 case GF_OMP_TARGET_KIND_EXIT_DATA:
9805 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9806 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9807 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9808 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9809 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9810 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9811 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9812 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9813 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9814 case GF_OMP_TARGET_KIND_DATA:
9815 case GF_OMP_TARGET_KIND_OACC_DATA:
9816 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9817 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9818 break;
9819 default:
9820 gcc_unreachable ();
9823 tree clauses = gimple_omp_target_clauses (entry_stmt);
9825 bool is_ancestor = false;
9826 child_fn = child_fn2 = NULL_TREE;
9827 child_cfun = NULL;
9828 if (offloaded)
9830 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9831 if (ENABLE_OFFLOADING && c)
9832 is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
9833 child_fn = gimple_omp_target_child_fn (entry_stmt);
9834 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9837 /* Supported by expand_omp_taskreg, but not here. */
9838 if (child_cfun != NULL)
9839 gcc_checking_assert (!child_cfun->cfg);
9840 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9842 entry_bb = region->entry;
9843 exit_bb = region->exit;
9845 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9846 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9848 /* From here on, all OpenACC compute constructs are mapped to
9849 'BUILT_IN_GOACC_PARALLEL' and get their compute regions outlined.
9850 To distinguish between them, we attach attributes. */
9851 switch (target_kind)
9853 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9854 DECL_ATTRIBUTES (child_fn)
9855 = tree_cons (get_identifier ("oacc parallel"),
9856 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9857 break;
9858 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9859 DECL_ATTRIBUTES (child_fn)
9860 = tree_cons (get_identifier ("oacc kernels"),
9861 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9862 break;
9863 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9864 DECL_ATTRIBUTES (child_fn)
9865 = tree_cons (get_identifier ("oacc serial"),
9866 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9867 break;
9868 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9869 DECL_ATTRIBUTES (child_fn)
9870 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9871 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9872 break;
9873 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9874 DECL_ATTRIBUTES (child_fn)
9875 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9876 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9877 break;
9878 default:
9879 /* Make sure we don't miss any. */
9880 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9881 && is_gimple_omp_offloaded (entry_stmt)));
9882 break;
9885 if (offloaded)
9887 unsigned srcidx, dstidx, num;
9889 /* If the offloading region needs data sent from the parent
9890 function, then the very first statement (except possible
9891 tree profile counter updates) of the offloading body
9892 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9893 &.OMP_DATA_O is passed as an argument to the child function,
9894 we need to replace it with the argument as seen by the child
9895 function.
9897 In most cases, this will end up being the identity assignment
9898 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9899 a function call that has been inlined, the original PARM_DECL
9900 .OMP_DATA_I may have been converted into a different local
9901 variable, in which case we need to keep the assignment. */
9902 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9903 if (data_arg)
9905 basic_block entry_succ_bb = single_succ (entry_bb);
9906 gimple_stmt_iterator gsi;
9907 tree arg;
9908 gimple *tgtcopy_stmt = NULL;
9909 tree sender = TREE_VEC_ELT (data_arg, 0);
9911 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9913 gcc_assert (!gsi_end_p (gsi));
9914 stmt = gsi_stmt (gsi);
9915 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9916 continue;
9918 if (gimple_num_ops (stmt) == 2)
9920 tree arg = gimple_assign_rhs1 (stmt);
9922 /* We're ignoring the subcode because we're
9923 effectively doing a STRIP_NOPS. */
9925 if (TREE_CODE (arg) == ADDR_EXPR
9926 && TREE_OPERAND (arg, 0) == sender)
9928 tgtcopy_stmt = stmt;
9929 break;
9934 gcc_assert (tgtcopy_stmt != NULL);
9935 arg = DECL_ARGUMENTS (child_fn);
9937 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9938 gsi_remove (&gsi, true);
9941 /* Declare local variables needed in CHILD_CFUN. */
9942 block = DECL_INITIAL (child_fn);
9943 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9944 /* The gimplifier could record temporaries in the offloading block
9945 rather than in the containing function's local_decls chain,
9946 which would mean cgraph missed finalizing them. Do it now. */
9947 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9948 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9949 varpool_node::finalize_decl (t);
9950 DECL_SAVED_TREE (child_fn) = NULL;
9951 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9952 gimple_set_body (child_fn, NULL);
9953 TREE_USED (block) = 1;
9955 /* Reset DECL_CONTEXT on function arguments. */
9956 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9957 DECL_CONTEXT (t) = child_fn;
9959 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9960 so that it can be moved to the child function. */
9961 gsi = gsi_last_nondebug_bb (entry_bb);
9962 stmt = gsi_stmt (gsi);
9963 gcc_assert (stmt
9964 && gimple_code (stmt) == gimple_code (entry_stmt));
9965 e = split_block (entry_bb, stmt);
9966 gsi_remove (&gsi, true);
9967 entry_bb = e->dest;
9968 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9970 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9971 if (exit_bb)
9973 gsi = gsi_last_nondebug_bb (exit_bb);
9974 gcc_assert (!gsi_end_p (gsi)
9975 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9976 stmt = gimple_build_return (NULL);
9977 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9978 gsi_remove (&gsi, true);
9981 /* Move the offloading region into CHILD_CFUN. */
9983 block = gimple_block (entry_stmt);
9985 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9986 if (exit_bb)
9987 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9988 /* When the OMP expansion process cannot guarantee an up-to-date
9989 loop tree, arrange for the child function to fix up loops. */
9990 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9991 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9993 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9994 num = vec_safe_length (child_cfun->local_decls);
9995 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9997 t = (*child_cfun->local_decls)[srcidx];
9998 if (DECL_CONTEXT (t) == cfun->decl)
9999 continue;
10000 if (srcidx != dstidx)
10001 (*child_cfun->local_decls)[dstidx] = t;
10002 dstidx++;
10004 if (dstidx != num)
10005 vec_safe_truncate (child_cfun->local_decls, dstidx);
10007 /* Inform the callgraph about the new function. */
10008 child_cfun->curr_properties = cfun->curr_properties;
10009 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
10010 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
10011 cgraph_node *node = cgraph_node::get_create (child_fn);
10012 node->parallelized_function = 1;
10013 cgraph_node::add_new_function (child_fn, true);
10015 /* Add the new function to the offload table. */
10016 if (ENABLE_OFFLOADING)
10018 if (in_lto_p)
10019 DECL_PRESERVE_P (child_fn) = 1;
10020 if (!is_ancestor)
10021 vec_safe_push (offload_funcs, child_fn);
10024 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
10025 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
10027 /* Fix the callgraph edges for child_cfun. Those for cfun will be
10028 fixed in a following pass. */
10029 push_cfun (child_cfun);
10030 if (need_asm)
10031 assign_assembler_name_if_needed (child_fn);
10032 cgraph_edge::rebuild_edges ();
10034 /* Some EH regions might become dead, see PR34608. If
10035 pass_cleanup_cfg isn't the first pass to happen with the
10036 new child, these dead EH edges might cause problems.
10037 Clean them up now. */
10038 if (flag_exceptions)
10040 basic_block bb;
10041 bool changed = false;
10043 FOR_EACH_BB_FN (bb, cfun)
10044 changed |= gimple_purge_dead_eh_edges (bb);
10045 if (changed)
10046 cleanup_tree_cfg ();
10048 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10049 verify_loop_structure ();
10050 pop_cfun ();
10052 if (dump_file && !gimple_in_ssa_p (cfun))
10054 omp_any_child_fn_dumped = true;
10055 dump_function_header (dump_file, child_fn, dump_flags);
10056 dump_function_to_file (child_fn, dump_file, dump_flags);
10059 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
10061 /* Handle the case where an inner ancestor:1 target is called by an outer
10062 target region. */
10063 if (is_ancestor)
10065 cgraph_node *fn2_node;
10066 child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
10067 FUNCTION_DECL,
10068 clone_function_name (child_fn, "nohost"),
10069 TREE_TYPE (child_fn));
10070 if (in_lto_p)
10071 DECL_PRESERVE_P (child_fn2) = 1;
10072 TREE_STATIC (child_fn2) = 1;
10073 DECL_ARTIFICIAL (child_fn2) = 1;
10074 DECL_IGNORED_P (child_fn2) = 0;
10075 TREE_PUBLIC (child_fn2) = 0;
10076 DECL_UNINLINABLE (child_fn2) = 1;
10077 DECL_EXTERNAL (child_fn2) = 0;
10078 DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
10079 DECL_INITIAL (child_fn2) = make_node (BLOCK);
10080 BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
10081 DECL_ATTRIBUTES (child_fn)
10082 = remove_attribute ("omp target entrypoint",
10083 DECL_ATTRIBUTES (child_fn));
10084 DECL_ATTRIBUTES (child_fn2)
10085 = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
10086 NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
10087 DECL_ATTRIBUTES (child_fn)
10088 = tree_cons (get_identifier ("omp target device_ancestor_host"),
10089 NULL_TREE, DECL_ATTRIBUTES (child_fn));
10090 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
10091 = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
10092 DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
10093 = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
10094 DECL_FUNCTION_VERSIONED (child_fn2)
10095 = DECL_FUNCTION_VERSIONED (current_function_decl);
10097 fn2_node = cgraph_node::get_create (child_fn2);
10098 fn2_node->offloadable = 1;
10099 fn2_node->force_output = 1;
10100 node->offloadable = 0;
10102 /* Enable pass_omp_device_lower pass. */
10103 fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
10104 fn2_node->calls_declare_variant_alt = 1;
10106 t = build_decl (DECL_SOURCE_LOCATION (child_fn),
10107 RESULT_DECL, NULL_TREE, void_type_node);
10108 DECL_ARTIFICIAL (t) = 1;
10109 DECL_IGNORED_P (t) = 1;
10110 DECL_CONTEXT (t) = child_fn2;
10111 DECL_RESULT (child_fn2) = t;
10112 DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
10113 void_type_node, NULL);
10114 tree tmp = DECL_ARGUMENTS (child_fn);
10115 t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
10116 DECL_NAME (tmp), TREE_TYPE (tmp));
10117 DECL_ARTIFICIAL (t) = 1;
10118 DECL_NAMELESS (t) = 1;
10119 DECL_ARG_TYPE (t) = ptr_type_node;
10120 DECL_CONTEXT (t) = current_function_decl;
10121 TREE_USED (t) = 1;
10122 TREE_READONLY (t) = 1;
10123 DECL_ARGUMENTS (child_fn2) = t;
10124 gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
10126 gimplify_function_tree (child_fn2);
10127 cgraph_node::add_new_function (child_fn2, true);
10129 vec_safe_push (offload_funcs, child_fn2);
10130 if (dump_file && !gimple_in_ssa_p (cfun))
10132 dump_function_header (dump_file, child_fn2, dump_flags);
10133 dump_function_to_file (child_fn2, dump_file, dump_flags);
10138 /* Emit a library call to launch the offloading region, or do data
10139 transfers. */
10140 tree t1, t2, t3, t4, depend;
10141 enum built_in_function start_ix;
10142 unsigned int flags_i = 0;
10144 switch (gimple_omp_target_kind (entry_stmt))
10146 case GF_OMP_TARGET_KIND_REGION:
10147 start_ix = BUILT_IN_GOMP_TARGET;
10148 break;
10149 case GF_OMP_TARGET_KIND_DATA:
10150 start_ix = BUILT_IN_GOMP_TARGET_DATA;
10151 break;
10152 case GF_OMP_TARGET_KIND_UPDATE:
10153 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
10154 break;
10155 case GF_OMP_TARGET_KIND_ENTER_DATA:
10156 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10157 break;
10158 case GF_OMP_TARGET_KIND_EXIT_DATA:
10159 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10160 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
10161 break;
10162 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10163 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10164 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10165 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10166 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10167 start_ix = BUILT_IN_GOACC_PARALLEL;
10168 break;
10169 case GF_OMP_TARGET_KIND_OACC_DATA:
10170 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10171 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10172 start_ix = BUILT_IN_GOACC_DATA_START;
10173 break;
10174 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10175 start_ix = BUILT_IN_GOACC_UPDATE;
10176 break;
10177 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10178 start_ix = BUILT_IN_GOACC_ENTER_DATA;
10179 break;
10180 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10181 start_ix = BUILT_IN_GOACC_EXIT_DATA;
10182 break;
10183 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10184 start_ix = BUILT_IN_GOACC_DECLARE;
10185 break;
10186 default:
10187 gcc_unreachable ();
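 /* Schematically, this switch selects the libgomp entry point the
    directive is lowered to; e.g. '#pragma omp target update from(x)'
    picks BUILT_IN_GOMP_TARGET_UPDATE, so the directive ends up as a
    single call to that builtin, with the device id and mapping vectors
    built below.  */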
10190 tree device = NULL_TREE;
10191 location_t device_loc = UNKNOWN_LOCATION;
10192 tree goacc_flags = NULL_TREE;
10193 bool need_device_adjustment = false;
10194 gimple_stmt_iterator adj_gsi;
10195 if (is_gimple_omp_oacc (entry_stmt))
10197 /* By default, no GOACC_FLAGs are set. */
10198 goacc_flags = integer_zero_node;
10200 else
10202 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10203 if (c)
10205 device = OMP_CLAUSE_DEVICE_ID (c);
10206 /* Ensure 'device' is of the correct type. */
10207 device = fold_convert_loc (device_loc, integer_type_node, device);
10208 if (TREE_CODE (device) == INTEGER_CST)
10210 if (wi::to_wide (device) == GOMP_DEVICE_ICV)
10211 device = build_int_cst (integer_type_node,
10212 GOMP_DEVICE_HOST_FALLBACK);
10213 else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10214 device = build_int_cst (integer_type_node,
10215 GOMP_DEVICE_HOST_FALLBACK - 1);
10217 else
10218 need_device_adjustment = true;
10219 device_loc = OMP_CLAUSE_LOCATION (c);
10220 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10221 device = build_int_cst (integer_type_node,
10222 GOMP_DEVICE_HOST_FALLBACK);
10224 else
10226 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
10227 runtime library choose). */
10228 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10229 device_loc = gimple_location (entry_stmt);
10232 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10233 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10234 the nowait clause doesn't appear. */
10235 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10236 c = NULL;
10237 if (c)
10238 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10241 /* By default, there is no conditional. */
10242 tree cond = NULL_TREE;
10243 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10244 if (c)
10245 cond = OMP_CLAUSE_IF_EXPR (c);
10246 /* If we found the clause 'if (cond)', build:
10247 OpenACC: goacc_flags = (cond ? goacc_flags
10248 : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10249 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
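 /* E.g. for '#pragma omp target if (expr)' the blocks built below
    compute, roughly:
      if (expr) tmp = device; else tmp = GOMP_DEVICE_HOST_FALLBACK;
      device = tmp;
    so the runtime call receives the host-fallback device id whenever
    the condition is false.  */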
10250 if (cond)
10252 tree *tp;
10253 if (is_gimple_omp_oacc (entry_stmt))
10254 tp = &goacc_flags;
10255 else
10256 tp = &device;
10258 cond = gimple_boolify (cond);
10260 basic_block cond_bb, then_bb, else_bb;
10261 edge e;
10262 tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10263 if (offloaded)
10264 e = split_block_after_labels (new_bb);
10265 else
10267 gsi = gsi_last_nondebug_bb (new_bb);
10268 gsi_prev (&gsi);
10269 e = split_block (new_bb, gsi_stmt (gsi));
10271 cond_bb = e->src;
10272 new_bb = e->dest;
10273 remove_edge (e);
10275 then_bb = create_empty_bb (cond_bb);
10276 else_bb = create_empty_bb (then_bb);
10277 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10278 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10280 stmt = gimple_build_cond_empty (cond);
10281 gsi = gsi_last_bb (cond_bb);
10282 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10284 gsi = gsi_start_bb (then_bb);
10285 stmt = gimple_build_assign (tmp_var, *tp);
10286 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10287 adj_gsi = gsi;
10289 gsi = gsi_start_bb (else_bb);
10290 if (is_gimple_omp_oacc (entry_stmt))
10291 stmt = gimple_build_assign (tmp_var,
10292 BIT_IOR_EXPR,
10293 *tp,
10294 build_int_cst (integer_type_node,
10295 GOACC_FLAG_HOST_FALLBACK));
10296 else
10297 stmt = gimple_build_assign (tmp_var,
10298 build_int_cst (integer_type_node,
10299 GOMP_DEVICE_HOST_FALLBACK));
10300 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10302 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10303 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10304 add_bb_to_loop (then_bb, cond_bb->loop_father);
10305 add_bb_to_loop (else_bb, cond_bb->loop_father);
10306 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10307 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10309 *tp = tmp_var;
10311 gsi = gsi_last_nondebug_bb (new_bb);
10313 else
10315 gsi = gsi_last_nondebug_bb (new_bb);
10317 if (device != NULL_TREE)
10318 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10319 true, GSI_SAME_STMT);
10320 if (need_device_adjustment)
10322 tree tmp_var = create_tmp_var (TREE_TYPE (device));
10323 stmt = gimple_build_assign (tmp_var, device);
10324 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10325 adj_gsi = gsi_for_stmt (stmt);
10326 device = tmp_var;
10330 if (need_device_adjustment)
10332 tree uns = fold_convert (unsigned_type_node, device);
10333 uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10334 false, GSI_CONTINUE_LINKING);
10335 edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10336 basic_block cond_bb = e->src;
10337 basic_block else_bb = e->dest;
10338 if (gsi_bb (adj_gsi) == new_bb)
10340 new_bb = else_bb;
10341 gsi = gsi_last_nondebug_bb (new_bb);
10344 basic_block then_bb = create_empty_bb (cond_bb);
10345 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10347 cond = build2 (GT_EXPR, boolean_type_node, uns,
10348 build_int_cst (unsigned_type_node,
10349 GOMP_DEVICE_HOST_FALLBACK - 1));
10350 stmt = gimple_build_cond_empty (cond);
10351 adj_gsi = gsi_last_bb (cond_bb);
10352 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10354 adj_gsi = gsi_start_bb (then_bb);
10355 tree add = build2 (PLUS_EXPR, integer_type_node, device,
10356 build_int_cst (integer_type_node, -1));
10357 stmt = gimple_build_assign (device, add);
10358 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10360 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10361 e->flags = EDGE_FALSE_VALUE;
10362 add_bb_to_loop (then_bb, cond_bb->loop_father);
10363 make_edge (then_bb, else_bb, EDGE_FALLTHRU);
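 /* The blocks built above implement, schematically:
      if ((unsigned) device > (unsigned) (GOMP_DEVICE_HOST_FALLBACK - 1))
        device = device - 1;
    i.e. the same remapping that is done at compile time for constant
    device ids a few lines earlier.  */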
10366 t = gimple_omp_target_data_arg (entry_stmt);
10367 if (t == NULL)
10369 t1 = size_zero_node;
10370 t2 = build_zero_cst (ptr_type_node);
10371 t3 = t2;
10372 t4 = t2;
10374 else
10376 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10377 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10378 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10379 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10380 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
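 /* Here t1 is the number of map entries, and t2/t3/t4 are the addresses
    of the variable, size and kind arrays recorded in the data argument
    TREE_VEC when the region was lowered.  */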
10383 gimple *g;
10384 bool tagging = false;
10385 /* The maximum number of arguments used by any start_ix, without varargs. */
10386 auto_vec<tree, 11> args;
10387 if (is_gimple_omp_oacc (entry_stmt))
10389 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10390 TREE_TYPE (goacc_flags), goacc_flags);
10391 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10392 NULL_TREE, true,
10393 GSI_SAME_STMT);
10394 args.quick_push (goacc_flags_m);
10396 else
10397 args.quick_push (device);
10398 if (offloaded)
10399 args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
10400 args.quick_push (t1);
10401 args.quick_push (t2);
10402 args.quick_push (t3);
10403 args.quick_push (t4);
10404 switch (start_ix)
10406 case BUILT_IN_GOACC_DATA_START:
10407 case BUILT_IN_GOACC_DECLARE:
10408 case BUILT_IN_GOMP_TARGET_DATA:
10409 break;
10410 case BUILT_IN_GOMP_TARGET:
10411 case BUILT_IN_GOMP_TARGET_UPDATE:
10412 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10413 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10414 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10415 if (c)
10416 depend = OMP_CLAUSE_DECL (c);
10417 else
10418 depend = build_int_cst (ptr_type_node, 0);
10419 args.quick_push (depend);
10420 if (start_ix == BUILT_IN_GOMP_TARGET)
10421 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10422 break;
10423 case BUILT_IN_GOACC_PARALLEL:
10424 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10426 tree dims = NULL_TREE;
10427 unsigned int ix;
10429 /* For serial constructs we set all dimensions to 1. */
10430 for (ix = GOMP_DIM_MAX; ix--;)
10431 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10432 oacc_replace_fn_attrib (child_fn, dims);
10434 else
10435 oacc_set_fn_attrib (child_fn, clauses, &args);
10436 tagging = true;
10437 /* FALLTHRU */
10438 case BUILT_IN_GOACC_ENTER_DATA:
10439 case BUILT_IN_GOACC_EXIT_DATA:
10440 case BUILT_IN_GOACC_UPDATE:
10442 tree t_async = NULL_TREE;
10444 /* If present, use the value specified by the respective
10445 clause, making sure it is of the correct type. */
10446 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10447 if (c)
10448 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10449 integer_type_node,
10450 OMP_CLAUSE_ASYNC_EXPR (c));
10451 else if (!tagging)
10452 /* Default value for t_async. */
10453 t_async = fold_convert_loc (gimple_location (entry_stmt),
10454 integer_type_node,
10455 build_int_cst (integer_type_node,
10456 GOMP_ASYNC_SYNC));
10457 if (tagging && t_async)
10459 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10461 if (TREE_CODE (t_async) == INTEGER_CST)
10463 /* See if we can pack the async arg into the tag's
10464 operand. */
10465 i_async = TREE_INT_CST_LOW (t_async);
10466 if (i_async < GOMP_LAUNCH_OP_MAX)
10467 t_async = NULL_TREE;
10468 else
10469 i_async = GOMP_LAUNCH_OP_MAX;
10471 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10472 i_async));
10474 if (t_async)
10475 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10476 NULL_TREE, true,
10477 GSI_SAME_STMT));
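 /* In the tagging (GOACC_PARALLEL) case the async queue id is thus
    either packed directly into the GOMP_LAUNCH_ASYNC tag operand, when
    it is a small constant, or passed as a separate vararg following
    the tag.  */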
10479 /* Save the argument index, and ... */
10480 unsigned t_wait_idx = args.length ();
10481 unsigned num_waits = 0;
10482 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10483 if (!tagging || c)
10484 /* ... push a placeholder. */
10485 args.safe_push (integer_zero_node);
10487 for (; c; c = OMP_CLAUSE_CHAIN (c))
10488 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10490 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10491 integer_type_node,
10492 OMP_CLAUSE_WAIT_EXPR (c));
10493 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10494 GSI_SAME_STMT);
10495 args.safe_push (arg);
10496 num_waits++;
10499 if (!tagging || num_waits)
10501 tree len;
10503 /* Now that we know the number, update the placeholder. */
10504 if (tagging)
10505 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10506 else
10507 len = build_int_cst (integer_type_node, num_waits);
10508 len = fold_convert_loc (gimple_location (entry_stmt),
10509 unsigned_type_node, len);
10510 args[t_wait_idx] = len;
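 /* I.e. the placeholder slot reserved above is only patched now, once
    the number of wait arguments pushed after it is known.  */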
10513 break;
10514 default:
10515 gcc_unreachable ();
10517 if (tagging)
10518 /* Push terminal marker - zero. */
10519 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10521 if (child_fn2)
10523 g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
10524 build_fold_addr_expr (child_fn));
10525 gimple_set_location (g, gimple_location (entry_stmt));
10526 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
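 /* The internal call above records the host address of child_fn for the
    reverse-offload case; it is dealt with later by pass_omp_device_lower
    (enabled via calls_declare_variant_alt above).  */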
10529 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10530 gimple_set_location (g, gimple_location (entry_stmt));
10531 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10532 if (!offloaded)
10534 g = gsi_stmt (gsi);
10535 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10536 gsi_remove (&gsi, true);
10540 /* Expand the parallel region tree rooted at REGION. Expansion
10541 proceeds in depth-first order. Innermost regions are expanded
10542 first. This way, parallel regions that require a new function to
10543 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10544 internal dependencies in their body. */
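 /* For instance, for
      #pragma omp parallel
      #pragma omp for
      for (...) ...
    the inner GIMPLE_OMP_FOR region is expanded first, so that outlining
    the enclosing GIMPLE_OMP_PARALLEL body into a child function sees no
    unexpanded directives.  */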
10546 static void
10547 expand_omp (struct omp_region *region)
10549 omp_any_child_fn_dumped = false;
10550 while (region)
10552 location_t saved_location;
10553 gimple *inner_stmt = NULL;
10555 /* First, determine whether this is a combined parallel+workshare
10556 region. */
10557 if (region->type == GIMPLE_OMP_PARALLEL)
10558 determine_parallel_type (region);
10560 if (region->type == GIMPLE_OMP_FOR
10561 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10562 inner_stmt = last_stmt (region->inner->entry);
10564 if (region->inner)
10565 expand_omp (region->inner);
10567 saved_location = input_location;
10568 if (gimple_has_location (last_stmt (region->entry)))
10569 input_location = gimple_location (last_stmt (region->entry));
10571 switch (region->type)
10573 case GIMPLE_OMP_PARALLEL:
10574 case GIMPLE_OMP_TASK:
10575 expand_omp_taskreg (region);
10576 break;
10578 case GIMPLE_OMP_FOR:
10579 expand_omp_for (region, inner_stmt);
10580 break;
10582 case GIMPLE_OMP_SECTIONS:
10583 expand_omp_sections (region);
10584 break;
10586 case GIMPLE_OMP_SECTION:
10587 /* Individual omp sections are handled together with their
10588 parent GIMPLE_OMP_SECTIONS region. */
10589 break;
10591 case GIMPLE_OMP_SINGLE:
10592 case GIMPLE_OMP_SCOPE:
10593 expand_omp_single (region);
10594 break;
10596 case GIMPLE_OMP_ORDERED:
10598 gomp_ordered *ord_stmt
10599 = as_a <gomp_ordered *> (last_stmt (region->entry));
10600 if (gimple_omp_ordered_standalone_p (ord_stmt))
10602 /* We'll expand these when expanding the corresponding
10603 worksharing region with an ordered(n) clause. */
10604 gcc_assert (region->outer
10605 && region->outer->type == GIMPLE_OMP_FOR);
10606 region->ord_stmt = ord_stmt;
10607 break;
10610 /* FALLTHRU */
10611 case GIMPLE_OMP_MASTER:
10612 case GIMPLE_OMP_MASKED:
10613 case GIMPLE_OMP_TASKGROUP:
10614 case GIMPLE_OMP_CRITICAL:
10615 case GIMPLE_OMP_TEAMS:
10616 expand_omp_synch (region);
10617 break;
10619 case GIMPLE_OMP_ATOMIC_LOAD:
10620 expand_omp_atomic (region);
10621 break;
10623 case GIMPLE_OMP_TARGET:
10624 expand_omp_target (region);
10625 break;
10627 default:
10628 gcc_unreachable ();
10631 input_location = saved_location;
10632 region = region->next;
10634 if (omp_any_child_fn_dumped)
10636 if (dump_file)
10637 dump_function_header (dump_file, current_function_decl, dump_flags);
10638 omp_any_child_fn_dumped = false;
10642 /* Helper for build_omp_regions. Scan the dominator tree starting at
10643 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10644 true, the function stops once a single tree is built (otherwise, the
10645 whole forest of OMP constructs may be built). */
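 /* The pairing of entry and exit blocks relies on dominator order: an
    OMP directive dominates its whole body, so, schematically,
      GIMPLE_OMP_PARALLEL   <- opens a region
        GIMPLE_OMP_FOR      <- opens a child region
        GIMPLE_OMP_RETURN   <- closes the GIMPLE_OMP_FOR region
      GIMPLE_OMP_RETURN     <- closes the GIMPLE_OMP_PARALLEL region.  */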
10647 static void
10648 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10649 bool single_tree)
10651 gimple_stmt_iterator gsi;
10652 gimple *stmt;
10653 basic_block son;
10655 gsi = gsi_last_nondebug_bb (bb);
10656 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10658 struct omp_region *region;
10659 enum gimple_code code;
10661 stmt = gsi_stmt (gsi);
10662 code = gimple_code (stmt);
10663 if (code == GIMPLE_OMP_RETURN)
10665 /* STMT is the return point out of region PARENT. Mark it
10666 as the exit point and make PARENT the immediately
10667 enclosing region. */
10668 gcc_assert (parent);
10669 region = parent;
10670 region->exit = bb;
10671 parent = parent->outer;
10673 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10675 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10676 GIMPLE_OMP_RETURN, but matches with
10677 GIMPLE_OMP_ATOMIC_LOAD. */
10678 gcc_assert (parent);
10679 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10680 region = parent;
10681 region->exit = bb;
10682 parent = parent->outer;
10684 else if (code == GIMPLE_OMP_CONTINUE)
10686 gcc_assert (parent);
10687 parent->cont = bb;
10689 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10691 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10692 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10694 else
10696 region = new_omp_region (bb, code, parent);
10697 /* Otherwise... */
10698 if (code == GIMPLE_OMP_TARGET)
10700 switch (gimple_omp_target_kind (stmt))
10702 case GF_OMP_TARGET_KIND_REGION:
10703 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10704 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10705 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10706 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10707 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10708 break;
10709 case GF_OMP_TARGET_KIND_UPDATE:
10710 case GF_OMP_TARGET_KIND_ENTER_DATA:
10711 case GF_OMP_TARGET_KIND_EXIT_DATA:
10712 case GF_OMP_TARGET_KIND_DATA:
10713 case GF_OMP_TARGET_KIND_OACC_DATA:
10714 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10715 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10716 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10717 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10718 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10719 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10720 /* ..., other than for those stand-alone directives...
10721 To be precise, target data isn't stand-alone, but
10722 the gimplifier puts the end API call into a try/finally block
10723 for it, so omp expansion can treat it as such. */
10724 region = NULL;
10725 break;
10726 default:
10727 gcc_unreachable ();
10730 else if (code == GIMPLE_OMP_ORDERED
10731 && gimple_omp_ordered_standalone_p (stmt))
10732 /* #pragma omp ordered depend is also just a stand-alone
10733 directive. */
10734 region = NULL;
10735 else if (code == GIMPLE_OMP_TASK
10736 && gimple_omp_task_taskwait_p (stmt))
10737 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10738 region = NULL;
10739 else if (code == GIMPLE_OMP_TASKGROUP)
10740 /* #pragma omp taskgroup isn't a stand-alone directive, but
10741 the gimplifier puts the end API call into a try/finally block
10742 for it, so omp expansion can treat it as such. */
10743 region = NULL;
10744 /* ..., this directive becomes the parent for a new region. */
10745 if (region)
10746 parent = region;
10750 if (single_tree && !parent)
10751 return;
10753 for (son = first_dom_son (CDI_DOMINATORS, bb);
10754 son;
10755 son = next_dom_son (CDI_DOMINATORS, son))
10756 build_omp_regions_1 (son, parent, single_tree);
10759 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10760 root_omp_region. */
10762 static void
10763 build_omp_regions_root (basic_block root)
10765 gcc_assert (root_omp_region == NULL);
10766 build_omp_regions_1 (root, NULL, true);
10767 gcc_assert (root_omp_region != NULL);
10770 /* Expand the omp construct (and its subconstructs) starting at HEAD. */
10772 void
10773 omp_expand_local (basic_block head)
10775 build_omp_regions_root (head);
10776 if (dump_file && (dump_flags & TDF_DETAILS))
10778 fprintf (dump_file, "\nOMP region tree\n\n");
10779 dump_omp_region (dump_file, root_omp_region, 0);
10780 fprintf (dump_file, "\n");
10783 remove_exit_barriers (root_omp_region);
10784 expand_omp (root_omp_region);
10786 omp_free_regions ();
10789 /* Scan the CFG and build the forest of OMP regions, storing it in
10790 root_omp_region. */
10792 static void
10793 build_omp_regions (void)
10795 gcc_assert (root_omp_region == NULL);
10796 calculate_dominance_info (CDI_DOMINATORS);
10797 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10800 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10802 static unsigned int
10803 execute_expand_omp (void)
10805 build_omp_regions ();
10807 if (!root_omp_region)
10808 return 0;
10810 if (dump_file)
10812 fprintf (dump_file, "\nOMP region tree\n\n");
10813 dump_omp_region (dump_file, root_omp_region, 0);
10814 fprintf (dump_file, "\n");
10817 remove_exit_barriers (root_omp_region);
10819 expand_omp (root_omp_region);
10821 omp_free_regions ();
10823 return (TODO_cleanup_cfg
10824 | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
10827 /* OMP expansion -- the default pass, run before creation of SSA form. */
10829 namespace {
10831 const pass_data pass_data_expand_omp =
10833 GIMPLE_PASS, /* type */
10834 "ompexp", /* name */
10835 OPTGROUP_OMP, /* optinfo_flags */
10836 TV_NONE, /* tv_id */
10837 PROP_gimple_any, /* properties_required */
10838 PROP_gimple_eomp, /* properties_provided */
10839 0, /* properties_destroyed */
10840 0, /* todo_flags_start */
10841 0, /* todo_flags_finish */
10844 class pass_expand_omp : public gimple_opt_pass
10846 public:
10847 pass_expand_omp (gcc::context *ctxt)
10848 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10851 /* opt_pass methods: */
10852 unsigned int execute (function *) final override
10854 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10855 || flag_openmp_simd != 0)
10856 && !seen_error ());
10858 /* This pass always runs, to provide PROP_gimple_eomp.
10859 But often, there is nothing to do. */
10860 if (!gate)
10861 return 0;
10863 return execute_expand_omp ();
10866 }; // class pass_expand_omp
10868 } // anon namespace
10870 gimple_opt_pass *
10871 make_pass_expand_omp (gcc::context *ctxt)
10873 return new pass_expand_omp (ctxt);
10876 namespace {
10878 const pass_data pass_data_expand_omp_ssa =
10880 GIMPLE_PASS, /* type */
10881 "ompexpssa", /* name */
10882 OPTGROUP_OMP, /* optinfo_flags */
10883 TV_NONE, /* tv_id */
10884 PROP_cfg | PROP_ssa, /* properties_required */
10885 PROP_gimple_eomp, /* properties_provided */
10886 0, /* properties_destroyed */
10887 0, /* todo_flags_start */
10888 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10891 class pass_expand_omp_ssa : public gimple_opt_pass
10893 public:
10894 pass_expand_omp_ssa (gcc::context *ctxt)
10895 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10898 /* opt_pass methods: */
10899 bool gate (function *fun) final override
10901 return !(fun->curr_properties & PROP_gimple_eomp);
10903 unsigned int execute (function *) final override
10905 return execute_expand_omp ();
10907 opt_pass * clone () final override
10909 return new pass_expand_omp_ssa (m_ctxt);
10912 }; // class pass_expand_omp_ssa
10914 } // anon namespace
10916 gimple_opt_pass *
10917 make_pass_expand_omp_ssa (gcc::context *ctxt)
10919 return new pass_expand_omp_ssa (ctxt);
10922 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10923 GIMPLE_* codes. */
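 /* The current innermost region is threaded through *REGION between
    calls: a directive such as GIMPLE_OMP_PARALLEL opens a region below,
    and its matching GIMPLE_OMP_RETURN later records the exit block and
    pops back to the enclosing region.  */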
10925 bool
10926 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10927 int *region_idx)
10929 gimple *last = last_stmt (bb);
10930 enum gimple_code code = gimple_code (last);
10931 struct omp_region *cur_region = *region;
10932 bool fallthru = false;
10934 switch (code)
10936 case GIMPLE_OMP_PARALLEL:
10937 case GIMPLE_OMP_FOR:
10938 case GIMPLE_OMP_SINGLE:
10939 case GIMPLE_OMP_TEAMS:
10940 case GIMPLE_OMP_MASTER:
10941 case GIMPLE_OMP_MASKED:
10942 case GIMPLE_OMP_SCOPE:
10943 case GIMPLE_OMP_CRITICAL:
10944 case GIMPLE_OMP_SECTION:
10945 cur_region = new_omp_region (bb, code, cur_region);
10946 fallthru = true;
10947 break;
10949 case GIMPLE_OMP_TASKGROUP:
10950 cur_region = new_omp_region (bb, code, cur_region);
10951 fallthru = true;
10952 cur_region = cur_region->outer;
10953 break;
10955 case GIMPLE_OMP_TASK:
10956 cur_region = new_omp_region (bb, code, cur_region);
10957 fallthru = true;
10958 if (gimple_omp_task_taskwait_p (last))
10959 cur_region = cur_region->outer;
10960 break;
10962 case GIMPLE_OMP_ORDERED:
10963 cur_region = new_omp_region (bb, code, cur_region);
10964 fallthru = true;
10965 if (gimple_omp_ordered_standalone_p (last))
10966 cur_region = cur_region->outer;
10967 break;
10969 case GIMPLE_OMP_TARGET:
10970 cur_region = new_omp_region (bb, code, cur_region);
10971 fallthru = true;
10972 switch (gimple_omp_target_kind (last))
10974 case GF_OMP_TARGET_KIND_REGION:
10975 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10976 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10977 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10978 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10979 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10980 break;
10981 case GF_OMP_TARGET_KIND_UPDATE:
10982 case GF_OMP_TARGET_KIND_ENTER_DATA:
10983 case GF_OMP_TARGET_KIND_EXIT_DATA:
10984 case GF_OMP_TARGET_KIND_DATA:
10985 case GF_OMP_TARGET_KIND_OACC_DATA:
10986 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10987 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10988 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10989 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10990 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10991 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10992 cur_region = cur_region->outer;
10993 break;
10994 default:
10995 gcc_unreachable ();
10997 break;
10999 case GIMPLE_OMP_SECTIONS:
11000 cur_region = new_omp_region (bb, code, cur_region);
11001 fallthru = true;
11002 break;
11004 case GIMPLE_OMP_SECTIONS_SWITCH:
11005 fallthru = false;
11006 break;
11008 case GIMPLE_OMP_ATOMIC_LOAD:
11009 case GIMPLE_OMP_ATOMIC_STORE:
11010 fallthru = true;
11011 break;
11013 case GIMPLE_OMP_RETURN:
11014 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
11015 somewhere other than the next block. This will be
11016 created later. */
11017 cur_region->exit = bb;
11018 if (cur_region->type == GIMPLE_OMP_TASK)
11019 /* Add an edge corresponding to not scheduling the task
11020 immediately. */
11021 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
11022 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
11023 cur_region = cur_region->outer;
11024 break;
11026 case GIMPLE_OMP_CONTINUE:
11027 cur_region->cont = bb;
11028 switch (cur_region->type)
11030 case GIMPLE_OMP_FOR:
11031 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
11032 successor edges as abnormal to prevent splitting
11033 them. */
11034 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
11035 /* Make the loopback edge. */
11036 make_edge (bb, single_succ (cur_region->entry),
11037 EDGE_ABNORMAL);
11039 /* Create an edge from GIMPLE_OMP_FOR to exit, which
11040 corresponds to the case that the body of the loop
11041 is not executed at all. */
11042 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
11043 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
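 /* So, besides the edge into the loop body, a GIMPLE_OMP_FOR region
    gets a loopback edge from the GIMPLE_OMP_CONTINUE block to the body,
    a zero-trip edge from the entry straight to the exit, and an edge
    from the continue block to the exit.  */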
11044 fallthru = false;
11045 break;
11047 case GIMPLE_OMP_SECTIONS:
11048 /* Wire up the edges into and out of the nested sections. */
11050 basic_block switch_bb = single_succ (cur_region->entry);
11052 struct omp_region *i;
11053 for (i = cur_region->inner; i ; i = i->next)
11055 gcc_assert (i->type == GIMPLE_OMP_SECTION);
11056 make_edge (switch_bb, i->entry, 0);
11057 make_edge (i->exit, bb, EDGE_FALLTHRU);
11060 /* Make the loopback edge to the block with
11061 GIMPLE_OMP_SECTIONS_SWITCH. */
11062 make_edge (bb, switch_bb, 0);
11064 /* Make the edge from the switch to exit. */
11065 make_edge (switch_bb, bb->next_bb, 0);
11066 fallthru = false;
11068 break;
11070 case GIMPLE_OMP_TASK:
11071 fallthru = true;
11072 break;
11074 default:
11075 gcc_unreachable ();
11077 break;
11079 default:
11080 gcc_unreachable ();
11083 if (*region != cur_region)
11085 *region = cur_region;
11086 if (cur_region)
11087 *region_idx = cur_region->entry->index;
11088 else
11089 *region_idx = 0;
11092 return fallthru;