/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
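
/* A worked example of the rounding above (an illustrative sketch, not
   from the sources): with a simd schedule modifier and
   omp_max_vf () == 4, a requested chunk size of 5 becomes

     (5 + (4 - 1)) & -4 == 8

   i.e. the chunk size is rounded up to the next multiple of the
   vectorization factor, so every chunk covers whole vector
   iterations.  */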

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
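
/* To illustrate where these values end up (a sketch, assuming a
   combined parallel+for with a dynamic schedule), the three or four
   trees pushed above become the trailing arguments of the combined
   libgomp entry point, e.g.

     GOMP_parallel_loop_dynamic (fn, data, num_threads,
				 n1, n2, step, chunk, flags);

   matching the start/end/incr/chunk_size parameters of the library
   call that expand_parallel_call emits.  */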

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
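
/* For example (a sketch), the tree (a < b) is decomposed into
   gimple_build_cond (LT_EXPR, a, b, NULL_TREE, NULL_TREE), i.e. a
   GIMPLE_COND with no explicit branch labels; the caller wires up the
   true/false destinations via outgoing CFG edges instead.  */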

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
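
/* For reference, a minimal example of the expansion this function
   performs (a sketch, not exact compiler output; the outlined function
   name is illustrative):

     #pragma omp parallel num_threads (4)
       body;

   becomes, in the parent function,

     .omp_data_o.x = x;
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   with BODY outlined into foo._omp_fn.0 by expand_omp_taskreg.  */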

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
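
/* A sketch of the non-taskloop case: for

     #pragma omp task if (c)
       body;

   the call emitted above has the shape

     GOMP_task (foo._omp_fn.1, &.omp_data_o, cpyfn, arg_size, arg_align,
		c != 0, flags, depend, priority);

   where cpyfn degenerates to NULL when no firstprivate copy
   constructor is needed and depend to a null pointer when no depend
   clause is present.  (The outlined function name is illustrative.)  */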

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}
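
/* E.g. (a sketch) a vector [a, b, c] becomes the chain a -> b -> c:
   the elements are visited in reverse and each one is prepended to
   the chain built so far.  */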

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
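
/* As an example of the optimization above (a sketch):

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...    <- implicit barrier after the loop
     }                  <- implicit barrier at the end of the parallel

   the barrier left by the worksharing loop is redundant, because the
   implicit barrier at the end of the parallel region immediately
   follows it; the inner GIMPLE_OMP_RETURN can therefore be marked
   nowait, unless queued tasks might still reference addressable
   locals, as checked above.  */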

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
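
/* For instance (a sketch), within the outlined body of a parallel
   region a call

     n = omp_get_num_threads ();

   is redirected to __builtin_omp_get_num_threads, whose declaration is
   const, so CSE can reuse a single result for repeated calls inside
   the region, which the external omp_get_num_threads declaration would
   not allow.  */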

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We ignore the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var. */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
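
/* A worked example of the count computation above (a sketch): for an
   upward loop with b = 0, e = 10, s = 3 and cond_code LT_EXPR,

     range = e - b = 10
     iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4

   i.e. the iterations 0, 3, 6 and 9; the per-loop iteration counts are
   multiplied into TOTAL to form the collapsed outer loop bound.  */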

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
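
/* To illustrate (a sketch): for a collapse of two loops with iteration
   counts ITERS1 (outer) and ITERS2 (inner), the combined variable IVAR
   in [0, ITERS1 * ITERS2) is unpacked innermost-first as

     v2 = base2 + (ivar % iters2) * step2;
     v1 = base1 + (ivar / iters2) * step1;

   which is exactly what the division/modulus pair above computes at
   each level of the nest.  */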

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula,
   otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   the count is computed as:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	  for (tmpj = M21 * tmpi + N21;
	       tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	    {
	      int tmpk1 = M31 * tmpj + N31;
	      int tmpk2 = M32 * tmpj + N32;
	      if (tmpk1 COND3 tmpk2)
		{
		  if (COND3 is <)
		    adj = STEP3 - 1;
		  else
		    adj = STEP3 + 1;
		  COUNT += (adj + tmpk2 - tmpk1) / STEP3;
		}
	    }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

     bool zero3, zero2, zero1, zero;

     zero3 = N32 c3 N31;
     count3 = (N32 - N31) /[cl] STEP3;
     zero2 = N22 c2 N21;
     count2 = (N22 - N21) /[cl] STEP2;
     zero1 = N12 c1 N11;
     count1 = (N12 - N11) /[cl] STEP1;
     zero = zero3 || zero2 || zero1;
     count = count1 * count2 * count3;
     if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	{
	  tree c[4];
	  for (i = 0; i < 4; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      c[i] = OMP_CLAUSE_DECL (innerc);
	    }
	  counts[0] = c[0];
	  fd->first_inner_iterations = c[1];
	  fd->factor = c[2];
	  fd->adjn1 = c[3];
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  bool rect_count_seen = false;
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if (fd->non_rect)
	{
	  /* Skip loops that use outer iterators in their expressions
	     during this phase.  */
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    {
	      counts[i] = build_zero_cst (type);
	      continue;
	    }
	}
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1895 ne->probability = profile_probability::very_unlikely ();
1896 e->flags = EDGE_TRUE_VALUE;
1897 e->probability = ne->probability.invert ();
1898 if (l2_dom_bb == NULL)
1899 l2_dom_bb = entry_bb;
1900 entry_bb = e->dest;
1901 *gsi = gsi_last_nondebug_bb (entry_bb);
1904 if (POINTER_TYPE_P (itype))
1905 itype = signed_type_for (itype);
1906 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1907 ? -1 : 1));
1908 t = fold_build2 (PLUS_EXPR, itype,
1909 fold_convert (itype, fd->loops[i].step), t);
1910 t = fold_build2 (PLUS_EXPR, itype, t,
1911 fold_convert (itype, fd->loops[i].n2));
1912 t = fold_build2 (MINUS_EXPR, itype, t,
1913 fold_convert (itype, fd->loops[i].n1));
1914 /* ?? We could probably use CEIL_DIV_EXPR instead of
1915 TRUNC_DIV_EXPR and avoid the adjustment by hand, unless we can't
1916 generate the same code in the end because generically we
1917 don't know that the values involved must be negative for
1918 GT?? */
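/* Worked example (illustration only): for a loop
     for (v = 3; v < 10; v += 4)
   the computation around this point forms
   t = (STEP - 1) + N2 - N1 = 3 + 10 - 3 = 10 and then
   counts[i] = 10 / 4 = 2, matching the two iterations v = 3 and
   v = 7; the -1/+1 adjustment turns the truncating division into a
   ceiling division of N2 - N1 by STEP.  */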
1919 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1920 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1921 fold_build1 (NEGATE_EXPR, itype, t),
1922 fold_build1 (NEGATE_EXPR, itype,
1923 fold_convert (itype,
1924 fd->loops[i].step)));
1925 else
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1927 fold_convert (itype, fd->loops[i].step));
1928 t = fold_convert (type, t);
1929 if (TREE_CODE (t) == INTEGER_CST)
1930 counts[i] = t;
1931 else
1933 if (i < fd->collapse || i != first_zero_iter2)
1934 counts[i] = create_tmp_reg (type, ".count");
1935 expand_omp_build_assign (gsi, counts[i], t);
1937 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1939 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1940 continue;
1941 if (!rect_count_seen)
1943 t = counts[i];
1944 rect_count_seen = true;
1946 else
1947 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1948 expand_omp_build_assign (gsi, fd->loop.n2, t);
1951 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1953 gcc_assert (fd->last_nonrect != -1);
1955 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1956 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1957 build_zero_cst (type));
1958 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1959 if (fd->loops[i].m1
1960 || fd->loops[i].m2
1961 || fd->loops[i].non_rect_referenced)
1962 break;
1963 if (i == fd->last_nonrect
1964 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1965 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1967 int o = fd->first_nonrect;
1968 tree itype = TREE_TYPE (fd->loops[o].v);
1969 tree n1o = create_tmp_reg (itype, ".n1o");
1970 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1971 expand_omp_build_assign (gsi, n1o, t);
1972 tree n2o = create_tmp_reg (itype, ".n2o");
1973 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1974 expand_omp_build_assign (gsi, n2o, t);
1975 if (fd->loops[i].m1 && fd->loops[i].m2)
1976 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1977 unshare_expr (fd->loops[i].m1));
1978 else if (fd->loops[i].m1)
1979 t = fold_unary (NEGATE_EXPR, itype,
1980 unshare_expr (fd->loops[i].m1));
1981 else
1982 t = unshare_expr (fd->loops[i].m2);
1983 tree m2minusm1
1984 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1985 true, GSI_SAME_STMT);
1987 gimple_stmt_iterator gsi2 = *gsi;
1988 gsi_prev (&gsi2);
1989 e = split_block (entry_bb, gsi_stmt (gsi2));
1990 e = split_block (e->dest, (gimple *) NULL);
1991 basic_block bb1 = e->src;
1992 entry_bb = e->dest;
1993 *gsi = gsi_after_labels (entry_bb);
1995 gsi2 = gsi_after_labels (bb1);
1996 tree ostep = fold_convert (itype, fd->loops[o].step);
1997 t = build_int_cst (itype, (fd->loops[o].cond_code
1998 == LT_EXPR ? -1 : 1));
1999 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2000 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2001 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2002 if (TYPE_UNSIGNED (itype)
2003 && fd->loops[o].cond_code == GT_EXPR)
2004 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2005 fold_build1 (NEGATE_EXPR, itype, t),
2006 fold_build1 (NEGATE_EXPR, itype, ostep));
2007 else
2008 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2009 tree outer_niters
2010 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2011 true, GSI_SAME_STMT);
2012 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2013 build_one_cst (itype));
2014 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2015 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2016 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 tree n1, n2, n1e, n2e;
2019 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2020 if (fd->loops[i].m1)
2022 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2023 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2024 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2026 else
2027 n1 = t;
2028 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2029 true, GSI_SAME_STMT);
2030 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2031 if (fd->loops[i].m2)
2033 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2034 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2035 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2037 else
2038 n2 = t;
2039 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2040 true, GSI_SAME_STMT);
2041 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2042 if (fd->loops[i].m1)
2044 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2045 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2046 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2048 else
2049 n1e = t;
2050 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2051 true, GSI_SAME_STMT);
2052 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2053 if (fd->loops[i].m2)
2055 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2056 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2057 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2059 else
2060 n2e = t;
2061 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2062 true, GSI_SAME_STMT);
2063 gcond *cond_stmt
2064 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2065 NULL_TREE, NULL_TREE);
2066 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2067 e = split_block (bb1, cond_stmt);
2068 e->flags = EDGE_TRUE_VALUE;
2069 e->probability = profile_probability::likely ().guessed ();
2070 basic_block bb2 = e->dest;
2071 gsi2 = gsi_after_labels (bb2);
2073 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2074 NULL_TREE, NULL_TREE);
2075 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2076 e = split_block (bb2, cond_stmt);
2077 e->flags = EDGE_TRUE_VALUE;
2078 e->probability = profile_probability::likely ().guessed ();
2079 gsi2 = gsi_after_labels (e->dest);
2081 tree step = fold_convert (itype, fd->loops[i].step);
2082 t = build_int_cst (itype, (fd->loops[i].cond_code
2083 == LT_EXPR ? -1 : 1));
2084 t = fold_build2 (PLUS_EXPR, itype, step, t);
2085 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2086 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2087 if (TYPE_UNSIGNED (itype)
2088 && fd->loops[i].cond_code == GT_EXPR)
2089 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2090 fold_build1 (NEGATE_EXPR, itype, t),
2091 fold_build1 (NEGATE_EXPR, itype, step));
2092 else
2093 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2094 tree first_inner_iterations
2095 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2096 true, GSI_SAME_STMT);
2097 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2098 if (TYPE_UNSIGNED (itype)
2099 && fd->loops[i].cond_code == GT_EXPR)
2100 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2101 fold_build1 (NEGATE_EXPR, itype, t),
2102 fold_build1 (NEGATE_EXPR, itype, step));
2103 else
2104 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2105 tree factor
2106 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2107 true, GSI_SAME_STMT);
2108 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2109 build_one_cst (itype));
2110 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2111 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2112 t = fold_build2 (MULT_EXPR, itype, factor, t);
2113 t = fold_build2 (PLUS_EXPR, itype,
2114 fold_build2 (MULT_EXPR, itype, outer_niters,
2115 first_inner_iterations), t);
2116 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2117 fold_convert (type, t));
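/* The value just stored is Faulhaber's closed form for the triangular
   count: with the inner loop executing FIRST_INNER_ITERATIONS
   + c * FACTOR times on the c-th outer iteration
   (c = 0 .. OUTER_NITERS - 1),
     COUNT = OUTER_NITERS * FIRST_INNER_ITERATIONS
	     + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2,
   the RSHIFT_EXPR by 1 above implementing the division by 2.  */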
2119 basic_block bb3 = create_empty_bb (bb1);
2120 add_bb_to_loop (bb3, bb1->loop_father);
2122 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2123 e->probability = profile_probability::unlikely ().guessed ();
2125 gsi2 = gsi_after_labels (bb3);
2126 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2127 NULL_TREE, NULL_TREE);
2128 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2129 e = split_block (bb3, cond_stmt);
2130 e->flags = EDGE_TRUE_VALUE;
2131 e->probability = profile_probability::likely ().guessed ();
2132 basic_block bb4 = e->dest;
2134 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2135 ne->probability = e->probability.invert ();
2137 basic_block bb5 = create_empty_bb (bb2);
2138 add_bb_to_loop (bb5, bb2->loop_father);
2140 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2141 ne->probability = profile_probability::unlikely ().guessed ();
2143 for (int j = 0; j < 2; j++)
2145 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2146 t = fold_build2 (MINUS_EXPR, itype,
2147 unshare_expr (fd->loops[i].n1),
2148 unshare_expr (fd->loops[i].n2));
2149 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2150 tree tem
2151 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2152 true, GSI_SAME_STMT);
2153 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2154 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2155 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2156 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2157 true, GSI_SAME_STMT);
2158 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2159 if (fd->loops[i].m1)
2161 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2162 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2163 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2165 else
2166 n1 = t;
2167 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2168 true, GSI_SAME_STMT);
2169 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2170 if (fd->loops[i].m2)
2172 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2173 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2174 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2176 else
2177 n2 = t;
2178 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2179 true, GSI_SAME_STMT);
2180 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2182 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2183 NULL_TREE, NULL_TREE);
2184 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2185 e = split_block (gsi_bb (gsi2), cond_stmt);
2186 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2187 e->probability = profile_probability::unlikely ().guessed ();
2188 ne = make_edge (e->src, bb1,
2189 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2190 ne->probability = e->probability.invert ();
2191 gsi2 = gsi_after_labels (e->dest);
2193 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2194 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2196 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2199 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2200 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2201 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2203 if (fd->first_nonrect + 1 == fd->last_nonrect)
2205 fd->first_inner_iterations = first_inner_iterations;
2206 fd->factor = factor;
2207 fd->adjn1 = n1o;
2210 else
2212 /* Fallback implementation. Evaluate the loops with m1/m2
2213 non-NULL as well as their outer loops at runtime using temporaries
2214 instead of the original iteration variables, and in the
2215 body just bump the counter. */
2216 gimple_stmt_iterator gsi2 = *gsi;
2217 gsi_prev (&gsi2);
2218 e = split_block (entry_bb, gsi_stmt (gsi2));
2219 e = split_block (e->dest, (gimple *) NULL);
2220 basic_block cur_bb = e->src;
2221 basic_block next_bb = e->dest;
2222 entry_bb = e->dest;
2223 *gsi = gsi_after_labels (entry_bb);
2225 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2226 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2228 for (i = 0; i <= fd->last_nonrect; i++)
2230 if (fd->loops[i].m1 == NULL_TREE
2231 && fd->loops[i].m2 == NULL_TREE
2232 && !fd->loops[i].non_rect_referenced)
2233 continue;
2235 tree itype = TREE_TYPE (fd->loops[i].v);
2237 gsi2 = gsi_after_labels (cur_bb);
2238 tree n1, n2;
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2240 if (fd->loops[i].m1)
2242 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2243 n1 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n1);
2245 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2247 else
2248 n1 = t;
2249 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i < fd->last_nonrect)
2253 vs[i] = create_tmp_reg (itype, ".it");
2254 expand_omp_build_assign (&gsi2, vs[i], n1);
2256 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2257 if (fd->loops[i].m2)
2259 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2260 n2 = fold_build2 (MULT_EXPR, itype,
2261 vs[i - fd->loops[i].outer], n2);
2262 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2264 else
2265 n2 = t;
2266 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2267 true, GSI_SAME_STMT);
2268 if (i == fd->last_nonrect)
2270 gcond *cond_stmt
2271 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2272 NULL_TREE, NULL_TREE);
2273 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2274 e = split_block (cur_bb, cond_stmt);
2275 e->flags = EDGE_TRUE_VALUE;
2276 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2277 e->probability = profile_probability::likely ().guessed ();
2278 ne->probability = e->probability.invert ();
2279 gsi2 = gsi_after_labels (e->dest);
2281 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2282 ? -1 : 1));
2283 t = fold_build2 (PLUS_EXPR, itype,
2284 fold_convert (itype, fd->loops[i].step), t);
2285 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2286 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2287 tree step = fold_convert (itype, fd->loops[i].step);
2288 if (TYPE_UNSIGNED (itype)
2289 && fd->loops[i].cond_code == GT_EXPR)
2290 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2291 fold_build1 (NEGATE_EXPR, itype, t),
2292 fold_build1 (NEGATE_EXPR, itype, step));
2293 else
2294 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2295 t = fold_convert (type, t);
2296 t = fold_build2 (PLUS_EXPR, type,
2297 counts[fd->last_nonrect], t);
2298 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2299 true, GSI_SAME_STMT);
2300 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2301 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2302 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2303 break;
2305 e = split_block (cur_bb, last_stmt (cur_bb));
2307 basic_block new_cur_bb = create_empty_bb (cur_bb);
2308 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2310 gsi2 = gsi_after_labels (e->dest);
2311 tree step = fold_convert (itype,
2312 unshare_expr (fd->loops[i].step));
2313 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2314 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2315 true, GSI_SAME_STMT);
2316 expand_omp_build_assign (&gsi2, vs[i], t);
2318 ne = split_block (e->dest, last_stmt (e->dest));
2319 gsi2 = gsi_after_labels (ne->dest);
2321 gcond *cond_stmt
2322 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2323 NULL_TREE, NULL_TREE);
2324 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2325 edge e3, e4;
2326 if (next_bb == entry_bb)
2328 e3 = find_edge (ne->dest, next_bb);
2329 e3->flags = EDGE_FALSE_VALUE;
2331 else
2332 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2333 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2334 e4->probability = profile_probability::likely ().guessed ();
2335 e3->probability = e4->probability.invert ();
2336 basic_block esrc = e->src;
2337 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2338 cur_bb = new_cur_bb;
2339 basic_block latch_bb = next_bb;
2340 next_bb = e->dest;
2341 remove_edge (e);
2342 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2343 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2344 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2347 t = NULL_TREE;
2348 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2349 if (!fd->loops[i].non_rect_referenced
2350 && fd->loops[i].m1 == NULL_TREE
2351 && fd->loops[i].m2 == NULL_TREE)
2353 if (t == NULL_TREE)
2354 t = counts[i];
2355 else
2356 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2358 if (t)
2360 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2361 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2363 if (!rect_count_seen)
2364 t = counts[fd->last_nonrect];
2365 else
2366 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2367 counts[fd->last_nonrect]);
2368 expand_omp_build_assign (gsi, fd->loop.n2, t);
2370 else if (fd->non_rect)
2372 tree t = fd->loop.n2;
2373 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2374 int non_rect_referenced = 0, non_rect = 0;
2375 for (i = 0; i < fd->collapse; i++)
2377 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2378 && !integer_zerop (counts[i]))
2379 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2380 if (fd->loops[i].non_rect_referenced)
2381 non_rect_referenced++;
2382 if (fd->loops[i].m1 || fd->loops[i].m2)
2383 non_rect++;
2385 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2386 counts[fd->last_nonrect] = t;
2390 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2391 T = V;
2392 V3 = N31 + (T % count3) * STEP3;
2393 T = T / count3;
2394 V2 = N21 + (T % count2) * STEP2;
2395 T = T / count2;
2396 V1 = N11 + T * STEP1;
2397 if this loop doesn't have an inner loop construct combined with it.
2398 If it does have an inner loop construct combined with it and the
2399 iteration count isn't known constant, store values from counts array
2400 into its _looptemp_ temporaries instead.
2401 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2402 inclusive), use the count of all those loops together, and either
2403 find the roots of the quadratic (etc.) equation, or, as a fallback, do:
2404 COUNT = 0;
2405 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2406 for (tmpj = M21 * tmpi + N21;
2407 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2409 int tmpk1 = M31 * tmpj + N31;
2410 int tmpk2 = M32 * tmpj + N32;
2411 if (tmpk1 COND3 tmpk2)
2413 if (COND3 is <)
2414 adj = STEP3 - 1;
2415 else
2416 adj = STEP3 + 1;
2417 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2418 if (COUNT + temp > T)
2420 V1 = tmpi;
2421 V2 = tmpj;
2422 V3 = tmpk1 + (T - COUNT) * STEP3;
2423 goto done;
2425 else
2426 COUNT += temp;
2429 done:;
2430 but for optional innermost or outermost rectangular loops that aren't
2431 referenced by other loop expressions keep doing the division/modulo. */
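/* Illustration (a minimal sketch with hypothetical names, not compiler
   code): for three rectangular collapsed loops the division/modulo
   recovery above amounts to treating T as a mixed-radix number with
   digit sizes (count2, count3):

     static void
     recover_indices (long long t,
		      long long count2, long long count3,
		      long long n11, long long n21, long long n31,
		      long long step1, long long step2, long long step3,
		      long long *v1, long long *v2, long long *v3)
     {
       *v3 = n31 + (t % count3) * step3;	// innermost varies fastest
       t /= count3;
       *v2 = n21 + (t % count2) * step2;
       t /= count2;
       *v1 = n11 + t * step1;			// outermost digit
     }
*/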
2433 static void
2434 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2435 tree *counts, tree *nonrect_bounds,
2436 gimple *inner_stmt, tree startvar)
2438 int i;
2439 if (gimple_omp_for_combined_p (fd->for_stmt))
2441 /* If fd->loop.n2 is constant, then no propagation of the counts
2442 is needed, they are constant. */
2443 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2444 return;
2446 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2447 ? gimple_omp_taskreg_clauses (inner_stmt)
2448 : gimple_omp_for_clauses (inner_stmt);
2449 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2450 isn't supposed to be handled, as the inner loop doesn't
2451 use it. */
2452 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2453 gcc_assert (innerc);
2454 int count = 0;
2455 if (fd->non_rect
2456 && fd->last_nonrect == fd->first_nonrect + 1
2457 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2458 count = 4;
2459 for (i = 0; i < fd->collapse + count; i++)
2461 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2462 OMP_CLAUSE__LOOPTEMP_);
2463 gcc_assert (innerc);
2464 if (i)
2466 tree tem = OMP_CLAUSE_DECL (innerc);
2467 tree t;
2468 if (i < fd->collapse)
2469 t = counts[i];
2470 else
2471 switch (i - fd->collapse)
2473 case 0: t = counts[0]; break;
2474 case 1: t = fd->first_inner_iterations; break;
2475 case 2: t = fd->factor; break;
2476 case 3: t = fd->adjn1; break;
2477 default: gcc_unreachable ();
2479 t = fold_convert (TREE_TYPE (tem), t);
2480 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2481 false, GSI_CONTINUE_LINKING);
2482 gassign *stmt = gimple_build_assign (tem, t);
2483 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2486 return;
2489 tree type = TREE_TYPE (fd->loop.v);
2490 tree tem = create_tmp_reg (type, ".tem");
2491 gassign *stmt = gimple_build_assign (tem, startvar);
2492 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2494 for (i = fd->collapse - 1; i >= 0; i--)
2496 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2497 itype = vtype;
2498 if (POINTER_TYPE_P (vtype))
2499 itype = signed_type_for (vtype);
2500 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2501 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2502 else
2503 t = tem;
2504 if (i == fd->last_nonrect)
2506 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2507 false, GSI_CONTINUE_LINKING);
2508 tree stopval = t;
2509 tree idx = create_tmp_reg (type, ".count");
2510 expand_omp_build_assign (gsi, idx,
2511 build_zero_cst (type), true);
2512 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2513 if (fd->first_nonrect + 1 == fd->last_nonrect
2514 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2515 || fd->first_inner_iterations)
2516 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2517 != CODE_FOR_nothing))
2519 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2520 tree itype = TREE_TYPE (fd->loops[i].v);
2521 tree first_inner_iterations = fd->first_inner_iterations;
2522 tree factor = fd->factor;
2523 gcond *cond_stmt
2524 = gimple_build_cond (NE_EXPR, factor,
2525 build_zero_cst (TREE_TYPE (factor)),
2526 NULL_TREE, NULL_TREE);
2527 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2528 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2529 basic_block bb0 = e->src;
2530 e->flags = EDGE_TRUE_VALUE;
2531 e->probability = profile_probability::likely ();
2532 bb_triang_dom = bb0;
2533 *gsi = gsi_after_labels (e->dest);
2534 tree slltype = long_long_integer_type_node;
2535 tree ulltype = long_long_unsigned_type_node;
2536 tree stopvalull = fold_convert (ulltype, stopval);
2537 stopvalull
2538 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2539 false, GSI_CONTINUE_LINKING);
2540 first_inner_iterations
2541 = fold_convert (slltype, first_inner_iterations);
2542 first_inner_iterations
2543 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2544 NULL_TREE, false,
2545 GSI_CONTINUE_LINKING);
2546 factor = fold_convert (slltype, factor);
2547 factor
2548 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2549 false, GSI_CONTINUE_LINKING);
2550 tree first_inner_iterationsd
2551 = fold_build1 (FLOAT_EXPR, double_type_node,
2552 first_inner_iterations);
2553 first_inner_iterationsd
2554 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2555 NULL_TREE, false,
2556 GSI_CONTINUE_LINKING);
2557 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2558 factor);
2559 factord = force_gimple_operand_gsi (gsi, factord, true,
2560 NULL_TREE, false,
2561 GSI_CONTINUE_LINKING);
2562 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2563 stopvalull);
2564 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2565 NULL_TREE, false,
2566 GSI_CONTINUE_LINKING);
2567 /* Temporarily disable flag_rounding_math; the values will be
2568 decimal numbers divided by 2, and worst case imprecision
2569 due to too large values ought to be caught later by the
2570 fallback checks. */
2571 int save_flag_rounding_math = flag_rounding_math;
2572 flag_rounding_math = 0;
2573 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2574 build_real (double_type_node, dconst2));
2575 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2576 first_inner_iterationsd, t);
2577 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2578 GSI_CONTINUE_LINKING);
2579 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2580 build_real (double_type_node, dconst2));
2581 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2582 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2583 fold_build2 (MULT_EXPR, double_type_node,
2584 t3, t3));
2585 flag_rounding_math = save_flag_rounding_math;
2586 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2587 GSI_CONTINUE_LINKING);
2588 if (flag_exceptions
2589 && cfun->can_throw_non_call_exceptions
2590 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2592 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2593 build_zero_cst (double_type_node));
2594 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2595 false, GSI_CONTINUE_LINKING);
2596 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2597 boolean_false_node,
2598 NULL_TREE, NULL_TREE);
2600 else
2601 cond_stmt
2602 = gimple_build_cond (LT_EXPR, t,
2603 build_zero_cst (double_type_node),
2604 NULL_TREE, NULL_TREE);
2605 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2606 e = split_block (gsi_bb (*gsi), cond_stmt);
2607 basic_block bb1 = e->src;
2608 e->flags = EDGE_FALSE_VALUE;
2609 e->probability = profile_probability::very_likely ();
2610 *gsi = gsi_after_labels (e->dest);
2611 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2612 tree sqrtr = create_tmp_var (double_type_node);
2613 gimple_call_set_lhs (call, sqrtr);
2614 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2615 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2616 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2617 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2618 tree c = create_tmp_var (ulltype);
2619 tree d = create_tmp_var (ulltype);
2620 expand_omp_build_assign (gsi, c, t, true);
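/* C just computed is the (truncated) root of the quadratic
     f (c) = c * FIRST_INNER_ITERATIONS + FACTOR * c * (c - 1) / 2
	   = (FACTOR / 2) * c * c + T3 * c,
   where T3 = FIRST_INNER_ITERATIONS - FACTOR / 2, solved for
   f (c) == STOPVAL:
     c = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3) / FACTOR.
   The comparisons below verify that D <= STOPVAL < D + the c-th inner
   iteration count, and take the fallback path otherwise.  */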
2621 t = fold_build2 (MINUS_EXPR, ulltype, c,
2622 build_one_cst (ulltype));
2623 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2624 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2625 t = fold_build2 (MULT_EXPR, ulltype,
2626 fold_convert (ulltype, fd->factor), t);
2627 tree t2
2628 = fold_build2 (MULT_EXPR, ulltype, c,
2629 fold_convert (ulltype,
2630 fd->first_inner_iterations));
2631 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2632 expand_omp_build_assign (gsi, d, t, true);
2633 t = fold_build2 (MULT_EXPR, ulltype,
2634 fold_convert (ulltype, fd->factor), c);
2635 t = fold_build2 (PLUS_EXPR, ulltype,
2636 t, fold_convert (ulltype,
2637 fd->first_inner_iterations));
2638 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2639 GSI_CONTINUE_LINKING);
2640 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2641 NULL_TREE, NULL_TREE);
2642 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2643 e = split_block (gsi_bb (*gsi), cond_stmt);
2644 basic_block bb2 = e->src;
2645 e->flags = EDGE_TRUE_VALUE;
2646 e->probability = profile_probability::very_likely ();
2647 *gsi = gsi_after_labels (e->dest);
2648 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2649 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2650 GSI_CONTINUE_LINKING);
2651 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2652 NULL_TREE, NULL_TREE);
2653 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2654 e = split_block (gsi_bb (*gsi), cond_stmt);
2655 basic_block bb3 = e->src;
2656 e->flags = EDGE_FALSE_VALUE;
2657 e->probability = profile_probability::very_likely ();
2658 *gsi = gsi_after_labels (e->dest);
2659 t = fold_convert (itype, c);
2660 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2661 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2662 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2663 GSI_CONTINUE_LINKING);
2664 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2665 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2666 t2 = fold_convert (itype, t2);
2667 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2668 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2669 if (fd->loops[i].m1)
2671 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2672 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2674 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2675 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2676 bb_triang = e->src;
2677 *gsi = gsi_after_labels (e->dest);
2678 remove_edge (e);
2679 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2680 e->probability = profile_probability::very_unlikely ();
2681 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2682 e->probability = profile_probability::very_unlikely ();
2683 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2684 e->probability = profile_probability::very_unlikely ();
2686 basic_block bb4 = create_empty_bb (bb0);
2687 add_bb_to_loop (bb4, bb0->loop_father);
2688 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2689 e->probability = profile_probability::unlikely ();
2690 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2691 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2692 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2693 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2694 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2695 counts[i], counts[i - 1]);
2696 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2697 GSI_CONTINUE_LINKING);
2698 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2699 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2700 t = fold_convert (itype, t);
2701 t2 = fold_convert (itype, t2);
2702 t = fold_build2 (MULT_EXPR, itype, t,
2703 fold_convert (itype, fd->loops[i].step));
2704 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2705 t2 = fold_build2 (MULT_EXPR, itype, t2,
2706 fold_convert (itype, fd->loops[i - 1].step));
2707 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2708 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2709 false, GSI_CONTINUE_LINKING);
2710 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2711 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2712 if (fd->loops[i].m1)
2714 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2715 fd->loops[i - 1].v);
2716 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2718 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2719 false, GSI_CONTINUE_LINKING);
2720 stmt = gimple_build_assign (fd->loops[i].v, t);
2721 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2723 /* Fallback implementation. Evaluate the loops between
2724 fd->first_nonrect and fd->last_nonrect (inclusive) at
2725 runtime using temporaries instead of the original iteration
2726 variables, and in the body just bump the counter and compare
2727 with the desired value. */
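/* In source form (illustration only) the search is roughly:

     idx = 0;
     for (tmpi = N1; tmpi COND1 N2; tmpi += STEP1)
       {
	 temp = <iteration count of the innermost non-rect loop>;
	 if (idx + temp > stopval)
	   break;	// The sought iteration lies in this outer slot.
	 idx += temp;
       }
     // Afterwards V = n1 + (stopval - idx) * step for the last loop.
*/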
2728 gimple_stmt_iterator gsi2 = *gsi;
2729 basic_block entry_bb = gsi_bb (gsi2);
2730 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2731 e = split_block (e->dest, (gimple *) NULL);
2732 basic_block dom_bb = NULL;
2733 basic_block cur_bb = e->src;
2734 basic_block next_bb = e->dest;
2735 entry_bb = e->dest;
2736 *gsi = gsi_after_labels (entry_bb);
2738 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2739 tree n1 = NULL_TREE, n2 = NULL_TREE;
2740 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2742 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2744 tree itype = TREE_TYPE (fd->loops[j].v);
2745 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2746 && fd->loops[j].m2 == NULL_TREE
2747 && !fd->loops[j].non_rect_referenced);
2748 gsi2 = gsi_after_labels (cur_bb);
2749 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2750 if (fd->loops[j].m1)
2752 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2753 n1 = fold_build2 (MULT_EXPR, itype,
2754 vs[j - fd->loops[j].outer], n1);
2755 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2757 else if (rect_p)
2758 n1 = build_zero_cst (type);
2759 else
2760 n1 = t;
2761 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2762 true, GSI_SAME_STMT);
2763 if (j < fd->last_nonrect)
2765 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2766 expand_omp_build_assign (&gsi2, vs[j], n1);
2768 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2769 if (fd->loops[j].m2)
2771 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2772 n2 = fold_build2 (MULT_EXPR, itype,
2773 vs[j - fd->loops[j].outer], n2);
2774 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2776 else if (rect_p)
2777 n2 = counts[j];
2778 else
2779 n2 = t;
2780 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2781 true, GSI_SAME_STMT);
2782 if (j == fd->last_nonrect)
2784 gcond *cond_stmt
2785 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2786 NULL_TREE, NULL_TREE);
2787 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2788 e = split_block (cur_bb, cond_stmt);
2789 e->flags = EDGE_TRUE_VALUE;
2790 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2791 e->probability = profile_probability::likely ().guessed ();
2792 ne->probability = e->probability.invert ();
2793 gsi2 = gsi_after_labels (e->dest);
2795 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2796 ? -1 : 1));
2797 t = fold_build2 (PLUS_EXPR, itype,
2798 fold_convert (itype, fd->loops[j].step), t);
2799 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2800 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2801 tree step = fold_convert (itype, fd->loops[j].step);
2802 if (TYPE_UNSIGNED (itype)
2803 && fd->loops[j].cond_code == GT_EXPR)
2804 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2805 fold_build1 (NEGATE_EXPR, itype, t),
2806 fold_build1 (NEGATE_EXPR, itype, step));
2807 else
2808 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2809 t = fold_convert (type, t);
2810 t = fold_build2 (PLUS_EXPR, type, idx, t);
2811 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2812 true, GSI_SAME_STMT);
2813 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2814 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2815 cond_stmt
2816 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2817 NULL_TREE);
2818 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2819 e = split_block (gsi_bb (gsi2), cond_stmt);
2820 e->flags = EDGE_TRUE_VALUE;
2821 e->probability = profile_probability::likely ().guessed ();
2822 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2823 ne->probability = e->probability.invert ();
2824 gsi2 = gsi_after_labels (e->dest);
2825 expand_omp_build_assign (&gsi2, idx, t);
2826 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2827 break;
2829 e = split_block (cur_bb, last_stmt (cur_bb));
2831 basic_block new_cur_bb = create_empty_bb (cur_bb);
2832 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2834 gsi2 = gsi_after_labels (e->dest);
2835 if (rect_p)
2836 t = fold_build2 (PLUS_EXPR, type, vs[j],
2837 build_one_cst (type));
2838 else
2840 tree step
2841 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2842 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2844 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2845 true, GSI_SAME_STMT);
2846 expand_omp_build_assign (&gsi2, vs[j], t);
2848 edge ne = split_block (e->dest, last_stmt (e->dest));
2849 gsi2 = gsi_after_labels (ne->dest);
2851 gcond *cond_stmt;
2852 if (next_bb == entry_bb)
2853 /* No need to actually check the outermost condition. */
2854 cond_stmt
2855 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2856 boolean_true_node,
2857 NULL_TREE, NULL_TREE);
2858 else
2859 cond_stmt
2860 = gimple_build_cond (rect_p ? LT_EXPR
2861 : fd->loops[j].cond_code,
2862 vs[j], n2, NULL_TREE, NULL_TREE);
2863 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2864 edge e3, e4;
2865 if (next_bb == entry_bb)
2867 e3 = find_edge (ne->dest, next_bb);
2868 e3->flags = EDGE_FALSE_VALUE;
2869 dom_bb = ne->dest;
2871 else
2872 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2873 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2874 e4->probability = profile_probability::likely ().guessed ();
2875 e3->probability = e4->probability.invert ();
2876 basic_block esrc = e->src;
2877 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2878 cur_bb = new_cur_bb;
2879 basic_block latch_bb = next_bb;
2880 next_bb = e->dest;
2881 remove_edge (e);
2882 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2883 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2884 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2886 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2888 tree itype = TREE_TYPE (fd->loops[j].v);
2889 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2890 && fd->loops[j].m2 == NULL_TREE
2891 && !fd->loops[j].non_rect_referenced);
2892 if (j == fd->last_nonrect)
2894 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2895 t = fold_convert (itype, t);
2896 tree t2
2897 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2898 t = fold_build2 (MULT_EXPR, itype, t, t2);
2899 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2901 else if (rect_p)
2903 t = fold_convert (itype, vs[j]);
2904 t = fold_build2 (MULT_EXPR, itype, t,
2905 fold_convert (itype, fd->loops[j].step));
2906 if (POINTER_TYPE_P (vtype))
2907 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2908 else
2909 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2911 else
2912 t = vs[j];
2913 t = force_gimple_operand_gsi (gsi, t, false,
2914 NULL_TREE, true,
2915 GSI_SAME_STMT);
2916 stmt = gimple_build_assign (fd->loops[j].v, t);
2917 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2919 if (gsi_end_p (*gsi))
2920 *gsi = gsi_last_bb (gsi_bb (*gsi));
2921 else
2922 gsi_prev (gsi);
2923 if (bb_triang)
2925 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2926 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2927 *gsi = gsi_after_labels (e->dest);
2928 if (!gsi_end_p (*gsi))
2929 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2930 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2933 else
2935 t = fold_convert (itype, t);
2936 t = fold_build2 (MULT_EXPR, itype, t,
2937 fold_convert (itype, fd->loops[i].step));
2938 if (POINTER_TYPE_P (vtype))
2939 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2940 else
2941 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2942 t = force_gimple_operand_gsi (gsi, t,
2943 DECL_P (fd->loops[i].v)
2944 && TREE_ADDRESSABLE (fd->loops[i].v),
2945 NULL_TREE, false,
2946 GSI_CONTINUE_LINKING);
2947 stmt = gimple_build_assign (fd->loops[i].v, t);
2948 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2950 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2952 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2953 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2954 false, GSI_CONTINUE_LINKING);
2955 stmt = gimple_build_assign (tem, t);
2956 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2958 if (i == fd->last_nonrect)
2959 i = fd->first_nonrect;
2961 if (fd->non_rect)
2962 for (i = 0; i <= fd->last_nonrect; i++)
2963 if (fd->loops[i].m2)
2965 tree itype = TREE_TYPE (fd->loops[i].v);
2967 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2968 t = fold_build2 (MULT_EXPR, itype,
2969 fd->loops[i - fd->loops[i].outer].v, t);
2970 t = fold_build2 (PLUS_EXPR, itype, t,
2971 fold_convert (itype,
2972 unshare_expr (fd->loops[i].n2)));
2973 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2974 t = force_gimple_operand_gsi (gsi, t, false,
2975 NULL_TREE, false,
2976 GSI_CONTINUE_LINKING);
2977 stmt = gimple_build_assign (nonrect_bounds[i], t);
2978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2982 /* Helper function for expand_omp_for_*. Generate code like:
2983 L10:
2984 V3 += STEP3;
2985 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2986 L11:
2987 V3 = N31;
2988 V2 += STEP2;
2989 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2990 L12:
2991 V2 = N21;
2992 V1 += STEP1;
2993 goto BODY_BB;
2994 For non-rectangular loops, use temporaries stored in nonrect_bounds
2995 for the upper bounds if the M?2 multiplier is present. Given e.g.
2996 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2997 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2998 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2999 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3001 L10:
3002 V4 += STEP4;
3003 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3004 L11:
3005 V4 = N41 + M41 * V2; // This can be left out if the loop
3006 // refers to the immediate parent loop
3007 V3 += STEP3;
3008 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3009 L12:
3010 V3 = N31;
3011 V2 += STEP2;
3012 if (V2 cond2 N22) goto L120; else goto L13;
3013 L120:
3014 V4 = N41 + M41 * V2;
3015 NONRECT_BOUND4 = N42 + M42 * V2;
3016 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3017 L13:
3018 V2 = N21;
3019 V1 += STEP1;
3020 goto L120; */
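/* For the rectangular case the L10/L11/L12 sequence above is just the
   increment logic of the collapsed nest written out with gotos; an
   equivalent structured form (illustration only) is:

     V3 += STEP3;
     if (!(V3 cond3 N32))
       {
	 V3 = N31;
	 V2 += STEP2;
	 if (!(V2 cond2 N22))
	   {
	     V2 = N21;
	     V1 += STEP1;
	   }
       }
     goto BODY_BB;
*/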
3022 static basic_block
3023 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3024 basic_block cont_bb, basic_block body_bb)
3026 basic_block last_bb, bb, collapse_bb = NULL;
3027 int i;
3028 gimple_stmt_iterator gsi;
3029 edge e;
3030 tree t;
3031 gimple *stmt;
3033 last_bb = cont_bb;
3034 for (i = fd->collapse - 1; i >= 0; i--)
3036 tree vtype = TREE_TYPE (fd->loops[i].v);
3038 bb = create_empty_bb (last_bb);
3039 add_bb_to_loop (bb, last_bb->loop_father);
3040 gsi = gsi_start_bb (bb);
3042 if (i < fd->collapse - 1)
3044 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3045 e->probability
3046 = profile_probability::guessed_always ().apply_scale (1, 8);
3048 struct omp_for_data_loop *l = &fd->loops[i + 1];
3049 if (l->m1 == NULL_TREE || l->outer != 1)
3051 t = l->n1;
3052 if (l->m1)
3054 tree t2
3055 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3056 fd->loops[i + 1 - l->outer].v, l->m1);
3057 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3059 t = force_gimple_operand_gsi (&gsi, t,
3060 DECL_P (l->v)
3061 && TREE_ADDRESSABLE (l->v),
3062 NULL_TREE, false,
3063 GSI_CONTINUE_LINKING);
3064 stmt = gimple_build_assign (l->v, t);
3065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3068 else
3069 collapse_bb = bb;
3071 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3073 if (POINTER_TYPE_P (vtype))
3074 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3075 else
3076 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3077 t = force_gimple_operand_gsi (&gsi, t,
3078 DECL_P (fd->loops[i].v)
3079 && TREE_ADDRESSABLE (fd->loops[i].v),
3080 NULL_TREE, false, GSI_CONTINUE_LINKING);
3081 stmt = gimple_build_assign (fd->loops[i].v, t);
3082 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3084 if (fd->loops[i].non_rect_referenced)
3086 basic_block update_bb = NULL, prev_bb = NULL;
3087 for (int j = i + 1; j <= fd->last_nonrect; j++)
3088 if (j - fd->loops[j].outer == i)
3090 tree n1, n2;
3091 struct omp_for_data_loop *l = &fd->loops[j];
3092 basic_block this_bb = create_empty_bb (last_bb);
3093 add_bb_to_loop (this_bb, last_bb->loop_father);
3094 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3095 if (prev_bb)
3097 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3098 e->probability
3099 = profile_probability::guessed_always ().apply_scale (7,
3101 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3103 if (l->m1)
3105 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3106 fd->loops[i].v);
3107 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3108 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3109 false,
3110 GSI_CONTINUE_LINKING);
3111 stmt = gimple_build_assign (l->v, n1);
3112 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3113 n1 = l->v;
3115 else
3116 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3117 NULL_TREE, false,
3118 GSI_CONTINUE_LINKING);
3119 if (l->m2)
3121 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3122 fd->loops[i].v);
3123 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3124 t, unshare_expr (l->n2));
3125 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3126 false,
3127 GSI_CONTINUE_LINKING);
3128 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3129 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3130 n2 = nonrect_bounds[j];
3132 else
3133 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3134 true, NULL_TREE, false,
3135 GSI_CONTINUE_LINKING);
3136 gcond *cond_stmt
3137 = gimple_build_cond (l->cond_code, n1, n2,
3138 NULL_TREE, NULL_TREE);
3139 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3140 if (update_bb == NULL)
3141 update_bb = this_bb;
3142 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3143 e->probability
3144 = profile_probability::guessed_always ().apply_scale (1, 8);
3145 if (prev_bb == NULL)
3146 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3147 prev_bb = this_bb;
3149 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3150 e->probability
3151 = profile_probability::guessed_always ().apply_scale (7, 8);
3152 body_bb = update_bb;
3155 if (i > 0)
3157 if (fd->loops[i].m2)
3158 t = nonrect_bounds[i];
3159 else
3160 t = unshare_expr (fd->loops[i].n2);
3161 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3162 false, GSI_CONTINUE_LINKING);
3163 tree v = fd->loops[i].v;
3164 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3165 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3166 false, GSI_CONTINUE_LINKING);
3167 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3168 stmt = gimple_build_cond_empty (t);
3169 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3170 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3171 expand_omp_regimplify_p, NULL, NULL)
3172 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3173 expand_omp_regimplify_p, NULL, NULL))
3174 gimple_regimplify_operands (stmt, &gsi);
3175 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3176 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3178 else
3179 make_edge (bb, body_bb, EDGE_FALLTHRU);
3180 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3181 last_bb = bb;
3184 return collapse_bb;
3187 /* Expand #pragma omp ordered depend(source). */
3189 static void
3190 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3191 tree *counts, location_t loc)
3193 enum built_in_function source_ix
3194 = fd->iter_type == long_integer_type_node
3195 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3196 gimple *g
3197 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3198 build_fold_addr_expr (counts[fd->ordered]));
3199 gimple_set_location (g, loc);
3200 gsi_insert_before (gsi, g, GSI_SAME_STMT);
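/* For instance (user-level view, not generated code), in

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   // ... consume the result of iteration (i - 1, j) ...
	   #pragma omp ordered depend(source)
	 }

   the depend(source) directive expands to the GOMP_doacross_post call
   (or its _ull_ variant) built above, publishing the current iteration
   vector, while each depend(sink:) expands to a GOMP_doacross_wait
   call handled by expand_omp_ordered_sink below.  */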
3203 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3205 static void
3206 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3207 tree *counts, tree c, location_t loc)
3209 auto_vec<tree, 10> args;
3210 enum built_in_function sink_ix
3211 = fd->iter_type == long_integer_type_node
3212 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3213 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3214 int i;
3215 gimple_stmt_iterator gsi2 = *gsi;
3216 bool warned_step = false;
3218 for (i = 0; i < fd->ordered; i++)
3220 tree step = NULL_TREE;
3221 off = TREE_PURPOSE (deps);
3222 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3224 step = TREE_OPERAND (off, 1);
3225 off = TREE_OPERAND (off, 0);
3227 if (!integer_zerop (off))
3229 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3230 || fd->loops[i].cond_code == GT_EXPR);
3231 bool forward = fd->loops[i].cond_code == LT_EXPR;
3232 if (step)
3234 /* Non-simple Fortran DO loops. If step is variable,
3235 we don't even know the direction at compile time, so
3236 we can't warn. */
3237 if (TREE_CODE (step) != INTEGER_CST)
3238 break;
3239 forward = tree_int_cst_sgn (step) != -1;
3241 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3242 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3243 "waiting for lexically later iteration");
3244 break;
3246 deps = TREE_CHAIN (deps);
3248 /* If all offsets corresponding to the collapsed loops are zero,
3249 this depend clause can be ignored. FIXME: but a flush is still
3250 needed; we would need to emit one __sync_synchronize () for it
3251 (perhaps conditionally). Solve this together with the
3252 conservative dependence folding optimization.
3253 if (i >= fd->collapse)
3254 return; */
3256 deps = OMP_CLAUSE_DECL (c);
3257 gsi_prev (&gsi2);
3258 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3259 edge e2 = split_block_after_labels (e1->dest);
3261 gsi2 = gsi_after_labels (e1->dest);
3262 *gsi = gsi_last_bb (e1->src);
3263 for (i = 0; i < fd->ordered; i++)
3265 tree itype = TREE_TYPE (fd->loops[i].v);
3266 tree step = NULL_TREE;
3267 tree orig_off = NULL_TREE;
3268 if (POINTER_TYPE_P (itype))
3269 itype = sizetype;
3270 if (i)
3271 deps = TREE_CHAIN (deps);
3272 off = TREE_PURPOSE (deps);
3273 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3275 step = TREE_OPERAND (off, 1);
3276 off = TREE_OPERAND (off, 0);
3277 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3278 && integer_onep (fd->loops[i].step)
3279 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3281 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3282 if (step)
3284 off = fold_convert_loc (loc, itype, off);
3285 orig_off = off;
3286 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3289 if (integer_zerop (off))
3290 t = boolean_true_node;
3291 else
3293 tree a;
3294 tree co = fold_convert_loc (loc, itype, off);
3295 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3297 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3298 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3299 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3300 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3301 co);
3303 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3304 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3305 fd->loops[i].v, co);
3306 else
3307 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3308 fd->loops[i].v, co);
3309 if (step)
3311 tree t1, t2;
3312 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3313 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3314 fd->loops[i].n1);
3315 else
3316 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3317 fd->loops[i].n2);
3318 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3319 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3320 fd->loops[i].n2);
3321 else
3322 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3323 fd->loops[i].n1);
3324 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3325 step, build_int_cst (TREE_TYPE (step), 0));
3326 if (TREE_CODE (step) != INTEGER_CST)
3328 t1 = unshare_expr (t1);
3329 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3330 false, GSI_CONTINUE_LINKING);
3331 t2 = unshare_expr (t2);
3332 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3333 false, GSI_CONTINUE_LINKING);
3335 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3336 t, t2, t1);
3338 else if (fd->loops[i].cond_code == LT_EXPR)
3340 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3341 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3342 fd->loops[i].n1);
3343 else
3344 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3345 fd->loops[i].n2);
3347 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3348 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3349 fd->loops[i].n2);
3350 else
3351 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3352 fd->loops[i].n1);
3354 if (cond)
3355 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3356 else
3357 cond = t;
3359 off = fold_convert_loc (loc, itype, off);
3361 if (step
3362 || (fd->loops[i].cond_code == LT_EXPR
3363 ? !integer_onep (fd->loops[i].step)
3364 : !integer_minus_onep (fd->loops[i].step)))
3366 if (step == NULL_TREE
3367 && TYPE_UNSIGNED (itype)
3368 && fd->loops[i].cond_code == GT_EXPR)
3369 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3370 fold_build1_loc (loc, NEGATE_EXPR, itype,
3371 s));
3372 else
3373 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3374 orig_off ? orig_off : off, s);
3375 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3376 build_int_cst (itype, 0));
3377 if (integer_zerop (t) && !warned_step)
3379 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3380 "refers to iteration never in the iteration "
3381 "space");
3382 warned_step = true;
3384 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3385 cond, t);
3388 if (i <= fd->collapse - 1 && fd->collapse > 1)
3389 t = fd->loop.v;
3390 else if (counts[i])
3391 t = counts[i];
3392 else
3394 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3395 fd->loops[i].v, fd->loops[i].n1);
3396 t = fold_convert_loc (loc, fd->iter_type, t);
3398 if (step)
3399 /* We have divided off by step already earlier. */;
3400 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3401 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3402 fold_build1_loc (loc, NEGATE_EXPR, itype,
3403 s));
3404 else
3405 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3406 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3407 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3408 off = fold_convert_loc (loc, fd->iter_type, off);
3409 if (i <= fd->collapse - 1 && fd->collapse > 1)
3411 if (i)
3412 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3413 off);
3414 if (i < fd->collapse - 1)
3416 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3417 counts[i]);
3418 continue;
3421 off = unshare_expr (off);
3422 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3423 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3424 true, GSI_SAME_STMT);
3425 args.safe_push (t);
3427 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3428 gimple_set_location (g, loc);
3429 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3431 cond = unshare_expr (cond);
3432 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3433 GSI_CONTINUE_LINKING);
3434 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3435 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3436 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3437 e1->probability = e3->probability.invert ();
3438 e1->flags = EDGE_TRUE_VALUE;
3439 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3441 *gsi = gsi_after_labels (e2->dest);
3444 /* Expand all #pragma omp ordered depend(source) and
3445 #pragma omp ordered depend(sink:...) constructs in the current
3446 #pragma omp for ordered(n) region. */
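/* Editorial illustration (not part of the original source): a typical
   doacross loop whose ordered constructs the function below expands;
   names and bounds are hypothetical.

       #pragma omp for ordered(2)
       for (int i = 0; i < n; i++)
         for (int j = 0; j < m; j++)
           {
             #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
             a[i][j] = f (a[i-1][j], a[i][j-1]);
             #pragma omp ordered depend(source)
           }

   The depend(sink:...) waits become GOMP_doacross_wait (or
   GOMP_doacross_ull_wait) calls guarded by the in-iteration-space checks
   built above, and depend(source) is lowered by
   expand_omp_ordered_source.  */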
3448 static void
3449 expand_omp_ordered_source_sink (struct omp_region *region,
3450 struct omp_for_data *fd, tree *counts,
3451 basic_block cont_bb)
3453 struct omp_region *inner;
3454 int i;
3455 for (i = fd->collapse - 1; i < fd->ordered; i++)
3456 if (i == fd->collapse - 1 && fd->collapse > 1)
3457 counts[i] = NULL_TREE;
3458 else if (i >= fd->collapse && !cont_bb)
3459 counts[i] = build_zero_cst (fd->iter_type);
3460 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3461 && integer_onep (fd->loops[i].step))
3462 counts[i] = NULL_TREE;
3463 else
3464 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3465 tree atype
3466 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3467 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3468 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3470 for (inner = region->inner; inner; inner = inner->next)
3471 if (inner->type == GIMPLE_OMP_ORDERED)
3473 gomp_ordered *ord_stmt = inner->ord_stmt;
3474 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3475 location_t loc = gimple_location (ord_stmt);
3476 tree c;
3477 for (c = gimple_omp_ordered_clauses (ord_stmt);
3478 c; c = OMP_CLAUSE_CHAIN (c))
3479 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3480 break;
3481 if (c)
3482 expand_omp_ordered_source (&gsi, fd, counts, loc);
3483 for (c = gimple_omp_ordered_clauses (ord_stmt);
3484 c; c = OMP_CLAUSE_CHAIN (c))
3485 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3486 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3487 gsi_remove (&gsi, true);
3491 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3492 collapsed. */
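/* Editorial note (not part of the original source): e.g. for ordered(2)
   without a collapse clause, fd->collapse is 1 and fd->ordered is 2, so
   the function below reconstructs one explicit loop for the second
   ordered dimension around the body; the first dimension remains the
   workshared loop handled by the caller.  */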
3494 static basic_block
3495 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3496 basic_block cont_bb, basic_block body_bb,
3497 bool ordered_lastprivate)
3499 if (fd->ordered == fd->collapse)
3500 return cont_bb;
3502 if (!cont_bb)
3504 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3505 for (int i = fd->collapse; i < fd->ordered; i++)
3507 tree type = TREE_TYPE (fd->loops[i].v);
3508 tree n1 = fold_convert (type, fd->loops[i].n1);
3509 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3510 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3511 size_int (i - fd->collapse + 1),
3512 NULL_TREE, NULL_TREE);
3513 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3515 return NULL;
3518 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3520 tree t, type = TREE_TYPE (fd->loops[i].v);
3521 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3522 expand_omp_build_assign (&gsi, fd->loops[i].v,
3523 fold_convert (type, fd->loops[i].n1));
3524 if (counts[i])
3525 expand_omp_build_assign (&gsi, counts[i],
3526 build_zero_cst (fd->iter_type));
3527 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3528 size_int (i - fd->collapse + 1),
3529 NULL_TREE, NULL_TREE);
3530 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3531 if (!gsi_end_p (gsi))
3532 gsi_prev (&gsi);
3533 else
3534 gsi = gsi_last_bb (body_bb);
3535 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3536 basic_block new_body = e1->dest;
3537 if (body_bb == cont_bb)
3538 cont_bb = new_body;
3539 edge e2 = NULL;
3540 basic_block new_header;
3541 if (EDGE_COUNT (cont_bb->preds) > 0)
3543 gsi = gsi_last_bb (cont_bb);
3544 if (POINTER_TYPE_P (type))
3545 t = fold_build_pointer_plus (fd->loops[i].v,
3546 fold_convert (sizetype,
3547 fd->loops[i].step));
3548 else
3549 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3550 fold_convert (type, fd->loops[i].step));
3551 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3552 if (counts[i])
3554 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3555 build_int_cst (fd->iter_type, 1));
3556 expand_omp_build_assign (&gsi, counts[i], t);
3557 t = counts[i];
3559 else
3561 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3562 fd->loops[i].v, fd->loops[i].n1);
3563 t = fold_convert (fd->iter_type, t);
3564 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3565 true, GSI_SAME_STMT);
3567 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3568 size_int (i - fd->collapse + 1),
3569 NULL_TREE, NULL_TREE);
3570 expand_omp_build_assign (&gsi, aref, t);
3571 gsi_prev (&gsi);
3572 e2 = split_block (cont_bb, gsi_stmt (gsi));
3573 new_header = e2->dest;
3575 else
3576 new_header = cont_bb;
3577 gsi = gsi_after_labels (new_header);
3578 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3579 true, GSI_SAME_STMT);
3580 tree n2
3581 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3582 true, NULL_TREE, true, GSI_SAME_STMT);
3583 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3584 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3585 edge e3 = split_block (new_header, gsi_stmt (gsi));
3586 cont_bb = e3->dest;
3587 remove_edge (e1);
3588 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3589 e3->flags = EDGE_FALSE_VALUE;
3590 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3591 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3592 e1->probability = e3->probability.invert ();
3594 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3595 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3597 if (e2)
3599 class loop *loop = alloc_loop ();
3600 loop->header = new_header;
3601 loop->latch = e2->src;
3602 add_loop (loop, body_bb->loop_father);
3606 /* If there are any lastprivate clauses and it is possible some loops
3607 might have zero iterations, ensure all the decls are initialized,
3608 otherwise we could crash evaluating C++ class iterators with lastprivate
3609 clauses. */
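/* Editorial illustration (not part of the original source): if such a
   loop is e.g. "for (j = a; j < b; j++)" with b <= a at run time, its
   body never executes, so without the inits below a lastprivate(j)
   copy-out would read an uninitialized J.  */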
3610 bool need_inits = false;
3611 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3612 if (need_inits)
3614 tree type = TREE_TYPE (fd->loops[i].v);
3615 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3616 expand_omp_build_assign (&gsi, fd->loops[i].v,
3617 fold_convert (type, fd->loops[i].n1));
3619 else
3621 tree type = TREE_TYPE (fd->loops[i].v);
3622 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3623 boolean_type_node,
3624 fold_convert (type, fd->loops[i].n1),
3625 fold_convert (type, fd->loops[i].n2));
3626 if (!integer_onep (this_cond))
3627 need_inits = true;
3630 return cont_bb;
3633 /* A subroutine of expand_omp_for. Generate code for a parallel
3634 loop with any schedule. Given parameters:
3636 for (V = N1; V cond N2; V += STEP) BODY;
3638 where COND is "<" or ">", we generate pseudocode
3640 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3641 if (more) goto L0; else goto L3;
3643 V = istart0;
3644 iend = iend0;
3646 BODY;
3647 V += STEP;
3648 if (V cond iend) goto L1; else goto L2;
3650 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3653 If this is a combined omp parallel loop, instead of the call to
3654 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3655 If this is gimple_omp_for_combined_p loop, then instead of assigning
3656 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3657 inner GIMPLE_OMP_FOR and V += STEP; and
3658 if (V cond iend) goto L1; else goto L2; are removed.
3660 For collapsed loops, given parameters:
3661 collapse(3)
3662 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3663 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3664 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3665 BODY;
3667 we generate pseudocode
3669 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3670 if (cond3 is <)
3671 adj = STEP3 - 1;
3672 else
3673 adj = STEP3 + 1;
3674 count3 = (adj + N32 - N31) / STEP3;
3675 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3676 if (cond2 is <)
3677 adj = STEP2 - 1;
3678 else
3679 adj = STEP2 + 1;
3680 count2 = (adj + N22 - N21) / STEP2;
3681 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3682 if (cond1 is <)
3683 adj = STEP1 - 1;
3684 else
3685 adj = STEP1 + 1;
3686 count1 = (adj + N12 - N11) / STEP1;
3687 count = count1 * count2 * count3;
3688 goto Z1;
3690 count = 0;
3692 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3693 if (more) goto L0; else goto L3;
3695 V = istart0;
3696 T = V;
3697 V3 = N31 + (T % count3) * STEP3;
3698 T = T / count3;
3699 V2 = N21 + (T % count2) * STEP2;
3700 T = T / count2;
3701 V1 = N11 + T * STEP1;
3702 iend = iend0;
3704 BODY;
3705 V += 1;
3706 if (V < iend) goto L10; else goto L2;
3707 L10:
3708 V3 += STEP3;
3709 if (V3 cond3 N32) goto L1; else goto L11;
3710 L11:
3711 V3 = N31;
3712 V2 += STEP2;
3713 if (V2 cond2 N22) goto L1; else goto L12;
3714 L12:
3715 V2 = N21;
3716 V1 += STEP1;
3717 goto L1;
3719 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
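/* Editorial illustration (not part of the original source): for
   collapse(2) with count2 = 4, the flattened index T = 9 is decomposed
   as V2 = N21 + (9 % 4) * STEP2 = N21 + STEP2 and
   V1 = N11 + (9 / 4) * STEP1 = N11 + 2 * STEP1, matching the
   division/modulo recovery in the pseudocode above.  */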
3724 static void
3725 expand_omp_for_generic (struct omp_region *region,
3726 struct omp_for_data *fd,
3727 enum built_in_function start_fn,
3728 enum built_in_function next_fn,
3729 tree sched_arg,
3730 gimple *inner_stmt)
3732 tree type, istart0, iend0, iend;
3733 tree t, vmain, vback, bias = NULL_TREE;
3734 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3735 basic_block l2_bb = NULL, l3_bb = NULL;
3736 gimple_stmt_iterator gsi;
3737 gassign *assign_stmt;
3738 bool in_combined_parallel = is_combined_parallel (region);
3739 bool broken_loop = region->cont == NULL;
3740 edge e, ne;
3741 tree *counts = NULL;
3742 int i;
3743 bool ordered_lastprivate = false;
3745 gcc_assert (!broken_loop || !in_combined_parallel);
3746 gcc_assert (fd->iter_type == long_integer_type_node
3747 || !in_combined_parallel);
3749 entry_bb = region->entry;
3750 cont_bb = region->cont;
3751 collapse_bb = NULL;
3752 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3753 gcc_assert (broken_loop
3754 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3755 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3756 l1_bb = single_succ (l0_bb);
3757 if (!broken_loop)
3759 l2_bb = create_empty_bb (cont_bb);
3760 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3761 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3762 == l1_bb));
3763 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3765 else
3766 l2_bb = NULL;
3767 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3768 exit_bb = region->exit;
3770 gsi = gsi_last_nondebug_bb (entry_bb);
3772 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3773 if (fd->ordered
3774 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3775 OMP_CLAUSE_LASTPRIVATE))
3776 ordered_lastprivate = true;
3777 tree reductions = NULL_TREE;
3778 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3779 tree memv = NULL_TREE;
3780 if (fd->lastprivate_conditional)
3782 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3783 OMP_CLAUSE__CONDTEMP_);
3784 if (fd->have_pointer_condtemp)
3785 condtemp = OMP_CLAUSE_DECL (c);
3786 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3787 cond_var = OMP_CLAUSE_DECL (c);
3789 if (sched_arg)
3791 if (fd->have_reductemp)
3793 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3794 OMP_CLAUSE__REDUCTEMP_);
3795 reductions = OMP_CLAUSE_DECL (c);
3796 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3797 gimple *g = SSA_NAME_DEF_STMT (reductions);
3798 reductions = gimple_assign_rhs1 (g);
3799 OMP_CLAUSE_DECL (c) = reductions;
3800 entry_bb = gimple_bb (g);
3801 edge e = split_block (entry_bb, g);
3802 if (region->entry == entry_bb)
3803 region->entry = e->dest;
3804 gsi = gsi_last_bb (entry_bb);
3806 else
3807 reductions = null_pointer_node;
3808 if (fd->have_pointer_condtemp)
3810 tree type = TREE_TYPE (condtemp);
3811 memv = create_tmp_var (type);
3812 TREE_ADDRESSABLE (memv) = 1;
3813 unsigned HOST_WIDE_INT sz
3814 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3815 sz *= fd->lastprivate_conditional;
3816 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3817 false);
3818 mem = build_fold_addr_expr (memv);
3820 else
3821 mem = null_pointer_node;
3823 if (fd->collapse > 1 || fd->ordered)
3825 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3826 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3828 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3829 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3830 zero_iter1_bb, first_zero_iter1,
3831 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3833 if (zero_iter1_bb)
3835 /* Some counts[i] vars might be uninitialized if
3836 some loop has zero iterations. But the body shouldn't
3837 be executed in that case, so just avoid uninit warnings. */
3838 for (i = first_zero_iter1;
3839 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3840 if (SSA_VAR_P (counts[i]))
3841 TREE_NO_WARNING (counts[i]) = 1;
3842 gsi_prev (&gsi);
3843 e = split_block (entry_bb, gsi_stmt (gsi));
3844 entry_bb = e->dest;
3845 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3846 gsi = gsi_last_nondebug_bb (entry_bb);
3847 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3848 get_immediate_dominator (CDI_DOMINATORS,
3849 zero_iter1_bb));
3851 if (zero_iter2_bb)
3853 /* Some counts[i] vars might be uninitialized if
3854 some loop has zero iterations. But the body shouldn't
3855 be executed in that case, so just avoid uninit warnings. */
3856 for (i = first_zero_iter2; i < fd->ordered; i++)
3857 if (SSA_VAR_P (counts[i]))
3858 TREE_NO_WARNING (counts[i]) = 1;
3859 if (zero_iter1_bb)
3860 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3861 else
3863 gsi_prev (&gsi);
3864 e = split_block (entry_bb, gsi_stmt (gsi));
3865 entry_bb = e->dest;
3866 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3867 gsi = gsi_last_nondebug_bb (entry_bb);
3868 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3869 get_immediate_dominator
3870 (CDI_DOMINATORS, zero_iter2_bb));
3873 if (fd->collapse == 1)
3875 counts[0] = fd->loop.n2;
3876 fd->loop = fd->loops[0];
3880 type = TREE_TYPE (fd->loop.v);
3881 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3882 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3883 TREE_ADDRESSABLE (istart0) = 1;
3884 TREE_ADDRESSABLE (iend0) = 1;
3886 /* See if we need to bias by LLONG_MIN. */
3887 if (fd->iter_type == long_long_unsigned_type_node
3888 && TREE_CODE (type) == INTEGER_TYPE
3889 && !TYPE_UNSIGNED (type)
3890 && fd->ordered == 0)
3892 tree n1, n2;
3894 if (fd->loop.cond_code == LT_EXPR)
3896 n1 = fd->loop.n1;
3897 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3899 else
3901 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3902 n2 = fd->loop.n1;
3904 if (TREE_CODE (n1) != INTEGER_CST
3905 || TREE_CODE (n2) != INTEGER_CST
3906 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3907 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
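/* Editorial note (not part of the original source): the bias is
   TYPE_MIN_VALUE, e.g. 0x8000000000000000 for long long, so adding it
   maps the signed range [LLONG_MIN, LLONG_MAX] onto the unsigned range
   [0, ULLONG_MAX] expected by the _ull_ runtime entrypoints; it is
   subtracted again below when istart0/iend0 are read back.  */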
3910 gimple_stmt_iterator gsif = gsi;
3911 gsi_prev (&gsif);
3913 tree arr = NULL_TREE;
3914 if (in_combined_parallel)
3916 gcc_assert (fd->ordered == 0);
3917 /* In a combined parallel loop, emit a call to
3918 GOMP_loop_foo_next. */
3919 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3920 build_fold_addr_expr (istart0),
3921 build_fold_addr_expr (iend0));
3923 else
3925 tree t0, t1, t2, t3, t4;
3926 /* If this is not a combined parallel loop, emit a call to
3927 GOMP_loop_foo_start in ENTRY_BB. */
3928 t4 = build_fold_addr_expr (iend0);
3929 t3 = build_fold_addr_expr (istart0);
3930 if (fd->ordered)
3932 t0 = build_int_cst (unsigned_type_node,
3933 fd->ordered - fd->collapse + 1);
3934 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3935 fd->ordered
3936 - fd->collapse + 1),
3937 ".omp_counts");
3938 DECL_NAMELESS (arr) = 1;
3939 TREE_ADDRESSABLE (arr) = 1;
3940 TREE_STATIC (arr) = 1;
3941 vec<constructor_elt, va_gc> *v;
3942 vec_alloc (v, fd->ordered - fd->collapse + 1);
3943 int idx;
3945 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3947 tree c;
3948 if (idx == 0 && fd->collapse > 1)
3949 c = fd->loop.n2;
3950 else
3951 c = counts[idx + fd->collapse - 1];
3952 tree purpose = size_int (idx);
3953 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3954 if (TREE_CODE (c) != INTEGER_CST)
3955 TREE_STATIC (arr) = 0;
3958 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3959 if (!TREE_STATIC (arr))
3960 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3961 void_type_node, arr),
3962 true, NULL_TREE, true, GSI_SAME_STMT);
3963 t1 = build_fold_addr_expr (arr);
3964 t2 = NULL_TREE;
3966 else
3968 t2 = fold_convert (fd->iter_type, fd->loop.step);
3969 t1 = fd->loop.n2;
3970 t0 = fd->loop.n1;
3971 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3973 tree innerc
3974 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3975 OMP_CLAUSE__LOOPTEMP_);
3976 gcc_assert (innerc);
3977 t0 = OMP_CLAUSE_DECL (innerc);
3978 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3979 OMP_CLAUSE__LOOPTEMP_);
3980 gcc_assert (innerc);
3981 t1 = OMP_CLAUSE_DECL (innerc);
3983 if (POINTER_TYPE_P (TREE_TYPE (t0))
3984 && TYPE_PRECISION (TREE_TYPE (t0))
3985 != TYPE_PRECISION (fd->iter_type))
3987 /* Avoid casting pointers to integer of a different size. */
3988 tree itype = signed_type_for (type);
3989 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3990 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3992 else
3994 t1 = fold_convert (fd->iter_type, t1);
3995 t0 = fold_convert (fd->iter_type, t0);
3997 if (bias)
3999 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4000 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4003 if (fd->iter_type == long_integer_type_node || fd->ordered)
4005 if (fd->chunk_size)
4007 t = fold_convert (fd->iter_type, fd->chunk_size);
4008 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4009 if (sched_arg)
4011 if (fd->ordered)
4012 t = build_call_expr (builtin_decl_explicit (start_fn),
4013 8, t0, t1, sched_arg, t, t3, t4,
4014 reductions, mem);
4015 else
4016 t = build_call_expr (builtin_decl_explicit (start_fn),
4017 9, t0, t1, t2, sched_arg, t, t3, t4,
4018 reductions, mem);
4020 else if (fd->ordered)
4021 t = build_call_expr (builtin_decl_explicit (start_fn),
4022 5, t0, t1, t, t3, t4);
4023 else
4024 t = build_call_expr (builtin_decl_explicit (start_fn),
4025 6, t0, t1, t2, t, t3, t4);
4027 else if (fd->ordered)
4028 t = build_call_expr (builtin_decl_explicit (start_fn),
4029 4, t0, t1, t3, t4);
4030 else
4031 t = build_call_expr (builtin_decl_explicit (start_fn),
4032 5, t0, t1, t2, t3, t4);
4034 else
4036 tree t5;
4037 tree c_bool_type;
4038 tree bfn_decl;
4040 /* The GOMP_loop_ull_*start functions have an additional boolean
4041 argument, true for < loops and false for > loops.
4042 In Fortran, the C bool type can be different from
4043 boolean_type_node. */
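/* Editorial note (not part of the original source): e.g. a
   "for (V = N1; V < N2; V += STEP)" loop passes t5 = 1 here, telling
   the _ull_ start function to treat the unsigned bounds as an upward
   iteration.  */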
4044 bfn_decl = builtin_decl_explicit (start_fn);
4045 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4046 t5 = build_int_cst (c_bool_type,
4047 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4048 if (fd->chunk_size)
4050 tree bfn_decl = builtin_decl_explicit (start_fn);
4051 t = fold_convert (fd->iter_type, fd->chunk_size);
4052 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4053 if (sched_arg)
4054 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4055 t, t3, t4, reductions, mem);
4056 else
4057 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4059 else
4060 t = build_call_expr (builtin_decl_explicit (start_fn),
4061 6, t5, t0, t1, t2, t3, t4);
4064 if (TREE_TYPE (t) != boolean_type_node)
4065 t = fold_build2 (NE_EXPR, boolean_type_node,
4066 t, build_int_cst (TREE_TYPE (t), 0));
4067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4068 true, GSI_SAME_STMT);
4069 if (arr && !TREE_STATIC (arr))
4071 tree clobber = build_clobber (TREE_TYPE (arr));
4072 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4073 GSI_SAME_STMT);
4075 if (fd->have_pointer_condtemp)
4076 expand_omp_build_assign (&gsi, condtemp, memv, false);
4077 if (fd->have_reductemp)
4079 gimple *g = gsi_stmt (gsi);
4080 gsi_remove (&gsi, true);
4081 release_ssa_name (gimple_assign_lhs (g));
4083 entry_bb = region->entry;
4084 gsi = gsi_last_nondebug_bb (entry_bb);
4086 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4088 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4090 /* Remove the GIMPLE_OMP_FOR statement. */
4091 gsi_remove (&gsi, true);
4093 if (gsi_end_p (gsif))
4094 gsif = gsi_after_labels (gsi_bb (gsif));
4095 gsi_next (&gsif);
4097 /* Iteration setup for sequential loop goes in L0_BB. */
4098 tree startvar = fd->loop.v;
4099 tree endvar = NULL_TREE;
4101 if (gimple_omp_for_combined_p (fd->for_stmt))
4103 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4104 && gimple_omp_for_kind (inner_stmt)
4105 == GF_OMP_FOR_KIND_SIMD);
4106 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4107 OMP_CLAUSE__LOOPTEMP_);
4108 gcc_assert (innerc);
4109 startvar = OMP_CLAUSE_DECL (innerc);
4110 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4111 OMP_CLAUSE__LOOPTEMP_);
4112 gcc_assert (innerc);
4113 endvar = OMP_CLAUSE_DECL (innerc);
4116 gsi = gsi_start_bb (l0_bb);
4117 t = istart0;
4118 if (fd->ordered && fd->collapse == 1)
4119 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4120 fold_convert (fd->iter_type, fd->loop.step));
4121 else if (bias)
4122 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4123 if (fd->ordered && fd->collapse == 1)
4125 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4126 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4127 fd->loop.n1, fold_convert (sizetype, t));
4128 else
4130 t = fold_convert (TREE_TYPE (startvar), t);
4131 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4132 fd->loop.n1, t);
4135 else
4137 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4138 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4139 t = fold_convert (TREE_TYPE (startvar), t);
4141 t = force_gimple_operand_gsi (&gsi, t,
4142 DECL_P (startvar)
4143 && TREE_ADDRESSABLE (startvar),
4144 NULL_TREE, false, GSI_CONTINUE_LINKING);
4145 assign_stmt = gimple_build_assign (startvar, t);
4146 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147 if (cond_var)
4149 tree itype = TREE_TYPE (cond_var);
4150 /* For lastprivate(conditional:) itervar, we need an iteration
4151 counter that starts at an unsigned non-zero value and increases.
4152 Prefer as few IVs as possible, so if we can use startvar
4153 itself, use that, or startvar + constant (those would be
4154 incremented with step), and as a last resort use s0 + 1,
4155 incremented by 1.  */
4156 if ((fd->ordered && fd->collapse == 1)
4157 || bias
4158 || POINTER_TYPE_P (type)
4159 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4160 || fd->loop.cond_code != LT_EXPR)
4161 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4162 build_int_cst (itype, 1));
4163 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4164 t = fold_convert (itype, t);
4165 else
4167 tree c = fold_convert (itype, fd->loop.n1);
4168 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4169 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4171 t = force_gimple_operand_gsi (&gsi, t, false,
4172 NULL_TREE, false, GSI_CONTINUE_LINKING);
4173 assign_stmt = gimple_build_assign (cond_var, t);
4174 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4177 t = iend0;
4178 if (fd->ordered && fd->collapse == 1)
4179 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4180 fold_convert (fd->iter_type, fd->loop.step));
4181 else if (bias)
4182 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4183 if (fd->ordered && fd->collapse == 1)
4185 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4186 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4187 fd->loop.n1, fold_convert (sizetype, t));
4188 else
4190 t = fold_convert (TREE_TYPE (startvar), t);
4191 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4192 fd->loop.n1, t);
4195 else
4197 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4198 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4199 t = fold_convert (TREE_TYPE (startvar), t);
4201 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4202 false, GSI_CONTINUE_LINKING);
4203 if (endvar)
4205 assign_stmt = gimple_build_assign (endvar, iend);
4206 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4207 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4208 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4209 else
4210 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4211 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4213 /* Handle linear clause adjustments. */
4214 tree itercnt = NULL_TREE;
4215 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4216 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4217 c; c = OMP_CLAUSE_CHAIN (c))
4218 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4219 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4221 tree d = OMP_CLAUSE_DECL (c);
4222 bool is_ref = omp_is_reference (d);
4223 tree t = d, a, dest;
4224 if (is_ref)
4225 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4226 tree type = TREE_TYPE (t);
4227 if (POINTER_TYPE_P (type))
4228 type = sizetype;
4229 dest = unshare_expr (t);
4230 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4231 expand_omp_build_assign (&gsif, v, t);
4232 if (itercnt == NULL_TREE)
4234 itercnt = startvar;
4235 tree n1 = fd->loop.n1;
4236 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4238 itercnt
4239 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4240 itercnt);
4241 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4243 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4244 itercnt, n1);
4245 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4246 itercnt, fd->loop.step);
4247 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4248 NULL_TREE, false,
4249 GSI_CONTINUE_LINKING);
4251 a = fold_build2 (MULT_EXPR, type,
4252 fold_convert (type, itercnt),
4253 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4254 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4255 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4256 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4257 false, GSI_CONTINUE_LINKING);
4258 assign_stmt = gimple_build_assign (dest, t);
4259 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4261 if (fd->collapse > 1)
4262 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4264 if (fd->ordered)
4266 /* Until now, the counts array contained the number of iterations
4267 (or a variable containing it) for the ith loop. From now on, we
4268 need those counts only for the collapsed loops, and only for the
4269 2nd till the last collapsed one. Move those one element earlier;
4270 we'll use counts[fd->collapse - 1] for the first source/sink
4271 iteration counter and so on, and counts[fd->ordered]
4272 as the array holding the current counter values for
4273 depend(source). */
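/* Editorial illustration (not part of the original source): with
   collapse(2) ordered(4), counts[0] now holds the iteration count of
   the 2nd collapsed loop, counts[1] the doacross counter of the first
   (collapsed) dimension, counts[2] and counts[3] those of the two
   remaining ordered loops, and counts[4] the depend(source) array.  */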
4274 if (fd->collapse > 1)
4275 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4276 if (broken_loop)
4278 int i;
4279 for (i = fd->collapse; i < fd->ordered; i++)
4281 tree type = TREE_TYPE (fd->loops[i].v);
4282 tree this_cond
4283 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4284 fold_convert (type, fd->loops[i].n1),
4285 fold_convert (type, fd->loops[i].n2));
4286 if (!integer_onep (this_cond))
4287 break;
4289 if (i < fd->ordered)
4291 cont_bb
4292 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4293 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4294 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4295 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4296 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4297 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4298 make_edge (cont_bb, l1_bb, 0);
4299 l2_bb = create_empty_bb (cont_bb);
4300 broken_loop = false;
4303 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4304 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4305 ordered_lastprivate);
4306 if (counts[fd->collapse - 1])
4308 gcc_assert (fd->collapse == 1);
4309 gsi = gsi_last_bb (l0_bb);
4310 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4311 istart0, true);
4312 gsi = gsi_last_bb (cont_bb);
4313 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4314 build_int_cst (fd->iter_type, 1));
4315 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4316 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4317 size_zero_node, NULL_TREE, NULL_TREE);
4318 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4319 t = counts[fd->collapse - 1];
4321 else if (fd->collapse > 1)
4322 t = fd->loop.v;
4323 else
4325 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4326 fd->loops[0].v, fd->loops[0].n1);
4327 t = fold_convert (fd->iter_type, t);
4329 gsi = gsi_last_bb (l0_bb);
4330 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4331 size_zero_node, NULL_TREE, NULL_TREE);
4332 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4333 false, GSI_CONTINUE_LINKING);
4334 expand_omp_build_assign (&gsi, aref, t, true);
4337 if (!broken_loop)
4339 /* Code to control the increment and predicate for the sequential
4340 loop goes in the CONT_BB. */
4341 gsi = gsi_last_nondebug_bb (cont_bb);
4342 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4343 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4344 vmain = gimple_omp_continue_control_use (cont_stmt);
4345 vback = gimple_omp_continue_control_def (cont_stmt);
4347 if (cond_var)
4349 tree itype = TREE_TYPE (cond_var);
4350 tree t2;
4351 if ((fd->ordered && fd->collapse == 1)
4352 || bias
4353 || POINTER_TYPE_P (type)
4354 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4355 || fd->loop.cond_code != LT_EXPR)
4356 t2 = build_int_cst (itype, 1);
4357 else
4358 t2 = fold_convert (itype, fd->loop.step);
4359 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4360 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4361 NULL_TREE, true, GSI_SAME_STMT);
4362 assign_stmt = gimple_build_assign (cond_var, t2);
4363 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4366 if (!gimple_omp_for_combined_p (fd->for_stmt))
4368 if (POINTER_TYPE_P (type))
4369 t = fold_build_pointer_plus (vmain, fd->loop.step);
4370 else
4371 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4372 t = force_gimple_operand_gsi (&gsi, t,
4373 DECL_P (vback)
4374 && TREE_ADDRESSABLE (vback),
4375 NULL_TREE, true, GSI_SAME_STMT);
4376 assign_stmt = gimple_build_assign (vback, t);
4377 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4379 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4381 tree tem;
4382 if (fd->collapse > 1)
4383 tem = fd->loop.v;
4384 else
4386 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4387 fd->loops[0].v, fd->loops[0].n1);
4388 tem = fold_convert (fd->iter_type, tem);
4390 tree aref = build4 (ARRAY_REF, fd->iter_type,
4391 counts[fd->ordered], size_zero_node,
4392 NULL_TREE, NULL_TREE);
4393 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4394 true, GSI_SAME_STMT);
4395 expand_omp_build_assign (&gsi, aref, tem);
4398 t = build2 (fd->loop.cond_code, boolean_type_node,
4399 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4400 iend);
4401 gcond *cond_stmt = gimple_build_cond_empty (t);
4402 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4405 /* Remove GIMPLE_OMP_CONTINUE. */
4406 gsi_remove (&gsi, true);
4408 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4409 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4411 /* Emit code to get the next parallel iteration in L2_BB. */
4412 gsi = gsi_start_bb (l2_bb);
4414 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4415 build_fold_addr_expr (istart0),
4416 build_fold_addr_expr (iend0));
4417 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4418 false, GSI_CONTINUE_LINKING);
4419 if (TREE_TYPE (t) != boolean_type_node)
4420 t = fold_build2 (NE_EXPR, boolean_type_node,
4421 t, build_int_cst (TREE_TYPE (t), 0));
4422 gcond *cond_stmt = gimple_build_cond_empty (t);
4423 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4426 /* Add the loop cleanup function. */
4427 gsi = gsi_last_nondebug_bb (exit_bb);
4428 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4429 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4430 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4431 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4432 else
4433 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4434 gcall *call_stmt = gimple_build_call (t, 0);
4435 if (fd->ordered)
4437 tree arr = counts[fd->ordered];
4438 tree clobber = build_clobber (TREE_TYPE (arr));
4439 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4440 GSI_SAME_STMT);
4442 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4444 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4445 if (fd->have_reductemp)
4447 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4448 gimple_call_lhs (call_stmt));
4449 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4452 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4453 gsi_remove (&gsi, true);
4455 /* Connect the new blocks. */
4456 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4457 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4459 if (!broken_loop)
4461 gimple_seq phis;
4463 e = find_edge (cont_bb, l3_bb);
4464 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4466 phis = phi_nodes (l3_bb);
4467 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4469 gimple *phi = gsi_stmt (gsi);
4470 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4471 PHI_ARG_DEF_FROM_EDGE (phi, e));
4473 remove_edge (e);
4475 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4476 e = find_edge (cont_bb, l1_bb);
4477 if (e == NULL)
4479 e = BRANCH_EDGE (cont_bb);
4480 gcc_assert (single_succ (e->dest) == l1_bb);
4482 if (gimple_omp_for_combined_p (fd->for_stmt))
4484 remove_edge (e);
4485 e = NULL;
4487 else if (fd->collapse > 1)
4489 remove_edge (e);
4490 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4492 else
4493 e->flags = EDGE_TRUE_VALUE;
4494 if (e)
4496 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4497 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4499 else
4501 e = find_edge (cont_bb, l2_bb);
4502 e->flags = EDGE_FALLTHRU;
4504 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4506 if (gimple_in_ssa_p (cfun))
4508 /* Add phis to the outer loop that connect to the phis in the inner,
4509 original loop, and move the loop entry value of the inner phi to
4510 the loop entry value of the outer phi. */
4511 gphi_iterator psi;
4512 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4514 location_t locus;
4515 gphi *nphi;
4516 gphi *exit_phi = psi.phi ();
4518 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4519 continue;
4521 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4522 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4524 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4525 edge latch_to_l1 = find_edge (latch, l1_bb);
4526 gphi *inner_phi
4527 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4529 tree t = gimple_phi_result (exit_phi);
4530 tree new_res = copy_ssa_name (t, NULL);
4531 nphi = create_phi_node (new_res, l0_bb);
4533 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4534 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4535 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4536 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4537 add_phi_arg (nphi, t, entry_to_l0, locus);
4539 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4540 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4542 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4546 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4547 recompute_dominator (CDI_DOMINATORS, l2_bb));
4548 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4549 recompute_dominator (CDI_DOMINATORS, l3_bb));
4550 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4551 recompute_dominator (CDI_DOMINATORS, l0_bb));
4552 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4553 recompute_dominator (CDI_DOMINATORS, l1_bb));
4555 /* We enter expand_omp_for_generic with a loop. This original loop may
4556 have its own loop struct, or it may be part of an outer loop struct
4557 (which may be the fake loop). */
4558 class loop *outer_loop = entry_bb->loop_father;
4559 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4561 add_bb_to_loop (l2_bb, outer_loop);
4563 /* We've added a new loop around the original loop. Allocate the
4564 corresponding loop struct. */
4565 class loop *new_loop = alloc_loop ();
4566 new_loop->header = l0_bb;
4567 new_loop->latch = l2_bb;
4568 add_loop (new_loop, outer_loop);
4570 /* Allocate a loop structure for the original loop unless we already
4571 had one. */
4572 if (!orig_loop_has_loop_struct
4573 && !gimple_omp_for_combined_p (fd->for_stmt))
4575 class loop *orig_loop = alloc_loop ();
4576 orig_loop->header = l1_bb;
4577 /* The loop may have multiple latches. */
4578 add_loop (orig_loop, new_loop);
4583 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
4584 compute the needed allocation size; if !ALLOC it is for team
4585 allocations, if ALLOC for thread allocations.  SZ is the initial size
4586 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4587 allocation in bytes and CNT the number of elements of each array; for
4588 !ALLOC CNT is omp_get_num_threads (), for ALLOC the number of iterations
4589 handled by the current thread.  If PTR is non-NULL, it is the start of
4590 the allocation and this routine assigns pointers to the corresponding
4591 arrays to OMP_CLAUSE_DECL (c) of the _scantemp_ clauses.  */
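/* Editorial illustration (not part of the original source): for two
   _scantemp_ arrays of int and long long with CNT threads, the sizing
   pass (PTR == NULL) returns roughly SZ, rounded up for alignment, plus
   CNT * (sizeof (int) + sizeof (long long)), and the assigning pass
   (PTR != NULL) carves that block into the two arrays, realigning PTR
   between them as computed below.  */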
4593 static tree
4594 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4595 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4596 gimple_stmt_iterator *gsi, bool alloc)
4598 tree eltsz = NULL_TREE;
4599 unsigned HOST_WIDE_INT preval = 0;
4600 if (ptr && sz)
4601 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4602 ptr, size_int (sz));
4603 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4604 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4605 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4606 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4608 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4609 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4610 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4612 unsigned HOST_WIDE_INT szl
4613 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4614 szl = least_bit_hwi (szl);
4615 if (szl)
4616 al = MIN (al, szl);
4618 if (ptr == NULL_TREE)
4620 if (eltsz == NULL_TREE)
4621 eltsz = TYPE_SIZE_UNIT (pointee_type);
4622 else
4623 eltsz = size_binop (PLUS_EXPR, eltsz,
4624 TYPE_SIZE_UNIT (pointee_type));
4626 if (preval == 0 && al <= alloc_align)
4628 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4629 sz += diff;
4630 if (diff && ptr)
4631 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4632 ptr, size_int (diff));
4634 else if (al > preval)
4636 if (ptr)
4638 ptr = fold_convert (pointer_sized_int_node, ptr);
4639 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4640 build_int_cst (pointer_sized_int_node,
4641 al - 1));
4642 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4643 build_int_cst (pointer_sized_int_node,
4644 -(HOST_WIDE_INT) al));
4645 ptr = fold_convert (ptr_type_node, ptr);
4647 else
4648 sz += al - 1;
4650 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4651 preval = al;
4652 else
4653 preval = 1;
4654 if (ptr)
4656 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4657 ptr = OMP_CLAUSE_DECL (c);
4658 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4659 size_binop (MULT_EXPR, cnt,
4660 TYPE_SIZE_UNIT (pointee_type)));
4664 if (ptr == NULL_TREE)
4666 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4667 if (sz)
4668 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4669 return eltsz;
4671 else
4672 return ptr;
4675 /* Return the last _looptemp_ clause if one has been created for
4676 lastprivate on distribute parallel for{, simd} or taskloop.
4677 FD is the loop data and INNERC should be the second _looptemp_
4678 clause (the one holding the end of the range).
4679 This is followed by collapse - 1 _looptemp_ clauses for the
4680 counts[1] and up, and for triangular loops followed by 4
4681 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4682 one factor and one adjn1). After this there is optionally one
4683 _looptemp_ clause that this function returns. */
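/* Editorial illustration (not part of the original source): for a
   rectangular collapse(3) taskloop the chain is start, end, counts[1],
   counts[2][, lastprivate temp], so with INNERC pointing at "end" the
   function below skips collapse - 1 = 2 clauses and returns the
   optional trailing one, or NULL_TREE if it was not created.  */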
4685 static tree
4686 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4688 gcc_assert (innerc);
4689 int count = fd->collapse - 1;
4690 if (fd->non_rect
4691 && fd->last_nonrect == fd->first_nonrect + 1
4692 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4693 count += 4;
4694 for (int i = 0; i < count; i++)
4696 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4697 OMP_CLAUSE__LOOPTEMP_);
4698 gcc_assert (innerc);
4700 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4701 OMP_CLAUSE__LOOPTEMP_);
4704 /* A subroutine of expand_omp_for. Generate code for a parallel
4705 loop with static schedule and no specified chunk size. Given
4706 parameters:
4708 for (V = N1; V cond N2; V += STEP) BODY;
4710 where COND is "<" or ">", we generate pseudocode
4712 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4713 if (cond is <)
4714 adj = STEP - 1;
4715 else
4716 adj = STEP + 1;
4717 if ((__typeof (V)) -1 > 0 && cond is >)
4718 n = -(adj + N2 - N1) / -STEP;
4719 else
4720 n = (adj + N2 - N1) / STEP;
4721 q = n / nthreads;
4722 tt = n % nthreads;
4723 if (threadid < tt) goto L3; else goto L4;
4725 tt = 0;
4726 q = q + 1;
4728 s0 = q * threadid + tt;
4729 e0 = s0 + q;
4730 V = s0 * STEP + N1;
4731 if (s0 >= e0) goto L2; else goto L0;
4733 e = e0 * STEP + N1;
4735 BODY;
4736 V += STEP;
4737 if (V cond e) goto L1;
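/* Editorial illustration (not part of the original source): with
   n = 10 and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1 take
   q + 1 = 3 iterations each ([0,3) and [3,6)) while threads 2 and 3
   take 2 ([6,8) and [8,10)), matching the s0/e0 computation above.  */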
4741 static void
4742 expand_omp_for_static_nochunk (struct omp_region *region,
4743 struct omp_for_data *fd,
4744 gimple *inner_stmt)
4746 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4747 tree type, itype, vmain, vback;
4748 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4749 basic_block body_bb, cont_bb, collapse_bb = NULL;
4750 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4751 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4752 gimple_stmt_iterator gsi, gsip;
4753 edge ep;
4754 bool broken_loop = region->cont == NULL;
4755 tree *counts = NULL;
4756 tree n1, n2, step;
4757 tree reductions = NULL_TREE;
4758 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4760 itype = type = TREE_TYPE (fd->loop.v);
4761 if (POINTER_TYPE_P (type))
4762 itype = signed_type_for (type);
4764 entry_bb = region->entry;
4765 cont_bb = region->cont;
4766 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4767 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4768 gcc_assert (broken_loop
4769 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4770 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4771 body_bb = single_succ (seq_start_bb);
4772 if (!broken_loop)
4774 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4775 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4778 exit_bb = region->exit;
4780 /* Iteration space partitioning goes in ENTRY_BB. */
4781 gsi = gsi_last_nondebug_bb (entry_bb);
4782 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4783 gsip = gsi;
4784 gsi_prev (&gsip);
4786 if (fd->collapse > 1)
4788 int first_zero_iter = -1, dummy = -1;
4789 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4791 counts = XALLOCAVEC (tree, fd->collapse);
4792 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4793 fin_bb, first_zero_iter,
4794 dummy_bb, dummy, l2_dom_bb);
4795 t = NULL_TREE;
4797 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4798 t = integer_one_node;
4799 else
4800 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4801 fold_convert (type, fd->loop.n1),
4802 fold_convert (type, fd->loop.n2));
4803 if (fd->collapse == 1
4804 && TYPE_UNSIGNED (type)
4805 && (t == NULL_TREE || !integer_onep (t)))
4807 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4808 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4809 true, GSI_SAME_STMT);
4810 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4811 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4812 true, GSI_SAME_STMT);
4813 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4814 NULL_TREE, NULL_TREE);
4815 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4816 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4817 expand_omp_regimplify_p, NULL, NULL)
4818 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4819 expand_omp_regimplify_p, NULL, NULL))
4821 gsi = gsi_for_stmt (cond_stmt);
4822 gimple_regimplify_operands (cond_stmt, &gsi);
4824 ep = split_block (entry_bb, cond_stmt);
4825 ep->flags = EDGE_TRUE_VALUE;
4826 entry_bb = ep->dest;
4827 ep->probability = profile_probability::very_likely ();
4828 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4829 ep->probability = profile_probability::very_unlikely ();
4830 if (gimple_in_ssa_p (cfun))
4832 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4833 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4834 !gsi_end_p (gpi); gsi_next (&gpi))
4836 gphi *phi = gpi.phi ();
4837 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4838 ep, UNKNOWN_LOCATION);
4841 gsi = gsi_last_bb (entry_bb);
4844 if (fd->lastprivate_conditional)
4846 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4847 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4848 if (fd->have_pointer_condtemp)
4849 condtemp = OMP_CLAUSE_DECL (c);
4850 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4851 cond_var = OMP_CLAUSE_DECL (c);
4853 if (fd->have_reductemp
4854 /* For scan, we don't want to reinitialize condtemp before the
4855 second loop. */
4856 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4857 || fd->have_nonctrl_scantemp)
4859 tree t1 = build_int_cst (long_integer_type_node, 0);
4860 tree t2 = build_int_cst (long_integer_type_node, 1);
4861 tree t3 = build_int_cstu (long_integer_type_node,
4862 (HOST_WIDE_INT_1U << 31) + 1);
4863 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4864 gimple_stmt_iterator gsi2 = gsi_none ();
4865 gimple *g = NULL;
4866 tree mem = null_pointer_node, memv = NULL_TREE;
4867 unsigned HOST_WIDE_INT condtemp_sz = 0;
4868 unsigned HOST_WIDE_INT alloc_align = 0;
4869 if (fd->have_reductemp)
4871 gcc_assert (!fd->have_nonctrl_scantemp);
4872 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4873 reductions = OMP_CLAUSE_DECL (c);
4874 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4875 g = SSA_NAME_DEF_STMT (reductions);
4876 reductions = gimple_assign_rhs1 (g);
4877 OMP_CLAUSE_DECL (c) = reductions;
4878 gsi2 = gsi_for_stmt (g);
4880 else
4882 if (gsi_end_p (gsip))
4883 gsi2 = gsi_after_labels (region->entry);
4884 else
4885 gsi2 = gsip;
4886 reductions = null_pointer_node;
4888 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4890 tree type;
4891 if (fd->have_pointer_condtemp)
4892 type = TREE_TYPE (condtemp);
4893 else
4894 type = ptr_type_node;
4895 memv = create_tmp_var (type);
4896 TREE_ADDRESSABLE (memv) = 1;
4897 unsigned HOST_WIDE_INT sz = 0;
4898 tree size = NULL_TREE;
4899 if (fd->have_pointer_condtemp)
4901 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4902 sz *= fd->lastprivate_conditional;
4903 condtemp_sz = sz;
4905 if (fd->have_nonctrl_scantemp)
4907 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4908 gimple *g = gimple_build_call (nthreads, 0);
4909 nthreads = create_tmp_var (integer_type_node);
4910 gimple_call_set_lhs (g, nthreads);
4911 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4912 nthreads = fold_convert (sizetype, nthreads);
4913 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4914 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4915 alloc_align, nthreads, NULL,
4916 false);
4917 size = fold_convert (type, size);
4919 else
4920 size = build_int_cst (type, sz);
4921 expand_omp_build_assign (&gsi2, memv, size, false);
4922 mem = build_fold_addr_expr (memv);
4924 tree t
4925 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4926 9, t1, t2, t2, t3, t1, null_pointer_node,
4927 null_pointer_node, reductions, mem);
4928 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4929 true, GSI_SAME_STMT);
4930 if (fd->have_pointer_condtemp)
4931 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4932 if (fd->have_nonctrl_scantemp)
4934 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4935 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4936 alloc_align, nthreads, &gsi2, false);
4938 if (fd->have_reductemp)
4940 gsi_remove (&gsi2, true);
4941 release_ssa_name (gimple_assign_lhs (g));
4944 switch (gimple_omp_for_kind (fd->for_stmt))
4946 case GF_OMP_FOR_KIND_FOR:
4947 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4948 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4949 break;
4950 case GF_OMP_FOR_KIND_DISTRIBUTE:
4951 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4952 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4953 break;
4954 default:
4955 gcc_unreachable ();
4957 nthreads = build_call_expr (nthreads, 0);
4958 nthreads = fold_convert (itype, nthreads);
4959 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4960 true, GSI_SAME_STMT);
4961 threadid = build_call_expr (threadid, 0);
4962 threadid = fold_convert (itype, threadid);
4963 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4964 true, GSI_SAME_STMT);
4966 n1 = fd->loop.n1;
4967 n2 = fd->loop.n2;
4968 step = fd->loop.step;
4969 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4971 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4972 OMP_CLAUSE__LOOPTEMP_);
4973 gcc_assert (innerc);
4974 n1 = OMP_CLAUSE_DECL (innerc);
4975 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4976 OMP_CLAUSE__LOOPTEMP_);
4977 gcc_assert (innerc);
4978 n2 = OMP_CLAUSE_DECL (innerc);
4980 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4981 true, NULL_TREE, true, GSI_SAME_STMT);
4982 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4983 true, NULL_TREE, true, GSI_SAME_STMT);
4984 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4985 true, NULL_TREE, true, GSI_SAME_STMT);
4987 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4988 t = fold_build2 (PLUS_EXPR, itype, step, t);
4989 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4990 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4991 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4992 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4993 fold_build1 (NEGATE_EXPR, itype, t),
4994 fold_build1 (NEGATE_EXPR, itype, step));
4995 else
4996 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4997 t = fold_convert (itype, t);
4998 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5000 q = create_tmp_reg (itype, "q");
5001 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5003 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5005 tt = create_tmp_reg (itype, "tt");
5006 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5007 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5008 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5010 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5011 gcond *cond_stmt = gimple_build_cond_empty (t);
5012 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5014 second_bb = split_block (entry_bb, cond_stmt)->dest;
5015 gsi = gsi_last_nondebug_bb (second_bb);
5016 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5018 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5019 GSI_SAME_STMT);
5020 gassign *assign_stmt
5021 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5022 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5024 third_bb = split_block (second_bb, assign_stmt)->dest;
5025 gsi = gsi_last_nondebug_bb (third_bb);
5026 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5028 if (fd->have_nonctrl_scantemp)
5030 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5031 tree controlp = NULL_TREE, controlb = NULL_TREE;
5032 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5033 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5034 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5036 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5037 controlb = OMP_CLAUSE_DECL (c);
5038 else
5039 controlp = OMP_CLAUSE_DECL (c);
5040 if (controlb && controlp)
5041 break;
5043 gcc_assert (controlp && controlb);
5044 tree cnt = create_tmp_var (sizetype);
5045 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5046 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5047 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5048 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5049 alloc_align, cnt, NULL, true);
5050 tree size = create_tmp_var (sizetype);
5051 expand_omp_build_assign (&gsi, size, sz, false);
5052 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5053 size, size_int (16384));
5054 expand_omp_build_assign (&gsi, controlb, cmp);
5055 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5056 NULL_TREE, NULL_TREE);
5057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5058 fourth_bb = split_block (third_bb, g)->dest;
5059 gsi = gsi_last_nondebug_bb (fourth_bb);
5060 /* FIXME: Once we have allocators, this should use allocator. */
5061 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5062 gimple_call_set_lhs (g, controlp);
5063 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5064 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5065 &gsi, true);
5066 gsi_prev (&gsi);
5067 g = gsi_stmt (gsi);
5068 fifth_bb = split_block (fourth_bb, g)->dest;
5069 gsi = gsi_last_nondebug_bb (fifth_bb);
5071 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5072 gimple_call_set_lhs (g, controlp);
5073 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5074 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5075 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5076 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5077 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5079 tree tmp = create_tmp_var (sizetype);
5080 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5081 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5082 TYPE_SIZE_UNIT (pointee_type));
5083 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5084 g = gimple_build_call (alloca_decl, 2, tmp,
5085 size_int (TYPE_ALIGN (pointee_type)));
5086 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5090 sixth_bb = split_block (fifth_bb, g)->dest;
5091 gsi = gsi_last_nondebug_bb (sixth_bb);
5094 t = build2 (MULT_EXPR, itype, q, threadid);
5095 t = build2 (PLUS_EXPR, itype, t, tt);
5096 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5098 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5099 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5101 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5102 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5104 /* Remove the GIMPLE_OMP_FOR statement. */
5105 gsi_remove (&gsi, true);
5107 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5108 gsi = gsi_start_bb (seq_start_bb);
5110 tree startvar = fd->loop.v;
5111 tree endvar = NULL_TREE;
5113 if (gimple_omp_for_combined_p (fd->for_stmt))
5115 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5116 ? gimple_omp_parallel_clauses (inner_stmt)
5117 : gimple_omp_for_clauses (inner_stmt);
5118 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5119 gcc_assert (innerc);
5120 startvar = OMP_CLAUSE_DECL (innerc);
5121 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5122 OMP_CLAUSE__LOOPTEMP_);
5123 gcc_assert (innerc);
5124 endvar = OMP_CLAUSE_DECL (innerc);
5125 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5126 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5128 innerc = find_lastprivate_looptemp (fd, innerc);
5129 if (innerc)
5131 /* If needed (distribute parallel for with lastprivate),
5132 propagate down the total number of iterations. */
5133 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5134 fd->loop.n2);
5135 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5136 GSI_CONTINUE_LINKING);
5137 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5138 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5142 t = fold_convert (itype, s0);
5143 t = fold_build2 (MULT_EXPR, itype, t, step);
5144 if (POINTER_TYPE_P (type))
5146 t = fold_build_pointer_plus (n1, t);
5147 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5148 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5149 t = fold_convert (signed_type_for (type), t);
5151 else
5152 t = fold_build2 (PLUS_EXPR, type, t, n1);
5153 t = fold_convert (TREE_TYPE (startvar), t);
5154 t = force_gimple_operand_gsi (&gsi, t,
5155 DECL_P (startvar)
5156 && TREE_ADDRESSABLE (startvar),
5157 NULL_TREE, false, GSI_CONTINUE_LINKING);
5158 assign_stmt = gimple_build_assign (startvar, t);
5159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160 if (cond_var)
5162 tree itype = TREE_TYPE (cond_var);
5163 /* For the lastprivate(conditional:) itervar, we need an iteration
5164 counter that starts at a non-zero unsigned value and increases.
5165 Prefer as few IVs as possible, so if we can use startvar
5166 itself, use that, or startvar + constant (those would be
5167 incremented with step), and as a last resort use s0 + 1,
5168 incremented by 1 each iteration. */
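/* Editorial illustration, not part of the original source: the clause
   handled here corresponds to OpenMP user code such as

     int x;
     #pragma omp parallel for lastprivate(conditional: x)
     for (int i = 0; i < n; i++)
       if (p[i])
	 x = i;

   where x must end up with the value from the last iteration that
   actually assigned it, hence the monotonically increasing per-thread
   iteration counter built below.  n and p are placeholder names.  */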
5169 if (POINTER_TYPE_P (type)
5170 || TREE_CODE (n1) != INTEGER_CST
5171 || fd->loop.cond_code != LT_EXPR)
5172 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5173 build_int_cst (itype, 1));
5174 else if (tree_int_cst_sgn (n1) == 1)
5175 t = fold_convert (itype, t);
5176 else
5178 tree c = fold_convert (itype, n1);
5179 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5180 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5182 t = force_gimple_operand_gsi (&gsi, t, false,
5183 NULL_TREE, false, GSI_CONTINUE_LINKING);
5184 assign_stmt = gimple_build_assign (cond_var, t);
5185 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5188 t = fold_convert (itype, e0);
5189 t = fold_build2 (MULT_EXPR, itype, t, step);
5190 if (POINTER_TYPE_P (type))
5192 t = fold_build_pointer_plus (n1, t);
5193 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5194 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5195 t = fold_convert (signed_type_for (type), t);
5197 else
5198 t = fold_build2 (PLUS_EXPR, type, t, n1);
5199 t = fold_convert (TREE_TYPE (startvar), t);
5200 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5201 false, GSI_CONTINUE_LINKING);
5202 if (endvar)
5204 assign_stmt = gimple_build_assign (endvar, e);
5205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5206 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5207 assign_stmt = gimple_build_assign (fd->loop.v, e);
5208 else
5209 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5210 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5212 /* Handle linear clause adjustments. */
5213 tree itercnt = NULL_TREE;
5214 tree *nonrect_bounds = NULL;
5215 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5216 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5217 c; c = OMP_CLAUSE_CHAIN (c))
5218 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5219 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5221 tree d = OMP_CLAUSE_DECL (c);
5222 bool is_ref = omp_is_reference (d);
5223 tree t = d, a, dest;
5224 if (is_ref)
5225 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5226 if (itercnt == NULL_TREE)
5228 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5230 itercnt = fold_build2 (MINUS_EXPR, itype,
5231 fold_convert (itype, n1),
5232 fold_convert (itype, fd->loop.n1));
5233 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5234 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5235 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5236 NULL_TREE, false,
5237 GSI_CONTINUE_LINKING);
5239 else
5240 itercnt = s0;
5242 tree type = TREE_TYPE (t);
5243 if (POINTER_TYPE_P (type))
5244 type = sizetype;
5245 a = fold_build2 (MULT_EXPR, type,
5246 fold_convert (type, itercnt),
5247 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5248 dest = unshare_expr (t);
5249 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5250 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5251 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5252 false, GSI_CONTINUE_LINKING);
5253 assign_stmt = gimple_build_assign (dest, t);
5254 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
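/* Editorial illustration, not part of the original source: the
   adjustment above corresponds to OpenMP user code such as

     int x = 0;
     #pragma omp parallel for linear(x:2)
     for (int i = 0; i < n; i++)
       use (x);

   where on entry to its subchunk each thread must see
   x = x_original + iteration_number * 2, which is what
   dest = t + itercnt * OMP_CLAUSE_LINEAR_STEP computes above.
   use, n and x_original are placeholder names.  */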
5256 if (fd->collapse > 1)
5258 if (fd->non_rect)
5260 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5261 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5263 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5264 startvar);
5267 if (!broken_loop)
5269 /* The code controlling the sequential loop replaces the
5270 GIMPLE_OMP_CONTINUE. */
5271 gsi = gsi_last_nondebug_bb (cont_bb);
5272 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5273 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5274 vmain = gimple_omp_continue_control_use (cont_stmt);
5275 vback = gimple_omp_continue_control_def (cont_stmt);
5277 if (cond_var)
5279 tree itype = TREE_TYPE (cond_var);
5280 tree t2;
5281 if (POINTER_TYPE_P (type)
5282 || TREE_CODE (n1) != INTEGER_CST
5283 || fd->loop.cond_code != LT_EXPR)
5284 t2 = build_int_cst (itype, 1);
5285 else
5286 t2 = fold_convert (itype, step);
5287 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5288 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5289 NULL_TREE, true, GSI_SAME_STMT);
5290 assign_stmt = gimple_build_assign (cond_var, t2);
5291 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5294 if (!gimple_omp_for_combined_p (fd->for_stmt))
5296 if (POINTER_TYPE_P (type))
5297 t = fold_build_pointer_plus (vmain, step);
5298 else
5299 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5300 t = force_gimple_operand_gsi (&gsi, t,
5301 DECL_P (vback)
5302 && TREE_ADDRESSABLE (vback),
5303 NULL_TREE, true, GSI_SAME_STMT);
5304 assign_stmt = gimple_build_assign (vback, t);
5305 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5307 t = build2 (fd->loop.cond_code, boolean_type_node,
5308 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5309 ? t : vback, e);
5310 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5313 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5314 gsi_remove (&gsi, true);
5316 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5317 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5318 cont_bb, body_bb);
5321 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5322 gsi = gsi_last_nondebug_bb (exit_bb);
5323 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5325 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5326 if (fd->have_reductemp
5327 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5328 && !fd->have_nonctrl_scantemp))
5330 tree fn;
5331 if (t)
5332 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5333 else
5334 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5335 gcall *g = gimple_build_call (fn, 0);
5336 if (t)
5338 gimple_call_set_lhs (g, t);
5339 if (fd->have_reductemp)
5340 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5341 NOP_EXPR, t),
5342 GSI_SAME_STMT);
5344 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5346 else
5347 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5349 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5350 && !fd->have_nonctrl_scantemp)
5352 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5353 gcall *g = gimple_build_call (fn, 0);
5354 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5356 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5358 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5359 tree controlp = NULL_TREE, controlb = NULL_TREE;
5360 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5361 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5362 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5364 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5365 controlb = OMP_CLAUSE_DECL (c);
5366 else
5367 controlp = OMP_CLAUSE_DECL (c);
5368 if (controlb && controlp)
5369 break;
5371 gcc_assert (controlp && controlb);
5372 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5373 NULL_TREE, NULL_TREE);
5374 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5375 exit1_bb = split_block (exit_bb, g)->dest;
5376 gsi = gsi_after_labels (exit1_bb);
5377 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5378 controlp);
5379 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5380 exit2_bb = split_block (exit1_bb, g)->dest;
5381 gsi = gsi_after_labels (exit2_bb);
5382 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5383 controlp);
5384 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5385 exit3_bb = split_block (exit2_bb, g)->dest;
5386 gsi = gsi_after_labels (exit3_bb);
5388 gsi_remove (&gsi, true);
5390 /* Connect all the blocks. */
5391 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5392 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5393 ep = find_edge (entry_bb, second_bb);
5394 ep->flags = EDGE_TRUE_VALUE;
5395 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5396 if (fourth_bb)
5398 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5399 ep->probability
5400 = profile_probability::guessed_always ().apply_scale (1, 2);
5401 ep = find_edge (third_bb, fourth_bb);
5402 ep->flags = EDGE_TRUE_VALUE;
5403 ep->probability
5404 = profile_probability::guessed_always ().apply_scale (1, 2);
5405 ep = find_edge (fourth_bb, fifth_bb);
5406 redirect_edge_and_branch (ep, sixth_bb);
5408 else
5409 sixth_bb = third_bb;
5410 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5411 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5412 if (exit1_bb)
5414 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5415 ep->probability
5416 = profile_probability::guessed_always ().apply_scale (1, 2);
5417 ep = find_edge (exit_bb, exit1_bb);
5418 ep->flags = EDGE_TRUE_VALUE;
5419 ep->probability
5420 = profile_probability::guessed_always ().apply_scale (1, 2);
5421 ep = find_edge (exit1_bb, exit2_bb);
5422 redirect_edge_and_branch (ep, exit3_bb);
5425 if (!broken_loop)
5427 ep = find_edge (cont_bb, body_bb);
5428 if (ep == NULL)
5430 ep = BRANCH_EDGE (cont_bb);
5431 gcc_assert (single_succ (ep->dest) == body_bb);
5433 if (gimple_omp_for_combined_p (fd->for_stmt))
5435 remove_edge (ep);
5436 ep = NULL;
5438 else if (fd->collapse > 1)
5440 remove_edge (ep);
5441 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5443 else
5444 ep->flags = EDGE_TRUE_VALUE;
5445 find_edge (cont_bb, fin_bb)->flags
5446 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5449 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5450 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5451 if (fourth_bb)
5453 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5454 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5456 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5458 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5459 recompute_dominator (CDI_DOMINATORS, body_bb));
5460 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5461 recompute_dominator (CDI_DOMINATORS, fin_bb));
5462 if (exit1_bb)
5464 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5465 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5468 class loop *loop = body_bb->loop_father;
5469 if (loop != entry_bb->loop_father)
5471 gcc_assert (broken_loop || loop->header == body_bb);
5472 gcc_assert (broken_loop
5473 || loop->latch == region->cont
5474 || single_pred (loop->latch) == region->cont);
5475 return;
5478 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5480 loop = alloc_loop ();
5481 loop->header = body_bb;
5482 if (collapse_bb == NULL)
5483 loop->latch = cont_bb;
5484 add_loop (loop, body_bb->loop_father);
5488 /* Return phi in E->DEST with ARG on edge E. */
5490 static gphi *
5491 find_phi_with_arg_on_edge (tree arg, edge e)
5493 basic_block bb = e->dest;
5495 for (gphi_iterator gpi = gsi_start_phis (bb);
5496 !gsi_end_p (gpi);
5497 gsi_next (&gpi))
5499 gphi *phi = gpi.phi ();
5500 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5501 return phi;
5504 return NULL;
5507 /* A subroutine of expand_omp_for. Generate code for a parallel
5508 loop with static schedule and a specified chunk size. Given
5509 parameters:
5511 for (V = N1; V cond N2; V += STEP) BODY;
5513 where COND is "<" or ">", we generate pseudocode
5515 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5516 if (cond is <)
5517 adj = STEP - 1;
5518 else
5519 adj = STEP + 1;
5520 if ((__typeof (V)) -1 > 0 && cond is >)
5521 n = -(adj + N2 - N1) / -STEP;
5522 else
5523 n = (adj + N2 - N1) / STEP;
5524 trip = 0;
5525 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5526 here so that V is defined
5527 if the loop is not entered
5528 L0:
5529 s0 = (trip * nthreads + threadid) * CHUNK;
5530 e0 = min (s0 + CHUNK, n);
5531 if (s0 < n) goto L1; else goto L4;
5532 L1:
5533 V = s0 * STEP + N1;
5534 e = e0 * STEP + N1;
5535 L2:
5536 BODY;
5537 V += STEP;
5538 if (V cond e) goto L2; else goto L3;
5539 L3:
5540 trip += 1;
5541 goto L0;
5542 L4:
5543 */
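/* Editorial sketch, not part of the original source: the schedule in
   the pseudocode above deals out CHUNK-sized blocks of iterations
   round-robin across the threads.  A host-side simulation, with
   hypothetical names and assuming <stdio.h>:

     static void
     simulate_static_chunk (long n, long chunk, long nthreads)
     {
       for (long tid = 0; tid < nthreads; tid++)
	 for (long trip = 0; ; trip++)
	   {
	     long s0 = (trip * nthreads + tid) * chunk;
	     if (s0 >= n)
	       break;
	     long e0 = s0 + chunk < n ? s0 + chunk : n;
	     printf ("thread %ld: [%ld, %ld)\n", tid, s0, e0);
	   }
     }

   E.g. n = 10, chunk = 2, nthreads = 4 assigns thread 0 the ranges
   [0, 2) and [8, 10), thread 1 [2, 4), thread 2 [4, 6) and
   thread 3 [6, 8).  */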
5545 static void
5546 expand_omp_for_static_chunk (struct omp_region *region,
5547 struct omp_for_data *fd, gimple *inner_stmt)
5549 tree n, s0, e0, e, t;
5550 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5551 tree type, itype, vmain, vback, vextra;
5552 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5553 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5554 gimple_stmt_iterator gsi, gsip;
5555 edge se;
5556 bool broken_loop = region->cont == NULL;
5557 tree *counts = NULL;
5558 tree n1, n2, step;
5559 tree reductions = NULL_TREE;
5560 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5562 itype = type = TREE_TYPE (fd->loop.v);
5563 if (POINTER_TYPE_P (type))
5564 itype = signed_type_for (type);
5566 entry_bb = region->entry;
5567 se = split_block (entry_bb, last_stmt (entry_bb));
5568 entry_bb = se->src;
5569 iter_part_bb = se->dest;
5570 cont_bb = region->cont;
5571 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5572 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5573 gcc_assert (broken_loop
5574 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5575 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5576 body_bb = single_succ (seq_start_bb);
5577 if (!broken_loop)
5579 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5580 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5581 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5582 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5584 exit_bb = region->exit;
5586 /* Trip and adjustment setup goes in ENTRY_BB. */
5587 gsi = gsi_last_nondebug_bb (entry_bb);
5588 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5589 gsip = gsi;
5590 gsi_prev (&gsip);
5592 if (fd->collapse > 1)
5594 int first_zero_iter = -1, dummy = -1;
5595 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5597 counts = XALLOCAVEC (tree, fd->collapse);
5598 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5599 fin_bb, first_zero_iter,
5600 dummy_bb, dummy, l2_dom_bb);
5601 t = NULL_TREE;
5603 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5604 t = integer_one_node;
5605 else
5606 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5607 fold_convert (type, fd->loop.n1),
5608 fold_convert (type, fd->loop.n2));
5609 if (fd->collapse == 1
5610 && TYPE_UNSIGNED (type)
5611 && (t == NULL_TREE || !integer_onep (t)))
5613 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5614 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5615 true, GSI_SAME_STMT);
5616 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5617 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5618 true, GSI_SAME_STMT);
5619 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5620 NULL_TREE, NULL_TREE);
5621 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5622 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5623 expand_omp_regimplify_p, NULL, NULL)
5624 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5625 expand_omp_regimplify_p, NULL, NULL))
5627 gsi = gsi_for_stmt (cond_stmt);
5628 gimple_regimplify_operands (cond_stmt, &gsi);
5630 se = split_block (entry_bb, cond_stmt);
5631 se->flags = EDGE_TRUE_VALUE;
5632 entry_bb = se->dest;
5633 se->probability = profile_probability::very_likely ();
5634 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5635 se->probability = profile_probability::very_unlikely ();
5636 if (gimple_in_ssa_p (cfun))
5638 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5639 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5640 !gsi_end_p (gpi); gsi_next (&gpi))
5642 gphi *phi = gpi.phi ();
5643 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5644 se, UNKNOWN_LOCATION);
5647 gsi = gsi_last_bb (entry_bb);
5650 if (fd->lastprivate_conditional)
5652 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5653 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5654 if (fd->have_pointer_condtemp)
5655 condtemp = OMP_CLAUSE_DECL (c);
5656 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5657 cond_var = OMP_CLAUSE_DECL (c);
5659 if (fd->have_reductemp || fd->have_pointer_condtemp)
5661 tree t1 = build_int_cst (long_integer_type_node, 0);
5662 tree t2 = build_int_cst (long_integer_type_node, 1);
5663 tree t3 = build_int_cstu (long_integer_type_node,
5664 (HOST_WIDE_INT_1U << 31) + 1);
5665 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5666 gimple_stmt_iterator gsi2 = gsi_none ();
5667 gimple *g = NULL;
5668 tree mem = null_pointer_node, memv = NULL_TREE;
5669 if (fd->have_reductemp)
5671 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5672 reductions = OMP_CLAUSE_DECL (c);
5673 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5674 g = SSA_NAME_DEF_STMT (reductions);
5675 reductions = gimple_assign_rhs1 (g);
5676 OMP_CLAUSE_DECL (c) = reductions;
5677 gsi2 = gsi_for_stmt (g);
5679 else
5681 if (gsi_end_p (gsip))
5682 gsi2 = gsi_after_labels (region->entry);
5683 else
5684 gsi2 = gsip;
5685 reductions = null_pointer_node;
5687 if (fd->have_pointer_condtemp)
5689 tree type = TREE_TYPE (condtemp);
5690 memv = create_tmp_var (type);
5691 TREE_ADDRESSABLE (memv) = 1;
5692 unsigned HOST_WIDE_INT sz
5693 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5694 sz *= fd->lastprivate_conditional;
5695 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5696 false);
5697 mem = build_fold_addr_expr (memv);
5699 tree t
5700 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5701 9, t1, t2, t2, t3, t1, null_pointer_node,
5702 null_pointer_node, reductions, mem);
5703 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5704 true, GSI_SAME_STMT);
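/* Editorial note, not in the original source: the nine arguments
   appear to correspond to libgomp's
   GOMP_loop_start (start, end, incr, sched, chunk_size, istart, iend,
   reductions, mem); here the call seems to matter only for the
   reduction and condtemp bookkeeping, so the bounds are dummies.  */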
5705 if (fd->have_pointer_condtemp)
5706 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5707 if (fd->have_reductemp)
5709 gsi_remove (&gsi2, true);
5710 release_ssa_name (gimple_assign_lhs (g));
5713 switch (gimple_omp_for_kind (fd->for_stmt))
5715 case GF_OMP_FOR_KIND_FOR:
5716 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5717 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5718 break;
5719 case GF_OMP_FOR_KIND_DISTRIBUTE:
5720 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5721 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5722 break;
5723 default:
5724 gcc_unreachable ();
5726 nthreads = build_call_expr (nthreads, 0);
5727 nthreads = fold_convert (itype, nthreads);
5728 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5729 true, GSI_SAME_STMT);
5730 threadid = build_call_expr (threadid, 0);
5731 threadid = fold_convert (itype, threadid);
5732 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5733 true, GSI_SAME_STMT);
5735 n1 = fd->loop.n1;
5736 n2 = fd->loop.n2;
5737 step = fd->loop.step;
5738 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5740 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5741 OMP_CLAUSE__LOOPTEMP_);
5742 gcc_assert (innerc);
5743 n1 = OMP_CLAUSE_DECL (innerc);
5744 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5745 OMP_CLAUSE__LOOPTEMP_);
5746 gcc_assert (innerc);
5747 n2 = OMP_CLAUSE_DECL (innerc);
5749 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5750 true, NULL_TREE, true, GSI_SAME_STMT);
5751 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5752 true, NULL_TREE, true, GSI_SAME_STMT);
5753 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5754 true, NULL_TREE, true, GSI_SAME_STMT);
5755 tree chunk_size = fold_convert (itype, fd->chunk_size);
5756 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
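/* Editorial note, not in the original source: for
   schedule(simd: static, chunk_size), omp_adjust_chunk_size is
   expected to round the chunk size up to a multiple of the simd
   width, so that simd chunks are not split across threads.  */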
5757 chunk_size
5758 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5759 GSI_SAME_STMT);
5761 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5762 t = fold_build2 (PLUS_EXPR, itype, step, t);
5763 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5764 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5765 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5766 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5767 fold_build1 (NEGATE_EXPR, itype, t),
5768 fold_build1 (NEGATE_EXPR, itype, step));
5769 else
5770 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5771 t = fold_convert (itype, t);
5772 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5773 true, GSI_SAME_STMT);
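/* Editorial note, not in the original source: the computation above
   is the usual ceiling division for the trip count.  E.g. for
   for (V = 0; V < 10; V += 3) it yields
   n = (STEP - 1 + N2 - N1) / STEP = (2 + 10 - 0) / 3 = 4,
   matching the iterations V = 0, 3, 6, 9.  */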
5775 trip_var = create_tmp_reg (itype, ".trip");
5776 if (gimple_in_ssa_p (cfun))
5778 trip_init = make_ssa_name (trip_var);
5779 trip_main = make_ssa_name (trip_var);
5780 trip_back = make_ssa_name (trip_var);
5782 else
5784 trip_init = trip_var;
5785 trip_main = trip_var;
5786 trip_back = trip_var;
5789 gassign *assign_stmt
5790 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5791 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5793 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5794 t = fold_build2 (MULT_EXPR, itype, t, step);
5795 if (POINTER_TYPE_P (type))
5796 t = fold_build_pointer_plus (n1, t);
5797 else
5798 t = fold_build2 (PLUS_EXPR, type, t, n1);
5799 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5800 true, GSI_SAME_STMT);
5802 /* Remove the GIMPLE_OMP_FOR. */
5803 gsi_remove (&gsi, true);
5805 gimple_stmt_iterator gsif = gsi;
5807 /* Iteration space partitioning goes in ITER_PART_BB. */
5808 gsi = gsi_last_bb (iter_part_bb);
5810 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5811 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5812 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5813 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5814 false, GSI_CONTINUE_LINKING);
5816 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5817 t = fold_build2 (MIN_EXPR, itype, t, n);
5818 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5819 false, GSI_CONTINUE_LINKING);
5821 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5822 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5824 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5825 gsi = gsi_start_bb (seq_start_bb);
5827 tree startvar = fd->loop.v;
5828 tree endvar = NULL_TREE;
5830 if (gimple_omp_for_combined_p (fd->for_stmt))
5832 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5833 ? gimple_omp_parallel_clauses (inner_stmt)
5834 : gimple_omp_for_clauses (inner_stmt);
5835 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5836 gcc_assert (innerc);
5837 startvar = OMP_CLAUSE_DECL (innerc);
5838 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5839 OMP_CLAUSE__LOOPTEMP_);
5840 gcc_assert (innerc);
5841 endvar = OMP_CLAUSE_DECL (innerc);
5842 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5843 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5845 innerc = find_lastprivate_looptemp (fd, innerc);
5846 if (innerc)
5848 /* If needed (distribute parallel for with lastprivate),
5849 propagate down the total number of iterations. */
5850 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5851 fd->loop.n2);
5852 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5853 GSI_CONTINUE_LINKING);
5854 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5855 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5860 t = fold_convert (itype, s0);
5861 t = fold_build2 (MULT_EXPR, itype, t, step);
5862 if (POINTER_TYPE_P (type))
5864 t = fold_build_pointer_plus (n1, t);
5865 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5866 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5867 t = fold_convert (signed_type_for (type), t);
5869 else
5870 t = fold_build2 (PLUS_EXPR, type, t, n1);
5871 t = fold_convert (TREE_TYPE (startvar), t);
5872 t = force_gimple_operand_gsi (&gsi, t,
5873 DECL_P (startvar)
5874 && TREE_ADDRESSABLE (startvar),
5875 NULL_TREE, false, GSI_CONTINUE_LINKING);
5876 assign_stmt = gimple_build_assign (startvar, t);
5877 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5878 if (cond_var)
5880 tree itype = TREE_TYPE (cond_var);
5881 /* For the lastprivate(conditional:) itervar, we need an iteration
5882 counter that starts at a non-zero unsigned value and increases.
5883 Prefer as few IVs as possible, so if we can use startvar
5884 itself, use that, or startvar + constant (those would be
5885 incremented with step), and as a last resort use s0 + 1,
5886 incremented by 1 each iteration. */
5887 if (POINTER_TYPE_P (type)
5888 || TREE_CODE (n1) != INTEGER_CST
5889 || fd->loop.cond_code != LT_EXPR)
5890 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5891 build_int_cst (itype, 1));
5892 else if (tree_int_cst_sgn (n1) == 1)
5893 t = fold_convert (itype, t);
5894 else
5896 tree c = fold_convert (itype, n1);
5897 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5898 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5900 t = force_gimple_operand_gsi (&gsi, t, false,
5901 NULL_TREE, false, GSI_CONTINUE_LINKING);
5902 assign_stmt = gimple_build_assign (cond_var, t);
5903 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5906 t = fold_convert (itype, e0);
5907 t = fold_build2 (MULT_EXPR, itype, t, step);
5908 if (POINTER_TYPE_P (type))
5910 t = fold_build_pointer_plus (n1, t);
5911 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5912 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5913 t = fold_convert (signed_type_for (type), t);
5915 else
5916 t = fold_build2 (PLUS_EXPR, type, t, n1);
5917 t = fold_convert (TREE_TYPE (startvar), t);
5918 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5919 false, GSI_CONTINUE_LINKING);
5920 if (endvar)
5922 assign_stmt = gimple_build_assign (endvar, e);
5923 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5924 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5925 assign_stmt = gimple_build_assign (fd->loop.v, e);
5926 else
5927 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5928 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5930 /* Handle linear clause adjustments. */
5931 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5932 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5933 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5934 c; c = OMP_CLAUSE_CHAIN (c))
5935 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5936 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5938 tree d = OMP_CLAUSE_DECL (c);
5939 bool is_ref = omp_is_reference (d);
5940 tree t = d, a, dest;
5941 if (is_ref)
5942 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5943 tree type = TREE_TYPE (t);
5944 if (POINTER_TYPE_P (type))
5945 type = sizetype;
5946 dest = unshare_expr (t);
5947 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5948 expand_omp_build_assign (&gsif, v, t);
5949 if (itercnt == NULL_TREE)
5951 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5953 itercntbias
5954 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5955 fold_convert (itype, fd->loop.n1));
5956 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5957 itercntbias, step);
5958 itercntbias
5959 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5960 NULL_TREE, true,
5961 GSI_SAME_STMT);
5962 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5963 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5964 NULL_TREE, false,
5965 GSI_CONTINUE_LINKING);
5967 else
5968 itercnt = s0;
5970 a = fold_build2 (MULT_EXPR, type,
5971 fold_convert (type, itercnt),
5972 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5973 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5974 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5975 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5976 false, GSI_CONTINUE_LINKING);
5977 assign_stmt = gimple_build_assign (dest, t);
5978 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5980 if (fd->collapse > 1)
5981 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5983 if (!broken_loop)
5985 /* The code controlling the sequential loop goes in CONT_BB,
5986 replacing the GIMPLE_OMP_CONTINUE. */
5987 gsi = gsi_last_nondebug_bb (cont_bb);
5988 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5989 vmain = gimple_omp_continue_control_use (cont_stmt);
5990 vback = gimple_omp_continue_control_def (cont_stmt);
5992 if (cond_var)
5994 tree itype = TREE_TYPE (cond_var);
5995 tree t2;
5996 if (POINTER_TYPE_P (type)
5997 || TREE_CODE (n1) != INTEGER_CST
5998 || fd->loop.cond_code != LT_EXPR)
5999 t2 = build_int_cst (itype, 1);
6000 else
6001 t2 = fold_convert (itype, step);
6002 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6003 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6004 NULL_TREE, true, GSI_SAME_STMT);
6005 assign_stmt = gimple_build_assign (cond_var, t2);
6006 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6009 if (!gimple_omp_for_combined_p (fd->for_stmt))
6011 if (POINTER_TYPE_P (type))
6012 t = fold_build_pointer_plus (vmain, step);
6013 else
6014 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6015 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6017 true, GSI_SAME_STMT);
6018 assign_stmt = gimple_build_assign (vback, t);
6019 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6021 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6022 t = build2 (EQ_EXPR, boolean_type_node,
6023 build_int_cst (itype, 0),
6024 build_int_cst (itype, 1));
6025 else
6026 t = build2 (fd->loop.cond_code, boolean_type_node,
6027 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6028 ? t : vback, e);
6029 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6032 /* Remove GIMPLE_OMP_CONTINUE. */
6033 gsi_remove (&gsi, true);
6035 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6036 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6038 /* Trip update code goes into TRIP_UPDATE_BB. */
6039 gsi = gsi_start_bb (trip_update_bb);
6041 t = build_int_cst (itype, 1);
6042 t = build2 (PLUS_EXPR, itype, trip_main, t);
6043 assign_stmt = gimple_build_assign (trip_back, t);
6044 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6047 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6048 gsi = gsi_last_nondebug_bb (exit_bb);
6049 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6051 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6052 if (fd->have_reductemp || fd->have_pointer_condtemp)
6054 tree fn;
6055 if (t)
6056 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6057 else
6058 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6059 gcall *g = gimple_build_call (fn, 0);
6060 if (t)
6062 gimple_call_set_lhs (g, t);
6063 if (fd->have_reductemp)
6064 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6065 NOP_EXPR, t),
6066 GSI_SAME_STMT);
6068 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6070 else
6071 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6073 else if (fd->have_pointer_condtemp)
6075 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6076 gcall *g = gimple_build_call (fn, 0);
6077 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6079 gsi_remove (&gsi, true);
6081 /* Connect the new blocks. */
6082 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6083 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6085 if (!broken_loop)
6087 se = find_edge (cont_bb, body_bb);
6088 if (se == NULL)
6090 se = BRANCH_EDGE (cont_bb);
6091 gcc_assert (single_succ (se->dest) == body_bb);
6093 if (gimple_omp_for_combined_p (fd->for_stmt))
6095 remove_edge (se);
6096 se = NULL;
6098 else if (fd->collapse > 1)
6100 remove_edge (se);
6101 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6103 else
6104 se->flags = EDGE_TRUE_VALUE;
6105 find_edge (cont_bb, trip_update_bb)->flags
6106 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6108 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6109 iter_part_bb);
6112 if (gimple_in_ssa_p (cfun))
6114 gphi_iterator psi;
6115 gphi *phi;
6116 edge re, ene;
6117 edge_var_map *vm;
6118 size_t i;
6120 gcc_assert (fd->collapse == 1 && !broken_loop);
6122 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6123 remove arguments of the phi nodes in fin_bb. We need to create
6124 appropriate phi nodes in iter_part_bb instead. */
6125 se = find_edge (iter_part_bb, fin_bb);
6126 re = single_succ_edge (trip_update_bb);
6127 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6128 ene = single_succ_edge (entry_bb);
6130 psi = gsi_start_phis (fin_bb);
6131 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6132 gsi_next (&psi), ++i)
6134 gphi *nphi;
6135 location_t locus;
6137 phi = psi.phi ();
6138 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6139 redirect_edge_var_map_def (vm), 0))
6140 continue;
6142 t = gimple_phi_result (phi);
6143 gcc_assert (t == redirect_edge_var_map_result (vm));
6145 if (!single_pred_p (fin_bb))
6146 t = copy_ssa_name (t, phi);
6148 nphi = create_phi_node (t, iter_part_bb);
6150 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6151 locus = gimple_phi_arg_location_from_edge (phi, se);
6153 /* A special case -- fd->loop.v is not yet computed in
6154 iter_part_bb, so we need to use vextra instead. */
6155 if (t == fd->loop.v)
6156 t = vextra;
6157 add_phi_arg (nphi, t, ene, locus);
6158 locus = redirect_edge_var_map_location (vm);
6159 tree back_arg = redirect_edge_var_map_def (vm);
6160 add_phi_arg (nphi, back_arg, re, locus);
6161 edge ce = find_edge (cont_bb, body_bb);
6162 if (ce == NULL)
6164 ce = BRANCH_EDGE (cont_bb);
6165 gcc_assert (single_succ (ce->dest) == body_bb);
6166 ce = single_succ_edge (ce->dest);
6168 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6169 gcc_assert (inner_loop_phi != NULL);
6170 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6171 find_edge (seq_start_bb, body_bb), locus);
6173 if (!single_pred_p (fin_bb))
6174 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6176 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6177 redirect_edge_var_map_clear (re);
6178 if (single_pred_p (fin_bb))
6179 while (1)
6181 psi = gsi_start_phis (fin_bb);
6182 if (gsi_end_p (psi))
6183 break;
6184 remove_phi_node (&psi, false);
6187 /* Make phi node for trip. */
6188 phi = create_phi_node (trip_main, iter_part_bb);
6189 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6190 UNKNOWN_LOCATION);
6191 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6192 UNKNOWN_LOCATION);
6195 if (!broken_loop)
6196 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6197 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6198 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6199 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6200 recompute_dominator (CDI_DOMINATORS, fin_bb));
6201 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6202 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6203 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6204 recompute_dominator (CDI_DOMINATORS, body_bb));
6206 if (!broken_loop)
6208 class loop *loop = body_bb->loop_father;
6209 class loop *trip_loop = alloc_loop ();
6210 trip_loop->header = iter_part_bb;
6211 trip_loop->latch = trip_update_bb;
6212 add_loop (trip_loop, iter_part_bb->loop_father);
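/* Editorial note, not in the original source: the chunked schedule
   produces two nested loops -- trip_loop, built above, iterates over
   chunks via iter_part_bb/trip_update_bb, while the loop around the
   body (either the pre-existing inner loop or the one allocated
   below) iterates within a single chunk.  */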
6214 if (loop != entry_bb->loop_father)
6216 gcc_assert (loop->header == body_bb);
6217 gcc_assert (loop->latch == region->cont
6218 || single_pred (loop->latch) == region->cont);
6219 trip_loop->inner = loop;
6220 return;
6223 if (!gimple_omp_for_combined_p (fd->for_stmt))
6225 loop = alloc_loop ();
6226 loop->header = body_bb;
6227 if (collapse_bb == NULL)
6228 loop->latch = cont_bb;
6229 add_loop (loop, trip_loop);
6234 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6235 loop. Given parameters:
6237 for (V = N1; V cond N2; V += STEP) BODY;
6239 where COND is "<" or ">", we generate pseudocode
6241 V = N1;
6242 goto L1;
6243 L0:
6244 BODY;
6245 V += STEP;
6246 L1:
6247 if (V cond N2) goto L0; else goto L2;
6248 L2:
6250 For collapsed loops, emit the outer loops as scalar
6251 and only try to vectorize the innermost loop. */
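/* Editorial illustration, not part of the original source: a
   directive such as

     #pragma omp simd safelen(8)
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   is expanded here into the scalar control flow shown above; the
   actual vectorization is left to the autovectorizer, guided by the
   safelen/_simduid_ annotations read below.  a, b, c and n are
   placeholder names.  */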
6253 static void
6254 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6256 tree type, t;
6257 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6258 gimple_stmt_iterator gsi;
6259 gimple *stmt;
6260 gcond *cond_stmt;
6261 bool broken_loop = region->cont == NULL;
6262 edge e, ne;
6263 tree *counts = NULL;
6264 int i;
6265 int safelen_int = INT_MAX;
6266 bool dont_vectorize = false;
6267 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6268 OMP_CLAUSE_SAFELEN);
6269 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6270 OMP_CLAUSE__SIMDUID_);
6271 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6272 OMP_CLAUSE_IF);
6273 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6274 OMP_CLAUSE_SIMDLEN);
6275 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6276 OMP_CLAUSE__CONDTEMP_);
6277 tree n1, n2;
6278 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6280 if (safelen)
6282 poly_uint64 val;
6283 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6284 if (!poly_int_tree_p (safelen, &val))
6285 safelen_int = 0;
6286 else
6287 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6288 if (safelen_int == 1)
6289 safelen_int = 0;
6291 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6292 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6294 safelen_int = 0;
6295 dont_vectorize = true;
6297 type = TREE_TYPE (fd->loop.v);
6298 entry_bb = region->entry;
6299 cont_bb = region->cont;
6300 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6301 gcc_assert (broken_loop
6302 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6303 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6304 if (!broken_loop)
6306 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6307 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6308 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6309 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6311 else
6313 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6314 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6315 l2_bb = single_succ (l1_bb);
6317 exit_bb = region->exit;
6318 l2_dom_bb = NULL;
6320 gsi = gsi_last_nondebug_bb (entry_bb);
6322 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6323 /* Not needed in SSA form right now. */
6324 gcc_assert (!gimple_in_ssa_p (cfun));
6325 if (fd->collapse > 1
6326 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6327 || broken_loop))
6329 int first_zero_iter = -1, dummy = -1;
6330 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6332 counts = XALLOCAVEC (tree, fd->collapse);
6333 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6334 zero_iter_bb, first_zero_iter,
6335 dummy_bb, dummy, l2_dom_bb);
6337 if (l2_dom_bb == NULL)
6338 l2_dom_bb = l1_bb;
6340 n1 = fd->loop.n1;
6341 n2 = fd->loop.n2;
6342 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6344 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6345 OMP_CLAUSE__LOOPTEMP_);
6346 gcc_assert (innerc);
6347 n1 = OMP_CLAUSE_DECL (innerc);
6348 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6349 OMP_CLAUSE__LOOPTEMP_);
6350 gcc_assert (innerc);
6351 n2 = OMP_CLAUSE_DECL (innerc);
6353 tree step = fd->loop.step;
6355 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6356 OMP_CLAUSE__SIMT_);
6357 if (is_simt)
6359 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6360 is_simt = safelen_int > 1;
6362 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6363 if (is_simt)
6365 simt_lane = create_tmp_var (unsigned_type_node);
6366 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6367 gimple_call_set_lhs (g, simt_lane);
6368 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6369 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6370 fold_convert (TREE_TYPE (step), simt_lane));
6371 n1 = fold_convert (type, n1);
6372 if (POINTER_TYPE_P (type))
6373 n1 = fold_build_pointer_plus (n1, offset);
6374 else
6375 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6377 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6378 if (fd->collapse > 1)
6379 simt_maxlane = build_one_cst (unsigned_type_node);
6380 else if (safelen_int < omp_max_simt_vf ())
6381 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6382 tree vf
6383 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6384 unsigned_type_node, 0);
6385 if (simt_maxlane)
6386 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6387 vf = fold_convert (TREE_TYPE (step), vf);
6388 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
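/* Editorial note, not in the original source: after the two
   adjustments above each SIMT lane starts at N1 + STEP * lane and
   advances by STEP * VF.  E.g. with N1 = 0, STEP = 1, VF = 32,
   lane 0 runs iterations 0, 32, 64, ... and lane 1 runs
   1, 33, 65, ...; the lanes cover disjoint interleaved subsets of
   the iteration space.  */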
6391 tree n2var = NULL_TREE;
6392 tree n2v = NULL_TREE;
6393 tree *nonrect_bounds = NULL;
6394 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6395 if (fd->collapse > 1)
6397 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6399 if (fd->non_rect)
6401 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6402 memset (nonrect_bounds, 0,
6403 sizeof (tree) * (fd->last_nonrect + 1));
6405 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6406 gcc_assert (entry_bb == gsi_bb (gsi));
6407 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6408 gsi_prev (&gsi);
6409 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6410 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6411 NULL, n1);
6412 gsi = gsi_for_stmt (fd->for_stmt);
6414 if (broken_loop)
6416 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6418 /* Compute in n2var the limit for the first innermost loop,
6419 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6420 where cnt is how many iterations the loop would have if
6421 all further iterations were assigned to the current task. */
6422 n2var = create_tmp_var (type);
6423 i = fd->collapse - 1;
6424 tree itype = TREE_TYPE (fd->loops[i].v);
6425 if (POINTER_TYPE_P (itype))
6426 itype = signed_type_for (itype);
6427 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6428 ? -1 : 1));
6429 t = fold_build2 (PLUS_EXPR, itype,
6430 fold_convert (itype, fd->loops[i].step), t);
6431 t = fold_build2 (PLUS_EXPR, itype, t,
6432 fold_convert (itype, fd->loops[i].n2));
6433 if (fd->loops[i].m2)
6435 tree t2 = fold_convert (itype,
6436 fd->loops[i - fd->loops[i].outer].v);
6437 tree t3 = fold_convert (itype, fd->loops[i].m2);
6438 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6439 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6441 t = fold_build2 (MINUS_EXPR, itype, t,
6442 fold_convert (itype, fd->loops[i].v));
6443 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6444 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6445 fold_build1 (NEGATE_EXPR, itype, t),
6446 fold_build1 (NEGATE_EXPR, itype,
6447 fold_convert (itype,
6448 fd->loops[i].step)));
6449 else
6450 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6451 fold_convert (itype, fd->loops[i].step));
6452 t = fold_convert (type, t);
6453 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6454 min_arg1 = create_tmp_var (type);
6455 expand_omp_build_assign (&gsi, min_arg1, t2);
6456 min_arg2 = create_tmp_var (type);
6457 expand_omp_build_assign (&gsi, min_arg2, t);
6459 else
6461 if (TREE_CODE (n2) == INTEGER_CST)
6463 /* Indicate for lastprivate handling that at least one iteration
6464 has been performed, without wasting runtime. */
6465 if (integer_nonzerop (n2))
6466 expand_omp_build_assign (&gsi, fd->loop.v,
6467 fold_convert (type, n2));
6468 else
6469 /* Indicate that no iteration has been performed. */
6470 expand_omp_build_assign (&gsi, fd->loop.v,
6471 build_one_cst (type));
6473 else
6475 expand_omp_build_assign (&gsi, fd->loop.v,
6476 build_zero_cst (type));
6477 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6479 for (i = 0; i < fd->collapse; i++)
6481 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6482 if (fd->loops[i].m1)
6484 tree t2
6485 = fold_convert (TREE_TYPE (t),
6486 fd->loops[i - fd->loops[i].outer].v);
6487 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6488 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6489 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6491 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6492 /* For normal non-combined collapsed loops, just initialize
6493 the outermost iterator in the entry_bb. */
6494 if (!broken_loop)
6495 break;
6499 else
6500 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6501 tree altv = NULL_TREE, altn2 = NULL_TREE;
6502 if (fd->collapse == 1
6503 && !broken_loop
6504 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6506 /* The vectorizer currently punts on loops with a non-constant step
6507 for the main IV (it cannot compute the number of iterations and gives up
6508 because of that). Since for OpenMP loops it is always possible to
6509 compute the number of iterations upfront, use an alternate IV
6510 as the loop iterator:
6511 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6512 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
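/* Editorial note, not in the original source: e.g. n1 = 0, n2 = 10,
   step = 3 gives altn2 = (10 - 0 + 2) / 3 = 4, and the alternate IV
   altv runs 0, 1, 2, 3 while i steps through 0, 3, 6, 9.  */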
6513 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6514 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6515 tree itype = TREE_TYPE (fd->loop.v);
6516 if (POINTER_TYPE_P (itype))
6517 itype = signed_type_for (itype);
6518 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6519 t = fold_build2 (PLUS_EXPR, itype,
6520 fold_convert (itype, fd->loop.step), t);
6521 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6522 t = fold_build2 (MINUS_EXPR, itype, t,
6523 fold_convert (itype, fd->loop.v));
6524 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6525 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6526 fold_build1 (NEGATE_EXPR, itype, t),
6527 fold_build1 (NEGATE_EXPR, itype,
6528 fold_convert (itype, fd->loop.step)));
6529 else
6530 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6531 fold_convert (itype, fd->loop.step));
6532 t = fold_convert (TREE_TYPE (altv), t);
6533 altn2 = create_tmp_var (TREE_TYPE (altv));
6534 expand_omp_build_assign (&gsi, altn2, t);
6535 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6536 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6537 true, GSI_SAME_STMT);
6538 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6539 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6540 build_zero_cst (TREE_TYPE (altv)));
6541 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6543 else if (fd->collapse > 1
6544 && !broken_loop
6545 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6546 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6548 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6549 altn2 = create_tmp_var (TREE_TYPE (altv));
6551 if (cond_var)
6553 if (POINTER_TYPE_P (type)
6554 || TREE_CODE (n1) != INTEGER_CST
6555 || fd->loop.cond_code != LT_EXPR
6556 || tree_int_cst_sgn (n1) != 1)
6557 expand_omp_build_assign (&gsi, cond_var,
6558 build_one_cst (TREE_TYPE (cond_var)));
6559 else
6560 expand_omp_build_assign (&gsi, cond_var,
6561 fold_convert (TREE_TYPE (cond_var), n1));
6564 /* Remove the GIMPLE_OMP_FOR statement. */
6565 gsi_remove (&gsi, true);
6567 if (!broken_loop)
6569 /* Code to control the increment goes in the CONT_BB. */
6570 gsi = gsi_last_nondebug_bb (cont_bb);
6571 stmt = gsi_stmt (gsi);
6572 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6574 if (fd->collapse == 1
6575 || gimple_omp_for_combined_into_p (fd->for_stmt))
6577 if (POINTER_TYPE_P (type))
6578 t = fold_build_pointer_plus (fd->loop.v, step);
6579 else
6580 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6581 expand_omp_build_assign (&gsi, fd->loop.v, t);
6583 else if (TREE_CODE (n2) != INTEGER_CST)
6584 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6585 if (altv)
6587 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6588 build_one_cst (TREE_TYPE (altv)));
6589 expand_omp_build_assign (&gsi, altv, t);
6592 if (fd->collapse > 1)
6594 i = fd->collapse - 1;
6595 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6597 t = fold_convert (sizetype, fd->loops[i].step);
6598 t = fold_build_pointer_plus (fd->loops[i].v, t);
6600 else
6602 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6603 fd->loops[i].step);
6604 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6605 fd->loops[i].v, t);
6607 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6609 if (cond_var)
6611 if (POINTER_TYPE_P (type)
6612 || TREE_CODE (n1) != INTEGER_CST
6613 || fd->loop.cond_code != LT_EXPR
6614 || tree_int_cst_sgn (n1) != 1)
6615 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6616 build_one_cst (TREE_TYPE (cond_var)));
6617 else
6618 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6619 fold_convert (TREE_TYPE (cond_var), step));
6620 expand_omp_build_assign (&gsi, cond_var, t);
6623 /* Remove GIMPLE_OMP_CONTINUE. */
6624 gsi_remove (&gsi, true);
6627 /* Emit the condition in L1_BB. */
6628 gsi = gsi_start_bb (l1_bb);
6630 if (altv)
6631 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6632 else if (fd->collapse > 1
6633 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6634 && !broken_loop)
6636 i = fd->collapse - 1;
6637 tree itype = TREE_TYPE (fd->loops[i].v);
6638 if (fd->loops[i].m2)
6639 t = n2v = create_tmp_var (itype);
6640 else
6641 t = fold_convert (itype, fd->loops[i].n2);
6642 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6643 false, GSI_CONTINUE_LINKING);
6644 tree v = fd->loops[i].v;
6645 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6646 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6647 false, GSI_CONTINUE_LINKING);
6648 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6650 else
6652 if (fd->collapse > 1 && !broken_loop)
6653 t = n2var;
6654 else
6655 t = fold_convert (type, n2);
6656 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6657 false, GSI_CONTINUE_LINKING);
6658 tree v = fd->loop.v;
6659 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6660 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6661 false, GSI_CONTINUE_LINKING);
6662 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6664 cond_stmt = gimple_build_cond_empty (t);
6665 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6666 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6667 NULL, NULL)
6668 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6669 NULL, NULL))
6671 gsi = gsi_for_stmt (cond_stmt);
6672 gimple_regimplify_operands (cond_stmt, &gsi);
6675 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6676 if (is_simt)
6678 gsi = gsi_start_bb (l2_bb);
6679 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6680 if (POINTER_TYPE_P (type))
6681 t = fold_build_pointer_plus (fd->loop.v, step);
6682 else
6683 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6684 expand_omp_build_assign (&gsi, fd->loop.v, t);
6687 /* Remove GIMPLE_OMP_RETURN. */
6688 gsi = gsi_last_nondebug_bb (exit_bb);
6689 gsi_remove (&gsi, true);
6691 /* Connect the new blocks. */
6692 remove_edge (FALLTHRU_EDGE (entry_bb));
6694 if (!broken_loop)
6696 remove_edge (BRANCH_EDGE (entry_bb));
6697 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6699 e = BRANCH_EDGE (l1_bb);
6700 ne = FALLTHRU_EDGE (l1_bb);
6701 e->flags = EDGE_TRUE_VALUE;
6703 else
6705 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6707 ne = single_succ_edge (l1_bb);
6708 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6711 ne->flags = EDGE_FALSE_VALUE;
6712 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6713 ne->probability = e->probability.invert ();
6715 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6716 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6718 if (simt_maxlane)
6720 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6721 NULL_TREE, NULL_TREE);
6722 gsi = gsi_last_bb (entry_bb);
6723 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6724 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6725 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6726 FALLTHRU_EDGE (entry_bb)->probability
6727 = profile_probability::guessed_always ().apply_scale (7, 8);
6728 BRANCH_EDGE (entry_bb)->probability
6729 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6730 l2_dom_bb = entry_bb;
6732 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6734 if (!broken_loop && fd->collapse > 1)
6736 basic_block last_bb = l1_bb;
6737 basic_block init_bb = NULL;
6738 for (i = fd->collapse - 2; i >= 0; i--)
6740 tree nextn2v = NULL_TREE;
6741 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6742 e = EDGE_SUCC (last_bb, 0);
6743 else
6744 e = EDGE_SUCC (last_bb, 1);
6745 basic_block bb = split_edge (e);
6746 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6748 t = fold_convert (sizetype, fd->loops[i].step);
6749 t = fold_build_pointer_plus (fd->loops[i].v, t);
6751 else
6753 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6754 fd->loops[i].step);
6755 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6756 fd->loops[i].v, t);
6758 gsi = gsi_after_labels (bb);
6759 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6761 bb = split_block (bb, last_stmt (bb))->dest;
6762 gsi = gsi_start_bb (bb);
6763 tree itype = TREE_TYPE (fd->loops[i].v);
6764 if (fd->loops[i].m2)
6765 t = nextn2v = create_tmp_var (itype);
6766 else
6767 t = fold_convert (itype, fd->loops[i].n2);
6768 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6769 false, GSI_CONTINUE_LINKING);
6770 tree v = fd->loops[i].v;
6771 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6772 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6773 false, GSI_CONTINUE_LINKING);
6774 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6775 cond_stmt = gimple_build_cond_empty (t);
6776 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6777 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6778 expand_omp_regimplify_p, NULL, NULL)
6779 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6780 expand_omp_regimplify_p, NULL, NULL))
6782 gsi = gsi_for_stmt (cond_stmt);
6783 gimple_regimplify_operands (cond_stmt, &gsi);
6785 ne = single_succ_edge (bb);
6786 ne->flags = EDGE_FALSE_VALUE;
6788 init_bb = create_empty_bb (bb);
6789 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6790 add_bb_to_loop (init_bb, bb->loop_father);
6791 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6792 e->probability
6793 = profile_probability::guessed_always ().apply_scale (7, 8);
6794 ne->probability = e->probability.invert ();
6796 gsi = gsi_after_labels (init_bb);
6797 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6798 fd->loops[i + 1].n1);
6799 if (fd->loops[i + 1].m1)
6801 tree t2 = fold_convert (TREE_TYPE (t),
6802 fd->loops[i + 1
6803 - fd->loops[i + 1].outer].v);
6804 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6805 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6806 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6808 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6809 if (fd->loops[i + 1].m2)
6811 if (i + 2 == fd->collapse && (n2var || altv))
6813 gcc_assert (n2v == NULL_TREE);
6814 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6816 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6817 fd->loops[i + 1].n2);
6818 tree t2 = fold_convert (TREE_TYPE (t),
6819 fd->loops[i + 1
6820 - fd->loops[i + 1].outer].v);
6821 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6822 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6823 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6824 expand_omp_build_assign (&gsi, n2v, t);
6826 if (i + 2 == fd->collapse && n2var)
6828 /* For composite simd, n2 is the first iteration the current
6829 task should no longer handle, so we effectively want to use
6830 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6831 as the vectorized loop. Except the vectorizer will not
6832 vectorize that, so instead compute N2VAR as
6833 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6834 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6835 as the loop to vectorize. */
6836 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6837 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6839 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6840 == LT_EXPR ? -1 : 1));
6841 t = fold_build2 (PLUS_EXPR, itype,
6842 fold_convert (itype,
6843 fd->loops[i + 1].step), t);
6844 if (fd->loops[i + 1].m2)
6845 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6846 else
6847 t = fold_build2 (PLUS_EXPR, itype, t,
6848 fold_convert (itype,
6849 fd->loops[i + 1].n2));
6850 t = fold_build2 (MINUS_EXPR, itype, t,
6851 fold_convert (itype, fd->loops[i + 1].v));
6852 tree step = fold_convert (itype, fd->loops[i + 1].step);
6853 if (TYPE_UNSIGNED (itype)
6854 && fd->loops[i + 1].cond_code == GT_EXPR)
6855 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6856 fold_build1 (NEGATE_EXPR, itype, t),
6857 fold_build1 (NEGATE_EXPR, itype, step));
6858 else
6859 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6860 t = fold_convert (type, t);
6862 else
6863 t = counts[i + 1];
6864 expand_omp_build_assign (&gsi, min_arg1, t2);
6865 expand_omp_build_assign (&gsi, min_arg2, t);
6866 e = split_block (init_bb, last_stmt (init_bb));
6867 gsi = gsi_after_labels (e->dest);
6868 init_bb = e->dest;
6869 remove_edge (FALLTHRU_EDGE (entry_bb));
6870 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6871 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6872 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6873 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6874 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6875 expand_omp_build_assign (&gsi, n2var, t);
6877 if (i + 2 == fd->collapse && altv)
6879 /* The vectorizer currently punts on loops with non-constant
6880 steps for the main IV (can't compute number of iterations
6881 and gives up because of that). Since for OpenMP loops it is
6882 always possible to compute the number of iterations upfront,
6883 use an alternate IV as the loop iterator. */
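/* Note: ALTV counts iterations from zero; ALTN2 below receives the
   remaining iteration count (N2' - V + STEP -/+ 1) / STEP, computed
   with the same rounding pattern as counts[], and the trailing
   COND_EXPR forces ALTN2 to zero when V already fails the loop
   condition. */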
6884 expand_omp_build_assign (&gsi, altv,
6885 build_zero_cst (TREE_TYPE (altv)));
6886 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6887 if (POINTER_TYPE_P (itype))
6888 itype = signed_type_for (itype);
6889 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6890 ? -1 : 1));
6891 t = fold_build2 (PLUS_EXPR, itype,
6892 fold_convert (itype, fd->loops[i + 1].step), t);
6893 t = fold_build2 (PLUS_EXPR, itype, t,
6894 fold_convert (itype,
6895 fd->loops[i + 1].m2
6896 ? n2v : fd->loops[i + 1].n2));
6897 t = fold_build2 (MINUS_EXPR, itype, t,
6898 fold_convert (itype, fd->loops[i + 1].v));
6899 tree step = fold_convert (itype, fd->loops[i + 1].step);
6900 if (TYPE_UNSIGNED (itype)
6901 && fd->loops[i + 1].cond_code == GT_EXPR)
6902 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6903 fold_build1 (NEGATE_EXPR, itype, t),
6904 fold_build1 (NEGATE_EXPR, itype, step));
6905 else
6906 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6907 t = fold_convert (TREE_TYPE (altv), t);
6908 expand_omp_build_assign (&gsi, altn2, t);
6909 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6910 fd->loops[i + 1].m2
6911 ? n2v : fd->loops[i + 1].n2);
6912 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6913 true, GSI_SAME_STMT);
6914 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6915 fd->loops[i + 1].v, t2);
6916 gassign *g
6917 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6918 build_zero_cst (TREE_TYPE (altv)));
6919 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6921 n2v = nextn2v;
6923 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6924 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6926 e = find_edge (entry_bb, last_bb);
6927 redirect_edge_succ (e, bb);
6928 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6929 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6932 last_bb = bb;
6935 if (!broken_loop)
6937 class loop *loop = alloc_loop ();
6938 loop->header = l1_bb;
6939 loop->latch = cont_bb;
6940 add_loop (loop, l1_bb->loop_father);
6941 loop->safelen = safelen_int;
6942 if (simduid)
6944 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6945 cfun->has_simduid_loops = true;
6947 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6948 the loop. */
6949 if ((flag_tree_loop_vectorize
6950 || !global_options_set.x_flag_tree_loop_vectorize)
6951 && flag_tree_loop_optimize
6952 && loop->safelen > 1)
6954 loop->force_vectorize = true;
6955 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6957 unsigned HOST_WIDE_INT v
6958 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6959 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6960 loop->simdlen = v;
6962 cfun->has_force_vectorize_loops = true;
6964 else if (dont_vectorize)
6965 loop->dont_vectorize = true;
6967 else if (simduid)
6968 cfun->has_simduid_loops = true;
6971 /* Taskloop construct is represented after gimplification with
6972 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6973 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6974 which should just compute all the needed loop temporaries
6975 for GIMPLE_OMP_TASK. */
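/* For illustration (a hedged sketch of the shape, not a verbatim dump),
   a construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body;

   arrives here as roughly

     GIMPLE_OMP_FOR      <-- expanded by this routine
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR  <-- expanded by expand_omp_taskloop_for_inner
           body

   so this routine essentially only emits the _looptemp_ computations
   that the task consumes. */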
6977 static void
6978 expand_omp_taskloop_for_outer (struct omp_region *region,
6979 struct omp_for_data *fd,
6980 gimple *inner_stmt)
6982 tree type, bias = NULL_TREE;
6983 basic_block entry_bb, cont_bb, exit_bb;
6984 gimple_stmt_iterator gsi;
6985 gassign *assign_stmt;
6986 tree *counts = NULL;
6987 int i;
6989 gcc_assert (inner_stmt);
6990 gcc_assert (region->cont);
6991 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6992 && gimple_omp_task_taskloop_p (inner_stmt));
6993 type = TREE_TYPE (fd->loop.v);
6995 /* See if we need to bias by LLONG_MIN. */
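/* The runtime iterates in unsigned long long space; adding LLONG_MIN
   maps the signed order onto the unsigned order, since x -> x + 2^63
   modulo 2^64 is monotone from signed to unsigned. When both bounds
   are constants with the same sign, the unsigned comparison already
   agrees with the signed one and the bias can be skipped. */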
6996 if (fd->iter_type == long_long_unsigned_type_node
6997 && TREE_CODE (type) == INTEGER_TYPE
6998 && !TYPE_UNSIGNED (type))
7000 tree n1, n2;
7002 if (fd->loop.cond_code == LT_EXPR)
7004 n1 = fd->loop.n1;
7005 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7007 else
7009 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7010 n2 = fd->loop.n1;
7012 if (TREE_CODE (n1) != INTEGER_CST
7013 || TREE_CODE (n2) != INTEGER_CST
7014 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7015 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7018 entry_bb = region->entry;
7019 cont_bb = region->cont;
7020 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7021 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7022 exit_bb = region->exit;
7024 gsi = gsi_last_nondebug_bb (entry_bb);
7025 gimple *for_stmt = gsi_stmt (gsi);
7026 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7027 if (fd->collapse > 1)
7029 int first_zero_iter = -1, dummy = -1;
7030 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7032 counts = XALLOCAVEC (tree, fd->collapse);
7033 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7034 zero_iter_bb, first_zero_iter,
7035 dummy_bb, dummy, l2_dom_bb);
7037 if (zero_iter_bb)
7039 /* Some counts[i] vars might be uninitialized if
7040 some loop has zero iterations. But the body shouldn't
7041 be executed in that case, so just avoid uninit warnings. */
7042 for (i = first_zero_iter; i < fd->collapse; i++)
7043 if (SSA_VAR_P (counts[i]))
7044 TREE_NO_WARNING (counts[i]) = 1;
7045 gsi_prev (&gsi);
7046 edge e = split_block (entry_bb, gsi_stmt (gsi));
7047 entry_bb = e->dest;
7048 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7049 gsi = gsi_last_bb (entry_bb);
7050 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7051 get_immediate_dominator (CDI_DOMINATORS,
7052 zero_iter_bb));
7056 tree t0, t1;
7057 t1 = fd->loop.n2;
7058 t0 = fd->loop.n1;
7059 if (POINTER_TYPE_P (TREE_TYPE (t0))
7060 && TYPE_PRECISION (TREE_TYPE (t0))
7061 != TYPE_PRECISION (fd->iter_type))
7063 /* Avoid casting pointers to an integer of a different size. */
7064 tree itype = signed_type_for (type);
7065 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7066 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7068 else
7070 t1 = fold_convert (fd->iter_type, t1);
7071 t0 = fold_convert (fd->iter_type, t0);
7073 if (bias)
7075 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7076 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7079 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7080 OMP_CLAUSE__LOOPTEMP_);
7081 gcc_assert (innerc);
7082 tree startvar = OMP_CLAUSE_DECL (innerc);
7083 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7084 gcc_assert (innerc);
7085 tree endvar = OMP_CLAUSE_DECL (innerc);
7086 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7088 innerc = find_lastprivate_looptemp (fd, innerc);
7089 if (innerc)
7091 /* If needed (inner taskloop has lastprivate clause), propagate
7092 down the total number of iterations. */
7093 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7094 NULL_TREE, false,
7095 GSI_CONTINUE_LINKING);
7096 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7101 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7102 GSI_CONTINUE_LINKING);
7103 assign_stmt = gimple_build_assign (startvar, t0);
7104 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7106 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7107 GSI_CONTINUE_LINKING);
7108 assign_stmt = gimple_build_assign (endvar, t1);
7109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7110 if (fd->collapse > 1)
7111 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7113 /* Remove the GIMPLE_OMP_FOR statement. */
7114 gsi = gsi_for_stmt (for_stmt);
7115 gsi_remove (&gsi, true);
7117 gsi = gsi_last_nondebug_bb (cont_bb);
7118 gsi_remove (&gsi, true);
7120 gsi = gsi_last_nondebug_bb (exit_bb);
7121 gsi_remove (&gsi, true);
7123 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7124 remove_edge (BRANCH_EDGE (entry_bb));
7125 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7126 remove_edge (BRANCH_EDGE (cont_bb));
7127 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7128 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7129 recompute_dominator (CDI_DOMINATORS, region->entry));
7132 /* Taskloop construct is represented after gimplification with
7133 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7134 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7135 GOMP_taskloop{,_ull} function arranges for each task to be given just
7136 a single range of iterations. */
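/* A hedged sketch of what each task receives: the first two _looptemp_
   clauses hold the subrange chosen by GOMP_taskloop{,_ull}, so the loop
   expanded below is effectively

     for (V = looptemp0; V COND looptemp1; V += STEP)
       BODY;

   When the LLONG_MIN bias (see below) is in effect, the looptemps live
   in the biased unsigned space and adding BIAS once more undoes it,
   because 2 * 2^63 is 0 modulo 2^64. */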
7138 static void
7139 expand_omp_taskloop_for_inner (struct omp_region *region,
7140 struct omp_for_data *fd,
7141 gimple *inner_stmt)
7143 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7144 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7145 basic_block fin_bb;
7146 gimple_stmt_iterator gsi;
7147 edge ep;
7148 bool broken_loop = region->cont == NULL;
7149 tree *counts = NULL;
7150 tree n1, n2, step;
7152 itype = type = TREE_TYPE (fd->loop.v);
7153 if (POINTER_TYPE_P (type))
7154 itype = signed_type_for (type);
7156 /* See if we need to bias by LLONG_MIN. */
7157 if (fd->iter_type == long_long_unsigned_type_node
7158 && TREE_CODE (type) == INTEGER_TYPE
7159 && !TYPE_UNSIGNED (type))
7161 tree n1, n2;
7163 if (fd->loop.cond_code == LT_EXPR)
7165 n1 = fd->loop.n1;
7166 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7168 else
7170 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7171 n2 = fd->loop.n1;
7173 if (TREE_CODE (n1) != INTEGER_CST
7174 || TREE_CODE (n2) != INTEGER_CST
7175 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7176 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7179 entry_bb = region->entry;
7180 cont_bb = region->cont;
7181 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7182 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7183 gcc_assert (broken_loop
7184 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7185 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7186 if (!broken_loop)
7188 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7189 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7191 exit_bb = region->exit;
7193 /* Iteration space partitioning goes in ENTRY_BB. */
7194 gsi = gsi_last_nondebug_bb (entry_bb);
7195 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7197 if (fd->collapse > 1)
7199 int first_zero_iter = -1, dummy = -1;
7200 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7202 counts = XALLOCAVEC (tree, fd->collapse);
7203 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7204 fin_bb, first_zero_iter,
7205 dummy_bb, dummy, l2_dom_bb);
7206 t = NULL_TREE;
7208 else
7209 t = integer_one_node;
7211 step = fd->loop.step;
7212 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7213 OMP_CLAUSE__LOOPTEMP_);
7214 gcc_assert (innerc);
7215 n1 = OMP_CLAUSE_DECL (innerc);
7216 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7217 gcc_assert (innerc);
7218 n2 = OMP_CLAUSE_DECL (innerc);
7219 if (bias)
7221 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7222 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7224 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7225 true, NULL_TREE, true, GSI_SAME_STMT);
7226 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7227 true, NULL_TREE, true, GSI_SAME_STMT);
7228 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7229 true, NULL_TREE, true, GSI_SAME_STMT);
7231 tree startvar = fd->loop.v;
7232 tree endvar = NULL_TREE;
7234 if (gimple_omp_for_combined_p (fd->for_stmt))
7236 tree clauses = gimple_omp_for_clauses (inner_stmt);
7237 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7238 gcc_assert (innerc);
7239 startvar = OMP_CLAUSE_DECL (innerc);
7240 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7241 OMP_CLAUSE__LOOPTEMP_);
7242 gcc_assert (innerc);
7243 endvar = OMP_CLAUSE_DECL (innerc);
7245 t = fold_convert (TREE_TYPE (startvar), n1);
7246 t = force_gimple_operand_gsi (&gsi, t,
7247 DECL_P (startvar)
7248 && TREE_ADDRESSABLE (startvar),
7249 NULL_TREE, false, GSI_CONTINUE_LINKING);
7250 gimple *assign_stmt = gimple_build_assign (startvar, t);
7251 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7253 t = fold_convert (TREE_TYPE (startvar), n2);
7254 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7255 false, GSI_CONTINUE_LINKING);
7256 if (endvar)
7258 assign_stmt = gimple_build_assign (endvar, e);
7259 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7260 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7261 assign_stmt = gimple_build_assign (fd->loop.v, e);
7262 else
7263 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7264 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7267 tree *nonrect_bounds = NULL;
7268 if (fd->collapse > 1)
7270 if (fd->non_rect)
7272 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7273 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7275 gcc_assert (gsi_bb (gsi) == entry_bb);
7276 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7277 startvar);
7278 entry_bb = gsi_bb (gsi);
7281 if (!broken_loop)
7283 /* The code controlling the sequential loop replaces the
7284 GIMPLE_OMP_CONTINUE. */
7285 gsi = gsi_last_nondebug_bb (cont_bb);
7286 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7287 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7288 vmain = gimple_omp_continue_control_use (cont_stmt);
7289 vback = gimple_omp_continue_control_def (cont_stmt);
7291 if (!gimple_omp_for_combined_p (fd->for_stmt))
7293 if (POINTER_TYPE_P (type))
7294 t = fold_build_pointer_plus (vmain, step);
7295 else
7296 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7297 t = force_gimple_operand_gsi (&gsi, t,
7298 DECL_P (vback)
7299 && TREE_ADDRESSABLE (vback),
7300 NULL_TREE, true, GSI_SAME_STMT);
7301 assign_stmt = gimple_build_assign (vback, t);
7302 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7304 t = build2 (fd->loop.cond_code, boolean_type_node,
7305 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7306 ? t : vback, e);
7307 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7310 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7311 gsi_remove (&gsi, true);
7313 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7314 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7315 cont_bb, body_bb);
7318 /* Remove the GIMPLE_OMP_FOR statement. */
7319 gsi = gsi_for_stmt (fd->for_stmt);
7320 gsi_remove (&gsi, true);
7322 /* Remove the GIMPLE_OMP_RETURN statement. */
7323 gsi = gsi_last_nondebug_bb (exit_bb);
7324 gsi_remove (&gsi, true);
7326 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7327 if (!broken_loop)
7328 remove_edge (BRANCH_EDGE (entry_bb));
7329 else
7331 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7332 region->outer->cont = NULL;
7335 /* Connect all the blocks. */
7336 if (!broken_loop)
7338 ep = find_edge (cont_bb, body_bb);
7339 if (gimple_omp_for_combined_p (fd->for_stmt))
7341 remove_edge (ep);
7342 ep = NULL;
7344 else if (fd->collapse > 1)
7346 remove_edge (ep);
7347 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7349 else
7350 ep->flags = EDGE_TRUE_VALUE;
7351 find_edge (cont_bb, fin_bb)->flags
7352 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7355 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7356 recompute_dominator (CDI_DOMINATORS, body_bb));
7357 if (!broken_loop)
7358 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7359 recompute_dominator (CDI_DOMINATORS, fin_bb));
7361 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7363 class loop *loop = alloc_loop ();
7364 loop->header = body_bb;
7365 if (collapse_bb == NULL)
7366 loop->latch = cont_bb;
7367 add_loop (loop, body_bb->loop_father);
7371 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7372 partitioned loop. The lowering here is abstracted, in that the
7373 loop parameters are passed through internal functions, which are
7374 further lowered by oacc_device_lower, once we get to the target
7375 compiler. The loop is of the form:
7377 for (V = B; V LTGT E; V += S) {BODY}
7379 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7380 (constant 0 for no chunking) and we will have a GWV partitioning
7381 mask, specifying dimensions over which the loop is to be
7382 partitioned (see note below). We generate code that looks like
7383 (this ignores tiling):
7385 <entry_bb> [incoming FALL->body, BRANCH->exit]
7386 typedef signedintify (typeof (V)) T; // underlying signed integral type
7387 T range = E - B;
7388 T chunk_no = 0;
7389 T DIR = LTGT == '<' ? +1 : -1;
7390 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7391 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7393 <head_bb> [created by splitting end of entry_bb]
7394 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7395 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7396 if (!(offset LTGT bound)) goto bottom_bb;
7398 <body_bb> [incoming]
7399 V = B + offset;
7400 {BODY}
7402 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7403 offset += step;
7404 if (offset LTGT bound) goto body_bb; [*]
7406 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7407 chunk_no++;
7408 if (chunk_no < chunk_max) goto head_bb;
7410 <exit_bb> [incoming]
7411 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7413 [*] Needed if V live at end of loop. */
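/* An illustrative input (assumed, not taken from the sources):

     #pragma acc parallel loop gang vector tile(8,8)
     for (...)
       for (...)
	 body;

   reaches this function with fd->tiling set and a GWV mask naming the
   gang and vector dimensions; the IFN_GOACC_LOOP internal calls built
   below stay abstract until oacc_device_lower specializes them for the
   selected offload target. */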
7415 static void
7416 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7418 tree v = fd->loop.v;
7419 enum tree_code cond_code = fd->loop.cond_code;
7420 enum tree_code plus_code = PLUS_EXPR;
7422 tree chunk_size = integer_minus_one_node;
7423 tree gwv = integer_zero_node;
7424 tree iter_type = TREE_TYPE (v);
7425 tree diff_type = iter_type;
7426 tree plus_type = iter_type;
7427 struct oacc_collapse *counts = NULL;
7429 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7430 == GF_OMP_FOR_KIND_OACC_LOOP);
7431 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7432 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7434 if (POINTER_TYPE_P (iter_type))
7436 plus_code = POINTER_PLUS_EXPR;
7437 plus_type = sizetype;
7439 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7440 diff_type = signed_type_for (diff_type);
7441 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7442 diff_type = integer_type_node;
7444 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7445 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7446 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7447 basic_block bottom_bb = NULL;
7449 /* entry_bb has two successors; the branch edge is to the exit
7450 block, fallthrough edge to body. */
7451 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7452 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7454 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7455 body_bb, or a block whose only successor is body_bb. Its
7456 fallthrough successor is the final block (same as the branch
7457 successor of the entry_bb). */
7458 if (cont_bb)
7460 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7461 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7463 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7464 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7466 else
7467 gcc_assert (!gimple_in_ssa_p (cfun));
7469 /* The exit block only has entry_bb and cont_bb as predecessors. */
7470 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7472 tree chunk_no;
7473 tree chunk_max = NULL_TREE;
7474 tree bound, offset;
7475 tree step = create_tmp_var (diff_type, ".step");
7476 bool up = cond_code == LT_EXPR;
7477 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7478 bool chunking = !gimple_in_ssa_p (cfun);
7479 bool negating;
7481 /* Tiling vars. */
7482 tree tile_size = NULL_TREE;
7483 tree element_s = NULL_TREE;
7484 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7485 basic_block elem_body_bb = NULL;
7486 basic_block elem_cont_bb = NULL;
7488 /* SSA instances. */
7489 tree offset_incr = NULL_TREE;
7490 tree offset_init = NULL_TREE;
7492 gimple_stmt_iterator gsi;
7493 gassign *ass;
7494 gcall *call;
7495 gimple *stmt;
7496 tree expr;
7497 location_t loc;
7498 edge split, be, fte;
7500 /* Split the end of entry_bb to create head_bb. */
7501 split = split_block (entry_bb, last_stmt (entry_bb));
7502 basic_block head_bb = split->dest;
7503 entry_bb = split->src;
7505 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7506 gsi = gsi_last_nondebug_bb (entry_bb);
7507 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7508 loc = gimple_location (for_stmt);
7510 if (gimple_in_ssa_p (cfun))
7512 offset_init = gimple_omp_for_index (for_stmt, 0);
7513 gcc_assert (integer_zerop (fd->loop.n1));
7514 /* The SSA parallelizer does gang parallelism. */
7515 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7518 if (fd->collapse > 1 || fd->tiling)
7520 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7521 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7522 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7523 TREE_TYPE (fd->loop.n2), loc);
7525 if (SSA_VAR_P (fd->loop.n2))
7527 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7528 true, GSI_SAME_STMT);
7529 ass = gimple_build_assign (fd->loop.n2, total);
7530 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7534 tree b = fd->loop.n1;
7535 tree e = fd->loop.n2;
7536 tree s = fd->loop.step;
7538 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7539 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7541 /* Convert the step, avoiding possible unsigned->signed overflow. */
7542 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7543 if (negating)
7544 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7545 s = fold_convert (diff_type, s);
7546 if (negating)
7547 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7548 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7550 if (!chunking)
7551 chunk_size = integer_zero_node;
7552 expr = fold_convert (diff_type, chunk_size);
7553 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7554 NULL_TREE, true, GSI_SAME_STMT);
7556 if (fd->tiling)
7558 /* Determine the tile size and element step,
7559 modify the outer loop step size. */
7560 tile_size = create_tmp_var (diff_type, ".tile_size");
7561 expr = build_int_cst (diff_type, 1);
7562 for (int ix = 0; ix < fd->collapse; ix++)
7563 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7564 expr = force_gimple_operand_gsi (&gsi, expr, true,
7565 NULL_TREE, true, GSI_SAME_STMT);
7566 ass = gimple_build_assign (tile_size, expr);
7567 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7569 element_s = create_tmp_var (diff_type, ".element_s");
7570 ass = gimple_build_assign (element_s, s);
7571 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7573 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7574 s = force_gimple_operand_gsi (&gsi, expr, true,
7575 NULL_TREE, true, GSI_SAME_STMT);
7578 /* Determine the range, avoiding possible unsigned->signed overflow. */
7579 negating = !up && TYPE_UNSIGNED (iter_type);
7580 expr = fold_build2 (MINUS_EXPR, plus_type,
7581 fold_convert (plus_type, negating ? b : e),
7582 fold_convert (plus_type, negating ? e : b));
7583 expr = fold_convert (diff_type, expr);
7584 if (negating)
7585 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7586 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7587 NULL_TREE, true, GSI_SAME_STMT);
7589 chunk_no = build_int_cst (diff_type, 0);
7590 if (chunking)
7592 gcc_assert (!gimple_in_ssa_p (cfun));
7594 expr = chunk_no;
7595 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7596 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7598 ass = gimple_build_assign (chunk_no, expr);
7599 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7601 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7602 build_int_cst (integer_type_node,
7603 IFN_GOACC_LOOP_CHUNKS),
7604 dir, range, s, chunk_size, gwv);
7605 gimple_call_set_lhs (call, chunk_max);
7606 gimple_set_location (call, loc);
7607 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7609 else
7610 chunk_size = chunk_no;
7612 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7613 build_int_cst (integer_type_node,
7614 IFN_GOACC_LOOP_STEP),
7615 dir, range, s, chunk_size, gwv);
7616 gimple_call_set_lhs (call, step);
7617 gimple_set_location (call, loc);
7618 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7620 /* Remove the GIMPLE_OMP_FOR. */
7621 gsi_remove (&gsi, true);
7623 /* Fixup edges from head_bb. */
7624 be = BRANCH_EDGE (head_bb);
7625 fte = FALLTHRU_EDGE (head_bb);
7626 be->flags |= EDGE_FALSE_VALUE;
7627 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7629 basic_block body_bb = fte->dest;
7631 if (gimple_in_ssa_p (cfun))
7633 gsi = gsi_last_nondebug_bb (cont_bb);
7634 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7636 offset = gimple_omp_continue_control_use (cont_stmt);
7637 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7639 else
7641 offset = create_tmp_var (diff_type, ".offset");
7642 offset_init = offset_incr = offset;
7644 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7646 /* Loop offset & bound go into head_bb. */
7647 gsi = gsi_start_bb (head_bb);
7649 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7650 build_int_cst (integer_type_node,
7651 IFN_GOACC_LOOP_OFFSET),
7652 dir, range, s,
7653 chunk_size, gwv, chunk_no);
7654 gimple_call_set_lhs (call, offset_init);
7655 gimple_set_location (call, loc);
7656 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7658 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7659 build_int_cst (integer_type_node,
7660 IFN_GOACC_LOOP_BOUND),
7661 dir, range, s,
7662 chunk_size, gwv, offset_init);
7663 gimple_call_set_lhs (call, bound);
7664 gimple_set_location (call, loc);
7665 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7667 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7668 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7669 GSI_CONTINUE_LINKING);
7671 /* V assignment goes into body_bb. */
7672 if (!gimple_in_ssa_p (cfun))
7674 gsi = gsi_start_bb (body_bb);
7676 expr = build2 (plus_code, iter_type, b,
7677 fold_convert (plus_type, offset));
7678 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7679 true, GSI_SAME_STMT);
7680 ass = gimple_build_assign (v, expr);
7681 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7683 if (fd->collapse > 1 || fd->tiling)
7684 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7686 if (fd->tiling)
7688 /* Determine the range of the element loop -- usually simply
7689 the tile_size, but could be smaller if the final
7690 iteration of the outer loop is a partial tile. */
7691 tree e_range = create_tmp_var (diff_type, ".e_range");
7693 expr = build2 (MIN_EXPR, diff_type,
7694 build2 (MINUS_EXPR, diff_type, bound, offset),
7695 build2 (MULT_EXPR, diff_type, tile_size,
7696 element_s));
7697 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7698 true, GSI_SAME_STMT);
7699 ass = gimple_build_assign (e_range, expr);
7700 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7702 /* Determine bound, offset & step of inner loop. */
7703 e_bound = create_tmp_var (diff_type, ".e_bound");
7704 e_offset = create_tmp_var (diff_type, ".e_offset");
7705 e_step = create_tmp_var (diff_type, ".e_step");
7707 /* Mark these as element loops. */
7708 tree t, e_gwv = integer_minus_one_node;
7709 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7711 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7712 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7713 element_s, chunk, e_gwv, chunk);
7714 gimple_call_set_lhs (call, e_offset);
7715 gimple_set_location (call, loc);
7716 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7718 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7719 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7720 element_s, chunk, e_gwv, e_offset);
7721 gimple_call_set_lhs (call, e_bound);
7722 gimple_set_location (call, loc);
7723 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7725 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7726 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7727 element_s, chunk, e_gwv);
7728 gimple_call_set_lhs (call, e_step);
7729 gimple_set_location (call, loc);
7730 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7732 /* Add test and split block. */
7733 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7734 stmt = gimple_build_cond_empty (expr);
7735 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7736 split = split_block (body_bb, stmt);
7737 elem_body_bb = split->dest;
7738 if (cont_bb == body_bb)
7739 cont_bb = elem_body_bb;
7740 body_bb = split->src;
7742 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7744 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7745 if (cont_bb == NULL)
7747 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7748 e->probability = profile_probability::even ();
7749 split->probability = profile_probability::even ();
7752 /* Initialize the user's loop vars. */
7753 gsi = gsi_start_bb (elem_body_bb);
7754 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7758 /* Loop increment goes into cont_bb. If this is not a loop, we
7759 will have spawned threads as if it was, and each one will
7760 execute one iteration. The specification is not explicit about
7761 whether such constructs are ill-formed or not, and they can
7762 occur, especially when noreturn routines are involved. */
7763 if (cont_bb)
7765 gsi = gsi_last_nondebug_bb (cont_bb);
7766 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7767 loc = gimple_location (cont_stmt);
7769 if (fd->tiling)
7771 /* Insert element loop increment and test. */
7772 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7773 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7774 true, GSI_SAME_STMT);
7775 ass = gimple_build_assign (e_offset, expr);
7776 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7777 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7779 stmt = gimple_build_cond_empty (expr);
7780 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7781 split = split_block (cont_bb, stmt);
7782 elem_cont_bb = split->src;
7783 cont_bb = split->dest;
7785 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7786 split->probability = profile_probability::unlikely ().guessed ();
7787 edge latch_edge
7788 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7789 latch_edge->probability = profile_probability::likely ().guessed ();
7791 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7792 skip_edge->probability = profile_probability::unlikely ().guessed ();
7793 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7794 loop_entry_edge->probability
7795 = profile_probability::likely ().guessed ();
7797 gsi = gsi_for_stmt (cont_stmt);
7800 /* Increment offset. */
7801 if (gimple_in_ssa_p (cfun))
7802 expr = build2 (plus_code, iter_type, offset,
7803 fold_convert (plus_type, step));
7804 else
7805 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7806 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7807 true, GSI_SAME_STMT);
7808 ass = gimple_build_assign (offset_incr, expr);
7809 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7810 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7811 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7813 /* Remove the GIMPLE_OMP_CONTINUE. */
7814 gsi_remove (&gsi, true);
7816 /* Fixup edges from cont_bb. */
7817 be = BRANCH_EDGE (cont_bb);
7818 fte = FALLTHRU_EDGE (cont_bb);
7819 be->flags |= EDGE_TRUE_VALUE;
7820 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7822 if (chunking)
7824 /* Split the beginning of exit_bb to make bottom_bb. We
7825 need to insert a nop at the start, because splitting is
7826 after a stmt, not before. */
7827 gsi = gsi_start_bb (exit_bb);
7828 stmt = gimple_build_nop ();
7829 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7830 split = split_block (exit_bb, stmt);
7831 bottom_bb = split->src;
7832 exit_bb = split->dest;
7833 gsi = gsi_last_bb (bottom_bb);
7835 /* Chunk increment and test goes into bottom_bb. */
7836 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7837 build_int_cst (diff_type, 1));
7838 ass = gimple_build_assign (chunk_no, expr);
7839 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7841 /* Chunk test at end of bottom_bb. */
7842 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7843 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7844 GSI_CONTINUE_LINKING);
7846 /* Fixup edges from bottom_bb. */
7847 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7848 split->probability = profile_probability::unlikely ().guessed ();
7849 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7850 latch_edge->probability = profile_probability::likely ().guessed ();
7854 gsi = gsi_last_nondebug_bb (exit_bb);
7855 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7856 loc = gimple_location (gsi_stmt (gsi));
7858 if (!gimple_in_ssa_p (cfun))
7860 /* Insert the final value of V, in case it is live. This is the
7861 value for the only thread that survives past the join. */
7862 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7863 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7864 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7865 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7866 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7867 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7868 true, GSI_SAME_STMT);
7869 ass = gimple_build_assign (v, expr);
7870 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7873 /* Remove the OMP_RETURN. */
7874 gsi_remove (&gsi, true);
7876 if (cont_bb)
7878 /* We now have one, two or three nested loops. Update the loop
7879 structures. */
7880 class loop *parent = entry_bb->loop_father;
7881 class loop *body = body_bb->loop_father;
7883 if (chunking)
7885 class loop *chunk_loop = alloc_loop ();
7886 chunk_loop->header = head_bb;
7887 chunk_loop->latch = bottom_bb;
7888 add_loop (chunk_loop, parent);
7889 parent = chunk_loop;
7891 else if (parent != body)
7893 gcc_assert (body->header == body_bb);
7894 gcc_assert (body->latch == cont_bb
7895 || single_pred (body->latch) == cont_bb);
7896 parent = NULL;
7899 if (parent)
7901 class loop *body_loop = alloc_loop ();
7902 body_loop->header = body_bb;
7903 body_loop->latch = cont_bb;
7904 add_loop (body_loop, parent);
7906 if (fd->tiling)
7908 /* Insert tiling's element loop. */
7909 class loop *inner_loop = alloc_loop ();
7910 inner_loop->header = elem_body_bb;
7911 inner_loop->latch = elem_cont_bb;
7912 add_loop (inner_loop, body_loop);
7918 /* Expand the OMP loop defined by REGION. */
7920 static void
7921 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7923 struct omp_for_data fd;
7924 struct omp_for_data_loop *loops;
7926 loops = XALLOCAVEC (struct omp_for_data_loop,
7927 gimple_omp_for_collapse (last_stmt (region->entry)));
7928 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7929 &fd, loops);
7930 region->sched_kind = fd.sched_kind;
7931 region->sched_modifiers = fd.sched_modifiers;
7932 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7933 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7935 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7936 if ((loops[i].m1 || loops[i].m2)
7937 && (loops[i].m1 == NULL_TREE
7938 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7939 && (loops[i].m2 == NULL_TREE
7940 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7941 && TREE_CODE (loops[i].step) == INTEGER_CST
7942 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7944 tree t;
7945 tree itype = TREE_TYPE (loops[i].v);
7946 if (loops[i].m1 && loops[i].m2)
7947 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7948 else if (loops[i].m1)
7949 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7950 else
7951 t = loops[i].m2;
7952 t = fold_build2 (MULT_EXPR, itype, t,
7953 fold_convert (itype,
7954 loops[i - loops[i].outer].step));
7955 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7956 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7957 fold_build1 (NEGATE_EXPR, itype, t),
7958 fold_build1 (NEGATE_EXPR, itype,
7959 fold_convert (itype,
7960 loops[i].step)));
7961 else
7962 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7963 fold_convert (itype, loops[i].step));
7964 if (integer_nonzerop (t))
7965 error_at (gimple_location (fd.for_stmt),
7966 "invalid OpenMP non-rectangular loop step; "
7967 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7968 "step %qE",
7969 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7970 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7971 loops[i - loops[i].outer].step, i + 1,
7972 loops[i].step);
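/* Hedged example of a rejected nest (assumed, not from the testsuite):

     #pragma omp for collapse(2)
     for (i = 0; i < 64; i++)
       for (j = i; j < 64; j += 3)
	 body;

   Here m1 is 1 and the outer step is 1, so the check computes
   (0 - 1) * 1, which is not a multiple of the inner step 3: the lower
   bound advances by 1 per outer iteration and no exact iteration count
   exists. */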
7976 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7977 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7978 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7979 if (region->cont)
7981 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7982 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7983 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7985 else
7986 /* If there isn't a continue then this is a degenerate case where
7987 the introduction of abnormal edges during lowering will prevent
7988 original loops from being detected. Fix that up. */
7989 loops_state_set (LOOPS_NEED_FIXUP);
7991 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7992 expand_omp_simd (region, &fd);
7993 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7995 gcc_assert (!inner_stmt && !fd.non_rect);
7996 expand_oacc_for (region, &fd);
7998 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8000 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8001 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8002 else
8003 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8005 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8006 && !fd.have_ordered)
8008 if (fd.chunk_size == NULL)
8009 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8010 else
8011 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8013 else
8015 int fn_index, start_ix, next_ix;
8016 unsigned HOST_WIDE_INT sched = 0;
8017 tree sched_arg = NULL_TREE;
8019 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8020 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8021 if (fd.chunk_size == NULL
8022 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8023 fd.chunk_size = integer_zero_node;
8024 switch (fd.sched_kind)
8026 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8027 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8028 && fd.lastprivate_conditional == 0)
8030 gcc_assert (!fd.have_ordered);
8031 fn_index = 6;
8032 sched = 4;
8034 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8035 && !fd.have_ordered
8036 && fd.lastprivate_conditional == 0)
8037 fn_index = 7;
8038 else
8040 fn_index = 3;
8041 sched = (HOST_WIDE_INT_1U << 31);
8043 break;
8044 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8045 case OMP_CLAUSE_SCHEDULE_GUIDED:
8046 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8047 && !fd.have_ordered
8048 && fd.lastprivate_conditional == 0)
8050 fn_index = 3 + fd.sched_kind;
8051 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8052 break;
8054 fn_index = fd.sched_kind;
8055 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8056 sched += (HOST_WIDE_INT_1U << 31);
8057 break;
8058 case OMP_CLAUSE_SCHEDULE_STATIC:
8059 gcc_assert (fd.have_ordered);
8060 fn_index = 0;
8061 sched = (HOST_WIDE_INT_1U << 31) + 1;
8062 break;
8063 default:
8064 gcc_unreachable ();
8066 if (!fd.ordered)
8067 fn_index += fd.have_ordered * 8;
8068 if (fd.ordered)
8069 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8070 else
8071 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8072 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8073 if (fd.have_reductemp || fd.have_pointer_condtemp)
8075 if (fd.ordered)
8076 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8077 else if (fd.have_ordered)
8078 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8079 else
8080 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8081 sched_arg = build_int_cstu (long_integer_type_node, sched);
8082 if (!fd.chunk_size)
8083 fd.chunk_size = integer_zero_node;
8085 if (fd.iter_type == long_long_unsigned_type_node)
8087 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8088 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8089 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8090 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8092 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8093 (enum built_in_function) next_ix, sched_arg,
8094 inner_stmt);
8097 if (gimple_in_ssa_p (cfun))
8098 update_ssa (TODO_update_ssa_only_virtuals);
8101 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8103 v = GOMP_sections_start (n);
8104 L0:
8105 switch (v)
8106 {
8107 case 0:
8108 goto L2;
8109 case 1:
8110 section 1;
8111 goto L1;
8112 case 2:
8113 ...
8114 case n:
8115 ...
8116 default:
8117 abort ();
8118 }
8119 L1:
8120 v = GOMP_sections_next ();
8121 goto L0;
8122 L2:
8123 reduction;
8125 If this is a combined parallel sections, replace the call to
8126 GOMP_sections_start with a call to GOMP_sections_next. */
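/* An illustrative source form (a sketch, not from the sources):

     #pragma omp sections
     {
       #pragma omp section
	 stmt1;
       #pragma omp section
	 stmt2;
     }

   produces cases 1 and 2 for the two section bodies, case 0 jumping to
   L2 once no work is left, and an aborting default. */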
8128 static void
8129 expand_omp_sections (struct omp_region *region)
8131 tree t, u, vin = NULL, vmain, vnext, l2;
8132 unsigned len;
8133 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8134 gimple_stmt_iterator si, switch_si;
8135 gomp_sections *sections_stmt;
8136 gimple *stmt;
8137 gomp_continue *cont;
8138 edge_iterator ei;
8139 edge e;
8140 struct omp_region *inner;
8141 unsigned i, casei;
8142 bool exit_reachable = region->cont != NULL;
8144 gcc_assert (region->exit != NULL);
8145 entry_bb = region->entry;
8146 l0_bb = single_succ (entry_bb);
8147 l1_bb = region->cont;
8148 l2_bb = region->exit;
8149 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8150 l2 = gimple_block_label (l2_bb);
8151 else
8153 /* This can happen if there are reductions. */
8154 len = EDGE_COUNT (l0_bb->succs);
8155 gcc_assert (len > 0);
8156 e = EDGE_SUCC (l0_bb, len - 1);
8157 si = gsi_last_nondebug_bb (e->dest);
8158 l2 = NULL_TREE;
8159 if (gsi_end_p (si)
8160 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8161 l2 = gimple_block_label (e->dest);
8162 else
8163 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8165 si = gsi_last_nondebug_bb (e->dest);
8166 if (gsi_end_p (si)
8167 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8169 l2 = gimple_block_label (e->dest);
8170 break;
8174 if (exit_reachable)
8175 default_bb = create_empty_bb (l1_bb->prev_bb);
8176 else
8177 default_bb = create_empty_bb (l0_bb);
8179 /* We will build a switch() with enough cases for all the
8180 GIMPLE_OMP_SECTION regions, a '0' case for when no more work remains
8181 and a default case to abort if something goes wrong. */
8182 len = EDGE_COUNT (l0_bb->succs);
8184 /* Use vec::quick_push on label_vec throughout, since we know the size
8185 in advance. */
8186 auto_vec<tree> label_vec (len);
8188 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8189 GIMPLE_OMP_SECTIONS statement. */
8190 si = gsi_last_nondebug_bb (entry_bb);
8191 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8192 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8193 vin = gimple_omp_sections_control (sections_stmt);
8194 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8195 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8196 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8197 tree cond_var = NULL_TREE;
8198 if (reductmp || condtmp)
8200 tree reductions = null_pointer_node, mem = null_pointer_node;
8201 tree memv = NULL_TREE, condtemp = NULL_TREE;
8202 gimple_stmt_iterator gsi = gsi_none ();
8203 gimple *g = NULL;
8204 if (reductmp)
8206 reductions = OMP_CLAUSE_DECL (reductmp);
8207 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8208 g = SSA_NAME_DEF_STMT (reductions);
8209 reductions = gimple_assign_rhs1 (g);
8210 OMP_CLAUSE_DECL (reductmp) = reductions;
8211 gsi = gsi_for_stmt (g);
8213 else
8214 gsi = si;
8215 if (condtmp)
8217 condtemp = OMP_CLAUSE_DECL (condtmp);
8218 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8219 OMP_CLAUSE__CONDTEMP_);
8220 cond_var = OMP_CLAUSE_DECL (c);
8221 tree type = TREE_TYPE (condtemp);
8222 memv = create_tmp_var (type);
8223 TREE_ADDRESSABLE (memv) = 1;
8224 unsigned cnt = 0;
8225 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8226 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8227 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8228 ++cnt;
8229 unsigned HOST_WIDE_INT sz
8230 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8231 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8232 false);
8233 mem = build_fold_addr_expr (memv);
8235 t = build_int_cst (unsigned_type_node, len - 1);
8236 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8237 stmt = gimple_build_call (u, 3, t, reductions, mem);
8238 gimple_call_set_lhs (stmt, vin);
8239 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8240 if (condtmp)
8242 expand_omp_build_assign (&gsi, condtemp, memv, false);
8243 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8244 vin, build_one_cst (TREE_TYPE (cond_var)));
8245 expand_omp_build_assign (&gsi, cond_var, t, false);
8247 if (reductmp)
8249 gsi_remove (&gsi, true);
8250 release_ssa_name (gimple_assign_lhs (g));
8253 else if (!is_combined_parallel (region))
8255 /* If we are not inside a combined parallel+sections region,
8256 call GOMP_sections_start. */
8257 t = build_int_cst (unsigned_type_node, len - 1);
8258 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8259 stmt = gimple_build_call (u, 1, t);
8261 else
8263 /* Otherwise, call GOMP_sections_next. */
8264 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8265 stmt = gimple_build_call (u, 0);
8267 if (!reductmp && !condtmp)
8269 gimple_call_set_lhs (stmt, vin);
8270 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8272 gsi_remove (&si, true);
8274 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8275 L0_BB. */
8276 switch_si = gsi_last_nondebug_bb (l0_bb);
8277 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8278 if (exit_reachable)
8280 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8281 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8282 vmain = gimple_omp_continue_control_use (cont);
8283 vnext = gimple_omp_continue_control_def (cont);
8285 else
8287 vmain = vin;
8288 vnext = NULL_TREE;
8291 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8292 label_vec.quick_push (t);
8293 i = 1;
8295 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8296 for (inner = region->inner, casei = 1;
8297 inner;
8298 inner = inner->next, i++, casei++)
8300 basic_block s_entry_bb, s_exit_bb;
8302 /* Skip optional reduction region. */
8303 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8305 --i;
8306 --casei;
8307 continue;
8310 s_entry_bb = inner->entry;
8311 s_exit_bb = inner->exit;
8313 t = gimple_block_label (s_entry_bb);
8314 u = build_int_cst (unsigned_type_node, casei);
8315 u = build_case_label (u, NULL, t);
8316 label_vec.quick_push (u);
8318 si = gsi_last_nondebug_bb (s_entry_bb);
8319 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8320 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8321 gsi_remove (&si, true);
8322 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8324 if (s_exit_bb == NULL)
8325 continue;
8327 si = gsi_last_nondebug_bb (s_exit_bb);
8328 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8329 gsi_remove (&si, true);
8331 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8334 /* Error handling code goes in DEFAULT_BB. */
8335 t = gimple_block_label (default_bb);
8336 u = build_case_label (NULL, NULL, t);
8337 make_edge (l0_bb, default_bb, 0);
8338 add_bb_to_loop (default_bb, current_loops->tree_root);
8340 stmt = gimple_build_switch (vmain, u, label_vec);
8341 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8342 gsi_remove (&switch_si, true);
8344 si = gsi_start_bb (default_bb);
8345 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8346 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8348 if (exit_reachable)
8350 tree bfn_decl;
8352 /* Code to get the next section goes in L1_BB. */
8353 si = gsi_last_nondebug_bb (l1_bb);
8354 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8356 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8357 stmt = gimple_build_call (bfn_decl, 0);
8358 gimple_call_set_lhs (stmt, vnext);
8359 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8360 if (cond_var)
8362 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8363 vnext, build_one_cst (TREE_TYPE (cond_var)));
8364 expand_omp_build_assign (&si, cond_var, t, false);
8366 gsi_remove (&si, true);
8368 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8371 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8372 si = gsi_last_nondebug_bb (l2_bb);
8373 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8374 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8375 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8376 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8377 else
8378 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8379 stmt = gimple_build_call (t, 0);
8380 if (gimple_omp_return_lhs (gsi_stmt (si)))
8381 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8382 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8383 gsi_remove (&si, true);
8385 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8388 /* Expand code for an OpenMP single directive. We've already expanded
8389 much of the code; here we simply place the GOMP_barrier call. */
8391 static void
8392 expand_omp_single (struct omp_region *region)
8394 basic_block entry_bb, exit_bb;
8395 gimple_stmt_iterator si;
8397 entry_bb = region->entry;
8398 exit_bb = region->exit;
8400 si = gsi_last_nondebug_bb (entry_bb);
8401 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8402 gsi_remove (&si, true);
8403 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8405 si = gsi_last_nondebug_bb (exit_bb);
8406 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8408 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8409 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8411 gsi_remove (&si, true);
8412 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8415 /* Generic expansion for OpenMP synchronization directives: master,
8416 ordered and critical. All we need to do here is remove the entry
8417 and exit markers for REGION. */
8419 static void
8420 expand_omp_synch (struct omp_region *region)
8422 basic_block entry_bb, exit_bb;
8423 gimple_stmt_iterator si;
8425 entry_bb = region->entry;
8426 exit_bb = region->exit;
8428 si = gsi_last_nondebug_bb (entry_bb);
8429 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8430 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8431 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8432 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8433 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8434 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8435 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8436 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8438 expand_omp_taskreg (region);
8439 return;
8441 gsi_remove (&si, true);
8442 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8444 if (exit_bb)
8446 si = gsi_last_nondebug_bb (exit_bb);
8447 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8448 gsi_remove (&si, true);
8449 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8453 /* Translate enum omp_memory_order to enum memmodel. The two enums
8454 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8455 is 0. */
8457 static enum memmodel
8458 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8460 switch (mo)
8462 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8463 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8464 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8465 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8466 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8467 default: gcc_unreachable ();
8471 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8472 operation as a normal volatile load. */
8474 static bool
8475 expand_omp_atomic_load (basic_block load_bb, tree addr,
8476 tree loaded_val, int index)
8478 enum built_in_function tmpbase;
8479 gimple_stmt_iterator gsi;
8480 basic_block store_bb;
8481 location_t loc;
8482 gimple *stmt;
8483 tree decl, call, type, itype;
8485 gsi = gsi_last_nondebug_bb (load_bb);
8486 stmt = gsi_stmt (gsi);
8487 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8488 loc = gimple_location (stmt);
8490 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8491 is smaller than word size, then expand_atomic_load assumes that the load
8492 is atomic. We could avoid the builtin entirely in this case. */
8494 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8495 decl = builtin_decl_explicit (tmpbase);
8496 if (decl == NULL_TREE)
8497 return false;
8499 type = TREE_TYPE (loaded_val);
8500 itype = TREE_TYPE (TREE_TYPE (decl));
8502 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8503 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8504 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8505 if (!useless_type_conversion_p (type, itype))
8506 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8507 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8509 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8510 gsi_remove (&gsi, true);
8512 store_bb = single_succ (load_bb);
8513 gsi = gsi_last_nondebug_bb (store_bb);
8514 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8515 gsi_remove (&gsi, true);
8517 if (gimple_in_ssa_p (cfun))
8518 update_ssa (TODO_update_ssa_no_phi);
8520 return true;
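/* For example, with the expansion above an atomic read of a 4-byte int,

     #pragma omp atomic read seq_cst
     v = x;

   becomes roughly (sketch, assuming BUILT_IN_ATOMIC_LOAD_4 is
   available on the target):

     v = __atomic_load_n (&x, __ATOMIC_SEQ_CST);  */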
8523 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8524 operation as a normal volatile store. */
8526 static bool
8527 expand_omp_atomic_store (basic_block load_bb, tree addr,
8528 tree loaded_val, tree stored_val, int index)
8530 enum built_in_function tmpbase;
8531 gimple_stmt_iterator gsi;
8532 basic_block store_bb = single_succ (load_bb);
8533 location_t loc;
8534 gimple *stmt;
8535 tree decl, call, type, itype;
8536 machine_mode imode;
8537 bool exchange;
8539 gsi = gsi_last_nondebug_bb (load_bb);
8540 stmt = gsi_stmt (gsi);
8541 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8543 /* If the load value is needed, then this isn't a store but an exchange. */
8544 exchange = gimple_omp_atomic_need_value_p (stmt);
8546 gsi = gsi_last_nondebug_bb (store_bb);
8547 stmt = gsi_stmt (gsi);
8548 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8549 loc = gimple_location (stmt);
8551 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8552 is smaller than word size, then expand_atomic_store assumes that the store
8553 is atomic. We could avoid the builtin entirely in this case. */
8555 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8556 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8557 decl = builtin_decl_explicit (tmpbase);
8558 if (decl == NULL_TREE)
8559 return false;
8561 type = TREE_TYPE (stored_val);
8563 /* Dig out the type of the function's second argument. */
8564 itype = TREE_TYPE (decl);
8565 itype = TYPE_ARG_TYPES (itype);
8566 itype = TREE_CHAIN (itype);
8567 itype = TREE_VALUE (itype);
8568 imode = TYPE_MODE (itype);
8570 if (exchange && !can_atomic_exchange_p (imode, true))
8571 return false;
8573 if (!useless_type_conversion_p (itype, type))
8574 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8575 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8576 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8577 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8578 if (exchange)
8580 if (!useless_type_conversion_p (type, itype))
8581 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8582 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8585 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8586 gsi_remove (&gsi, true);
8588 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8589 gsi = gsi_last_nondebug_bb (load_bb);
8590 gsi_remove (&gsi, true);
8592 if (gimple_in_ssa_p (cfun))
8593 update_ssa (TODO_update_ssa_no_phi);
8595 return true;
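/* For example (sketch, assuming a 4-byte int X and the default relaxed
   memory order):

     #pragma omp atomic write
     x = expr;

   becomes  __atomic_store_n (&x, expr, __ATOMIC_RELAXED);  and the
   exchange form, where the old value is needed,

     #pragma omp atomic capture
     { v = x; x = expr; }

   becomes  v = __atomic_exchange_n (&x, expr, __ATOMIC_RELAXED);  */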
8598 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8599 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8600 size of the data type, and thus usable to find the index of the builtin
8601 decl. Returns false if the expression is not of the proper form. */
8603 static bool
8604 expand_omp_atomic_fetch_op (basic_block load_bb,
8605 tree addr, tree loaded_val,
8606 tree stored_val, int index)
8608 enum built_in_function oldbase, newbase, tmpbase;
8609 tree decl, itype, call;
8610 tree lhs, rhs;
8611 basic_block store_bb = single_succ (load_bb);
8612 gimple_stmt_iterator gsi;
8613 gimple *stmt;
8614 location_t loc;
8615 enum tree_code code;
8616 bool need_old, need_new;
8617 machine_mode imode;
8619 /* We expect to find the following sequences:
8621 load_bb:
8622 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8624 store_bb:
8625 val = tmp OP something; (or: something OP tmp)
8626 GIMPLE_OMP_ATOMIC_STORE (val)
8628 ???FIXME: Allow a more flexible sequence.
8629 Perhaps use data flow to pick the statements.
8633 gsi = gsi_after_labels (store_bb);
8634 stmt = gsi_stmt (gsi);
8635 if (is_gimple_debug (stmt))
8637 gsi_next_nondebug (&gsi);
8638 if (gsi_end_p (gsi))
8639 return false;
8640 stmt = gsi_stmt (gsi);
8642 loc = gimple_location (stmt);
8643 if (!is_gimple_assign (stmt))
8644 return false;
8645 gsi_next_nondebug (&gsi);
8646 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8647 return false;
8648 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8649 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8650 enum omp_memory_order omo
8651 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8652 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8653 gcc_checking_assert (!need_old || !need_new);
8655 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8656 return false;
8658 /* Check for one of the supported fetch-op operations. */
8659 code = gimple_assign_rhs_code (stmt);
8660 switch (code)
8662 case PLUS_EXPR:
8663 case POINTER_PLUS_EXPR:
8664 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8665 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8666 break;
8667 case MINUS_EXPR:
8668 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8669 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8670 break;
8671 case BIT_AND_EXPR:
8672 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8673 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8674 break;
8675 case BIT_IOR_EXPR:
8676 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8677 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8678 break;
8679 case BIT_XOR_EXPR:
8680 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8681 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8682 break;
8683 default:
8684 return false;
8687 /* Make sure the expression is of the proper form. */
8688 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8689 rhs = gimple_assign_rhs2 (stmt);
8690 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8691 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8692 rhs = gimple_assign_rhs1 (stmt);
8693 else
8694 return false;
8696 tmpbase = ((enum built_in_function)
8697 ((need_new ? newbase : oldbase) + index + 1));
8698 decl = builtin_decl_explicit (tmpbase);
8699 if (decl == NULL_TREE)
8700 return false;
8701 itype = TREE_TYPE (TREE_TYPE (decl));
8702 imode = TYPE_MODE (itype);
8704 /* We could test all of the various optabs involved, but the fact of the
8705 matter is that (with the exception of i486 vs i586 and xadd) all targets
8706 that support any atomic operation optab also implement compare-and-swap.
8707 Let optabs.c take care of expanding any compare-and-swap loop. */
8708 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8709 return false;
8711 gsi = gsi_last_nondebug_bb (load_bb);
8712 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8714 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8715 It only requires that the operation happen atomically. Thus we can
8716 use the RELAXED memory model. */
8717 call = build_call_expr_loc (loc, decl, 3, addr,
8718 fold_convert_loc (loc, itype, rhs),
8719 build_int_cst (NULL, mo));
8721 if (need_old || need_new)
8723 lhs = need_old ? loaded_val : stored_val;
8724 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8725 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8727 else
8728 call = fold_convert_loc (loc, void_type_node, call);
8729 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8730 gsi_remove (&gsi, true);
8732 gsi = gsi_last_nondebug_bb (store_bb);
8733 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8734 gsi_remove (&gsi, true);
8735 gsi = gsi_last_nondebug_bb (store_bb);
8736 stmt = gsi_stmt (gsi);
8737 gsi_remove (&gsi, true);
8739 if (gimple_in_ssa_p (cfun))
8741 release_defs (stmt);
8742 update_ssa (TODO_update_ssa_no_phi);
8745 return true;
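/* For example (sketch): a store_bb matching the pattern above, such as

     #pragma omp atomic
     x += n;

   where neither the old nor the new value is needed, becomes the single
   call

     __atomic_fetch_add (&x, n, __ATOMIC_RELAXED);

   whereas a capture form that needs the old value would assign the
   result of __atomic_fetch_add to that variable instead.  */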
8748 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8750 oldval = *addr;
8751 repeat:
8752 newval = rhs; // with oldval replacing *addr in rhs
8753 curval = __sync_val_compare_and_swap (addr, oldval, newval);
8754 if (curval != oldval)
8755 { oldval = curval; goto repeat; }
8757 INDEX is log2 of the size of the data type, and thus usable to find the
8758 index of the builtin decl. */
8760 static bool
8761 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8762 tree addr, tree loaded_val, tree stored_val,
8763 int index)
8765 tree loadedi, storedi, initial, new_storedi, old_vali;
8766 tree type, itype, cmpxchg, iaddr, atype;
8767 gimple_stmt_iterator si;
8768 basic_block loop_header = single_succ (load_bb);
8769 gimple *phi, *stmt;
8770 edge e;
8771 enum built_in_function fncode;
8773 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8774 order to use the RELAXED memory model effectively. */
8775 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8776 + index + 1);
8777 cmpxchg = builtin_decl_explicit (fncode);
8778 if (cmpxchg == NULL_TREE)
8779 return false;
8780 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8781 atype = type;
8782 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8784 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8785 || !can_atomic_load_p (TYPE_MODE (itype)))
8786 return false;
8788 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8789 si = gsi_last_nondebug_bb (load_bb);
8790 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8792 /* For floating-point values, we'll need to view-convert them to integers
8793 so that we can perform the atomic compare and swap. Simplify the
8794 following code by always setting up the "i"ntegral variables. */
8795 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8797 tree iaddr_val;
8799 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8800 true));
8801 atype = itype;
8802 iaddr_val
8803 = force_gimple_operand_gsi (&si,
8804 fold_convert (TREE_TYPE (iaddr), addr),
8805 false, NULL_TREE, true, GSI_SAME_STMT);
8806 stmt = gimple_build_assign (iaddr, iaddr_val);
8807 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8808 loadedi = create_tmp_var (itype);
8809 if (gimple_in_ssa_p (cfun))
8810 loadedi = make_ssa_name (loadedi);
8812 else
8814 iaddr = addr;
8815 loadedi = loaded_val;
8818 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8819 tree loaddecl = builtin_decl_explicit (fncode);
8820 if (loaddecl)
8821 initial
8822 = fold_convert (atype,
8823 build_call_expr (loaddecl, 2, iaddr,
8824 build_int_cst (NULL_TREE,
8825 MEMMODEL_RELAXED)));
8826 else
8828 tree off
8829 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8830 true), 0);
8831 initial = build2 (MEM_REF, atype, iaddr, off);
8834 initial
8835 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8836 GSI_SAME_STMT);
8838 /* Move the value to the LOADEDI temporary. */
8839 if (gimple_in_ssa_p (cfun))
8841 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8842 phi = create_phi_node (loadedi, loop_header);
8843 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8844 initial);
8846 else
8847 gsi_insert_before (&si,
8848 gimple_build_assign (loadedi, initial),
8849 GSI_SAME_STMT);
8850 if (loadedi != loaded_val)
8852 gimple_stmt_iterator gsi2;
8853 tree x;
8855 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8856 gsi2 = gsi_start_bb (loop_header);
8857 if (gimple_in_ssa_p (cfun))
8859 gassign *stmt;
8860 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8861 true, GSI_SAME_STMT);
8862 stmt = gimple_build_assign (loaded_val, x);
8863 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8865 else
8867 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8868 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8869 true, GSI_SAME_STMT);
8872 gsi_remove (&si, true);
8874 si = gsi_last_nondebug_bb (store_bb);
8875 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8877 if (iaddr == addr)
8878 storedi = stored_val;
8879 else
8880 storedi
8881 = force_gimple_operand_gsi (&si,
8882 build1 (VIEW_CONVERT_EXPR, itype,
8883 stored_val), true, NULL_TREE, true,
8884 GSI_SAME_STMT);
8886 /* Build the compare&swap statement. */
8887 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8888 new_storedi = force_gimple_operand_gsi (&si,
8889 fold_convert (TREE_TYPE (loadedi),
8890 new_storedi),
8891 true, NULL_TREE,
8892 true, GSI_SAME_STMT);
8894 if (gimple_in_ssa_p (cfun))
8895 old_vali = loadedi;
8896 else
8898 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8899 stmt = gimple_build_assign (old_vali, loadedi);
8900 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8902 stmt = gimple_build_assign (loadedi, new_storedi);
8903 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8906 /* Note that we always perform the comparison as an integer, even for
8907 floating point. This allows the atomic operation to properly
8908 succeed even with NaNs and -0.0. */
8909 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8910 stmt = gimple_build_cond_empty (ne);
8911 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8913 /* Update cfg. */
8914 e = single_succ_edge (store_bb);
8915 e->flags &= ~EDGE_FALLTHRU;
8916 e->flags |= EDGE_FALSE_VALUE;
8917 /* Expect no looping. */
8918 e->probability = profile_probability::guessed_always ();
8920 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8921 e->probability = profile_probability::guessed_never ();
8923 /* Copy the new value to loadedi (we already did that before the condition
8924 if we are not in SSA). */
8925 if (gimple_in_ssa_p (cfun))
8927 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8928 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8931 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8932 gsi_remove (&si, true);
8934 class loop *loop = alloc_loop ();
8935 loop->header = loop_header;
8936 loop->latch = store_bb;
8937 add_loop (loop, loop_header->loop_father);
8939 if (gimple_in_ssa_p (cfun))
8940 update_ssa (TODO_update_ssa_no_phi);
8942 return true;
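/* For illustration, for a 4-byte float X the loop built above behaves
   roughly like this sketch (a hypothetical "x += 1.0f" update is
   assumed); the VIEW_CONVERT_EXPRs reinterpret the bits as a same-sized
   integer so the compare-and-swap is performed integrally:

     int *ip = (int *) &x;
     int old = *ip, prev, newv;
     do
       {
         float f;
         __builtin_memcpy (&f, &old, sizeof f);   /* Old bits of X.  */
         f = f + 1.0f;                            /* Recompute RHS.  */
         __builtin_memcpy (&newv, &f, sizeof newv);
         prev = old;
         old = __sync_val_compare_and_swap (ip, prev, newv);
       }
     while (old != prev);  */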
8945 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8947 GOMP_atomic_start ();
8948 *addr = rhs;
8949 GOMP_atomic_end ();
8951 The result is not globally atomic, but works so long as all parallel
8952 references are within #pragma omp atomic directives. According to
8953 responses received from omp@openmp.org, this appears to be within
8954 spec, which makes sense, since that's how several other compilers
8955 handle this situation as well.
8956 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8957 expanding. STORED_VAL is the operand of the matching
8958 GIMPLE_OMP_ATOMIC_STORE.
8960 We replace
8961 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8962 loaded_val = *addr;
8964 and replace
8965 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8966 *addr = stored_val;
8969 static bool
8970 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8971 tree addr, tree loaded_val, tree stored_val)
8973 gimple_stmt_iterator si;
8974 gassign *stmt;
8975 tree t;
8977 si = gsi_last_nondebug_bb (load_bb);
8978 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8980 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8981 t = build_call_expr (t, 0);
8982 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8984 tree mem = build_simple_mem_ref (addr);
8985 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8986 TREE_OPERAND (mem, 1)
8987 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8988 true),
8989 TREE_OPERAND (mem, 1));
8990 stmt = gimple_build_assign (loaded_val, mem);
8991 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8992 gsi_remove (&si, true);
8994 si = gsi_last_nondebug_bb (store_bb);
8995 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8997 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8998 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9000 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9001 t = build_call_expr (t, 0);
9002 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9003 gsi_remove (&si, true);
9005 if (gimple_in_ssa_p (cfun))
9006 update_ssa (TODO_update_ssa_no_phi);
9007 return true;
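/* For example, a type whose size is not a power of two, such as a
   12-byte long double on 32-bit x86, fails the exact_log2 size check in
   expand_omp_atomic below and cannot use any of the __atomic/__sync
   paths, so

     #pragma omp atomic
     ld += 1.0L;

   ends up as (sketch):

     GOMP_atomic_start ();
     ld = ld + 1.0L;
     GOMP_atomic_end ();  */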
9010 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
9011 using expand_omp_atomic_fetch_op. If that fails, we try to
9012 call expand_omp_atomic_pipeline, and if that fails too, the
9013 ultimate fallback is wrapping the operation in a mutex
9014 (expand_omp_atomic_mutex). REGION is the atomic region built
9015 by build_omp_regions_1(). */
9017 static void
9018 expand_omp_atomic (struct omp_region *region)
9020 basic_block load_bb = region->entry, store_bb = region->exit;
9021 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9022 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9023 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9024 tree addr = gimple_omp_atomic_load_rhs (load);
9025 tree stored_val = gimple_omp_atomic_store_val (store);
9026 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9027 HOST_WIDE_INT index;
9029 /* Make sure the type is one of the supported sizes. */
9030 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9031 index = exact_log2 (index);
9032 if (index >= 0 && index <= 4)
9034 unsigned int align = TYPE_ALIGN_UNIT (type);
9036 /* __sync builtins require strict data alignment. */
9037 if (exact_log2 (align) >= index)
9039 /* Atomic load. */
9040 scalar_mode smode;
9041 if (loaded_val == stored_val
9042 && (is_int_mode (TYPE_MODE (type), &smode)
9043 || is_float_mode (TYPE_MODE (type), &smode))
9044 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9045 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9046 return;
9048 /* Atomic store. */
9049 if ((is_int_mode (TYPE_MODE (type), &smode)
9050 || is_float_mode (TYPE_MODE (type), &smode))
9051 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9052 && store_bb == single_succ (load_bb)
9053 && first_stmt (store_bb) == store
9054 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9055 stored_val, index))
9056 return;
9058 /* When possible, use specialized atomic update functions. */
9059 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9060 && store_bb == single_succ (load_bb)
9061 && expand_omp_atomic_fetch_op (load_bb, addr,
9062 loaded_val, stored_val, index))
9063 return;
9065 /* If we don't have specialized __sync builtins, try and implement
9066 as a compare and swap loop. */
9067 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9068 loaded_val, stored_val, index))
9069 return;
9073 /* The ultimate fallback is wrapping the operation in a mutex. */
9074 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
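/* As a worked example of the checks above: for a 4-byte int,
   TYPE_SIZE_UNIT is 4, so INDEX = exact_log2 (4) = 2 and the "_4"
   builtin variants are selected; TYPE_ALIGN_UNIT is typically also 4,
   and exact_log2 (4) = 2 >= INDEX satisfies the strict-alignment
   requirement of the __sync builtins.  A 16-byte type gives INDEX = 4,
   the largest size accepted; anything else falls through to the mutex
   fallback.  */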
9077 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9078 at REGION_EXIT. */
9080 static void
9081 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9082 basic_block region_exit)
9084 class loop *outer = region_entry->loop_father;
9085 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9087 /* Don't parallelize the kernels region if it contains more than one outer
9088 loop. */
9089 unsigned int nr_outer_loops = 0;
9090 class loop *single_outer = NULL;
9091 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9093 gcc_assert (loop_outer (loop) == outer);
9095 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9096 continue;
9098 if (region_exit != NULL
9099 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9100 continue;
9102 nr_outer_loops++;
9103 single_outer = loop;
9105 if (nr_outer_loops != 1)
9106 return;
9108 for (class loop *loop = single_outer->inner;
9109 loop != NULL;
9110 loop = loop->inner)
9111 if (loop->next)
9112 return;
9114 /* Mark the loops in the region. */
9115 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9116 loop->in_oacc_kernels_region = true;
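/* For example (sketch), in

     #pragma acc kernels
     for (int i = 0; i < n; i++)      // single outer loop
       for (int j = 0; j < m; j++)    // single loop at each inner depth
         a[i * m + j] = 0;

   the region contains exactly one outer loop and no sibling loops at
   any depth, so both loops are marked; a second top-level loop inside
   the same kernels region would make the function return without
   marking anything.  */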
9119 /* Build target argument identifier from the DEVICE identifier, value
9120 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9122 static tree
9123 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9125 tree t = build_int_cst (integer_type_node, device);
9126 if (subsequent_param)
9127 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9128 build_int_cst (integer_type_node,
9129 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9130 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9131 build_int_cst (integer_type_node, id));
9132 return t;
9135 /* Like above, but return it in a type that can be directly stored as an element
9136 of the argument array. */
9138 static tree
9139 get_target_argument_identifier (int device, bool subsequent_param, int id)
9141 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9142 return fold_convert (ptr_type_node, t);
9145 /* Return a target argument consisting of DEVICE identifier, value identifier
9146 ID, and the actual VALUE. */
9148 static tree
9149 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9150 tree value)
9152 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9153 fold_convert (integer_type_node, value),
9154 build_int_cst (unsigned_type_node,
9155 GOMP_TARGET_ARG_VALUE_SHIFT));
9156 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9157 get_target_argument_identifier_1 (device, false, id));
9158 t = fold_convert (ptr_type_node, t);
9159 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9162 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9163 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
9164 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9165 arguments. */
9167 static void
9168 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9169 int id, tree value, vec <tree> *args)
9171 if (tree_fits_shwi_p (value)
9172 && tree_to_shwi (value) > -(1 << 15)
9173 && tree_to_shwi (value) < (1 << 15))
9174 args->quick_push (get_target_argument_value (gsi, device, id, value));
9175 else
9177 args->quick_push (get_target_argument_identifier (device, true, id));
9178 value = fold_convert (ptr_type_node, value);
9179 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9180 GSI_SAME_STMT);
9181 args->quick_push (value);
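/* As a worked example of the encoding (sketch; GOMP_TARGET_ARG_VALUE_SHIFT
   is 16 in gomp-constants.h, matching the (-2^15, 2^15) range test
   above): for a small constant such as num_teams = 4, identifier and
   value are packed into one pointer-sized argument,

     arg = (4 << 16) | id | device;

   while a run-time value is pushed as two arguments: an identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself
   converted to ptr_type_node.  */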
9185 /* Create an array of arguments that is then passed to GOMP_target. */
9187 static tree
9188 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9190 auto_vec <tree, 6> args;
9191 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9192 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9193 if (c)
9194 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9195 else
9196 t = integer_minus_one_node;
9197 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9198 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9200 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9201 if (c)
9202 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9203 else
9204 t = integer_minus_one_node;
9205 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9206 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9207 &args);
9209 /* Produce more, perhaps device-specific, arguments here. */
9211 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9212 args.length () + 1),
9213 ".omp_target_args");
9214 for (unsigned i = 0; i < args.length (); i++)
9216 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9217 build_int_cst (integer_type_node, i),
9218 NULL_TREE, NULL_TREE);
9219 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9220 GSI_SAME_STMT);
9222 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9223 build_int_cst (integer_type_node, args.length ()),
9224 NULL_TREE, NULL_TREE);
9225 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9226 GSI_SAME_STMT);
9227 TREE_ADDRESSABLE (argarray) = 1;
9228 return build_fold_addr_expr (argarray);
9231 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9233 static void
9234 expand_omp_target (struct omp_region *region)
9236 basic_block entry_bb, exit_bb, new_bb;
9237 struct function *child_cfun;
9238 tree child_fn, block, t;
9239 gimple_stmt_iterator gsi;
9240 gomp_target *entry_stmt;
9241 gimple *stmt;
9242 edge e;
9243 bool offloaded, data_region;
9244 int target_kind;
9246 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9247 target_kind = gimple_omp_target_kind (entry_stmt);
9248 new_bb = region->entry;
9250 offloaded = is_gimple_omp_offloaded (entry_stmt);
9251 switch (target_kind)
9253 case GF_OMP_TARGET_KIND_REGION:
9254 case GF_OMP_TARGET_KIND_UPDATE:
9255 case GF_OMP_TARGET_KIND_ENTER_DATA:
9256 case GF_OMP_TARGET_KIND_EXIT_DATA:
9257 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9258 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9259 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9260 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9261 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9262 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9263 data_region = false;
9264 break;
9265 case GF_OMP_TARGET_KIND_DATA:
9266 case GF_OMP_TARGET_KIND_OACC_DATA:
9267 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9268 data_region = true;
9269 break;
9270 default:
9271 gcc_unreachable ();
9274 child_fn = NULL_TREE;
9275 child_cfun = NULL;
9276 if (offloaded)
9278 child_fn = gimple_omp_target_child_fn (entry_stmt);
9279 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9282 /* Supported by expand_omp_taskreg, but not here. */
9283 if (child_cfun != NULL)
9284 gcc_checking_assert (!child_cfun->cfg);
9285 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9287 entry_bb = region->entry;
9288 exit_bb = region->exit;
9290 switch (target_kind)
9292 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9293 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9295 /* Further down, all OpenACC compute constructs will be mapped to
9296 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9297 is an "oacc kernels" attribute set for OpenACC kernels. */
9298 DECL_ATTRIBUTES (child_fn)
9299 = tree_cons (get_identifier ("oacc kernels"),
9300 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9301 break;
9302 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9303 /* Further down, all OpenACC compute constructs will be mapped to
9304 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9305 is an "oacc serial" attribute set for OpenACC serial. */
9306 DECL_ATTRIBUTES (child_fn)
9307 = tree_cons (get_identifier ("oacc serial"),
9308 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9309 break;
9310 default:
9311 break;
9314 if (offloaded)
9316 unsigned srcidx, dstidx, num;
9318 /* If the offloading region needs data sent from the parent
9319 function, then the very first statement (except possible
9320 tree profile counter updates) of the offloading body
9321 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9322 &.OMP_DATA_O is passed as an argument to the child function,
9323 we need to replace it with the argument as seen by the child
9324 function.
9326 In most cases, this will end up being the identity assignment
9327 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9328 a function call that has been inlined, the original PARM_DECL
9329 .OMP_DATA_I may have been converted into a different local
9330 variable, in which case we need to keep the assignment. */
9331 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9332 if (data_arg)
9334 basic_block entry_succ_bb = single_succ (entry_bb);
9335 gimple_stmt_iterator gsi;
9336 tree arg;
9337 gimple *tgtcopy_stmt = NULL;
9338 tree sender = TREE_VEC_ELT (data_arg, 0);
9340 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9342 gcc_assert (!gsi_end_p (gsi));
9343 stmt = gsi_stmt (gsi);
9344 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9345 continue;
9347 if (gimple_num_ops (stmt) == 2)
9349 tree arg = gimple_assign_rhs1 (stmt);
9351 /* We're ignoring the subcode because we're
9352 effectively doing a STRIP_NOPS. */
9354 if (TREE_CODE (arg) == ADDR_EXPR
9355 && TREE_OPERAND (arg, 0) == sender)
9357 tgtcopy_stmt = stmt;
9358 break;
9363 gcc_assert (tgtcopy_stmt != NULL);
9364 arg = DECL_ARGUMENTS (child_fn);
9366 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9367 gsi_remove (&gsi, true);
9370 /* Declare local variables needed in CHILD_CFUN. */
9371 block = DECL_INITIAL (child_fn);
9372 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9373 /* The gimplifier could record temporaries in the offloading block
9374 rather than in the containing function's local_decls chain,
9375 which would mean cgraph missed finalizing them. Do it now. */
9376 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9377 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9378 varpool_node::finalize_decl (t);
9379 DECL_SAVED_TREE (child_fn) = NULL;
9380 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9381 gimple_set_body (child_fn, NULL);
9382 TREE_USED (block) = 1;
9384 /* Reset DECL_CONTEXT on function arguments. */
9385 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9386 DECL_CONTEXT (t) = child_fn;
9388 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9389 so that it can be moved to the child function. */
9390 gsi = gsi_last_nondebug_bb (entry_bb);
9391 stmt = gsi_stmt (gsi);
9392 gcc_assert (stmt
9393 && gimple_code (stmt) == gimple_code (entry_stmt));
9394 e = split_block (entry_bb, stmt);
9395 gsi_remove (&gsi, true);
9396 entry_bb = e->dest;
9397 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9399 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9400 if (exit_bb)
9402 gsi = gsi_last_nondebug_bb (exit_bb);
9403 gcc_assert (!gsi_end_p (gsi)
9404 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9405 stmt = gimple_build_return (NULL);
9406 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9407 gsi_remove (&gsi, true);
9410 /* Move the offloading region into CHILD_CFUN. */
9412 block = gimple_block (entry_stmt);
9414 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9415 if (exit_bb)
9416 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9417 /* When the OMP expansion process cannot guarantee an up-to-date
9418 loop tree, arrange for the child function to fix up loops. */
9419 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9420 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9422 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9423 num = vec_safe_length (child_cfun->local_decls);
9424 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9426 t = (*child_cfun->local_decls)[srcidx];
9427 if (DECL_CONTEXT (t) == cfun->decl)
9428 continue;
9429 if (srcidx != dstidx)
9430 (*child_cfun->local_decls)[dstidx] = t;
9431 dstidx++;
9433 if (dstidx != num)
9434 vec_safe_truncate (child_cfun->local_decls, dstidx);
9436 /* Inform the callgraph about the new function. */
9437 child_cfun->curr_properties = cfun->curr_properties;
9438 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9439 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9440 cgraph_node *node = cgraph_node::get_create (child_fn);
9441 node->parallelized_function = 1;
9442 cgraph_node::add_new_function (child_fn, true);
9444 /* Add the new function to the offload table. */
9445 if (ENABLE_OFFLOADING)
9447 if (in_lto_p)
9448 DECL_PRESERVE_P (child_fn) = 1;
9449 vec_safe_push (offload_funcs, child_fn);
9452 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9453 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9455 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9456 fixed in a following pass. */
9457 push_cfun (child_cfun);
9458 if (need_asm)
9459 assign_assembler_name_if_needed (child_fn);
9460 cgraph_edge::rebuild_edges ();
9462 /* Some EH regions might become dead, see PR34608. If
9463 pass_cleanup_cfg isn't the first pass to happen with the
9464 new child, these dead EH edges might cause problems.
9465 Clean them up now. */
9466 if (flag_exceptions)
9468 basic_block bb;
9469 bool changed = false;
9471 FOR_EACH_BB_FN (bb, cfun)
9472 changed |= gimple_purge_dead_eh_edges (bb);
9473 if (changed)
9474 cleanup_tree_cfg ();
9476 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9477 verify_loop_structure ();
9478 pop_cfun ();
9480 if (dump_file && !gimple_in_ssa_p (cfun))
9482 omp_any_child_fn_dumped = true;
9483 dump_function_header (dump_file, child_fn, dump_flags);
9484 dump_function_to_file (child_fn, dump_file, dump_flags);
9487 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9490 /* Emit a library call to launch the offloading region, or do data
9491 transfers. */
9492 tree t1, t2, t3, t4, depend, c, clauses;
9493 enum built_in_function start_ix;
9494 unsigned int flags_i = 0;
9496 switch (gimple_omp_target_kind (entry_stmt))
9498 case GF_OMP_TARGET_KIND_REGION:
9499 start_ix = BUILT_IN_GOMP_TARGET;
9500 break;
9501 case GF_OMP_TARGET_KIND_DATA:
9502 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9503 break;
9504 case GF_OMP_TARGET_KIND_UPDATE:
9505 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9506 break;
9507 case GF_OMP_TARGET_KIND_ENTER_DATA:
9508 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9509 break;
9510 case GF_OMP_TARGET_KIND_EXIT_DATA:
9511 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9512 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9513 break;
9514 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9515 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9516 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9517 start_ix = BUILT_IN_GOACC_PARALLEL;
9518 break;
9519 case GF_OMP_TARGET_KIND_OACC_DATA:
9520 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9521 start_ix = BUILT_IN_GOACC_DATA_START;
9522 break;
9523 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9524 start_ix = BUILT_IN_GOACC_UPDATE;
9525 break;
9526 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9527 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9528 break;
9529 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9530 start_ix = BUILT_IN_GOACC_DECLARE;
9531 break;
9532 default:
9533 gcc_unreachable ();
9536 clauses = gimple_omp_target_clauses (entry_stmt);
9538 tree device = NULL_TREE;
9539 location_t device_loc = UNKNOWN_LOCATION;
9540 tree goacc_flags = NULL_TREE;
9541 if (is_gimple_omp_oacc (entry_stmt))
9543 /* By default, no GOACC_FLAGs are set. */
9544 goacc_flags = integer_zero_node;
9546 else
9548 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9549 if (c)
9551 device = OMP_CLAUSE_DEVICE_ID (c);
9552 device_loc = OMP_CLAUSE_LOCATION (c);
9554 else
9556 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
9557 runtime library choose). */
9558 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9559 device_loc = gimple_location (entry_stmt);
9562 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9563 if (c)
9564 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9567 /* By default, there is no conditional. */
9568 tree cond = NULL_TREE;
9569 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9570 if (c)
9571 cond = OMP_CLAUSE_IF_EXPR (c);
9572 /* If we found the clause 'if (cond)', build:
9573 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9574 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9575 if (cond)
9577 tree *tp;
9578 if (is_gimple_omp_oacc (entry_stmt))
9579 tp = &goacc_flags;
9580 else
9582 /* Ensure 'device' is of the correct type. */
9583 device = fold_convert_loc (device_loc, integer_type_node, device);
9585 tp = &device;
9588 cond = gimple_boolify (cond);
9590 basic_block cond_bb, then_bb, else_bb;
9591 edge e;
9592 tree tmp_var;
9594 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9595 if (offloaded)
9596 e = split_block_after_labels (new_bb);
9597 else
9599 gsi = gsi_last_nondebug_bb (new_bb);
9600 gsi_prev (&gsi);
9601 e = split_block (new_bb, gsi_stmt (gsi));
9603 cond_bb = e->src;
9604 new_bb = e->dest;
9605 remove_edge (e);
9607 then_bb = create_empty_bb (cond_bb);
9608 else_bb = create_empty_bb (then_bb);
9609 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9610 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9612 stmt = gimple_build_cond_empty (cond);
9613 gsi = gsi_last_bb (cond_bb);
9614 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9616 gsi = gsi_start_bb (then_bb);
9617 stmt = gimple_build_assign (tmp_var, *tp);
9618 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9620 gsi = gsi_start_bb (else_bb);
9621 if (is_gimple_omp_oacc (entry_stmt))
9622 stmt = gimple_build_assign (tmp_var,
9623 BIT_IOR_EXPR,
9624 *tp,
9625 build_int_cst (integer_type_node,
9626 GOACC_FLAG_HOST_FALLBACK));
9627 else
9628 stmt = gimple_build_assign (tmp_var,
9629 build_int_cst (integer_type_node,
9630 GOMP_DEVICE_HOST_FALLBACK));
9631 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9633 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9634 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9635 add_bb_to_loop (then_bb, cond_bb->loop_father);
9636 add_bb_to_loop (else_bb, cond_bb->loop_father);
9637 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9638 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9640 *tp = tmp_var;
9642 gsi = gsi_last_nondebug_bb (new_bb);
9644 else
9646 gsi = gsi_last_nondebug_bb (new_bb);
9648 if (device != NULL_TREE)
9649 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9650 true, GSI_SAME_STMT);
9653 t = gimple_omp_target_data_arg (entry_stmt);
9654 if (t == NULL)
9656 t1 = size_zero_node;
9657 t2 = build_zero_cst (ptr_type_node);
9658 t3 = t2;
9659 t4 = t2;
9661 else
9663 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9664 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9665 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9666 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9667 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9670 gimple *g;
9671 bool tagging = false;
9672 /* The maximum number of arguments used by any start_ix, without varargs. */
9673 auto_vec<tree, 11> args;
9674 if (is_gimple_omp_oacc (entry_stmt))
9676 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9677 TREE_TYPE (goacc_flags), goacc_flags);
9678 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9679 NULL_TREE, true,
9680 GSI_SAME_STMT);
9681 args.quick_push (goacc_flags_m);
9683 else
9684 args.quick_push (device);
9685 if (offloaded)
9686 args.quick_push (build_fold_addr_expr (child_fn));
9687 args.quick_push (t1);
9688 args.quick_push (t2);
9689 args.quick_push (t3);
9690 args.quick_push (t4);
9691 switch (start_ix)
9693 case BUILT_IN_GOACC_DATA_START:
9694 case BUILT_IN_GOACC_DECLARE:
9695 case BUILT_IN_GOMP_TARGET_DATA:
9696 break;
9697 case BUILT_IN_GOMP_TARGET:
9698 case BUILT_IN_GOMP_TARGET_UPDATE:
9699 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9700 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9701 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9702 if (c)
9703 depend = OMP_CLAUSE_DECL (c);
9704 else
9705 depend = build_int_cst (ptr_type_node, 0);
9706 args.quick_push (depend);
9707 if (start_ix == BUILT_IN_GOMP_TARGET)
9708 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9709 break;
9710 case BUILT_IN_GOACC_PARALLEL:
9711 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9713 tree dims = NULL_TREE;
9714 unsigned int ix;
9716 /* For serial constructs we set all dimensions to 1. */
9717 for (ix = GOMP_DIM_MAX; ix--;)
9718 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9719 oacc_replace_fn_attrib (child_fn, dims);
9721 else
9722 oacc_set_fn_attrib (child_fn, clauses, &args);
9723 tagging = true;
9724 /* FALLTHRU */
9725 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9726 case BUILT_IN_GOACC_UPDATE:
9728 tree t_async = NULL_TREE;
9730 /* If present, use the value specified by the respective
9731 clause, making sure it is of the correct type. */
9732 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9733 if (c)
9734 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9735 integer_type_node,
9736 OMP_CLAUSE_ASYNC_EXPR (c));
9737 else if (!tagging)
9738 /* Default values for t_async. */
9739 t_async = fold_convert_loc (gimple_location (entry_stmt),
9740 integer_type_node,
9741 build_int_cst (integer_type_node,
9742 GOMP_ASYNC_SYNC));
9743 if (tagging && t_async)
9745 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9747 if (TREE_CODE (t_async) == INTEGER_CST)
9749 /* See if we can pack the async arg into the tag's
9750 operand. */
9751 i_async = TREE_INT_CST_LOW (t_async);
9752 if (i_async < GOMP_LAUNCH_OP_MAX)
9753 t_async = NULL_TREE;
9754 else
9755 i_async = GOMP_LAUNCH_OP_MAX;
9757 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9758 i_async));
9760 if (t_async)
9761 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9762 NULL_TREE, true,
9763 GSI_SAME_STMT));
9765 /* Save the argument index, and ... */
9766 unsigned t_wait_idx = args.length ();
9767 unsigned num_waits = 0;
9768 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9769 if (!tagging || c)
9770 /* ... push a placeholder. */
9771 args.safe_push (integer_zero_node);
9773 for (; c; c = OMP_CLAUSE_CHAIN (c))
9774 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9776 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9777 integer_type_node,
9778 OMP_CLAUSE_WAIT_EXPR (c));
9779 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9780 GSI_SAME_STMT);
9781 args.safe_push (arg);
9782 num_waits++;
9785 if (!tagging || num_waits)
9787 tree len;
9789 /* Now that we know the number, update the placeholder. */
9790 if (tagging)
9791 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9792 else
9793 len = build_int_cst (integer_type_node, num_waits);
9794 len = fold_convert_loc (gimple_location (entry_stmt),
9795 unsigned_type_node, len);
9796 args[t_wait_idx] = len;
9799 break;
9800 default:
9801 gcc_unreachable ();
9803 if (tagging)
9804 /* Push terminal marker - zero. */
9805 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9807 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9808 gimple_set_location (g, gimple_location (entry_stmt));
9809 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9810 if (!offloaded)
9812 g = gsi_stmt (gsi);
9813 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9814 gsi_remove (&gsi, true);
9816 if (data_region && region->exit)
9818 gsi = gsi_last_nondebug_bb (region->exit);
9819 g = gsi_stmt (gsi);
9820 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9821 gsi_remove (&gsi, true);
9825 /* Expand the parallel region tree rooted at REGION. Expansion
9826 proceeds in depth-first order. Innermost regions are expanded
9827 first. This way, parallel regions that require a new function to
9828 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9829 internal dependencies in their body. */
9831 static void
9832 expand_omp (struct omp_region *region)
9834 omp_any_child_fn_dumped = false;
9835 while (region)
9837 location_t saved_location;
9838 gimple *inner_stmt = NULL;
9840 /* First, determine whether this is a combined parallel+workshare
9841 region. */
9842 if (region->type == GIMPLE_OMP_PARALLEL)
9843 determine_parallel_type (region);
9845 if (region->type == GIMPLE_OMP_FOR
9846 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9847 inner_stmt = last_stmt (region->inner->entry);
9849 if (region->inner)
9850 expand_omp (region->inner);
9852 saved_location = input_location;
9853 if (gimple_has_location (last_stmt (region->entry)))
9854 input_location = gimple_location (last_stmt (region->entry));
9856 switch (region->type)
9858 case GIMPLE_OMP_PARALLEL:
9859 case GIMPLE_OMP_TASK:
9860 expand_omp_taskreg (region);
9861 break;
9863 case GIMPLE_OMP_FOR:
9864 expand_omp_for (region, inner_stmt);
9865 break;
9867 case GIMPLE_OMP_SECTIONS:
9868 expand_omp_sections (region);
9869 break;
9871 case GIMPLE_OMP_SECTION:
9872 /* Individual omp sections are handled together with their
9873 parent GIMPLE_OMP_SECTIONS region. */
9874 break;
9876 case GIMPLE_OMP_SINGLE:
9877 expand_omp_single (region);
9878 break;
9880 case GIMPLE_OMP_ORDERED:
9882 gomp_ordered *ord_stmt
9883 = as_a <gomp_ordered *> (last_stmt (region->entry));
9884 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9885 OMP_CLAUSE_DEPEND))
9887 /* We'll expand these when expanding the corresponding
9888 worksharing region with an ordered(n) clause. */
9889 gcc_assert (region->outer
9890 && region->outer->type == GIMPLE_OMP_FOR);
9891 region->ord_stmt = ord_stmt;
9892 break;
9895 /* FALLTHRU */
9896 case GIMPLE_OMP_MASTER:
9897 case GIMPLE_OMP_TASKGROUP:
9898 case GIMPLE_OMP_CRITICAL:
9899 case GIMPLE_OMP_TEAMS:
9900 expand_omp_synch (region);
9901 break;
9903 case GIMPLE_OMP_ATOMIC_LOAD:
9904 expand_omp_atomic (region);
9905 break;
9907 case GIMPLE_OMP_TARGET:
9908 expand_omp_target (region);
9909 break;
9911 default:
9912 gcc_unreachable ();
9915 input_location = saved_location;
9916 region = region->next;
9918 if (omp_any_child_fn_dumped)
9920 if (dump_file)
9921 dump_function_header (dump_file, current_function_decl, dump_flags);
9922 omp_any_child_fn_dumped = false;
9926 /* Helper for build_omp_regions. Scan the dominator tree starting at
9927 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9928 true, the function ends once a single tree is built (otherwise, a whole
9929 forest of OMP constructs may be built). */
9931 static void
9932 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9933 bool single_tree)
9935 gimple_stmt_iterator gsi;
9936 gimple *stmt;
9937 basic_block son;
9939 gsi = gsi_last_nondebug_bb (bb);
9940 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9942 struct omp_region *region;
9943 enum gimple_code code;
9945 stmt = gsi_stmt (gsi);
9946 code = gimple_code (stmt);
9947 if (code == GIMPLE_OMP_RETURN)
9949 /* STMT is the return point out of region PARENT. Mark it
9950 as the exit point and make PARENT the immediately
9951 enclosing region. */
9952 gcc_assert (parent);
9953 region = parent;
9954 region->exit = bb;
9955 parent = parent->outer;
9957 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9959 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9960 GIMPLE_OMP_RETURN, but matches with
9961 GIMPLE_OMP_ATOMIC_LOAD. */
9962 gcc_assert (parent);
9963 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9964 region = parent;
9965 region->exit = bb;
9966 parent = parent->outer;
9968 else if (code == GIMPLE_OMP_CONTINUE)
9970 gcc_assert (parent);
9971 parent->cont = bb;
9973 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9975 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9976 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9978 else
9980 region = new_omp_region (bb, code, parent);
9981 /* Otherwise... */
9982 if (code == GIMPLE_OMP_TARGET)
9984 switch (gimple_omp_target_kind (stmt))
9986 case GF_OMP_TARGET_KIND_REGION:
9987 case GF_OMP_TARGET_KIND_DATA:
9988 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9989 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9990 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9991 case GF_OMP_TARGET_KIND_OACC_DATA:
9992 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9993 break;
9994 case GF_OMP_TARGET_KIND_UPDATE:
9995 case GF_OMP_TARGET_KIND_ENTER_DATA:
9996 case GF_OMP_TARGET_KIND_EXIT_DATA:
9997 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9998 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9999 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10000 /* ..., other than for those stand-alone directives... */
10001 region = NULL;
10002 break;
10003 default:
10004 gcc_unreachable ();
10007 else if (code == GIMPLE_OMP_ORDERED
10008 && omp_find_clause (gimple_omp_ordered_clauses
10009 (as_a <gomp_ordered *> (stmt)),
10010 OMP_CLAUSE_DEPEND))
10011 /* #pragma omp ordered depend is also just a stand-alone
10012 directive. */
10013 region = NULL;
10014 else if (code == GIMPLE_OMP_TASK
10015 && gimple_omp_task_taskwait_p (stmt))
10016 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10017 region = NULL;
10018 /* ..., this directive becomes the parent for a new region. */
10019 if (region)
10020 parent = region;
10024 if (single_tree && !parent)
10025 return;
10027 for (son = first_dom_son (CDI_DOMINATORS, bb);
10028 son;
10029 son = next_dom_son (CDI_DOMINATORS, son))
10030 build_omp_regions_1 (son, parent, single_tree);
10033 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10034 root_omp_region. */
10036 static void
10037 build_omp_regions_root (basic_block root)
10039 gcc_assert (root_omp_region == NULL);
10040 build_omp_regions_1 (root, NULL, true);
10041 gcc_assert (root_omp_region != NULL);
10044 /* Expands the OMP construct (and its subconstructs) starting at HEAD. */
10046 void
10047 omp_expand_local (basic_block head)
10049 build_omp_regions_root (head);
10050 if (dump_file && (dump_flags & TDF_DETAILS))
10052 fprintf (dump_file, "\nOMP region tree\n\n");
10053 dump_omp_region (dump_file, root_omp_region, 0);
10054 fprintf (dump_file, "\n");
10057 remove_exit_barriers (root_omp_region);
10058 expand_omp (root_omp_region);
10060 omp_free_regions ();
10063 /* Scan the CFG and build a tree of OMP regions, storing it in
10064 root_omp_region. */
10066 static void
10067 build_omp_regions (void)
10069 gcc_assert (root_omp_region == NULL);
10070 calculate_dominance_info (CDI_DOMINATORS);
10071 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10074 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10076 static unsigned int
10077 execute_expand_omp (void)
10079 build_omp_regions ();
10081 if (!root_omp_region)
10082 return 0;
10084 if (dump_file)
10086 fprintf (dump_file, "\nOMP region tree\n\n");
10087 dump_omp_region (dump_file, root_omp_region, 0);
10088 fprintf (dump_file, "\n");
10091 remove_exit_barriers (root_omp_region);
10093 expand_omp (root_omp_region);
10095 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10096 verify_loop_structure ();
10097 cleanup_tree_cfg ();
10099 omp_free_regions ();
10101 return 0;
10104 /* OMP expansion -- the default pass, run before creation of SSA form. */
10106 namespace {
10108 const pass_data pass_data_expand_omp =
10110 GIMPLE_PASS, /* type */
10111 "ompexp", /* name */
10112 OPTGROUP_OMP, /* optinfo_flags */
10113 TV_NONE, /* tv_id */
10114 PROP_gimple_any, /* properties_required */
10115 PROP_gimple_eomp, /* properties_provided */
10116 0, /* properties_destroyed */
10117 0, /* todo_flags_start */
10118 0, /* todo_flags_finish */
10121 class pass_expand_omp : public gimple_opt_pass
10123 public:
10124 pass_expand_omp (gcc::context *ctxt)
10125 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10128 /* opt_pass methods: */
10129 virtual unsigned int execute (function *)
10131 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10132 || flag_openmp_simd != 0)
10133 && !seen_error ());
10135 /* This pass always runs, to provide PROP_gimple_eomp.
10136 But often, there is nothing to do. */
10137 if (!gate)
10138 return 0;
10140 return execute_expand_omp ();
10143 }; // class pass_expand_omp
10145 } // anon namespace
10147 gimple_opt_pass *
10148 make_pass_expand_omp (gcc::context *ctxt)
10150 return new pass_expand_omp (ctxt);
10153 namespace {
10155 const pass_data pass_data_expand_omp_ssa =
10157 GIMPLE_PASS, /* type */
10158 "ompexpssa", /* name */
10159 OPTGROUP_OMP, /* optinfo_flags */
10160 TV_NONE, /* tv_id */
10161 PROP_cfg | PROP_ssa, /* properties_required */
10162 PROP_gimple_eomp, /* properties_provided */
10163 0, /* properties_destroyed */
10164 0, /* todo_flags_start */
10165 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10168 class pass_expand_omp_ssa : public gimple_opt_pass
10170 public:
10171 pass_expand_omp_ssa (gcc::context *ctxt)
10172 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10175 /* opt_pass methods: */
10176 virtual bool gate (function *fun)
10178 return !(fun->curr_properties & PROP_gimple_eomp);
10180 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10181 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10183 }; // class pass_expand_omp_ssa
10185 } // anon namespace
10187 gimple_opt_pass *
10188 make_pass_expand_omp_ssa (gcc::context *ctxt)
10190 return new pass_expand_omp_ssa (ctxt);
10193 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10194 GIMPLE_* codes. */
10196 bool
10197 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10198 int *region_idx)
10200 gimple *last = last_stmt (bb);
10201 enum gimple_code code = gimple_code (last);
10202 struct omp_region *cur_region = *region;
10203 bool fallthru = false;
10205 switch (code)
10207 case GIMPLE_OMP_PARALLEL:
10208 case GIMPLE_OMP_FOR:
10209 case GIMPLE_OMP_SINGLE:
10210 case GIMPLE_OMP_TEAMS:
10211 case GIMPLE_OMP_MASTER:
10212 case GIMPLE_OMP_TASKGROUP:
10213 case GIMPLE_OMP_CRITICAL:
10214 case GIMPLE_OMP_SECTION:
10215 cur_region = new_omp_region (bb, code, cur_region);
10216 fallthru = true;
10217 break;
10219 case GIMPLE_OMP_TASK:
10220 cur_region = new_omp_region (bb, code, cur_region);
10221 fallthru = true;
10222 if (gimple_omp_task_taskwait_p (last))
10223 cur_region = cur_region->outer;
10224 break;
10226 case GIMPLE_OMP_ORDERED:
10227 cur_region = new_omp_region (bb, code, cur_region);
10228 fallthru = true;
10229 if (omp_find_clause (gimple_omp_ordered_clauses
10230 (as_a <gomp_ordered *> (last)),
10231 OMP_CLAUSE_DEPEND))
10232 cur_region = cur_region->outer;
10233 break;
10235 case GIMPLE_OMP_TARGET:
10236 cur_region = new_omp_region (bb, code, cur_region);
10237 fallthru = true;
10238 switch (gimple_omp_target_kind (last))
10240 case GF_OMP_TARGET_KIND_REGION:
10241 case GF_OMP_TARGET_KIND_DATA:
10242 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10243 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10244 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10245 case GF_OMP_TARGET_KIND_OACC_DATA:
10246 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10247 break;
10248 case GF_OMP_TARGET_KIND_UPDATE:
10249 case GF_OMP_TARGET_KIND_ENTER_DATA:
10250 case GF_OMP_TARGET_KIND_EXIT_DATA:
10251 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10252 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10253 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10254 cur_region = cur_region->outer;
10255 break;
10256 default:
10257 gcc_unreachable ();
10259 break;
10261 case GIMPLE_OMP_SECTIONS:
10262 cur_region = new_omp_region (bb, code, cur_region);
10263 fallthru = true;
10264 break;
10266 case GIMPLE_OMP_SECTIONS_SWITCH:
10267 fallthru = false;
10268 break;
10270 case GIMPLE_OMP_ATOMIC_LOAD:
10271 case GIMPLE_OMP_ATOMIC_STORE:
10272 fallthru = true;
10273 break;
10275 case GIMPLE_OMP_RETURN:
10276 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10277 somewhere other than the next block. This will be
10278 created later. */
10279 cur_region->exit = bb;
10280 if (cur_region->type == GIMPLE_OMP_TASK)
10281 /* Add an edge corresponding to not scheduling the task
10282 immediately. */
10283 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10284 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10285 cur_region = cur_region->outer;
10286 break;
10288 case GIMPLE_OMP_CONTINUE:
10289 cur_region->cont = bb;
10290 switch (cur_region->type)
10292 case GIMPLE_OMP_FOR:
10293 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10294 successor edges as abnormal to prevent splitting
10295 them. */
10296 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10297 /* Make the loopback edge. */
10298 make_edge (bb, single_succ (cur_region->entry),
10299 EDGE_ABNORMAL);
10301 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10302 corresponds to the case that the body of the loop
10303 is not executed at all. */
10304 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10305 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10306 fallthru = false;
10307 break;
10309 case GIMPLE_OMP_SECTIONS:
10310 /* Wire up the edges into and out of the nested sections. */
10312 basic_block switch_bb = single_succ (cur_region->entry);
10314 struct omp_region *i;
10315 for (i = cur_region->inner; i ; i = i->next)
10317 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10318 make_edge (switch_bb, i->entry, 0);
10319 make_edge (i->exit, bb, EDGE_FALLTHRU);
10322 /* Make the loopback edge to the block with
10323 GIMPLE_OMP_SECTIONS_SWITCH. */
10324 make_edge (bb, switch_bb, 0);
10326 /* Make the edge from the switch to exit. */
10327 make_edge (switch_bb, bb->next_bb, 0);
10328 fallthru = false;
10330 break;
10332 case GIMPLE_OMP_TASK:
10333 fallthru = true;
10334 break;
10336 default:
10337 gcc_unreachable ();
10339 break;
10341 default:
10342 gcc_unreachable ();
10345 if (*region != cur_region)
10347 *region = cur_region;
10348 if (cur_region)
10349 *region_idx = cur_region->entry->index;
10350 else
10351 *region_idx = 0;
10354 return fallthru;