/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

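/* A worked example (values assumed for illustration, not taken from any
   particular target): with a vectorization factor of 8, a schedule
   chunk size of 10 becomes (10 + 7) & -8 == 16, i.e. CHUNK_SIZE is
   rounded up to the next multiple of the vectorization factor so that
   simd chunks are not split across threads.  */
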
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up on task reductions on the parallel; while they are
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

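/* As a rough sketch of the result (function and variable names here are
   illustrative, not produced verbatim by this pass):

     #pragma omp parallel num_threads (4)
   expands to
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   while a combined parallel + for with schedule (dynamic, 16) expands to
     GOMP_parallel_loop_dynamic (foo._omp_fn.0, &.omp_data_o, 0,
				 n1, n2, step, 16, 0);
   where the loop bounds, step and chunk size come from WS_ARGS.  */
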
/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

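/* Sketch of the non-taskloop case built above (names illustrative only):

     GOMP_task (foo._omp_fn.1, &.omp_data_o, .omp_data_cpyfn.2,
		arg_size, arg_align, cond, flags, depend, priority);

   with null_pointer_node standing in for a missing data block or copy
   function, and GOMP_taskloop{,_ull} taking the additional num_tasks,
   startvar, endvar and step arguments instead of cond and depend.  */
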
/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

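/* Sketch (clause values assumed for illustration): on the host,

     #pragma omp teams num_teams (2) thread_limit (8)

   expands to roughly

     GOMP_teams_reg (foo._omp_fn.3, &.omp_data_o, 2, 8, 0);

   with the trailing zero being the currently unused flags argument.  */
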
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

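/* Usage sketch (GSI and operands hypothetical, for illustration only):

     expand_omp_build_assign (&gsi, v, expr);	     // v = expr; before GSI
     expand_omp_build_assign (&gsi, v, expr, true);  // v = expr; after GSI

   FROM is gimplified first, so EXPR may be an arbitrary tree; the
   regimplification walk afterwards handles operands with
   DECL_VALUE_EXPR.  */
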
/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We ignore the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to it's default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}
      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);
      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;	/* Base value.  */
  tree iters;	/* Number of steps.  */
  tree step;	/* Step size.  */
  tree tile;	/* Tile increment (if tiled).  */
  tree outer;	/* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

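/* Worked example (loop bounds assumed for illustration): for a collapsed
   member loop "for (i = 0; i < 10; i += 3)" the code above computes
   range = 10 and iters = (10 - 1 + 3) / 3 == 4, i.e. the usual
   round-up (range - dir + step) / step, and TOTAL accumulates the
   product of the per-loop iteration counts.  */
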
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula,
   otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)

	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	      }
	  }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	{
	  tree c[4];
	  for (i = 0; i < 4; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      c[i] = OMP_CLAUSE_DECL (innerc);
	    }
	  counts[0] = c[0];
	  fd->first_inner_iterations = c[1];
	  fd->factor = c[2];
	  fd->adjn1 = c[3];
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  bool rect_count_seen = false;
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if (fd->non_rect)
	{
	  /* Skip loops that use outer iterators in their expressions
	     during this phase.  */
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    {
	      counts[i] = build_zero_cst (type);
	      continue;
	    }
	}
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
1896 e->flags = EDGE_TRUE_VALUE;
1897 e->probability = ne->probability.invert ();
1898 if (l2_dom_bb == NULL)
1899 l2_dom_bb = entry_bb;
1900 entry_bb = e->dest;
1901 *gsi = gsi_last_nondebug_bb (entry_bb);
1904 if (POINTER_TYPE_P (itype))
1905 itype = signed_type_for (itype);
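/* counts[i] = (adj + n2 - n1) / step, with adj = step - 1 for LT and
   step + 1 for GT, as in the function comment above; for unsigned
   types with GT both operands of the division are negated first so
   the truncating division operates on non-negative values.  */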
1906 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1907 ? -1 : 1));
1908 t = fold_build2 (PLUS_EXPR, itype,
1909 fold_convert (itype, fd->loops[i].step), t);
1910 t = fold_build2 (PLUS_EXPR, itype, t,
1911 fold_convert (itype, fd->loops[i].n2));
1912 t = fold_build2 (MINUS_EXPR, itype, t,
1913 fold_convert (itype, fd->loops[i].n1));
1914 /* ?? We could probably use CEIL_DIV_EXPR instead of
1915 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1916 generate the same code in the end because generically we
1917 don't know that the values involved must be negative for
1918 GT?? */
1919 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1920 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1921 fold_build1 (NEGATE_EXPR, itype, t),
1922 fold_build1 (NEGATE_EXPR, itype,
1923 fold_convert (itype,
1924 fd->loops[i].step)));
1925 else
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1927 fold_convert (itype, fd->loops[i].step));
1928 t = fold_convert (type, t);
1929 if (TREE_CODE (t) == INTEGER_CST)
1930 counts[i] = t;
1931 else
1933 if (i < fd->collapse || i != first_zero_iter2)
1934 counts[i] = create_tmp_reg (type, ".count");
1935 expand_omp_build_assign (gsi, counts[i], t);
1937 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1939 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1940 continue;
1941 if (!rect_count_seen)
1943 t = counts[i];
1944 rect_count_seen = true;
1946 else
1947 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1948 expand_omp_build_assign (gsi, fd->loop.n2, t);
1951 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1953 gcc_assert (fd->last_nonrect != -1);
1955 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1956 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1957 build_zero_cst (type));
1958 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1959 if (fd->loops[i].m1
1960 || fd->loops[i].m2
1961 || fd->loops[i].non_rect_referenced)
1962 break;
1963 if (i == fd->last_nonrect
1964 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1965 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1967 int o = fd->first_nonrect;
1968 tree itype = TREE_TYPE (fd->loops[o].v);
1969 tree n1o = create_tmp_reg (itype, ".n1o");
1970 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1971 expand_omp_build_assign (gsi, n1o, t);
1972 tree n2o = create_tmp_reg (itype, ".n2o");
1973 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1974 expand_omp_build_assign (gsi, n2o, t);
1975 if (fd->loops[i].m1 && fd->loops[i].m2)
1976 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1977 unshare_expr (fd->loops[i].m1));
1978 else if (fd->loops[i].m1)
1979 t = fold_unary (NEGATE_EXPR, itype,
1980 unshare_expr (fd->loops[i].m1));
1981 else
1982 t = unshare_expr (fd->loops[i].m2);
1983 tree m2minusm1
1984 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1985 true, GSI_SAME_STMT);
1987 gimple_stmt_iterator gsi2 = *gsi;
1988 gsi_prev (&gsi2);
1989 e = split_block (entry_bb, gsi_stmt (gsi2));
1990 e = split_block (e->dest, (gimple *) NULL);
1991 basic_block bb1 = e->src;
1992 entry_bb = e->dest;
1993 *gsi = gsi_after_labels (entry_bb);
1995 gsi2 = gsi_after_labels (bb1);
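/* In bb1 compute the outer loop's iteration count,
   outer_niters = (adj + n2o - n1o) / ostep, and the outer IV's value
   in its final iteration, last = n1o + (outer_niters - 1) * ostep.
   The inner loop's condition is then tested at both the first and the
   last outer iteration; if it fails at either end, the blocks created
   below adjust n1o or n2o and retry, so that the closed-form count
   afterwards applies to the whole sub-nest.  */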
1996 tree ostep = fold_convert (itype, fd->loops[o].step);
1997 t = build_int_cst (itype, (fd->loops[o].cond_code
1998 == LT_EXPR ? -1 : 1));
1999 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2000 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2001 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2002 if (TYPE_UNSIGNED (itype)
2003 && fd->loops[o].cond_code == GT_EXPR)
2004 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2005 fold_build1 (NEGATE_EXPR, itype, t),
2006 fold_build1 (NEGATE_EXPR, itype, ostep));
2007 else
2008 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2009 tree outer_niters
2010 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2011 true, GSI_SAME_STMT);
2012 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2013 build_one_cst (itype));
2014 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2015 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2016 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 tree n1, n2, n1e, n2e;
2019 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2020 if (fd->loops[i].m1)
2022 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2023 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2024 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2026 else
2027 n1 = t;
2028 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2029 true, GSI_SAME_STMT);
2030 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2031 if (fd->loops[i].m2)
2033 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2034 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2035 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2037 else
2038 n2 = t;
2039 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2040 true, GSI_SAME_STMT);
2041 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2042 if (fd->loops[i].m1)
2044 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2045 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2046 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2048 else
2049 n1e = t;
2050 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2051 true, GSI_SAME_STMT);
2052 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2053 if (fd->loops[i].m2)
2055 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2056 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2057 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2059 else
2060 n2e = t;
2061 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2062 true, GSI_SAME_STMT);
2063 gcond *cond_stmt
2064 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2065 NULL_TREE, NULL_TREE);
2066 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2067 e = split_block (bb1, cond_stmt);
2068 e->flags = EDGE_TRUE_VALUE;
2069 e->probability = profile_probability::likely ().guessed ();
2070 basic_block bb2 = e->dest;
2071 gsi2 = gsi_after_labels (bb2);
2073 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2074 NULL_TREE, NULL_TREE);
2075 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2076 e = split_block (bb2, cond_stmt);
2077 e->flags = EDGE_TRUE_VALUE;
2078 e->probability = profile_probability::likely ().guessed ();
2079 gsi2 = gsi_after_labels (e->dest);
2081 tree step = fold_convert (itype, fd->loops[i].step);
2082 t = build_int_cst (itype, (fd->loops[i].cond_code
2083 == LT_EXPR ? -1 : 1));
2084 t = fold_build2 (PLUS_EXPR, itype, step, t);
2085 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2086 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2087 if (TYPE_UNSIGNED (itype)
2088 && fd->loops[i].cond_code == GT_EXPR)
2089 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2090 fold_build1 (NEGATE_EXPR, itype, t),
2091 fold_build1 (NEGATE_EXPR, itype, step));
2092 else
2093 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2094 tree first_inner_iterations
2095 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2096 true, GSI_SAME_STMT);
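/* first_inner_iterations is the inner loop's trip count during the
   first outer iteration; factor = (m2 - m1) * ostep / step is how much
   that trip count changes with each outer iteration.  */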
2097 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2098 if (TYPE_UNSIGNED (itype)
2099 && fd->loops[i].cond_code == GT_EXPR)
2100 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2101 fold_build1 (NEGATE_EXPR, itype, t),
2102 fold_build1 (NEGATE_EXPR, itype, step));
2103 else
2104 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2105 tree factor
2106 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2107 true, GSI_SAME_STMT);
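/* Sum the arithmetic series:
   counts[fd->last_nonrect] = outer_niters * first_inner_iterations
			      + factor * (outer_niters - 1)
				* outer_niters / 2.  */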
2108 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2109 build_one_cst (itype));
2110 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2111 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2112 t = fold_build2 (MULT_EXPR, itype, factor, t);
2113 t = fold_build2 (PLUS_EXPR, itype,
2114 fold_build2 (MULT_EXPR, itype, outer_niters,
2115 first_inner_iterations), t);
2116 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2117 fold_convert (type, t));
2119 basic_block bb3 = create_empty_bb (bb1);
2120 add_bb_to_loop (bb3, bb1->loop_father);
2122 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2123 e->probability = profile_probability::unlikely ().guessed ();
2125 gsi2 = gsi_after_labels (bb3);
2126 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2127 NULL_TREE, NULL_TREE);
2128 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2129 e = split_block (bb3, cond_stmt);
2130 e->flags = EDGE_TRUE_VALUE;
2131 e->probability = profile_probability::likely ().guessed ();
2132 basic_block bb4 = e->dest;
2134 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2135 ne->probability = e->probability.invert ();
2137 basic_block bb5 = create_empty_bb (bb2);
2138 add_bb_to_loop (bb5, bb2->loop_father);
2140 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2141 ne->probability = profile_probability::unlikely ().guessed ();
2143 for (int j = 0; j < 2; j++)
2145 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2146 t = fold_build2 (MINUS_EXPR, itype,
2147 unshare_expr (fd->loops[i].n1),
2148 unshare_expr (fd->loops[i].n2));
2149 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2150 tree tem
2151 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2152 true, GSI_SAME_STMT);
2153 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2154 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2155 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2156 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2157 true, GSI_SAME_STMT);
2158 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2159 if (fd->loops[i].m1)
2161 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2162 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2163 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2165 else
2166 n1 = t;
2167 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2168 true, GSI_SAME_STMT);
2169 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2170 if (fd->loops[i].m2)
2172 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2173 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2174 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2176 else
2177 n2 = t;
2178 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2179 true, GSI_SAME_STMT);
2180 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2182 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2183 NULL_TREE, NULL_TREE);
2184 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2185 e = split_block (gsi_bb (gsi2), cond_stmt);
2186 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2187 e->probability = profile_probability::unlikely ().guessed ();
2188 ne = make_edge (e->src, bb1,
2189 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2190 ne->probability = e->probability.invert ();
2191 gsi2 = gsi_after_labels (e->dest);
2193 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2194 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2196 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2199 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2200 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2201 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2203 if (fd->first_nonrect + 1 == fd->last_nonrect)
2205 fd->first_inner_iterations = first_inner_iterations;
2206 fd->factor = factor;
2207 fd->adjn1 = n1o;
2210 else
2212 /* Fallback implementation. Evaluate the loops with m1/m2
2213 non-NULL as well as their outer loops at runtime using temporaries
2214 instead of the original iteration variables, and in the
2215 body just bump the counter. */
2216 gimple_stmt_iterator gsi2 = *gsi;
2217 gsi_prev (&gsi2);
2218 e = split_block (entry_bb, gsi_stmt (gsi2));
2219 e = split_block (e->dest, (gimple *) NULL);
2220 basic_block cur_bb = e->src;
2221 basic_block next_bb = e->dest;
2222 entry_bb = e->dest;
2223 *gsi = gsi_after_labels (entry_bb);
2225 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2226 memset (vs, 0, fd->last_nonrect * sizeof (tree));
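/* vs[] holds the temporaries standing in for the original iteration
   variables of the loops evaluated at runtime here.  */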
2228 for (i = 0; i <= fd->last_nonrect; i++)
2230 if (fd->loops[i].m1 == NULL_TREE
2231 && fd->loops[i].m2 == NULL_TREE
2232 && !fd->loops[i].non_rect_referenced)
2233 continue;
2235 tree itype = TREE_TYPE (fd->loops[i].v);
2237 gsi2 = gsi_after_labels (cur_bb);
2238 tree n1, n2;
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2240 if (fd->loops[i].m1)
2242 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2243 n1 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n1);
2245 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2247 else
2248 n1 = t;
2249 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i < fd->last_nonrect)
2253 vs[i] = create_tmp_reg (itype, ".it");
2254 expand_omp_build_assign (&gsi2, vs[i], n1);
2256 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2257 if (fd->loops[i].m2)
2259 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2260 n2 = fold_build2 (MULT_EXPR, itype,
2261 vs[i - fd->loops[i].outer], n2);
2262 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2264 else
2265 n2 = t;
2266 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2267 true, GSI_SAME_STMT);
2268 if (i == fd->last_nonrect)
2270 gcond *cond_stmt
2271 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2272 NULL_TREE, NULL_TREE);
2273 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2274 e = split_block (cur_bb, cond_stmt);
2275 e->flags = EDGE_TRUE_VALUE;
2276 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2277 e->probability = profile_probability::likely ().guessed ();
2278 ne->probability = e->probability.invert ();
2279 gsi2 = gsi_after_labels (e->dest);
2281 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2282 ? -1 : 1));
2283 t = fold_build2 (PLUS_EXPR, itype,
2284 fold_convert (itype, fd->loops[i].step), t);
2285 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2286 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2287 tree step = fold_convert (itype, fd->loops[i].step);
2288 if (TYPE_UNSIGNED (itype)
2289 && fd->loops[i].cond_code == GT_EXPR)
2290 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2291 fold_build1 (NEGATE_EXPR, itype, t),
2292 fold_build1 (NEGATE_EXPR, itype, step));
2293 else
2294 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2295 t = fold_convert (type, t);
2296 t = fold_build2 (PLUS_EXPR, type,
2297 counts[fd->last_nonrect], t);
2298 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2299 true, GSI_SAME_STMT);
2300 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2301 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2302 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2303 break;
2305 e = split_block (cur_bb, last_stmt (cur_bb));
2307 basic_block new_cur_bb = create_empty_bb (cur_bb);
2308 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2310 gsi2 = gsi_after_labels (e->dest);
2311 tree step = fold_convert (itype,
2312 unshare_expr (fd->loops[i].step));
2313 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2314 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2315 true, GSI_SAME_STMT);
2316 expand_omp_build_assign (&gsi2, vs[i], t);
2318 ne = split_block (e->dest, last_stmt (e->dest));
2319 gsi2 = gsi_after_labels (ne->dest);
2321 gcond *cond_stmt
2322 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2323 NULL_TREE, NULL_TREE);
2324 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2325 edge e3, e4;
2326 if (next_bb == entry_bb)
2328 e3 = find_edge (ne->dest, next_bb);
2329 e3->flags = EDGE_FALSE_VALUE;
2331 else
2332 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2333 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2334 e4->probability = profile_probability::likely ().guessed ();
2335 e3->probability = e4->probability.invert ();
2336 basic_block esrc = e->src;
2337 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2338 cur_bb = new_cur_bb;
2339 basic_block latch_bb = next_bb;
2340 next_bb = e->dest;
2341 remove_edge (e);
2342 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2343 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2344 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2347 t = NULL_TREE;
2348 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2349 if (!fd->loops[i].non_rect_referenced
2350 && fd->loops[i].m1 == NULL_TREE
2351 && fd->loops[i].m2 == NULL_TREE)
2353 if (t == NULL_TREE)
2354 t = counts[i];
2355 else
2356 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2358 if (t)
2360 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2361 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2363 if (!rect_count_seen)
2364 t = counts[fd->last_nonrect];
2365 else
2366 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2367 counts[fd->last_nonrect]);
2368 expand_omp_build_assign (gsi, fd->loop.n2, t);
2370 else if (fd->non_rect)
2372 tree t = fd->loop.n2;
2373 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2374 int non_rect_referenced = 0, non_rect = 0;
2375 for (i = 0; i < fd->collapse; i++)
2377 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2378 && !integer_zerop (counts[i]))
2379 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2380 if (fd->loops[i].non_rect_referenced)
2381 non_rect_referenced++;
2382 if (fd->loops[i].m1 || fd->loops[i].m2)
2383 non_rect++;
2385 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2386 counts[fd->last_nonrect] = t;
2390 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2391 T = V;
2392 V3 = N31 + (T % count3) * STEP3;
2393 T = T / count3;
2394 V2 = N21 + (T % count2) * STEP2;
2395 T = T / count2;
2396 V1 = N11 + T * STEP1;
2397 if this loop doesn't have an inner loop construct combined with it.
2398 If it does have an inner loop construct combined with it and the
2399 iteration count isn't known constant, store values from counts array
2400 into its _looptemp_ temporaries instead.
2401 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2402 inclusive), use the count of all those loops together, and either
2403 find quadratic etc. equation roots, or as a fallback, do:
2404 COUNT = 0;
2405 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2406 for (tmpj = M21 * tmpi + N21;
2407 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2409 int tmpk1 = M31 * tmpj + N31;
2410 int tmpk2 = M32 * tmpj + N32;
2411 if (tmpk1 COND3 tmpk2)
2413 if (COND3 is <)
2414 adj = STEP3 - 1;
2415 else
2416 adj = STEP3 + 1;
2417 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2418 if (COUNT + temp > T)
2420 V1 = tmpi;
2421 V2 = tmpj;
2422 V3 = tmpk1 + (T - COUNT) * STEP3;
2423 goto done;
2425 else
2426 COUNT += temp;
2429 done:;
2430 but for optional innermost or outermost rectangular loops that aren't
2431 referenced by other loop expressions keep doing the division/modulo. */
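/* A sketch of the quadratic-root computation referenced above, not
   emitted code: with
     D(C) = C * FIRST_INNER_ITERATIONS + FACTOR * C * (C - 1) / 2
   logical iterations consumed by the first C outer iterations, we want
   the largest C with D(C) <= T.  Solving
     (FACTOR / 2) * C * C + (FIRST_INNER_ITERATIONS - FACTOR / 2) * C - T = 0
   yields the positive root
     C = (sqrt (T3 * T3 + 2 * FACTOR * T) - T3) / FACTOR,
   where T3 = FIRST_INNER_ITERATIONS - FACTOR / 2; truncating C and
   taking T - D(C) then recovers the innermost index.  The code below
   performs this in double precision and falls back to iterating the
   loops when the range checks on D(C) show the result is off.  */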
2433 static void
2434 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2435 tree *counts, tree *nonrect_bounds,
2436 gimple *inner_stmt, tree startvar)
2438 int i;
2439 if (gimple_omp_for_combined_p (fd->for_stmt))
2441 /* If fd->loop.n2 is constant, then no propagation of the counts
2442 is needed; they are constant. */
2443 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2444 return;
2446 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2447 ? gimple_omp_taskreg_clauses (inner_stmt)
2448 : gimple_omp_for_clauses (inner_stmt);
2449 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2450 isn't supposed to be handled, as the inner loop doesn't
2451 use it. */
2452 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2453 gcc_assert (innerc);
2454 int count = 0;
2455 if (fd->non_rect
2456 && fd->last_nonrect == fd->first_nonrect + 1
2457 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2458 count = 4;
2459 for (i = 0; i < fd->collapse + count; i++)
2461 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2462 OMP_CLAUSE__LOOPTEMP_);
2463 gcc_assert (innerc);
2464 if (i)
2466 tree tem = OMP_CLAUSE_DECL (innerc);
2467 tree t;
2468 if (i < fd->collapse)
2469 t = counts[i];
2470 else
2471 switch (i - fd->collapse)
2473 case 0: t = counts[0]; break;
2474 case 1: t = fd->first_inner_iterations; break;
2475 case 2: t = fd->factor; break;
2476 case 3: t = fd->adjn1; break;
2477 default: gcc_unreachable ();
2479 t = fold_convert (TREE_TYPE (tem), t);
2480 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2481 false, GSI_CONTINUE_LINKING);
2482 gassign *stmt = gimple_build_assign (tem, t);
2483 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2486 return;
2489 tree type = TREE_TYPE (fd->loop.v);
2490 tree tem = create_tmp_reg (type, ".tem");
2491 gassign *stmt = gimple_build_assign (tem, startvar);
2492 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2494 for (i = fd->collapse - 1; i >= 0; i--)
2496 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2497 itype = vtype;
2498 if (POINTER_TYPE_P (vtype))
2499 itype = signed_type_for (vtype);
2500 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2501 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2502 else
2503 t = tem;
2504 if (i == fd->last_nonrect)
2506 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2507 false, GSI_CONTINUE_LINKING);
2508 tree stopval = t;
2509 tree idx = create_tmp_reg (type, ".count");
2510 expand_omp_build_assign (gsi, idx,
2511 build_zero_cst (type), true);
2512 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2513 if (fd->first_nonrect + 1 == fd->last_nonrect
2514 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2515 || fd->first_inner_iterations)
2516 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2517 != CODE_FOR_nothing))
2519 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2520 tree itype = TREE_TYPE (fd->loops[i].v);
2521 tree first_inner_iterations = fd->first_inner_iterations;
2522 tree factor = fd->factor;
2523 gcond *cond_stmt
2524 = gimple_build_cond (NE_EXPR, factor,
2525 build_zero_cst (TREE_TYPE (factor)),
2526 NULL_TREE, NULL_TREE);
2527 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2528 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2529 basic_block bb0 = e->src;
2530 e->flags = EDGE_TRUE_VALUE;
2531 e->probability = profile_probability::likely ();
2532 bb_triang_dom = bb0;
2533 *gsi = gsi_after_labels (e->dest);
2534 tree slltype = long_long_integer_type_node;
2535 tree ulltype = long_long_unsigned_type_node;
2536 tree stopvalull = fold_convert (ulltype, stopval);
2537 stopvalull
2538 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2539 false, GSI_CONTINUE_LINKING);
2540 first_inner_iterations
2541 = fold_convert (slltype, first_inner_iterations);
2542 first_inner_iterations
2543 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2544 NULL_TREE, false,
2545 GSI_CONTINUE_LINKING);
2546 factor = fold_convert (slltype, factor);
2547 factor
2548 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2549 false, GSI_CONTINUE_LINKING);
2550 tree first_inner_iterationsd
2551 = fold_build1 (FLOAT_EXPR, double_type_node,
2552 first_inner_iterations);
2553 first_inner_iterationsd
2554 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2555 NULL_TREE, false,
2556 GSI_CONTINUE_LINKING);
2557 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2558 factor);
2559 factord = force_gimple_operand_gsi (gsi, factord, true,
2560 NULL_TREE, false,
2561 GSI_CONTINUE_LINKING);
2562 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2563 stopvalull);
2564 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2565 NULL_TREE, false,
2566 GSI_CONTINUE_LINKING);
2567 /* Temporarily disable flag_rounding_math; values will be
2568 decimal numbers divided by 2, and worst-case imprecisions
2569 due to too-large values ought to be caught later by the
2570 checks for fallback. */
2571 int save_flag_rounding_math = flag_rounding_math;
2572 flag_rounding_math = 0;
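/* In double precision compute t3 = first_inner_iterations - factor / 2
   and the discriminant t = t3 * t3 + 2 * factor * stopval of the
   quadratic equation for the outer iteration count.  */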
2573 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2574 build_real (double_type_node, dconst2));
2575 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2576 first_inner_iterationsd, t);
2577 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2578 GSI_CONTINUE_LINKING);
2579 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2580 build_real (double_type_node, dconst2));
2581 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2582 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2583 fold_build2 (MULT_EXPR, double_type_node,
2584 t3, t3));
2585 flag_rounding_math = save_flag_rounding_math;
2586 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2587 GSI_CONTINUE_LINKING);
2588 if (flag_exceptions
2589 && cfun->can_throw_non_call_exceptions
2590 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2592 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2593 build_zero_cst (double_type_node));
2594 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2595 false, GSI_CONTINUE_LINKING);
2596 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2597 boolean_false_node,
2598 NULL_TREE, NULL_TREE);
2600 else
2601 cond_stmt
2602 = gimple_build_cond (LT_EXPR, t,
2603 build_zero_cst (double_type_node),
2604 NULL_TREE, NULL_TREE);
2605 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2606 e = split_block (gsi_bb (*gsi), cond_stmt);
2607 basic_block bb1 = e->src;
2608 e->flags = EDGE_FALSE_VALUE;
2609 e->probability = profile_probability::very_likely ();
2610 *gsi = gsi_after_labels (e->dest);
2611 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2612 tree sqrtr = create_tmp_var (double_type_node);
2613 gimple_call_set_lhs (call, sqrtr);
2614 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
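/* The quadratic's positive root, truncated to an integer, is the
   candidate count of complete outer iterations:
   c = (sqrt (t) - t3) / factor.  */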
2615 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2616 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2617 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2618 tree c = create_tmp_var (ulltype);
2619 tree d = create_tmp_var (ulltype);
2620 expand_omp_build_assign (gsi, c, t, true);
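/* d = c * first_inner_iterations + factor * c * (c - 1) / 2 is the
   number of logical iterations consumed by the first c outer
   iterations; the two comparisons below verify
   d <= stopval < d + (factor * c + first_inner_iterations),
   i.e. that c is exact despite the floating-point detour, and
   otherwise branch to the fallback loop.  */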
2621 t = fold_build2 (MINUS_EXPR, ulltype, c,
2622 build_one_cst (ulltype));
2623 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2624 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2625 t = fold_build2 (MULT_EXPR, ulltype,
2626 fold_convert (ulltype, fd->factor), t);
2627 tree t2
2628 = fold_build2 (MULT_EXPR, ulltype, c,
2629 fold_convert (ulltype,
2630 fd->first_inner_iterations));
2631 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2632 expand_omp_build_assign (gsi, d, t, true);
2633 t = fold_build2 (MULT_EXPR, ulltype,
2634 fold_convert (ulltype, fd->factor), c);
2635 t = fold_build2 (PLUS_EXPR, ulltype,
2636 t, fold_convert (ulltype,
2637 fd->first_inner_iterations));
2638 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2639 GSI_CONTINUE_LINKING);
2640 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2641 NULL_TREE, NULL_TREE);
2642 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2643 e = split_block (gsi_bb (*gsi), cond_stmt);
2644 basic_block bb2 = e->src;
2645 e->flags = EDGE_TRUE_VALUE;
2646 e->probability = profile_probability::very_likely ();
2647 *gsi = gsi_after_labels (e->dest);
2648 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2649 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2650 GSI_CONTINUE_LINKING);
2651 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2652 NULL_TREE, NULL_TREE);
2653 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2654 e = split_block (gsi_bb (*gsi), cond_stmt);
2655 basic_block bb3 = e->src;
2656 e->flags = EDGE_FALSE_VALUE;
2657 e->probability = profile_probability::very_likely ();
2658 *gsi = gsi_after_labels (e->dest);
2659 t = fold_convert (itype, c);
2660 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2661 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2662 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2663 GSI_CONTINUE_LINKING);
2664 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2665 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2666 t2 = fold_convert (itype, t2);
2667 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2668 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2669 if (fd->loops[i].m1)
2671 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2672 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2674 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2675 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2676 bb_triang = e->src;
2677 *gsi = gsi_after_labels (e->dest);
2678 remove_edge (e);
2679 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2680 e->probability = profile_probability::very_unlikely ();
2681 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2682 e->probability = profile_probability::very_unlikely ();
2683 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2684 e->probability = profile_probability::very_unlikely ();
2686 basic_block bb4 = create_empty_bb (bb0);
2687 add_bb_to_loop (bb4, bb0->loop_father);
2688 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2689 e->probability = profile_probability::unlikely ();
2690 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2691 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2692 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2693 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2694 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2695 counts[i], counts[i - 1]);
2696 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2697 GSI_CONTINUE_LINKING);
2698 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2699 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2700 t = fold_convert (itype, t);
2701 t2 = fold_convert (itype, t2);
2702 t = fold_build2 (MULT_EXPR, itype, t,
2703 fold_convert (itype, fd->loops[i].step));
2704 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2705 t2 = fold_build2 (MULT_EXPR, itype, t2,
2706 fold_convert (itype, fd->loops[i - 1].step));
2707 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2708 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2709 false, GSI_CONTINUE_LINKING);
2710 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2711 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2712 if (fd->loops[i].m1)
2714 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2715 fd->loops[i - 1].v);
2716 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2718 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2719 false, GSI_CONTINUE_LINKING);
2720 stmt = gimple_build_assign (fd->loops[i].v, t);
2721 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2723 /* Fallback implementation. Evaluate the loops in between
2724 (inclusive) fd->first_nonrect and fd->last_nonrect at
2725 runtime using temporaries instead of the original iteration
2726 variables, in the body just bump the counter and compare
2727 with the desired value. */
2728 gimple_stmt_iterator gsi2 = *gsi;
2729 basic_block entry_bb = gsi_bb (gsi2);
2730 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2731 e = split_block (e->dest, (gimple *) NULL);
2732 basic_block dom_bb = NULL;
2733 basic_block cur_bb = e->src;
2734 basic_block next_bb = e->dest;
2735 entry_bb = e->dest;
2736 *gsi = gsi_after_labels (entry_bb);
2738 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2739 tree n1 = NULL_TREE, n2 = NULL_TREE;
2740 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2742 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2744 tree itype = TREE_TYPE (fd->loops[j].v);
2745 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2746 && fd->loops[j].m2 == NULL_TREE
2747 && !fd->loops[j].non_rect_referenced);
2748 gsi2 = gsi_after_labels (cur_bb);
2749 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2750 if (fd->loops[j].m1)
2752 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2753 n1 = fold_build2 (MULT_EXPR, itype,
2754 vs[j - fd->loops[j].outer], n1);
2755 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2757 else if (rect_p)
2758 n1 = build_zero_cst (type);
2759 else
2760 n1 = t;
2761 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2762 true, GSI_SAME_STMT);
2763 if (j < fd->last_nonrect)
2765 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2766 expand_omp_build_assign (&gsi2, vs[j], n1);
2768 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2769 if (fd->loops[j].m2)
2771 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2772 n2 = fold_build2 (MULT_EXPR, itype,
2773 vs[j - fd->loops[j].outer], n2);
2774 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2776 else if (rect_p)
2777 n2 = counts[j];
2778 else
2779 n2 = t;
2780 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2781 true, GSI_SAME_STMT);
2782 if (j == fd->last_nonrect)
2784 gcond *cond_stmt
2785 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2786 NULL_TREE, NULL_TREE);
2787 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2788 e = split_block (cur_bb, cond_stmt);
2789 e->flags = EDGE_TRUE_VALUE;
2790 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2791 e->probability = profile_probability::likely ().guessed ();
2792 ne->probability = e->probability.invert ();
2793 gsi2 = gsi_after_labels (e->dest);
2795 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2796 ? -1 : 1));
2797 t = fold_build2 (PLUS_EXPR, itype,
2798 fold_convert (itype, fd->loops[j].step), t);
2799 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2800 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2801 tree step = fold_convert (itype, fd->loops[j].step);
2802 if (TYPE_UNSIGNED (itype)
2803 && fd->loops[j].cond_code == GT_EXPR)
2804 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2805 fold_build1 (NEGATE_EXPR, itype, t),
2806 fold_build1 (NEGATE_EXPR, itype, step));
2807 else
2808 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2809 t = fold_convert (type, t);
2810 t = fold_build2 (PLUS_EXPR, type, idx, t);
2811 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2812 true, GSI_SAME_STMT);
2813 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2814 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2815 cond_stmt
2816 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2817 NULL_TREE);
2818 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2819 e = split_block (gsi_bb (gsi2), cond_stmt);
2820 e->flags = EDGE_TRUE_VALUE;
2821 e->probability = profile_probability::likely ().guessed ();
2822 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2823 ne->probability = e->probability.invert ();
2824 gsi2 = gsi_after_labels (e->dest);
2825 expand_omp_build_assign (&gsi2, idx, t);
2826 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2827 break;
2829 e = split_block (cur_bb, last_stmt (cur_bb));
2831 basic_block new_cur_bb = create_empty_bb (cur_bb);
2832 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2834 gsi2 = gsi_after_labels (e->dest);
2835 if (rect_p)
2836 t = fold_build2 (PLUS_EXPR, type, vs[j],
2837 build_one_cst (type));
2838 else
2840 tree step
2841 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2842 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2844 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2845 true, GSI_SAME_STMT);
2846 expand_omp_build_assign (&gsi2, vs[j], t);
2848 edge ne = split_block (e->dest, last_stmt (e->dest));
2849 gsi2 = gsi_after_labels (ne->dest);
2851 gcond *cond_stmt;
2852 if (next_bb == entry_bb)
2853 /* No need to actually check the outermost condition. */
2854 cond_stmt
2855 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2856 boolean_true_node,
2857 NULL_TREE, NULL_TREE);
2858 else
2859 cond_stmt
2860 = gimple_build_cond (rect_p ? LT_EXPR
2861 : fd->loops[j].cond_code,
2862 vs[j], n2, NULL_TREE, NULL_TREE);
2863 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2864 edge e3, e4;
2865 if (next_bb == entry_bb)
2867 e3 = find_edge (ne->dest, next_bb);
2868 e3->flags = EDGE_FALSE_VALUE;
2869 dom_bb = ne->dest;
2871 else
2872 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2873 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2874 e4->probability = profile_probability::likely ().guessed ();
2875 e3->probability = e4->probability.invert ();
2876 basic_block esrc = e->src;
2877 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2878 cur_bb = new_cur_bb;
2879 basic_block latch_bb = next_bb;
2880 next_bb = e->dest;
2881 remove_edge (e);
2882 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2883 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2884 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2886 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2888 tree itype = TREE_TYPE (fd->loops[j].v);
2889 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2890 && fd->loops[j].m2 == NULL_TREE
2891 && !fd->loops[j].non_rect_referenced);
2892 if (j == fd->last_nonrect)
2894 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2895 t = fold_convert (itype, t);
2896 tree t2
2897 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2898 t = fold_build2 (MULT_EXPR, itype, t, t2);
2899 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2901 else if (rect_p)
2903 t = fold_convert (itype, vs[j]);
2904 t = fold_build2 (MULT_EXPR, itype, t,
2905 fold_convert (itype, fd->loops[j].step));
2906 if (POINTER_TYPE_P (vtype))
2907 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2908 else
2909 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2911 else
2912 t = vs[j];
2913 t = force_gimple_operand_gsi (gsi, t, false,
2914 NULL_TREE, true,
2915 GSI_SAME_STMT);
2916 stmt = gimple_build_assign (fd->loops[j].v, t);
2917 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2919 if (gsi_end_p (*gsi))
2920 *gsi = gsi_last_bb (gsi_bb (*gsi));
2921 else
2922 gsi_prev (gsi);
2923 if (bb_triang)
2925 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2926 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2927 *gsi = gsi_after_labels (e->dest);
2928 if (!gsi_end_p (*gsi))
2929 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2930 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2933 else
2935 t = fold_convert (itype, t);
2936 t = fold_build2 (MULT_EXPR, itype, t,
2937 fold_convert (itype, fd->loops[i].step));
2938 if (POINTER_TYPE_P (vtype))
2939 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2940 else
2941 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2942 t = force_gimple_operand_gsi (gsi, t,
2943 DECL_P (fd->loops[i].v)
2944 && TREE_ADDRESSABLE (fd->loops[i].v),
2945 NULL_TREE, false,
2946 GSI_CONTINUE_LINKING);
2947 stmt = gimple_build_assign (fd->loops[i].v, t);
2948 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2950 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2952 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2953 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2954 false, GSI_CONTINUE_LINKING);
2955 stmt = gimple_build_assign (tem, t);
2956 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2958 if (i == fd->last_nonrect)
2959 i = fd->first_nonrect;
2961 if (fd->non_rect)
2962 for (i = 0; i <= fd->last_nonrect; i++)
2963 if (fd->loops[i].m2)
2965 tree itype = TREE_TYPE (fd->loops[i].v);
2967 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2968 t = fold_build2 (MULT_EXPR, itype,
2969 fd->loops[i - fd->loops[i].outer].v, t);
2970 t = fold_build2 (PLUS_EXPR, itype, t,
2971 fold_convert (itype,
2972 unshare_expr (fd->loops[i].n2)));
2973 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2974 t = force_gimple_operand_gsi (gsi, t, false,
2975 NULL_TREE, false,
2976 GSI_CONTINUE_LINKING);
2977 stmt = gimple_build_assign (nonrect_bounds[i], t);
2978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2982 /* Helper function for expand_omp_for_*. Generate code like:
2983 L10:
2984 V3 += STEP3;
2985 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2986 L11:
2987 V3 = N31;
2988 V2 += STEP2;
2989 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2990 L12:
2991 V2 = N21;
2992 V1 += STEP1;
2993 goto BODY_BB;
2994 For non-rectangular loops, use temporaries stored in nonrect_bounds
2995 for the upper bounds if M?2 multiplier is present. Given e.g.
2996 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2997 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2998 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2999 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3001 L10:
3002 V4 += STEP4;
3003 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3004 L11:
3005 V4 = N41 + M41 * V2; // This can be left out if the loop
3006 // refers to the immediate parent loop
3007 V3 += STEP3;
3008 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3009 L12:
3010 V3 = N31;
3011 V2 += STEP2;
3012 if (V2 cond2 N22) goto L120; else goto L13;
3013 L120:
3014 V4 = N41 + M41 * V2;
3015 NONRECT_BOUND4 = N42 + M42 * V2;
3016 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3017 L13:
3018 V2 = N21;
3019 V1 += STEP1;
3020 goto L120; */
3022 static basic_block
3023 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3024 basic_block cont_bb, basic_block body_bb)
3026 basic_block last_bb, bb, collapse_bb = NULL;
3027 int i;
3028 gimple_stmt_iterator gsi;
3029 edge e;
3030 tree t;
3031 gimple *stmt;
3033 last_bb = cont_bb;
3034 for (i = fd->collapse - 1; i >= 0; i--)
3036 tree vtype = TREE_TYPE (fd->loops[i].v);
3038 bb = create_empty_bb (last_bb);
3039 add_bb_to_loop (bb, last_bb->loop_father);
3040 gsi = gsi_start_bb (bb);
3042 if (i < fd->collapse - 1)
3044 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3045 e->probability
3046 = profile_probability::guessed_always ().apply_scale (1, 8);
3048 struct omp_for_data_loop *l = &fd->loops[i + 1];
3049 if (l->m1 == NULL_TREE || l->outer != 1)
3051 t = l->n1;
3052 if (l->m1)
3054 tree t2
3055 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3056 fd->loops[i + 1 - l->outer].v, l->m1);
3057 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3059 t = force_gimple_operand_gsi (&gsi, t,
3060 DECL_P (l->v)
3061 && TREE_ADDRESSABLE (l->v),
3062 NULL_TREE, false,
3063 GSI_CONTINUE_LINKING);
3064 stmt = gimple_build_assign (l->v, t);
3065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3068 else
3069 collapse_bb = bb;
3071 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3073 if (POINTER_TYPE_P (vtype))
3074 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3075 else
3076 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3077 t = force_gimple_operand_gsi (&gsi, t,
3078 DECL_P (fd->loops[i].v)
3079 && TREE_ADDRESSABLE (fd->loops[i].v),
3080 NULL_TREE, false, GSI_CONTINUE_LINKING);
3081 stmt = gimple_build_assign (fd->loops[i].v, t);
3082 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3084 if (fd->loops[i].non_rect_referenced)
3086 basic_block update_bb = NULL, prev_bb = NULL;
3087 for (int j = i + 1; j <= fd->last_nonrect; j++)
3088 if (j - fd->loops[j].outer == i)
3090 tree n1, n2;
3091 struct omp_for_data_loop *l = &fd->loops[j];
3092 basic_block this_bb = create_empty_bb (last_bb);
3093 add_bb_to_loop (this_bb, last_bb->loop_father);
3094 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3095 if (prev_bb)
3097 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3098 e->probability
3099 = profile_probability::guessed_always ().apply_scale (7, 8);
3101 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3103 if (l->m1)
3105 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3106 fd->loops[i].v);
3107 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3108 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3109 false,
3110 GSI_CONTINUE_LINKING);
3111 stmt = gimple_build_assign (l->v, n1);
3112 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3113 n1 = l->v;
3115 else
3116 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3117 NULL_TREE, false,
3118 GSI_CONTINUE_LINKING);
3119 if (l->m2)
3121 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3122 fd->loops[i].v);
3123 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3124 t, unshare_expr (l->n2));
3125 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3126 false,
3127 GSI_CONTINUE_LINKING);
3128 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3129 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3130 n2 = nonrect_bounds[j];
3132 else
3133 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3134 true, NULL_TREE, false,
3135 GSI_CONTINUE_LINKING);
3136 gcond *cond_stmt
3137 = gimple_build_cond (l->cond_code, n1, n2,
3138 NULL_TREE, NULL_TREE);
3139 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3140 if (update_bb == NULL)
3141 update_bb = this_bb;
3142 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3143 e->probability
3144 = profile_probability::guessed_always ().apply_scale (1, 8);
3145 if (prev_bb == NULL)
3146 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3147 prev_bb = this_bb;
3149 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3150 e->probability
3151 = profile_probability::guessed_always ().apply_scale (7, 8);
3152 body_bb = update_bb;
3155 if (i > 0)
3157 if (fd->loops[i].m2)
3158 t = nonrect_bounds[i];
3159 else
3160 t = unshare_expr (fd->loops[i].n2);
3161 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3162 false, GSI_CONTINUE_LINKING);
3163 tree v = fd->loops[i].v;
3164 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3165 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3166 false, GSI_CONTINUE_LINKING);
3167 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3168 stmt = gimple_build_cond_empty (t);
3169 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3170 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3171 expand_omp_regimplify_p, NULL, NULL)
3172 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3173 expand_omp_regimplify_p, NULL, NULL))
3174 gimple_regimplify_operands (stmt, &gsi);
3175 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3176 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3178 else
3179 make_edge (bb, body_bb, EDGE_FALLTHRU);
3180 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3181 last_bb = bb;
3184 return collapse_bb;
3187 /* Expand #pragma omp ordered depend(source). */
3189 static void
3190 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3191 tree *counts, location_t loc)
3193 enum built_in_function source_ix
3194 = fd->iter_type == long_integer_type_node
3195 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3196 gimple *g
3197 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3198 build_fold_addr_expr (counts[fd->ordered]));
3199 gimple_set_location (g, loc);
3200 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3203 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3205 static void
3206 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3207 tree *counts, tree c, location_t loc)
3209 auto_vec<tree, 10> args;
3210 enum built_in_function sink_ix
3211 = fd->iter_type == long_integer_type_node
3212 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3213 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3214 int i;
3215 gimple_stmt_iterator gsi2 = *gsi;
3216 bool warned_step = false;
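/* First pass over the depend vector is diagnostics only: warn when a
   sink offset names a lexically later iteration.  */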
3218 for (i = 0; i < fd->ordered; i++)
3220 tree step = NULL_TREE;
3221 off = TREE_PURPOSE (deps);
3222 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3224 step = TREE_OPERAND (off, 1);
3225 off = TREE_OPERAND (off, 0);
3227 if (!integer_zerop (off))
3229 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3230 || fd->loops[i].cond_code == GT_EXPR);
3231 bool forward = fd->loops[i].cond_code == LT_EXPR;
3232 if (step)
3234 /* Non-simple Fortran DO loops. If step is variable,
3235 we don't know even the direction at compile time, so can't
3236 warn. */
3237 if (TREE_CODE (step) != INTEGER_CST)
3238 break;
3239 forward = tree_int_cst_sgn (step) != -1;
3241 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3242 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3243 "waiting for lexically later iteration");
3244 break;
3246 deps = TREE_CHAIN (deps);
3248 /* If all offsets corresponding to the collapsed loops are zero,
3249 this depend clause can be ignored. FIXME: but there is still a
3250 flush needed. We need to emit one __sync_synchronize () for it
3251 though (perhaps conditionally)? Solve this together with the
3252 conservative dependence folding optimization.
3253 if (i >= fd->collapse)
3254 return; */
3256 deps = OMP_CLAUSE_DECL (c);
3257 gsi_prev (&gsi2);
3258 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3259 edge e2 = split_block_after_labels (e1->dest);
3261 gsi2 = gsi_after_labels (e1->dest);
3262 *gsi = gsi_last_bb (e1->src);
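/* Second pass: for each of the fd->ordered dimensions compute the sink
   iteration's coordinate (the IV plus the signed, step-scaled offset),
   accumulate into COND a guard that this coordinate lies inside the
   iteration space, and push the flattened coordinate as an argument of
   the GOMP_doacross_wait/GOMP_doacross_ull_wait call emitted below
   under that guard.  */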
3263 for (i = 0; i < fd->ordered; i++)
3265 tree itype = TREE_TYPE (fd->loops[i].v);
3266 tree step = NULL_TREE;
3267 tree orig_off = NULL_TREE;
3268 if (POINTER_TYPE_P (itype))
3269 itype = sizetype;
3270 if (i)
3271 deps = TREE_CHAIN (deps);
3272 off = TREE_PURPOSE (deps);
3273 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3275 step = TREE_OPERAND (off, 1);
3276 off = TREE_OPERAND (off, 0);
3277 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3278 && integer_onep (fd->loops[i].step)
3279 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3281 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3282 if (step)
3284 off = fold_convert_loc (loc, itype, off);
3285 orig_off = off;
3286 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3289 if (integer_zerop (off))
3290 t = boolean_true_node;
3291 else
3293 tree a;
3294 tree co = fold_convert_loc (loc, itype, off);
3295 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3297 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3298 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3299 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3300 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3301 co);
3303 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3304 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3305 fd->loops[i].v, co);
3306 else
3307 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3308 fd->loops[i].v, co);
3309 if (step)
3311 tree t1, t2;
3312 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3313 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3314 fd->loops[i].n1);
3315 else
3316 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3317 fd->loops[i].n2);
3318 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3319 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3320 fd->loops[i].n2);
3321 else
3322 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3323 fd->loops[i].n1);
3324 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3325 step, build_int_cst (TREE_TYPE (step), 0));
3326 if (TREE_CODE (step) != INTEGER_CST)
3328 t1 = unshare_expr (t1);
3329 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3330 false, GSI_CONTINUE_LINKING);
3331 t2 = unshare_expr (t2);
3332 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3333 false, GSI_CONTINUE_LINKING);
3335 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3336 t, t2, t1);
3338 else if (fd->loops[i].cond_code == LT_EXPR)
3340 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3341 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3342 fd->loops[i].n1);
3343 else
3344 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3345 fd->loops[i].n2);
3347 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3348 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3349 fd->loops[i].n2);
3350 else
3351 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3352 fd->loops[i].n1);
3354 if (cond)
3355 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3356 else
3357 cond = t;
3359 off = fold_convert_loc (loc, itype, off);
3361 if (step
3362 || (fd->loops[i].cond_code == LT_EXPR
3363 ? !integer_onep (fd->loops[i].step)
3364 : !integer_minus_onep (fd->loops[i].step)))
3366 if (step == NULL_TREE
3367 && TYPE_UNSIGNED (itype)
3368 && fd->loops[i].cond_code == GT_EXPR)
3369 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3370 fold_build1_loc (loc, NEGATE_EXPR, itype,
3371 s));
3372 else
3373 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3374 orig_off ? orig_off : off, s);
3375 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3376 build_int_cst (itype, 0));
3377 if (integer_zerop (t) && !warned_step)
3379 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3380 "refers to iteration never in the iteration "
3381 "space");
3382 warned_step = true;
3384 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3385 cond, t);
3388 if (i <= fd->collapse - 1 && fd->collapse > 1)
3389 t = fd->loop.v;
3390 else if (counts[i])
3391 t = counts[i];
3392 else
3394 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3395 fd->loops[i].v, fd->loops[i].n1);
3396 t = fold_convert_loc (loc, fd->iter_type, t);
3398 if (step)
3399 /* We already divided off by step earlier. */;
3400 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3401 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3402 fold_build1_loc (loc, NEGATE_EXPR, itype,
3403 s));
3404 else
3405 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3406 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3407 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3408 off = fold_convert_loc (loc, fd->iter_type, off);
3409 if (i <= fd->collapse - 1 && fd->collapse > 1)
3411 if (i)
3412 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3413 off);
3414 if (i < fd->collapse - 1)
3416 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3417 counts[i]);
3418 continue;
3421 off = unshare_expr (off);
3422 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3423 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3424 true, GSI_SAME_STMT);
3425 args.safe_push (t);
3427 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3428 gimple_set_location (g, loc);
3429 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3431 cond = unshare_expr (cond);
3432 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3433 GSI_CONTINUE_LINKING);
3434 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3435 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3436 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3437 e1->probability = e3->probability.invert ();
3438 e1->flags = EDGE_TRUE_VALUE;
3439 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3441 *gsi = gsi_after_labels (e2->dest);
3444 /* Expand all #pragma omp ordered depend(source) and
3445 #pragma omp ordered depend(sink:...) constructs in the current
3446 #pragma omp for ordered(n) region. */
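/* Illustrative example (an editorial addition, not part of the original
   source): in

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++)
       {
         #pragma omp ordered depend(sink: i - 1)
         ... use result of iteration i - 1 ...
         #pragma omp ordered depend(source)
       }

   the sink clause is expanded to a guarded GOMP_doacross_wait call for
   iteration i - 1 and the source clause to a GOMP_doacross_post of the
   current iteration counts.  */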
3448 static void
3449 expand_omp_ordered_source_sink (struct omp_region *region,
3450 struct omp_for_data *fd, tree *counts,
3451 basic_block cont_bb)
3453 struct omp_region *inner;
3454 int i;
3455 for (i = fd->collapse - 1; i < fd->ordered; i++)
3456 if (i == fd->collapse - 1 && fd->collapse > 1)
3457 counts[i] = NULL_TREE;
3458 else if (i >= fd->collapse && !cont_bb)
3459 counts[i] = build_zero_cst (fd->iter_type);
3460 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3461 && integer_onep (fd->loops[i].step))
3462 counts[i] = NULL_TREE;
3463 else
3464 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3465 tree atype
3466 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3467 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3468 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3470 for (inner = region->inner; inner; inner = inner->next)
3471 if (inner->type == GIMPLE_OMP_ORDERED)
3473 gomp_ordered *ord_stmt = inner->ord_stmt;
3474 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3475 location_t loc = gimple_location (ord_stmt);
3476 tree c;
3477 for (c = gimple_omp_ordered_clauses (ord_stmt);
3478 c; c = OMP_CLAUSE_CHAIN (c))
3479 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3480 break;
3481 if (c)
3482 expand_omp_ordered_source (&gsi, fd, counts, loc);
3483 for (c = gimple_omp_ordered_clauses (ord_stmt);
3484 c; c = OMP_CLAUSE_CHAIN (c))
3485 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3486 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3487 gsi_remove (&gsi, true);
3491 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3492 collapsed. */
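/* For example (an editorial illustration, not from the original source):
   with "#pragma omp for ordered(2)" on a doubly nested loop and no
   collapse clause, only the outermost loop is workshared; the inner
   loop is rebuilt here around the body, together with the .orditera
   array updates consumed by depend(source)/depend(sink).  */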
3494 static basic_block
3495 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3496 basic_block cont_bb, basic_block body_bb,
3497 bool ordered_lastprivate)
3499 if (fd->ordered == fd->collapse)
3500 return cont_bb;
3502 if (!cont_bb)
3504 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3505 for (int i = fd->collapse; i < fd->ordered; i++)
3507 tree type = TREE_TYPE (fd->loops[i].v);
3508 tree n1 = fold_convert (type, fd->loops[i].n1);
3509 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3510 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3511 size_int (i - fd->collapse + 1),
3512 NULL_TREE, NULL_TREE);
3513 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3515 return NULL;
3518 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3520 tree t, type = TREE_TYPE (fd->loops[i].v);
3521 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3522 expand_omp_build_assign (&gsi, fd->loops[i].v,
3523 fold_convert (type, fd->loops[i].n1));
3524 if (counts[i])
3525 expand_omp_build_assign (&gsi, counts[i],
3526 build_zero_cst (fd->iter_type));
3527 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3528 size_int (i - fd->collapse + 1),
3529 NULL_TREE, NULL_TREE);
3530 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3531 if (!gsi_end_p (gsi))
3532 gsi_prev (&gsi);
3533 else
3534 gsi = gsi_last_bb (body_bb);
3535 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3536 basic_block new_body = e1->dest;
3537 if (body_bb == cont_bb)
3538 cont_bb = new_body;
3539 edge e2 = NULL;
3540 basic_block new_header;
3541 if (EDGE_COUNT (cont_bb->preds) > 0)
3543 gsi = gsi_last_bb (cont_bb);
3544 if (POINTER_TYPE_P (type))
3545 t = fold_build_pointer_plus (fd->loops[i].v,
3546 fold_convert (sizetype,
3547 fd->loops[i].step));
3548 else
3549 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3550 fold_convert (type, fd->loops[i].step));
3551 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3552 if (counts[i])
3554 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3555 build_int_cst (fd->iter_type, 1));
3556 expand_omp_build_assign (&gsi, counts[i], t);
3557 t = counts[i];
3559 else
3561 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3562 fd->loops[i].v, fd->loops[i].n1);
3563 t = fold_convert (fd->iter_type, t);
3564 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3565 true, GSI_SAME_STMT);
3567 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3568 size_int (i - fd->collapse + 1),
3569 NULL_TREE, NULL_TREE);
3570 expand_omp_build_assign (&gsi, aref, t);
3571 gsi_prev (&gsi);
3572 e2 = split_block (cont_bb, gsi_stmt (gsi));
3573 new_header = e2->dest;
3575 else
3576 new_header = cont_bb;
3577 gsi = gsi_after_labels (new_header);
3578 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3579 true, GSI_SAME_STMT);
3580 tree n2
3581 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3582 true, NULL_TREE, true, GSI_SAME_STMT);
3583 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3584 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3585 edge e3 = split_block (new_header, gsi_stmt (gsi));
3586 cont_bb = e3->dest;
3587 remove_edge (e1);
3588 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3589 e3->flags = EDGE_FALSE_VALUE;
3590 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3591 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3592 e1->probability = e3->probability.invert ();
3594 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3595 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3597 if (e2)
3599 class loop *loop = alloc_loop ();
3600 loop->header = new_header;
3601 loop->latch = e2->src;
3602 add_loop (loop, body_bb->loop_father);
3606 /* If there are any lastprivate clauses and it is possible some loops
3607 might have zero iterations, ensure all the decls are initialized;
3608 otherwise we could crash evaluating C++ class iterators with lastprivate
3609 clauses. */
3610 bool need_inits = false;
3611 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3612 if (need_inits)
3614 tree type = TREE_TYPE (fd->loops[i].v);
3615 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3616 expand_omp_build_assign (&gsi, fd->loops[i].v,
3617 fold_convert (type, fd->loops[i].n1));
3619 else
3621 tree type = TREE_TYPE (fd->loops[i].v);
3622 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3623 boolean_type_node,
3624 fold_convert (type, fd->loops[i].n1),
3625 fold_convert (type, fd->loops[i].n2));
3626 if (!integer_onep (this_cond))
3627 need_inits = true;
3630 return cont_bb;
3633 /* A subroutine of expand_omp_for. Generate code for a parallel
3634 loop with any schedule. Given parameters:
3636 for (V = N1; V cond N2; V += STEP) BODY;
3638 where COND is "<" or ">", we generate pseudocode
3640 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3641 if (more) goto L0; else goto L3;
3643 V = istart0;
3644 iend = iend0;
3646 BODY;
3647 V += STEP;
3648 if (V cond iend) goto L1; else goto L2;
3650 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3653 If this is a combined omp parallel loop, instead of the call to
3654 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3655 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3656 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3657 inner GIMPLE_OMP_FOR and V += STEP; and
3658 if (V cond iend) goto L1; else goto L2; are removed.
3660 For collapsed loops, given parameters:
3661 collapse(3)
3662 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3663 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3664 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3665 BODY;
3667 we generate pseudocode
3669 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3670 if (cond3 is <)
3671 adj = STEP3 - 1;
3672 else
3673 adj = STEP3 + 1;
3674 count3 = (adj + N32 - N31) / STEP3;
3675 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3676 if (cond2 is <)
3677 adj = STEP2 - 1;
3678 else
3679 adj = STEP2 + 1;
3680 count2 = (adj + N22 - N21) / STEP2;
3681 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3682 if (cond1 is <)
3683 adj = STEP1 - 1;
3684 else
3685 adj = STEP1 + 1;
3686 count1 = (adj + N12 - N11) / STEP1;
3687 count = count1 * count2 * count3;
3688 goto Z1;
3690 count = 0;
3692 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3693 if (more) goto L0; else goto L3;
3695 V = istart0;
3696 T = V;
3697 V3 = N31 + (T % count3) * STEP3;
3698 T = T / count3;
3699 V2 = N21 + (T % count2) * STEP2;
3700 T = T / count2;
3701 V1 = N11 + T * STEP1;
3702 iend = iend0;
3704 BODY;
3705 V += 1;
3706 if (V < iend) goto L10; else goto L2;
3707 L10:
3708 V3 += STEP3;
3709 if (V3 cond3 N32) goto L1; else goto L11;
3710 L11:
3711 V3 = N31;
3712 V2 += STEP2;
3713 if (V2 cond2 N22) goto L1; else goto L12;
3714 L12:
3715 V2 = N21;
3716 V1 += STEP1;
3717 goto L1;
3719 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
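/* Concrete illustration (an editorial addition, not part of the original
   source): for

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) body (i);

   START_FN/NEXT_FN are the dynamic-schedule entry points, so the
   pseudocode above amounts to

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
         for (i = istart0; i < iend0; i++) body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */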
3724 static void
3725 expand_omp_for_generic (struct omp_region *region,
3726 struct omp_for_data *fd,
3727 enum built_in_function start_fn,
3728 enum built_in_function next_fn,
3729 tree sched_arg,
3730 gimple *inner_stmt)
3732 tree type, istart0, iend0, iend;
3733 tree t, vmain, vback, bias = NULL_TREE;
3734 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3735 basic_block l2_bb = NULL, l3_bb = NULL;
3736 gimple_stmt_iterator gsi;
3737 gassign *assign_stmt;
3738 bool in_combined_parallel = is_combined_parallel (region);
3739 bool broken_loop = region->cont == NULL;
3740 edge e, ne;
3741 tree *counts = NULL;
3742 int i;
3743 bool ordered_lastprivate = false;
3745 gcc_assert (!broken_loop || !in_combined_parallel);
3746 gcc_assert (fd->iter_type == long_integer_type_node
3747 || !in_combined_parallel);
3749 entry_bb = region->entry;
3750 cont_bb = region->cont;
3751 collapse_bb = NULL;
3752 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3753 gcc_assert (broken_loop
3754 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3755 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3756 l1_bb = single_succ (l0_bb);
3757 if (!broken_loop)
3759 l2_bb = create_empty_bb (cont_bb);
3760 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3761 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3762 == l1_bb));
3763 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3765 else
3766 l2_bb = NULL;
3767 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3768 exit_bb = region->exit;
3770 gsi = gsi_last_nondebug_bb (entry_bb);
3772 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3773 if (fd->ordered
3774 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3775 OMP_CLAUSE_LASTPRIVATE))
3776 ordered_lastprivate = true;
3777 tree reductions = NULL_TREE;
3778 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3779 tree memv = NULL_TREE;
3780 if (fd->lastprivate_conditional)
3782 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3783 OMP_CLAUSE__CONDTEMP_);
3784 if (fd->have_pointer_condtemp)
3785 condtemp = OMP_CLAUSE_DECL (c);
3786 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3787 cond_var = OMP_CLAUSE_DECL (c);
3789 if (sched_arg)
3791 if (fd->have_reductemp)
3793 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3794 OMP_CLAUSE__REDUCTEMP_);
3795 reductions = OMP_CLAUSE_DECL (c);
3796 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3797 gimple *g = SSA_NAME_DEF_STMT (reductions);
3798 reductions = gimple_assign_rhs1 (g);
3799 OMP_CLAUSE_DECL (c) = reductions;
3800 entry_bb = gimple_bb (g);
3801 edge e = split_block (entry_bb, g);
3802 if (region->entry == entry_bb)
3803 region->entry = e->dest;
3804 gsi = gsi_last_bb (entry_bb);
3806 else
3807 reductions = null_pointer_node;
3808 if (fd->have_pointer_condtemp)
3810 tree type = TREE_TYPE (condtemp);
3811 memv = create_tmp_var (type);
3812 TREE_ADDRESSABLE (memv) = 1;
3813 unsigned HOST_WIDE_INT sz
3814 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3815 sz *= fd->lastprivate_conditional;
3816 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3817 false);
3818 mem = build_fold_addr_expr (memv);
3820 else
3821 mem = null_pointer_node;
3823 if (fd->collapse > 1 || fd->ordered)
3825 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3826 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3828 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3829 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3830 zero_iter1_bb, first_zero_iter1,
3831 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3833 if (zero_iter1_bb)
3835 /* Some counts[i] vars might be uninitialized if
3836 some loop has zero iterations. But the body shouldn't
3837 be executed in that case, so just avoid uninit warnings. */
3838 for (i = first_zero_iter1;
3839 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3840 if (SSA_VAR_P (counts[i]))
3841 TREE_NO_WARNING (counts[i]) = 1;
3842 gsi_prev (&gsi);
3843 e = split_block (entry_bb, gsi_stmt (gsi));
3844 entry_bb = e->dest;
3845 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3846 gsi = gsi_last_nondebug_bb (entry_bb);
3847 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3848 get_immediate_dominator (CDI_DOMINATORS,
3849 zero_iter1_bb));
3851 if (zero_iter2_bb)
3853 /* Some counts[i] vars might be uninitialized if
3854 some loop has zero iterations. But the body shouldn't
3855 be executed in that case, so just avoid uninit warnings. */
3856 for (i = first_zero_iter2; i < fd->ordered; i++)
3857 if (SSA_VAR_P (counts[i]))
3858 TREE_NO_WARNING (counts[i]) = 1;
3859 if (zero_iter1_bb)
3860 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3861 else
3863 gsi_prev (&gsi);
3864 e = split_block (entry_bb, gsi_stmt (gsi));
3865 entry_bb = e->dest;
3866 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3867 gsi = gsi_last_nondebug_bb (entry_bb);
3868 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3869 get_immediate_dominator
3870 (CDI_DOMINATORS, zero_iter2_bb));
3873 if (fd->collapse == 1)
3875 counts[0] = fd->loop.n2;
3876 fd->loop = fd->loops[0];
3880 type = TREE_TYPE (fd->loop.v);
3881 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3882 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3883 TREE_ADDRESSABLE (istart0) = 1;
3884 TREE_ADDRESSABLE (iend0) = 1;
3886 /* See if we need to bias by LLONG_MIN. */
3887 if (fd->iter_type == long_long_unsigned_type_node
3888 && TREE_CODE (type) == INTEGER_TYPE
3889 && !TYPE_UNSIGNED (type)
3890 && fd->ordered == 0)
3892 tree n1, n2;
3894 if (fd->loop.cond_code == LT_EXPR)
3896 n1 = fd->loop.n1;
3897 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3899 else
3901 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3902 n2 = fd->loop.n1;
3904 if (TREE_CODE (n1) != INTEGER_CST
3905 || TREE_CODE (n2) != INTEGER_CST
3906 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3907 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
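/* Editorial note (not in the original source): e.g. for a signed
   long long iterator running from -3 to 4, adding the LLONG_MIN bias
   maps the bounds monotonically into the unsigned domain:
   -3 + 0x8000000000000000 == 0x7ffffffffffffffd is still below
   4 + 0x8000000000000000 == 0x8000000000000004, so the runtime's
   unsigned comparisons keep ordering iterations correctly.  */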
3910 gimple_stmt_iterator gsif = gsi;
3911 gsi_prev (&gsif);
3913 tree arr = NULL_TREE;
3914 if (in_combined_parallel)
3916 gcc_assert (fd->ordered == 0);
3917 /* In a combined parallel loop, emit a call to
3918 GOMP_loop_foo_next. */
3919 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3920 build_fold_addr_expr (istart0),
3921 build_fold_addr_expr (iend0));
3923 else
3925 tree t0, t1, t2, t3, t4;
3926 /* If this is not a combined parallel loop, emit a call to
3927 GOMP_loop_foo_start in ENTRY_BB. */
3928 t4 = build_fold_addr_expr (iend0);
3929 t3 = build_fold_addr_expr (istart0);
3930 if (fd->ordered)
3932 t0 = build_int_cst (unsigned_type_node,
3933 fd->ordered - fd->collapse + 1);
3934 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3935 fd->ordered
3936 - fd->collapse + 1),
3937 ".omp_counts");
3938 DECL_NAMELESS (arr) = 1;
3939 TREE_ADDRESSABLE (arr) = 1;
3940 TREE_STATIC (arr) = 1;
3941 vec<constructor_elt, va_gc> *v;
3942 vec_alloc (v, fd->ordered - fd->collapse + 1);
3943 int idx;
3945 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3947 tree c;
3948 if (idx == 0 && fd->collapse > 1)
3949 c = fd->loop.n2;
3950 else
3951 c = counts[idx + fd->collapse - 1];
3952 tree purpose = size_int (idx);
3953 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3954 if (TREE_CODE (c) != INTEGER_CST)
3955 TREE_STATIC (arr) = 0;
3958 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3959 if (!TREE_STATIC (arr))
3960 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3961 void_type_node, arr),
3962 true, NULL_TREE, true, GSI_SAME_STMT);
3963 t1 = build_fold_addr_expr (arr);
3964 t2 = NULL_TREE;
3966 else
3968 t2 = fold_convert (fd->iter_type, fd->loop.step);
3969 t1 = fd->loop.n2;
3970 t0 = fd->loop.n1;
3971 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3973 tree innerc
3974 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3975 OMP_CLAUSE__LOOPTEMP_);
3976 gcc_assert (innerc);
3977 t0 = OMP_CLAUSE_DECL (innerc);
3978 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3979 OMP_CLAUSE__LOOPTEMP_);
3980 gcc_assert (innerc);
3981 t1 = OMP_CLAUSE_DECL (innerc);
3983 if (POINTER_TYPE_P (TREE_TYPE (t0))
3984 && TYPE_PRECISION (TREE_TYPE (t0))
3985 != TYPE_PRECISION (fd->iter_type))
3987 /* Avoid casting pointers to an integer of a different size. */
3988 tree itype = signed_type_for (type);
3989 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3990 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3992 else
3994 t1 = fold_convert (fd->iter_type, t1);
3995 t0 = fold_convert (fd->iter_type, t0);
3997 if (bias)
3999 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4000 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4003 if (fd->iter_type == long_integer_type_node || fd->ordered)
4005 if (fd->chunk_size)
4007 t = fold_convert (fd->iter_type, fd->chunk_size);
4008 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4009 if (sched_arg)
4011 if (fd->ordered)
4012 t = build_call_expr (builtin_decl_explicit (start_fn),
4013 8, t0, t1, sched_arg, t, t3, t4,
4014 reductions, mem);
4015 else
4016 t = build_call_expr (builtin_decl_explicit (start_fn),
4017 9, t0, t1, t2, sched_arg, t, t3, t4,
4018 reductions, mem);
4020 else if (fd->ordered)
4021 t = build_call_expr (builtin_decl_explicit (start_fn),
4022 5, t0, t1, t, t3, t4);
4023 else
4024 t = build_call_expr (builtin_decl_explicit (start_fn),
4025 6, t0, t1, t2, t, t3, t4);
4027 else if (fd->ordered)
4028 t = build_call_expr (builtin_decl_explicit (start_fn),
4029 4, t0, t1, t3, t4);
4030 else
4031 t = build_call_expr (builtin_decl_explicit (start_fn),
4032 5, t0, t1, t2, t3, t4);
4034 else
4036 tree t5;
4037 tree c_bool_type;
4038 tree bfn_decl;
4040 /* The GOMP_loop_ull_*start functions have an additional boolean
4041 argument, true for < loops and false for > loops.
4042 In Fortran, the C bool type can be different from
4043 boolean_type_node. */
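/* For instance (an editorial illustration, not from the original
   source), a schedule(dynamic) loop needing the unsigned long long
   path would call
     GOMP_loop_ull_dynamic_start (true, 0, n, 1, chunk,
                                  &istart0, &iend0);
   for a < loop, the leading "true" being this extra argument.  */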
4044 bfn_decl = builtin_decl_explicit (start_fn);
4045 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4046 t5 = build_int_cst (c_bool_type,
4047 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4048 if (fd->chunk_size)
4050 tree bfn_decl = builtin_decl_explicit (start_fn);
4051 t = fold_convert (fd->iter_type, fd->chunk_size);
4052 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4053 if (sched_arg)
4054 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4055 t, t3, t4, reductions, mem);
4056 else
4057 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4059 else
4060 t = build_call_expr (builtin_decl_explicit (start_fn),
4061 6, t5, t0, t1, t2, t3, t4);
4064 if (TREE_TYPE (t) != boolean_type_node)
4065 t = fold_build2 (NE_EXPR, boolean_type_node,
4066 t, build_int_cst (TREE_TYPE (t), 0));
4067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4068 true, GSI_SAME_STMT);
4069 if (arr && !TREE_STATIC (arr))
4071 tree clobber = build_clobber (TREE_TYPE (arr));
4072 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4073 GSI_SAME_STMT);
4075 if (fd->have_pointer_condtemp)
4076 expand_omp_build_assign (&gsi, condtemp, memv, false);
4077 if (fd->have_reductemp)
4079 gimple *g = gsi_stmt (gsi);
4080 gsi_remove (&gsi, true);
4081 release_ssa_name (gimple_assign_lhs (g));
4083 entry_bb = region->entry;
4084 gsi = gsi_last_nondebug_bb (entry_bb);
4086 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4088 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4090 /* Remove the GIMPLE_OMP_FOR statement. */
4091 gsi_remove (&gsi, true);
4093 if (gsi_end_p (gsif))
4094 gsif = gsi_after_labels (gsi_bb (gsif));
4095 gsi_next (&gsif);
4097 /* Iteration setup for sequential loop goes in L0_BB. */
4098 tree startvar = fd->loop.v;
4099 tree endvar = NULL_TREE;
4101 if (gimple_omp_for_combined_p (fd->for_stmt))
4103 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4104 && gimple_omp_for_kind (inner_stmt)
4105 == GF_OMP_FOR_KIND_SIMD);
4106 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4107 OMP_CLAUSE__LOOPTEMP_);
4108 gcc_assert (innerc);
4109 startvar = OMP_CLAUSE_DECL (innerc);
4110 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4111 OMP_CLAUSE__LOOPTEMP_);
4112 gcc_assert (innerc);
4113 endvar = OMP_CLAUSE_DECL (innerc);
4116 gsi = gsi_start_bb (l0_bb);
4117 t = istart0;
4118 if (fd->ordered && fd->collapse == 1)
4119 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4120 fold_convert (fd->iter_type, fd->loop.step));
4121 else if (bias)
4122 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4123 if (fd->ordered && fd->collapse == 1)
4125 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4126 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4127 fd->loop.n1, fold_convert (sizetype, t));
4128 else
4130 t = fold_convert (TREE_TYPE (startvar), t);
4131 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4132 fd->loop.n1, t);
4135 else
4137 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4138 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4139 t = fold_convert (TREE_TYPE (startvar), t);
4141 t = force_gimple_operand_gsi (&gsi, t,
4142 DECL_P (startvar)
4143 && TREE_ADDRESSABLE (startvar),
4144 NULL_TREE, false, GSI_CONTINUE_LINKING);
4145 assign_stmt = gimple_build_assign (startvar, t);
4146 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147 if (cond_var)
4149 tree itype = TREE_TYPE (cond_var);
4150 /* For lastprivate(conditional:) itervar, we need some iteration
4151 counter that starts at unsigned non-zero and increases.
4152 Prefer as few IVs as possible, so if we can use startvar
4153 itself, use that, or startvar + constant (those would be
4154 incremented with step), and as a last resort use istart0 + 1
4155 incremented by 1. */
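/* E.g. (editorial illustration, not in the original source): for a
   < loop with constant n1 == -5, the last branch below computes
   c = 1 - (-5) = 6, so the counter startvar + 6 equals 1 on the
   first iteration and stays non-zero as required.  */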
4156 if ((fd->ordered && fd->collapse == 1)
4157 || bias
4158 || POINTER_TYPE_P (type)
4159 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4160 || fd->loop.cond_code != LT_EXPR)
4161 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4162 build_int_cst (itype, 1));
4163 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4164 t = fold_convert (itype, t);
4165 else
4167 tree c = fold_convert (itype, fd->loop.n1);
4168 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4169 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4171 t = force_gimple_operand_gsi (&gsi, t, false,
4172 NULL_TREE, false, GSI_CONTINUE_LINKING);
4173 assign_stmt = gimple_build_assign (cond_var, t);
4174 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4177 t = iend0;
4178 if (fd->ordered && fd->collapse == 1)
4179 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4180 fold_convert (fd->iter_type, fd->loop.step));
4181 else if (bias)
4182 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4183 if (fd->ordered && fd->collapse == 1)
4185 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4186 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4187 fd->loop.n1, fold_convert (sizetype, t));
4188 else
4190 t = fold_convert (TREE_TYPE (startvar), t);
4191 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4192 fd->loop.n1, t);
4195 else
4197 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4198 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4199 t = fold_convert (TREE_TYPE (startvar), t);
4201 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4202 false, GSI_CONTINUE_LINKING);
4203 if (endvar)
4205 assign_stmt = gimple_build_assign (endvar, iend);
4206 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4207 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4208 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4209 else
4210 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4211 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4213 /* Handle linear clause adjustments. */
4214 tree itercnt = NULL_TREE;
4215 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4216 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4217 c; c = OMP_CLAUSE_CHAIN (c))
4218 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4219 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4221 tree d = OMP_CLAUSE_DECL (c);
4222 bool is_ref = omp_is_reference (d);
4223 tree t = d, a, dest;
4224 if (is_ref)
4225 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4226 tree type = TREE_TYPE (t);
4227 if (POINTER_TYPE_P (type))
4228 type = sizetype;
4229 dest = unshare_expr (t);
4230 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4231 expand_omp_build_assign (&gsif, v, t);
4232 if (itercnt == NULL_TREE)
4234 itercnt = startvar;
4235 tree n1 = fd->loop.n1;
4236 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4238 itercnt
4239 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4240 itercnt);
4241 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4243 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4244 itercnt, n1);
4245 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4246 itercnt, fd->loop.step);
4247 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4248 NULL_TREE, false,
4249 GSI_CONTINUE_LINKING);
4251 a = fold_build2 (MULT_EXPR, type,
4252 fold_convert (type, itercnt),
4253 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4254 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4255 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4256 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4257 false, GSI_CONTINUE_LINKING);
4258 expand_omp_build_assign (&gsi, dest, t, true);
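/* Editorial note (not in the original source): for a clause such as
   "linear(x:2)", the code above first saves the incoming value of x
   into V at the start of the region, then sets
     x = V + ((startvar - n1) / step) * 2
   whenever a thread starts a new sub-range, which matches the
   linear-clause guarantee for the first iteration of that range.  */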
4260 if (fd->collapse > 1)
4261 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4263 if (fd->ordered)
4265 /* Until now, the counts array contained the number of iterations
4266 (or a variable holding it) for the ith loop. From now on, we need
4267 those counts only for the collapsed loops, and only for the 2nd
4268 up to the last collapsed one. Move those one element earlier;
4269 we'll use counts[fd->collapse - 1] for the first source/sink
4270 iteration counter and so on, and counts[fd->ordered]
4271 as the array holding the current counter values for
4272 depend(source). */
4273 if (fd->collapse > 1)
4274 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
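/* E.g. (editorial illustration, not in the original source): with
   collapse(2) ordered(3), counts[0] now holds the old counts[1]
   (the iteration count of the 2nd collapsed loop), counts[1] and
   counts[2] become the source/sink iteration counters, and counts[3]
   is the .orditera array updated for depend(source).  */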
4275 if (broken_loop)
4277 int i;
4278 for (i = fd->collapse; i < fd->ordered; i++)
4280 tree type = TREE_TYPE (fd->loops[i].v);
4281 tree this_cond
4282 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4283 fold_convert (type, fd->loops[i].n1),
4284 fold_convert (type, fd->loops[i].n2));
4285 if (!integer_onep (this_cond))
4286 break;
4288 if (i < fd->ordered)
4290 cont_bb
4291 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4292 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4293 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4294 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4295 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4296 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4297 make_edge (cont_bb, l1_bb, 0);
4298 l2_bb = create_empty_bb (cont_bb);
4299 broken_loop = false;
4302 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4303 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4304 ordered_lastprivate);
4305 if (counts[fd->collapse - 1])
4307 gcc_assert (fd->collapse == 1);
4308 gsi = gsi_last_bb (l0_bb);
4309 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4310 istart0, true);
4311 gsi = gsi_last_bb (cont_bb);
4312 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4313 build_int_cst (fd->iter_type, 1));
4314 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4315 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4316 size_zero_node, NULL_TREE, NULL_TREE);
4317 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4318 t = counts[fd->collapse - 1];
4320 else if (fd->collapse > 1)
4321 t = fd->loop.v;
4322 else
4324 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4325 fd->loops[0].v, fd->loops[0].n1);
4326 t = fold_convert (fd->iter_type, t);
4328 gsi = gsi_last_bb (l0_bb);
4329 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4330 size_zero_node, NULL_TREE, NULL_TREE);
4331 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4332 false, GSI_CONTINUE_LINKING);
4333 expand_omp_build_assign (&gsi, aref, t, true);
4336 if (!broken_loop)
4338 /* Code to control the increment and predicate for the sequential
4339 loop goes in the CONT_BB. */
4340 gsi = gsi_last_nondebug_bb (cont_bb);
4341 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4342 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4343 vmain = gimple_omp_continue_control_use (cont_stmt);
4344 vback = gimple_omp_continue_control_def (cont_stmt);
4346 if (cond_var)
4348 tree itype = TREE_TYPE (cond_var);
4349 tree t2;
4350 if ((fd->ordered && fd->collapse == 1)
4351 || bias
4352 || POINTER_TYPE_P (type)
4353 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4354 || fd->loop.cond_code != LT_EXPR)
4355 t2 = build_int_cst (itype, 1);
4356 else
4357 t2 = fold_convert (itype, fd->loop.step);
4358 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4359 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4360 NULL_TREE, true, GSI_SAME_STMT);
4361 assign_stmt = gimple_build_assign (cond_var, t2);
4362 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4365 if (!gimple_omp_for_combined_p (fd->for_stmt))
4367 if (POINTER_TYPE_P (type))
4368 t = fold_build_pointer_plus (vmain, fd->loop.step);
4369 else
4370 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4371 t = force_gimple_operand_gsi (&gsi, t,
4372 DECL_P (vback)
4373 && TREE_ADDRESSABLE (vback),
4374 NULL_TREE, true, GSI_SAME_STMT);
4375 assign_stmt = gimple_build_assign (vback, t);
4376 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4378 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4380 tree tem;
4381 if (fd->collapse > 1)
4382 tem = fd->loop.v;
4383 else
4385 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4386 fd->loops[0].v, fd->loops[0].n1);
4387 tem = fold_convert (fd->iter_type, tem);
4389 tree aref = build4 (ARRAY_REF, fd->iter_type,
4390 counts[fd->ordered], size_zero_node,
4391 NULL_TREE, NULL_TREE);
4392 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4393 true, GSI_SAME_STMT);
4394 expand_omp_build_assign (&gsi, aref, tem);
4397 t = build2 (fd->loop.cond_code, boolean_type_node,
4398 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4399 iend);
4400 gcond *cond_stmt = gimple_build_cond_empty (t);
4401 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4404 /* Remove GIMPLE_OMP_CONTINUE. */
4405 gsi_remove (&gsi, true);
4407 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4408 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4410 /* Emit code to get the next parallel iteration in L2_BB. */
4411 gsi = gsi_start_bb (l2_bb);
4413 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4414 build_fold_addr_expr (istart0),
4415 build_fold_addr_expr (iend0));
4416 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4417 false, GSI_CONTINUE_LINKING);
4418 if (TREE_TYPE (t) != boolean_type_node)
4419 t = fold_build2 (NE_EXPR, boolean_type_node,
4420 t, build_int_cst (TREE_TYPE (t), 0));
4421 gcond *cond_stmt = gimple_build_cond_empty (t);
4422 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4425 /* Add the loop cleanup function. */
4426 gsi = gsi_last_nondebug_bb (exit_bb);
4427 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4428 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4429 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4430 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4431 else
4432 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4433 gcall *call_stmt = gimple_build_call (t, 0);
4434 if (fd->ordered)
4436 tree arr = counts[fd->ordered];
4437 tree clobber = build_clobber (TREE_TYPE (arr));
4438 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4439 GSI_SAME_STMT);
4441 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4443 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4444 if (fd->have_reductemp)
4446 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4447 gimple_call_lhs (call_stmt));
4448 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4451 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4452 gsi_remove (&gsi, true);
4454 /* Connect the new blocks. */
4455 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4456 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4458 if (!broken_loop)
4460 gimple_seq phis;
4462 e = find_edge (cont_bb, l3_bb);
4463 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4465 phis = phi_nodes (l3_bb);
4466 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4468 gimple *phi = gsi_stmt (gsi);
4469 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4470 PHI_ARG_DEF_FROM_EDGE (phi, e));
4472 remove_edge (e);
4474 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4475 e = find_edge (cont_bb, l1_bb);
4476 if (e == NULL)
4478 e = BRANCH_EDGE (cont_bb);
4479 gcc_assert (single_succ (e->dest) == l1_bb);
4481 if (gimple_omp_for_combined_p (fd->for_stmt))
4483 remove_edge (e);
4484 e = NULL;
4486 else if (fd->collapse > 1)
4488 remove_edge (e);
4489 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4491 else
4492 e->flags = EDGE_TRUE_VALUE;
4493 if (e)
4495 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4496 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4498 else
4500 e = find_edge (cont_bb, l2_bb);
4501 e->flags = EDGE_FALLTHRU;
4503 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4505 if (gimple_in_ssa_p (cfun))
4507 /* Add phis to the outer loop that connect to the phis in the inner,
4508 original loop, and move the loop entry value of the inner phi to
4509 the loop entry value of the outer phi. */
4510 gphi_iterator psi;
4511 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4513 location_t locus;
4514 gphi *nphi;
4515 gphi *exit_phi = psi.phi ();
4517 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4518 continue;
4520 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4521 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4523 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4524 edge latch_to_l1 = find_edge (latch, l1_bb);
4525 gphi *inner_phi
4526 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4528 tree t = gimple_phi_result (exit_phi);
4529 tree new_res = copy_ssa_name (t, NULL);
4530 nphi = create_phi_node (new_res, l0_bb);
4532 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4533 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4534 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4535 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4536 add_phi_arg (nphi, t, entry_to_l0, locus);
4538 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4539 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4541 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4545 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4546 recompute_dominator (CDI_DOMINATORS, l2_bb));
4547 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4548 recompute_dominator (CDI_DOMINATORS, l3_bb));
4549 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4550 recompute_dominator (CDI_DOMINATORS, l0_bb));
4551 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4552 recompute_dominator (CDI_DOMINATORS, l1_bb));
4554 /* We enter expand_omp_for_generic with a loop. This original loop may
4555 have its own loop struct, or it may be part of an outer loop struct
4556 (which may be the fake loop). */
4557 class loop *outer_loop = entry_bb->loop_father;
4558 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4560 add_bb_to_loop (l2_bb, outer_loop);
4562 /* We've added a new loop around the original loop. Allocate the
4563 corresponding loop struct. */
4564 class loop *new_loop = alloc_loop ();
4565 new_loop->header = l0_bb;
4566 new_loop->latch = l2_bb;
4567 add_loop (new_loop, outer_loop);
4569 /* Allocate a loop structure for the original loop unless we already
4570 had one. */
4571 if (!orig_loop_has_loop_struct
4572 && !gimple_omp_for_combined_p (fd->for_stmt))
4574 class loop *orig_loop = alloc_loop ();
4575 orig_loop->header = l1_bb;
4576 /* The loop may have multiple latches. */
4577 add_loop (orig_loop, new_loop);
4582 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4583 compute the needed allocation size; this handles team allocations
4584 if !ALLOC and thread allocations if ALLOC. SZ is the initial size
4585 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of
4586 the allocation in bytes, and CNT the number of elements of each
4587 array: omp_get_num_threads () for !ALLOC, the number of iterations
4588 handled by the current thread for ALLOC. If PTR is non-NULL, it is
4589 the start of the allocation and this routine shall assign to
4590 OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers to the corresponding arrays. */
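/* For instance (an editorial illustration, not from the original
   source): with two thread-allocated _scantemp_ arrays of element
   types double (8 bytes) and int (4 bytes) and CNT elements each, a
   PTR == NULL call returns roughly SZ (padded for alignment)
   + CNT * 8 + CNT * 4, and the subsequent PTR != NULL call carves
   that block into the two arrays in the same order.  */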
4592 static tree
4593 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4594 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4595 gimple_stmt_iterator *gsi, bool alloc)
4597 tree eltsz = NULL_TREE;
4598 unsigned HOST_WIDE_INT preval = 0;
4599 if (ptr && sz)
4600 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4601 ptr, size_int (sz));
4602 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4603 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4604 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4605 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4607 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4608 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4609 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4611 unsigned HOST_WIDE_INT szl
4612 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4613 szl = least_bit_hwi (szl);
4614 if (szl)
4615 al = MIN (al, szl);
4617 if (ptr == NULL_TREE)
4619 if (eltsz == NULL_TREE)
4620 eltsz = TYPE_SIZE_UNIT (pointee_type);
4621 else
4622 eltsz = size_binop (PLUS_EXPR, eltsz,
4623 TYPE_SIZE_UNIT (pointee_type));
4625 if (preval == 0 && al <= alloc_align)
4627 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4628 sz += diff;
4629 if (diff && ptr)
4630 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4631 ptr, size_int (diff));
4633 else if (al > preval)
4635 if (ptr)
4637 ptr = fold_convert (pointer_sized_int_node, ptr);
4638 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4639 build_int_cst (pointer_sized_int_node,
4640 al - 1));
4641 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4642 build_int_cst (pointer_sized_int_node,
4643 -(HOST_WIDE_INT) al));
4644 ptr = fold_convert (ptr_type_node, ptr);
4646 else
4647 sz += al - 1;
4649 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4650 preval = al;
4651 else
4652 preval = 1;
4653 if (ptr)
4655 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4656 ptr = OMP_CLAUSE_DECL (c);
4657 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4658 size_binop (MULT_EXPR, cnt,
4659 TYPE_SIZE_UNIT (pointee_type)));
4663 if (ptr == NULL_TREE)
4665 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4666 if (sz)
4667 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4668 return eltsz;
4670 else
4671 return ptr;
4674 /* Return the last _looptemp_ clause if one has been created for
4675 lastprivate on distribute parallel for{, simd} or taskloop.
4676 FD is the loop data and INNERC should be the second _looptemp_
4677 clause (the one holding the end of the range).
4678 This is followed by collapse - 1 _looptemp_ clauses for the
4679 counts[1] and up, and for triangular loops followed by 4 further
4680 _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4681 one for factor and one for adjn1). After this there is optionally one
4682 _looptemp_ clause that this function returns. */
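/* Illustrative layout (an editorial addition, not in the original
   source): for a rectangular collapse(3) loop nest the chain is:
   start, end (INNERC), counts[1], counts[2], and then the optional
   lastprivate _looptemp_ clause that is returned here.  */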
4684 static tree
4685 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4687 gcc_assert (innerc);
4688 int count = fd->collapse - 1;
4689 if (fd->non_rect
4690 && fd->last_nonrect == fd->first_nonrect + 1
4691 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4692 count += 4;
4693 for (int i = 0; i < count; i++)
4695 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4696 OMP_CLAUSE__LOOPTEMP_);
4697 gcc_assert (innerc);
4699 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4700 OMP_CLAUSE__LOOPTEMP_);
4703 /* A subroutine of expand_omp_for. Generate code for a parallel
4704 loop with static schedule and no specified chunk size. Given
4705 parameters:
4707 for (V = N1; V cond N2; V += STEP) BODY;
4709 where COND is "<" or ">", we generate pseudocode
4711 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4712 if (cond is <)
4713 adj = STEP - 1;
4714 else
4715 adj = STEP + 1;
4716 if ((__typeof (V)) -1 > 0 && cond is >)
4717 n = -(adj + N2 - N1) / -STEP;
4718 else
4719 n = (adj + N2 - N1) / STEP;
4720 q = n / nthreads;
4721 tt = n % nthreads;
4722 if (threadid < tt) goto L3; else goto L4;
4724 tt = 0;
4725 q = q + 1;
4727 s0 = q * threadid + tt;
4728 e0 = s0 + q;
4729 V = s0 * STEP + N1;
4730 if (s0 >= e0) goto L2; else goto L0;
4732 e = e0 * STEP + N1;
4734 BODY;
4735 V += STEP;
4736 if (V cond e) goto L1;
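/* Worked example (an editorial addition, not part of the original
   source): for n = 10 iterations and nthreads = 4, q = 10 / 4 = 2 and
   tt = 10 % 4 = 2, so threads 0 and 1 (threadid < tt) bump q to 3 and
   clear tt, receiving [0,3) and [3,6), while threads 2 and 3 keep
   q = 2 and tt = 2, receiving [6,8) and [8,10).  */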
4740 static void
4741 expand_omp_for_static_nochunk (struct omp_region *region,
4742 struct omp_for_data *fd,
4743 gimple *inner_stmt)
4745 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4746 tree type, itype, vmain, vback;
4747 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4748 basic_block body_bb, cont_bb, collapse_bb = NULL;
4749 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4750 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4751 gimple_stmt_iterator gsi, gsip;
4752 edge ep;
4753 bool broken_loop = region->cont == NULL;
4754 tree *counts = NULL;
4755 tree n1, n2, step;
4756 tree reductions = NULL_TREE;
4757 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4759 itype = type = TREE_TYPE (fd->loop.v);
4760 if (POINTER_TYPE_P (type))
4761 itype = signed_type_for (type);
4763 entry_bb = region->entry;
4764 cont_bb = region->cont;
4765 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4766 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4767 gcc_assert (broken_loop
4768 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4769 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4770 body_bb = single_succ (seq_start_bb);
4771 if (!broken_loop)
4773 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4774 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4775 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4777 exit_bb = region->exit;
4779 /* Iteration space partitioning goes in ENTRY_BB. */
4780 gsi = gsi_last_nondebug_bb (entry_bb);
4781 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4782 gsip = gsi;
4783 gsi_prev (&gsip);
4785 if (fd->collapse > 1)
4787 int first_zero_iter = -1, dummy = -1;
4788 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4790 counts = XALLOCAVEC (tree, fd->collapse);
4791 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4792 fin_bb, first_zero_iter,
4793 dummy_bb, dummy, l2_dom_bb);
4794 t = NULL_TREE;
4796 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4797 t = integer_one_node;
4798 else
4799 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4800 fold_convert (type, fd->loop.n1),
4801 fold_convert (type, fd->loop.n2));
4802 if (fd->collapse == 1
4803 && TYPE_UNSIGNED (type)
4804 && (t == NULL_TREE || !integer_onep (t)))
4806 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4807 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4808 true, GSI_SAME_STMT);
4809 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4810 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4811 true, GSI_SAME_STMT);
4812 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4813 NULL_TREE, NULL_TREE);
4814 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4815 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4816 expand_omp_regimplify_p, NULL, NULL)
4817 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4818 expand_omp_regimplify_p, NULL, NULL))
4820 gsi = gsi_for_stmt (cond_stmt);
4821 gimple_regimplify_operands (cond_stmt, &gsi);
4823 ep = split_block (entry_bb, cond_stmt);
4824 ep->flags = EDGE_TRUE_VALUE;
4825 entry_bb = ep->dest;
4826 ep->probability = profile_probability::very_likely ();
4827 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4828 ep->probability = profile_probability::very_unlikely ();
4829 if (gimple_in_ssa_p (cfun))
4831 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4832 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4833 !gsi_end_p (gpi); gsi_next (&gpi))
4835 gphi *phi = gpi.phi ();
4836 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4837 ep, UNKNOWN_LOCATION);
4840 gsi = gsi_last_bb (entry_bb);
4843 if (fd->lastprivate_conditional)
4845 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4846 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4847 if (fd->have_pointer_condtemp)
4848 condtemp = OMP_CLAUSE_DECL (c);
4849 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4850 cond_var = OMP_CLAUSE_DECL (c);
4852 if (fd->have_reductemp
4853 /* For scan, we don't want to reinitialize condtemp before the
4854 second loop. */
4855 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4856 || fd->have_nonctrl_scantemp)
4858 tree t1 = build_int_cst (long_integer_type_node, 0);
4859 tree t2 = build_int_cst (long_integer_type_node, 1);
4860 tree t3 = build_int_cstu (long_integer_type_node,
4861 (HOST_WIDE_INT_1U << 31) + 1);
4862 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4863 gimple_stmt_iterator gsi2 = gsi_none ();
4864 gimple *g = NULL;
4865 tree mem = null_pointer_node, memv = NULL_TREE;
4866 unsigned HOST_WIDE_INT condtemp_sz = 0;
4867 unsigned HOST_WIDE_INT alloc_align = 0;
4868 if (fd->have_reductemp)
4870 gcc_assert (!fd->have_nonctrl_scantemp);
4871 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4872 reductions = OMP_CLAUSE_DECL (c);
4873 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4874 g = SSA_NAME_DEF_STMT (reductions);
4875 reductions = gimple_assign_rhs1 (g);
4876 OMP_CLAUSE_DECL (c) = reductions;
4877 gsi2 = gsi_for_stmt (g);
4879 else
4881 if (gsi_end_p (gsip))
4882 gsi2 = gsi_after_labels (region->entry);
4883 else
4884 gsi2 = gsip;
4885 reductions = null_pointer_node;
4887 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4889 tree type;
4890 if (fd->have_pointer_condtemp)
4891 type = TREE_TYPE (condtemp);
4892 else
4893 type = ptr_type_node;
4894 memv = create_tmp_var (type);
4895 TREE_ADDRESSABLE (memv) = 1;
4896 unsigned HOST_WIDE_INT sz = 0;
4897 tree size = NULL_TREE;
4898 if (fd->have_pointer_condtemp)
4900 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4901 sz *= fd->lastprivate_conditional;
4902 condtemp_sz = sz;
4904 if (fd->have_nonctrl_scantemp)
4906 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4907 gimple *g = gimple_build_call (nthreads, 0);
4908 nthreads = create_tmp_var (integer_type_node);
4909 gimple_call_set_lhs (g, nthreads);
4910 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4911 nthreads = fold_convert (sizetype, nthreads);
4912 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4913 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4914 alloc_align, nthreads, NULL,
4915 false);
4916 size = fold_convert (type, size);
4918 else
4919 size = build_int_cst (type, sz);
4920 expand_omp_build_assign (&gsi2, memv, size, false);
4921 mem = build_fold_addr_expr (memv);
4923 tree t
4924 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4925 9, t1, t2, t2, t3, t1, null_pointer_node,
4926 null_pointer_node, reductions, mem);
4927 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4928 true, GSI_SAME_STMT);
4929 if (fd->have_pointer_condtemp)
4930 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4931 if (fd->have_nonctrl_scantemp)
4933 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4934 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4935 alloc_align, nthreads, &gsi2, false);
4937 if (fd->have_reductemp)
4939 gsi_remove (&gsi2, true);
4940 release_ssa_name (gimple_assign_lhs (g));
4943 switch (gimple_omp_for_kind (fd->for_stmt))
4945 case GF_OMP_FOR_KIND_FOR:
4946 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4947 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4948 break;
4949 case GF_OMP_FOR_KIND_DISTRIBUTE:
4950 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4951 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4952 break;
4953 default:
4954 gcc_unreachable ();
4956 nthreads = build_call_expr (nthreads, 0);
4957 nthreads = fold_convert (itype, nthreads);
4958 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4959 true, GSI_SAME_STMT);
4960 threadid = build_call_expr (threadid, 0);
4961 threadid = fold_convert (itype, threadid);
4962 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4963 true, GSI_SAME_STMT);
4965 n1 = fd->loop.n1;
4966 n2 = fd->loop.n2;
4967 step = fd->loop.step;
4968 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4970 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4971 OMP_CLAUSE__LOOPTEMP_);
4972 gcc_assert (innerc);
4973 n1 = OMP_CLAUSE_DECL (innerc);
4974 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4975 OMP_CLAUSE__LOOPTEMP_);
4976 gcc_assert (innerc);
4977 n2 = OMP_CLAUSE_DECL (innerc);
4979 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4980 true, NULL_TREE, true, GSI_SAME_STMT);
4981 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4982 true, NULL_TREE, true, GSI_SAME_STMT);
4983 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4984 true, NULL_TREE, true, GSI_SAME_STMT);
4986 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4987 t = fold_build2 (PLUS_EXPR, itype, step, t);
4988 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4989 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4990 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4991 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4992 fold_build1 (NEGATE_EXPR, itype, t),
4993 fold_build1 (NEGATE_EXPR, itype, step));
4994 else
4995 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4996 t = fold_convert (itype, t);
4997 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4999 q = create_tmp_reg (itype, "q");
5000 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5001 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5002 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5004 tt = create_tmp_reg (itype, "tt");
5005 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5006 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5007 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5009 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5010 gcond *cond_stmt = gimple_build_cond_empty (t);
5011 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5013 second_bb = split_block (entry_bb, cond_stmt)->dest;
5014 gsi = gsi_last_nondebug_bb (second_bb);
5015 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5017 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5018 GSI_SAME_STMT);
5019 gassign *assign_stmt
5020 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5021 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5023 third_bb = split_block (second_bb, assign_stmt)->dest;
5024 gsi = gsi_last_nondebug_bb (third_bb);
5025 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5027 if (fd->have_nonctrl_scantemp)
5029 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5030 tree controlp = NULL_TREE, controlb = NULL_TREE;
5031 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5032 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5033 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5035 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5036 controlb = OMP_CLAUSE_DECL (c);
5037 else
5038 controlp = OMP_CLAUSE_DECL (c);
5039 if (controlb && controlp)
5040 break;
5042 gcc_assert (controlp && controlb);
5043 tree cnt = create_tmp_var (sizetype);
5044 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5045 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5046 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5047 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5048 alloc_align, cnt, NULL, true);
5049 tree size = create_tmp_var (sizetype);
5050 expand_omp_build_assign (&gsi, size, sz, false);
5051 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5052 size, size_int (16384));
5053 expand_omp_build_assign (&gsi, controlb, cmp);
5054 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5055 NULL_TREE, NULL_TREE);
5056 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5057 fourth_bb = split_block (third_bb, g)->dest;
5058 gsi = gsi_last_nondebug_bb (fourth_bb);
5059 /* FIXME: Once we have allocators, this should use allocator. */
5060 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5061 gimple_call_set_lhs (g, controlp);
5062 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5063 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5064 &gsi, true);
5065 gsi_prev (&gsi);
5066 g = gsi_stmt (gsi);
5067 fifth_bb = split_block (fourth_bb, g)->dest;
5068 gsi = gsi_last_nondebug_bb (fifth_bb);
5070 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5071 gimple_call_set_lhs (g, controlp);
5072 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5073 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5074 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5075 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5076 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5078 tree tmp = create_tmp_var (sizetype);
5079 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5080 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5081 TYPE_SIZE_UNIT (pointee_type));
5082 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5083 g = gimple_build_call (alloca_decl, 2, tmp,
5084 size_int (TYPE_ALIGN (pointee_type)));
5085 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5086 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5089 sixth_bb = split_block (fifth_bb, g)->dest;
5090 gsi = gsi_last_nondebug_bb (sixth_bb);
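/* Editorial summary (not in the original source): the code above sizes
   this thread's scan arrays from q, then either malloc()s the block
   (size > 16384, CONTROLB true) or carves it from the stack via
   __builtin_stack_save and alloca-with-align, remembering the choice
   in CONTROLB/CONTROLP for the matching deallocation.  */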
5093 t = build2 (MULT_EXPR, itype, q, threadid);
5094 t = build2 (PLUS_EXPR, itype, t, tt);
5095 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5097 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5098 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
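/* For example (purely illustrative values): with n = 10 iterations and
   nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2.  Threads 0 and 1
   satisfy threadid < tt, so for them tt becomes 0 and q becomes 3, and
   s0 = q * threadid + tt, e0 = s0 + q yield the ranges [0,3) and [3,6);
   threads 2 and 3 keep q = 2 and tt = 2, yielding [6,8) and [8,10).  */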
5100 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5101 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5103 /* Remove the GIMPLE_OMP_FOR statement. */
5104 gsi_remove (&gsi, true);
5106 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5107 gsi = gsi_start_bb (seq_start_bb);
5109 tree startvar = fd->loop.v;
5110 tree endvar = NULL_TREE;
5112 if (gimple_omp_for_combined_p (fd->for_stmt))
5114 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5115 ? gimple_omp_parallel_clauses (inner_stmt)
5116 : gimple_omp_for_clauses (inner_stmt);
5117 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5118 gcc_assert (innerc);
5119 startvar = OMP_CLAUSE_DECL (innerc);
5120 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5121 OMP_CLAUSE__LOOPTEMP_);
5122 gcc_assert (innerc);
5123 endvar = OMP_CLAUSE_DECL (innerc);
5124 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5125 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5127 innerc = find_lastprivate_looptemp (fd, innerc);
5128 if (innerc)
5130 /* If needed (distribute parallel for with lastprivate),
5131 propagate down the total number of iterations. */
5132 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5133 fd->loop.n2);
5134 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5135 GSI_CONTINUE_LINKING);
5136 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5137 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5141 t = fold_convert (itype, s0);
5142 t = fold_build2 (MULT_EXPR, itype, t, step);
5143 if (POINTER_TYPE_P (type))
5145 t = fold_build_pointer_plus (n1, t);
5146 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5147 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5148 t = fold_convert (signed_type_for (type), t);
5150 else
5151 t = fold_build2 (PLUS_EXPR, type, t, n1);
5152 t = fold_convert (TREE_TYPE (startvar), t);
5153 t = force_gimple_operand_gsi (&gsi, t,
5154 DECL_P (startvar)
5155 && TREE_ADDRESSABLE (startvar),
5156 NULL_TREE, false, GSI_CONTINUE_LINKING);
5157 assign_stmt = gimple_build_assign (startvar, t);
5158 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5159 if (cond_var)
5161 tree itype = TREE_TYPE (cond_var);
5162 /* For the lastprivate(conditional:) itervar, we need an iteration
5163 counter that starts at an unsigned non-zero value and increases.
5164 Prefer as few IVs as possible, so if we can use startvar
5165 itself, use that, or startvar + constant (those would be
5166 incremented with step), and as a last resort use s0 + 1,
5167 incremented by 1 each iteration. */
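/* E.g. (purely illustrative): for (V = 5; V < N; V += 2) the value
   s0 * STEP + N1 computed above is already a positive, increasing
   counter and is used directly; for (V = -4; V < N; V++) it is biased
   by 1 - N1 = 5 so that the counter starts at 1 rather than at a
   non-positive value.  */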
5168 if (POINTER_TYPE_P (type)
5169 || TREE_CODE (n1) != INTEGER_CST
5170 || fd->loop.cond_code != LT_EXPR)
5171 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5172 build_int_cst (itype, 1));
5173 else if (tree_int_cst_sgn (n1) == 1)
5174 t = fold_convert (itype, t);
5175 else
5177 tree c = fold_convert (itype, n1);
5178 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5179 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5181 t = force_gimple_operand_gsi (&gsi, t, false,
5182 NULL_TREE, false, GSI_CONTINUE_LINKING);
5183 assign_stmt = gimple_build_assign (cond_var, t);
5184 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5187 t = fold_convert (itype, e0);
5188 t = fold_build2 (MULT_EXPR, itype, t, step);
5189 if (POINTER_TYPE_P (type))
5191 t = fold_build_pointer_plus (n1, t);
5192 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5193 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5194 t = fold_convert (signed_type_for (type), t);
5196 else
5197 t = fold_build2 (PLUS_EXPR, type, t, n1);
5198 t = fold_convert (TREE_TYPE (startvar), t);
5199 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5200 false, GSI_CONTINUE_LINKING);
5201 if (endvar)
5203 assign_stmt = gimple_build_assign (endvar, e);
5204 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5205 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5206 assign_stmt = gimple_build_assign (fd->loop.v, e);
5207 else
5208 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5209 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5211 /* Handle linear clause adjustments. */
5212 tree itercnt = NULL_TREE;
5213 tree *nonrect_bounds = NULL;
5214 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5215 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5216 c; c = OMP_CLAUSE_CHAIN (c))
5217 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5218 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5220 tree d = OMP_CLAUSE_DECL (c);
5221 bool is_ref = omp_is_reference (d);
5222 tree t = d, a, dest;
5223 if (is_ref)
5224 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5225 if (itercnt == NULL_TREE)
5227 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5229 itercnt = fold_build2 (MINUS_EXPR, itype,
5230 fold_convert (itype, n1),
5231 fold_convert (itype, fd->loop.n1));
5232 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5233 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5234 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5235 NULL_TREE, false,
5236 GSI_CONTINUE_LINKING);
5238 else
5239 itercnt = s0;
5241 tree type = TREE_TYPE (t);
5242 if (POINTER_TYPE_P (type))
5243 type = sizetype;
5244 a = fold_build2 (MULT_EXPR, type,
5245 fold_convert (type, itercnt),
5246 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5247 dest = unshare_expr (t);
5248 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5249 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5250 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5251 false, GSI_CONTINUE_LINKING);
5252 expand_omp_build_assign (&gsi, dest, t, true);
5254 if (fd->collapse > 1)
5256 if (fd->non_rect)
5258 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5259 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5261 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5262 startvar);
5265 if (!broken_loop)
5267 /* The code controlling the sequential loop replaces the
5268 GIMPLE_OMP_CONTINUE. */
5269 gsi = gsi_last_nondebug_bb (cont_bb);
5270 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5271 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5272 vmain = gimple_omp_continue_control_use (cont_stmt);
5273 vback = gimple_omp_continue_control_def (cont_stmt);
5275 if (cond_var)
5277 tree itype = TREE_TYPE (cond_var);
5278 tree t2;
5279 if (POINTER_TYPE_P (type)
5280 || TREE_CODE (n1) != INTEGER_CST
5281 || fd->loop.cond_code != LT_EXPR)
5282 t2 = build_int_cst (itype, 1);
5283 else
5284 t2 = fold_convert (itype, step);
5285 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5286 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5287 NULL_TREE, true, GSI_SAME_STMT);
5288 assign_stmt = gimple_build_assign (cond_var, t2);
5289 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5292 if (!gimple_omp_for_combined_p (fd->for_stmt))
5294 if (POINTER_TYPE_P (type))
5295 t = fold_build_pointer_plus (vmain, step);
5296 else
5297 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5298 t = force_gimple_operand_gsi (&gsi, t,
5299 DECL_P (vback)
5300 && TREE_ADDRESSABLE (vback),
5301 NULL_TREE, true, GSI_SAME_STMT);
5302 assign_stmt = gimple_build_assign (vback, t);
5303 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5305 t = build2 (fd->loop.cond_code, boolean_type_node,
5306 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5307 ? t : vback, e);
5308 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5311 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5312 gsi_remove (&gsi, true);
5314 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5315 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5316 cont_bb, body_bb);
5319 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5320 gsi = gsi_last_nondebug_bb (exit_bb);
5321 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5323 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5324 if (fd->have_reductemp
5325 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5326 && !fd->have_nonctrl_scantemp))
5328 tree fn;
5329 if (t)
5330 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5331 else
5332 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5333 gcall *g = gimple_build_call (fn, 0);
5334 if (t)
5336 gimple_call_set_lhs (g, t);
5337 if (fd->have_reductemp)
5338 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5339 NOP_EXPR, t),
5340 GSI_SAME_STMT);
5342 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5344 else
5345 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5347 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5348 && !fd->have_nonctrl_scantemp)
5350 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5351 gcall *g = gimple_build_call (fn, 0);
5352 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5354 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5356 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5357 tree controlp = NULL_TREE, controlb = NULL_TREE;
5358 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5359 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5360 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5362 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5363 controlb = OMP_CLAUSE_DECL (c);
5364 else
5365 controlp = OMP_CLAUSE_DECL (c);
5366 if (controlb && controlp)
5367 break;
5369 gcc_assert (controlp && controlb);
5370 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5371 NULL_TREE, NULL_TREE);
5372 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5373 exit1_bb = split_block (exit_bb, g)->dest;
5374 gsi = gsi_after_labels (exit1_bb);
5375 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5376 controlp);
5377 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5378 exit2_bb = split_block (exit1_bb, g)->dest;
5379 gsi = gsi_after_labels (exit2_bb);
5380 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5381 controlp);
5382 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5383 exit3_bb = split_block (exit2_bb, g)->dest;
5384 gsi = gsi_after_labels (exit3_bb);
5386 gsi_remove (&gsi, true);
5388 /* Connect all the blocks. */
5389 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5390 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5391 ep = find_edge (entry_bb, second_bb);
5392 ep->flags = EDGE_TRUE_VALUE;
5393 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5394 if (fourth_bb)
5396 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5397 ep->probability
5398 = profile_probability::guessed_always ().apply_scale (1, 2);
5399 ep = find_edge (third_bb, fourth_bb);
5400 ep->flags = EDGE_TRUE_VALUE;
5401 ep->probability
5402 = profile_probability::guessed_always ().apply_scale (1, 2);
5403 ep = find_edge (fourth_bb, fifth_bb);
5404 redirect_edge_and_branch (ep, sixth_bb);
5406 else
5407 sixth_bb = third_bb;
5408 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5409 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5410 if (exit1_bb)
5412 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (exit_bb, exit1_bb);
5416 ep->flags = EDGE_TRUE_VALUE;
5417 ep->probability
5418 = profile_probability::guessed_always ().apply_scale (1, 2);
5419 ep = find_edge (exit1_bb, exit2_bb);
5420 redirect_edge_and_branch (ep, exit3_bb);
5423 if (!broken_loop)
5425 ep = find_edge (cont_bb, body_bb);
5426 if (ep == NULL)
5428 ep = BRANCH_EDGE (cont_bb);
5429 gcc_assert (single_succ (ep->dest) == body_bb);
5431 if (gimple_omp_for_combined_p (fd->for_stmt))
5433 remove_edge (ep);
5434 ep = NULL;
5436 else if (fd->collapse > 1)
5438 remove_edge (ep);
5439 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5441 else
5442 ep->flags = EDGE_TRUE_VALUE;
5443 find_edge (cont_bb, fin_bb)->flags
5444 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5447 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5448 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5449 if (fourth_bb)
5451 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5452 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5454 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5456 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5457 recompute_dominator (CDI_DOMINATORS, body_bb));
5458 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5459 recompute_dominator (CDI_DOMINATORS, fin_bb));
5460 if (exit1_bb)
5462 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5463 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5466 class loop *loop = body_bb->loop_father;
5467 if (loop != entry_bb->loop_father)
5469 gcc_assert (broken_loop || loop->header == body_bb);
5470 gcc_assert (broken_loop
5471 || loop->latch == region->cont
5472 || single_pred (loop->latch) == region->cont);
5473 return;
5476 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5478 loop = alloc_loop ();
5479 loop->header = body_bb;
5480 if (collapse_bb == NULL)
5481 loop->latch = cont_bb;
5482 add_loop (loop, body_bb->loop_father);
5486 /* Return phi in E->DEST with ARG on edge E. */
5488 static gphi *
5489 find_phi_with_arg_on_edge (tree arg, edge e)
5491 basic_block bb = e->dest;
5493 for (gphi_iterator gpi = gsi_start_phis (bb);
5494 !gsi_end_p (gpi);
5495 gsi_next (&gpi))
5497 gphi *phi = gpi.phi ();
5498 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5499 return phi;
5502 return NULL;
5505 /* A subroutine of expand_omp_for. Generate code for a parallel
5506 loop with static schedule and a specified chunk size. Given
5507 parameters:
5509 for (V = N1; V cond N2; V += STEP) BODY;
5511 where COND is "<" or ">", we generate pseudocode
5513 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5514 if (cond is <)
5515 adj = STEP - 1;
5516 else
5517 adj = STEP + 1;
5518 if ((__typeof (V)) -1 > 0 && cond is >)
5519 n = -(adj + N2 - N1) / -STEP;
5520 else
5521 n = (adj + N2 - N1) / STEP;
5522 trip = 0;
5523 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5524 here so that V is defined
5525 if the loop is not entered
5526 L0:
5527 s0 = (trip * nthreads + threadid) * CHUNK;
5528 e0 = min (s0 + CHUNK, n);
5529 if (s0 < n) goto L1; else goto L4;
5530 L1:
5531 V = s0 * STEP + N1;
5532 e = e0 * STEP + N1;
5533 L2:
5534 BODY;
5535 V += STEP;
5536 if (V cond e) goto L2; else goto L3;
5537 L3:
5538 trip += 1;
5539 goto L0;
5540 L4:
5541 */
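/* For example (purely illustrative values): with nthreads = 2,
   CHUNK = 3 and n = 10, thread 0 runs chunks [0,3) and [6,9) on trips
   0 and 1, and thread 1 runs [3,6) and [9,10); on trip 2 both threads
   compute s0 >= n and leave the loop at L4.  */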
5543 static void
5544 expand_omp_for_static_chunk (struct omp_region *region,
5545 struct omp_for_data *fd, gimple *inner_stmt)
5547 tree n, s0, e0, e, t;
5548 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5549 tree type, itype, vmain, vback, vextra;
5550 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5551 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5552 gimple_stmt_iterator gsi, gsip;
5553 edge se;
5554 bool broken_loop = region->cont == NULL;
5555 tree *counts = NULL;
5556 tree n1, n2, step;
5557 tree reductions = NULL_TREE;
5558 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5560 itype = type = TREE_TYPE (fd->loop.v);
5561 if (POINTER_TYPE_P (type))
5562 itype = signed_type_for (type);
5564 entry_bb = region->entry;
5565 se = split_block (entry_bb, last_stmt (entry_bb));
5566 entry_bb = se->src;
5567 iter_part_bb = se->dest;
5568 cont_bb = region->cont;
5569 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5570 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5571 gcc_assert (broken_loop
5572 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5573 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5574 body_bb = single_succ (seq_start_bb);
5575 if (!broken_loop)
5577 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5578 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5579 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5580 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5582 exit_bb = region->exit;
5584 /* Trip and adjustment setup goes in ENTRY_BB. */
5585 gsi = gsi_last_nondebug_bb (entry_bb);
5586 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5587 gsip = gsi;
5588 gsi_prev (&gsip);
5590 if (fd->collapse > 1)
5592 int first_zero_iter = -1, dummy = -1;
5593 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5595 counts = XALLOCAVEC (tree, fd->collapse);
5596 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5597 fin_bb, first_zero_iter,
5598 dummy_bb, dummy, l2_dom_bb);
5599 t = NULL_TREE;
5601 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5602 t = integer_one_node;
5603 else
5604 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5605 fold_convert (type, fd->loop.n1),
5606 fold_convert (type, fd->loop.n2));
5607 if (fd->collapse == 1
5608 && TYPE_UNSIGNED (type)
5609 && (t == NULL_TREE || !integer_onep (t)))
5611 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5612 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5613 true, GSI_SAME_STMT);
5614 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5615 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5616 true, GSI_SAME_STMT);
5617 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5618 NULL_TREE, NULL_TREE);
5619 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5620 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5621 expand_omp_regimplify_p, NULL, NULL)
5622 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5623 expand_omp_regimplify_p, NULL, NULL))
5625 gsi = gsi_for_stmt (cond_stmt);
5626 gimple_regimplify_operands (cond_stmt, &gsi);
5628 se = split_block (entry_bb, cond_stmt);
5629 se->flags = EDGE_TRUE_VALUE;
5630 entry_bb = se->dest;
5631 se->probability = profile_probability::very_likely ();
5632 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5633 se->probability = profile_probability::very_unlikely ();
5634 if (gimple_in_ssa_p (cfun))
5636 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5637 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5638 !gsi_end_p (gpi); gsi_next (&gpi))
5640 gphi *phi = gpi.phi ();
5641 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5642 se, UNKNOWN_LOCATION);
5645 gsi = gsi_last_bb (entry_bb);
5648 if (fd->lastprivate_conditional)
5650 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5651 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5652 if (fd->have_pointer_condtemp)
5653 condtemp = OMP_CLAUSE_DECL (c);
5654 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5655 cond_var = OMP_CLAUSE_DECL (c);
5657 if (fd->have_reductemp || fd->have_pointer_condtemp)
5659 tree t1 = build_int_cst (long_integer_type_node, 0);
5660 tree t2 = build_int_cst (long_integer_type_node, 1);
5661 tree t3 = build_int_cstu (long_integer_type_node,
5662 (HOST_WIDE_INT_1U << 31) + 1);
5663 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5664 gimple_stmt_iterator gsi2 = gsi_none ();
5665 gimple *g = NULL;
5666 tree mem = null_pointer_node, memv = NULL_TREE;
5667 if (fd->have_reductemp)
5669 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5670 reductions = OMP_CLAUSE_DECL (c);
5671 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5672 g = SSA_NAME_DEF_STMT (reductions);
5673 reductions = gimple_assign_rhs1 (g);
5674 OMP_CLAUSE_DECL (c) = reductions;
5675 gsi2 = gsi_for_stmt (g);
5677 else
5679 if (gsi_end_p (gsip))
5680 gsi2 = gsi_after_labels (region->entry);
5681 else
5682 gsi2 = gsip;
5683 reductions = null_pointer_node;
5685 if (fd->have_pointer_condtemp)
5687 tree type = TREE_TYPE (condtemp);
5688 memv = create_tmp_var (type);
5689 TREE_ADDRESSABLE (memv) = 1;
5690 unsigned HOST_WIDE_INT sz
5691 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5692 sz *= fd->lastprivate_conditional;
5693 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5694 false);
5695 mem = build_fold_addr_expr (memv);
5697 tree t
5698 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5699 9, t1, t2, t2, t3, t1, null_pointer_node,
5700 null_pointer_node, reductions, mem);
5701 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5702 true, GSI_SAME_STMT);
5703 if (fd->have_pointer_condtemp)
5704 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5705 if (fd->have_reductemp)
5707 gsi_remove (&gsi2, true);
5708 release_ssa_name (gimple_assign_lhs (g));
5711 switch (gimple_omp_for_kind (fd->for_stmt))
5713 case GF_OMP_FOR_KIND_FOR:
5714 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5715 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5716 break;
5717 case GF_OMP_FOR_KIND_DISTRIBUTE:
5718 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5719 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5720 break;
5721 default:
5722 gcc_unreachable ();
5724 nthreads = build_call_expr (nthreads, 0);
5725 nthreads = fold_convert (itype, nthreads);
5726 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5727 true, GSI_SAME_STMT);
5728 threadid = build_call_expr (threadid, 0);
5729 threadid = fold_convert (itype, threadid);
5730 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5731 true, GSI_SAME_STMT);
5733 n1 = fd->loop.n1;
5734 n2 = fd->loop.n2;
5735 step = fd->loop.step;
5736 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5738 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5739 OMP_CLAUSE__LOOPTEMP_);
5740 gcc_assert (innerc);
5741 n1 = OMP_CLAUSE_DECL (innerc);
5742 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5743 OMP_CLAUSE__LOOPTEMP_);
5744 gcc_assert (innerc);
5745 n2 = OMP_CLAUSE_DECL (innerc);
5747 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5748 true, NULL_TREE, true, GSI_SAME_STMT);
5749 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5750 true, NULL_TREE, true, GSI_SAME_STMT);
5751 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5752 true, NULL_TREE, true, GSI_SAME_STMT);
5753 tree chunk_size = fold_convert (itype, fd->chunk_size);
5754 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5755 chunk_size
5756 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5757 GSI_SAME_STMT);
5759 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5760 t = fold_build2 (PLUS_EXPR, itype, step, t);
5761 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5762 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5763 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5764 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5765 fold_build1 (NEGATE_EXPR, itype, t),
5766 fold_build1 (NEGATE_EXPR, itype, step));
5767 else
5768 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5769 t = fold_convert (itype, t);
5770 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5771 true, GSI_SAME_STMT);
5773 trip_var = create_tmp_reg (itype, ".trip");
5774 if (gimple_in_ssa_p (cfun))
5776 trip_init = make_ssa_name (trip_var);
5777 trip_main = make_ssa_name (trip_var);
5778 trip_back = make_ssa_name (trip_var);
5780 else
5782 trip_init = trip_var;
5783 trip_main = trip_var;
5784 trip_back = trip_var;
5787 gassign *assign_stmt
5788 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5789 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5791 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5792 t = fold_build2 (MULT_EXPR, itype, t, step);
5793 if (POINTER_TYPE_P (type))
5794 t = fold_build_pointer_plus (n1, t);
5795 else
5796 t = fold_build2 (PLUS_EXPR, type, t, n1);
5797 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5798 true, GSI_SAME_STMT);
5800 /* Remove the GIMPLE_OMP_FOR. */
5801 gsi_remove (&gsi, true);
5803 gimple_stmt_iterator gsif = gsi;
5805 /* Iteration space partitioning goes in ITER_PART_BB. */
5806 gsi = gsi_last_bb (iter_part_bb);
5808 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5809 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5810 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5811 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5812 false, GSI_CONTINUE_LINKING);
5814 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5815 t = fold_build2 (MIN_EXPR, itype, t, n);
5816 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5817 false, GSI_CONTINUE_LINKING);
5819 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5820 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5822 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5823 gsi = gsi_start_bb (seq_start_bb);
5825 tree startvar = fd->loop.v;
5826 tree endvar = NULL_TREE;
5828 if (gimple_omp_for_combined_p (fd->for_stmt))
5830 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5831 ? gimple_omp_parallel_clauses (inner_stmt)
5832 : gimple_omp_for_clauses (inner_stmt);
5833 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5834 gcc_assert (innerc);
5835 startvar = OMP_CLAUSE_DECL (innerc);
5836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5837 OMP_CLAUSE__LOOPTEMP_);
5838 gcc_assert (innerc);
5839 endvar = OMP_CLAUSE_DECL (innerc);
5840 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5841 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5843 innerc = find_lastprivate_looptemp (fd, innerc);
5844 if (innerc)
5846 /* If needed (distribute parallel for with lastprivate),
5847 propagate down the total number of iterations. */
5848 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5849 fd->loop.n2);
5850 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5851 GSI_CONTINUE_LINKING);
5852 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5853 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5858 t = fold_convert (itype, s0);
5859 t = fold_build2 (MULT_EXPR, itype, t, step);
5860 if (POINTER_TYPE_P (type))
5862 t = fold_build_pointer_plus (n1, t);
5863 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5864 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5865 t = fold_convert (signed_type_for (type), t);
5867 else
5868 t = fold_build2 (PLUS_EXPR, type, t, n1);
5869 t = fold_convert (TREE_TYPE (startvar), t);
5870 t = force_gimple_operand_gsi (&gsi, t,
5871 DECL_P (startvar)
5872 && TREE_ADDRESSABLE (startvar),
5873 NULL_TREE, false, GSI_CONTINUE_LINKING);
5874 assign_stmt = gimple_build_assign (startvar, t);
5875 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5876 if (cond_var)
5878 tree itype = TREE_TYPE (cond_var);
5879 /* For the lastprivate(conditional:) itervar, we need an iteration
5880 counter that starts at an unsigned non-zero value and increases.
5881 Prefer as few IVs as possible, so if we can use startvar
5882 itself, use that, or startvar + constant (those would be
5883 incremented with step), and as a last resort use s0 + 1,
5884 incremented by 1 each iteration. */
5885 if (POINTER_TYPE_P (type)
5886 || TREE_CODE (n1) != INTEGER_CST
5887 || fd->loop.cond_code != LT_EXPR)
5888 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5889 build_int_cst (itype, 1));
5890 else if (tree_int_cst_sgn (n1) == 1)
5891 t = fold_convert (itype, t);
5892 else
5894 tree c = fold_convert (itype, n1);
5895 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5896 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5898 t = force_gimple_operand_gsi (&gsi, t, false,
5899 NULL_TREE, false, GSI_CONTINUE_LINKING);
5900 assign_stmt = gimple_build_assign (cond_var, t);
5901 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5904 t = fold_convert (itype, e0);
5905 t = fold_build2 (MULT_EXPR, itype, t, step);
5906 if (POINTER_TYPE_P (type))
5908 t = fold_build_pointer_plus (n1, t);
5909 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5910 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5911 t = fold_convert (signed_type_for (type), t);
5913 else
5914 t = fold_build2 (PLUS_EXPR, type, t, n1);
5915 t = fold_convert (TREE_TYPE (startvar), t);
5916 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5917 false, GSI_CONTINUE_LINKING);
5918 if (endvar)
5920 assign_stmt = gimple_build_assign (endvar, e);
5921 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5922 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5923 assign_stmt = gimple_build_assign (fd->loop.v, e);
5924 else
5925 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5926 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5928 /* Handle linear clause adjustments. */
5929 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5930 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5931 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5932 c; c = OMP_CLAUSE_CHAIN (c))
5933 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5934 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5936 tree d = OMP_CLAUSE_DECL (c);
5937 bool is_ref = omp_is_reference (d);
5938 tree t = d, a, dest;
5939 if (is_ref)
5940 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5941 tree type = TREE_TYPE (t);
5942 if (POINTER_TYPE_P (type))
5943 type = sizetype;
5944 dest = unshare_expr (t);
5945 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5946 expand_omp_build_assign (&gsif, v, t);
5947 if (itercnt == NULL_TREE)
5949 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5951 itercntbias
5952 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5953 fold_convert (itype, fd->loop.n1));
5954 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5955 itercntbias, step);
5956 itercntbias
5957 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5958 NULL_TREE, true,
5959 GSI_SAME_STMT);
5960 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5961 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5962 NULL_TREE, false,
5963 GSI_CONTINUE_LINKING);
5965 else
5966 itercnt = s0;
5968 a = fold_build2 (MULT_EXPR, type,
5969 fold_convert (type, itercnt),
5970 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5971 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5972 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5973 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5974 false, GSI_CONTINUE_LINKING);
5975 expand_omp_build_assign (&gsi, dest, t, true);
5977 if (fd->collapse > 1)
5978 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5980 if (!broken_loop)
5982 /* The code controlling the sequential loop goes in CONT_BB,
5983 replacing the GIMPLE_OMP_CONTINUE. */
5984 gsi = gsi_last_nondebug_bb (cont_bb);
5985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5986 vmain = gimple_omp_continue_control_use (cont_stmt);
5987 vback = gimple_omp_continue_control_def (cont_stmt);
5989 if (cond_var)
5991 tree itype = TREE_TYPE (cond_var);
5992 tree t2;
5993 if (POINTER_TYPE_P (type)
5994 || TREE_CODE (n1) != INTEGER_CST
5995 || fd->loop.cond_code != LT_EXPR)
5996 t2 = build_int_cst (itype, 1);
5997 else
5998 t2 = fold_convert (itype, step);
5999 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6000 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6001 NULL_TREE, true, GSI_SAME_STMT);
6002 assign_stmt = gimple_build_assign (cond_var, t2);
6003 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6006 if (!gimple_omp_for_combined_p (fd->for_stmt))
6008 if (POINTER_TYPE_P (type))
6009 t = fold_build_pointer_plus (vmain, step);
6010 else
6011 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6012 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6013 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6014 true, GSI_SAME_STMT);
6015 assign_stmt = gimple_build_assign (vback, t);
6016 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6018 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6019 t = build2 (EQ_EXPR, boolean_type_node,
6020 build_int_cst (itype, 0),
6021 build_int_cst (itype, 1));
6022 else
6023 t = build2 (fd->loop.cond_code, boolean_type_node,
6024 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6025 ? t : vback, e);
6026 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6029 /* Remove GIMPLE_OMP_CONTINUE. */
6030 gsi_remove (&gsi, true);
6032 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6033 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6035 /* Trip update code goes into TRIP_UPDATE_BB. */
6036 gsi = gsi_start_bb (trip_update_bb);
6038 t = build_int_cst (itype, 1);
6039 t = build2 (PLUS_EXPR, itype, trip_main, t);
6040 assign_stmt = gimple_build_assign (trip_back, t);
6041 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6044 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6045 gsi = gsi_last_nondebug_bb (exit_bb);
6046 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6048 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6049 if (fd->have_reductemp || fd->have_pointer_condtemp)
6051 tree fn;
6052 if (t)
6053 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6054 else
6055 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6056 gcall *g = gimple_build_call (fn, 0);
6057 if (t)
6059 gimple_call_set_lhs (g, t);
6060 if (fd->have_reductemp)
6061 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6062 NOP_EXPR, t),
6063 GSI_SAME_STMT);
6065 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6067 else
6068 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6070 else if (fd->have_pointer_condtemp)
6072 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6073 gcall *g = gimple_build_call (fn, 0);
6074 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6076 gsi_remove (&gsi, true);
6078 /* Connect the new blocks. */
6079 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6080 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6082 if (!broken_loop)
6084 se = find_edge (cont_bb, body_bb);
6085 if (se == NULL)
6087 se = BRANCH_EDGE (cont_bb);
6088 gcc_assert (single_succ (se->dest) == body_bb);
6090 if (gimple_omp_for_combined_p (fd->for_stmt))
6092 remove_edge (se);
6093 se = NULL;
6095 else if (fd->collapse > 1)
6097 remove_edge (se);
6098 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6100 else
6101 se->flags = EDGE_TRUE_VALUE;
6102 find_edge (cont_bb, trip_update_bb)->flags
6103 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6105 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6106 iter_part_bb);
6109 if (gimple_in_ssa_p (cfun))
6111 gphi_iterator psi;
6112 gphi *phi;
6113 edge re, ene;
6114 edge_var_map *vm;
6115 size_t i;
6117 gcc_assert (fd->collapse == 1 && !broken_loop);
6119 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6120 remove arguments of the phi nodes in fin_bb. We need to create
6121 appropriate phi nodes in iter_part_bb instead. */
6122 se = find_edge (iter_part_bb, fin_bb);
6123 re = single_succ_edge (trip_update_bb);
6124 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6125 ene = single_succ_edge (entry_bb);
6127 psi = gsi_start_phis (fin_bb);
6128 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6129 gsi_next (&psi), ++i)
6131 gphi *nphi;
6132 location_t locus;
6134 phi = psi.phi ();
6135 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6136 redirect_edge_var_map_def (vm), 0))
6137 continue;
6139 t = gimple_phi_result (phi);
6140 gcc_assert (t == redirect_edge_var_map_result (vm));
6142 if (!single_pred_p (fin_bb))
6143 t = copy_ssa_name (t, phi);
6145 nphi = create_phi_node (t, iter_part_bb);
6147 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6148 locus = gimple_phi_arg_location_from_edge (phi, se);
6150 /* A special case -- fd->loop.v is not yet computed in
6151 iter_part_bb, so we need to use vextra instead. */
6152 if (t == fd->loop.v)
6153 t = vextra;
6154 add_phi_arg (nphi, t, ene, locus);
6155 locus = redirect_edge_var_map_location (vm);
6156 tree back_arg = redirect_edge_var_map_def (vm);
6157 add_phi_arg (nphi, back_arg, re, locus);
6158 edge ce = find_edge (cont_bb, body_bb);
6159 if (ce == NULL)
6161 ce = BRANCH_EDGE (cont_bb);
6162 gcc_assert (single_succ (ce->dest) == body_bb);
6163 ce = single_succ_edge (ce->dest);
6165 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6166 gcc_assert (inner_loop_phi != NULL);
6167 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6168 find_edge (seq_start_bb, body_bb), locus);
6170 if (!single_pred_p (fin_bb))
6171 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6173 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6174 redirect_edge_var_map_clear (re);
6175 if (single_pred_p (fin_bb))
6176 while (1)
6178 psi = gsi_start_phis (fin_bb);
6179 if (gsi_end_p (psi))
6180 break;
6181 remove_phi_node (&psi, false);
6184 /* Make phi node for trip. */
6185 phi = create_phi_node (trip_main, iter_part_bb);
6186 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6187 UNKNOWN_LOCATION);
6188 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6189 UNKNOWN_LOCATION);
6192 if (!broken_loop)
6193 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6194 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6195 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6196 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6197 recompute_dominator (CDI_DOMINATORS, fin_bb));
6198 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6199 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6200 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6201 recompute_dominator (CDI_DOMINATORS, body_bb));
6203 if (!broken_loop)
6205 class loop *loop = body_bb->loop_father;
6206 class loop *trip_loop = alloc_loop ();
6207 trip_loop->header = iter_part_bb;
6208 trip_loop->latch = trip_update_bb;
6209 add_loop (trip_loop, iter_part_bb->loop_father);
6211 if (loop != entry_bb->loop_father)
6213 gcc_assert (loop->header == body_bb);
6214 gcc_assert (loop->latch == region->cont
6215 || single_pred (loop->latch) == region->cont);
6216 trip_loop->inner = loop;
6217 return;
6220 if (!gimple_omp_for_combined_p (fd->for_stmt))
6222 loop = alloc_loop ();
6223 loop->header = body_bb;
6224 if (collapse_bb == NULL)
6225 loop->latch = cont_bb;
6226 add_loop (loop, trip_loop);
6231 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6232 loop. Given parameters:
6234 for (V = N1; V cond N2; V += STEP) BODY;
6236 where COND is "<" or ">", we generate pseudocode
6238 V = N1;
6239 goto L1;
6240 L0:
6241 BODY;
6242 V += STEP;
6243 L1:
6244 if (V cond N2) goto L0; else goto L2;
6245 L2:
6247 For collapsed loops, emit the outer loops as scalar
6248 and only try to vectorize the innermost loop. */
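/* For instance (purely illustrative), a user loop such as

     #pragma omp simd safelen(16)
     for (v = 0; v < 1024; v++)
       a[v] = b[v] + c[v];

   is expanded into the L0/L1/L2 scheme above, with the safelen value
   recorded on the resulting loop so the vectorizer may run up to 16
   iterations of BODY concurrently.  */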
6250 static void
6251 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6253 tree type, t;
6254 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6255 gimple_stmt_iterator gsi;
6256 gimple *stmt;
6257 gcond *cond_stmt;
6258 bool broken_loop = region->cont == NULL;
6259 edge e, ne;
6260 tree *counts = NULL;
6261 int i;
6262 int safelen_int = INT_MAX;
6263 bool dont_vectorize = false;
6264 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6265 OMP_CLAUSE_SAFELEN);
6266 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6267 OMP_CLAUSE__SIMDUID_);
6268 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6269 OMP_CLAUSE_IF);
6270 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6271 OMP_CLAUSE_SIMDLEN);
6272 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6273 OMP_CLAUSE__CONDTEMP_);
6274 tree n1, n2;
6275 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6277 if (safelen)
6279 poly_uint64 val;
6280 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6281 if (!poly_int_tree_p (safelen, &val))
6282 safelen_int = 0;
6283 else
6284 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6285 if (safelen_int == 1)
6286 safelen_int = 0;
6288 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6289 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6291 safelen_int = 0;
6292 dont_vectorize = true;
6294 type = TREE_TYPE (fd->loop.v);
6295 entry_bb = region->entry;
6296 cont_bb = region->cont;
6297 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6298 gcc_assert (broken_loop
6299 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6300 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6301 if (!broken_loop)
6303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6304 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6305 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6306 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6308 else
6310 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6311 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6312 l2_bb = single_succ (l1_bb);
6314 exit_bb = region->exit;
6315 l2_dom_bb = NULL;
6317 gsi = gsi_last_nondebug_bb (entry_bb);
6319 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6320 /* Not needed in SSA form right now. */
6321 gcc_assert (!gimple_in_ssa_p (cfun));
6322 if (fd->collapse > 1
6323 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6324 || broken_loop))
6326 int first_zero_iter = -1, dummy = -1;
6327 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6329 counts = XALLOCAVEC (tree, fd->collapse);
6330 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6331 zero_iter_bb, first_zero_iter,
6332 dummy_bb, dummy, l2_dom_bb);
6334 if (l2_dom_bb == NULL)
6335 l2_dom_bb = l1_bb;
6337 n1 = fd->loop.n1;
6338 n2 = fd->loop.n2;
6339 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6341 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6342 OMP_CLAUSE__LOOPTEMP_);
6343 gcc_assert (innerc);
6344 n1 = OMP_CLAUSE_DECL (innerc);
6345 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6346 OMP_CLAUSE__LOOPTEMP_);
6347 gcc_assert (innerc);
6348 n2 = OMP_CLAUSE_DECL (innerc);
6350 tree step = fd->loop.step;
6352 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6353 OMP_CLAUSE__SIMT_);
6354 if (is_simt)
6356 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6357 is_simt = safelen_int > 1;
6359 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6360 if (is_simt)
6362 simt_lane = create_tmp_var (unsigned_type_node);
6363 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6364 gimple_call_set_lhs (g, simt_lane);
6365 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6366 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6367 fold_convert (TREE_TYPE (step), simt_lane));
6368 n1 = fold_convert (type, n1);
6369 if (POINTER_TYPE_P (type))
6370 n1 = fold_build_pointer_plus (n1, offset);
6371 else
6372 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6374 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6375 if (fd->collapse > 1)
6376 simt_maxlane = build_one_cst (unsigned_type_node);
6377 else if (safelen_int < omp_max_simt_vf ())
6378 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6379 tree vf
6380 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6381 unsigned_type_node, 0);
6382 if (simt_maxlane)
6383 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6384 vf = fold_convert (TREE_TYPE (step), vf);
6385 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6388 tree n2var = NULL_TREE;
6389 tree n2v = NULL_TREE;
6390 tree *nonrect_bounds = NULL;
6391 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6392 if (fd->collapse > 1)
6394 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6396 if (fd->non_rect)
6398 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6399 memset (nonrect_bounds, 0,
6400 sizeof (tree) * (fd->last_nonrect + 1));
6402 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6403 gcc_assert (entry_bb == gsi_bb (gsi));
6404 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6405 gsi_prev (&gsi);
6406 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6407 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6408 NULL, n1);
6409 gsi = gsi_for_stmt (fd->for_stmt);
6411 if (broken_loop)
6413 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6415 /* Compute in n2var the limit for the first innermost loop,
6416 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6417 where cnt is how many iterations the loop would have if
6418 all further iterations were assigned to the current task. */
6419 n2var = create_tmp_var (type);
6420 i = fd->collapse - 1;
6421 tree itype = TREE_TYPE (fd->loops[i].v);
6422 if (POINTER_TYPE_P (itype))
6423 itype = signed_type_for (itype);
6424 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6425 ? -1 : 1));
6426 t = fold_build2 (PLUS_EXPR, itype,
6427 fold_convert (itype, fd->loops[i].step), t);
6428 t = fold_build2 (PLUS_EXPR, itype, t,
6429 fold_convert (itype, fd->loops[i].n2));
6430 if (fd->loops[i].m2)
6432 tree t2 = fold_convert (itype,
6433 fd->loops[i - fd->loops[i].outer].v);
6434 tree t3 = fold_convert (itype, fd->loops[i].m2);
6435 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6436 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6438 t = fold_build2 (MINUS_EXPR, itype, t,
6439 fold_convert (itype, fd->loops[i].v));
6440 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6441 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6442 fold_build1 (NEGATE_EXPR, itype, t),
6443 fold_build1 (NEGATE_EXPR, itype,
6444 fold_convert (itype,
6445 fd->loops[i].step)));
6446 else
6447 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6448 fold_convert (itype, fd->loops[i].step));
6449 t = fold_convert (type, t);
6450 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6451 min_arg1 = create_tmp_var (type);
6452 expand_omp_build_assign (&gsi, min_arg1, t2);
6453 min_arg2 = create_tmp_var (type);
6454 expand_omp_build_assign (&gsi, min_arg2, t);
6456 else
6458 if (TREE_CODE (n2) == INTEGER_CST)
6460 /* Indicate for lastprivate handling that at least one iteration
6461 has been performed, without wasting runtime. */
6462 if (integer_nonzerop (n2))
6463 expand_omp_build_assign (&gsi, fd->loop.v,
6464 fold_convert (type, n2));
6465 else
6466 /* Indicate that no iteration has been performed. */
6467 expand_omp_build_assign (&gsi, fd->loop.v,
6468 build_one_cst (type));
6470 else
6472 expand_omp_build_assign (&gsi, fd->loop.v,
6473 build_zero_cst (type));
6474 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6476 for (i = 0; i < fd->collapse; i++)
6478 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6479 if (fd->loops[i].m1)
6481 tree t2
6482 = fold_convert (TREE_TYPE (t),
6483 fd->loops[i - fd->loops[i].outer].v);
6484 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6485 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6486 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6488 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6489 /* For normal non-combined collapsed loops just initialize
6490 the outermost iterator in the entry_bb. */
6491 if (!broken_loop)
6492 break;
6496 else
6497 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6498 tree altv = NULL_TREE, altn2 = NULL_TREE;
6499 if (fd->collapse == 1
6500 && !broken_loop
6501 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6503 /* The vectorizer currently punts on loops with a non-constant step
6504 for the main IV (it cannot compute the number of iterations and
6505 gives up because of that). Since for OpenMP loops it is always
6506 possible to compute the number of iterations upfront, use an
6507 alternate IV as the loop iterator:
6508 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6509 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
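/* E.g. (purely illustrative): for (i = 0; i < n; i += s) with s only
   known at run time becomes altn2 = 0 < n ? (n + s - 1) / s : 0 plus a
   unit-step loop on altv, which the vectorizer can handle.  */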
6510 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6511 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6512 tree itype = TREE_TYPE (fd->loop.v);
6513 if (POINTER_TYPE_P (itype))
6514 itype = signed_type_for (itype);
6515 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6516 t = fold_build2 (PLUS_EXPR, itype,
6517 fold_convert (itype, fd->loop.step), t);
6518 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6519 t = fold_build2 (MINUS_EXPR, itype, t,
6520 fold_convert (itype, fd->loop.v));
6521 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6522 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6523 fold_build1 (NEGATE_EXPR, itype, t),
6524 fold_build1 (NEGATE_EXPR, itype,
6525 fold_convert (itype, fd->loop.step)));
6526 else
6527 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6528 fold_convert (itype, fd->loop.step));
6529 t = fold_convert (TREE_TYPE (altv), t);
6530 altn2 = create_tmp_var (TREE_TYPE (altv));
6531 expand_omp_build_assign (&gsi, altn2, t);
6532 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6533 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6534 true, GSI_SAME_STMT);
6535 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6536 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6537 build_zero_cst (TREE_TYPE (altv)));
6538 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6540 else if (fd->collapse > 1
6541 && !broken_loop
6542 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6543 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6545 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6546 altn2 = create_tmp_var (TREE_TYPE (altv));
6548 if (cond_var)
6550 if (POINTER_TYPE_P (type)
6551 || TREE_CODE (n1) != INTEGER_CST
6552 || fd->loop.cond_code != LT_EXPR
6553 || tree_int_cst_sgn (n1) != 1)
6554 expand_omp_build_assign (&gsi, cond_var,
6555 build_one_cst (TREE_TYPE (cond_var)));
6556 else
6557 expand_omp_build_assign (&gsi, cond_var,
6558 fold_convert (TREE_TYPE (cond_var), n1));
6561 /* Remove the GIMPLE_OMP_FOR statement. */
6562 gsi_remove (&gsi, true);
6564 if (!broken_loop)
6566 /* Code to control the increment goes in the CONT_BB. */
6567 gsi = gsi_last_nondebug_bb (cont_bb);
6568 stmt = gsi_stmt (gsi);
6569 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6571 if (fd->collapse == 1
6572 || gimple_omp_for_combined_into_p (fd->for_stmt))
6574 if (POINTER_TYPE_P (type))
6575 t = fold_build_pointer_plus (fd->loop.v, step);
6576 else
6577 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6578 expand_omp_build_assign (&gsi, fd->loop.v, t);
6580 else if (TREE_CODE (n2) != INTEGER_CST)
6581 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6582 if (altv)
6584 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6585 build_one_cst (TREE_TYPE (altv)));
6586 expand_omp_build_assign (&gsi, altv, t);
6589 if (fd->collapse > 1)
6591 i = fd->collapse - 1;
6592 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6594 t = fold_convert (sizetype, fd->loops[i].step);
6595 t = fold_build_pointer_plus (fd->loops[i].v, t);
6597 else
6599 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6600 fd->loops[i].step);
6601 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6602 fd->loops[i].v, t);
6604 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6606 if (cond_var)
6608 if (POINTER_TYPE_P (type)
6609 || TREE_CODE (n1) != INTEGER_CST
6610 || fd->loop.cond_code != LT_EXPR
6611 || tree_int_cst_sgn (n1) != 1)
6612 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6613 build_one_cst (TREE_TYPE (cond_var)));
6614 else
6615 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6616 fold_convert (TREE_TYPE (cond_var), step));
6617 expand_omp_build_assign (&gsi, cond_var, t);
6620 /* Remove GIMPLE_OMP_CONTINUE. */
6621 gsi_remove (&gsi, true);
6624 /* Emit the condition in L1_BB. */
6625 gsi = gsi_start_bb (l1_bb);
6627 if (altv)
6628 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6629 else if (fd->collapse > 1
6630 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6631 && !broken_loop)
6633 i = fd->collapse - 1;
6634 tree itype = TREE_TYPE (fd->loops[i].v);
6635 if (fd->loops[i].m2)
6636 t = n2v = create_tmp_var (itype);
6637 else
6638 t = fold_convert (itype, fd->loops[i].n2);
6639 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6640 false, GSI_CONTINUE_LINKING);
6641 tree v = fd->loops[i].v;
6642 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6643 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6644 false, GSI_CONTINUE_LINKING);
6645 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6647 else
6649 if (fd->collapse > 1 && !broken_loop)
6650 t = n2var;
6651 else
6652 t = fold_convert (type, n2);
6653 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6654 false, GSI_CONTINUE_LINKING);
6655 tree v = fd->loop.v;
6656 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6657 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6658 false, GSI_CONTINUE_LINKING);
6659 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6661 cond_stmt = gimple_build_cond_empty (t);
6662 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6663 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6664 NULL, NULL)
6665 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6666 NULL, NULL))
6668 gsi = gsi_for_stmt (cond_stmt);
6669 gimple_regimplify_operands (cond_stmt, &gsi);
6672 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6673 if (is_simt)
6675 gsi = gsi_start_bb (l2_bb);
6676 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6677 if (POINTER_TYPE_P (type))
6678 t = fold_build_pointer_plus (fd->loop.v, step);
6679 else
6680 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6681 expand_omp_build_assign (&gsi, fd->loop.v, t);
6684 /* Remove GIMPLE_OMP_RETURN. */
6685 gsi = gsi_last_nondebug_bb (exit_bb);
6686 gsi_remove (&gsi, true);
6688 /* Connect the new blocks. */
6689 remove_edge (FALLTHRU_EDGE (entry_bb));
6691 if (!broken_loop)
6693 remove_edge (BRANCH_EDGE (entry_bb));
6694 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6696 e = BRANCH_EDGE (l1_bb);
6697 ne = FALLTHRU_EDGE (l1_bb);
6698 e->flags = EDGE_TRUE_VALUE;
6700 else
6702 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6704 ne = single_succ_edge (l1_bb);
6705 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6708 ne->flags = EDGE_FALSE_VALUE;
6709 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6710 ne->probability = e->probability.invert ();
6712 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6713 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6715 if (simt_maxlane)
6717 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6718 NULL_TREE, NULL_TREE);
6719 gsi = gsi_last_bb (entry_bb);
6720 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6721 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6722 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6723 FALLTHRU_EDGE (entry_bb)->probability
6724 = profile_probability::guessed_always ().apply_scale (7, 8);
6725 BRANCH_EDGE (entry_bb)->probability
6726 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6727 l2_dom_bb = entry_bb;
6729 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6731 if (!broken_loop && fd->collapse > 1)
6733 basic_block last_bb = l1_bb;
6734 basic_block init_bb = NULL;
6735 for (i = fd->collapse - 2; i >= 0; i--)
6737 tree nextn2v = NULL_TREE;
6738 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6739 e = EDGE_SUCC (last_bb, 0);
6740 else
6741 e = EDGE_SUCC (last_bb, 1);
6742 basic_block bb = split_edge (e);
6743 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6745 t = fold_convert (sizetype, fd->loops[i].step);
6746 t = fold_build_pointer_plus (fd->loops[i].v, t);
6748 else
6750 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6751 fd->loops[i].step);
6752 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6753 fd->loops[i].v, t);
6755 gsi = gsi_after_labels (bb);
6756 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6758 bb = split_block (bb, last_stmt (bb))->dest;
6759 gsi = gsi_start_bb (bb);
6760 tree itype = TREE_TYPE (fd->loops[i].v);
6761 if (fd->loops[i].m2)
6762 t = nextn2v = create_tmp_var (itype);
6763 else
6764 t = fold_convert (itype, fd->loops[i].n2);
6765 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6766 false, GSI_CONTINUE_LINKING);
6767 tree v = fd->loops[i].v;
6768 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6769 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6770 false, GSI_CONTINUE_LINKING);
6771 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6772 cond_stmt = gimple_build_cond_empty (t);
6773 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6774 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6775 expand_omp_regimplify_p, NULL, NULL)
6776 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6777 expand_omp_regimplify_p, NULL, NULL))
6779 gsi = gsi_for_stmt (cond_stmt);
6780 gimple_regimplify_operands (cond_stmt, &gsi);
6782 ne = single_succ_edge (bb);
6783 ne->flags = EDGE_FALSE_VALUE;
6785 init_bb = create_empty_bb (bb);
6786 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6787 add_bb_to_loop (init_bb, bb->loop_father);
6788 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6789 e->probability
6790 = profile_probability::guessed_always ().apply_scale (7, 8);
6791 ne->probability = e->probability.invert ();
6793 gsi = gsi_after_labels (init_bb);
6794 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6795 fd->loops[i + 1].n1);
6796 if (fd->loops[i + 1].m1)
6798 tree t2 = fold_convert (TREE_TYPE (t),
6799 fd->loops[i + 1
6800 - fd->loops[i + 1].outer].v);
6801 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6802 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6803 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6805 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6806 if (fd->loops[i + 1].m2)
6808 if (i + 2 == fd->collapse && (n2var || altv))
6810 gcc_assert (n2v == NULL_TREE);
6811 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6813 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6814 fd->loops[i + 1].n2);
6815 tree t2 = fold_convert (TREE_TYPE (t),
6816 fd->loops[i + 1
6817 - fd->loops[i + 1].outer].v);
6818 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6819 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6820 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6821 expand_omp_build_assign (&gsi, n2v, t);
6823 if (i + 2 == fd->collapse && n2var)
6825 /* For composite simd, n2 is the first iteration the current
6826 task shouldn't already handle, so we effectively want to use
6827 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6828 as the vectorized loop. Except the vectorizer will not
6829 vectorize that, so instead compute N2VAR as
6830 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6831 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6832 as the loop to vectorize. */
6833 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6834 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6836 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6837 == LT_EXPR ? -1 : 1));
6838 t = fold_build2 (PLUS_EXPR, itype,
6839 fold_convert (itype,
6840 fd->loops[i + 1].step), t);
6841 if (fd->loops[i + 1].m2)
6842 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6843 else
6844 t = fold_build2 (PLUS_EXPR, itype, t,
6845 fold_convert (itype,
6846 fd->loops[i + 1].n2));
6847 t = fold_build2 (MINUS_EXPR, itype, t,
6848 fold_convert (itype, fd->loops[i + 1].v));
6849 tree step = fold_convert (itype, fd->loops[i + 1].step);
6850 if (TYPE_UNSIGNED (itype)
6851 && fd->loops[i + 1].cond_code == GT_EXPR)
6852 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6853 fold_build1 (NEGATE_EXPR, itype, t),
6854 fold_build1 (NEGATE_EXPR, itype, step));
6855 else
6856 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6857 t = fold_convert (type, t);
6859 else
6860 t = counts[i + 1];
6861 expand_omp_build_assign (&gsi, min_arg1, t2);
6862 expand_omp_build_assign (&gsi, min_arg2, t);
6863 e = split_block (init_bb, last_stmt (init_bb));
6864 gsi = gsi_after_labels (e->dest);
6865 init_bb = e->dest;
6866 remove_edge (FALLTHRU_EDGE (entry_bb));
6867 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6868 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6869 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6870 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6871 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6872 expand_omp_build_assign (&gsi, n2var, t);
6874 if (i + 2 == fd->collapse && altv)
	      /* The vectorizer currently punts on loops with a non-constant
		 step for the main IV (it can't compute the number of
		 iterations and gives up because of that).  Since for OpenMP
		 loops it is always possible to compute the number of
		 iterations upfront, use an alternate IV as the loop
		 iterator.  */
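	      /* Sketch of the intent (illustrative, not from the sources):
		 the loop is driven as

		   for (ALTV = 0; ALTV < ALTN2; ALTV++, V += STEP)

		 so the vectorizer sees a simple unit-step counted IV while
		 the user IV V is kept up to date alongside it.  */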
6881 expand_omp_build_assign (&gsi, altv,
6882 build_zero_cst (TREE_TYPE (altv)));
6883 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6884 if (POINTER_TYPE_P (itype))
6885 itype = signed_type_for (itype);
6886 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6887 ? -1 : 1));
6888 t = fold_build2 (PLUS_EXPR, itype,
6889 fold_convert (itype, fd->loops[i + 1].step), t);
6890 t = fold_build2 (PLUS_EXPR, itype, t,
6891 fold_convert (itype,
6892 fd->loops[i + 1].m2
6893 ? n2v : fd->loops[i + 1].n2));
6894 t = fold_build2 (MINUS_EXPR, itype, t,
6895 fold_convert (itype, fd->loops[i + 1].v));
6896 tree step = fold_convert (itype, fd->loops[i + 1].step);
6897 if (TYPE_UNSIGNED (itype)
6898 && fd->loops[i + 1].cond_code == GT_EXPR)
6899 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6900 fold_build1 (NEGATE_EXPR, itype, t),
6901 fold_build1 (NEGATE_EXPR, itype, step));
6902 else
6903 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6904 t = fold_convert (TREE_TYPE (altv), t);
6905 expand_omp_build_assign (&gsi, altn2, t);
6906 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6907 fd->loops[i + 1].m2
6908 ? n2v : fd->loops[i + 1].n2);
6909 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6910 true, GSI_SAME_STMT);
6911 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6912 fd->loops[i + 1].v, t2);
6913 gassign *g
6914 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6915 build_zero_cst (TREE_TYPE (altv)));
6916 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6918 n2v = nextn2v;
6920 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6921 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6923 e = find_edge (entry_bb, last_bb);
6924 redirect_edge_succ (e, bb);
6925 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6926 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6929 last_bb = bb;
6932 if (!broken_loop)
6934 class loop *loop = alloc_loop ();
6935 loop->header = l1_bb;
6936 loop->latch = cont_bb;
6937 add_loop (loop, l1_bb->loop_father);
6938 loop->safelen = safelen_int;
6939 if (simduid)
6941 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6942 cfun->has_simduid_loops = true;
6944 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6945 the loop. */
6946 if ((flag_tree_loop_vectorize
6947 || !global_options_set.x_flag_tree_loop_vectorize)
6948 && flag_tree_loop_optimize
6949 && loop->safelen > 1)
6951 loop->force_vectorize = true;
6952 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6954 unsigned HOST_WIDE_INT v
6955 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6956 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6957 loop->simdlen = v;
6959 cfun->has_force_vectorize_loops = true;
6961 else if (dont_vectorize)
6962 loop->dont_vectorize = true;
6964 else if (simduid)
6965 cfun->has_simduid_loops = true;
/* A taskloop construct is represented after gimplification as two
   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched in
   between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for the GIMPLE_OMP_TASK.  */
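/* For illustration only (a sketch, not lifted from the sources): a
   construct such as

     #pragma omp taskloop
     for (i = a; i < b; i += c)
       body (i);

   is represented at this point as

     GIMPLE_OMP_FOR (outer)       <-- expanded by this routine
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR (inner)   <-- expanded by expand_omp_taskloop_for_inner
           body

   so all this routine emits are the evaluations of N1/N2 (biased if
   necessary) into the _looptemp_ clause decls that the GOMP_taskloop
   runtime call consumes.  */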
6974 static void
6975 expand_omp_taskloop_for_outer (struct omp_region *region,
6976 struct omp_for_data *fd,
6977 gimple *inner_stmt)
6979 tree type, bias = NULL_TREE;
6980 basic_block entry_bb, cont_bb, exit_bb;
6981 gimple_stmt_iterator gsi;
6982 gassign *assign_stmt;
6983 tree *counts = NULL;
6984 int i;
6986 gcc_assert (inner_stmt);
6987 gcc_assert (region->cont);
6988 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6989 && gimple_omp_task_taskloop_p (inner_stmt));
6990 type = TREE_TYPE (fd->loop.v);
6992 /* See if we need to bias by LLONG_MIN. */
6993 if (fd->iter_type == long_long_unsigned_type_node
6994 && TREE_CODE (type) == INTEGER_TYPE
6995 && !TYPE_UNSIGNED (type))
6997 tree n1, n2;
6999 if (fd->loop.cond_code == LT_EXPR)
7001 n1 = fd->loop.n1;
7002 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7004 else
7006 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7007 n2 = fd->loop.n1;
7009 if (TREE_CODE (n1) != INTEGER_CST
7010 || TREE_CODE (n2) != INTEGER_CST
7011 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7012 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
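      /* Illustrative note (an explanatory assumption, not in the original):
	 BIAS here is (unsigned long long) LLONG_MIN, and adding it flips
	 the sign bit, mapping signed order onto unsigned order; e.g.
	 -1 < 0 becomes 0x7fff... < 0x8000... once biased, so the bounds can
	 safely be compared in the unsigned iter_type.  */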
7015 entry_bb = region->entry;
7016 cont_bb = region->cont;
7017 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7018 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7019 exit_bb = region->exit;
7021 gsi = gsi_last_nondebug_bb (entry_bb);
7022 gimple *for_stmt = gsi_stmt (gsi);
7023 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7024 if (fd->collapse > 1)
7026 int first_zero_iter = -1, dummy = -1;
7027 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7029 counts = XALLOCAVEC (tree, fd->collapse);
7030 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7031 zero_iter_bb, first_zero_iter,
7032 dummy_bb, dummy, l2_dom_bb);
7034 if (zero_iter_bb)
7036 /* Some counts[i] vars might be uninitialized if
7037 some loop has zero iterations. But the body shouldn't
7038 be executed in that case, so just avoid uninit warnings. */
7039 for (i = first_zero_iter; i < fd->collapse; i++)
7040 if (SSA_VAR_P (counts[i]))
7041 TREE_NO_WARNING (counts[i]) = 1;
7042 gsi_prev (&gsi);
7043 edge e = split_block (entry_bb, gsi_stmt (gsi));
7044 entry_bb = e->dest;
7045 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7046 gsi = gsi_last_bb (entry_bb);
7047 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7048 get_immediate_dominator (CDI_DOMINATORS,
7049 zero_iter_bb));
7053 tree t0, t1;
7054 t1 = fd->loop.n2;
7055 t0 = fd->loop.n1;
7056 if (POINTER_TYPE_P (TREE_TYPE (t0))
7057 && TYPE_PRECISION (TREE_TYPE (t0))
7058 != TYPE_PRECISION (fd->iter_type))
7060 /* Avoid casting pointers to integer of a different size. */
7061 tree itype = signed_type_for (type);
7062 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7063 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7065 else
7067 t1 = fold_convert (fd->iter_type, t1);
7068 t0 = fold_convert (fd->iter_type, t0);
7070 if (bias)
7072 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7073 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7076 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7077 OMP_CLAUSE__LOOPTEMP_);
7078 gcc_assert (innerc);
7079 tree startvar = OMP_CLAUSE_DECL (innerc);
7080 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7081 gcc_assert (innerc);
7082 tree endvar = OMP_CLAUSE_DECL (innerc);
7083 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7085 innerc = find_lastprivate_looptemp (fd, innerc);
7086 if (innerc)
7088 /* If needed (inner taskloop has lastprivate clause), propagate
7089 down the total number of iterations. */
7090 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7091 NULL_TREE, false,
7092 GSI_CONTINUE_LINKING);
7093 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7094 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7098 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7099 GSI_CONTINUE_LINKING);
7100 assign_stmt = gimple_build_assign (startvar, t0);
7101 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7103 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7104 GSI_CONTINUE_LINKING);
7105 assign_stmt = gimple_build_assign (endvar, t1);
7106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7107 if (fd->collapse > 1)
7108 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7110 /* Remove the GIMPLE_OMP_FOR statement. */
7111 gsi = gsi_for_stmt (for_stmt);
7112 gsi_remove (&gsi, true);
7114 gsi = gsi_last_nondebug_bb (cont_bb);
7115 gsi_remove (&gsi, true);
7117 gsi = gsi_last_nondebug_bb (exit_bb);
7118 gsi_remove (&gsi, true);
7120 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7121 remove_edge (BRANCH_EDGE (entry_bb));
7122 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7123 remove_edge (BRANCH_EDGE (cont_bb));
7124 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7125 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7126 recompute_dominator (CDI_DOMINATORS, region->entry));
/* A taskloop construct is represented after gimplification as two
   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched in
   between them.  This routine expands the inner GIMPLE_OMP_FOR.
   The GOMP_taskloop{,_ull} function arranges for each task to be
   given just a single range of iterations.  */
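/* Illustrative sketch (assumed shape, not copied from the sources): the
   GOMP_taskloop runtime hands each task its subrange through the two
   _looptemp_ clauses read below as N1 and N2, so the expanded loop is
   effectively

     for (V = N1; V COND N2; V += STEP)
       body;

   i.e. a plain sequential loop over just that task's iterations.  */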
7135 static void
7136 expand_omp_taskloop_for_inner (struct omp_region *region,
7137 struct omp_for_data *fd,
7138 gimple *inner_stmt)
7140 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7141 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7142 basic_block fin_bb;
7143 gimple_stmt_iterator gsi;
7144 edge ep;
7145 bool broken_loop = region->cont == NULL;
7146 tree *counts = NULL;
7147 tree n1, n2, step;
7149 itype = type = TREE_TYPE (fd->loop.v);
7150 if (POINTER_TYPE_P (type))
7151 itype = signed_type_for (type);
7153 /* See if we need to bias by LLONG_MIN. */
7154 if (fd->iter_type == long_long_unsigned_type_node
7155 && TREE_CODE (type) == INTEGER_TYPE
7156 && !TYPE_UNSIGNED (type))
7158 tree n1, n2;
7160 if (fd->loop.cond_code == LT_EXPR)
7162 n1 = fd->loop.n1;
7163 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7165 else
7167 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7168 n2 = fd->loop.n1;
7170 if (TREE_CODE (n1) != INTEGER_CST
7171 || TREE_CODE (n2) != INTEGER_CST
7172 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7173 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7176 entry_bb = region->entry;
7177 cont_bb = region->cont;
7178 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7179 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7180 gcc_assert (broken_loop
7181 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7182 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7183 if (!broken_loop)
7185 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7186 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7188 exit_bb = region->exit;
7190 /* Iteration space partitioning goes in ENTRY_BB. */
7191 gsi = gsi_last_nondebug_bb (entry_bb);
7192 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7194 if (fd->collapse > 1)
7196 int first_zero_iter = -1, dummy = -1;
7197 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7199 counts = XALLOCAVEC (tree, fd->collapse);
7200 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7201 fin_bb, first_zero_iter,
7202 dummy_bb, dummy, l2_dom_bb);
7203 t = NULL_TREE;
7205 else
7206 t = integer_one_node;
7208 step = fd->loop.step;
7209 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7210 OMP_CLAUSE__LOOPTEMP_);
7211 gcc_assert (innerc);
7212 n1 = OMP_CLAUSE_DECL (innerc);
7213 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7214 gcc_assert (innerc);
7215 n2 = OMP_CLAUSE_DECL (innerc);
7216 if (bias)
7218 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7219 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7221 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7222 true, NULL_TREE, true, GSI_SAME_STMT);
7223 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7224 true, NULL_TREE, true, GSI_SAME_STMT);
7225 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7226 true, NULL_TREE, true, GSI_SAME_STMT);
7228 tree startvar = fd->loop.v;
7229 tree endvar = NULL_TREE;
7231 if (gimple_omp_for_combined_p (fd->for_stmt))
7233 tree clauses = gimple_omp_for_clauses (inner_stmt);
7234 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7235 gcc_assert (innerc);
7236 startvar = OMP_CLAUSE_DECL (innerc);
7237 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7238 OMP_CLAUSE__LOOPTEMP_);
7239 gcc_assert (innerc);
7240 endvar = OMP_CLAUSE_DECL (innerc);
7242 t = fold_convert (TREE_TYPE (startvar), n1);
7243 t = force_gimple_operand_gsi (&gsi, t,
7244 DECL_P (startvar)
7245 && TREE_ADDRESSABLE (startvar),
7246 NULL_TREE, false, GSI_CONTINUE_LINKING);
7247 gimple *assign_stmt = gimple_build_assign (startvar, t);
7248 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7250 t = fold_convert (TREE_TYPE (startvar), n2);
7251 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7252 false, GSI_CONTINUE_LINKING);
7253 if (endvar)
7255 assign_stmt = gimple_build_assign (endvar, e);
7256 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7257 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7258 assign_stmt = gimple_build_assign (fd->loop.v, e);
7259 else
7260 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7261 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7264 tree *nonrect_bounds = NULL;
7265 if (fd->collapse > 1)
7267 if (fd->non_rect)
7269 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7270 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7272 gcc_assert (gsi_bb (gsi) == entry_bb);
7273 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7274 startvar);
7275 entry_bb = gsi_bb (gsi);
7278 if (!broken_loop)
7280 /* The code controlling the sequential loop replaces the
7281 GIMPLE_OMP_CONTINUE. */
7282 gsi = gsi_last_nondebug_bb (cont_bb);
7283 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7284 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7285 vmain = gimple_omp_continue_control_use (cont_stmt);
7286 vback = gimple_omp_continue_control_def (cont_stmt);
7288 if (!gimple_omp_for_combined_p (fd->for_stmt))
7290 if (POINTER_TYPE_P (type))
7291 t = fold_build_pointer_plus (vmain, step);
7292 else
7293 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7294 t = force_gimple_operand_gsi (&gsi, t,
7295 DECL_P (vback)
7296 && TREE_ADDRESSABLE (vback),
7297 NULL_TREE, true, GSI_SAME_STMT);
7298 assign_stmt = gimple_build_assign (vback, t);
7299 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7301 t = build2 (fd->loop.cond_code, boolean_type_node,
7302 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7303 ? t : vback, e);
7304 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7307 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7308 gsi_remove (&gsi, true);
7310 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7311 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7312 cont_bb, body_bb);
7315 /* Remove the GIMPLE_OMP_FOR statement. */
7316 gsi = gsi_for_stmt (fd->for_stmt);
7317 gsi_remove (&gsi, true);
7319 /* Remove the GIMPLE_OMP_RETURN statement. */
7320 gsi = gsi_last_nondebug_bb (exit_bb);
7321 gsi_remove (&gsi, true);
7323 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7324 if (!broken_loop)
7325 remove_edge (BRANCH_EDGE (entry_bb));
7326 else
7328 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7329 region->outer->cont = NULL;
7332 /* Connect all the blocks. */
7333 if (!broken_loop)
7335 ep = find_edge (cont_bb, body_bb);
7336 if (gimple_omp_for_combined_p (fd->for_stmt))
7338 remove_edge (ep);
7339 ep = NULL;
7341 else if (fd->collapse > 1)
7343 remove_edge (ep);
7344 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7346 else
7347 ep->flags = EDGE_TRUE_VALUE;
7348 find_edge (cont_bb, fin_bb)->flags
7349 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7352 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7353 recompute_dominator (CDI_DOMINATORS, body_bb));
7354 if (!broken_loop)
7355 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7356 recompute_dominator (CDI_DOMINATORS, fin_bb));
7358 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7360 class loop *loop = alloc_loop ();
7361 loop->header = body_bb;
7362 if (collapse_bb == NULL)
7363 loop->latch = cont_bb;
7364 add_loop (loop, body_bb->loop_father);
/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk_no < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */
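/* Illustrative only (not lifted from a testcase): a loop such as

     #pragma acc parallel loop
     for (i = 0; i < n; i++)
       body (i);

   reaches here with cond_code LT_EXPR; the IFN_GOACC_LOOP calls emitted
   below encode dir/range/step/chunking so that oacc_device_lower can
   instantiate the partitioning for the selected offload target.  */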
7412 static void
7413 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7415 tree v = fd->loop.v;
7416 enum tree_code cond_code = fd->loop.cond_code;
7417 enum tree_code plus_code = PLUS_EXPR;
7419 tree chunk_size = integer_minus_one_node;
7420 tree gwv = integer_zero_node;
7421 tree iter_type = TREE_TYPE (v);
7422 tree diff_type = iter_type;
7423 tree plus_type = iter_type;
7424 struct oacc_collapse *counts = NULL;
7426 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7427 == GF_OMP_FOR_KIND_OACC_LOOP);
7428 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7429 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7431 if (POINTER_TYPE_P (iter_type))
7433 plus_code = POINTER_PLUS_EXPR;
7434 plus_type = sizetype;
7436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7437 diff_type = signed_type_for (diff_type);
7438 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7439 diff_type = integer_type_node;
7441 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7442 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7443 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7444 basic_block bottom_bb = NULL;
  /* entry_bb has two successors; the branch edge is to the exit
     block, the fallthrough edge to the body.  */
7448 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7449 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
  /* If cont_bb is non-NULL, it has 2 successors.  The branch successor is
     body_bb, or a block whose only successor is body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of entry_bb).  */
7455 if (cont_bb)
7457 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7458 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7460 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7461 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7463 else
7464 gcc_assert (!gimple_in_ssa_p (cfun));
7466 /* The exit block only has entry_bb and cont_bb as predecessors. */
7467 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7469 tree chunk_no;
7470 tree chunk_max = NULL_TREE;
7471 tree bound, offset;
7472 tree step = create_tmp_var (diff_type, ".step");
7473 bool up = cond_code == LT_EXPR;
7474 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7475 bool chunking = !gimple_in_ssa_p (cfun);
7476 bool negating;
7478 /* Tiling vars. */
7479 tree tile_size = NULL_TREE;
7480 tree element_s = NULL_TREE;
7481 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7482 basic_block elem_body_bb = NULL;
7483 basic_block elem_cont_bb = NULL;
7485 /* SSA instances. */
7486 tree offset_incr = NULL_TREE;
7487 tree offset_init = NULL_TREE;
7489 gimple_stmt_iterator gsi;
7490 gassign *ass;
7491 gcall *call;
7492 gimple *stmt;
7493 tree expr;
7494 location_t loc;
7495 edge split, be, fte;
7497 /* Split the end of entry_bb to create head_bb. */
7498 split = split_block (entry_bb, last_stmt (entry_bb));
7499 basic_block head_bb = split->dest;
7500 entry_bb = split->src;
7502 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7503 gsi = gsi_last_nondebug_bb (entry_bb);
7504 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7505 loc = gimple_location (for_stmt);
7507 if (gimple_in_ssa_p (cfun))
7509 offset_init = gimple_omp_for_index (for_stmt, 0);
7510 gcc_assert (integer_zerop (fd->loop.n1));
7511 /* The SSA parallelizer does gang parallelism. */
7512 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7515 if (fd->collapse > 1 || fd->tiling)
7517 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7518 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7519 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7520 TREE_TYPE (fd->loop.n2), loc);
7522 if (SSA_VAR_P (fd->loop.n2))
7524 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7525 true, GSI_SAME_STMT);
7526 ass = gimple_build_assign (fd->loop.n2, total);
7527 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7531 tree b = fd->loop.n1;
7532 tree e = fd->loop.n2;
7533 tree s = fd->loop.step;
7535 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7536 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7538 /* Convert the step, avoiding possible unsigned->signed overflow. */
7539 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7540 if (negating)
7541 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7542 s = fold_convert (diff_type, s);
7543 if (negating)
7544 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7545 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7547 if (!chunking)
7548 chunk_size = integer_zero_node;
7549 expr = fold_convert (diff_type, chunk_size);
7550 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7551 NULL_TREE, true, GSI_SAME_STMT);
7553 if (fd->tiling)
7555 /* Determine the tile size and element step,
7556 modify the outer loop step size. */
7557 tile_size = create_tmp_var (diff_type, ".tile_size");
7558 expr = build_int_cst (diff_type, 1);
7559 for (int ix = 0; ix < fd->collapse; ix++)
7560 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7561 expr = force_gimple_operand_gsi (&gsi, expr, true,
7562 NULL_TREE, true, GSI_SAME_STMT);
7563 ass = gimple_build_assign (tile_size, expr);
7564 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7566 element_s = create_tmp_var (diff_type, ".element_s");
7567 ass = gimple_build_assign (element_s, s);
7568 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7570 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7571 s = force_gimple_operand_gsi (&gsi, expr, true,
7572 NULL_TREE, true, GSI_SAME_STMT);
7575 /* Determine the range, avoiding possible unsigned->signed overflow. */
7576 negating = !up && TYPE_UNSIGNED (iter_type);
7577 expr = fold_build2 (MINUS_EXPR, plus_type,
7578 fold_convert (plus_type, negating ? b : e),
7579 fold_convert (plus_type, negating ? e : b));
7580 expr = fold_convert (diff_type, expr);
7581 if (negating)
7582 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7583 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7584 NULL_TREE, true, GSI_SAME_STMT);
7586 chunk_no = build_int_cst (diff_type, 0);
7587 if (chunking)
7589 gcc_assert (!gimple_in_ssa_p (cfun));
7591 expr = chunk_no;
7592 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7593 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7595 ass = gimple_build_assign (chunk_no, expr);
7596 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7598 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7599 build_int_cst (integer_type_node,
7600 IFN_GOACC_LOOP_CHUNKS),
7601 dir, range, s, chunk_size, gwv);
7602 gimple_call_set_lhs (call, chunk_max);
7603 gimple_set_location (call, loc);
7604 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7606 else
7607 chunk_size = chunk_no;
7609 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7610 build_int_cst (integer_type_node,
7611 IFN_GOACC_LOOP_STEP),
7612 dir, range, s, chunk_size, gwv);
7613 gimple_call_set_lhs (call, step);
7614 gimple_set_location (call, loc);
7615 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7617 /* Remove the GIMPLE_OMP_FOR. */
7618 gsi_remove (&gsi, true);
7620 /* Fixup edges from head_bb. */
7621 be = BRANCH_EDGE (head_bb);
7622 fte = FALLTHRU_EDGE (head_bb);
7623 be->flags |= EDGE_FALSE_VALUE;
7624 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7626 basic_block body_bb = fte->dest;
7628 if (gimple_in_ssa_p (cfun))
7630 gsi = gsi_last_nondebug_bb (cont_bb);
7631 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7633 offset = gimple_omp_continue_control_use (cont_stmt);
7634 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7636 else
7638 offset = create_tmp_var (diff_type, ".offset");
7639 offset_init = offset_incr = offset;
7641 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7643 /* Loop offset & bound go into head_bb. */
7644 gsi = gsi_start_bb (head_bb);
7646 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7647 build_int_cst (integer_type_node,
7648 IFN_GOACC_LOOP_OFFSET),
7649 dir, range, s,
7650 chunk_size, gwv, chunk_no);
7651 gimple_call_set_lhs (call, offset_init);
7652 gimple_set_location (call, loc);
7653 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7655 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7656 build_int_cst (integer_type_node,
7657 IFN_GOACC_LOOP_BOUND),
7658 dir, range, s,
7659 chunk_size, gwv, offset_init);
7660 gimple_call_set_lhs (call, bound);
7661 gimple_set_location (call, loc);
7662 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7664 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7665 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7666 GSI_CONTINUE_LINKING);
7668 /* V assignment goes into body_bb. */
7669 if (!gimple_in_ssa_p (cfun))
7671 gsi = gsi_start_bb (body_bb);
7673 expr = build2 (plus_code, iter_type, b,
7674 fold_convert (plus_type, offset));
7675 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7676 true, GSI_SAME_STMT);
7677 ass = gimple_build_assign (v, expr);
7678 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7680 if (fd->collapse > 1 || fd->tiling)
7681 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7683 if (fd->tiling)
7685 /* Determine the range of the element loop -- usually simply
7686 the tile_size, but could be smaller if the final
7687 iteration of the outer loop is a partial tile. */
7688 tree e_range = create_tmp_var (diff_type, ".e_range");
7690 expr = build2 (MIN_EXPR, diff_type,
7691 build2 (MINUS_EXPR, diff_type, bound, offset),
7692 build2 (MULT_EXPR, diff_type, tile_size,
7693 element_s));
7694 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7695 true, GSI_SAME_STMT);
7696 ass = gimple_build_assign (e_range, expr);
7697 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7699 /* Determine bound, offset & step of inner loop. */
7700 e_bound = create_tmp_var (diff_type, ".e_bound");
7701 e_offset = create_tmp_var (diff_type, ".e_offset");
7702 e_step = create_tmp_var (diff_type, ".e_step");
7704 /* Mark these as element loops. */
7705 tree t, e_gwv = integer_minus_one_node;
7706 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7708 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7709 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7710 element_s, chunk, e_gwv, chunk);
7711 gimple_call_set_lhs (call, e_offset);
7712 gimple_set_location (call, loc);
7713 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7715 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7716 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7717 element_s, chunk, e_gwv, e_offset);
7718 gimple_call_set_lhs (call, e_bound);
7719 gimple_set_location (call, loc);
7720 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7722 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7723 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7724 element_s, chunk, e_gwv);
7725 gimple_call_set_lhs (call, e_step);
7726 gimple_set_location (call, loc);
7727 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7729 /* Add test and split block. */
7730 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7731 stmt = gimple_build_cond_empty (expr);
7732 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7733 split = split_block (body_bb, stmt);
7734 elem_body_bb = split->dest;
7735 if (cont_bb == body_bb)
7736 cont_bb = elem_body_bb;
7737 body_bb = split->src;
7739 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7741 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7742 if (cont_bb == NULL)
7744 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7745 e->probability = profile_probability::even ();
7746 split->probability = profile_probability::even ();
7749 /* Initialize the user's loop vars. */
7750 gsi = gsi_start_bb (elem_body_bb);
7751 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it were, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
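  /* E.g. (illustrative) a partitioned loop whose body unconditionally
     calls a noreturn routine such as abort () may lose its back edge
     entirely, leaving cont_bb NULL here.  */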
7760 if (cont_bb)
7762 gsi = gsi_last_nondebug_bb (cont_bb);
7763 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7764 loc = gimple_location (cont_stmt);
7766 if (fd->tiling)
7768 /* Insert element loop increment and test. */
7769 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7770 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7771 true, GSI_SAME_STMT);
7772 ass = gimple_build_assign (e_offset, expr);
7773 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7774 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7776 stmt = gimple_build_cond_empty (expr);
7777 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7778 split = split_block (cont_bb, stmt);
7779 elem_cont_bb = split->src;
7780 cont_bb = split->dest;
7782 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7783 split->probability = profile_probability::unlikely ().guessed ();
7784 edge latch_edge
7785 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7786 latch_edge->probability = profile_probability::likely ().guessed ();
7788 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7789 skip_edge->probability = profile_probability::unlikely ().guessed ();
7790 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7791 loop_entry_edge->probability
7792 = profile_probability::likely ().guessed ();
7794 gsi = gsi_for_stmt (cont_stmt);
7797 /* Increment offset. */
7798 if (gimple_in_ssa_p (cfun))
7799 expr = build2 (plus_code, iter_type, offset,
7800 fold_convert (plus_type, step));
7801 else
7802 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7803 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7804 true, GSI_SAME_STMT);
7805 ass = gimple_build_assign (offset_incr, expr);
7806 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7807 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7808 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7810 /* Remove the GIMPLE_OMP_CONTINUE. */
7811 gsi_remove (&gsi, true);
7813 /* Fixup edges from cont_bb. */
7814 be = BRANCH_EDGE (cont_bb);
7815 fte = FALLTHRU_EDGE (cont_bb);
7816 be->flags |= EDGE_TRUE_VALUE;
7817 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7819 if (chunking)
7821 /* Split the beginning of exit_bb to make bottom_bb. We
7822 need to insert a nop at the start, because splitting is
7823 after a stmt, not before. */
7824 gsi = gsi_start_bb (exit_bb);
7825 stmt = gimple_build_nop ();
7826 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7827 split = split_block (exit_bb, stmt);
7828 bottom_bb = split->src;
7829 exit_bb = split->dest;
7830 gsi = gsi_last_bb (bottom_bb);
7832 /* Chunk increment and test goes into bottom_bb. */
7833 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7834 build_int_cst (diff_type, 1));
7835 ass = gimple_build_assign (chunk_no, expr);
7836 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7838 /* Chunk test at end of bottom_bb. */
7839 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7840 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7841 GSI_CONTINUE_LINKING);
7843 /* Fixup edges from bottom_bb. */
7844 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7845 split->probability = profile_probability::unlikely ().guessed ();
7846 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7847 latch_edge->probability = profile_probability::likely ().guessed ();
7851 gsi = gsi_last_nondebug_bb (exit_bb);
7852 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7853 loc = gimple_location (gsi_stmt (gsi));
7855 if (!gimple_in_ssa_p (cfun))
7857 /* Insert the final value of V, in case it is live. This is the
7858 value for the only thread that survives past the join. */
7859 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7860 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7861 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7862 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7863 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7864 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7865 true, GSI_SAME_STMT);
7866 ass = gimple_build_assign (v, expr);
7867 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7870 /* Remove the OMP_RETURN. */
7871 gsi_remove (&gsi, true);
7873 if (cont_bb)
7875 /* We now have one, two or three nested loops. Update the loop
7876 structures. */
7877 class loop *parent = entry_bb->loop_father;
7878 class loop *body = body_bb->loop_father;
7880 if (chunking)
7882 class loop *chunk_loop = alloc_loop ();
7883 chunk_loop->header = head_bb;
7884 chunk_loop->latch = bottom_bb;
7885 add_loop (chunk_loop, parent);
7886 parent = chunk_loop;
7888 else if (parent != body)
7890 gcc_assert (body->header == body_bb);
7891 gcc_assert (body->latch == cont_bb
7892 || single_pred (body->latch) == cont_bb);
7893 parent = NULL;
7896 if (parent)
7898 class loop *body_loop = alloc_loop ();
7899 body_loop->header = body_bb;
7900 body_loop->latch = cont_bb;
7901 add_loop (body_loop, parent);
7903 if (fd->tiling)
7905 /* Insert tiling's element loop. */
7906 class loop *inner_loop = alloc_loop ();
7907 inner_loop->header = elem_body_bb;
7908 inner_loop->latch = elem_cont_bb;
7909 add_loop (inner_loop, body_loop);
7915 /* Expand the OMP loop defined by REGION. */
7917 static void
7918 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7920 struct omp_for_data fd;
7921 struct omp_for_data_loop *loops;
7923 loops = XALLOCAVEC (struct omp_for_data_loop,
7924 gimple_omp_for_collapse (last_stmt (region->entry)));
7925 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7926 &fd, loops);
7927 region->sched_kind = fd.sched_kind;
7928 region->sched_modifiers = fd.sched_modifiers;
7929 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7930 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7932 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7933 if ((loops[i].m1 || loops[i].m2)
7934 && (loops[i].m1 == NULL_TREE
7935 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7936 && (loops[i].m2 == NULL_TREE
7937 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7938 && TREE_CODE (loops[i].step) == INTEGER_CST
7939 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7941 tree t;
7942 tree itype = TREE_TYPE (loops[i].v);
7943 if (loops[i].m1 && loops[i].m2)
7944 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7945 else if (loops[i].m1)
7946 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7947 else
7948 t = loops[i].m2;
7949 t = fold_build2 (MULT_EXPR, itype, t,
7950 fold_convert (itype,
7951 loops[i - loops[i].outer].step));
7952 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7953 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7954 fold_build1 (NEGATE_EXPR, itype, t),
7955 fold_build1 (NEGATE_EXPR, itype,
7956 fold_convert (itype,
7957 loops[i].step)));
7958 else
7959 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7960 fold_convert (itype, loops[i].step));
7961 if (integer_nonzerop (t))
7962 error_at (gimple_location (fd.for_stmt),
7963 "invalid OpenMP non-rectangular loop step; "
7964 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7965 "step %qE",
7966 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7967 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7968 loops[i - loops[i].outer].step, i + 1,
7969 loops[i].step);
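	/* An illustrative (hypothetical) trigger for this diagnostic:

	     #pragma omp for collapse(2)
	     for (i = 0; i < 16; i++)
	       for (j = 0; j < i; j += 4)
		 ;

	   here m2 == 1, m1 is absent and the outer step is 1, so 1 * 1 is
	   not a multiple of the inner step 4 and the error above is
	   reported.  */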
7973 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7974 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7975 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7976 if (region->cont)
7978 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7979 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7980 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7982 else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
7986 loops_state_set (LOOPS_NEED_FIXUP);
7988 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7989 expand_omp_simd (region, &fd);
7990 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7992 gcc_assert (!inner_stmt && !fd.non_rect);
7993 expand_oacc_for (region, &fd);
7995 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7997 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7998 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7999 else
8000 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8002 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8003 && !fd.have_ordered)
8005 if (fd.chunk_size == NULL)
8006 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8007 else
8008 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8010 else
8012 int fn_index, start_ix, next_ix;
8013 unsigned HOST_WIDE_INT sched = 0;
8014 tree sched_arg = NULL_TREE;
8016 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8017 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8018 if (fd.chunk_size == NULL
8019 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8020 fd.chunk_size = integer_zero_node;
8021 switch (fd.sched_kind)
8023 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8024 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8025 && fd.lastprivate_conditional == 0)
8027 gcc_assert (!fd.have_ordered);
8028 fn_index = 6;
8029 sched = 4;
8031 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8032 && !fd.have_ordered
8033 && fd.lastprivate_conditional == 0)
8034 fn_index = 7;
8035 else
8037 fn_index = 3;
8038 sched = (HOST_WIDE_INT_1U << 31);
8040 break;
8041 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8042 case OMP_CLAUSE_SCHEDULE_GUIDED:
8043 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8044 && !fd.have_ordered
8045 && fd.lastprivate_conditional == 0)
8047 fn_index = 3 + fd.sched_kind;
8048 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8049 break;
8051 fn_index = fd.sched_kind;
8052 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8053 sched += (HOST_WIDE_INT_1U << 31);
8054 break;
8055 case OMP_CLAUSE_SCHEDULE_STATIC:
8056 gcc_assert (fd.have_ordered);
8057 fn_index = 0;
8058 sched = (HOST_WIDE_INT_1U << 31) + 1;
8059 break;
8060 default:
8061 gcc_unreachable ();
8063 if (!fd.ordered)
8064 fn_index += fd.have_ordered * 8;
8065 if (fd.ordered)
8066 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8067 else
8068 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8069 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8070 if (fd.have_reductemp || fd.have_pointer_condtemp)
8072 if (fd.ordered)
8073 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8074 else if (fd.have_ordered)
8075 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8076 else
8077 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8078 sched_arg = build_int_cstu (long_integer_type_node, sched);
8079 if (!fd.chunk_size)
8080 fd.chunk_size = integer_zero_node;
8082 if (fd.iter_type == long_long_unsigned_type_node)
8084 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8085 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8086 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8087 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8089 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8090 (enum built_in_function) next_ix, sched_arg,
8091 inner_stmt);
8094 if (gimple_in_ssa_p (cfun))
8095 update_ssa (TODO_update_ssa_only_virtuals);
/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with a call to GOMP_sections_next.  */
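/* A concrete (illustrative, hypothetical) example:

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   gives n == 2; GOMP_sections_start/GOMP_sections_next return 1 or 2 to
   select a section to run and 0 once no work remains, which is what the
   switch above dispatches on.  */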
8125 static void
8126 expand_omp_sections (struct omp_region *region)
8128 tree t, u, vin = NULL, vmain, vnext, l2;
8129 unsigned len;
8130 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8131 gimple_stmt_iterator si, switch_si;
8132 gomp_sections *sections_stmt;
8133 gimple *stmt;
8134 gomp_continue *cont;
8135 edge_iterator ei;
8136 edge e;
8137 struct omp_region *inner;
8138 unsigned i, casei;
8139 bool exit_reachable = region->cont != NULL;
8141 gcc_assert (region->exit != NULL);
8142 entry_bb = region->entry;
8143 l0_bb = single_succ (entry_bb);
8144 l1_bb = region->cont;
8145 l2_bb = region->exit;
8146 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8147 l2 = gimple_block_label (l2_bb);
8148 else
8150 /* This can happen if there are reductions. */
8151 len = EDGE_COUNT (l0_bb->succs);
8152 gcc_assert (len > 0);
8153 e = EDGE_SUCC (l0_bb, len - 1);
8154 si = gsi_last_nondebug_bb (e->dest);
8155 l2 = NULL_TREE;
8156 if (gsi_end_p (si)
8157 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8158 l2 = gimple_block_label (e->dest);
8159 else
8160 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8162 si = gsi_last_nondebug_bb (e->dest);
8163 if (gsi_end_p (si)
8164 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8166 l2 = gimple_block_label (e->dest);
8167 break;
8171 if (exit_reachable)
8172 default_bb = create_empty_bb (l1_bb->prev_bb);
8173 else
8174 default_bb = create_empty_bb (l0_bb);
  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle there being no more
     work, and a default case to abort if something goes wrong.  */
8179 len = EDGE_COUNT (l0_bb->succs);
8181 /* Use vec::quick_push on label_vec throughout, since we know the size
8182 in advance. */
8183 auto_vec<tree> label_vec (len);
8185 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8186 GIMPLE_OMP_SECTIONS statement. */
8187 si = gsi_last_nondebug_bb (entry_bb);
8188 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8189 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8190 vin = gimple_omp_sections_control (sections_stmt);
8191 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8192 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8193 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8194 tree cond_var = NULL_TREE;
8195 if (reductmp || condtmp)
8197 tree reductions = null_pointer_node, mem = null_pointer_node;
8198 tree memv = NULL_TREE, condtemp = NULL_TREE;
8199 gimple_stmt_iterator gsi = gsi_none ();
8200 gimple *g = NULL;
8201 if (reductmp)
8203 reductions = OMP_CLAUSE_DECL (reductmp);
8204 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8205 g = SSA_NAME_DEF_STMT (reductions);
8206 reductions = gimple_assign_rhs1 (g);
8207 OMP_CLAUSE_DECL (reductmp) = reductions;
8208 gsi = gsi_for_stmt (g);
8210 else
8211 gsi = si;
8212 if (condtmp)
8214 condtemp = OMP_CLAUSE_DECL (condtmp);
8215 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8216 OMP_CLAUSE__CONDTEMP_);
8217 cond_var = OMP_CLAUSE_DECL (c);
8218 tree type = TREE_TYPE (condtemp);
8219 memv = create_tmp_var (type);
8220 TREE_ADDRESSABLE (memv) = 1;
8221 unsigned cnt = 0;
8222 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8223 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8224 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8225 ++cnt;
8226 unsigned HOST_WIDE_INT sz
8227 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8228 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8229 false);
8230 mem = build_fold_addr_expr (memv);
8232 t = build_int_cst (unsigned_type_node, len - 1);
8233 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8234 stmt = gimple_build_call (u, 3, t, reductions, mem);
8235 gimple_call_set_lhs (stmt, vin);
8236 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8237 if (condtmp)
8239 expand_omp_build_assign (&gsi, condtemp, memv, false);
8240 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8241 vin, build_one_cst (TREE_TYPE (cond_var)));
8242 expand_omp_build_assign (&gsi, cond_var, t, false);
8244 if (reductmp)
8246 gsi_remove (&gsi, true);
8247 release_ssa_name (gimple_assign_lhs (g));
8250 else if (!is_combined_parallel (region))
8252 /* If we are not inside a combined parallel+sections region,
8253 call GOMP_sections_start. */
8254 t = build_int_cst (unsigned_type_node, len - 1);
8255 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8256 stmt = gimple_build_call (u, 1, t);
8258 else
8260 /* Otherwise, call GOMP_sections_next. */
8261 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8262 stmt = gimple_build_call (u, 0);
8264 if (!reductmp && !condtmp)
8266 gimple_call_set_lhs (stmt, vin);
8267 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8269 gsi_remove (&si, true);
8271 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8272 L0_BB. */
8273 switch_si = gsi_last_nondebug_bb (l0_bb);
8274 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8275 if (exit_reachable)
8277 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8278 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8279 vmain = gimple_omp_continue_control_use (cont);
8280 vnext = gimple_omp_continue_control_def (cont);
8282 else
8284 vmain = vin;
8285 vnext = NULL_TREE;
8288 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8289 label_vec.quick_push (t);
8290 i = 1;
8292 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8293 for (inner = region->inner, casei = 1;
8294 inner;
8295 inner = inner->next, i++, casei++)
8297 basic_block s_entry_bb, s_exit_bb;
8299 /* Skip optional reduction region. */
8300 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8302 --i;
8303 --casei;
8304 continue;
8307 s_entry_bb = inner->entry;
8308 s_exit_bb = inner->exit;
8310 t = gimple_block_label (s_entry_bb);
8311 u = build_int_cst (unsigned_type_node, casei);
8312 u = build_case_label (u, NULL, t);
8313 label_vec.quick_push (u);
8315 si = gsi_last_nondebug_bb (s_entry_bb);
8316 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8317 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8318 gsi_remove (&si, true);
8319 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8321 if (s_exit_bb == NULL)
8322 continue;
8324 si = gsi_last_nondebug_bb (s_exit_bb);
8325 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8326 gsi_remove (&si, true);
8328 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8331 /* Error handling code goes in DEFAULT_BB. */
8332 t = gimple_block_label (default_bb);
8333 u = build_case_label (NULL, NULL, t);
8334 make_edge (l0_bb, default_bb, 0);
8335 add_bb_to_loop (default_bb, current_loops->tree_root);
8337 stmt = gimple_build_switch (vmain, u, label_vec);
8338 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8339 gsi_remove (&switch_si, true);
8341 si = gsi_start_bb (default_bb);
8342 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8343 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8345 if (exit_reachable)
8347 tree bfn_decl;
8349 /* Code to get the next section goes in L1_BB. */
8350 si = gsi_last_nondebug_bb (l1_bb);
8351 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8353 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8354 stmt = gimple_build_call (bfn_decl, 0);
8355 gimple_call_set_lhs (stmt, vnext);
8356 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8357 if (cond_var)
8359 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8360 vnext, build_one_cst (TREE_TYPE (cond_var)));
8361 expand_omp_build_assign (&si, cond_var, t, false);
8363 gsi_remove (&si, true);
8365 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8368 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8369 si = gsi_last_nondebug_bb (l2_bb);
8370 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8371 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8372 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8373 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8374 else
8375 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8376 stmt = gimple_build_call (t, 0);
8377 if (gimple_omp_return_lhs (gsi_stmt (si)))
8378 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8379 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8380 gsi_remove (&si, true);
8382 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code; here we simply place the GOMP_barrier call.  */
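/* Illustrative sketch (assumed shape, not from the sources): by now

     #pragma omp single
     body;

   has been lowered to roughly

     if (GOMP_single_start ()) body;
     GOMP_barrier ();

   and the only job left here is placing that barrier, which is skipped
   when a nowait clause is present (see the gimple_omp_return_nowait_p
   test below).  */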
8388 static void
8389 expand_omp_single (struct omp_region *region)
8391 basic_block entry_bb, exit_bb;
8392 gimple_stmt_iterator si;
8394 entry_bb = region->entry;
8395 exit_bb = region->exit;
8397 si = gsi_last_nondebug_bb (entry_bb);
8398 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8399 gsi_remove (&si, true);
8400 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8402 si = gsi_last_nondebug_bb (exit_bb);
8403 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8405 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8406 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8408 gsi_remove (&si, true);
8409 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8412 /* Generic expansion for OpenMP synchronization directives: master,
8413 ordered and critical. All we need to do here is remove the entry
8414 and exit markers for REGION. */
8416 static void
8417 expand_omp_synch (struct omp_region *region)
8419 basic_block entry_bb, exit_bb;
8420 gimple_stmt_iterator si;
8422 entry_bb = region->entry;
8423 exit_bb = region->exit;
8425 si = gsi_last_nondebug_bb (entry_bb);
8426 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8427 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8428 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8429 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8430 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8431 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8432 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8433 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8435 expand_omp_taskreg (region);
8436 return;
8438 gsi_remove (&si, true);
8439 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8441 if (exit_bb)
8443 si = gsi_last_nondebug_bb (exit_bb);
8444 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8445 gsi_remove (&si, true);
8446 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8450 /* Translate enum omp_memory_order to enum memmodel. The two enums
8451 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
8452 is 0. */
8454 static enum memmodel
8455 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8457 switch (mo)
8459 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8460 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8461 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8462 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8463 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8464 default: gcc_unreachable ();
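/* Illustrative usage sketch (schematic, not verbatim output): for
   '#pragma omp atomic seq_cst' the front end records
   OMP_MEMORY_ORDER_SEQ_CST on the GIMPLE_OMP_ATOMIC_* statements, and
   the expanders below emit, roughly,

     __atomic_store_4 (&x, val, omp_memory_order_to_memmodel (omo));

   i.e. the mapped MEMMODEL_SEQ_CST value becomes the integer
   memory-model argument of the __atomic_* builtin.  */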
8468 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8469 operation as a normal volatile load. */
8471 static bool
8472 expand_omp_atomic_load (basic_block load_bb, tree addr,
8473 tree loaded_val, int index)
8475 enum built_in_function tmpbase;
8476 gimple_stmt_iterator gsi;
8477 basic_block store_bb;
8478 location_t loc;
8479 gimple *stmt;
8480 tree decl, call, type, itype;
8482 gsi = gsi_last_nondebug_bb (load_bb);
8483 stmt = gsi_stmt (gsi);
8484 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8485 loc = gimple_location (stmt);
8487 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8488 is smaller than word size, then expand_atomic_load assumes that the load
8489 is atomic. We could avoid the builtin entirely in this case. */
8491 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8492 decl = builtin_decl_explicit (tmpbase);
8493 if (decl == NULL_TREE)
8494 return false;
8496 type = TREE_TYPE (loaded_val);
8497 itype = TREE_TYPE (TREE_TYPE (decl));
8499 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8500 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8501 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8502 if (!useless_type_conversion_p (type, itype))
8503 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8504 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8506 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8507 gsi_remove (&gsi, true);
8509 store_bb = single_succ (load_bb);
8510 gsi = gsi_last_nondebug_bb (store_bb);
8511 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8512 gsi_remove (&gsi, true);
8514 if (gimple_in_ssa_p (cfun))
8515 update_ssa (TODO_update_ssa_no_phi);
8517 return true;
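/* Worked example (schematic sketch, assuming a 4-byte int 'x' and a
   recorded relaxed memory order): the region for

     #pragma omp atomic read
       v = x;

   arrives here as

     GIMPLE_OMP_ATOMIC_LOAD (v, &x)
     GIMPLE_OMP_ATOMIC_STORE (v)

   and, with INDEX == 2 selecting the BUILT_IN_ATOMIC_LOAD_4 decl, is
   replaced by roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);  */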
8520 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8521 operation as a normal volatile store. */
8523 static bool
8524 expand_omp_atomic_store (basic_block load_bb, tree addr,
8525 tree loaded_val, tree stored_val, int index)
8527 enum built_in_function tmpbase;
8528 gimple_stmt_iterator gsi;
8529 basic_block store_bb = single_succ (load_bb);
8530 location_t loc;
8531 gimple *stmt;
8532 tree decl, call, type, itype;
8533 machine_mode imode;
8534 bool exchange;
8536 gsi = gsi_last_nondebug_bb (load_bb);
8537 stmt = gsi_stmt (gsi);
8538 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8540 /* If the load value is needed, then this isn't a store but an exchange. */
8541 exchange = gimple_omp_atomic_need_value_p (stmt);
8543 gsi = gsi_last_nondebug_bb (store_bb);
8544 stmt = gsi_stmt (gsi);
8545 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8546 loc = gimple_location (stmt);
8548 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8549 is smaller than word size, then expand_atomic_store assumes that the store
8550 is atomic. We could avoid the builtin entirely in this case. */
8552 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8553 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8554 decl = builtin_decl_explicit (tmpbase);
8555 if (decl == NULL_TREE)
8556 return false;
8558 type = TREE_TYPE (stored_val);
8560 /* Dig out the type of the function's second argument. */
8561 itype = TREE_TYPE (decl);
8562 itype = TYPE_ARG_TYPES (itype);
8563 itype = TREE_CHAIN (itype);
8564 itype = TREE_VALUE (itype);
8565 imode = TYPE_MODE (itype);
8567 if (exchange && !can_atomic_exchange_p (imode, true))
8568 return false;
8570 if (!useless_type_conversion_p (itype, type))
8571 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8572 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8573 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8574 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8575 if (exchange)
8577 if (!useless_type_conversion_p (type, itype))
8578 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8579 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8582 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8583 gsi_remove (&gsi, true);
8585 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8586 gsi = gsi_last_nondebug_bb (load_bb);
8587 gsi_remove (&gsi, true);
8589 if (gimple_in_ssa_p (cfun))
8590 update_ssa (TODO_update_ssa_no_phi);
8592 return true;
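/* Worked example (schematic sketch, 4-byte 'x', relaxed order):

     #pragma omp atomic write
       x = expr;

   becomes roughly  __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);
   whereas a capture form that still needs the old value, e.g.

     #pragma omp atomic capture
       { v = x; x = expr; }

   takes the EXCHANGE path instead:

     v = __atomic_exchange_4 (&x, expr, MEMMODEL_RELAXED);  */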
8595 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8596 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8597 size of the data type, and thus usable to find the index of the builtin
8598 decl. Returns false if the expression is not of the proper form. */
8600 static bool
8601 expand_omp_atomic_fetch_op (basic_block load_bb,
8602 tree addr, tree loaded_val,
8603 tree stored_val, int index)
8605 enum built_in_function oldbase, newbase, tmpbase;
8606 tree decl, itype, call;
8607 tree lhs, rhs;
8608 basic_block store_bb = single_succ (load_bb);
8609 gimple_stmt_iterator gsi;
8610 gimple *stmt;
8611 location_t loc;
8612 enum tree_code code;
8613 bool need_old, need_new;
8614 machine_mode imode;
8616 /* We expect to find the following sequences:
8618 load_bb:
8619 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8621 store_bb:
8622 val = tmp OP something; (or: something OP tmp)
8623 GIMPLE_OMP_ATOMIC_STORE (val)
8625 ???FIXME: Allow a more flexible sequence.
8626 Perhaps use data flow to pick the statements.
8630 gsi = gsi_after_labels (store_bb);
8631 stmt = gsi_stmt (gsi);
8632 if (is_gimple_debug (stmt))
8634 gsi_next_nondebug (&gsi);
8635 if (gsi_end_p (gsi))
8636 return false;
8637 stmt = gsi_stmt (gsi);
8639 loc = gimple_location (stmt);
8640 if (!is_gimple_assign (stmt))
8641 return false;
8642 gsi_next_nondebug (&gsi);
8643 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8644 return false;
8645 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8646 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8647 enum omp_memory_order omo
8648 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8649 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8650 gcc_checking_assert (!need_old || !need_new);
8652 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8653 return false;
8655 /* Check for one of the supported fetch-op operations. */
8656 code = gimple_assign_rhs_code (stmt);
8657 switch (code)
8659 case PLUS_EXPR:
8660 case POINTER_PLUS_EXPR:
8661 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8662 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8663 break;
8664 case MINUS_EXPR:
8665 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8666 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8667 break;
8668 case BIT_AND_EXPR:
8669 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8670 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8671 break;
8672 case BIT_IOR_EXPR:
8673 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8674 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8675 break;
8676 case BIT_XOR_EXPR:
8677 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8678 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8679 break;
8680 default:
8681 return false;
8684 /* Make sure the expression is of the proper form. */
8685 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8686 rhs = gimple_assign_rhs2 (stmt);
8687 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8688 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8689 rhs = gimple_assign_rhs1 (stmt);
8690 else
8691 return false;
8693 tmpbase = ((enum built_in_function)
8694 ((need_new ? newbase : oldbase) + index + 1));
8695 decl = builtin_decl_explicit (tmpbase);
8696 if (decl == NULL_TREE)
8697 return false;
8698 itype = TREE_TYPE (TREE_TYPE (decl));
8699 imode = TYPE_MODE (itype);
8701 /* We could test all of the various optabs involved, but the fact of the
8702 matter is that (with the exception of i486 vs i586 and xadd) all targets
8703 that support any atomic operation optab also implement compare-and-swap.
8704 Let optabs.c take care of expanding any compare-and-swap loop. */
8705 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8706 return false;
8708 gsi = gsi_last_nondebug_bb (load_bb);
8709 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8711 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8712 It only requires that the operation happen atomically. Thus we can
8713 use the RELAXED memory model. */
8714 call = build_call_expr_loc (loc, decl, 3, addr,
8715 fold_convert_loc (loc, itype, rhs),
8716 build_int_cst (NULL, mo));
8718 if (need_old || need_new)
8720 lhs = need_old ? loaded_val : stored_val;
8721 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8722 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8724 else
8725 call = fold_convert_loc (loc, void_type_node, call);
8726 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8727 gsi_remove (&gsi, true);
8729 gsi = gsi_last_nondebug_bb (store_bb);
8730 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8731 gsi_remove (&gsi, true);
8732 gsi = gsi_last_nondebug_bb (store_bb);
8733 stmt = gsi_stmt (gsi);
8734 gsi_remove (&gsi, true);
8736 if (gimple_in_ssa_p (cfun))
8738 release_defs (stmt);
8739 update_ssa (TODO_update_ssa_no_phi);
8742 return true;
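/* Worked example (schematic sketch, 4-byte int 'x'):

     #pragma omp atomic
       x += n;

   matches the PLUS_EXPR case above and, since neither the old nor the
   new value is needed, becomes roughly

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   If the new value were captured ('v = x += n;'), NEED_NEW would
   select the BUILT_IN_ATOMIC_ADD_FETCH_4 variant and assign its
   result to 'v'.  */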
8745 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8747 oldval = *addr;
8748 repeat:
8749 newval = rhs; // with oldval replacing *addr in rhs
8750 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8751 if (oldval != newval)
8752 goto repeat;
8754 INDEX is log2 of the size of the data type, and thus usable to find the
8755 index of the builtin decl. */
8757 static bool
8758 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8759 tree addr, tree loaded_val, tree stored_val,
8760 int index)
8762 tree loadedi, storedi, initial, new_storedi, old_vali;
8763 tree type, itype, cmpxchg, iaddr, atype;
8764 gimple_stmt_iterator si;
8765 basic_block loop_header = single_succ (load_bb);
8766 gimple *phi, *stmt;
8767 edge e;
8768 enum built_in_function fncode;
8770 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8771 order to use the RELAXED memory model effectively. */
8772 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8773 + index + 1);
8774 cmpxchg = builtin_decl_explicit (fncode);
8775 if (cmpxchg == NULL_TREE)
8776 return false;
8777 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8778 atype = type;
8779 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8781 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8782 || !can_atomic_load_p (TYPE_MODE (itype)))
8783 return false;
8785 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8786 si = gsi_last_nondebug_bb (load_bb);
8787 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8789 /* For floating-point values, we'll need to view-convert them to integers
8790 so that we can perform the atomic compare and swap. Simplify the
8791 following code by always setting up the "i"ntegral variables. */
8792 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8794 tree iaddr_val;
8796 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8797 true));
8798 atype = itype;
8799 iaddr_val
8800 = force_gimple_operand_gsi (&si,
8801 fold_convert (TREE_TYPE (iaddr), addr),
8802 false, NULL_TREE, true, GSI_SAME_STMT);
8803 stmt = gimple_build_assign (iaddr, iaddr_val);
8804 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8805 loadedi = create_tmp_var (itype);
8806 if (gimple_in_ssa_p (cfun))
8807 loadedi = make_ssa_name (loadedi);
8809 else
8811 iaddr = addr;
8812 loadedi = loaded_val;
8815 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8816 tree loaddecl = builtin_decl_explicit (fncode);
8817 if (loaddecl)
8818 initial
8819 = fold_convert (atype,
8820 build_call_expr (loaddecl, 2, iaddr,
8821 build_int_cst (NULL_TREE,
8822 MEMMODEL_RELAXED)));
8823 else
8825 tree off
8826 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8827 true), 0);
8828 initial = build2 (MEM_REF, atype, iaddr, off);
8831 initial
8832 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8833 GSI_SAME_STMT);
8835 /* Move the value to the LOADEDI temporary. */
8836 if (gimple_in_ssa_p (cfun))
8838 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8839 phi = create_phi_node (loadedi, loop_header);
8840 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8841 initial);
8843 else
8844 gsi_insert_before (&si,
8845 gimple_build_assign (loadedi, initial),
8846 GSI_SAME_STMT);
8847 if (loadedi != loaded_val)
8849 gimple_stmt_iterator gsi2;
8850 tree x;
8852 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8853 gsi2 = gsi_start_bb (loop_header);
8854 if (gimple_in_ssa_p (cfun))
8856 gassign *stmt;
8857 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8858 true, GSI_SAME_STMT);
8859 stmt = gimple_build_assign (loaded_val, x);
8860 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8862 else
8864 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8865 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8866 true, GSI_SAME_STMT);
8869 gsi_remove (&si, true);
8871 si = gsi_last_nondebug_bb (store_bb);
8872 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8874 if (iaddr == addr)
8875 storedi = stored_val;
8876 else
8877 storedi
8878 = force_gimple_operand_gsi (&si,
8879 build1 (VIEW_CONVERT_EXPR, itype,
8880 stored_val), true, NULL_TREE, true,
8881 GSI_SAME_STMT);
8883 /* Build the compare&swap statement. */
8884 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8885 new_storedi = force_gimple_operand_gsi (&si,
8886 fold_convert (TREE_TYPE (loadedi),
8887 new_storedi),
8888 true, NULL_TREE,
8889 true, GSI_SAME_STMT);
8891 if (gimple_in_ssa_p (cfun))
8892 old_vali = loadedi;
8893 else
8895 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8896 stmt = gimple_build_assign (old_vali, loadedi);
8897 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8899 stmt = gimple_build_assign (loadedi, new_storedi);
8900 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8903 /* Note that we always perform the comparison as an integer, even for
8904 floating point. This allows the atomic operation to properly
8905 succeed even with NaNs and -0.0. */
8906 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8907 stmt = gimple_build_cond_empty (ne);
8908 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8910 /* Update cfg. */
8911 e = single_succ_edge (store_bb);
8912 e->flags &= ~EDGE_FALLTHRU;
8913 e->flags |= EDGE_FALSE_VALUE;
8914 /* Expect no looping. */
8915 e->probability = profile_probability::guessed_always ();
8917 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8918 e->probability = profile_probability::guessed_never ();
8920 /* Copy the new value to loadedi (we already did that before the condition
8921 if we are not in SSA). */
8922 if (gimple_in_ssa_p (cfun))
8924 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8925 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8928 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8929 gsi_remove (&si, true);
8931 class loop *loop = alloc_loop ();
8932 loop->header = loop_header;
8933 loop->latch = store_bb;
8934 add_loop (loop, loop_header->loop_father);
8936 if (gimple_in_ssa_p (cfun))
8937 update_ssa (TODO_update_ssa_no_phi);
8939 return true;
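/* Worked example (schematic sketch): 'float f' has no fetch-op
   builtin, so

     #pragma omp atomic
       f *= 2.0f;

   is expanded along the lines of

     unsigned int *ia = (unsigned int *) &f;
     loadedi = *ia;
    repeat:
     storedi = VIEW_CONVERT (unsigned int,
                             VIEW_CONVERT (float, loadedi) * 2.0f);
     old = __sync_val_compare_and_swap_4 (ia, loadedi, storedi);
     if (old != loadedi)
       { loadedi = old; goto repeat; }

   with the comparison done on the integer views, as noted above.  */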
8942 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8944 GOMP_atomic_start ();
8945 *addr = rhs;
8946 GOMP_atomic_end ();
8948 The result is not globally atomic, but works so long as all parallel
8949 references are within #pragma omp atomic directives. According to
8950 responses received from omp@openmp.org, this appears to be within
8951 spec, which makes sense, since that's how several other compilers
8952 handle this situation as well.
8953 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8954 expanding. STORED_VAL is the operand of the matching
8955 GIMPLE_OMP_ATOMIC_STORE.
8957 We replace
8958 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8959 loaded_val = *addr;
8961 and replace
8962 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8963 *addr = stored_val;
8966 static bool
8967 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8968 tree addr, tree loaded_val, tree stored_val)
8970 gimple_stmt_iterator si;
8971 gassign *stmt;
8972 tree t;
8974 si = gsi_last_nondebug_bb (load_bb);
8975 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8977 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8978 t = build_call_expr (t, 0);
8979 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8981 tree mem = build_simple_mem_ref (addr);
8982 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8983 TREE_OPERAND (mem, 1)
8984 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8985 true),
8986 TREE_OPERAND (mem, 1));
8987 stmt = gimple_build_assign (loaded_val, mem);
8988 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8989 gsi_remove (&si, true);
8991 si = gsi_last_nondebug_bb (store_bb);
8992 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8994 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8995 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8997 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8998 t = build_call_expr (t, 0);
8999 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9000 gsi_remove (&si, true);
9002 if (gimple_in_ssa_p (cfun))
9003 update_ssa (TODO_update_ssa_no_phi);
9004 return true;
9007 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it first
9008 using expand_omp_atomic_fetch_op. If that fails, we try to
9009 call expand_omp_atomic_pipeline, and if that fails too, the
9010 ultimate fallback is wrapping the operation in a mutex
9011 (expand_omp_atomic_mutex). REGION is the atomic region built
9012 by build_omp_regions_1(). */
9014 static void
9015 expand_omp_atomic (struct omp_region *region)
9017 basic_block load_bb = region->entry, store_bb = region->exit;
9018 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9019 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9020 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9021 tree addr = gimple_omp_atomic_load_rhs (load);
9022 tree stored_val = gimple_omp_atomic_store_val (store);
9023 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9024 HOST_WIDE_INT index;
9026 /* Make sure the type is one of the supported sizes. */
9027 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9028 index = exact_log2 (index);
9029 if (index >= 0 && index <= 4)
9031 unsigned int align = TYPE_ALIGN_UNIT (type);
9033 /* __sync builtins require strict data alignment. */
9034 if (exact_log2 (align) >= index)
9036 /* Atomic load. */
9037 scalar_mode smode;
9038 if (loaded_val == stored_val
9039 && (is_int_mode (TYPE_MODE (type), &smode)
9040 || is_float_mode (TYPE_MODE (type), &smode))
9041 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9042 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9043 return;
9045 /* Atomic store. */
9046 if ((is_int_mode (TYPE_MODE (type), &smode)
9047 || is_float_mode (TYPE_MODE (type), &smode))
9048 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9049 && store_bb == single_succ (load_bb)
9050 && first_stmt (store_bb) == store
9051 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9052 stored_val, index))
9053 return;
9055 /* When possible, use specialized atomic update functions. */
9056 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9057 && store_bb == single_succ (load_bb)
9058 && expand_omp_atomic_fetch_op (load_bb, addr,
9059 loaded_val, stored_val, index))
9060 return;
9062 /* If we don't have specialized __sync builtins, try and implement
9063 as a compare and swap loop. */
9064 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9065 loaded_val, stored_val, index))
9066 return;
9070 /* The ultimate fallback is wrapping the operation in a mutex. */
9071 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
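/* Worked example for the size/alignment gate above (illustrative):
   for 'double x', TYPE_SIZE_UNIT == 8 gives INDEX == exact_log2 (8)
   == 3, so the *_8 builtin variants are tried, and TYPE_ALIGN_UNIT
   == 8 satisfies exact_log2 (8) >= 3.  An under-aligned 8-byte member
   (say alignment 4 inside a packed struct) fails that test and falls
   through directly to the expand_omp_atomic_mutex fallback.  */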
9074 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9075 at REGION_EXIT. */
9077 static void
9078 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9079 basic_block region_exit)
9081 class loop *outer = region_entry->loop_father;
9082 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9084 /* Don't parallelize the kernels region if it contains more than one outer
9085 loop. */
9086 unsigned int nr_outer_loops = 0;
9087 class loop *single_outer = NULL;
9088 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9090 gcc_assert (loop_outer (loop) == outer);
9092 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9093 continue;
9095 if (region_exit != NULL
9096 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9097 continue;
9099 nr_outer_loops++;
9100 single_outer = loop;
9102 if (nr_outer_loops != 1)
9103 return;
9105 for (class loop *loop = single_outer->inner;
9106 loop != NULL;
9107 loop = loop->inner)
9108 if (loop->next)
9109 return;
9111 /* Mark the loops in the region. */
9112 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9113 loop->in_oacc_kernels_region = true;
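/* Illustrative example: in

     #pragma acc kernels
     for (i = 0; i < n; i++)      <-- single outer loop
       for (j = 0; j < m; j++)    <-- its only child
         ...

   both loops get in_oacc_kernels_region set.  A region with two
   sibling top-level loops (nr_outer_loops == 2), or one whose inner
   loop has a sibling, is left unmarked and thus not parallelized.  */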
9116 /* Build target argument identifier from the DEVICE identifier, value
9117 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9119 static tree
9120 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9122 tree t = build_int_cst (integer_type_node, device);
9123 if (subsequent_param)
9124 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9125 build_int_cst (integer_type_node,
9126 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9127 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9128 build_int_cst (integer_type_node, id));
9129 return t;
9132 /* Like above but return it in a type that can be directly stored as an element
9133 of the argument array. */
9135 static tree
9136 get_target_argument_identifier (int device, bool subsequent_param, int id)
9138 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9139 return fold_convert (ptr_type_node, t);
9142 /* Return a target argument consisting of DEVICE identifier, value identifier
9143 ID, and the actual VALUE. */
9145 static tree
9146 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9147 tree value)
9149 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9150 fold_convert (integer_type_node, value),
9151 build_int_cst (unsigned_type_node,
9152 GOMP_TARGET_ARG_VALUE_SHIFT));
9153 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9154 get_target_argument_identifier_1 (device, false, id));
9155 t = fold_convert (ptr_type_node, t);
9156 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9159 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9160 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9161 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9162 arguments. */
9164 static void
9165 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9166 int id, tree value, vec <tree> *args)
9168 if (tree_fits_shwi_p (value)
9169 && tree_to_shwi (value) > -(1 << 15)
9170 && tree_to_shwi (value) < (1 << 15))
9171 args->quick_push (get_target_argument_value (gsi, device, id, value));
9172 else
9174 args->quick_push (get_target_argument_identifier (device, true, id));
9175 value = fold_convert (ptr_type_node, value);
9176 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9177 GSI_SAME_STMT);
9178 args->quick_push (value);
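/* Packing sketch (illustrative): a constant 'num_teams (4)' clause
   fits into 16 signed bits and is pushed as the single element

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas a runtime-only expression pushes the identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself
   cast to 'void *' as a second element.  */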
9182 /* Create an array of arguments that is then passed to GOMP_target. */
9184 static tree
9185 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9187 auto_vec <tree, 6> args;
9188 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9189 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9190 if (c)
9191 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9192 else
9193 t = integer_minus_one_node;
9194 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9195 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9197 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9198 if (c)
9199 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9200 else
9201 t = integer_minus_one_node;
9202 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9203 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9204 &args);
9206 /* Produce more, perhaps device specific, arguments here. */
9208 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9209 args.length () + 1),
9210 ".omp_target_args");
9211 for (unsigned i = 0; i < args.length (); i++)
9213 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9214 build_int_cst (integer_type_node, i),
9215 NULL_TREE, NULL_TREE);
9216 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9217 GSI_SAME_STMT);
9219 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9220 build_int_cst (integer_type_node, args.length ()),
9221 NULL_TREE, NULL_TREE);
9222 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9223 GSI_SAME_STMT);
9224 TREE_ADDRESSABLE (argarray) = 1;
9225 return build_fold_addr_expr (argarray);
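/* Resulting layout (illustrative sketch): with both clauses handled
   above, .omp_target_args is an array of 'void *' along the lines of

     [0] packed NUM_TEAMS argument (or identifier + value pair)
     [1] packed THREAD_LIMIT argument
     [n] NULL terminator

   whose address becomes the trailing arguments parameter of the
   GOMP_target launch call built in expand_omp_target.  */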
9228 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9230 static void
9231 expand_omp_target (struct omp_region *region)
9233 basic_block entry_bb, exit_bb, new_bb;
9234 struct function *child_cfun;
9235 tree child_fn, block, t;
9236 gimple_stmt_iterator gsi;
9237 gomp_target *entry_stmt;
9238 gimple *stmt;
9239 edge e;
9240 bool offloaded, data_region;
9241 int target_kind;
9243 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9244 target_kind = gimple_omp_target_kind (entry_stmt);
9245 new_bb = region->entry;
9247 offloaded = is_gimple_omp_offloaded (entry_stmt);
9248 switch (target_kind)
9250 case GF_OMP_TARGET_KIND_REGION:
9251 case GF_OMP_TARGET_KIND_UPDATE:
9252 case GF_OMP_TARGET_KIND_ENTER_DATA:
9253 case GF_OMP_TARGET_KIND_EXIT_DATA:
9254 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9255 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9256 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9257 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9258 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9259 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9260 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9261 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9262 data_region = false;
9263 break;
9264 case GF_OMP_TARGET_KIND_DATA:
9265 case GF_OMP_TARGET_KIND_OACC_DATA:
9266 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9267 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9268 data_region = true;
9269 break;
9270 default:
9271 gcc_unreachable ();
9274 child_fn = NULL_TREE;
9275 child_cfun = NULL;
9276 if (offloaded)
9278 child_fn = gimple_omp_target_child_fn (entry_stmt);
9279 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9282 /* Supported by expand_omp_taskreg, but not here. */
9283 if (child_cfun != NULL)
9284 gcc_checking_assert (!child_cfun->cfg);
9285 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9287 entry_bb = region->entry;
9288 exit_bb = region->exit;
9290 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9291 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9293 /* From here on, all OpenACC compute constructs are mapped to
9294 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9295 To distinguish between them, we attach attributes. */
9296 switch (target_kind)
9298 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9299 DECL_ATTRIBUTES (child_fn)
9300 = tree_cons (get_identifier ("oacc parallel"),
9301 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9302 break;
9303 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9304 DECL_ATTRIBUTES (child_fn)
9305 = tree_cons (get_identifier ("oacc kernels"),
9306 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9307 break;
9308 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9309 DECL_ATTRIBUTES (child_fn)
9310 = tree_cons (get_identifier ("oacc serial"),
9311 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9312 break;
9313 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9314 DECL_ATTRIBUTES (child_fn)
9315 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9316 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9317 break;
9318 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9319 DECL_ATTRIBUTES (child_fn)
9320 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9321 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9322 break;
9323 default:
9324 /* Make sure we don't miss any. */
9325 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9326 && is_gimple_omp_offloaded (entry_stmt)));
9327 break;
9330 if (offloaded)
9332 unsigned srcidx, dstidx, num;
9334 /* If the offloading region needs data sent from the parent
9335 function, then the very first statement (except possible
9336 tree profile counter updates) of the offloading body
9337 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9338 &.OMP_DATA_O is passed as an argument to the child function,
9339 we need to replace it with the argument as seen by the child
9340 function.
9342 In most cases, this will end up being the identity assignment
9343 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9344 a function call that has been inlined, the original PARM_DECL
9345 .OMP_DATA_I may have been converted into a different local
9346 variable, in which case we need to keep the assignment. */
9347 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9348 if (data_arg)
9350 basic_block entry_succ_bb = single_succ (entry_bb);
9351 gimple_stmt_iterator gsi;
9352 tree arg;
9353 gimple *tgtcopy_stmt = NULL;
9354 tree sender = TREE_VEC_ELT (data_arg, 0);
9356 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9358 gcc_assert (!gsi_end_p (gsi));
9359 stmt = gsi_stmt (gsi);
9360 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9361 continue;
9363 if (gimple_num_ops (stmt) == 2)
9365 tree arg = gimple_assign_rhs1 (stmt);
9367 /* We're ignoring the subcode because we're
9368 effectively doing a STRIP_NOPS. */
9370 if (TREE_CODE (arg) == ADDR_EXPR
9371 && TREE_OPERAND (arg, 0) == sender)
9373 tgtcopy_stmt = stmt;
9374 break;
9379 gcc_assert (tgtcopy_stmt != NULL);
9380 arg = DECL_ARGUMENTS (child_fn);
9382 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9383 gsi_remove (&gsi, true);
9386 /* Declare local variables needed in CHILD_CFUN. */
9387 block = DECL_INITIAL (child_fn);
9388 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9389 /* The gimplifier could record temporaries in the offloading block
9390 rather than in the containing function's local_decls chain,
9391 which would mean cgraph missed finalizing them. Do it now. */
9392 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9393 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9394 varpool_node::finalize_decl (t);
9395 DECL_SAVED_TREE (child_fn) = NULL;
9396 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9397 gimple_set_body (child_fn, NULL);
9398 TREE_USED (block) = 1;
9400 /* Reset DECL_CONTEXT on function arguments. */
9401 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9402 DECL_CONTEXT (t) = child_fn;
9404 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9405 so that it can be moved to the child function. */
9406 gsi = gsi_last_nondebug_bb (entry_bb);
9407 stmt = gsi_stmt (gsi);
9408 gcc_assert (stmt
9409 && gimple_code (stmt) == gimple_code (entry_stmt));
9410 e = split_block (entry_bb, stmt);
9411 gsi_remove (&gsi, true);
9412 entry_bb = e->dest;
9413 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9415 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9416 if (exit_bb)
9418 gsi = gsi_last_nondebug_bb (exit_bb);
9419 gcc_assert (!gsi_end_p (gsi)
9420 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9421 stmt = gimple_build_return (NULL);
9422 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9423 gsi_remove (&gsi, true);
9426 /* Move the offloading region into CHILD_CFUN. */
9428 block = gimple_block (entry_stmt);
9430 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9431 if (exit_bb)
9432 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9433 /* When the OMP expansion process cannot guarantee an up-to-date
9434 loop tree, arrange for the child function to fix up loops. */
9435 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9436 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9438 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9439 num = vec_safe_length (child_cfun->local_decls);
9440 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9442 t = (*child_cfun->local_decls)[srcidx];
9443 if (DECL_CONTEXT (t) == cfun->decl)
9444 continue;
9445 if (srcidx != dstidx)
9446 (*child_cfun->local_decls)[dstidx] = t;
9447 dstidx++;
9449 if (dstidx != num)
9450 vec_safe_truncate (child_cfun->local_decls, dstidx);
9452 /* Inform the callgraph about the new function. */
9453 child_cfun->curr_properties = cfun->curr_properties;
9454 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9455 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9456 cgraph_node *node = cgraph_node::get_create (child_fn);
9457 node->parallelized_function = 1;
9458 cgraph_node::add_new_function (child_fn, true);
9460 /* Add the new function to the offload table. */
9461 if (ENABLE_OFFLOADING)
9463 if (in_lto_p)
9464 DECL_PRESERVE_P (child_fn) = 1;
9465 vec_safe_push (offload_funcs, child_fn);
9468 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9469 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9471 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9472 fixed in a following pass. */
9473 push_cfun (child_cfun);
9474 if (need_asm)
9475 assign_assembler_name_if_needed (child_fn);
9476 cgraph_edge::rebuild_edges ();
9478 /* Some EH regions might become dead, see PR34608. If
9479 pass_cleanup_cfg isn't the first pass to happen with the
9480 new child, these dead EH edges might cause problems.
9481 Clean them up now. */
9482 if (flag_exceptions)
9484 basic_block bb;
9485 bool changed = false;
9487 FOR_EACH_BB_FN (bb, cfun)
9488 changed |= gimple_purge_dead_eh_edges (bb);
9489 if (changed)
9490 cleanup_tree_cfg ();
9492 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9493 verify_loop_structure ();
9494 pop_cfun ();
9496 if (dump_file && !gimple_in_ssa_p (cfun))
9498 omp_any_child_fn_dumped = true;
9499 dump_function_header (dump_file, child_fn, dump_flags);
9500 dump_function_to_file (child_fn, dump_file, dump_flags);
9503 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9506 /* Emit a library call to launch the offloading region, or do data
9507 transfers. */
9508 tree t1, t2, t3, t4, depend, c, clauses;
9509 enum built_in_function start_ix;
9510 unsigned int flags_i = 0;
9512 switch (gimple_omp_target_kind (entry_stmt))
9514 case GF_OMP_TARGET_KIND_REGION:
9515 start_ix = BUILT_IN_GOMP_TARGET;
9516 break;
9517 case GF_OMP_TARGET_KIND_DATA:
9518 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9519 break;
9520 case GF_OMP_TARGET_KIND_UPDATE:
9521 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9522 break;
9523 case GF_OMP_TARGET_KIND_ENTER_DATA:
9524 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9525 break;
9526 case GF_OMP_TARGET_KIND_EXIT_DATA:
9527 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9528 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9529 break;
9530 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9531 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9532 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9533 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9534 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9535 start_ix = BUILT_IN_GOACC_PARALLEL;
9536 break;
9537 case GF_OMP_TARGET_KIND_OACC_DATA:
9538 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9539 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9540 start_ix = BUILT_IN_GOACC_DATA_START;
9541 break;
9542 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9543 start_ix = BUILT_IN_GOACC_UPDATE;
9544 break;
9545 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9546 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9547 break;
9548 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9549 start_ix = BUILT_IN_GOACC_DECLARE;
9550 break;
9551 default:
9552 gcc_unreachable ();
9555 clauses = gimple_omp_target_clauses (entry_stmt);
9557 tree device = NULL_TREE;
9558 location_t device_loc = UNKNOWN_LOCATION;
9559 tree goacc_flags = NULL_TREE;
9560 if (is_gimple_omp_oacc (entry_stmt))
9562 /* By default, no GOACC_FLAGs are set. */
9563 goacc_flags = integer_zero_node;
9565 else
9567 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9568 if (c)
9570 device = OMP_CLAUSE_DEVICE_ID (c);
9571 device_loc = OMP_CLAUSE_LOCATION (c);
9573 else
9575 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9576 library choose). */
9577 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9578 device_loc = gimple_location (entry_stmt);
9581 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9582 if (c)
9583 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9586 /* By default, there is no conditional. */
9587 tree cond = NULL_TREE;
9588 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9589 if (c)
9590 cond = OMP_CLAUSE_IF_EXPR (c);
9591 /* If we found the clause 'if (cond)', build:
9592 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9593 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9594 if (cond)
9596 tree *tp;
9597 if (is_gimple_omp_oacc (entry_stmt))
9598 tp = &goacc_flags;
9599 else
9601 /* Ensure 'device' is of the correct type. */
9602 device = fold_convert_loc (device_loc, integer_type_node, device);
9604 tp = &device;
9607 cond = gimple_boolify (cond);
9609 basic_block cond_bb, then_bb, else_bb;
9610 edge e;
9611 tree tmp_var;
9613 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9614 if (offloaded)
9615 e = split_block_after_labels (new_bb);
9616 else
9618 gsi = gsi_last_nondebug_bb (new_bb);
9619 gsi_prev (&gsi);
9620 e = split_block (new_bb, gsi_stmt (gsi));
9622 cond_bb = e->src;
9623 new_bb = e->dest;
9624 remove_edge (e);
9626 then_bb = create_empty_bb (cond_bb);
9627 else_bb = create_empty_bb (then_bb);
9628 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9629 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9631 stmt = gimple_build_cond_empty (cond);
9632 gsi = gsi_last_bb (cond_bb);
9633 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9635 gsi = gsi_start_bb (then_bb);
9636 stmt = gimple_build_assign (tmp_var, *tp);
9637 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9639 gsi = gsi_start_bb (else_bb);
9640 if (is_gimple_omp_oacc (entry_stmt))
9641 stmt = gimple_build_assign (tmp_var,
9642 BIT_IOR_EXPR,
9643 *tp,
9644 build_int_cst (integer_type_node,
9645 GOACC_FLAG_HOST_FALLBACK));
9646 else
9647 stmt = gimple_build_assign (tmp_var,
9648 build_int_cst (integer_type_node,
9649 GOMP_DEVICE_HOST_FALLBACK));
9650 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9652 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9653 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9654 add_bb_to_loop (then_bb, cond_bb->loop_father);
9655 add_bb_to_loop (else_bb, cond_bb->loop_father);
9656 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9657 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9659 *tp = tmp_var;
9661 gsi = gsi_last_nondebug_bb (new_bb);
9663 else
9665 gsi = gsi_last_nondebug_bb (new_bb);
9667 if (device != NULL_TREE)
9668 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9669 true, GSI_SAME_STMT);
9672 t = gimple_omp_target_data_arg (entry_stmt);
9673 if (t == NULL)
9675 t1 = size_zero_node;
9676 t2 = build_zero_cst (ptr_type_node);
9677 t3 = t2;
9678 t4 = t2;
9680 else
9682 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9683 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9684 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9685 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9686 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9689 gimple *g;
9690 bool tagging = false;
9691 /* 11 is the maximum number of arguments used by any start_ix, without varargs. */
9692 auto_vec<tree, 11> args;
9693 if (is_gimple_omp_oacc (entry_stmt))
9695 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9696 TREE_TYPE (goacc_flags), goacc_flags);
9697 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9698 NULL_TREE, true,
9699 GSI_SAME_STMT);
9700 args.quick_push (goacc_flags_m);
9702 else
9703 args.quick_push (device);
9704 if (offloaded)
9705 args.quick_push (build_fold_addr_expr (child_fn));
9706 args.quick_push (t1);
9707 args.quick_push (t2);
9708 args.quick_push (t3);
9709 args.quick_push (t4);
9710 switch (start_ix)
9712 case BUILT_IN_GOACC_DATA_START:
9713 case BUILT_IN_GOACC_DECLARE:
9714 case BUILT_IN_GOMP_TARGET_DATA:
9715 break;
9716 case BUILT_IN_GOMP_TARGET:
9717 case BUILT_IN_GOMP_TARGET_UPDATE:
9718 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9719 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9720 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9721 if (c)
9722 depend = OMP_CLAUSE_DECL (c);
9723 else
9724 depend = build_int_cst (ptr_type_node, 0);
9725 args.quick_push (depend);
9726 if (start_ix == BUILT_IN_GOMP_TARGET)
9727 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9728 break;
9729 case BUILT_IN_GOACC_PARALLEL:
9730 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9732 tree dims = NULL_TREE;
9733 unsigned int ix;
9735 /* For serial constructs we set all dimensions to 1. */
9736 for (ix = GOMP_DIM_MAX; ix--;)
9737 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9738 oacc_replace_fn_attrib (child_fn, dims);
9740 else
9741 oacc_set_fn_attrib (child_fn, clauses, &args);
9742 tagging = true;
9743 /* FALLTHRU */
9744 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9745 case BUILT_IN_GOACC_UPDATE:
9747 tree t_async = NULL_TREE;
9749 /* If present, use the value specified by the respective
9750 clause, making sure that it is of the correct type. */
9751 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9752 if (c)
9753 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9754 integer_type_node,
9755 OMP_CLAUSE_ASYNC_EXPR (c));
9756 else if (!tagging)
9757 /* Default values for t_async. */
9758 t_async = fold_convert_loc (gimple_location (entry_stmt),
9759 integer_type_node,
9760 build_int_cst (integer_type_node,
9761 GOMP_ASYNC_SYNC));
9762 if (tagging && t_async)
9764 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9766 if (TREE_CODE (t_async) == INTEGER_CST)
9768 /* See if we can pack the async arg into the tag's
9769 operand. */
9770 i_async = TREE_INT_CST_LOW (t_async);
9771 if (i_async < GOMP_LAUNCH_OP_MAX)
9772 t_async = NULL_TREE;
9773 else
9774 i_async = GOMP_LAUNCH_OP_MAX;
9776 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9777 i_async));
9779 if (t_async)
9780 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9781 NULL_TREE, true,
9782 GSI_SAME_STMT));
9784 /* Save the argument index, and ... */
9785 unsigned t_wait_idx = args.length ();
9786 unsigned num_waits = 0;
9787 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9788 if (!tagging || c)
9789 /* ... push a placeholder. */
9790 args.safe_push (integer_zero_node);
9792 for (; c; c = OMP_CLAUSE_CHAIN (c))
9793 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9795 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9796 integer_type_node,
9797 OMP_CLAUSE_WAIT_EXPR (c));
9798 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9799 GSI_SAME_STMT);
9800 args.safe_push (arg);
9801 num_waits++;
9804 if (!tagging || num_waits)
9806 tree len;
9808 /* Now that we know the number, update the placeholder. */
9809 if (tagging)
9810 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9811 else
9812 len = build_int_cst (integer_type_node, num_waits);
9813 len = fold_convert_loc (gimple_location (entry_stmt),
9814 unsigned_type_node, len);
9815 args[t_wait_idx] = len;
9818 break;
9819 default:
9820 gcc_unreachable ();
9822 if (tagging)
9823 /* Push terminal marker - zero. */
9824 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9826 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9827 gimple_set_location (g, gimple_location (entry_stmt));
9828 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9829 if (!offloaded)
9831 g = gsi_stmt (gsi);
9832 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9833 gsi_remove (&gsi, true);
9835 if (data_region && region->exit)
9837 gsi = gsi_last_nondebug_bb (region->exit);
9838 g = gsi_stmt (gsi);
9839 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9840 gsi_remove (&gsi, true);
9844 /* Expand the parallel region tree rooted at REGION. Expansion
9845 proceeds in depth-first order. Innermost regions are expanded
9846 first. This way, parallel regions that require a new function to
9847 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9848 internal dependencies in their body. */
9850 static void
9851 expand_omp (struct omp_region *region)
9853 omp_any_child_fn_dumped = false;
9854 while (region)
9856 location_t saved_location;
9857 gimple *inner_stmt = NULL;
9859 /* First, determine whether this is a combined parallel+workshare
9860 region. */
9861 if (region->type == GIMPLE_OMP_PARALLEL)
9862 determine_parallel_type (region);
9864 if (region->type == GIMPLE_OMP_FOR
9865 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9866 inner_stmt = last_stmt (region->inner->entry);
9868 if (region->inner)
9869 expand_omp (region->inner);
9871 saved_location = input_location;
9872 if (gimple_has_location (last_stmt (region->entry)))
9873 input_location = gimple_location (last_stmt (region->entry));
9875 switch (region->type)
9877 case GIMPLE_OMP_PARALLEL:
9878 case GIMPLE_OMP_TASK:
9879 expand_omp_taskreg (region);
9880 break;
9882 case GIMPLE_OMP_FOR:
9883 expand_omp_for (region, inner_stmt);
9884 break;
9886 case GIMPLE_OMP_SECTIONS:
9887 expand_omp_sections (region);
9888 break;
9890 case GIMPLE_OMP_SECTION:
9891 /* Individual omp sections are handled together with their
9892 parent GIMPLE_OMP_SECTIONS region. */
9893 break;
9895 case GIMPLE_OMP_SINGLE:
9896 expand_omp_single (region);
9897 break;
9899 case GIMPLE_OMP_ORDERED:
9901 gomp_ordered *ord_stmt
9902 = as_a <gomp_ordered *> (last_stmt (region->entry));
9903 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9904 OMP_CLAUSE_DEPEND))
9906 /* We'll expand these when expanding the corresponding
9907 worksharing region with an ordered(n) clause. */
9908 gcc_assert (region->outer
9909 && region->outer->type == GIMPLE_OMP_FOR);
9910 region->ord_stmt = ord_stmt;
9911 break;
9914 /* FALLTHRU */
9915 case GIMPLE_OMP_MASTER:
9916 case GIMPLE_OMP_TASKGROUP:
9917 case GIMPLE_OMP_CRITICAL:
9918 case GIMPLE_OMP_TEAMS:
9919 expand_omp_synch (region);
9920 break;
9922 case GIMPLE_OMP_ATOMIC_LOAD:
9923 expand_omp_atomic (region);
9924 break;
9926 case GIMPLE_OMP_TARGET:
9927 expand_omp_target (region);
9928 break;
9930 default:
9931 gcc_unreachable ();
9934 input_location = saved_location;
9935 region = region->next;
9937 if (omp_any_child_fn_dumped)
9939 if (dump_file)
9940 dump_function_header (dump_file, current_function_decl, dump_flags);
9941 omp_any_child_fn_dumped = false;
9945 /* Helper for build_omp_regions. Scan the dominator tree starting at
9946 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9947 true, the function ends once a single tree is built (otherwise, a whole
9948 forest of OMP constructs may be built). */
9950 static void
9951 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9952 bool single_tree)
9954 gimple_stmt_iterator gsi;
9955 gimple *stmt;
9956 basic_block son;
9958 gsi = gsi_last_nondebug_bb (bb);
9959 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9961 struct omp_region *region;
9962 enum gimple_code code;
9964 stmt = gsi_stmt (gsi);
9965 code = gimple_code (stmt);
9966 if (code == GIMPLE_OMP_RETURN)
9968 /* STMT is the return point out of region PARENT. Mark it
9969 as the exit point and make PARENT the immediately
9970 enclosing region. */
9971 gcc_assert (parent);
9972 region = parent;
9973 region->exit = bb;
9974 parent = parent->outer;
9976 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9978 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9979 GIMPLE_OMP_RETURN, but matches with
9980 GIMPLE_OMP_ATOMIC_LOAD. */
9981 gcc_assert (parent);
9982 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9983 region = parent;
9984 region->exit = bb;
9985 parent = parent->outer;
9987 else if (code == GIMPLE_OMP_CONTINUE)
9989 gcc_assert (parent);
9990 parent->cont = bb;
9992 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9994 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9995 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9997 else
9999 region = new_omp_region (bb, code, parent);
10000 /* Otherwise... */
10001 if (code == GIMPLE_OMP_TARGET)
10003 switch (gimple_omp_target_kind (stmt))
10005 case GF_OMP_TARGET_KIND_REGION:
10006 case GF_OMP_TARGET_KIND_DATA:
10007 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10008 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10009 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10010 case GF_OMP_TARGET_KIND_OACC_DATA:
10011 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10012 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10013 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10014 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10015 break;
10016 case GF_OMP_TARGET_KIND_UPDATE:
10017 case GF_OMP_TARGET_KIND_ENTER_DATA:
10018 case GF_OMP_TARGET_KIND_EXIT_DATA:
10019 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10020 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10021 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10022 /* ..., other than for those stand-alone directives... */
10023 region = NULL;
10024 break;
10025 default:
10026 gcc_unreachable ();
10029 else if (code == GIMPLE_OMP_ORDERED
10030 && omp_find_clause (gimple_omp_ordered_clauses
10031 (as_a <gomp_ordered *> (stmt)),
10032 OMP_CLAUSE_DEPEND))
10033 /* #pragma omp ordered depend is also just a stand-alone
10034 directive. */
10035 region = NULL;
10036 else if (code == GIMPLE_OMP_TASK
10037 && gimple_omp_task_taskwait_p (stmt))
10038 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10039 region = NULL;
10040 /* ..., this directive becomes the parent for a new region. */
10041 if (region)
10042 parent = region;
10046 if (single_tree && !parent)
10047 return;
10049 for (son = first_dom_son (CDI_DOMINATORS, bb);
10050 son;
10051 son = next_dom_son (CDI_DOMINATORS, son))
10052 build_omp_regions_1 (son, parent, single_tree);
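/* Example (illustrative): for

     #pragma omp parallel
     {
       #pragma omp for
       ...
     }

   the dominator walk creates a GIMPLE_OMP_PARALLEL region whose
   'inner' child is the GIMPLE_OMP_FOR region; each GIMPLE_OMP_RETURN
   block sets the matching region's 'exit' and pops PARENT back to the
   enclosing region.  */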
10055 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10056 root_omp_region. */
10058 static void
10059 build_omp_regions_root (basic_block root)
10061 gcc_assert (root_omp_region == NULL);
10062 build_omp_regions_1 (root, NULL, true);
10063 gcc_assert (root_omp_region != NULL);
10066 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
10068 void
10069 omp_expand_local (basic_block head)
10071 build_omp_regions_root (head);
10072 if (dump_file && (dump_flags & TDF_DETAILS))
10074 fprintf (dump_file, "\nOMP region tree\n\n");
10075 dump_omp_region (dump_file, root_omp_region, 0);
10076 fprintf (dump_file, "\n");
10079 remove_exit_barriers (root_omp_region);
10080 expand_omp (root_omp_region);
10082 omp_free_regions ();
10085 /* Scan the CFG and build a tree of OMP regions, storing the root
10086 of the OMP region tree in root_omp_region. */
10088 static void
10089 build_omp_regions (void)
10091 gcc_assert (root_omp_region == NULL);
10092 calculate_dominance_info (CDI_DOMINATORS);
10093 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10096 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10098 static unsigned int
10099 execute_expand_omp (void)
10101 build_omp_regions ();
10103 if (!root_omp_region)
10104 return 0;
10106 if (dump_file)
10108 fprintf (dump_file, "\nOMP region tree\n\n");
10109 dump_omp_region (dump_file, root_omp_region, 0);
10110 fprintf (dump_file, "\n");
10113 remove_exit_barriers (root_omp_region);
10115 expand_omp (root_omp_region);
10117 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10118 verify_loop_structure ();
10119 cleanup_tree_cfg ();
10121 omp_free_regions ();
10123 return 0;
10126 /* OMP expansion -- the default pass, run before creation of SSA form. */
10128 namespace {
10130 const pass_data pass_data_expand_omp =
10132 GIMPLE_PASS, /* type */
10133 "ompexp", /* name */
10134 OPTGROUP_OMP, /* optinfo_flags */
10135 TV_NONE, /* tv_id */
10136 PROP_gimple_any, /* properties_required */
10137 PROP_gimple_eomp, /* properties_provided */
10138 0, /* properties_destroyed */
10139 0, /* todo_flags_start */
10140 0, /* todo_flags_finish */
10143 class pass_expand_omp : public gimple_opt_pass
10145 public:
10146 pass_expand_omp (gcc::context *ctxt)
10147 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10150 /* opt_pass methods: */
10151 virtual unsigned int execute (function *)
10153 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10154 || flag_openmp_simd != 0)
10155 && !seen_error ());
10157 /* This pass always runs, to provide PROP_gimple_eomp.
10158 But often, there is nothing to do. */
10159 if (!gate)
10160 return 0;
10162 return execute_expand_omp ();
10165 }; // class pass_expand_omp
10167 } // anon namespace
10169 gimple_opt_pass *
10170 make_pass_expand_omp (gcc::context *ctxt)
10172 return new pass_expand_omp (ctxt);
10175 namespace {
10177 const pass_data pass_data_expand_omp_ssa =
10179 GIMPLE_PASS, /* type */
10180 "ompexpssa", /* name */
10181 OPTGROUP_OMP, /* optinfo_flags */
10182 TV_NONE, /* tv_id */
10183 PROP_cfg | PROP_ssa, /* properties_required */
10184 PROP_gimple_eomp, /* properties_provided */
10185 0, /* properties_destroyed */
10186 0, /* todo_flags_start */
10187 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10190 class pass_expand_omp_ssa : public gimple_opt_pass
10192 public:
10193 pass_expand_omp_ssa (gcc::context *ctxt)
10194 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10197 /* opt_pass methods: */
10198 virtual bool gate (function *fun)
10200 return !(fun->curr_properties & PROP_gimple_eomp);
10202 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10203 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10205 }; // class pass_expand_omp_ssa
10207 } // anon namespace
10209 gimple_opt_pass *
10210 make_pass_expand_omp_ssa (gcc::context *ctxt)
10212 return new pass_expand_omp_ssa (ctxt);
10215 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10216 GIMPLE_* codes. */
10218 bool
10219 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10220 int *region_idx)
10222 gimple *last = last_stmt (bb);
10223 enum gimple_code code = gimple_code (last);
10224 struct omp_region *cur_region = *region;
10225 bool fallthru = false;
10227 switch (code)
10229 case GIMPLE_OMP_PARALLEL:
10230 case GIMPLE_OMP_FOR:
10231 case GIMPLE_OMP_SINGLE:
10232 case GIMPLE_OMP_TEAMS:
10233 case GIMPLE_OMP_MASTER:
10234 case GIMPLE_OMP_TASKGROUP:
10235 case GIMPLE_OMP_CRITICAL:
10236 case GIMPLE_OMP_SECTION:
10237 cur_region = new_omp_region (bb, code, cur_region);
10238 fallthru = true;
10239 break;
10241 case GIMPLE_OMP_TASK:
10242 cur_region = new_omp_region (bb, code, cur_region);
10243 fallthru = true;
10244 if (gimple_omp_task_taskwait_p (last))
10245 cur_region = cur_region->outer;
10246 break;
10248 case GIMPLE_OMP_ORDERED:
10249 cur_region = new_omp_region (bb, code, cur_region);
10250 fallthru = true;
10251 if (omp_find_clause (gimple_omp_ordered_clauses
10252 (as_a <gomp_ordered *> (last)),
10253 OMP_CLAUSE_DEPEND))
10254 cur_region = cur_region->outer;
10255 break;
10257 case GIMPLE_OMP_TARGET:
10258 cur_region = new_omp_region (bb, code, cur_region);
10259 fallthru = true;
10260 switch (gimple_omp_target_kind (last))
10262 case GF_OMP_TARGET_KIND_REGION:
10263 case GF_OMP_TARGET_KIND_DATA:
10264 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10265 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10266 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10267 case GF_OMP_TARGET_KIND_OACC_DATA:
10268 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10269 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10270 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10271 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10272 break;
10273 case GF_OMP_TARGET_KIND_UPDATE:
10274 case GF_OMP_TARGET_KIND_ENTER_DATA:
10275 case GF_OMP_TARGET_KIND_EXIT_DATA:
10276 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10277 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10278 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10279 cur_region = cur_region->outer;
10280 break;
10281 default:
10282 gcc_unreachable ();
10284 break;
10286 case GIMPLE_OMP_SECTIONS:
10287 cur_region = new_omp_region (bb, code, cur_region);
10288 fallthru = true;
10289 break;
10291 case GIMPLE_OMP_SECTIONS_SWITCH:
10292 fallthru = false;
10293 break;
10295 case GIMPLE_OMP_ATOMIC_LOAD:
10296 case GIMPLE_OMP_ATOMIC_STORE:
10297 fallthru = true;
10298 break;
10300 case GIMPLE_OMP_RETURN:
10301 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10302 somewhere other than the next block. This will be
10303 created later. */
10304 cur_region->exit = bb;
10305 if (cur_region->type == GIMPLE_OMP_TASK)
10306 /* Add an edge corresponding to not scheduling the task
10307 immediately. */
10308 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10309 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10310 cur_region = cur_region->outer;
10311 break;
10313 case GIMPLE_OMP_CONTINUE:
10314 cur_region->cont = bb;
10315 switch (cur_region->type)
10317 case GIMPLE_OMP_FOR:
10318 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10319 successor edges as abnormal to prevent splitting
10320 them. */
10321 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10322 /* Make the loopback edge. */
10323 make_edge (bb, single_succ (cur_region->entry),
10324 EDGE_ABNORMAL);
10326 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10327 corresponds to the case that the body of the loop
10328 is not executed at all. */
10329 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10330 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10331 fallthru = false;
10332 break;
10334 case GIMPLE_OMP_SECTIONS:
10335 /* Wire up the edges into and out of the nested sections. */
10337 basic_block switch_bb = single_succ (cur_region->entry);
10339 struct omp_region *i;
10340 for (i = cur_region->inner; i ; i = i->next)
10342 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10343 make_edge (switch_bb, i->entry, 0);
10344 make_edge (i->exit, bb, EDGE_FALLTHRU);
10347 /* Make the loopback edge to the block with
10348 GIMPLE_OMP_SECTIONS_SWITCH. */
10349 make_edge (bb, switch_bb, 0);
10351 /* Make the edge from the switch to exit. */
10352 make_edge (switch_bb, bb->next_bb, 0);
10353 fallthru = false;
10355 break;
10357 case GIMPLE_OMP_TASK:
10358 fallthru = true;
10359 break;
10361 default:
10362 gcc_unreachable ();
10364 break;
10366 default:
10367 gcc_unreachable ();
10370 if (*region != cur_region)
10372 *region = cur_region;
10373 if (cur_region)
10374 *region_idx = cur_region->entry->index;
10375 else
10376 *region_idx = 0;
10379 return fallthru;
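/* Edge sketch for a GIMPLE_OMP_FOR region (illustrative): the
   GIMPLE_OMP_CONTINUE case above produces, schematically,

     entry -> body            (existing edge, marked EDGE_ABNORMAL)
     cont  -> body            (the loopback edge)
     entry -> cont->next_bb   (zero-iterations case)
     cont  -> cont->next_bb   (fallthru out of the loop)

   and returns false, so the caller in tree-cfg.c does not add its own
   fallthru edge out of the CONTINUE block.  */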