testsuite: Update pr79251 ilp32 store counts.
[official-gcc.git] / gcc / omp-expand.c
blob7559ec8026311e001f068e25cf26f03a9027c9b4
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
/* Head of the list of toplevel regions: new_omp_region pushes regions that
   have no parent onto it and omp_free_regions resets it to NULL.  */
112 static struct omp_region *root_omp_region;
/* NOTE(review): presumably records whether any outlined child function has
   already been dumped -- confirm against its users.  */
113 static bool omp_any_child_fn_dumped;
/* Forward declarations.  Being static, these helpers are defined elsewhere
   in this translation unit.  The last parameter of
   expand_omp_build_assign defaults to false.  */
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
120 /* Return true if REGION is a combined parallel+workshare region. */
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
125 return region->is_combined_parallel;
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
142 Is lowered into:
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
201 return true;
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded. */
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237 struct omp_for_data fd;
238 tree n1, n2;
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
244 if (gimple_omp_for_combined_into_p (for_stmt))
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
268 if (fd.chunk_size)
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
275 return ws_args;
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
289 gcc_unreachable ();
292 /* Discover whether REGION is a combined parallel+workshare region. */
/* When every check below passes, is_combined_parallel is set on both REGION
   and REGION->inner and the extra library-call arguments are stored in
   REGION->ws_args.  On any failed check the function returns with REGION
   unmodified.  */
294 static void
295 determine_parallel_type (struct omp_region *region)
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
/* Nothing to do without an inner region, or when either region lacks an
   exit block or the inner region lacks a continue block.  */
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
318 /* Give up for task reductions on the parallel, while it is implementable,
319 adding another big set of APIs or slowing down the normal paths is
320 not acceptable. */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
/* Require the two entry blocks and the two exit blocks to be directly
   connected, the workshare header to be safe to combine, and either the
   parallel to be marked as combined or both WS_ENTRY_BB and PAR_EXIT_BB
   to consist of a single statement.  */
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
335 if (region->inner->type == GIMPLE_OMP_FOR)
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
/* Besides the static and ordered cases described above, this also bails
   out when there is no schedule clause, when the loop has a _reductemp_
   clause, or when it has a _condtemp_ clause whose decl has pointer
   type.  */
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
/* Sections with a _reductemp_ or _condtemp_ clause are not combined
   either.  */
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
/* All checks passed: flag both regions and record the arguments the
   combined call needs.  */
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
370 /* Debugging dumps for parallel regions. */
/* Prototypes for the three non-static dump routines; dump_omp_region
   takes the output stream, the region to start from and the indentation
   in columns, while the debug_* variants write to stderr.  */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
375 /* Dump the parallel region tree rooted at REGION. */
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
386 if (region->cont)
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
405 dump_omp_region (stderr, region, 0);
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
411 dump_omp_region (stderr, root_omp_region, 0);
414 /* Create a new parallel region starting at STMT inside region PARENT. */
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
420 struct omp_region *region = XCNEW (struct omp_region);
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
426 if (parent)
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
433 else
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
441 return region;
444 /* Release the memory associated with the region tree rooted at REGION. */
446 static void
447 free_omp_region_1 (struct omp_region *region)
449 struct omp_region *i, *n;
451 for (i = region->inner; i ; i = n)
453 n = i->next;
454 free_omp_region_1 (i);
457 free (region);
460 /* Release the memory for the entire omp region tree. */
462 void
463 omp_free_regions (void)
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
468 n = r->next;
469 free_omp_region_1 (r);
471 root_omp_region = NULL;
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
477 static gcond *
478 gimple_build_cond_empty (tree cond)
480 enum tree_code pred_code;
481 tree lhs, rhs;
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
495 static void
496 adjust_context_and_scope (struct omp_region *region, tree entry_block,
497 tree child_fndecl)
499 tree parent_fndecl = NULL_TREE;
500 gimple *entry_stmt;
501 /* OMP expansion expands inner regions before outer ones, so if
502 we e.g. have explicit task region nested in parallel region, when
503 expanding the task region current_function_decl will be the original
504 source function, but we actually want to use as context the child
505 function of the parallel. */
506 for (region = region->outer;
507 region && parent_fndecl == NULL_TREE; region = region->outer)
508 switch (region->type)
510 case GIMPLE_OMP_PARALLEL:
511 case GIMPLE_OMP_TASK:
512 case GIMPLE_OMP_TEAMS:
513 entry_stmt = last_stmt (region->entry);
514 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
515 break;
516 case GIMPLE_OMP_TARGET:
517 entry_stmt = last_stmt (region->entry);
518 parent_fndecl
519 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
520 break;
521 default:
522 break;
525 if (parent_fndecl == NULL_TREE)
526 parent_fndecl = current_function_decl;
527 DECL_CONTEXT (child_fndecl) = parent_fndecl;
529 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531 tree b = BLOCK_SUPERCONTEXT (entry_block);
532 if (TREE_CODE (b) == BLOCK)
534 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
535 BLOCK_VARS (b) = child_fndecl;
540 /* Build the function calls to GOMP_parallel etc to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where to insert the code. WS_ARGS
543 will be set if this is a call to a combined parallel+workshare
544 construct, it contains the list of additional arguments needed by
545 the workshare construct. */
/* ENTRY_STMT is the parallel statement itself; its clauses, data argument
   and child function supply the operands of the emitted call.  */
547 static void
548 expand_parallel_call (struct omp_region *region, basic_block bb,
549 gomp_parallel *entry_stmt,
550 vec<tree, va_gc> *ws_args)
552 tree t, t1, t2, val, cond, c, clauses, flags;
553 gimple_stmt_iterator gsi;
554 gimple *stmt;
555 enum built_in_function start_ix;
556 int start_ix2;
557 location_t clause_loc;
558 vec<tree, va_gc> *args;
560 clauses = gimple_omp_parallel_clauses (entry_stmt);
562 /* Determine what flavor of GOMP_parallel we will be
563 emitting. */
564 start_ix = BUILT_IN_GOMP_PARALLEL;
565 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
566 if (rtmp)
567 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
568 else if (is_combined_parallel (region))
570 switch (region->inner->type)
572 case GIMPLE_OMP_FOR:
573 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
/* START_IX2 is computed as an offset that is added to
   BUILT_IN_GOMP_PARALLEL_LOOP_STATIC after this inner switch.  */
574 switch (region->inner->sched_kind)
576 case OMP_CLAUSE_SCHEDULE_RUNTIME:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region->inner->has_lastprivate_conditional != 0)
580 start_ix2 = 3;
581 else if ((region->inner->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
583 start_ix2 = 6;
584 else if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
586 start_ix2 = 7;
587 else
588 start_ix2 = 3;
589 break;
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
591 case OMP_CLAUSE_SCHEDULE_GUIDED:
/* Without the monotonic modifier and without lastprivate(conditional:)
   the offset is 3 + the schedule kind; otherwise fall through to the
   default, which uses the schedule kind itself.  */
592 if ((region->inner->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
594 && !region->inner->has_lastprivate_conditional)
596 start_ix2 = 3 + region->inner->sched_kind;
597 break;
599 /* FALLTHRU */
600 default:
601 start_ix2 = region->inner->sched_kind;
602 break;
604 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
605 start_ix = (enum built_in_function) start_ix2;
606 break;
607 case GIMPLE_OMP_SECTIONS:
608 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
609 break;
610 default:
611 gcc_unreachable ();
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
617 cond = NULL_TREE;
618 val = build_int_cst (unsigned_type_node, 0);
619 flags = build_int_cst (unsigned_type_node, 0);
621 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
622 if (c)
623 cond = OMP_CLAUSE_IF_EXPR (c);
625 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
626 if (c)
628 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
629 clause_loc = OMP_CLAUSE_LOCATION (c);
631 else
632 clause_loc = gimple_location (entry_stmt);
/* A proc_bind clause, when present, supplies the FLAGS argument.  */
634 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
635 if (c)
636 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638 /* Ensure 'val' is of the correct type. */
639 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641 /* If we found the clause 'if (cond)', build either
642 (cond == 0) or (cond ? val : 1u). */
643 if (cond)
645 cond = gimple_boolify (cond);
/* With the default thread count of zero, pass (cond == 0): 1 when the
   'if' condition is false and 0 when it is true.  */
647 if (integer_zerop (val))
648 val = fold_build2_loc (clause_loc,
649 EQ_EXPR, unsigned_type_node, cond,
650 build_int_cst (TREE_TYPE (cond), 0));
651 else
/* Otherwise materialize 'cond ? val : 1u' as control flow: split BB,
   test COND at the end of the first half, assign in THEN_BB and ELSE_BB,
   and rejoin in the second half, which becomes the new BB.  */
653 basic_block cond_bb, then_bb, else_bb;
654 edge e, e_then, e_else;
655 tree tmp_then, tmp_else, tmp_join, tmp_var;
657 tmp_var = create_tmp_var (TREE_TYPE (val));
658 if (gimple_in_ssa_p (cfun))
660 tmp_then = make_ssa_name (tmp_var);
661 tmp_else = make_ssa_name (tmp_var);
662 tmp_join = make_ssa_name (tmp_var);
664 else
666 tmp_then = tmp_var;
667 tmp_else = tmp_var;
668 tmp_join = tmp_var;
671 e = split_block_after_labels (bb);
672 cond_bb = e->src;
673 bb = e->dest;
674 remove_edge (e);
676 then_bb = create_empty_bb (cond_bb);
677 else_bb = create_empty_bb (then_bb);
678 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
679 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681 stmt = gimple_build_cond_empty (cond);
682 gsi = gsi_start_bb (cond_bb);
683 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685 gsi = gsi_start_bb (then_bb);
686 expand_omp_build_assign (&gsi, tmp_then, val, true);
688 gsi = gsi_start_bb (else_bb);
689 expand_omp_build_assign (&gsi, tmp_else,
690 build_int_cst (unsigned_type_node, 1),
691 true);
693 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
694 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
695 add_bb_to_loop (then_bb, cond_bb->loop_father);
696 add_bb_to_loop (else_bb, cond_bb->loop_father);
697 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
698 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
/* In SSA form the two values are merged by a PHI node in the join
   block.  */
700 if (gimple_in_ssa_p (cfun))
702 gphi *phi = create_phi_node (tmp_join, bb);
703 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
704 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
707 val = tmp_join;
710 gsi = gsi_start_bb (bb);
711 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
712 false, GSI_CONTINUE_LINKING);
715 gsi = gsi_last_nondebug_bb (bb);
716 t = gimple_omp_parallel_data_arg (entry_stmt);
717 if (t == NULL)
718 t1 = null_pointer_node;
719 else
720 t1 = build_fold_addr_expr (t);
721 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
722 t2 = build_fold_addr_expr (child_fndecl);
/* Argument order: child function address, data argument address (or a
   null pointer), thread count, the workshare arguments if any, and
   finally the flags.  */
724 vec_alloc (args, 4 + vec_safe_length (ws_args));
725 args->quick_push (t2);
726 args->quick_push (t1);
727 args->quick_push (val);
728 if (ws_args)
729 args->splice (*ws_args);
730 args->quick_push (flags);
732 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
733 builtin_decl_explicit (start_ix), args);
/* With a _reductemp_ clause the call's result is converted and assigned
   to the clause's decl.  */
735 if (rtmp)
737 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
738 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
739 fold_convert (type,
740 fold_convert (pointer_sized_int_node, t)));
742 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where to insert the code. */
/* ENTRY_STMT is the task statement being expanded.  When it is a taskloop,
   a call to GOMP_taskloop or GOMP_taskloop_ull is built instead and
   REGION->outer must be the enclosing taskloop GIMPLE_OMP_FOR; REGION is
   not used otherwise.  */
749 static void
750 expand_task_call (struct omp_region *region, basic_block bb,
751 gomp_task *entry_stmt)
753 tree t1, t2, t3;
754 gimple_stmt_iterator gsi;
755 location_t loc = gimple_location (entry_stmt);
757 tree clauses = gimple_omp_task_clauses (entry_stmt);
759 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
760 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
761 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
762 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
763 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
764 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
765 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
/* Flag bits that are known at compile time.  */
767 unsigned int iflags
768 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
769 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
770 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
773 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
774 tree num_tasks = NULL_TREE;
775 bool ull = false;
776 if (taskloop_p)
778 gimple *g = last_stmt (region->outer->entry);
779 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
780 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
781 struct omp_for_data fd;
782 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
/* The start and end variables are the decls of the first two _looptemp_
   clauses on the task; both clauses are assumed to exist.  */
783 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
784 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
785 OMP_CLAUSE__LOOPTEMP_);
786 startvar = OMP_CLAUSE_DECL (startvar);
787 endvar = OMP_CLAUSE_DECL (endvar);
788 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
789 if (fd.loop.cond_code == LT_EXPR)
790 iflags |= GOMP_TASK_FLAG_UP;
/* NUM_TASKS carries the num_tasks expression, or the grainsize expression
   together with the GRAINSIZE flag, or zero when neither clause is
   present.  */
791 tree tclauses = gimple_omp_for_clauses (g);
792 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
793 if (num_tasks)
794 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
795 else
797 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
798 if (num_tasks)
800 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
801 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
803 else
804 num_tasks = integer_zero_node;
806 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
/* Without an 'if' clause the IF flag is set unconditionally; with one it
   is added to FLAGS at run time further down.  */
807 if (ifc == NULL_TREE)
808 iflags |= GOMP_TASK_FLAG_IF;
809 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
810 iflags |= GOMP_TASK_FLAG_NOGROUP;
811 ull = fd.iter_type == long_long_unsigned_type_node;
812 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
813 iflags |= GOMP_TASK_FLAG_REDUCTION;
815 else
817 if (priority)
818 iflags |= GOMP_TASK_FLAG_PRIORITY;
819 if (detach)
820 iflags |= GOMP_TASK_FLAG_DETACH;
823 tree flags = build_int_cst (unsigned_type_node, iflags);
/* COND is passed only to GOMP_task; for a taskloop the 'if' clause
   contributes GOMP_TASK_FLAG_IF to FLAGS instead.  */
825 tree cond = boolean_true_node;
826 if (ifc)
828 if (taskloop_p)
830 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
831 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
832 build_int_cst (unsigned_type_node,
833 GOMP_TASK_FLAG_IF),
834 build_int_cst (unsigned_type_node, 0));
835 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
836 flags, t);
838 else
839 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
/* A 'final' clause adds GOMP_TASK_FLAG_FINAL to FLAGS when its expression
   is true.  */
842 if (finalc)
844 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
845 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
846 build_int_cst (unsigned_type_node,
847 GOMP_TASK_FLAG_FINAL),
848 build_int_cst (unsigned_type_node, 0));
849 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
/* From here on DEPEND, PRIORITY and DETACH hold the call operands rather
   than the clauses: absent clauses become a null pointer or zero.  */
851 if (depend)
852 depend = OMP_CLAUSE_DECL (depend);
853 else
854 depend = build_int_cst (ptr_type_node, 0);
855 if (priority)
856 priority = fold_convert (integer_type_node,
857 OMP_CLAUSE_PRIORITY_EXPR (priority));
858 else
859 priority = integer_zero_node;
861 gsi = gsi_last_nondebug_bb (bb);
863 detach = (detach
864 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
865 : null_pointer_node);
/* T1 is the child function address, T2 the data argument address and T3
   the copy function address; missing data or copy function yield a null
   pointer.  */
867 tree t = gimple_omp_task_data_arg (entry_stmt);
868 if (t == NULL)
869 t2 = null_pointer_node;
870 else
871 t2 = build_fold_addr_expr_loc (loc, t);
872 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
873 t = gimple_omp_task_copy_fn (entry_stmt);
874 if (t == NULL)
875 t3 = null_pointer_node;
876 else
877 t3 = build_fold_addr_expr_loc (loc, t);
/* The taskloop entry points take 11 arguments, GOMP_task takes 10.  */
879 if (taskloop_p)
880 t = build_call_expr (ull
881 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
882 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
883 11, t1, t2, t3,
884 gimple_omp_task_arg_size (entry_stmt),
885 gimple_omp_task_arg_align (entry_stmt), flags,
886 num_tasks, priority, startvar, endvar, step);
887 else
888 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
889 10, t1, t2, t3,
890 gimple_omp_task_arg_size (entry_stmt),
891 gimple_omp_task_arg_align (entry_stmt), cond, flags,
892 depend, priority, detach);
894 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
895 false, GSI_CONTINUE_LINKING);
898 /* Build the function call to GOMP_taskwait_depend to actually
899 generate the taskwait operation. BB is the block where to insert the
900 code. */
902 static void
903 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
905 tree clauses = gimple_omp_task_clauses (entry_stmt);
906 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
907 if (depend == NULL_TREE)
908 return;
910 depend = OMP_CLAUSE_DECL (depend);
912 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
913 tree t
914 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
915 1, depend);
917 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
918 false, GSI_CONTINUE_LINKING);
921 /* Build the function call to GOMP_teams_reg to actually
922 generate the host teams operation. REGION is the teams region
923 being expanded. BB is the block where to insert the code. */
925 static void
926 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
928 tree clauses = gimple_omp_teams_clauses (entry_stmt);
929 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
930 if (num_teams == NULL_TREE)
931 num_teams = build_int_cst (unsigned_type_node, 0);
932 else
934 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
935 num_teams = fold_convert (unsigned_type_node, num_teams);
937 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
938 if (thread_limit == NULL_TREE)
939 thread_limit = build_int_cst (unsigned_type_node, 0);
940 else
942 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
943 thread_limit = fold_convert (unsigned_type_node, thread_limit);
946 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
947 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
948 if (t == NULL)
949 t1 = null_pointer_node;
950 else
951 t1 = build_fold_addr_expr (t);
952 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
953 tree t2 = build_fold_addr_expr (child_fndecl);
955 vec<tree, va_gc> *args;
956 vec_alloc (args, 5);
957 args->quick_push (t2);
958 args->quick_push (t1);
959 args->quick_push (num_teams);
960 args->quick_push (thread_limit);
961 /* For future extensibility. */
962 args->quick_push (build_zero_cst (unsigned_type_node));
964 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
965 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
966 args);
968 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
969 false, GSI_CONTINUE_LINKING);
972 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
974 static tree
975 vec2chain (vec<tree, va_gc> *v)
977 tree chain = NULL_TREE, t;
978 unsigned ix;
980 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
982 DECL_CHAIN (t) = chain;
983 chain = t;
986 return chain;
989 /* Remove barriers in REGION->EXIT's block. Note that this is only
990 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
991 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
992 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
993 removed. */
/* A barrier is "removed" by marking the workshare's GIMPLE_OMP_RETURN as
   nowait, and only when no addressable variable is found (see below).  */
995 static void
996 remove_exit_barrier (struct omp_region *region)
998 gimple_stmt_iterator gsi;
999 basic_block exit_bb;
1000 edge_iterator ei;
1001 edge e;
1002 gimple *stmt;
/* -1 until the scan for addressable variables has run; afterwards 1 if
   one was found and 0 otherwise.  */
1003 int any_addressable_vars = -1;
1005 exit_bb = region->exit;
1007 /* If the parallel region doesn't return, we don't have REGION->EXIT
1008 block at all. */
1009 if (! exit_bb)
1010 return;
1012 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1013 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1014 statements that can appear in between are extremely limited -- no
1015 memory operations at all. Here, we allow nothing at all, so the
1016 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1017 gsi = gsi_last_nondebug_bb (exit_bb);
1018 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1019 gsi_prev_nondebug (&gsi);
1020 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1021 return;
/* Look at the last non-debug statement of every predecessor of the exit
   block; only a GIMPLE_OMP_RETURN that is not already nowait is of
   interest.  */
1023 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1025 gsi = gsi_last_nondebug_bb (e->src);
1026 if (gsi_end_p (gsi))
1027 continue;
1028 stmt = gsi_stmt (gsi);
1029 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1030 && !gimple_omp_return_nowait_p (stmt))
1032 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1033 in many cases. If there could be tasks queued, the barrier
1034 might be needed to let the tasks run before some local
1035 variable of the parallel that the task uses as shared
1036 runs out of scope. The task can be spawned either
1037 from within current function (this would be easy to check)
1038 or from some function it calls and gets passed an address
1039 of such a variable. */
/* The scan runs at most once per call; its result is cached in
   ANY_ADDRESSABLE_VARS.  */
1040 if (any_addressable_vars < 0)
1042 gomp_parallel *parallel_stmt
1043 = as_a <gomp_parallel *> (last_stmt (region->entry));
1044 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1045 tree local_decls, block, decl;
1046 unsigned ix;
/* First check the local decls of the parallel's child function.  */
1048 any_addressable_vars = 0;
1049 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1050 if (TREE_ADDRESSABLE (decl))
1052 any_addressable_vars = 1;
1053 break;
/* Then check the BLOCK_VARS of STMT's block and of each enclosing BLOCK,
   stopping once the parallel statement's own block has been examined.  */
1055 for (block = gimple_block (stmt);
1056 !any_addressable_vars
1057 && block
1058 && TREE_CODE (block) == BLOCK;
1059 block = BLOCK_SUPERCONTEXT (block))
1061 for (local_decls = BLOCK_VARS (block);
1062 local_decls;
1063 local_decls = DECL_CHAIN (local_decls))
1064 if (TREE_ADDRESSABLE (local_decls))
1066 any_addressable_vars = 1;
1067 break;
1069 if (block == gimple_block (parallel_stmt))
1070 break;
/* With no addressable variable found, drop the barrier by marking the
   workshare's return as nowait.  */
1073 if (!any_addressable_vars)
1074 gimple_omp_return_set_nowait (stmt);
1079 static void
1080 remove_exit_barriers (struct omp_region *region)
1082 if (region->type == GIMPLE_OMP_PARALLEL)
1083 remove_exit_barrier (region);
1085 if (region->inner)
1087 region = region->inner;
1088 remove_exit_barriers (region);
1089 while (region->next)
1091 region = region->next;
1092 remove_exit_barriers (region);
1097 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1098 calls. These can't be declared as const functions, but
1099 within one parallel body they are constant, so they can be
1100 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1101 which are declared const. Similarly for task body, except
1102 that in untied task omp_get_thread_num () can change at any task
1103 scheduling point. */
1105 static void
1106 optimize_omp_library_calls (gimple *entry_stmt)
1108 basic_block bb;
1109 gimple_stmt_iterator gsi;
1110 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1111 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1112 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1113 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1114 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1115 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1116 OMP_CLAUSE_UNTIED) != NULL);
1118 FOR_EACH_BB_FN (bb, cfun)
1119 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1121 gimple *call = gsi_stmt (gsi);
1122 tree decl;
1124 if (is_gimple_call (call)
1125 && (decl = gimple_call_fndecl (call))
1126 && DECL_EXTERNAL (decl)
1127 && TREE_PUBLIC (decl)
1128 && DECL_INITIAL (decl) == NULL)
1130 tree built_in;
1132 if (DECL_NAME (decl) == thr_num_id)
1134 /* In #pragma omp task untied omp_get_thread_num () can change
1135 during the execution of the task region. */
1136 if (untied_task)
1137 continue;
1138 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1140 else if (DECL_NAME (decl) == num_thr_id)
1141 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1142 else
1143 continue;
1145 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1146 || gimple_call_num_args (call) != 0)
1147 continue;
1149 if (flag_exceptions && !TREE_NOTHROW (decl))
1150 continue;
1152 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1153 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1154 TREE_TYPE (TREE_TYPE (built_in))))
1155 continue;
1157 gimple_call_set_fndecl (call, built_in);
1162 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1163 regimplified. */
1165 static tree
1166 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1168 tree t = *tp;
1170 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1171 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1172 return t;
1174 if (TREE_CODE (t) == ADDR_EXPR)
1175 recompute_tree_invariant_for_addr_expr (t);
1177 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1178 return NULL_TREE;
1181 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1183 static void
1184 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1185 bool after)
1187 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1188 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1189 !after, after ? GSI_CONTINUE_LINKING
1190 : GSI_SAME_STMT);
1191 gimple *stmt = gimple_build_assign (to, from);
1192 if (after)
1193 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1194 else
1195 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1196 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1197 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1199 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1200 gimple_regimplify_operands (stmt, &gsi);
1204 /* Expand the OpenMP parallel or task directive starting at REGION. */
1206 static void
1207 expand_omp_taskreg (struct omp_region *region)
1209 basic_block entry_bb, exit_bb, new_bb;
1210 struct function *child_cfun;
1211 tree child_fn, block, t;
1212 gimple_stmt_iterator gsi;
1213 gimple *entry_stmt, *stmt;
1214 edge e;
1215 vec<tree, va_gc> *ws_args;
1217 entry_stmt = last_stmt (region->entry);
1218 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1219 && gimple_omp_task_taskwait_p (entry_stmt))
1221 new_bb = region->entry;
1222 gsi = gsi_last_nondebug_bb (region->entry);
1223 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1224 gsi_remove (&gsi, true);
1225 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1226 return;
1229 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1230 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1232 entry_bb = region->entry;
1233 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1234 exit_bb = region->cont;
1235 else
1236 exit_bb = region->exit;
1238 if (is_combined_parallel (region))
1239 ws_args = region->ws_args;
1240 else
1241 ws_args = NULL;
1243 if (child_cfun->cfg)
1245 /* Due to inlining, it may happen that we have already outlined
1246 the region, in which case all we need to do is make the
1247 sub-graph unreachable and emit the parallel call. */
1248 edge entry_succ_e, exit_succ_e;
1250 entry_succ_e = single_succ_edge (entry_bb);
1252 gsi = gsi_last_nondebug_bb (entry_bb);
1253 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1255 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1256 gsi_remove (&gsi, true);
1258 new_bb = entry_bb;
1259 if (exit_bb)
1261 exit_succ_e = single_succ_edge (exit_bb);
1262 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1264 remove_edge_and_dominated_blocks (entry_succ_e);
1266 else
1268 unsigned srcidx, dstidx, num;
1270 /* If the parallel region needs data sent from the parent
1271 function, then the very first statement (except possible
1272 tree profile counter updates) of the parallel body
1273 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1274 &.OMP_DATA_O is passed as an argument to the child function,
1275 we need to replace it with the argument as seen by the child
1276 function.
1278 In most cases, this will end up being the identity assignment
1279 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1280 a function call that has been inlined, the original PARM_DECL
1281 .OMP_DATA_I may have been converted into a different local
1282 variable. In which case, we need to keep the assignment. */
1283 if (gimple_omp_taskreg_data_arg (entry_stmt))
1285 basic_block entry_succ_bb
1286 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1287 : FALLTHRU_EDGE (entry_bb)->dest;
1288 tree arg;
1289 gimple *parcopy_stmt = NULL;
1291 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1293 gimple *stmt;
1295 gcc_assert (!gsi_end_p (gsi));
1296 stmt = gsi_stmt (gsi);
1297 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1298 continue;
1300 if (gimple_num_ops (stmt) == 2)
1302 tree arg = gimple_assign_rhs1 (stmt);
1304 /* We're ignore the subcode because we're
1305 effectively doing a STRIP_NOPS. */
1307 if (TREE_CODE (arg) == ADDR_EXPR
1308 && (TREE_OPERAND (arg, 0)
1309 == gimple_omp_taskreg_data_arg (entry_stmt)))
1311 parcopy_stmt = stmt;
1312 break;
1317 gcc_assert (parcopy_stmt != NULL);
1318 arg = DECL_ARGUMENTS (child_fn);
1320 if (!gimple_in_ssa_p (cfun))
1322 if (gimple_assign_lhs (parcopy_stmt) == arg)
1323 gsi_remove (&gsi, true);
1324 else
1326 /* ?? Is setting the subcode really necessary ?? */
1327 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1328 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1331 else
1333 tree lhs = gimple_assign_lhs (parcopy_stmt);
1334 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1335 /* We'd like to set the rhs to the default def in the child_fn,
1336 but it's too early to create ssa names in the child_fn.
1337 Instead, we set the rhs to the parm. In
1338 move_sese_region_to_fn, we introduce a default def for the
1339 parm, map the parm to it's default def, and once we encounter
1340 this stmt, replace the parm with the default def. */
1341 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1342 update_stmt (parcopy_stmt);
1346 /* Declare local variables needed in CHILD_CFUN. */
1347 block = DECL_INITIAL (child_fn);
1348 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1349 /* The gimplifier could record temporaries in parallel/task block
1350 rather than in containing function's local_decls chain,
1351 which would mean cgraph missed finalizing them. Do it now. */
1352 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1353 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1354 varpool_node::finalize_decl (t);
1355 DECL_SAVED_TREE (child_fn) = NULL;
1356 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1357 gimple_set_body (child_fn, NULL);
1358 TREE_USED (block) = 1;
1360 /* Reset DECL_CONTEXT on function arguments. */
1361 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1362 DECL_CONTEXT (t) = child_fn;
1364 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1365 so that it can be moved to the child function. */
1366 gsi = gsi_last_nondebug_bb (entry_bb);
1367 stmt = gsi_stmt (gsi);
1368 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1369 || gimple_code (stmt) == GIMPLE_OMP_TASK
1370 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1371 e = split_block (entry_bb, stmt);
1372 gsi_remove (&gsi, true);
1373 entry_bb = e->dest;
1374 edge e2 = NULL;
1375 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1376 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1377 else
1379 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1380 gcc_assert (e2->dest == region->exit);
1381 remove_edge (BRANCH_EDGE (entry_bb));
1382 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1383 gsi = gsi_last_nondebug_bb (region->exit);
1384 gcc_assert (!gsi_end_p (gsi)
1385 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1386 gsi_remove (&gsi, true);
1389 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1390 if (exit_bb)
1392 gsi = gsi_last_nondebug_bb (exit_bb);
1393 gcc_assert (!gsi_end_p (gsi)
1394 && (gimple_code (gsi_stmt (gsi))
1395 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1396 stmt = gimple_build_return (NULL);
1397 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1398 gsi_remove (&gsi, true);
1401 /* Move the parallel region into CHILD_CFUN. */
1403 if (gimple_in_ssa_p (cfun))
1405 init_tree_ssa (child_cfun);
1406 init_ssa_operands (child_cfun);
1407 child_cfun->gimple_df->in_ssa_p = true;
1408 block = NULL_TREE;
1410 else
1411 block = gimple_block (entry_stmt);
1413 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1414 if (exit_bb)
1415 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1416 if (e2)
1418 basic_block dest_bb = e2->dest;
1419 if (!exit_bb)
1420 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1421 remove_edge (e2);
1422 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1424 /* When the OMP expansion process cannot guarantee an up-to-date
1425 loop tree arrange for the child function to fixup loops. */
1426 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1427 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1429 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1430 num = vec_safe_length (child_cfun->local_decls);
1431 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1433 t = (*child_cfun->local_decls)[srcidx];
1434 if (DECL_CONTEXT (t) == cfun->decl)
1435 continue;
1436 if (srcidx != dstidx)
1437 (*child_cfun->local_decls)[dstidx] = t;
1438 dstidx++;
1440 if (dstidx != num)
1441 vec_safe_truncate (child_cfun->local_decls, dstidx);
1443 /* Inform the callgraph about the new function. */
1444 child_cfun->curr_properties = cfun->curr_properties;
1445 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1446 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1447 cgraph_node *node = cgraph_node::get_create (child_fn);
1448 node->parallelized_function = 1;
1449 cgraph_node::add_new_function (child_fn, true);
1451 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1452 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1454 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1455 fixed in a following pass. */
1456 push_cfun (child_cfun);
1457 if (need_asm)
1458 assign_assembler_name_if_needed (child_fn);
1460 if (optimize)
1461 optimize_omp_library_calls (entry_stmt);
1462 update_max_bb_count ();
1463 cgraph_edge::rebuild_edges ();
1465 /* Some EH regions might become dead, see PR34608. If
1466 pass_cleanup_cfg isn't the first pass to happen with the
1467 new child, these dead EH edges might cause problems.
1468 Clean them up now. */
1469 if (flag_exceptions)
1471 basic_block bb;
1472 bool changed = false;
1474 FOR_EACH_BB_FN (bb, cfun)
1475 changed |= gimple_purge_dead_eh_edges (bb);
1476 if (changed)
1477 cleanup_tree_cfg ();
1479 if (gimple_in_ssa_p (cfun))
1480 update_ssa (TODO_update_ssa);
1481 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1482 verify_loop_structure ();
1483 pop_cfun ();
1485 if (dump_file && !gimple_in_ssa_p (cfun))
1487 omp_any_child_fn_dumped = true;
1488 dump_function_header (dump_file, child_fn, dump_flags);
1489 dump_function_to_file (child_fn, dump_file, dump_flags);
1493 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1495 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1496 expand_parallel_call (region, new_bb,
1497 as_a <gomp_parallel *> (entry_stmt), ws_args);
1498 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1499 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1500 else
1501 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1502 if (gimple_in_ssa_p (cfun))
1503 update_ssa (TODO_update_ssa_only_virtuals);
1506 /* Information about members of an OpenACC collapsed loop nest. */
1508 struct oacc_collapse
1510 tree base; /* Base value. */
1511 tree iters; /* Number of steps. */
1512 tree step; /* Step size. */
1513 tree tile; /* Tile increment (if tiled). */
1514 tree outer; /* Tile iterator var. */
1517 /* Helper for expand_oacc_for. Determine collapsed loop information.
1518 Fill in COUNTS array. Emit any initialization code before GSI.
1519 Return the calculated outer loop bound of BOUND_TYPE. */
1521 static tree
1522 expand_oacc_collapse_init (const struct omp_for_data *fd,
1523 gimple_stmt_iterator *gsi,
1524 oacc_collapse *counts, tree diff_type,
1525 tree bound_type, location_t loc)
1527 tree tiling = fd->tiling;
1528 tree total = build_int_cst (bound_type, 1);
1529 int ix;
1531 gcc_assert (integer_onep (fd->loop.step));
1532 gcc_assert (integer_zerop (fd->loop.n1));
1534 /* When tiling, the first operand of the tile clause applies to the
1535 innermost loop, and we work outwards from there. Seems
1536 backwards, but whatever. */
1537 for (ix = fd->collapse; ix--;)
1539 const omp_for_data_loop *loop = &fd->loops[ix];
1541 tree iter_type = TREE_TYPE (loop->v);
1542 tree plus_type = iter_type;
1544 gcc_assert (loop->cond_code == fd->loop.cond_code);
1546 if (POINTER_TYPE_P (iter_type))
1547 plus_type = sizetype;
1549 if (tiling)
1551 tree num = build_int_cst (integer_type_node, fd->collapse);
1552 tree loop_no = build_int_cst (integer_type_node, ix);
1553 tree tile = TREE_VALUE (tiling);
1554 gcall *call
1555 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1556 /* gwv-outer=*/integer_zero_node,
1557 /* gwv-inner=*/integer_zero_node);
1559 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1560 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1561 gimple_call_set_lhs (call, counts[ix].tile);
1562 gimple_set_location (call, loc);
1563 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1565 tiling = TREE_CHAIN (tiling);
1567 else
1569 counts[ix].tile = NULL;
1570 counts[ix].outer = loop->v;
1573 tree b = loop->n1;
1574 tree e = loop->n2;
1575 tree s = loop->step;
1576 bool up = loop->cond_code == LT_EXPR;
1577 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1578 bool negating;
1579 tree expr;
1581 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1582 true, GSI_SAME_STMT);
1583 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1584 true, GSI_SAME_STMT);
1586 /* Convert the step, avoiding possible unsigned->signed overflow. */
1587 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1588 if (negating)
1589 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1590 s = fold_convert (diff_type, s);
1591 if (negating)
1592 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1593 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1594 true, GSI_SAME_STMT);
1596 /* Determine the range, avoiding possible unsigned->signed overflow. */
1597 negating = !up && TYPE_UNSIGNED (iter_type);
1598 expr = fold_build2 (MINUS_EXPR, plus_type,
1599 fold_convert (plus_type, negating ? b : e),
1600 fold_convert (plus_type, negating ? e : b));
1601 expr = fold_convert (diff_type, expr);
1602 if (negating)
1603 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1604 tree range = force_gimple_operand_gsi
1605 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1607 /* Determine number of iterations. */
1608 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1609 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1610 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1612 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 counts[ix].base = b;
1616 counts[ix].iters = iters;
1617 counts[ix].step = s;
1619 total = fold_build2 (MULT_EXPR, bound_type, total,
1620 fold_convert (bound_type, iters));
1623 return total;
1626 /* Emit initializers for collapsed loop members. INNER is true if
1627 this is for the element loop of a TILE. IVAR is the outer
1628 loop iteration variable, from which collapsed loop iteration values
1629 are calculated. COUNTS array has been initialized by
1630 expand_oacc_collapse_inits. */
1632 static void
1633 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1634 gimple_stmt_iterator *gsi,
1635 const oacc_collapse *counts, tree ivar,
1636 tree diff_type)
1638 tree ivar_type = TREE_TYPE (ivar);
1640 /* The most rapidly changing iteration variable is the innermost
1641 one. */
1642 for (int ix = fd->collapse; ix--;)
1644 const omp_for_data_loop *loop = &fd->loops[ix];
1645 const oacc_collapse *collapse = &counts[ix];
1646 tree v = inner ? loop->v : collapse->outer;
1647 tree iter_type = TREE_TYPE (v);
1648 tree plus_type = iter_type;
1649 enum tree_code plus_code = PLUS_EXPR;
1650 tree expr;
1652 if (POINTER_TYPE_P (iter_type))
1654 plus_code = POINTER_PLUS_EXPR;
1655 plus_type = sizetype;
1658 expr = ivar;
1659 if (ix)
1661 tree mod = fold_convert (ivar_type, collapse->iters);
1662 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1663 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1664 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1665 true, GSI_SAME_STMT);
1668 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1669 fold_convert (diff_type, collapse->step));
1670 expr = fold_build2 (plus_code, iter_type,
1671 inner ? collapse->outer : collapse->base,
1672 fold_convert (plus_type, expr));
1673 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1674 true, GSI_SAME_STMT);
1675 gassign *ass = gimple_build_assign (v, expr);
1676 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1680 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1681 of the combined collapse > 1 loop constructs, generate code like:
1682 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1683 if (cond3 is <)
1684 adj = STEP3 - 1;
1685 else
1686 adj = STEP3 + 1;
1687 count3 = (adj + N32 - N31) / STEP3;
1688 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1689 if (cond2 is <)
1690 adj = STEP2 - 1;
1691 else
1692 adj = STEP2 + 1;
1693 count2 = (adj + N22 - N21) / STEP2;
1694 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1695 if (cond1 is <)
1696 adj = STEP1 - 1;
1697 else
1698 adj = STEP1 + 1;
1699 count1 = (adj + N12 - N11) / STEP1;
1700 count = count1 * count2 * count3;
1701 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1702 count = 0;
1703 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1704 of the combined loop constructs, just initialize COUNTS array
1705 from the _looptemp_ clauses. For loop nests with non-rectangular
1706 loops, do this only for the rectangular loops. Then pick
1707 the loops which reference outer vars in their bound expressions
1708 and the loops which they refer to and for this sub-nest compute
1709 number of iterations. For triangular loops use Faulhaber's formula,
1710 otherwise as a fallback, compute by iterating the loops.
1711 If e.g. the sub-nest is
1712 for (I = N11; I COND1 N12; I += STEP1)
1713 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1714 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1716 COUNT = 0;
1717 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1718 for (tmpj = M21 * tmpi + N21;
1719 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1721 int tmpk1 = M31 * tmpj + N31;
1722 int tmpk2 = M32 * tmpj + N32;
1723 if (tmpk1 COND3 tmpk2)
1725 if (COND3 is <)
1726 adj = STEP3 - 1;
1727 else
1728 adj = STEP3 + 1;
1729 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1732 and finally multiply the counts of the rectangular loops not
1733 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1734 store number of iterations of the loops from fd->first_nonrect
1735 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1736 by the counts of rectangular loops not referenced in any non-rectangular
1737 loops sandwiched in between those. */
1739 /* NOTE: It *could* be better to moosh all of the BBs together,
1740 creating one larger BB with all the computation and the unexpected
1741 jump at the end. I.e.
1743 bool zero3, zero2, zero1, zero;
1745 zero3 = N32 c3 N31;
1746 count3 = (N32 - N31) /[cl] STEP3;
1747 zero2 = N22 c2 N21;
1748 count2 = (N22 - N21) /[cl] STEP2;
1749 zero1 = N12 c1 N11;
1750 count1 = (N12 - N11) /[cl] STEP1;
1751 zero = zero3 || zero2 || zero1;
1752 count = count1 * count2 * count3;
1753 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1755 After all, we expect the zero=false, and thus we expect to have to
1756 evaluate all of the comparison expressions, so short-circuiting
1757 oughtn't be a win. Since the condition isn't protecting a
1758 denominator, we're not concerned about divide-by-zero, so we can
1759 fully evaluate count even if a numerator turned out to be wrong.
1761 It seems like putting this all together would create much better
1762 scheduling opportunities, and less pressure on the chip's branch
1763 predictor. */
1765 static void
1766 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1767 basic_block &entry_bb, tree *counts,
1768 basic_block &zero_iter1_bb, int &first_zero_iter1,
1769 basic_block &zero_iter2_bb, int &first_zero_iter2,
1770 basic_block &l2_dom_bb)
1772 tree t, type = TREE_TYPE (fd->loop.v);
1773 edge e, ne;
1774 int i;
1776 /* Collapsed loops need work for expansion into SSA form. */
1777 gcc_assert (!gimple_in_ssa_p (cfun));
1779 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1780 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1782 gcc_assert (fd->ordered == 0);
1783 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1784 isn't supposed to be handled, as the inner loop doesn't
1785 use it. */
1786 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1787 OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 counts[i] = OMP_CLAUSE_DECL (innerc);
1796 else
1797 counts[0] = NULL_TREE;
1799 if (fd->non_rect
1800 && fd->last_nonrect == fd->first_nonrect + 1
1801 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1803 tree c[4];
1804 for (i = 0; i < 4; i++)
1806 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1807 OMP_CLAUSE__LOOPTEMP_);
1808 gcc_assert (innerc);
1809 c[i] = OMP_CLAUSE_DECL (innerc);
1811 counts[0] = c[0];
1812 fd->first_inner_iterations = c[1];
1813 fd->factor = c[2];
1814 fd->adjn1 = c[3];
1816 return;
1819 for (i = fd->collapse; i < fd->ordered; i++)
1821 tree itype = TREE_TYPE (fd->loops[i].v);
1822 counts[i] = NULL_TREE;
1823 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1824 fold_convert (itype, fd->loops[i].n1),
1825 fold_convert (itype, fd->loops[i].n2));
1826 if (t && integer_zerop (t))
1828 for (i = fd->collapse; i < fd->ordered; i++)
1829 counts[i] = build_int_cst (type, 0);
1830 break;
1833 bool rect_count_seen = false;
1834 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1836 tree itype = TREE_TYPE (fd->loops[i].v);
1838 if (i >= fd->collapse && counts[i])
1839 continue;
1840 if (fd->non_rect)
1842 /* Skip loops that use outer iterators in their expressions
1843 during this phase. */
1844 if (fd->loops[i].m1 || fd->loops[i].m2)
1846 counts[i] = build_zero_cst (type);
1847 continue;
1850 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1851 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1852 fold_convert (itype, fd->loops[i].n1),
1853 fold_convert (itype, fd->loops[i].n2)))
1854 == NULL_TREE || !integer_onep (t)))
1856 gcond *cond_stmt;
1857 tree n1, n2;
1858 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1859 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1860 true, GSI_SAME_STMT);
1861 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1862 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1863 true, GSI_SAME_STMT);
1864 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1865 NULL_TREE, NULL_TREE);
1866 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1867 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1868 expand_omp_regimplify_p, NULL, NULL)
1869 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1870 expand_omp_regimplify_p, NULL, NULL))
1872 *gsi = gsi_for_stmt (cond_stmt);
1873 gimple_regimplify_operands (cond_stmt, gsi);
1875 e = split_block (entry_bb, cond_stmt);
1876 basic_block &zero_iter_bb
1877 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1878 int &first_zero_iter
1879 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1880 if (zero_iter_bb == NULL)
1882 gassign *assign_stmt;
1883 first_zero_iter = i;
1884 zero_iter_bb = create_empty_bb (entry_bb);
1885 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1886 *gsi = gsi_after_labels (zero_iter_bb);
1887 if (i < fd->collapse)
1888 assign_stmt = gimple_build_assign (fd->loop.n2,
1889 build_zero_cst (type));
1890 else
1892 counts[i] = create_tmp_reg (type, ".count");
1893 assign_stmt
1894 = gimple_build_assign (counts[i], build_zero_cst (type));
1896 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1897 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1898 entry_bb);
1900 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1901 ne->probability = profile_probability::very_unlikely ();
1902 e->flags = EDGE_TRUE_VALUE;
1903 e->probability = ne->probability.invert ();
1904 if (l2_dom_bb == NULL)
1905 l2_dom_bb = entry_bb;
1906 entry_bb = e->dest;
1907 *gsi = gsi_last_nondebug_bb (entry_bb);
1910 if (POINTER_TYPE_P (itype))
1911 itype = signed_type_for (itype);
1912 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1913 ? -1 : 1));
1914 t = fold_build2 (PLUS_EXPR, itype,
1915 fold_convert (itype, fd->loops[i].step), t);
1916 t = fold_build2 (PLUS_EXPR, itype, t,
1917 fold_convert (itype, fd->loops[i].n2));
1918 t = fold_build2 (MINUS_EXPR, itype, t,
1919 fold_convert (itype, fd->loops[i].n1));
1920 /* ?? We could probably use CEIL_DIV_EXPR instead of
1921 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1922 generate the same code in the end because generically we
1923 don't know that the values involved must be negative for
1924 GT?? */
1925 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1927 fold_build1 (NEGATE_EXPR, itype, t),
1928 fold_build1 (NEGATE_EXPR, itype,
1929 fold_convert (itype,
1930 fd->loops[i].step)));
1931 else
1932 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1933 fold_convert (itype, fd->loops[i].step));
1934 t = fold_convert (type, t);
1935 if (TREE_CODE (t) == INTEGER_CST)
1936 counts[i] = t;
1937 else
1939 if (i < fd->collapse || i != first_zero_iter2)
1940 counts[i] = create_tmp_reg (type, ".count");
1941 expand_omp_build_assign (gsi, counts[i], t);
1943 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1945 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1946 continue;
1947 if (!rect_count_seen)
1949 t = counts[i];
1950 rect_count_seen = true;
1952 else
1953 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1954 expand_omp_build_assign (gsi, fd->loop.n2, t);
1957 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1959 gcc_assert (fd->last_nonrect != -1);
1961 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1962 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1963 build_zero_cst (type));
1964 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1965 if (fd->loops[i].m1
1966 || fd->loops[i].m2
1967 || fd->loops[i].non_rect_referenced)
1968 break;
1969 if (i == fd->last_nonrect
1970 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1971 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1973 int o = fd->first_nonrect;
1974 tree itype = TREE_TYPE (fd->loops[o].v);
1975 tree n1o = create_tmp_reg (itype, ".n1o");
1976 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1977 expand_omp_build_assign (gsi, n1o, t);
1978 tree n2o = create_tmp_reg (itype, ".n2o");
1979 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1980 expand_omp_build_assign (gsi, n2o, t);
1981 if (fd->loops[i].m1 && fd->loops[i].m2)
1982 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1983 unshare_expr (fd->loops[i].m1));
1984 else if (fd->loops[i].m1)
1985 t = fold_unary (NEGATE_EXPR, itype,
1986 unshare_expr (fd->loops[i].m1));
1987 else
1988 t = unshare_expr (fd->loops[i].m2);
1989 tree m2minusm1
1990 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1991 true, GSI_SAME_STMT);
1993 gimple_stmt_iterator gsi2 = *gsi;
1994 gsi_prev (&gsi2);
1995 e = split_block (entry_bb, gsi_stmt (gsi2));
1996 e = split_block (e->dest, (gimple *) NULL);
1997 basic_block bb1 = e->src;
1998 entry_bb = e->dest;
1999 *gsi = gsi_after_labels (entry_bb);
2001 gsi2 = gsi_after_labels (bb1);
2002 tree ostep = fold_convert (itype, fd->loops[o].step);
2003 t = build_int_cst (itype, (fd->loops[o].cond_code
2004 == LT_EXPR ? -1 : 1));
2005 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2006 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2007 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2008 if (TYPE_UNSIGNED (itype)
2009 && fd->loops[o].cond_code == GT_EXPR)
2010 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2011 fold_build1 (NEGATE_EXPR, itype, t),
2012 fold_build1 (NEGATE_EXPR, itype, ostep));
2013 else
2014 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2015 tree outer_niters
2016 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2019 build_one_cst (itype));
2020 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2021 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2022 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 tree n1, n2, n1e, n2e;
2025 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2026 if (fd->loops[i].m1)
2028 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2029 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2030 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2032 else
2033 n1 = t;
2034 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2035 true, GSI_SAME_STMT);
2036 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2037 if (fd->loops[i].m2)
2039 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2040 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2041 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2043 else
2044 n2 = t;
2045 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2048 if (fd->loops[i].m1)
2050 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2051 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2052 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2054 else
2055 n1e = t;
2056 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2059 if (fd->loops[i].m2)
2061 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2062 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2063 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2065 else
2066 n2e = t;
2067 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2068 true, GSI_SAME_STMT);
2069 gcond *cond_stmt
2070 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2071 NULL_TREE, NULL_TREE);
2072 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2073 e = split_block (bb1, cond_stmt);
2074 e->flags = EDGE_TRUE_VALUE;
2075 e->probability = profile_probability::likely ().guessed ();
2076 basic_block bb2 = e->dest;
2077 gsi2 = gsi_after_labels (bb2);
2079 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2080 NULL_TREE, NULL_TREE);
2081 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2082 e = split_block (bb2, cond_stmt);
2083 e->flags = EDGE_TRUE_VALUE;
2084 e->probability = profile_probability::likely ().guessed ();
2085 gsi2 = gsi_after_labels (e->dest);
2087 tree step = fold_convert (itype, fd->loops[i].step);
2088 t = build_int_cst (itype, (fd->loops[i].cond_code
2089 == LT_EXPR ? -1 : 1));
2090 t = fold_build2 (PLUS_EXPR, itype, step, t);
2091 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2092 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2093 if (TYPE_UNSIGNED (itype)
2094 && fd->loops[i].cond_code == GT_EXPR)
2095 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2096 fold_build1 (NEGATE_EXPR, itype, t),
2097 fold_build1 (NEGATE_EXPR, itype, step));
2098 else
2099 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2100 tree first_inner_iterations
2101 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2102 true, GSI_SAME_STMT);
2103 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2104 if (TYPE_UNSIGNED (itype)
2105 && fd->loops[i].cond_code == GT_EXPR)
2106 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2107 fold_build1 (NEGATE_EXPR, itype, t),
2108 fold_build1 (NEGATE_EXPR, itype, step));
2109 else
2110 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2111 tree factor
2112 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2113 true, GSI_SAME_STMT);
2114 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2115 build_one_cst (itype));
2116 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2117 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2118 t = fold_build2 (MULT_EXPR, itype, factor, t);
2119 t = fold_build2 (PLUS_EXPR, itype,
2120 fold_build2 (MULT_EXPR, itype, outer_niters,
2121 first_inner_iterations), t);
2122 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2123 fold_convert (type, t));
2125 basic_block bb3 = create_empty_bb (bb1);
2126 add_bb_to_loop (bb3, bb1->loop_father);
2128 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2129 e->probability = profile_probability::unlikely ().guessed ();
2131 gsi2 = gsi_after_labels (bb3);
2132 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2133 NULL_TREE, NULL_TREE);
2134 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2135 e = split_block (bb3, cond_stmt);
2136 e->flags = EDGE_TRUE_VALUE;
2137 e->probability = profile_probability::likely ().guessed ();
2138 basic_block bb4 = e->dest;
2140 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2141 ne->probability = e->probability.invert ();
2143 basic_block bb5 = create_empty_bb (bb2);
2144 add_bb_to_loop (bb5, bb2->loop_father);
2146 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2147 ne->probability = profile_probability::unlikely ().guessed ();
2149 for (int j = 0; j < 2; j++)
2151 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2152 t = fold_build2 (MINUS_EXPR, itype,
2153 unshare_expr (fd->loops[i].n1),
2154 unshare_expr (fd->loops[i].n2));
2155 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2156 tree tem
2157 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2158 true, GSI_SAME_STMT);
2159 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2160 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2161 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2162 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2163 true, GSI_SAME_STMT);
2164 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2165 if (fd->loops[i].m1)
2167 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2168 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2169 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2171 else
2172 n1 = t;
2173 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2174 true, GSI_SAME_STMT);
2175 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2176 if (fd->loops[i].m2)
2178 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2179 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2180 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2182 else
2183 n2 = t;
2184 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2185 true, GSI_SAME_STMT);
2186 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2188 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2189 NULL_TREE, NULL_TREE);
2190 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2191 e = split_block (gsi_bb (gsi2), cond_stmt);
2192 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2193 e->probability = profile_probability::unlikely ().guessed ();
2194 ne = make_edge (e->src, bb1,
2195 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2196 ne->probability = e->probability.invert ();
2197 gsi2 = gsi_after_labels (e->dest);
2199 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2200 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2202 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2205 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2206 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2207 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2209 if (fd->first_nonrect + 1 == fd->last_nonrect)
2211 fd->first_inner_iterations = first_inner_iterations;
2212 fd->factor = factor;
2213 fd->adjn1 = n1o;
2216 else
2218 /* Fallback implementation. Evaluate the loops with m1/m2
2219 non-NULL as well as their outer loops at runtime using temporaries
2220 instead of the original iteration variables, and in the
2221 body just bump the counter. */
2222 gimple_stmt_iterator gsi2 = *gsi;
2223 gsi_prev (&gsi2);
2224 e = split_block (entry_bb, gsi_stmt (gsi2));
2225 e = split_block (e->dest, (gimple *) NULL);
2226 basic_block cur_bb = e->src;
2227 basic_block next_bb = e->dest;
2228 entry_bb = e->dest;
2229 *gsi = gsi_after_labels (entry_bb);
2231 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2232 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2234 for (i = 0; i <= fd->last_nonrect; i++)
2236 if (fd->loops[i].m1 == NULL_TREE
2237 && fd->loops[i].m2 == NULL_TREE
2238 && !fd->loops[i].non_rect_referenced)
2239 continue;
2241 tree itype = TREE_TYPE (fd->loops[i].v);
2243 gsi2 = gsi_after_labels (cur_bb);
2244 tree n1, n2;
2245 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2246 if (fd->loops[i].m1)
2248 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2249 n1 = fold_build2 (MULT_EXPR, itype,
2250 vs[i - fd->loops[i].outer], n1);
2251 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2253 else
2254 n1 = t;
2255 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2256 true, GSI_SAME_STMT);
2257 if (i < fd->last_nonrect)
2259 vs[i] = create_tmp_reg (itype, ".it");
2260 expand_omp_build_assign (&gsi2, vs[i], n1);
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2263 if (fd->loops[i].m2)
2265 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2266 n2 = fold_build2 (MULT_EXPR, itype,
2267 vs[i - fd->loops[i].outer], n2);
2268 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2270 else
2271 n2 = t;
2272 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2273 true, GSI_SAME_STMT);
2274 if (i == fd->last_nonrect)
2276 gcond *cond_stmt
2277 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2278 NULL_TREE, NULL_TREE);
2279 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2280 e = split_block (cur_bb, cond_stmt);
2281 e->flags = EDGE_TRUE_VALUE;
2282 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2283 e->probability = profile_probability::likely ().guessed ();
2284 ne->probability = e->probability.invert ();
2285 gsi2 = gsi_after_labels (e->dest);
2287 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2288 ? -1 : 1));
2289 t = fold_build2 (PLUS_EXPR, itype,
2290 fold_convert (itype, fd->loops[i].step), t);
2291 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2292 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2293 tree step = fold_convert (itype, fd->loops[i].step);
2294 if (TYPE_UNSIGNED (itype)
2295 && fd->loops[i].cond_code == GT_EXPR)
2296 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2297 fold_build1 (NEGATE_EXPR, itype, t),
2298 fold_build1 (NEGATE_EXPR, itype, step));
2299 else
2300 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2301 t = fold_convert (type, t);
2302 t = fold_build2 (PLUS_EXPR, type,
2303 counts[fd->last_nonrect], t);
2304 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2305 true, GSI_SAME_STMT);
2306 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2307 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2308 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2309 break;
2311 e = split_block (cur_bb, last_stmt (cur_bb));
2313 basic_block new_cur_bb = create_empty_bb (cur_bb);
2314 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2316 gsi2 = gsi_after_labels (e->dest);
2317 tree step = fold_convert (itype,
2318 unshare_expr (fd->loops[i].step));
2319 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2320 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2321 true, GSI_SAME_STMT);
2322 expand_omp_build_assign (&gsi2, vs[i], t);
2324 ne = split_block (e->dest, last_stmt (e->dest));
2325 gsi2 = gsi_after_labels (ne->dest);
2327 gcond *cond_stmt
2328 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2329 NULL_TREE, NULL_TREE);
2330 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2331 edge e3, e4;
2332 if (next_bb == entry_bb)
2334 e3 = find_edge (ne->dest, next_bb);
2335 e3->flags = EDGE_FALSE_VALUE;
2337 else
2338 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2339 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2340 e4->probability = profile_probability::likely ().guessed ();
2341 e3->probability = e4->probability.invert ();
2342 basic_block esrc = e->src;
2343 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2344 cur_bb = new_cur_bb;
2345 basic_block latch_bb = next_bb;
2346 next_bb = e->dest;
2347 remove_edge (e);
2348 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2349 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2350 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2353 t = NULL_TREE;
2354 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2355 if (!fd->loops[i].non_rect_referenced
2356 && fd->loops[i].m1 == NULL_TREE
2357 && fd->loops[i].m2 == NULL_TREE)
2359 if (t == NULL_TREE)
2360 t = counts[i];
2361 else
2362 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2364 if (t)
2366 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2367 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2369 if (!rect_count_seen)
2370 t = counts[fd->last_nonrect];
2371 else
2372 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2373 counts[fd->last_nonrect]);
2374 expand_omp_build_assign (gsi, fd->loop.n2, t);
2376 else if (fd->non_rect)
2378 tree t = fd->loop.n2;
2379 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2380 int non_rect_referenced = 0, non_rect = 0;
2381 for (i = 0; i < fd->collapse; i++)
2383 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2384 && !integer_zerop (counts[i]))
2385 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2386 if (fd->loops[i].non_rect_referenced)
2387 non_rect_referenced++;
2388 if (fd->loops[i].m1 || fd->loops[i].m2)
2389 non_rect++;
2391 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2392 counts[fd->last_nonrect] = t;
2396 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2397 T = V;
2398 V3 = N31 + (T % count3) * STEP3;
2399 T = T / count3;
2400 V2 = N21 + (T % count2) * STEP2;
2401 T = T / count2;
2402 V1 = N11 + T * STEP1;
2403 if this loop doesn't have an inner loop construct combined with it.
2404 If it does have an inner loop construct combined with it and the
2405 iteration count isn't known constant, store values from counts array
2406 into its _looptemp_ temporaries instead.
2407 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2408 inclusive), use the count of all those loops together, and either
2409 find quadratic etc. equation roots, or as a fallback, do:
2410 COUNT = 0;
2411 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2412 for (tmpj = M21 * tmpi + N21;
2413 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2414 {
2415 int tmpk1 = M31 * tmpj + N31;
2416 int tmpk2 = M32 * tmpj + N32;
2417 if (tmpk1 COND3 tmpk2)
2418 {
2419 if (COND3 is <)
2420 adj = STEP3 - 1;
2421 else
2422 adj = STEP3 + 1;
2423 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2424 if (COUNT + temp > T)
2425 {
2426 V1 = tmpi;
2427 V2 = tmpj;
2428 V3 = tmpk1 + (T - COUNT) * STEP3;
2429 goto done;
2430 }
2431 else
2432 COUNT += temp;
2433 }
2434 }
2435 done:;
2436 but for optional innermost or outermost rectangular loops that aren't
2437 referenced by other loop expressions keep doing the division/modulo. */
2439 static void
2440 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2441 tree *counts, tree *nonrect_bounds,
2442 gimple *inner_stmt, tree startvar)
2444 int i;
2445 if (gimple_omp_for_combined_p (fd->for_stmt))
2447 /* If fd->loop.n2 is constant, then no propagation of the counts
2448 is needed, they are constant. */
2449 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2450 return;
2452 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2453 ? gimple_omp_taskreg_clauses (inner_stmt)
2454 : gimple_omp_for_clauses (inner_stmt);
2455 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2456 isn't supposed to be handled, as the inner loop doesn't
2457 use it. */
2458 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2459 gcc_assert (innerc);
2460 int count = 0;
2461 if (fd->non_rect
2462 && fd->last_nonrect == fd->first_nonrect + 1
2463 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2464 count = 4;
2465 for (i = 0; i < fd->collapse + count; i++)
2467 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2468 OMP_CLAUSE__LOOPTEMP_);
2469 gcc_assert (innerc);
2470 if (i)
2472 tree tem = OMP_CLAUSE_DECL (innerc);
2473 tree t;
2474 if (i < fd->collapse)
2475 t = counts[i];
2476 else
2477 switch (i - fd->collapse)
2479 case 0: t = counts[0]; break;
2480 case 1: t = fd->first_inner_iterations; break;
2481 case 2: t = fd->factor; break;
2482 case 3: t = fd->adjn1; break;
2483 default: gcc_unreachable ();
2485 t = fold_convert (TREE_TYPE (tem), t);
2486 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2487 false, GSI_CONTINUE_LINKING);
2488 gassign *stmt = gimple_build_assign (tem, t);
2489 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2492 return;
2495 tree type = TREE_TYPE (fd->loop.v);
2496 tree tem = create_tmp_reg (type, ".tem");
2497 gassign *stmt = gimple_build_assign (tem, startvar);
2498 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2500 for (i = fd->collapse - 1; i >= 0; i--)
2502 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2503 itype = vtype;
2504 if (POINTER_TYPE_P (vtype))
2505 itype = signed_type_for (vtype);
2506 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2507 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2508 else
2509 t = tem;
2510 if (i == fd->last_nonrect)
2512 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2513 false, GSI_CONTINUE_LINKING);
2514 tree stopval = t;
2515 tree idx = create_tmp_reg (type, ".count");
2516 expand_omp_build_assign (gsi, idx,
2517 build_zero_cst (type), true);
2518 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2519 if (fd->first_nonrect + 1 == fd->last_nonrect
2520 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2521 || fd->first_inner_iterations)
2522 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2523 != CODE_FOR_nothing)
2524 && !integer_zerop (fd->loop.n2))
2526 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2527 tree itype = TREE_TYPE (fd->loops[i].v);
2528 tree first_inner_iterations = fd->first_inner_iterations;
2529 tree factor = fd->factor;
2530 gcond *cond_stmt
2531 = gimple_build_cond (NE_EXPR, factor,
2532 build_zero_cst (TREE_TYPE (factor)),
2533 NULL_TREE, NULL_TREE);
2534 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2535 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2536 basic_block bb0 = e->src;
2537 e->flags = EDGE_TRUE_VALUE;
2538 e->probability = profile_probability::likely ();
2539 bb_triang_dom = bb0;
2540 *gsi = gsi_after_labels (e->dest);
2541 tree slltype = long_long_integer_type_node;
2542 tree ulltype = long_long_unsigned_type_node;
2543 tree stopvalull = fold_convert (ulltype, stopval);
2544 stopvalull
2545 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2546 false, GSI_CONTINUE_LINKING);
2547 first_inner_iterations
2548 = fold_convert (slltype, first_inner_iterations);
2549 first_inner_iterations
2550 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2551 NULL_TREE, false,
2552 GSI_CONTINUE_LINKING);
2553 factor = fold_convert (slltype, factor);
2554 factor
2555 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2556 false, GSI_CONTINUE_LINKING);
2557 tree first_inner_iterationsd
2558 = fold_build1 (FLOAT_EXPR, double_type_node,
2559 first_inner_iterations);
2560 first_inner_iterationsd
2561 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2562 NULL_TREE, false,
2563 GSI_CONTINUE_LINKING);
2564 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2565 factor);
2566 factord = force_gimple_operand_gsi (gsi, factord, true,
2567 NULL_TREE, false,
2568 GSI_CONTINUE_LINKING);
2569 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2570 stopvalull);
2571 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2572 NULL_TREE, false,
2573 GSI_CONTINUE_LINKING);
2574 /* Temporarily disable flag_rounding_math, values will be
2575 decimal numbers divided by 2 and worst case imprecisions
2576 due to too large values ought to be caught later by the
2577 checks for fallback. */
2578 int save_flag_rounding_math = flag_rounding_math;
2579 flag_rounding_math = 0;
2580 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2581 build_real (double_type_node, dconst2));
2582 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2583 first_inner_iterationsd, t);
2584 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2585 GSI_CONTINUE_LINKING);
2586 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2587 build_real (double_type_node, dconst2));
2588 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2589 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2590 fold_build2 (MULT_EXPR, double_type_node,
2591 t3, t3));
2592 flag_rounding_math = save_flag_rounding_math;
2593 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2594 GSI_CONTINUE_LINKING);
2595 if (flag_exceptions
2596 && cfun->can_throw_non_call_exceptions
2597 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2599 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2600 build_zero_cst (double_type_node));
2601 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2602 false, GSI_CONTINUE_LINKING);
2603 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2604 boolean_false_node,
2605 NULL_TREE, NULL_TREE);
2607 else
2608 cond_stmt
2609 = gimple_build_cond (LT_EXPR, t,
2610 build_zero_cst (double_type_node),
2611 NULL_TREE, NULL_TREE);
2612 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2613 e = split_block (gsi_bb (*gsi), cond_stmt);
2614 basic_block bb1 = e->src;
2615 e->flags = EDGE_FALSE_VALUE;
2616 e->probability = profile_probability::very_likely ();
2617 *gsi = gsi_after_labels (e->dest);
2618 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2619 tree sqrtr = create_tmp_var (double_type_node);
2620 gimple_call_set_lhs (call, sqrtr);
2621 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2622 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2623 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2624 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2625 tree c = create_tmp_var (ulltype);
2626 tree d = create_tmp_var (ulltype);
2627 expand_omp_build_assign (gsi, c, t, true);
2628 t = fold_build2 (MINUS_EXPR, ulltype, c,
2629 build_one_cst (ulltype));
2630 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2631 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2632 t = fold_build2 (MULT_EXPR, ulltype,
2633 fold_convert (ulltype, fd->factor), t);
2634 tree t2
2635 = fold_build2 (MULT_EXPR, ulltype, c,
2636 fold_convert (ulltype,
2637 fd->first_inner_iterations));
2638 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2639 expand_omp_build_assign (gsi, d, t, true);
2640 t = fold_build2 (MULT_EXPR, ulltype,
2641 fold_convert (ulltype, fd->factor), c);
2642 t = fold_build2 (PLUS_EXPR, ulltype,
2643 t, fold_convert (ulltype,
2644 fd->first_inner_iterations));
2645 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2646 GSI_CONTINUE_LINKING);
2647 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2648 NULL_TREE, NULL_TREE);
2649 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2650 e = split_block (gsi_bb (*gsi), cond_stmt);
2651 basic_block bb2 = e->src;
2652 e->flags = EDGE_TRUE_VALUE;
2653 e->probability = profile_probability::very_likely ();
2654 *gsi = gsi_after_labels (e->dest);
2655 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2656 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2657 GSI_CONTINUE_LINKING);
2658 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2659 NULL_TREE, NULL_TREE);
2660 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2661 e = split_block (gsi_bb (*gsi), cond_stmt);
2662 basic_block bb3 = e->src;
2663 e->flags = EDGE_FALSE_VALUE;
2664 e->probability = profile_probability::very_likely ();
2665 *gsi = gsi_after_labels (e->dest);
2666 t = fold_convert (itype, c);
2667 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2668 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2669 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2670 GSI_CONTINUE_LINKING);
2671 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2672 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2673 t2 = fold_convert (itype, t2);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2675 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2676 if (fd->loops[i].m1)
2678 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2679 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2681 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2682 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2683 bb_triang = e->src;
2684 *gsi = gsi_after_labels (e->dest);
2685 remove_edge (e);
2686 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2687 e->probability = profile_probability::very_unlikely ();
2688 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2689 e->probability = profile_probability::very_unlikely ();
2690 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2691 e->probability = profile_probability::very_unlikely ();
2693 basic_block bb4 = create_empty_bb (bb0);
2694 add_bb_to_loop (bb4, bb0->loop_father);
2695 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2696 e->probability = profile_probability::unlikely ();
2697 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2698 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2699 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2700 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2701 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2702 counts[i], counts[i - 1]);
2703 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2704 GSI_CONTINUE_LINKING);
2705 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2706 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2707 t = fold_convert (itype, t);
2708 t2 = fold_convert (itype, t2);
2709 t = fold_build2 (MULT_EXPR, itype, t,
2710 fold_convert (itype, fd->loops[i].step));
2711 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2712 t2 = fold_build2 (MULT_EXPR, itype, t2,
2713 fold_convert (itype, fd->loops[i - 1].step));
2714 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2715 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2716 false, GSI_CONTINUE_LINKING);
2717 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2718 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2719 if (fd->loops[i].m1)
2721 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2722 fd->loops[i - 1].v);
2723 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2725 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2726 false, GSI_CONTINUE_LINKING);
2727 stmt = gimple_build_assign (fd->loops[i].v, t);
2728 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2730 /* Fallback implementation. Evaluate the loops in between
2731 (inclusive) fd->first_nonrect and fd->last_nonrect at
2732 runtime using temporaries instead of the original iteration
2733 variables, in the body just bump the counter and compare
2734 with the desired value. */
2735 gimple_stmt_iterator gsi2 = *gsi;
2736 basic_block entry_bb = gsi_bb (gsi2);
2737 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2738 e = split_block (e->dest, (gimple *) NULL);
2739 basic_block dom_bb = NULL;
2740 basic_block cur_bb = e->src;
2741 basic_block next_bb = e->dest;
2742 entry_bb = e->dest;
2743 *gsi = gsi_after_labels (entry_bb);
2745 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2746 tree n1 = NULL_TREE, n2 = NULL_TREE;
2747 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2749 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2751 tree itype = TREE_TYPE (fd->loops[j].v);
2752 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2753 && fd->loops[j].m2 == NULL_TREE
2754 && !fd->loops[j].non_rect_referenced);
2755 gsi2 = gsi_after_labels (cur_bb);
2756 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2757 if (fd->loops[j].m1)
2759 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2760 n1 = fold_build2 (MULT_EXPR, itype,
2761 vs[j - fd->loops[j].outer], n1);
2762 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2764 else if (rect_p)
2765 n1 = build_zero_cst (type);
2766 else
2767 n1 = t;
2768 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2769 true, GSI_SAME_STMT);
2770 if (j < fd->last_nonrect)
2772 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2773 expand_omp_build_assign (&gsi2, vs[j], n1);
2775 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2776 if (fd->loops[j].m2)
2778 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2779 n2 = fold_build2 (MULT_EXPR, itype,
2780 vs[j - fd->loops[j].outer], n2);
2781 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2783 else if (rect_p)
2784 n2 = counts[j];
2785 else
2786 n2 = t;
2787 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2788 true, GSI_SAME_STMT);
2789 if (j == fd->last_nonrect)
2791 gcond *cond_stmt
2792 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2793 NULL_TREE, NULL_TREE);
2794 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2795 e = split_block (cur_bb, cond_stmt);
2796 e->flags = EDGE_TRUE_VALUE;
2797 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2798 e->probability = profile_probability::likely ().guessed ();
2799 ne->probability = e->probability.invert ();
2800 gsi2 = gsi_after_labels (e->dest);
2802 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2803 ? -1 : 1));
2804 t = fold_build2 (PLUS_EXPR, itype,
2805 fold_convert (itype, fd->loops[j].step), t);
2806 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2807 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2808 tree step = fold_convert (itype, fd->loops[j].step);
2809 if (TYPE_UNSIGNED (itype)
2810 && fd->loops[j].cond_code == GT_EXPR)
2811 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2812 fold_build1 (NEGATE_EXPR, itype, t),
2813 fold_build1 (NEGATE_EXPR, itype, step));
2814 else
2815 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2816 t = fold_convert (type, t);
2817 t = fold_build2 (PLUS_EXPR, type, idx, t);
2818 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2819 true, GSI_SAME_STMT);
2820 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2821 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2822 cond_stmt
2823 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2824 NULL_TREE);
2825 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2826 e = split_block (gsi_bb (gsi2), cond_stmt);
2827 e->flags = EDGE_TRUE_VALUE;
2828 e->probability = profile_probability::likely ().guessed ();
2829 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2830 ne->probability = e->probability.invert ();
2831 gsi2 = gsi_after_labels (e->dest);
2832 expand_omp_build_assign (&gsi2, idx, t);
2833 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2834 break;
2836 e = split_block (cur_bb, last_stmt (cur_bb));
2838 basic_block new_cur_bb = create_empty_bb (cur_bb);
2839 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2841 gsi2 = gsi_after_labels (e->dest);
2842 if (rect_p)
2843 t = fold_build2 (PLUS_EXPR, type, vs[j],
2844 build_one_cst (type));
2845 else
2847 tree step
2848 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2849 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2851 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2852 true, GSI_SAME_STMT);
2853 expand_omp_build_assign (&gsi2, vs[j], t);
2855 edge ne = split_block (e->dest, last_stmt (e->dest));
2856 gsi2 = gsi_after_labels (ne->dest);
2858 gcond *cond_stmt;
2859 if (next_bb == entry_bb)
2860 /* No need to actually check the outermost condition. */
2861 cond_stmt
2862 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2863 boolean_true_node,
2864 NULL_TREE, NULL_TREE);
2865 else
2866 cond_stmt
2867 = gimple_build_cond (rect_p ? LT_EXPR
2868 : fd->loops[j].cond_code,
2869 vs[j], n2, NULL_TREE, NULL_TREE);
2870 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2871 edge e3, e4;
2872 if (next_bb == entry_bb)
2874 e3 = find_edge (ne->dest, next_bb);
2875 e3->flags = EDGE_FALSE_VALUE;
2876 dom_bb = ne->dest;
2878 else
2879 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2880 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2881 e4->probability = profile_probability::likely ().guessed ();
2882 e3->probability = e4->probability.invert ();
2883 basic_block esrc = e->src;
2884 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2885 cur_bb = new_cur_bb;
2886 basic_block latch_bb = next_bb;
2887 next_bb = e->dest;
2888 remove_edge (e);
2889 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2890 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2891 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2893 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2895 tree itype = TREE_TYPE (fd->loops[j].v);
2896 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2897 && fd->loops[j].m2 == NULL_TREE
2898 && !fd->loops[j].non_rect_referenced);
2899 if (j == fd->last_nonrect)
2901 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2902 t = fold_convert (itype, t);
2903 tree t2
2904 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2905 t = fold_build2 (MULT_EXPR, itype, t, t2);
2906 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2908 else if (rect_p)
2910 t = fold_convert (itype, vs[j]);
2911 t = fold_build2 (MULT_EXPR, itype, t,
2912 fold_convert (itype, fd->loops[j].step));
2913 if (POINTER_TYPE_P (vtype))
2914 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2915 else
2916 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2918 else
2919 t = vs[j];
2920 t = force_gimple_operand_gsi (gsi, t, false,
2921 NULL_TREE, true,
2922 GSI_SAME_STMT);
2923 stmt = gimple_build_assign (fd->loops[j].v, t);
2924 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2926 if (gsi_end_p (*gsi))
2927 *gsi = gsi_last_bb (gsi_bb (*gsi));
2928 else
2929 gsi_prev (gsi);
2930 if (bb_triang)
2932 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2933 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2934 *gsi = gsi_after_labels (e->dest);
2935 if (!gsi_end_p (*gsi))
2936 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2937 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2940 else
2942 t = fold_convert (itype, t);
2943 t = fold_build2 (MULT_EXPR, itype, t,
2944 fold_convert (itype, fd->loops[i].step));
2945 if (POINTER_TYPE_P (vtype))
2946 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2947 else
2948 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2949 t = force_gimple_operand_gsi (gsi, t,
2950 DECL_P (fd->loops[i].v)
2951 && TREE_ADDRESSABLE (fd->loops[i].v),
2952 NULL_TREE, false,
2953 GSI_CONTINUE_LINKING);
2954 stmt = gimple_build_assign (fd->loops[i].v, t);
2955 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2957 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2959 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2960 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2961 false, GSI_CONTINUE_LINKING);
2962 stmt = gimple_build_assign (tem, t);
2963 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2965 if (i == fd->last_nonrect)
2966 i = fd->first_nonrect;
2968 if (fd->non_rect)
2969 for (i = 0; i <= fd->last_nonrect; i++)
2970 if (fd->loops[i].m2)
2972 tree itype = TREE_TYPE (fd->loops[i].v);
2974 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2975 t = fold_build2 (MULT_EXPR, itype,
2976 fd->loops[i - fd->loops[i].outer].v, t);
2977 t = fold_build2 (PLUS_EXPR, itype, t,
2978 fold_convert (itype,
2979 unshare_expr (fd->loops[i].n2)));
2980 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2981 t = force_gimple_operand_gsi (gsi, t, false,
2982 NULL_TREE, false,
2983 GSI_CONTINUE_LINKING);
2984 stmt = gimple_build_assign (nonrect_bounds[i], t);
2985 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2989 /* Helper function for expand_omp_for_*. Generate code like:
2990 L10:
2991 V3 += STEP3;
2992 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2993 L11:
2994 V3 = N31;
2995 V2 += STEP2;
2996 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2997 L12:
2998 V2 = N21;
2999 V1 += STEP1;
3000 goto BODY_BB;
3001 For non-rectangular loops, use temporaries stored in nonrect_bounds
3002 for the upper bounds if M?2 multiplier is present. Given e.g.
3003 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3004 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3005 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3006 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3008 L10:
3009 V4 += STEP4;
3010 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3011 L11:
3012 V4 = N41 + M41 * V2; // This can be left out if the loop
3013 // refers to the immediate parent loop
3014 V3 += STEP3;
3015 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3016 L12:
3017 V3 = N31;
3018 V2 += STEP2;
3019 if (V2 cond2 N22) goto L120; else goto L13;
3020 L120:
3021 V4 = N41 + M41 * V2;
3022 NONRECT_BOUND4 = N42 + M42 * V2;
3023 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3024 L13:
3025 V2 = N21;
3026 V1 += STEP1;
3027 goto L120; */
/* Parameters, as used by the code below:
   FD              the loop nest description; loops 0 .. fd->collapse - 1
                   are walked from innermost to outermost.
   NONRECT_BOUNDS  per-loop upper-bound temporaries; entry [k] is read or
                   written only for loops whose M2 multiplier is set.
   CONT_BB         block the chain of update blocks is created after.
   BODY_BB         initial target of the "condition still holds" edges.
   Returns the block created for the innermost collapsed loop (NULL if the
   loop below never runs).  */
3029 static basic_block
3030 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3031 basic_block cont_bb, basic_block body_bb)
3033 basic_block last_bb, bb, collapse_bb = NULL;
3034 int i;
3035 gimple_stmt_iterator gsi;
3036 edge e;
3037 tree t;
3038 gimple *stmt;
/* Build one new block per collapsed loop, innermost first; each new block
   is created relative to the previously created one (LAST_BB).  */
3040 last_bb = cont_bb;
3041 for (i = fd->collapse - 1; i >= 0; i--)
3043 tree vtype = TREE_TYPE (fd->loops[i].v);
3045 bb = create_empty_bb (last_bb);
3046 add_bb_to_loop (bb, last_bb->loop_father);
3047 gsi = gsi_start_bb (bb);
3049 if (i < fd->collapse - 1)
/* This block is entered when the condition at the end of the previous
   (more inner) block was false; that edge gets probability 1/8.  */
3051 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3052 e->probability
3053 = profile_probability::guessed_always ().apply_scale (1, 8);
/* Reset the next-inner loop's variable to its start value: N1, plus
   M1 times the referenced outer variable when M1 is set.  The reset is
   skipped when M1 is set and the loop refers to its immediate parent
   (outer == 1).  */
3055 struct omp_for_data_loop *l = &fd->loops[i + 1];
3056 if (l->m1 == NULL_TREE || l->outer != 1)
3058 t = l->n1;
3059 if (l->m1)
3061 tree t2
3062 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3063 fd->loops[i + 1 - l->outer].v, l->m1);
3064 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3066 t = force_gimple_operand_gsi (&gsi, t,
3067 DECL_P (l->v)
3068 && TREE_ADDRESSABLE (l->v),
3069 NULL_TREE, false,
3070 GSI_CONTINUE_LINKING);
3071 stmt = gimple_build_assign (l->v, t);
3072 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3075 else
/* First iteration (innermost loop): remember this block as the return
   value.  */
3076 collapse_bb = bb;
3078 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
/* V_i += STEP_i, using pointer-plus when the iterator has pointer type.  */
3080 if (POINTER_TYPE_P (vtype))
3081 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3082 else
3083 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3084 t = force_gimple_operand_gsi (&gsi, t,
3085 DECL_P (fd->loops[i].v)
3086 && TREE_ADDRESSABLE (fd->loops[i].v),
3087 NULL_TREE, false, GSI_CONTINUE_LINKING);
3088 stmt = gimple_build_assign (fd->loops[i].v, t);
3089 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* If inner loops have bounds that depend on V_i, build a chain of blocks,
   one per such loop J, that recomputes J's start value (and its bound
   temporary when M2 is set) and tests J's condition.  A failed test goes
   back to BB; passing every test reaches the old BODY_BB.  */
3091 if (fd->loops[i].non_rect_referenced)
3093 basic_block update_bb = NULL, prev_bb = NULL;
3094 for (int j = i + 1; j <= fd->last_nonrect; j++)
3095 if (j - fd->loops[j].outer == i)
3097 tree n1, n2;
3098 struct omp_for_data_loop *l = &fd->loops[j];
3099 basic_block this_bb = create_empty_bb (last_bb);
3100 add_bb_to_loop (this_bb, last_bb->loop_father);
3101 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3102 if (prev_bb)
3104 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
/* NOTE(review): the apply_scale call below is cut off in this listing;
   by analogy with the other true edges it presumably ends in "8);" --
   confirm against the upstream file.  */
3105 e->probability
3106 = profile_probability::guessed_always ().apply_scale (7,
3108 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
/* Start value of loop J: assigned to J's variable only when M1 is set,
   otherwise N1 is just gimplified for use in the comparison.  */
3110 if (l->m1)
3112 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3113 fd->loops[i].v);
3114 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3115 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3116 false,
3117 GSI_CONTINUE_LINKING);
3118 stmt = gimple_build_assign (l->v, n1);
3119 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3120 n1 = l->v;
3122 else
3123 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3124 NULL_TREE, false,
3125 GSI_CONTINUE_LINKING);
/* Upper bound of loop J: stored into nonrect_bounds[j] when M2 is set.  */
3126 if (l->m2)
3128 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3129 fd->loops[i].v);
3130 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3131 t, unshare_expr (l->n2));
3132 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3133 false,
3134 GSI_CONTINUE_LINKING);
3135 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3136 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3137 n2 = nonrect_bounds[j];
3139 else
3140 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3141 true, NULL_TREE, false,
3142 GSI_CONTINUE_LINKING);
3143 gcond *cond_stmt
3144 = gimple_build_cond (l->cond_code, n1, n2,
3145 NULL_TREE, NULL_TREE);
3146 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3147 if (update_bb == NULL)
3148 update_bb = this_bb;
/* Failed test: back to BB (probability 1/8).  */
3149 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3150 e->probability
3151 = profile_probability::guessed_always ().apply_scale (1, 8);
3152 if (prev_bb == NULL)
3153 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3154 prev_bb = this_bb;
/* The last block of the chain continues to the old BODY_BB; from here on
   the first block of the chain takes over the role of BODY_BB.  */
3156 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3157 e->probability
3158 = profile_probability::guessed_always ().apply_scale (7, 8);
3159 body_bb = update_bb;
/* Every loop except the outermost tests its condition against N2 (or the
   bound temporary when M2 is set); the outermost just falls through to
   BODY_BB.  */
3162 if (i > 0)
3164 if (fd->loops[i].m2)
3165 t = nonrect_bounds[i];
3166 else
3167 t = unshare_expr (fd->loops[i].n2);
3168 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3169 false, GSI_CONTINUE_LINKING);
3170 tree v = fd->loops[i].v;
3171 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3172 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3173 false, GSI_CONTINUE_LINKING);
3174 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3175 stmt = gimple_build_cond_empty (t);
3176 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Regimplify the condition if either operand is flagged by
   expand_omp_regimplify_p.  */
3177 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3178 expand_omp_regimplify_p, NULL, NULL)
3179 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3180 expand_omp_regimplify_p, NULL, NULL))
3181 gimple_regimplify_operands (stmt, &gsi);
3182 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3183 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3185 else
3186 make_edge (bb, body_bb, EDGE_FALLTHRU);
3187 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3188 last_bb = bb;
3191 return collapse_bb;
3194 /* Expand #pragma omp ordered depend(source). */
3196 static void
3197 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3198 tree *counts, location_t loc)
3200 enum built_in_function source_ix
3201 = fd->iter_type == long_integer_type_node
3202 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3203 gimple *g
3204 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3205 build_fold_addr_expr (counts[fd->ordered]));
3206 gimple_set_location (g, loc);
3207 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3210 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
/* Parameters, as used by the code below:
   GSI     on entry, iterator at the ordered statement being expanded; on
           exit, positioned after the labels of the block that follows the
           generated wait call.
   FD      loop nest description (fd->ordered dimensions are processed).
   COUNTS  per-dimension iteration counters; a NULL entry means the count
           is derived from the loop variable and N1 instead.
   C       the clause whose OMP_CLAUSE_DECL is the list of sink offsets.
   LOC     location used for diagnostics and for the generated call.  */
3212 static void
3213 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3214 tree *counts, tree c, location_t loc)
3216 auto_vec<tree, 10> args;
3217 enum built_in_function sink_ix
3218 = fd->iter_type == long_integer_type_node
3219 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3220 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3221 int i;
3222 gimple_stmt_iterator gsi2 = *gsi;
3223 bool warned_step = false;
/* Diagnostic-only pass: look at the first dimension with a non-zero offset
   and warn if the clause waits for a lexically later iteration.  No code
   is generated here.  */
3225 for (i = 0; i < fd->ordered; i++)
3227 tree step = NULL_TREE;
3228 off = TREE_PURPOSE (deps);
/* An offset wrapped in TRUNC_DIV_EXPR carries the step as operand 1.  */
3229 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3231 step = TREE_OPERAND (off, 1);
3232 off = TREE_OPERAND (off, 0);
3234 if (!integer_zerop (off))
3236 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3237 || fd->loops[i].cond_code == GT_EXPR);
3238 bool forward = fd->loops[i].cond_code == LT_EXPR;
3239 if (step)
3241 /* Non-simple Fortran DO loops. If step is variable,
3242 we don't know at compile time even the direction, so can't
3243 warn. */
3244 if (TREE_CODE (step) != INTEGER_CST)
3245 break;
3246 forward = tree_int_cst_sgn (step) != -1;
3248 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3249 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3250 "waiting for lexically later iteration");
3251 break;
3253 deps = TREE_CHAIN (deps);
3255 /* If all offsets corresponding to the collapsed loops are zero,
3256 this depend clause can be ignored. FIXME: but there is still a
3257 flush needed. We need to emit one __sync_synchronize () for it
3258 though (perhaps conditionally)? Solve this together with the
3259 conservative dependence folding optimization.
3260 if (i >= fd->collapse)
3261 return; */
/* Split the block twice so that the wait call gets a block of its own
   (e1->dest) between e1->src and e2->dest.  GSI2 is used to insert into
   that middle block, *GSI to append to e1->src.  */
3263 deps = OMP_CLAUSE_DECL (c);
3264 gsi_prev (&gsi2);
3265 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3266 edge e2 = split_block_after_labels (e1->dest);
3268 gsi2 = gsi_after_labels (e1->dest);
3269 *gsi = gsi_last_bb (e1->src);
/* Code-generating pass: for every ordered dimension, extend COND with a
   check that the awaited iteration lies inside the iteration space, and
   compute the iteration number passed to the wait builtin.  */
3270 for (i = 0; i < fd->ordered; i++)
3272 tree itype = TREE_TYPE (fd->loops[i].v);
3273 tree step = NULL_TREE;
3274 tree orig_off = NULL_TREE;
/* Offsets of pointer iterators are computed in sizetype.  */
3275 if (POINTER_TYPE_P (itype))
3276 itype = sizetype;
3277 if (i)
3278 deps = TREE_CHAIN (deps);
3279 off = TREE_PURPOSE (deps);
3280 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3282 step = TREE_OPERAND (off, 1);
3283 off = TREE_OPERAND (off, 0);
3284 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3285 && integer_onep (fd->loops[i].step)
3286 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
/* S is the step actually divided by: the one carried by the offset if
   present, otherwise the loop's own step.  */
3288 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3289 if (step)
3291 off = fold_convert_loc (loc, itype, off);
3292 orig_off = off;
3293 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
/* T becomes the "awaited value is within the loop bounds" test for this
   dimension; a zero offset is trivially in range.  */
3296 if (integer_zerop (off))
3297 t = boolean_true_node;
3298 else
3300 tree a;
3301 tree co = fold_convert_loc (loc, itype, off);
/* A is the loop variable's value in the awaited iteration (V - off for a
   negative sink, V + off otherwise).  */
3302 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3304 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3305 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3306 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3307 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3308 co);
3310 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3311 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3312 fd->loops[i].v, co);
3313 else
3314 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3315 fd->loops[i].v, co);
/* With an explicit step the direction may only be known at run time:
   build both bound tests and select between them on STEP < 0.  */
3316 if (step)
3318 tree t1, t2;
3319 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3320 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3322 else
3323 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3324 fd->loops[i].n2);
3325 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3326 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3327 fd->loops[i].n2);
3328 else
3329 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3330 fd->loops[i].n1);
3331 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3332 step, build_int_cst (TREE_TYPE (step), 0));
3333 if (TREE_CODE (step) != INTEGER_CST)
3335 t1 = unshare_expr (t1);
3336 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3337 false, GSI_CONTINUE_LINKING);
3338 t2 = unshare_expr (t2);
3339 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3340 false, GSI_CONTINUE_LINKING);
3342 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3343 t, t2, t1);
3345 else if (fd->loops[i].cond_code == LT_EXPR)
3347 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3348 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3349 fd->loops[i].n1);
3350 else
3351 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3352 fd->loops[i].n2);
3354 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3355 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3356 fd->loops[i].n2);
3357 else
3358 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3359 fd->loops[i].n1);
/* Accumulate the per-dimension tests into COND.  */
3361 if (cond)
3362 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3363 else
3364 cond = t;
3366 off = fold_convert_loc (loc, itype, off);
/* When the step is not +1 (for <) or -1 (for >), or an explicit step is
   present, additionally require the offset to be a multiple of the step;
   warn once if that test folds to false.  */
3368 if (step
3369 || (fd->loops[i].cond_code == LT_EXPR
3370 ? !integer_onep (fd->loops[i].step)
3371 : !integer_minus_onep (fd->loops[i].step)))
3373 if (step == NULL_TREE
3374 && TYPE_UNSIGNED (itype)
3375 && fd->loops[i].cond_code == GT_EXPR)
3376 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3377 fold_build1_loc (loc, NEGATE_EXPR, itype,
3378 s));
3379 else
3380 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3381 orig_off ? orig_off : off, s);
3382 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3383 build_int_cst (itype, 0));
3384 if (integer_zerop (t) && !warned_step)
3386 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3387 "refers to iteration never in the iteration "
3388 "space");
3389 warned_step = true;
3391 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3392 cond, t);
/* T now becomes the current iteration number of this dimension: the
   combined iterator for collapsed dimensions, COUNTS[i] if present,
   otherwise V - N1 converted to the iteration type.  */
3395 if (i <= fd->collapse - 1 && fd->collapse > 1)
3396 t = fd->loop.v;
3397 else if (counts[i])
3398 t = counts[i];
3399 else
3401 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3402 fd->loops[i].v, fd->loops[i].n1);
3403 t = fold_convert_loc (loc, fd->iter_type, t);
/* Turn OFF into a signed count of iterations.  */
3405 if (step)
3406 /* We have divided off by step already earlier. */;
3407 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3408 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3409 fold_build1_loc (loc, NEGATE_EXPR, itype,
3410 s));
3411 else
3412 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3413 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3414 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3415 off = fold_convert_loc (loc, fd->iter_type, off);
/* Collapsed dimensions are folded into one linear offset: COFF carries
   (offset so far) * counts[i] into the next dimension, and only the last
   collapsed dimension pushes an argument.  */
3416 if (i <= fd->collapse - 1 && fd->collapse > 1)
3418 if (i)
3419 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3420 off);
3421 if (i < fd->collapse - 1)
3423 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3424 counts[i]);
3425 continue;
3428 off = unshare_expr (off);
3429 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3430 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3431 true, GSI_SAME_STMT);
3432 args.safe_push (t);
/* Emit the wait call in the middle block.  */
3434 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3435 gimple_set_location (g, loc);
3436 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
/* Guard it: end e1->src with "if (COND)"; the true edge (e1) enters the
   wait block, the new false edge (probability 1/8) bypasses it to
   e2->dest.  */
3438 cond = unshare_expr (cond);
3439 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3440 GSI_CONTINUE_LINKING);
3441 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3442 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3443 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3444 e1->probability = e3->probability.invert ();
3445 e1->flags = EDGE_TRUE_VALUE;
3446 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
/* Leave *GSI in the join block so the caller can continue from there.  */
3448 *gsi = gsi_after_labels (e2->dest);
3451 /* Expand all #pragma omp ordered depend(source) and
3452 #pragma omp ordered depend(sink:...) constructs in the current
3453 #pragma omp for ordered(n) region. */
3455 static void
3456 expand_omp_ordered_source_sink (struct omp_region *region,
3457 struct omp_for_data *fd, tree *counts,
3458 basic_block cont_bb)
3460 struct omp_region *inner;
3461 int i;
3462 for (i = fd->collapse - 1; i < fd->ordered; i++)
3463 if (i == fd->collapse - 1 && fd->collapse > 1)
3464 counts[i] = NULL_TREE;
3465 else if (i >= fd->collapse && !cont_bb)
3466 counts[i] = build_zero_cst (fd->iter_type);
3467 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3468 && integer_onep (fd->loops[i].step))
3469 counts[i] = NULL_TREE;
3470 else
3471 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3472 tree atype
3473 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3474 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3475 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3477 for (inner = region->inner; inner; inner = inner->next)
3478 if (inner->type == GIMPLE_OMP_ORDERED)
3480 gomp_ordered *ord_stmt = inner->ord_stmt;
3481 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3482 location_t loc = gimple_location (ord_stmt);
3483 tree c;
3484 for (c = gimple_omp_ordered_clauses (ord_stmt);
3485 c; c = OMP_CLAUSE_CHAIN (c))
3486 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3487 break;
3488 if (c)
3489 expand_omp_ordered_source (&gsi, fd, counts, loc);
3490 for (c = gimple_omp_ordered_clauses (ord_stmt);
3491 c; c = OMP_CLAUSE_CHAIN (c))
3492 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3493 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3494 gsi_remove (&gsi, true);
3498 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3499 collapsed. */
/* Parameters, as used by the code below:
   FD                   loop nest description; dimensions fd->collapse ..
                        fd->ordered - 1 are the ones wrapped here.
   COUNTS               per-dimension counters (may be NULL per entry);
                        COUNTS[fd->ordered] is the array whose elements are
                        stored to below.
   CONT_BB              continue block, or NULL.
   BODY_BB              block at whose start the initializations go.
   ORDERED_LASTPRIVATE  whether the trailing lastprivate initialization
                        pass runs.
   Returns CONT_BB unchanged when there is nothing to wrap, NULL when
   CONT_BB is NULL, otherwise the block following the outermost generated
   loop condition.  */
3501 static basic_block
3502 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3503 basic_block cont_bb, basic_block body_bb,
3504 bool ordered_lastprivate)
3506 if (fd->ordered == fd->collapse)
3507 return cont_bb;
/* Without a continue block no loops are built: each extra iterator is set
   to its N1 and its slot in the COUNTS[fd->ordered] array is zeroed.  */
3509 if (!cont_bb)
3511 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3512 for (int i = fd->collapse; i < fd->ordered; i++)
3514 tree type = TREE_TYPE (fd->loops[i].v);
3515 tree n1 = fold_convert (type, fd->loops[i].n1);
3516 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3517 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3518 size_int (i - fd->collapse + 1),
3519 NULL_TREE, NULL_TREE);
3520 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3522 return NULL;
/* Build the loops from the innermost non-collapsed dimension outwards.  */
3525 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3527 tree t, type = TREE_TYPE (fd->loops[i].v);
/* Loop preheader code at the start of BODY_BB: V = N1, counter (if any)
   = 0, array slot = 0.  */
3528 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3529 expand_omp_build_assign (&gsi, fd->loops[i].v,
3530 fold_convert (type, fd->loops[i].n1));
3531 if (counts[i])
3532 expand_omp_build_assign (&gsi, counts[i],
3533 build_zero_cst (fd->iter_type));
3534 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3535 size_int (i - fd->collapse + 1),
3536 NULL_TREE, NULL_TREE);
3537 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
/* Split BODY_BB right after the initializations; the remainder becomes
   NEW_BODY.  */
3538 if (!gsi_end_p (gsi))
3539 gsi_prev (&gsi);
3540 else
3541 gsi = gsi_last_bb (body_bb);
3542 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3543 basic_block new_body = e1->dest;
3544 if (body_bb == cont_bb)
3545 cont_bb = new_body;
3546 edge e2 = NULL;
3547 basic_block new_header;
/* If the continue block is reachable, emit the latch code there: V += STEP,
   bump the counter (or recompute V - N1), store it into the array slot,
   then split so that what follows becomes the loop header.  */
3548 if (EDGE_COUNT (cont_bb->preds) > 0)
3550 gsi = gsi_last_bb (cont_bb);
3551 if (POINTER_TYPE_P (type))
3552 t = fold_build_pointer_plus (fd->loops[i].v,
3553 fold_convert (sizetype,
3554 fd->loops[i].step));
3555 else
3556 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3557 fold_convert (type, fd->loops[i].step));
3558 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3559 if (counts[i])
3561 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3562 build_int_cst (fd->iter_type, 1));
3563 expand_omp_build_assign (&gsi, counts[i], t);
3564 t = counts[i];
3566 else
3568 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3569 fd->loops[i].v, fd->loops[i].n1);
3570 t = fold_convert (fd->iter_type, t);
3571 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3572 true, GSI_SAME_STMT);
3574 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3575 size_int (i - fd->collapse + 1),
3576 NULL_TREE, NULL_TREE);
3577 expand_omp_build_assign (&gsi, aref, t);
3578 gsi_prev (&gsi);
3579 e2 = split_block (cont_bb, gsi_stmt (gsi));
3580 new_header = e2->dest;
3582 else
3583 new_header = cont_bb;
/* Loop header: "if (V cond N2)".  After the split below the block following
   the condition becomes the new CONT_BB.  */
3584 gsi = gsi_after_labels (new_header);
3585 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3586 true, GSI_SAME_STMT);
3587 tree n2
3588 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3589 true, NULL_TREE, true, GSI_SAME_STMT);
3590 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3591 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3592 edge e3 = split_block (new_header, gsi_stmt (gsi));
3593 cont_bb = e3->dest;
/* Rewire: BODY_BB now falls through to the header; the header's true edge
   enters NEW_BODY, its false edge (probability 1/8) leaves the loop.  */
3594 remove_edge (e1);
3595 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3596 e3->flags = EDGE_FALSE_VALUE;
3597 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3598 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3599 e1->probability = e3->probability.invert ();
3601 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3602 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
/* Register the new loop only when a latch was actually created above.  */
3604 if (e2)
3606 class loop *loop = alloc_loop ();
3607 loop->header = new_header;
3608 loop->latch = e2->src;
3609 add_loop (loop, body_bb->loop_father);
3613 /* If there are any lastprivate clauses and it is possible some loops
3614 might have zero iterations, ensure all the decls are initialized,
3615 otherwise we could crash evaluating C++ class iterators with lastprivate
3616 clauses. */
/* NEED_INITS flips to true at the first dimension whose "N1 cond N2" does
   not fold to constant true; every later dimension then gets an extra
   V = N1 at the start of BODY_BB.  */
3617 bool need_inits = false;
3618 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3619 if (need_inits)
3621 tree type = TREE_TYPE (fd->loops[i].v);
3622 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3623 expand_omp_build_assign (&gsi, fd->loops[i].v,
3624 fold_convert (type, fd->loops[i].n1));
3626 else
3628 tree type = TREE_TYPE (fd->loops[i].v);
3629 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3630 boolean_type_node,
3631 fold_convert (type, fd->loops[i].n1),
3632 fold_convert (type, fd->loops[i].n2));
3633 if (!integer_onep (this_cond))
3634 need_inits = true;
3637 return cont_bb;
3640 /* A subroutine of expand_omp_for. Generate code for a parallel
3641 loop with any schedule. Given parameters:
3643 for (V = N1; V cond N2; V += STEP) BODY;
3645 where COND is "<" or ">", we generate pseudocode
3647 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3648 if (more) goto L0; else goto L3;
3649 L0:
3650 V = istart0;
3651 iend = iend0;
3652 L1:
3653 BODY;
3654 V += STEP;
3655 if (V cond iend) goto L1; else goto L2;
3656 L2:
3657 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3658 L3:
3660 If this is a combined omp parallel loop, instead of the call to
3661 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3662 If this is gimple_omp_for_combined_p loop, then instead of assigning
3663 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3664 inner GIMPLE_OMP_FOR and V += STEP; and
3665 if (V cond iend) goto L1; else goto L2; are removed.
3667 For collapsed loops, given parameters:
3668 collapse(3)
3669 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3670 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3671 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3672 BODY;
3674 we generate pseudocode
3676 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3677 if (cond3 is <)
3678 adj = STEP3 - 1;
3679 else
3680 adj = STEP3 + 1;
3681 count3 = (adj + N32 - N31) / STEP3;
3682 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3683 if (cond2 is <)
3684 adj = STEP2 - 1;
3685 else
3686 adj = STEP2 + 1;
3687 count2 = (adj + N22 - N21) / STEP2;
3688 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3689 if (cond1 is <)
3690 adj = STEP1 - 1;
3691 else
3692 adj = STEP1 + 1;
3693 count1 = (adj + N12 - N11) / STEP1;
3694 count = count1 * count2 * count3;
3695 goto Z1;
3696 Z0:
3697 count = 0;
3698 Z1:
3699 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3700 if (more) goto L0; else goto L3;
3701 L0:
3702 V = istart0;
3703 T = V;
3704 V3 = N31 + (T % count3) * STEP3;
3705 T = T / count3;
3706 V2 = N21 + (T % count2) * STEP2;
3707 T = T / count2;
3708 V1 = N11 + T * STEP1;
3709 iend = iend0;
3710 L1:
3711 BODY;
3712 V += 1;
3713 if (V < iend) goto L10; else goto L2;
3714 L10:
3715 V3 += STEP3;
3716 if (V3 cond3 N32) goto L1; else goto L11;
3717 L11:
3718 V3 = N31;
3719 V2 += STEP2;
3720 if (V2 cond2 N22) goto L1; else goto L12;
3721 L12:
3722 V2 = N21;
3723 V1 += STEP1;
3724 goto L1;
3725 L2:
3726 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3727 L3:
3729 */
3731 static void
3732 expand_omp_for_generic (struct omp_region *region,
3733 struct omp_for_data *fd,
3734 enum built_in_function start_fn,
3735 enum built_in_function next_fn,
3736 tree sched_arg,
3737 gimple *inner_stmt)
3739 tree type, istart0, iend0, iend;
3740 tree t, vmain, vback, bias = NULL_TREE;
3741 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3742 basic_block l2_bb = NULL, l3_bb = NULL;
3743 gimple_stmt_iterator gsi;
3744 gassign *assign_stmt;
3745 bool in_combined_parallel = is_combined_parallel (region);
3746 bool broken_loop = region->cont == NULL;
3747 edge e, ne;
3748 tree *counts = NULL;
3749 int i;
3750 bool ordered_lastprivate = false;
3752 gcc_assert (!broken_loop || !in_combined_parallel);
3753 gcc_assert (fd->iter_type == long_integer_type_node
3754 || !in_combined_parallel);
3756 entry_bb = region->entry;
3757 cont_bb = region->cont;
3758 collapse_bb = NULL;
3759 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3760 gcc_assert (broken_loop
3761 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3762 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3763 l1_bb = single_succ (l0_bb);
3764 if (!broken_loop)
3766 l2_bb = create_empty_bb (cont_bb);
3767 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3768 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3769 == l1_bb));
3770 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3772 else
3773 l2_bb = NULL;
3774 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3775 exit_bb = region->exit;
3777 gsi = gsi_last_nondebug_bb (entry_bb);
3779 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3780 if (fd->ordered
3781 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3782 OMP_CLAUSE_LASTPRIVATE))
3783 ordered_lastprivate = false;
3784 tree reductions = NULL_TREE;
3785 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3786 tree memv = NULL_TREE;
3787 if (fd->lastprivate_conditional)
3789 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3790 OMP_CLAUSE__CONDTEMP_);
3791 if (fd->have_pointer_condtemp)
3792 condtemp = OMP_CLAUSE_DECL (c);
3793 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3794 cond_var = OMP_CLAUSE_DECL (c);
3796 if (sched_arg)
3798 if (fd->have_reductemp)
3800 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3801 OMP_CLAUSE__REDUCTEMP_);
3802 reductions = OMP_CLAUSE_DECL (c);
3803 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3804 gimple *g = SSA_NAME_DEF_STMT (reductions);
3805 reductions = gimple_assign_rhs1 (g);
3806 OMP_CLAUSE_DECL (c) = reductions;
3807 entry_bb = gimple_bb (g);
3808 edge e = split_block (entry_bb, g);
3809 if (region->entry == entry_bb)
3810 region->entry = e->dest;
3811 gsi = gsi_last_bb (entry_bb);
3813 else
3814 reductions = null_pointer_node;
3815 if (fd->have_pointer_condtemp)
3817 tree type = TREE_TYPE (condtemp);
3818 memv = create_tmp_var (type);
3819 TREE_ADDRESSABLE (memv) = 1;
3820 unsigned HOST_WIDE_INT sz
3821 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3822 sz *= fd->lastprivate_conditional;
3823 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3824 false);
3825 mem = build_fold_addr_expr (memv);
3827 else
3828 mem = null_pointer_node;
3830 if (fd->collapse > 1 || fd->ordered)
3832 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3833 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3835 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3836 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3837 zero_iter1_bb, first_zero_iter1,
3838 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3840 if (zero_iter1_bb)
3842 /* Some counts[i] vars might be uninitialized if
3843 some loop has zero iterations. But the body shouldn't
3844 be executed in that case, so just avoid uninit warnings. */
3845 for (i = first_zero_iter1;
3846 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3847 if (SSA_VAR_P (counts[i]))
3848 TREE_NO_WARNING (counts[i]) = 1;
3849 gsi_prev (&gsi);
3850 e = split_block (entry_bb, gsi_stmt (gsi));
3851 entry_bb = e->dest;
3852 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3853 gsi = gsi_last_nondebug_bb (entry_bb);
3854 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3855 get_immediate_dominator (CDI_DOMINATORS,
3856 zero_iter1_bb));
3858 if (zero_iter2_bb)
3860 /* Some counts[i] vars might be uninitialized if
3861 some loop has zero iterations. But the body shouldn't
3862 be executed in that case, so just avoid uninit warnings. */
3863 for (i = first_zero_iter2; i < fd->ordered; i++)
3864 if (SSA_VAR_P (counts[i]))
3865 TREE_NO_WARNING (counts[i]) = 1;
3866 if (zero_iter1_bb)
3867 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3868 else
3870 gsi_prev (&gsi);
3871 e = split_block (entry_bb, gsi_stmt (gsi));
3872 entry_bb = e->dest;
3873 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3874 gsi = gsi_last_nondebug_bb (entry_bb);
3875 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3876 get_immediate_dominator
3877 (CDI_DOMINATORS, zero_iter2_bb));
3880 if (fd->collapse == 1)
3882 counts[0] = fd->loop.n2;
3883 fd->loop = fd->loops[0];
3887 type = TREE_TYPE (fd->loop.v);
3888 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3889 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3890 TREE_ADDRESSABLE (istart0) = 1;
3891 TREE_ADDRESSABLE (iend0) = 1;
3893 /* See if we need to bias by LLONG_MIN. */
3894 if (fd->iter_type == long_long_unsigned_type_node
3895 && TREE_CODE (type) == INTEGER_TYPE
3896 && !TYPE_UNSIGNED (type)
3897 && fd->ordered == 0)
3899 tree n1, n2;
3901 if (fd->loop.cond_code == LT_EXPR)
3903 n1 = fd->loop.n1;
3904 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3906 else
3908 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3909 n2 = fd->loop.n1;
3911 if (TREE_CODE (n1) != INTEGER_CST
3912 || TREE_CODE (n2) != INTEGER_CST
3913 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3914 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3917 gimple_stmt_iterator gsif = gsi;
3918 gsi_prev (&gsif);
3920 tree arr = NULL_TREE;
3921 if (in_combined_parallel)
3923 gcc_assert (fd->ordered == 0);
3924 /* In a combined parallel loop, emit a call to
3925 GOMP_loop_foo_next. */
3926 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3927 build_fold_addr_expr (istart0),
3928 build_fold_addr_expr (iend0));
3930 else
3932 tree t0, t1, t2, t3, t4;
3933 /* If this is not a combined parallel loop, emit a call to
3934 GOMP_loop_foo_start in ENTRY_BB. */
3935 t4 = build_fold_addr_expr (iend0);
3936 t3 = build_fold_addr_expr (istart0);
3937 if (fd->ordered)
3939 t0 = build_int_cst (unsigned_type_node,
3940 fd->ordered - fd->collapse + 1);
3941 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3942 fd->ordered
3943 - fd->collapse + 1),
3944 ".omp_counts");
3945 DECL_NAMELESS (arr) = 1;
3946 TREE_ADDRESSABLE (arr) = 1;
3947 TREE_STATIC (arr) = 1;
3948 vec<constructor_elt, va_gc> *v;
3949 vec_alloc (v, fd->ordered - fd->collapse + 1);
3950 int idx;
3952 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3954 tree c;
3955 if (idx == 0 && fd->collapse > 1)
3956 c = fd->loop.n2;
3957 else
3958 c = counts[idx + fd->collapse - 1];
3959 tree purpose = size_int (idx);
3960 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3961 if (TREE_CODE (c) != INTEGER_CST)
3962 TREE_STATIC (arr) = 0;
3965 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3966 if (!TREE_STATIC (arr))
3967 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3968 void_type_node, arr),
3969 true, NULL_TREE, true, GSI_SAME_STMT);
3970 t1 = build_fold_addr_expr (arr);
3971 t2 = NULL_TREE;
3973 else
3975 t2 = fold_convert (fd->iter_type, fd->loop.step);
3976 t1 = fd->loop.n2;
3977 t0 = fd->loop.n1;
3978 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3980 tree innerc
3981 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 t0 = OMP_CLAUSE_DECL (innerc);
3985 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3986 OMP_CLAUSE__LOOPTEMP_);
3987 gcc_assert (innerc);
3988 t1 = OMP_CLAUSE_DECL (innerc);
3990 if (POINTER_TYPE_P (TREE_TYPE (t0))
3991 && TYPE_PRECISION (TREE_TYPE (t0))
3992 != TYPE_PRECISION (fd->iter_type))
3994 /* Avoid casting pointers to integer of a different size. */
3995 tree itype = signed_type_for (type);
3996 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3997 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3999 else
4001 t1 = fold_convert (fd->iter_type, t1);
4002 t0 = fold_convert (fd->iter_type, t0);
4004 if (bias)
4006 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4007 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4010 if (fd->iter_type == long_integer_type_node || fd->ordered)
4012 if (fd->chunk_size)
4014 t = fold_convert (fd->iter_type, fd->chunk_size);
4015 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4016 if (sched_arg)
4018 if (fd->ordered)
4019 t = build_call_expr (builtin_decl_explicit (start_fn),
4020 8, t0, t1, sched_arg, t, t3, t4,
4021 reductions, mem);
4022 else
4023 t = build_call_expr (builtin_decl_explicit (start_fn),
4024 9, t0, t1, t2, sched_arg, t, t3, t4,
4025 reductions, mem);
4027 else if (fd->ordered)
4028 t = build_call_expr (builtin_decl_explicit (start_fn),
4029 5, t0, t1, t, t3, t4);
4030 else
4031 t = build_call_expr (builtin_decl_explicit (start_fn),
4032 6, t0, t1, t2, t, t3, t4);
4034 else if (fd->ordered)
4035 t = build_call_expr (builtin_decl_explicit (start_fn),
4036 4, t0, t1, t3, t4);
4037 else
4038 t = build_call_expr (builtin_decl_explicit (start_fn),
4039 5, t0, t1, t2, t3, t4);
4041 else
4043 tree t5;
4044 tree c_bool_type;
4045 tree bfn_decl;
4047 /* The GOMP_loop_ull_*start functions have additional boolean
4048 argument, true for < loops and false for > loops.
4049 In Fortran, the C bool type can be different from
4050 boolean_type_node. */
4051 bfn_decl = builtin_decl_explicit (start_fn);
4052 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4053 t5 = build_int_cst (c_bool_type,
4054 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4055 if (fd->chunk_size)
4057 tree bfn_decl = builtin_decl_explicit (start_fn);
4058 t = fold_convert (fd->iter_type, fd->chunk_size);
4059 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4060 if (sched_arg)
4061 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4062 t, t3, t4, reductions, mem);
4063 else
4064 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4066 else
4067 t = build_call_expr (builtin_decl_explicit (start_fn),
4068 6, t5, t0, t1, t2, t3, t4);
4071 if (TREE_TYPE (t) != boolean_type_node)
4072 t = fold_build2 (NE_EXPR, boolean_type_node,
4073 t, build_int_cst (TREE_TYPE (t), 0));
4074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4075 true, GSI_SAME_STMT);
4076 if (arr && !TREE_STATIC (arr))
4078 tree clobber = build_clobber (TREE_TYPE (arr));
4079 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4080 GSI_SAME_STMT);
4082 if (fd->have_pointer_condtemp)
4083 expand_omp_build_assign (&gsi, condtemp, memv, false);
4084 if (fd->have_reductemp)
4086 gimple *g = gsi_stmt (gsi);
4087 gsi_remove (&gsi, true);
4088 release_ssa_name (gimple_assign_lhs (g));
4090 entry_bb = region->entry;
4091 gsi = gsi_last_nondebug_bb (entry_bb);
4093 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4095 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4097 /* Remove the GIMPLE_OMP_FOR statement. */
4098 gsi_remove (&gsi, true);
4100 if (gsi_end_p (gsif))
4101 gsif = gsi_after_labels (gsi_bb (gsif));
4102 gsi_next (&gsif);
4104 /* Iteration setup for sequential loop goes in L0_BB. */
4105 tree startvar = fd->loop.v;
4106 tree endvar = NULL_TREE;
4108 if (gimple_omp_for_combined_p (fd->for_stmt))
4110 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4111 && gimple_omp_for_kind (inner_stmt)
4112 == GF_OMP_FOR_KIND_SIMD);
4113 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4114 OMP_CLAUSE__LOOPTEMP_);
4115 gcc_assert (innerc);
4116 startvar = OMP_CLAUSE_DECL (innerc);
4117 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4118 OMP_CLAUSE__LOOPTEMP_);
4119 gcc_assert (innerc);
4120 endvar = OMP_CLAUSE_DECL (innerc);
4123 gsi = gsi_start_bb (l0_bb);
4124 t = istart0;
4125 if (fd->ordered && fd->collapse == 1)
4126 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4127 fold_convert (fd->iter_type, fd->loop.step));
4128 else if (bias)
4129 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4130 if (fd->ordered && fd->collapse == 1)
4132 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4133 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4134 fd->loop.n1, fold_convert (sizetype, t));
4135 else
4137 t = fold_convert (TREE_TYPE (startvar), t);
4138 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4139 fd->loop.n1, t);
4142 else
4144 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4145 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4146 t = fold_convert (TREE_TYPE (startvar), t);
4148 t = force_gimple_operand_gsi (&gsi, t,
4149 DECL_P (startvar)
4150 && TREE_ADDRESSABLE (startvar),
4151 NULL_TREE, false, GSI_CONTINUE_LINKING);
4152 assign_stmt = gimple_build_assign (startvar, t);
4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4154 if (cond_var)
4156 tree itype = TREE_TYPE (cond_var);
4157 /* For lastprivate(conditional:) itervar, we need some iteration
4158 counter that starts at unsigned non-zero and increases.
4159 Prefer as few IVs as possible, so if we can use startvar
4160 itself, use that, or startvar + constant (those would be
4161 incremented with step), and as last resort use the s0 + 1
4162 incremented by 1. */
4163 if ((fd->ordered && fd->collapse == 1)
4164 || bias
4165 || POINTER_TYPE_P (type)
4166 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4167 || fd->loop.cond_code != LT_EXPR)
4168 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4169 build_int_cst (itype, 1));
4170 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4171 t = fold_convert (itype, t);
4172 else
4174 tree c = fold_convert (itype, fd->loop.n1);
4175 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4176 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4178 t = force_gimple_operand_gsi (&gsi, t, false,
4179 NULL_TREE, false, GSI_CONTINUE_LINKING);
4180 assign_stmt = gimple_build_assign (cond_var, t);
4181 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4184 t = iend0;
4185 if (fd->ordered && fd->collapse == 1)
4186 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4187 fold_convert (fd->iter_type, fd->loop.step));
4188 else if (bias)
4189 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4190 if (fd->ordered && fd->collapse == 1)
4192 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4193 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4194 fd->loop.n1, fold_convert (sizetype, t));
4195 else
4197 t = fold_convert (TREE_TYPE (startvar), t);
4198 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4199 fd->loop.n1, t);
4202 else
4204 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4205 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4206 t = fold_convert (TREE_TYPE (startvar), t);
4208 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4209 false, GSI_CONTINUE_LINKING);
4210 if (endvar)
4212 assign_stmt = gimple_build_assign (endvar, iend);
4213 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4214 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4215 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4216 else
4217 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4218 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4220 /* Handle linear clause adjustments. */
4221 tree itercnt = NULL_TREE;
4222 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4223 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4224 c; c = OMP_CLAUSE_CHAIN (c))
4225 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4226 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4228 tree d = OMP_CLAUSE_DECL (c);
4229 bool is_ref = omp_is_reference (d);
4230 tree t = d, a, dest;
4231 if (is_ref)
4232 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4233 tree type = TREE_TYPE (t);
4234 if (POINTER_TYPE_P (type))
4235 type = sizetype;
4236 dest = unshare_expr (t);
4237 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4238 expand_omp_build_assign (&gsif, v, t);
4239 if (itercnt == NULL_TREE)
4241 itercnt = startvar;
4242 tree n1 = fd->loop.n1;
4243 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4245 itercnt
4246 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4247 itercnt);
4248 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4250 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4251 itercnt, n1);
4252 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4253 itercnt, fd->loop.step);
4254 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4255 NULL_TREE, false,
4256 GSI_CONTINUE_LINKING);
4258 a = fold_build2 (MULT_EXPR, type,
4259 fold_convert (type, itercnt),
4260 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4261 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4262 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4263 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4264 false, GSI_CONTINUE_LINKING);
4265 expand_omp_build_assign (&gsi, dest, t, true);
4267 if (fd->collapse > 1)
4268 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4270 if (fd->ordered)
4272 /* Until now, counts array contained number of iterations or
4273 variable containing it for ith loop. From now on, we need
4274 those counts only for collapsed loops, and only for the 2nd
4275 till the last collapsed one. Move those one element earlier,
4276 we'll use counts[fd->collapse - 1] for the first source/sink
4277 iteration counter and so on and counts[fd->ordered]
4278 as the array holding the current counter values for
4279 depend(source). */
4280 if (fd->collapse > 1)
4281 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4282 if (broken_loop)
4284 int i;
4285 for (i = fd->collapse; i < fd->ordered; i++)
4287 tree type = TREE_TYPE (fd->loops[i].v);
4288 tree this_cond
4289 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4290 fold_convert (type, fd->loops[i].n1),
4291 fold_convert (type, fd->loops[i].n2));
4292 if (!integer_onep (this_cond))
4293 break;
4295 if (i < fd->ordered)
4297 cont_bb
4298 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4299 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4300 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4301 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4303 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4304 make_edge (cont_bb, l1_bb, 0);
4305 l2_bb = create_empty_bb (cont_bb);
4306 broken_loop = false;
4309 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4310 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4311 ordered_lastprivate);
4312 if (counts[fd->collapse - 1])
4314 gcc_assert (fd->collapse == 1);
4315 gsi = gsi_last_bb (l0_bb);
4316 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4317 istart0, true);
4318 if (cont_bb)
4320 gsi = gsi_last_bb (cont_bb);
4321 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4322 counts[fd->collapse - 1],
4323 build_int_cst (fd->iter_type, 1));
4324 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4325 tree aref = build4 (ARRAY_REF, fd->iter_type,
4326 counts[fd->ordered], size_zero_node,
4327 NULL_TREE, NULL_TREE);
4328 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4330 t = counts[fd->collapse - 1];
4332 else if (fd->collapse > 1)
4333 t = fd->loop.v;
4334 else
4336 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4337 fd->loops[0].v, fd->loops[0].n1);
4338 t = fold_convert (fd->iter_type, t);
4340 gsi = gsi_last_bb (l0_bb);
4341 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4342 size_zero_node, NULL_TREE, NULL_TREE);
4343 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4344 false, GSI_CONTINUE_LINKING);
4345 expand_omp_build_assign (&gsi, aref, t, true);
4348 if (!broken_loop)
4350 /* Code to control the increment and predicate for the sequential
4351 loop goes in the CONT_BB. */
4352 gsi = gsi_last_nondebug_bb (cont_bb);
4353 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4354 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4355 vmain = gimple_omp_continue_control_use (cont_stmt);
4356 vback = gimple_omp_continue_control_def (cont_stmt);
4358 if (cond_var)
4360 tree itype = TREE_TYPE (cond_var);
4361 tree t2;
4362 if ((fd->ordered && fd->collapse == 1)
4363 || bias
4364 || POINTER_TYPE_P (type)
4365 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4366 || fd->loop.cond_code != LT_EXPR)
4367 t2 = build_int_cst (itype, 1);
4368 else
4369 t2 = fold_convert (itype, fd->loop.step);
4370 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4371 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4372 NULL_TREE, true, GSI_SAME_STMT);
4373 assign_stmt = gimple_build_assign (cond_var, t2);
4374 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4377 if (!gimple_omp_for_combined_p (fd->for_stmt))
4379 if (POINTER_TYPE_P (type))
4380 t = fold_build_pointer_plus (vmain, fd->loop.step);
4381 else
4382 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4383 t = force_gimple_operand_gsi (&gsi, t,
4384 DECL_P (vback)
4385 && TREE_ADDRESSABLE (vback),
4386 NULL_TREE, true, GSI_SAME_STMT);
4387 assign_stmt = gimple_build_assign (vback, t);
4388 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4390 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4392 tree tem;
4393 if (fd->collapse > 1)
4394 tem = fd->loop.v;
4395 else
4397 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4398 fd->loops[0].v, fd->loops[0].n1);
4399 tem = fold_convert (fd->iter_type, tem);
4401 tree aref = build4 (ARRAY_REF, fd->iter_type,
4402 counts[fd->ordered], size_zero_node,
4403 NULL_TREE, NULL_TREE);
4404 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4405 true, GSI_SAME_STMT);
4406 expand_omp_build_assign (&gsi, aref, tem);
4409 t = build2 (fd->loop.cond_code, boolean_type_node,
4410 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4411 iend);
4412 gcond *cond_stmt = gimple_build_cond_empty (t);
4413 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4416 /* Remove GIMPLE_OMP_CONTINUE. */
4417 gsi_remove (&gsi, true);
4419 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4420 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4422 /* Emit code to get the next parallel iteration in L2_BB. */
4423 gsi = gsi_start_bb (l2_bb);
4425 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4426 build_fold_addr_expr (istart0),
4427 build_fold_addr_expr (iend0));
4428 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4430 if (TREE_TYPE (t) != boolean_type_node)
4431 t = fold_build2 (NE_EXPR, boolean_type_node,
4432 t, build_int_cst (TREE_TYPE (t), 0));
4433 gcond *cond_stmt = gimple_build_cond_empty (t);
4434 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4437 /* Add the loop cleanup function. */
4438 gsi = gsi_last_nondebug_bb (exit_bb);
4439 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4440 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4441 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4442 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4443 else
4444 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4445 gcall *call_stmt = gimple_build_call (t, 0);
4446 if (fd->ordered)
4448 tree arr = counts[fd->ordered];
4449 tree clobber = build_clobber (TREE_TYPE (arr));
4450 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4451 GSI_SAME_STMT);
4453 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4455 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4456 if (fd->have_reductemp)
4458 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4459 gimple_call_lhs (call_stmt));
4460 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4463 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4464 gsi_remove (&gsi, true);
4466 /* Connect the new blocks. */
4467 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4468 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4470 if (!broken_loop)
4472 gimple_seq phis;
4474 e = find_edge (cont_bb, l3_bb);
4475 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4477 phis = phi_nodes (l3_bb);
4478 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4480 gimple *phi = gsi_stmt (gsi);
4481 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4482 PHI_ARG_DEF_FROM_EDGE (phi, e));
4484 remove_edge (e);
4486 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4487 e = find_edge (cont_bb, l1_bb);
4488 if (e == NULL)
4490 e = BRANCH_EDGE (cont_bb);
4491 gcc_assert (single_succ (e->dest) == l1_bb);
4493 if (gimple_omp_for_combined_p (fd->for_stmt))
4495 remove_edge (e);
4496 e = NULL;
4498 else if (fd->collapse > 1)
4500 remove_edge (e);
4501 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4503 else
4504 e->flags = EDGE_TRUE_VALUE;
4505 if (e)
4507 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4508 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4510 else
4512 e = find_edge (cont_bb, l2_bb);
4513 e->flags = EDGE_FALLTHRU;
4515 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4517 if (gimple_in_ssa_p (cfun))
4519 /* Add phis to the outer loop that connect to the phis in the inner,
4520 original loop, and move the loop entry value of the inner phi to
4521 the loop entry value of the outer phi. */
4522 gphi_iterator psi;
4523 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4525 location_t locus;
4526 gphi *nphi;
4527 gphi *exit_phi = psi.phi ();
4529 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4530 continue;
4532 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4533 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4535 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4536 edge latch_to_l1 = find_edge (latch, l1_bb);
4537 gphi *inner_phi
4538 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4540 tree t = gimple_phi_result (exit_phi);
4541 tree new_res = copy_ssa_name (t, NULL);
4542 nphi = create_phi_node (new_res, l0_bb);
4544 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4545 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4546 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4547 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4548 add_phi_arg (nphi, t, entry_to_l0, locus);
4550 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4551 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4553 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4557 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4558 recompute_dominator (CDI_DOMINATORS, l2_bb));
4559 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4560 recompute_dominator (CDI_DOMINATORS, l3_bb));
4561 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4562 recompute_dominator (CDI_DOMINATORS, l0_bb));
4563 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4564 recompute_dominator (CDI_DOMINATORS, l1_bb));
4566 /* We enter expand_omp_for_generic with a loop. This original loop may
4567 have its own loop struct, or it may be part of an outer loop struct
4568 (which may be the fake loop). */
4569 class loop *outer_loop = entry_bb->loop_father;
4570 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4572 add_bb_to_loop (l2_bb, outer_loop);
4574 /* We've added a new loop around the original loop. Allocate the
4575 corresponding loop struct. */
4576 class loop *new_loop = alloc_loop ();
4577 new_loop->header = l0_bb;
4578 new_loop->latch = l2_bb;
4579 add_loop (new_loop, outer_loop);
4581 /* Allocate a loop structure for the original loop unless we already
4582 had one. */
4583 if (!orig_loop_has_loop_struct
4584 && !gimple_omp_for_combined_p (fd->for_stmt))
4586 class loop *orig_loop = alloc_loop ();
4587 orig_loop->header = l1_bb;
4588 /* The loop may have multiple latches. */
4589 add_loop (orig_loop, new_loop);
4594 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4595 compute needed allocation size. If !ALLOC of team allocations,
4596 if ALLOC of thread allocation. SZ is the initial needed size for
4597 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4598 CNT number of elements of each array, for !ALLOC this is
4599 omp_get_num_threads (), for ALLOC number of iterations handled by the
4600 current thread. If PTR is non-NULL, it is the start of the allocation
4601 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4602 clauses pointers to the corresponding arrays. */
4604 static tree
4605 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4606 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4607 gimple_stmt_iterator *gsi, bool alloc)
4609 tree eltsz = NULL_TREE;
4610 unsigned HOST_WIDE_INT preval = 0;
4611 if (ptr && sz)
4612 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4613 ptr, size_int (sz));
4614 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4615 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4616 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4617 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4619 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4620 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4621 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4623 unsigned HOST_WIDE_INT szl
4624 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4625 szl = least_bit_hwi (szl);
4626 if (szl)
4627 al = MIN (al, szl);
4629 if (ptr == NULL_TREE)
4631 if (eltsz == NULL_TREE)
4632 eltsz = TYPE_SIZE_UNIT (pointee_type);
4633 else
4634 eltsz = size_binop (PLUS_EXPR, eltsz,
4635 TYPE_SIZE_UNIT (pointee_type));
4637 if (preval == 0 && al <= alloc_align)
4639 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4640 sz += diff;
4641 if (diff && ptr)
4642 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4643 ptr, size_int (diff));
4645 else if (al > preval)
4647 if (ptr)
4649 ptr = fold_convert (pointer_sized_int_node, ptr);
4650 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4651 build_int_cst (pointer_sized_int_node,
4652 al - 1));
4653 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4654 build_int_cst (pointer_sized_int_node,
4655 -(HOST_WIDE_INT) al));
4656 ptr = fold_convert (ptr_type_node, ptr);
4658 else
4659 sz += al - 1;
4661 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4662 preval = al;
4663 else
4664 preval = 1;
4665 if (ptr)
4667 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4668 ptr = OMP_CLAUSE_DECL (c);
4669 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4670 size_binop (MULT_EXPR, cnt,
4671 TYPE_SIZE_UNIT (pointee_type)));
4675 if (ptr == NULL_TREE)
4677 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4678 if (sz)
4679 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4680 return eltsz;
4682 else
4683 return ptr;
4686 /* Return the last _looptemp_ clause if one has been created for
4687 lastprivate on distribute parallel for{, simd} or taskloop.
4688 FD is the loop data and INNERC should be the second _looptemp_
4689 clause (the one holding the end of the range).
4690 This is followed by collapse - 1 _looptemp_ clauses for the
4691 counts[1] and up, and for triangular loops followed by 4
4692 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4693 one factor and one adjn1). After this there is optionally one
4694 _looptemp_ clause that this function returns. */
4696 static tree
4697 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4699 gcc_assert (innerc);
4700 int count = fd->collapse - 1;
4701 if (fd->non_rect
4702 && fd->last_nonrect == fd->first_nonrect + 1
4703 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4704 count += 4;
4705 for (int i = 0; i < count; i++)
4707 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4708 OMP_CLAUSE__LOOPTEMP_);
4709 gcc_assert (innerc);
4711 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4712 OMP_CLAUSE__LOOPTEMP_);
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

        for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

        if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
        if (cond is <)
          adj = STEP - 1;
        else
          adj = STEP + 1;
        if ((__typeof (V)) -1 > 0 && cond is >)
          n = -(adj + N2 - N1) / -STEP;
        else
          n = (adj + N2 - N1) / STEP;
        q = n / nthreads;
        tt = n % nthreads;
        if (threadid < tt) goto L3; else goto L4;
    L3:
        tt = 0;
        q = q + 1;
    L4:
        s0 = q * threadid + tt;
        e0 = s0 + q;
        V = s0 * STEP + N1;
        if (s0 >= e0) goto L2; else goto L0;
    L0:
        e = e0 * STEP + N1;
    L1:
        BODY;
        V += STEP;
        if (V cond e) goto L1;
    L2:
*/

4752 static void
4753 expand_omp_for_static_nochunk (struct omp_region *region,
4754 struct omp_for_data *fd,
4755 gimple *inner_stmt)
4757 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4758 tree type, itype, vmain, vback;
4759 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4760 basic_block body_bb, cont_bb, collapse_bb = NULL;
4761 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4762 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4763 gimple_stmt_iterator gsi, gsip;
4764 edge ep;
4765 bool broken_loop = region->cont == NULL;
4766 tree *counts = NULL;
4767 tree n1, n2, step;
4768 tree reductions = NULL_TREE;
4769 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4771 itype = type = TREE_TYPE (fd->loop.v);
4772 if (POINTER_TYPE_P (type))
4773 itype = signed_type_for (type);
4775 entry_bb = region->entry;
4776 cont_bb = region->cont;
4777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4778 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4779 gcc_assert (broken_loop
4780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4781 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4782 body_bb = single_succ (seq_start_bb);
4783 if (!broken_loop)
4785 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4786 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4787 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4789 exit_bb = region->exit;
4791 /* Iteration space partitioning goes in ENTRY_BB. */
4792 gsi = gsi_last_nondebug_bb (entry_bb);
4793 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4794 gsip = gsi;
4795 gsi_prev (&gsip);
4797 if (fd->collapse > 1)
4799 int first_zero_iter = -1, dummy = -1;
4800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4802 counts = XALLOCAVEC (tree, fd->collapse);
4803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4804 fin_bb, first_zero_iter,
4805 dummy_bb, dummy, l2_dom_bb);
4806 t = NULL_TREE;
4808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4809 t = integer_one_node;
4810 else
4811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4812 fold_convert (type, fd->loop.n1),
4813 fold_convert (type, fd->loop.n2));
4814 if (fd->collapse == 1
4815 && TYPE_UNSIGNED (type)
4816 && (t == NULL_TREE || !integer_onep (t)))
4818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4820 true, GSI_SAME_STMT);
4821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4823 true, GSI_SAME_STMT);
4824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4825 NULL_TREE, NULL_TREE);
4826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4828 expand_omp_regimplify_p, NULL, NULL)
4829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4830 expand_omp_regimplify_p, NULL, NULL))
4832 gsi = gsi_for_stmt (cond_stmt);
4833 gimple_regimplify_operands (cond_stmt, &gsi);
4835 ep = split_block (entry_bb, cond_stmt);
4836 ep->flags = EDGE_TRUE_VALUE;
4837 entry_bb = ep->dest;
4838 ep->probability = profile_probability::very_likely ();
4839 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4840 ep->probability = profile_probability::very_unlikely ();
4841 if (gimple_in_ssa_p (cfun))
4843 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4845 !gsi_end_p (gpi); gsi_next (&gpi))
4847 gphi *phi = gpi.phi ();
4848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4849 ep, UNKNOWN_LOCATION);
4852 gsi = gsi_last_bb (entry_bb);
4855 if (fd->lastprivate_conditional)
4857 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4858 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4859 if (fd->have_pointer_condtemp)
4860 condtemp = OMP_CLAUSE_DECL (c);
4861 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4862 cond_var = OMP_CLAUSE_DECL (c);
4864 if (fd->have_reductemp
4865 /* For scan, we don't want to reinitialize condtemp before the
4866 second loop. */
4867 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4868 || fd->have_nonctrl_scantemp)
4870 tree t1 = build_int_cst (long_integer_type_node, 0);
4871 tree t2 = build_int_cst (long_integer_type_node, 1);
4872 tree t3 = build_int_cstu (long_integer_type_node,
4873 (HOST_WIDE_INT_1U << 31) + 1);
4874 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4875 gimple_stmt_iterator gsi2 = gsi_none ();
4876 gimple *g = NULL;
4877 tree mem = null_pointer_node, memv = NULL_TREE;
4878 unsigned HOST_WIDE_INT condtemp_sz = 0;
4879 unsigned HOST_WIDE_INT alloc_align = 0;
4880 if (fd->have_reductemp)
4882 gcc_assert (!fd->have_nonctrl_scantemp);
4883 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4884 reductions = OMP_CLAUSE_DECL (c);
4885 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4886 g = SSA_NAME_DEF_STMT (reductions);
4887 reductions = gimple_assign_rhs1 (g);
4888 OMP_CLAUSE_DECL (c) = reductions;
4889 gsi2 = gsi_for_stmt (g);
4891 else
4893 if (gsi_end_p (gsip))
4894 gsi2 = gsi_after_labels (region->entry);
4895 else
4896 gsi2 = gsip;
4897 reductions = null_pointer_node;
4899 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4901 tree type;
4902 if (fd->have_pointer_condtemp)
4903 type = TREE_TYPE (condtemp);
4904 else
4905 type = ptr_type_node;
4906 memv = create_tmp_var (type);
4907 TREE_ADDRESSABLE (memv) = 1;
4908 unsigned HOST_WIDE_INT sz = 0;
4909 tree size = NULL_TREE;
4910 if (fd->have_pointer_condtemp)
4912 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4913 sz *= fd->lastprivate_conditional;
4914 condtemp_sz = sz;
4916 if (fd->have_nonctrl_scantemp)
4918 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4919 gimple *g = gimple_build_call (nthreads, 0);
4920 nthreads = create_tmp_var (integer_type_node);
4921 gimple_call_set_lhs (g, nthreads);
4922 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4923 nthreads = fold_convert (sizetype, nthreads);
4924 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4925 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4926 alloc_align, nthreads, NULL,
4927 false);
4928 size = fold_convert (type, size);
4930 else
4931 size = build_int_cst (type, sz);
4932 expand_omp_build_assign (&gsi2, memv, size, false);
4933 mem = build_fold_addr_expr (memv);
4935 tree t
4936 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4937 9, t1, t2, t2, t3, t1, null_pointer_node,
4938 null_pointer_node, reductions, mem);
4939 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4940 true, GSI_SAME_STMT);
4941 if (fd->have_pointer_condtemp)
4942 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4943 if (fd->have_nonctrl_scantemp)
4945 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4946 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4947 alloc_align, nthreads, &gsi2, false);
4949 if (fd->have_reductemp)
4951 gsi_remove (&gsi2, true);
4952 release_ssa_name (gimple_assign_lhs (g));
4955 switch (gimple_omp_for_kind (fd->for_stmt))
4957 case GF_OMP_FOR_KIND_FOR:
4958 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4959 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4960 break;
4961 case GF_OMP_FOR_KIND_DISTRIBUTE:
4962 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4963 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4964 break;
4965 default:
4966 gcc_unreachable ();
4968 nthreads = build_call_expr (nthreads, 0);
4969 nthreads = fold_convert (itype, nthreads);
4970 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4971 true, GSI_SAME_STMT);
4972 threadid = build_call_expr (threadid, 0);
4973 threadid = fold_convert (itype, threadid);
4974 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4975 true, GSI_SAME_STMT);
4977 n1 = fd->loop.n1;
4978 n2 = fd->loop.n2;
4979 step = fd->loop.step;
4980 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4982 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4983 OMP_CLAUSE__LOOPTEMP_);
4984 gcc_assert (innerc);
4985 n1 = OMP_CLAUSE_DECL (innerc);
4986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4987 OMP_CLAUSE__LOOPTEMP_);
4988 gcc_assert (innerc);
4989 n2 = OMP_CLAUSE_DECL (innerc);
4991 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4992 true, NULL_TREE, true, GSI_SAME_STMT);
4993 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4994 true, NULL_TREE, true, GSI_SAME_STMT);
4995 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4996 true, NULL_TREE, true, GSI_SAME_STMT);
4998 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4999 t = fold_build2 (PLUS_EXPR, itype, step, t);
5000 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5001 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5002 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5003 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5004 fold_build1 (NEGATE_EXPR, itype, t),
5005 fold_build1 (NEGATE_EXPR, itype, step));
5006 else
5007 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5008 t = fold_convert (itype, t);
5009 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5011 q = create_tmp_reg (itype, "q");
5012 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5013 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5014 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5016 tt = create_tmp_reg (itype, "tt");
5017 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5018 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5019 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5021 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5022 gcond *cond_stmt = gimple_build_cond_empty (t);
5023 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5025 second_bb = split_block (entry_bb, cond_stmt)->dest;
5026 gsi = gsi_last_nondebug_bb (second_bb);
5027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5029 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5030 GSI_SAME_STMT);
5031 gassign *assign_stmt
5032 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5033 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5035 third_bb = split_block (second_bb, assign_stmt)->dest;
5036 gsi = gsi_last_nondebug_bb (third_bb);
5037 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5039 if (fd->have_nonctrl_scantemp)
5041 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5042 tree controlp = NULL_TREE, controlb = NULL_TREE;
5043 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5044 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5045 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5047 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5048 controlb = OMP_CLAUSE_DECL (c);
5049 else
5050 controlp = OMP_CLAUSE_DECL (c);
5051 if (controlb && controlp)
5052 break;
5054 gcc_assert (controlp && controlb);
5055 tree cnt = create_tmp_var (sizetype);
5056 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5058 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5059 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5060 alloc_align, cnt, NULL, true);
5061 tree size = create_tmp_var (sizetype);
5062 expand_omp_build_assign (&gsi, size, sz, false);
5063 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5064 size, size_int (16384));
5065 expand_omp_build_assign (&gsi, controlb, cmp);
5066 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5067 NULL_TREE, NULL_TREE);
5068 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5069 fourth_bb = split_block (third_bb, g)->dest;
5070 gsi = gsi_last_nondebug_bb (fourth_bb);
5071 /* FIXME: Once we have allocators, this should use allocator. */
5072 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5073 gimple_call_set_lhs (g, controlp);
5074 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5075 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5076 &gsi, true);
5077 gsi_prev (&gsi);
5078 g = gsi_stmt (gsi);
5079 fifth_bb = split_block (fourth_bb, g)->dest;
5080 gsi = gsi_last_nondebug_bb (fifth_bb);
5082 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5083 gimple_call_set_lhs (g, controlp);
5084 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5085 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5086 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5087 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5088 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5090 tree tmp = create_tmp_var (sizetype);
5091 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5092 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5093 TYPE_SIZE_UNIT (pointee_type));
5094 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5095 g = gimple_build_call (alloca_decl, 2, tmp,
5096 size_int (TYPE_ALIGN (pointee_type)));
5097 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5098 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5101 sixth_bb = split_block (fifth_bb, g)->dest;
5102 gsi = gsi_last_nondebug_bb (sixth_bb);
5105 t = build2 (MULT_EXPR, itype, q, threadid);
5106 t = build2 (PLUS_EXPR, itype, t, tt);
5107 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5109 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5110 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5112 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5113 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5115 /* Remove the GIMPLE_OMP_FOR statement. */
5116 gsi_remove (&gsi, true);
5118 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5119 gsi = gsi_start_bb (seq_start_bb);
5121 tree startvar = fd->loop.v;
5122 tree endvar = NULL_TREE;
5124 if (gimple_omp_for_combined_p (fd->for_stmt))
5126 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5127 ? gimple_omp_parallel_clauses (inner_stmt)
5128 : gimple_omp_for_clauses (inner_stmt);
5129 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5130 gcc_assert (innerc);
5131 startvar = OMP_CLAUSE_DECL (innerc);
5132 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5133 OMP_CLAUSE__LOOPTEMP_);
5134 gcc_assert (innerc);
5135 endvar = OMP_CLAUSE_DECL (innerc);
5136 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5137 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5139 innerc = find_lastprivate_looptemp (fd, innerc);
5140 if (innerc)
5142 /* If needed (distribute parallel for with lastprivate),
5143 propagate down the total number of iterations. */
5144 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5145 fd->loop.n2);
5146 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5147 GSI_CONTINUE_LINKING);
5148 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5149 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5153 t = fold_convert (itype, s0);
5154 t = fold_build2 (MULT_EXPR, itype, t, step);
5155 if (POINTER_TYPE_P (type))
5157 t = fold_build_pointer_plus (n1, t);
5158 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5159 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5160 t = fold_convert (signed_type_for (type), t);
5162 else
5163 t = fold_build2 (PLUS_EXPR, type, t, n1);
5164 t = fold_convert (TREE_TYPE (startvar), t);
5165 t = force_gimple_operand_gsi (&gsi, t,
5166 DECL_P (startvar)
5167 && TREE_ADDRESSABLE (startvar),
5168 NULL_TREE, false, GSI_CONTINUE_LINKING);
5169 assign_stmt = gimple_build_assign (startvar, t);
5170 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5171 if (cond_var)
5173 tree itype = TREE_TYPE (cond_var);
5174 /* For lastprivate(conditional:) itervar, we need some iteration
5175 counter that starts at unsigned non-zero and increases.
5176 Prefer as few IVs as possible, so if we can use startvar
5177 itself, use that, or startvar + constant (those would be
5178 incremented with step), and as last resort use the s0 + 1
5179 incremented by 1. */
5180 if (POINTER_TYPE_P (type)
5181 || TREE_CODE (n1) != INTEGER_CST
5182 || fd->loop.cond_code != LT_EXPR)
5183 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5184 build_int_cst (itype, 1));
5185 else if (tree_int_cst_sgn (n1) == 1)
5186 t = fold_convert (itype, t);
5187 else
5189 tree c = fold_convert (itype, n1);
5190 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5191 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5193 t = force_gimple_operand_gsi (&gsi, t, false,
5194 NULL_TREE, false, GSI_CONTINUE_LINKING);
5195 assign_stmt = gimple_build_assign (cond_var, t);
5196 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5199 t = fold_convert (itype, e0);
5200 t = fold_build2 (MULT_EXPR, itype, t, step);
5201 if (POINTER_TYPE_P (type))
5203 t = fold_build_pointer_plus (n1, t);
5204 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5205 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5206 t = fold_convert (signed_type_for (type), t);
5208 else
5209 t = fold_build2 (PLUS_EXPR, type, t, n1);
5210 t = fold_convert (TREE_TYPE (startvar), t);
5211 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5212 false, GSI_CONTINUE_LINKING);
5213 if (endvar)
5215 assign_stmt = gimple_build_assign (endvar, e);
5216 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5217 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5218 assign_stmt = gimple_build_assign (fd->loop.v, e);
5219 else
5220 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5221 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5223 /* Handle linear clause adjustments. */
5224 tree itercnt = NULL_TREE;
5225 tree *nonrect_bounds = NULL;
5226 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5227 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5228 c; c = OMP_CLAUSE_CHAIN (c))
5229 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5230 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5232 tree d = OMP_CLAUSE_DECL (c);
5233 bool is_ref = omp_is_reference (d);
5234 tree t = d, a, dest;
5235 if (is_ref)
5236 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5237 if (itercnt == NULL_TREE)
5239 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5241 itercnt = fold_build2 (MINUS_EXPR, itype,
5242 fold_convert (itype, n1),
5243 fold_convert (itype, fd->loop.n1));
5244 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5245 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5246 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5247 NULL_TREE, false,
5248 GSI_CONTINUE_LINKING);
5250 else
5251 itercnt = s0;
5253 tree type = TREE_TYPE (t);
5254 if (POINTER_TYPE_P (type))
5255 type = sizetype;
5256 a = fold_build2 (MULT_EXPR, type,
5257 fold_convert (type, itercnt),
5258 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5259 dest = unshare_expr (t);
5260 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5261 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5262 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5263 false, GSI_CONTINUE_LINKING);
5264 expand_omp_build_assign (&gsi, dest, t, true);
5266 if (fd->collapse > 1)
5268 if (fd->non_rect)
5270 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5271 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5273 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5274 startvar);
5277 if (!broken_loop)
5279 /* The code controlling the sequential loop replaces the
5280 GIMPLE_OMP_CONTINUE. */
5281 gsi = gsi_last_nondebug_bb (cont_bb);
5282 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5283 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5284 vmain = gimple_omp_continue_control_use (cont_stmt);
5285 vback = gimple_omp_continue_control_def (cont_stmt);
5287 if (cond_var)
5289 tree itype = TREE_TYPE (cond_var);
5290 tree t2;
5291 if (POINTER_TYPE_P (type)
5292 || TREE_CODE (n1) != INTEGER_CST
5293 || fd->loop.cond_code != LT_EXPR)
5294 t2 = build_int_cst (itype, 1);
5295 else
5296 t2 = fold_convert (itype, step);
5297 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5298 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5299 NULL_TREE, true, GSI_SAME_STMT);
5300 assign_stmt = gimple_build_assign (cond_var, t2);
5301 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5304 if (!gimple_omp_for_combined_p (fd->for_stmt))
5306 if (POINTER_TYPE_P (type))
5307 t = fold_build_pointer_plus (vmain, step);
5308 else
5309 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5310 t = force_gimple_operand_gsi (&gsi, t,
5311 DECL_P (vback)
5312 && TREE_ADDRESSABLE (vback),
5313 NULL_TREE, true, GSI_SAME_STMT);
5314 assign_stmt = gimple_build_assign (vback, t);
5315 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5317 t = build2 (fd->loop.cond_code, boolean_type_node,
5318 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5319 ? t : vback, e);
5320 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5323 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5324 gsi_remove (&gsi, true);
5326 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5327 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5328 cont_bb, body_bb);
5331 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5332 gsi = gsi_last_nondebug_bb (exit_bb);
5333 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5335 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5336 if (fd->have_reductemp
5337 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5338 && !fd->have_nonctrl_scantemp))
5340 tree fn;
5341 if (t)
5342 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5343 else
5344 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5345 gcall *g = gimple_build_call (fn, 0);
5346 if (t)
5348 gimple_call_set_lhs (g, t);
5349 if (fd->have_reductemp)
5350 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5351 NOP_EXPR, t),
5352 GSI_SAME_STMT);
5354 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5356 else
5357 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5359 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5360 && !fd->have_nonctrl_scantemp)
5362 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5363 gcall *g = gimple_build_call (fn, 0);
5364 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5366 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5368 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5369 tree controlp = NULL_TREE, controlb = NULL_TREE;
5370 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5371 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5372 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5374 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5375 controlb = OMP_CLAUSE_DECL (c);
5376 else
5377 controlp = OMP_CLAUSE_DECL (c);
5378 if (controlb && controlp)
5379 break;
5381 gcc_assert (controlp && controlb);
5382 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5383 NULL_TREE, NULL_TREE);
5384 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5385 exit1_bb = split_block (exit_bb, g)->dest;
5386 gsi = gsi_after_labels (exit1_bb);
5387 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5388 controlp);
5389 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5390 exit2_bb = split_block (exit1_bb, g)->dest;
5391 gsi = gsi_after_labels (exit2_bb);
5392 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5393 controlp);
5394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5395 exit3_bb = split_block (exit2_bb, g)->dest;
5396 gsi = gsi_after_labels (exit3_bb);
5398 gsi_remove (&gsi, true);
5400 /* Connect all the blocks. */
5401 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5402 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5403 ep = find_edge (entry_bb, second_bb);
5404 ep->flags = EDGE_TRUE_VALUE;
5405 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5406 if (fourth_bb)
5408 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5409 ep->probability
5410 = profile_probability::guessed_always ().apply_scale (1, 2);
5411 ep = find_edge (third_bb, fourth_bb);
5412 ep->flags = EDGE_TRUE_VALUE;
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (fourth_bb, fifth_bb);
5416 redirect_edge_and_branch (ep, sixth_bb);
5418 else
5419 sixth_bb = third_bb;
5420 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5421 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5422 if (exit1_bb)
5424 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5425 ep->probability
5426 = profile_probability::guessed_always ().apply_scale (1, 2);
5427 ep = find_edge (exit_bb, exit1_bb);
5428 ep->flags = EDGE_TRUE_VALUE;
5429 ep->probability
5430 = profile_probability::guessed_always ().apply_scale (1, 2);
5431 ep = find_edge (exit1_bb, exit2_bb);
5432 redirect_edge_and_branch (ep, exit3_bb);
5435 if (!broken_loop)
5437 ep = find_edge (cont_bb, body_bb);
5438 if (ep == NULL)
5440 ep = BRANCH_EDGE (cont_bb);
5441 gcc_assert (single_succ (ep->dest) == body_bb);
5443 if (gimple_omp_for_combined_p (fd->for_stmt))
5445 remove_edge (ep);
5446 ep = NULL;
5448 else if (fd->collapse > 1)
5450 remove_edge (ep);
5451 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5453 else
5454 ep->flags = EDGE_TRUE_VALUE;
5455 find_edge (cont_bb, fin_bb)->flags
5456 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5459 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5460 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5461 if (fourth_bb)
5463 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5466 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5468 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5469 recompute_dominator (CDI_DOMINATORS, body_bb));
5470 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5471 recompute_dominator (CDI_DOMINATORS, fin_bb));
5472 if (exit1_bb)
5474 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5475 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5478 class loop *loop = body_bb->loop_father;
5479 if (loop != entry_bb->loop_father)
5481 gcc_assert (broken_loop || loop->header == body_bb);
5482 gcc_assert (broken_loop
5483 || loop->latch == region->cont
5484 || single_pred (loop->latch) == region->cont);
5485 return;
5488 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5490 loop = alloc_loop ();
5491 loop->header = body_bb;
5492 if (collapse_bb == NULL)
5493 loop->latch = cont_bb;
5494 add_loop (loop, body_bb->loop_father);
5498 /* Return phi in E->DEST with ARG on edge E. */
5500 static gphi *
5501 find_phi_with_arg_on_edge (tree arg, edge e)
5503 basic_block bb = e->dest;
5505 for (gphi_iterator gpi = gsi_start_phis (bb);
5506 !gsi_end_p (gpi);
5507 gsi_next (&gpi))
5509 gphi *phi = gpi.phi ();
5510 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5511 return phi;
5514 return NULL;
5517 /* A subroutine of expand_omp_for. Generate code for a parallel
5518 loop with static schedule and a specified chunk size. Given
5519 parameters:
5521 for (V = N1; V cond N2; V += STEP) BODY;
5523 where COND is "<" or ">", we generate pseudocode
5525 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
5526 if (cond is <)
5527 adj = STEP - 1;
5528 else
5529 adj = STEP + 1;
5530 if ((__typeof (V)) -1 > 0 && cond is >)
5531 n = -(adj + N2 - N1) / -STEP;
5532 else
5533 n = (adj + N2 - N1) / STEP;
5534 trip = 0;
5535 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5536 here so that V is defined
5537 if the loop is not entered
5538 L0:
5539 s0 = (trip * nthreads + threadid) * CHUNK;
5540 e0 = min (s0 + CHUNK, n);
5541 if (s0 < n) goto L1; else goto L4;
5542 L1:
5543 V = s0 * STEP + N1;
5544 e = e0 * STEP + N1;
5545 L2:
5546 BODY;
5547 V += STEP;
5548 if (V cond e) goto L2; else goto L3;
5549 L3:
5550 trip += 1;
5551 goto L0;
5552 L4:
5553 */
5555 static void
5556 expand_omp_for_static_chunk (struct omp_region *region,
5557 struct omp_for_data *fd, gimple *inner_stmt)
5559 tree n, s0, e0, e, t;
5560 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5561 tree type, itype, vmain, vback, vextra;
5562 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5563 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5564 gimple_stmt_iterator gsi, gsip;
5565 edge se;
5566 bool broken_loop = region->cont == NULL;
5567 tree *counts = NULL;
5568 tree n1, n2, step;
5569 tree reductions = NULL_TREE;
5570 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5572 itype = type = TREE_TYPE (fd->loop.v);
5573 if (POINTER_TYPE_P (type))
5574 itype = signed_type_for (type);
5576 entry_bb = region->entry;
5577 se = split_block (entry_bb, last_stmt (entry_bb));
5578 entry_bb = se->src;
5579 iter_part_bb = se->dest;
5580 cont_bb = region->cont;
5581 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5582 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5583 gcc_assert (broken_loop
5584 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5585 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5586 body_bb = single_succ (seq_start_bb);
5587 if (!broken_loop)
5589 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5590 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5591 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5592 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5594 exit_bb = region->exit;
5596 /* Trip and adjustment setup goes in ENTRY_BB. */
5597 gsi = gsi_last_nondebug_bb (entry_bb);
5598 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5599 gsip = gsi;
5600 gsi_prev (&gsip);
5602 if (fd->collapse > 1)
5604 int first_zero_iter = -1, dummy = -1;
5605 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5607 counts = XALLOCAVEC (tree, fd->collapse);
5608 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5609 fin_bb, first_zero_iter,
5610 dummy_bb, dummy, l2_dom_bb);
5611 t = NULL_TREE;
5613 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5614 t = integer_one_node;
5615 else
5616 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5617 fold_convert (type, fd->loop.n1),
5618 fold_convert (type, fd->loop.n2));
5619 if (fd->collapse == 1
5620 && TYPE_UNSIGNED (type)
5621 && (t == NULL_TREE || !integer_onep (t)))
5623 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5624 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5625 true, GSI_SAME_STMT);
5626 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5627 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5628 true, GSI_SAME_STMT);
5629 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5630 NULL_TREE, NULL_TREE);
5631 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5632 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5633 expand_omp_regimplify_p, NULL, NULL)
5634 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5635 expand_omp_regimplify_p, NULL, NULL))
5637 gsi = gsi_for_stmt (cond_stmt);
5638 gimple_regimplify_operands (cond_stmt, &gsi);
5640 se = split_block (entry_bb, cond_stmt);
5641 se->flags = EDGE_TRUE_VALUE;
5642 entry_bb = se->dest;
5643 se->probability = profile_probability::very_likely ();
5644 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5645 se->probability = profile_probability::very_unlikely ();
5646 if (gimple_in_ssa_p (cfun))
5648 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5649 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5650 !gsi_end_p (gpi); gsi_next (&gpi))
5652 gphi *phi = gpi.phi ();
5653 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5654 se, UNKNOWN_LOCATION);
5657 gsi = gsi_last_bb (entry_bb);
5660 if (fd->lastprivate_conditional)
5662 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5663 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5664 if (fd->have_pointer_condtemp)
5665 condtemp = OMP_CLAUSE_DECL (c);
5666 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5667 cond_var = OMP_CLAUSE_DECL (c);
5669 if (fd->have_reductemp || fd->have_pointer_condtemp)
5671 tree t1 = build_int_cst (long_integer_type_node, 0);
5672 tree t2 = build_int_cst (long_integer_type_node, 1);
5673 tree t3 = build_int_cstu (long_integer_type_node,
5674 (HOST_WIDE_INT_1U << 31) + 1);
5675 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5676 gimple_stmt_iterator gsi2 = gsi_none ();
5677 gimple *g = NULL;
5678 tree mem = null_pointer_node, memv = NULL_TREE;
5679 if (fd->have_reductemp)
5681 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5682 reductions = OMP_CLAUSE_DECL (c);
5683 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5684 g = SSA_NAME_DEF_STMT (reductions);
5685 reductions = gimple_assign_rhs1 (g);
5686 OMP_CLAUSE_DECL (c) = reductions;
5687 gsi2 = gsi_for_stmt (g);
5689 else
5691 if (gsi_end_p (gsip))
5692 gsi2 = gsi_after_labels (region->entry);
5693 else
5694 gsi2 = gsip;
5695 reductions = null_pointer_node;
5697 if (fd->have_pointer_condtemp)
5699 tree type = TREE_TYPE (condtemp);
5700 memv = create_tmp_var (type);
5701 TREE_ADDRESSABLE (memv) = 1;
5702 unsigned HOST_WIDE_INT sz
5703 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5704 sz *= fd->lastprivate_conditional;
5705 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5706 false);
5707 mem = build_fold_addr_expr (memv);
5709 tree t
5710 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5711 9, t1, t2, t2, t3, t1, null_pointer_node,
5712 null_pointer_node, reductions, mem);
5713 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5714 true, GSI_SAME_STMT);
5715 if (fd->have_pointer_condtemp)
5716 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5717 if (fd->have_reductemp)
5719 gsi_remove (&gsi2, true);
5720 release_ssa_name (gimple_assign_lhs (g));
5723 switch (gimple_omp_for_kind (fd->for_stmt))
5725 case GF_OMP_FOR_KIND_FOR:
5726 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5727 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5728 break;
5729 case GF_OMP_FOR_KIND_DISTRIBUTE:
5730 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5731 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5732 break;
5733 default:
5734 gcc_unreachable ();
5736 nthreads = build_call_expr (nthreads, 0);
5737 nthreads = fold_convert (itype, nthreads);
5738 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5739 true, GSI_SAME_STMT);
5740 threadid = build_call_expr (threadid, 0);
5741 threadid = fold_convert (itype, threadid);
5742 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5743 true, GSI_SAME_STMT);
5745 n1 = fd->loop.n1;
5746 n2 = fd->loop.n2;
5747 step = fd->loop.step;
5748 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5750 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5751 OMP_CLAUSE__LOOPTEMP_);
5752 gcc_assert (innerc);
5753 n1 = OMP_CLAUSE_DECL (innerc);
5754 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5755 OMP_CLAUSE__LOOPTEMP_);
5756 gcc_assert (innerc);
5757 n2 = OMP_CLAUSE_DECL (innerc);
5759 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5760 true, NULL_TREE, true, GSI_SAME_STMT);
5761 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5762 true, NULL_TREE, true, GSI_SAME_STMT);
5763 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5764 true, NULL_TREE, true, GSI_SAME_STMT);
5765 tree chunk_size = fold_convert (itype, fd->chunk_size);
5766 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5767 chunk_size
5768 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5769 GSI_SAME_STMT);
5771 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5772 t = fold_build2 (PLUS_EXPR, itype, step, t);
5773 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5774 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5775 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5776 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5777 fold_build1 (NEGATE_EXPR, itype, t),
5778 fold_build1 (NEGATE_EXPR, itype, step));
5779 else
5780 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5781 t = fold_convert (itype, t);
5782 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5783 true, GSI_SAME_STMT);
5785 trip_var = create_tmp_reg (itype, ".trip");
5786 if (gimple_in_ssa_p (cfun))
5788 trip_init = make_ssa_name (trip_var);
5789 trip_main = make_ssa_name (trip_var);
5790 trip_back = make_ssa_name (trip_var);
5792 else
5794 trip_init = trip_var;
5795 trip_main = trip_var;
5796 trip_back = trip_var;
5799 gassign *assign_stmt
5800 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5801 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5803 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5804 t = fold_build2 (MULT_EXPR, itype, t, step);
5805 if (POINTER_TYPE_P (type))
5806 t = fold_build_pointer_plus (n1, t);
5807 else
5808 t = fold_build2 (PLUS_EXPR, type, t, n1);
5809 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5810 true, GSI_SAME_STMT);
5812 /* Remove the GIMPLE_OMP_FOR. */
5813 gsi_remove (&gsi, true);
5815 gimple_stmt_iterator gsif = gsi;
5817 /* Iteration space partitioning goes in ITER_PART_BB. */
5818 gsi = gsi_last_bb (iter_part_bb);
5820 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5821 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5822 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5823 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5824 false, GSI_CONTINUE_LINKING);
5826 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5827 t = fold_build2 (MIN_EXPR, itype, t, n);
5828 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5829 false, GSI_CONTINUE_LINKING);
5831 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5832 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5834 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5835 gsi = gsi_start_bb (seq_start_bb);
5837 tree startvar = fd->loop.v;
5838 tree endvar = NULL_TREE;
5840 if (gimple_omp_for_combined_p (fd->for_stmt))
5842 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5843 ? gimple_omp_parallel_clauses (inner_stmt)
5844 : gimple_omp_for_clauses (inner_stmt);
5845 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5846 gcc_assert (innerc);
5847 startvar = OMP_CLAUSE_DECL (innerc);
5848 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5849 OMP_CLAUSE__LOOPTEMP_);
5850 gcc_assert (innerc);
5851 endvar = OMP_CLAUSE_DECL (innerc);
5852 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5853 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5855 innerc = find_lastprivate_looptemp (fd, innerc);
5856 if (innerc)
5858 /* If needed (distribute parallel for with lastprivate),
5859 propagate down the total number of iterations. */
5860 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5861 fd->loop.n2);
5862 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5863 GSI_CONTINUE_LINKING);
5864 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5865 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5870 t = fold_convert (itype, s0);
5871 t = fold_build2 (MULT_EXPR, itype, t, step);
5872 if (POINTER_TYPE_P (type))
5874 t = fold_build_pointer_plus (n1, t);
5875 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5876 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5877 t = fold_convert (signed_type_for (type), t);
5879 else
5880 t = fold_build2 (PLUS_EXPR, type, t, n1);
5881 t = fold_convert (TREE_TYPE (startvar), t);
5882 t = force_gimple_operand_gsi (&gsi, t,
5883 DECL_P (startvar)
5884 && TREE_ADDRESSABLE (startvar),
5885 NULL_TREE, false, GSI_CONTINUE_LINKING);
5886 assign_stmt = gimple_build_assign (startvar, t);
5887 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5888 if (cond_var)
5890 tree itype = TREE_TYPE (cond_var);
5891 /* For lastprivate(conditional:) itervar, we need some iteration
5892 counter that starts at unsigned non-zero and increases.
5893 Prefer as few IVs as possible, so if we can use startvar
5894 itself, use that, or startvar + constant (those would be
5895 incremented with step), and as last resort use the s0 + 1
5896 incremented by 1. */
5897 if (POINTER_TYPE_P (type)
5898 || TREE_CODE (n1) != INTEGER_CST
5899 || fd->loop.cond_code != LT_EXPR)
5900 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5901 build_int_cst (itype, 1));
5902 else if (tree_int_cst_sgn (n1) == 1)
5903 t = fold_convert (itype, t);
5904 else
5906 tree c = fold_convert (itype, n1);
5907 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5908 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5910 t = force_gimple_operand_gsi (&gsi, t, false,
5911 NULL_TREE, false, GSI_CONTINUE_LINKING);
5912 assign_stmt = gimple_build_assign (cond_var, t);
5913 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5916 t = fold_convert (itype, e0);
5917 t = fold_build2 (MULT_EXPR, itype, t, step);
5918 if (POINTER_TYPE_P (type))
5920 t = fold_build_pointer_plus (n1, t);
5921 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5922 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5923 t = fold_convert (signed_type_for (type), t);
5925 else
5926 t = fold_build2 (PLUS_EXPR, type, t, n1);
5927 t = fold_convert (TREE_TYPE (startvar), t);
5928 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5929 false, GSI_CONTINUE_LINKING);
5930 if (endvar)
5932 assign_stmt = gimple_build_assign (endvar, e);
5933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5934 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5935 assign_stmt = gimple_build_assign (fd->loop.v, e);
5936 else
5937 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5940 /* Handle linear clause adjustments. */
5941 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5942 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5943 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5944 c; c = OMP_CLAUSE_CHAIN (c))
5945 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5946 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5948 tree d = OMP_CLAUSE_DECL (c);
5949 bool is_ref = omp_is_reference (d);
5950 tree t = d, a, dest;
5951 if (is_ref)
5952 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5953 tree type = TREE_TYPE (t);
5954 if (POINTER_TYPE_P (type))
5955 type = sizetype;
5956 dest = unshare_expr (t);
5957 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5958 expand_omp_build_assign (&gsif, v, t);
5959 if (itercnt == NULL_TREE)
5961 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5963 itercntbias
5964 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5965 fold_convert (itype, fd->loop.n1));
5966 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5967 itercntbias, step);
5968 itercntbias
5969 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5970 NULL_TREE, true,
5971 GSI_SAME_STMT);
5972 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5973 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5974 NULL_TREE, false,
5975 GSI_CONTINUE_LINKING);
5977 else
5978 itercnt = s0;
5980 a = fold_build2 (MULT_EXPR, type,
5981 fold_convert (type, itercnt),
5982 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5983 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5984 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5985 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5986 false, GSI_CONTINUE_LINKING);
5987 expand_omp_build_assign (&gsi, dest, t, true);
5989 if (fd->collapse > 1)
5990 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5992 if (!broken_loop)
5994 /* The code controlling the sequential loop goes in CONT_BB,
5995 replacing the GIMPLE_OMP_CONTINUE. */
5996 gsi = gsi_last_nondebug_bb (cont_bb);
5997 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5998 vmain = gimple_omp_continue_control_use (cont_stmt);
5999 vback = gimple_omp_continue_control_def (cont_stmt);
6001 if (cond_var)
6003 tree itype = TREE_TYPE (cond_var);
6004 tree t2;
6005 if (POINTER_TYPE_P (type)
6006 || TREE_CODE (n1) != INTEGER_CST
6007 || fd->loop.cond_code != LT_EXPR)
6008 t2 = build_int_cst (itype, 1);
6009 else
6010 t2 = fold_convert (itype, step);
6011 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6012 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6013 NULL_TREE, true, GSI_SAME_STMT);
6014 assign_stmt = gimple_build_assign (cond_var, t2);
6015 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6018 if (!gimple_omp_for_combined_p (fd->for_stmt))
6020 if (POINTER_TYPE_P (type))
6021 t = fold_build_pointer_plus (vmain, step);
6022 else
6023 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6024 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6025 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6026 true, GSI_SAME_STMT);
6027 assign_stmt = gimple_build_assign (vback, t);
6028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6030 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6031 t = build2 (EQ_EXPR, boolean_type_node,
6032 build_int_cst (itype, 0),
6033 build_int_cst (itype, 1));
6034 else
6035 t = build2 (fd->loop.cond_code, boolean_type_node,
6036 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6037 ? t : vback, e);
6038 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6041 /* Remove GIMPLE_OMP_CONTINUE. */
6042 gsi_remove (&gsi, true);
6044 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6045 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6047 /* Trip update code goes into TRIP_UPDATE_BB. */
6048 gsi = gsi_start_bb (trip_update_bb);
6050 t = build_int_cst (itype, 1);
6051 t = build2 (PLUS_EXPR, itype, trip_main, t);
6052 assign_stmt = gimple_build_assign (trip_back, t);
6053 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6056 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6057 gsi = gsi_last_nondebug_bb (exit_bb);
6058 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6060 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6061 if (fd->have_reductemp || fd->have_pointer_condtemp)
6063 tree fn;
6064 if (t)
6065 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6066 else
6067 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6068 gcall *g = gimple_build_call (fn, 0);
6069 if (t)
6071 gimple_call_set_lhs (g, t);
6072 if (fd->have_reductemp)
6073 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6074 NOP_EXPR, t),
6075 GSI_SAME_STMT);
6077 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6079 else
6080 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6082 else if (fd->have_pointer_condtemp)
6084 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6085 gcall *g = gimple_build_call (fn, 0);
6086 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6088 gsi_remove (&gsi, true);
6090 /* Connect the new blocks. */
6091 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6092 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6094 if (!broken_loop)
6096 se = find_edge (cont_bb, body_bb);
6097 if (se == NULL)
6099 se = BRANCH_EDGE (cont_bb);
6100 gcc_assert (single_succ (se->dest) == body_bb);
6102 if (gimple_omp_for_combined_p (fd->for_stmt))
6104 remove_edge (se);
6105 se = NULL;
6107 else if (fd->collapse > 1)
6109 remove_edge (se);
6110 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6112 else
6113 se->flags = EDGE_TRUE_VALUE;
6114 find_edge (cont_bb, trip_update_bb)->flags
6115 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6117 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6118 iter_part_bb);
6121 if (gimple_in_ssa_p (cfun))
6123 gphi_iterator psi;
6124 gphi *phi;
6125 edge re, ene;
6126 edge_var_map *vm;
6127 size_t i;
6129 gcc_assert (fd->collapse == 1 && !broken_loop);
6131 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6132 remove arguments of the phi nodes in fin_bb. We need to create
6133 appropriate phi nodes in iter_part_bb instead. */
6134 se = find_edge (iter_part_bb, fin_bb);
6135 re = single_succ_edge (trip_update_bb);
6136 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6137 ene = single_succ_edge (entry_bb);
6139 psi = gsi_start_phis (fin_bb);
6140 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6141 gsi_next (&psi), ++i)
6143 gphi *nphi;
6144 location_t locus;
6146 phi = psi.phi ();
6147 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6148 redirect_edge_var_map_def (vm), 0))
6149 continue;
6151 t = gimple_phi_result (phi);
6152 gcc_assert (t == redirect_edge_var_map_result (vm));
6154 if (!single_pred_p (fin_bb))
6155 t = copy_ssa_name (t, phi);
6157 nphi = create_phi_node (t, iter_part_bb);
6159 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6160 locus = gimple_phi_arg_location_from_edge (phi, se);
6162 /* A special case -- fd->loop.v is not yet computed in
6163 iter_part_bb, we need to use vextra instead. */
6164 if (t == fd->loop.v)
6165 t = vextra;
6166 add_phi_arg (nphi, t, ene, locus);
6167 locus = redirect_edge_var_map_location (vm);
6168 tree back_arg = redirect_edge_var_map_def (vm);
6169 add_phi_arg (nphi, back_arg, re, locus);
6170 edge ce = find_edge (cont_bb, body_bb);
6171 if (ce == NULL)
6173 ce = BRANCH_EDGE (cont_bb);
6174 gcc_assert (single_succ (ce->dest) == body_bb);
6175 ce = single_succ_edge (ce->dest);
6177 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6178 gcc_assert (inner_loop_phi != NULL);
6179 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6180 find_edge (seq_start_bb, body_bb), locus);
6182 if (!single_pred_p (fin_bb))
6183 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6185 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6186 redirect_edge_var_map_clear (re);
6187 if (single_pred_p (fin_bb))
6188 while (1)
6190 psi = gsi_start_phis (fin_bb);
6191 if (gsi_end_p (psi))
6192 break;
6193 remove_phi_node (&psi, false);
6196 /* Make phi node for trip. */
6197 phi = create_phi_node (trip_main, iter_part_bb);
6198 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6199 UNKNOWN_LOCATION);
6200 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6201 UNKNOWN_LOCATION);
6204 if (!broken_loop)
6205 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6206 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6207 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6208 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6209 recompute_dominator (CDI_DOMINATORS, fin_bb));
6210 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6211 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6212 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6213 recompute_dominator (CDI_DOMINATORS, body_bb));
6215 if (!broken_loop)
6217 class loop *loop = body_bb->loop_father;
6218 class loop *trip_loop = alloc_loop ();
6219 trip_loop->header = iter_part_bb;
6220 trip_loop->latch = trip_update_bb;
6221 add_loop (trip_loop, iter_part_bb->loop_father);
6223 if (loop != entry_bb->loop_father)
6225 gcc_assert (loop->header == body_bb);
6226 gcc_assert (loop->latch == region->cont
6227 || single_pred (loop->latch) == region->cont);
6228 trip_loop->inner = loop;
6229 return;
6232 if (!gimple_omp_for_combined_p (fd->for_stmt))
6234 loop = alloc_loop ();
6235 loop->header = body_bb;
6236 if (collapse_bb == NULL)
6237 loop->latch = cont_bb;
6238 add_loop (loop, trip_loop);
6243 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6244 loop. Given parameters:
6246 for (V = N1; V cond N2; V += STEP) BODY;
6248 where COND is "<" or ">", we generate pseudocode
6250 V = N1;
6251 goto L1;
6252 L0:
6253 BODY;
6254 V += STEP;
6255 L1:
6256 if (V cond N2) goto L0; else goto L2;
6257 L2:
6259 For collapsed loops, emit the outer loops as scalar
6260 and only try to vectorize the innermost loop. */
6262 static void
6263 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6265 tree type, t;
6266 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6267 gimple_stmt_iterator gsi;
6268 gimple *stmt;
6269 gcond *cond_stmt;
6270 bool broken_loop = region->cont == NULL;
6271 edge e, ne;
6272 tree *counts = NULL;
6273 int i;
6274 int safelen_int = INT_MAX;
6275 bool dont_vectorize = false;
6276 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6277 OMP_CLAUSE_SAFELEN);
6278 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6279 OMP_CLAUSE__SIMDUID_);
6280 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6281 OMP_CLAUSE_IF);
6282 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6283 OMP_CLAUSE_SIMDLEN);
6284 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6285 OMP_CLAUSE__CONDTEMP_);
6286 tree n1, n2;
6287 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6289 if (safelen)
6291 poly_uint64 val;
6292 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6293 if (!poly_int_tree_p (safelen, &val))
6294 safelen_int = 0;
6295 else
6296 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6297 if (safelen_int == 1)
6298 safelen_int = 0;
6300 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6301 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6303 safelen_int = 0;
6304 dont_vectorize = true;
6306 type = TREE_TYPE (fd->loop.v);
6307 entry_bb = region->entry;
6308 cont_bb = region->cont;
6309 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6310 gcc_assert (broken_loop
6311 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6312 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6313 if (!broken_loop)
6315 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6316 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6317 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6318 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6320 else
6322 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6323 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6324 l2_bb = single_succ (l1_bb);
6326 exit_bb = region->exit;
6327 l2_dom_bb = NULL;
6329 gsi = gsi_last_nondebug_bb (entry_bb);
6331 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6332 /* Not needed in SSA form right now. */
6333 gcc_assert (!gimple_in_ssa_p (cfun));
6334 if (fd->collapse > 1
6335 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6336 || broken_loop))
6338 int first_zero_iter = -1, dummy = -1;
6339 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6341 counts = XALLOCAVEC (tree, fd->collapse);
6342 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6343 zero_iter_bb, first_zero_iter,
6344 dummy_bb, dummy, l2_dom_bb);
6346 if (l2_dom_bb == NULL)
6347 l2_dom_bb = l1_bb;
6349 n1 = fd->loop.n1;
6350 n2 = fd->loop.n2;
6351 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6353 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 OMP_CLAUSE__LOOPTEMP_);
6355 gcc_assert (innerc);
6356 n1 = OMP_CLAUSE_DECL (innerc);
6357 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6358 OMP_CLAUSE__LOOPTEMP_);
6359 gcc_assert (innerc);
6360 n2 = OMP_CLAUSE_DECL (innerc);
6362 tree step = fd->loop.step;
6364 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6365 OMP_CLAUSE__SIMT_);
6366 if (is_simt)
6368 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6369 is_simt = safelen_int > 1;
6371 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6372 if (is_simt)
6374 simt_lane = create_tmp_var (unsigned_type_node);
6375 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6376 gimple_call_set_lhs (g, simt_lane);
6377 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6378 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6379 fold_convert (TREE_TYPE (step), simt_lane));
6380 n1 = fold_convert (type, n1);
6381 if (POINTER_TYPE_P (type))
6382 n1 = fold_build_pointer_plus (n1, offset);
6383 else
6384 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6386 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6387 if (fd->collapse > 1)
6388 simt_maxlane = build_one_cst (unsigned_type_node);
6389 else if (safelen_int < omp_max_simt_vf ())
6390 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6391 tree vf
6392 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6393 unsigned_type_node, 0);
6394 if (simt_maxlane)
6395 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6396 vf = fold_convert (TREE_TYPE (step), vf);
6397 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6400 tree n2var = NULL_TREE;
6401 tree n2v = NULL_TREE;
6402 tree *nonrect_bounds = NULL;
6403 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6404 if (fd->collapse > 1)
6406 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6408 if (fd->non_rect)
6410 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6411 memset (nonrect_bounds, 0,
6412 sizeof (tree) * (fd->last_nonrect + 1));
6414 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6415 gcc_assert (entry_bb == gsi_bb (gsi));
6416 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6417 gsi_prev (&gsi);
6418 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6419 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6420 NULL, n1);
6421 gsi = gsi_for_stmt (fd->for_stmt);
6423 if (broken_loop)
6425 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6427 /* Compute in n2var the limit for the first innermost loop,
6428 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6429 where cnt is how many iterations would the loop have if
6430 all further iterations were assigned to the current task. */
6431 n2var = create_tmp_var (type);
6432 i = fd->collapse - 1;
6433 tree itype = TREE_TYPE (fd->loops[i].v);
6434 if (POINTER_TYPE_P (itype))
6435 itype = signed_type_for (itype);
6436 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6437 ? -1 : 1));
6438 t = fold_build2 (PLUS_EXPR, itype,
6439 fold_convert (itype, fd->loops[i].step), t);
6440 t = fold_build2 (PLUS_EXPR, itype, t,
6441 fold_convert (itype, fd->loops[i].n2));
6442 if (fd->loops[i].m2)
6444 tree t2 = fold_convert (itype,
6445 fd->loops[i - fd->loops[i].outer].v);
6446 tree t3 = fold_convert (itype, fd->loops[i].m2);
6447 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6448 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6450 t = fold_build2 (MINUS_EXPR, itype, t,
6451 fold_convert (itype, fd->loops[i].v));
6452 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6453 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6454 fold_build1 (NEGATE_EXPR, itype, t),
6455 fold_build1 (NEGATE_EXPR, itype,
6456 fold_convert (itype,
6457 fd->loops[i].step)));
6458 else
6459 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6460 fold_convert (itype, fd->loops[i].step));
6461 t = fold_convert (type, t);
6462 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6463 min_arg1 = create_tmp_var (type);
6464 expand_omp_build_assign (&gsi, min_arg1, t2);
6465 min_arg2 = create_tmp_var (type);
6466 expand_omp_build_assign (&gsi, min_arg2, t);
6468 else
6470 if (TREE_CODE (n2) == INTEGER_CST)
6472 /* Indicate for lastprivate handling that at least one iteration
6473 has been performed, without wasting runtime. */
6474 if (integer_nonzerop (n2))
6475 expand_omp_build_assign (&gsi, fd->loop.v,
6476 fold_convert (type, n2));
6477 else
6478 /* Indicate that no iteration has been performed. */
6479 expand_omp_build_assign (&gsi, fd->loop.v,
6480 build_one_cst (type));
6482 else
6484 expand_omp_build_assign (&gsi, fd->loop.v,
6485 build_zero_cst (type));
6486 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6488 for (i = 0; i < fd->collapse; i++)
6490 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6491 if (fd->loops[i].m1)
6493 tree t2
6494 = fold_convert (TREE_TYPE (t),
6495 fd->loops[i - fd->loops[i].outer].v);
6496 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6497 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6498 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6500 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6501 /* For normal non-combined collapsed loops just initialize
6502 the outermost iterator in the entry_bb. */
6503 if (!broken_loop)
6504 break;
6508 else
6509 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6510 tree altv = NULL_TREE, altn2 = NULL_TREE;
6511 if (fd->collapse == 1
6512 && !broken_loop
6513 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6515 /* The vectorizer currently punts on loops with non-constant steps
6516 for the main IV (can't compute number of iterations and gives up
6517 because of that). As for OpenMP loops it is always possible to
6518 compute the number of iterations upfront, use an alternate IV
6519 as the loop iterator:
6520 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6521 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6522 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6523 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6524 tree itype = TREE_TYPE (fd->loop.v);
6525 if (POINTER_TYPE_P (itype))
6526 itype = signed_type_for (itype);
6527 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6528 t = fold_build2 (PLUS_EXPR, itype,
6529 fold_convert (itype, fd->loop.step), t);
6530 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6531 t = fold_build2 (MINUS_EXPR, itype, t,
6532 fold_convert (itype, fd->loop.v));
6533 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6534 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6535 fold_build1 (NEGATE_EXPR, itype, t),
6536 fold_build1 (NEGATE_EXPR, itype,
6537 fold_convert (itype, fd->loop.step)));
6538 else
6539 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6540 fold_convert (itype, fd->loop.step));
6541 t = fold_convert (TREE_TYPE (altv), t);
6542 altn2 = create_tmp_var (TREE_TYPE (altv));
6543 expand_omp_build_assign (&gsi, altn2, t);
6544 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6545 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6546 true, GSI_SAME_STMT);
6547 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6548 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6549 build_zero_cst (TREE_TYPE (altv)));
6550 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6552 else if (fd->collapse > 1
6553 && !broken_loop
6554 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6555 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6557 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6558 altn2 = create_tmp_var (TREE_TYPE (altv));
6560 if (cond_var)
6562 if (POINTER_TYPE_P (type)
6563 || TREE_CODE (n1) != INTEGER_CST
6564 || fd->loop.cond_code != LT_EXPR
6565 || tree_int_cst_sgn (n1) != 1)
6566 expand_omp_build_assign (&gsi, cond_var,
6567 build_one_cst (TREE_TYPE (cond_var)));
6568 else
6569 expand_omp_build_assign (&gsi, cond_var,
6570 fold_convert (TREE_TYPE (cond_var), n1));
6573 /* Remove the GIMPLE_OMP_FOR statement. */
6574 gsi_remove (&gsi, true);
6576 if (!broken_loop)
6578 /* Code to control the increment goes in the CONT_BB. */
6579 gsi = gsi_last_nondebug_bb (cont_bb);
6580 stmt = gsi_stmt (gsi);
6581 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6583 if (fd->collapse == 1
6584 || gimple_omp_for_combined_into_p (fd->for_stmt))
6586 if (POINTER_TYPE_P (type))
6587 t = fold_build_pointer_plus (fd->loop.v, step);
6588 else
6589 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6590 expand_omp_build_assign (&gsi, fd->loop.v, t);
6592 else if (TREE_CODE (n2) != INTEGER_CST)
6593 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6594 if (altv)
6596 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6597 build_one_cst (TREE_TYPE (altv)));
6598 expand_omp_build_assign (&gsi, altv, t);
6601 if (fd->collapse > 1)
6603 i = fd->collapse - 1;
6604 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6606 t = fold_convert (sizetype, fd->loops[i].step);
6607 t = fold_build_pointer_plus (fd->loops[i].v, t);
6609 else
6611 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6612 fd->loops[i].step);
6613 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6614 fd->loops[i].v, t);
6616 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6618 if (cond_var)
6620 if (POINTER_TYPE_P (type)
6621 || TREE_CODE (n1) != INTEGER_CST
6622 || fd->loop.cond_code != LT_EXPR
6623 || tree_int_cst_sgn (n1) != 1)
6624 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6625 build_one_cst (TREE_TYPE (cond_var)));
6626 else
6627 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6628 fold_convert (TREE_TYPE (cond_var), step));
6629 expand_omp_build_assign (&gsi, cond_var, t);
6632 /* Remove GIMPLE_OMP_CONTINUE. */
6633 gsi_remove (&gsi, true);
6636 /* Emit the condition in L1_BB. */
6637 gsi = gsi_start_bb (l1_bb);
6639 if (altv)
6640 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6641 else if (fd->collapse > 1
6642 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6643 && !broken_loop)
6645 i = fd->collapse - 1;
6646 tree itype = TREE_TYPE (fd->loops[i].v);
6647 if (fd->loops[i].m2)
6648 t = n2v = create_tmp_var (itype);
6649 else
6650 t = fold_convert (itype, fd->loops[i].n2);
6651 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6652 false, GSI_CONTINUE_LINKING);
6653 tree v = fd->loops[i].v;
6654 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6655 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6656 false, GSI_CONTINUE_LINKING);
6657 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6659 else
6661 if (fd->collapse > 1 && !broken_loop)
6662 t = n2var;
6663 else
6664 t = fold_convert (type, n2);
6665 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6666 false, GSI_CONTINUE_LINKING);
6667 tree v = fd->loop.v;
6668 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6669 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6670 false, GSI_CONTINUE_LINKING);
6671 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6673 cond_stmt = gimple_build_cond_empty (t);
6674 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6675 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6676 NULL, NULL)
6677 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6678 NULL, NULL))
6680 gsi = gsi_for_stmt (cond_stmt);
6681 gimple_regimplify_operands (cond_stmt, &gsi);
6684 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6685 if (is_simt)
6687 gsi = gsi_start_bb (l2_bb);
6688 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6689 if (POINTER_TYPE_P (type))
6690 t = fold_build_pointer_plus (fd->loop.v, step);
6691 else
6692 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6693 expand_omp_build_assign (&gsi, fd->loop.v, t);
6696 /* Remove GIMPLE_OMP_RETURN. */
6697 gsi = gsi_last_nondebug_bb (exit_bb);
6698 gsi_remove (&gsi, true);
6700 /* Connect the new blocks. */
6701 remove_edge (FALLTHRU_EDGE (entry_bb));
6703 if (!broken_loop)
6705 remove_edge (BRANCH_EDGE (entry_bb));
6706 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6708 e = BRANCH_EDGE (l1_bb);
6709 ne = FALLTHRU_EDGE (l1_bb);
6710 e->flags = EDGE_TRUE_VALUE;
6712 else
6714 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6716 ne = single_succ_edge (l1_bb);
6717 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6720 ne->flags = EDGE_FALSE_VALUE;
6721 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6722 ne->probability = e->probability.invert ();
6724 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6725 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6727 if (simt_maxlane)
6729 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6730 NULL_TREE, NULL_TREE);
6731 gsi = gsi_last_bb (entry_bb);
6732 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6733 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6734 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6735 FALLTHRU_EDGE (entry_bb)->probability
6736 = profile_probability::guessed_always ().apply_scale (7, 8);
6737 BRANCH_EDGE (entry_bb)->probability
6738 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6739 l2_dom_bb = entry_bb;
6741 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6743 if (!broken_loop && fd->collapse > 1)
6745 basic_block last_bb = l1_bb;
6746 basic_block init_bb = NULL;
6747 for (i = fd->collapse - 2; i >= 0; i--)
6749 tree nextn2v = NULL_TREE;
6750 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6751 e = EDGE_SUCC (last_bb, 0);
6752 else
6753 e = EDGE_SUCC (last_bb, 1);
6754 basic_block bb = split_edge (e);
6755 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6757 t = fold_convert (sizetype, fd->loops[i].step);
6758 t = fold_build_pointer_plus (fd->loops[i].v, t);
6760 else
6762 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6763 fd->loops[i].step);
6764 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6765 fd->loops[i].v, t);
6767 gsi = gsi_after_labels (bb);
6768 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6770 bb = split_block (bb, last_stmt (bb))->dest;
6771 gsi = gsi_start_bb (bb);
6772 tree itype = TREE_TYPE (fd->loops[i].v);
6773 if (fd->loops[i].m2)
6774 t = nextn2v = create_tmp_var (itype);
6775 else
6776 t = fold_convert (itype, fd->loops[i].n2);
6777 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6778 false, GSI_CONTINUE_LINKING);
6779 tree v = fd->loops[i].v;
6780 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6781 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6782 false, GSI_CONTINUE_LINKING);
6783 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6784 cond_stmt = gimple_build_cond_empty (t);
6785 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6786 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6787 expand_omp_regimplify_p, NULL, NULL)
6788 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6789 expand_omp_regimplify_p, NULL, NULL))
6791 gsi = gsi_for_stmt (cond_stmt);
6792 gimple_regimplify_operands (cond_stmt, &gsi);
6794 ne = single_succ_edge (bb);
6795 ne->flags = EDGE_FALSE_VALUE;
6797 init_bb = create_empty_bb (bb);
6798 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6799 add_bb_to_loop (init_bb, bb->loop_father);
6800 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6801 e->probability
6802 = profile_probability::guessed_always ().apply_scale (7, 8);
6803 ne->probability = e->probability.invert ();
6805 gsi = gsi_after_labels (init_bb);
6806 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6807 fd->loops[i + 1].n1);
6808 if (fd->loops[i + 1].m1)
6810 tree t2 = fold_convert (TREE_TYPE (t),
6811 fd->loops[i + 1
6812 - fd->loops[i + 1].outer].v);
6813 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6814 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6815 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6817 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6818 if (fd->loops[i + 1].m2)
6820 if (i + 2 == fd->collapse && (n2var || altv))
6822 gcc_assert (n2v == NULL_TREE);
6823 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6825 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6826 fd->loops[i + 1].n2);
6827 tree t2 = fold_convert (TREE_TYPE (t),
6828 fd->loops[i + 1
6829 - fd->loops[i + 1].outer].v);
6830 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6831 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6832 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6833 expand_omp_build_assign (&gsi, n2v, t);
6835 if (i + 2 == fd->collapse && n2var)
6837 /* For composite simd, n2 is the first iteration the current
6838 task shouldn't already handle, so we effectively want to use
6839 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6840 as the vectorized loop. Except the vectorizer will not
6841 vectorize that, so instead compute N2VAR as
6842 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6843 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6844 as the loop to vectorize. */
6845 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6846 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6848 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6849 == LT_EXPR ? -1 : 1));
6850 t = fold_build2 (PLUS_EXPR, itype,
6851 fold_convert (itype,
6852 fd->loops[i + 1].step), t);
6853 if (fd->loops[i + 1].m2)
6854 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6855 else
6856 t = fold_build2 (PLUS_EXPR, itype, t,
6857 fold_convert (itype,
6858 fd->loops[i + 1].n2));
6859 t = fold_build2 (MINUS_EXPR, itype, t,
6860 fold_convert (itype, fd->loops[i + 1].v));
6861 tree step = fold_convert (itype, fd->loops[i + 1].step);
6862 if (TYPE_UNSIGNED (itype)
6863 && fd->loops[i + 1].cond_code == GT_EXPR)
6864 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6865 fold_build1 (NEGATE_EXPR, itype, t),
6866 fold_build1 (NEGATE_EXPR, itype, step));
6867 else
6868 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6869 t = fold_convert (type, t);
6871 else
6872 t = counts[i + 1];
6873 expand_omp_build_assign (&gsi, min_arg1, t2);
6874 expand_omp_build_assign (&gsi, min_arg2, t);
6875 e = split_block (init_bb, last_stmt (init_bb));
6876 gsi = gsi_after_labels (e->dest);
6877 init_bb = e->dest;
6878 remove_edge (FALLTHRU_EDGE (entry_bb));
6879 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6880 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6881 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6882 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6883 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6884 expand_omp_build_assign (&gsi, n2var, t);
6886 if (i + 2 == fd->collapse && altv)
6888 /* The vectorizer currently punts on loops with non-constant
6889 steps for the main IV (can't compute number of iterations
6890 and gives up because of that). As for OpenMP loops it is
6891 always possible to compute the number of iterations upfront,
6892 use an alternate IV as the loop iterator. */
6893 expand_omp_build_assign (&gsi, altv,
6894 build_zero_cst (TREE_TYPE (altv)));
6895 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6896 if (POINTER_TYPE_P (itype))
6897 itype = signed_type_for (itype);
6898 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6899 ? -1 : 1));
6900 t = fold_build2 (PLUS_EXPR, itype,
6901 fold_convert (itype, fd->loops[i + 1].step), t);
6902 t = fold_build2 (PLUS_EXPR, itype, t,
6903 fold_convert (itype,
6904 fd->loops[i + 1].m2
6905 ? n2v : fd->loops[i + 1].n2));
6906 t = fold_build2 (MINUS_EXPR, itype, t,
6907 fold_convert (itype, fd->loops[i + 1].v));
6908 tree step = fold_convert (itype, fd->loops[i + 1].step);
6909 if (TYPE_UNSIGNED (itype)
6910 && fd->loops[i + 1].cond_code == GT_EXPR)
6911 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6912 fold_build1 (NEGATE_EXPR, itype, t),
6913 fold_build1 (NEGATE_EXPR, itype, step));
6914 else
6915 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6916 t = fold_convert (TREE_TYPE (altv), t);
6917 expand_omp_build_assign (&gsi, altn2, t);
6918 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6919 fd->loops[i + 1].m2
6920 ? n2v : fd->loops[i + 1].n2);
6921 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6922 true, GSI_SAME_STMT);
6923 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6924 fd->loops[i + 1].v, t2);
6925 gassign *g
6926 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6927 build_zero_cst (TREE_TYPE (altv)));
6928 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6930 n2v = nextn2v;
6932 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6933 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6935 e = find_edge (entry_bb, last_bb);
6936 redirect_edge_succ (e, bb);
6937 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6938 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6941 last_bb = bb;
6944 if (!broken_loop)
6946 class loop *loop = alloc_loop ();
6947 loop->header = l1_bb;
6948 loop->latch = cont_bb;
6949 add_loop (loop, l1_bb->loop_father);
6950 loop->safelen = safelen_int;
6951 if (simduid)
6953 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6954 cfun->has_simduid_loops = true;
6956 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6957 the loop. */
6958 if ((flag_tree_loop_vectorize
6959 || !global_options_set.x_flag_tree_loop_vectorize)
6960 && flag_tree_loop_optimize
6961 && loop->safelen > 1)
6963 loop->force_vectorize = true;
6964 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6966 unsigned HOST_WIDE_INT v
6967 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6968 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6969 loop->simdlen = v;
6971 cfun->has_force_vectorize_loops = true;
6973 else if (dont_vectorize)
6974 loop->dont_vectorize = true;
6976 else if (simduid)
6977 cfun->has_simduid_loops = true;
6980 /* Taskloop construct is represented after gimplification with
6981 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6982 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6983 which should just compute all the needed loop temporaries
6984 for GIMPLE_OMP_TASK. */
/* REGION provides the entry, continue and exit blocks (region->entry,
   region->cont, region->exit); region->cont must be non-NULL.  FD
   describes the loop being expanded.  INNER_STMT must be the
   GIMPLE_OMP_TASK marked as a taskloop; the start and end values
   computed here are assigned to the decls of its first two _LOOPTEMP_
   clauses.
   NOTE(review): lines consisting only of a brace are absent from this
   listing (the embedded line numbers skip), so block extents have to be
   inferred when reading the body.  */
6986 static void
6987 expand_omp_taskloop_for_outer (struct omp_region *region,
6988 struct omp_for_data *fd,
6989 gimple *inner_stmt)
6991 tree type, bias = NULL_TREE;
6992 basic_block entry_bb, cont_bb, exit_bb;
6993 gimple_stmt_iterator gsi;
6994 gassign *assign_stmt;
6995 tree *counts = NULL;
6996 int i;
/* Preconditions: an inner statement was supplied, the region has a
   continue block, and the inner statement is a taskloop task.  */
6998 gcc_assert (inner_stmt);
6999 gcc_assert (region->cont);
7000 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7001 && gimple_omp_task_taskloop_p (inner_stmt));
7002 type = TREE_TYPE (fd->loop.v);
7004 /* See if we need to bias by LLONG_MIN. */
/* Only considered when the iteration type is unsigned long long while
   the loop variable has a signed integer type.  */
7005 if (fd->iter_type == long_long_unsigned_type_node
7006 && TREE_CODE (type) == INTEGER_TYPE
7007 && !TYPE_UNSIGNED (type))
7009 tree n1, n2;
/* N1/N2 are the two bounds ordered by loop direction, with the far
   bound adjusted by the step.  */
7011 if (fd->loop.cond_code == LT_EXPR)
7013 n1 = fd->loop.n1;
7014 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7016 else
7018 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7019 n2 = fd->loop.n1;
/* Bias by TYPE_MIN_VALUE (type) unless both bounds folded to
   INTEGER_CSTs and they agree on being negative or not.  */
7021 if (TREE_CODE (n1) != INTEGER_CST
7022 || TREE_CODE (n2) != INTEGER_CST
7023 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7024 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7027 entry_bb = region->entry;
7028 cont_bb = region->cont;
/* Expected CFG shape: ENTRY_BB has exactly two successors and its
   branch edge leads to the same block as CONT_BB's fallthru edge.  */
7029 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7030 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7031 exit_bb = region->exit;
/* The last non-debug statement of ENTRY_BB must be the GIMPLE_OMP_FOR;
   it is remembered in FOR_STMT so it can be removed at the end.  */
7033 gsi = gsi_last_nondebug_bb (entry_bb);
7034 gimple *for_stmt = gsi_stmt (gsi);
7035 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7036 if (fd->collapse > 1)
7038 int first_zero_iter = -1, dummy = -1;
7039 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
/* COUNTS holds one tree per collapsed loop.  ZERO_ITER_BB and
   FIRST_ZERO_ITER are tested after the call below, so presumably the
   callee receives them by reference and may set them -- confirm
   against expand_omp_for_init_counts.  */
7041 counts = XALLOCAVEC (tree, fd->collapse);
7042 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7043 zero_iter_bb, first_zero_iter,
7044 dummy_bb, dummy, l2_dom_bb);
7046 if (zero_iter_bb)
7048 /* Some counts[i] vars might be uninitialized if
7049 some loop has zero iterations. But the body shouldn't
7050 be executed in that case, so just avoid uninit warnings. */
7051 for (i = first_zero_iter; i < fd->collapse; i++)
7052 if (SSA_VAR_P (counts[i]))
7053 TREE_NO_WARNING (counts[i]) = 1;
/* Split ENTRY_BB after the statement preceding the iterator position,
   continue in the new block, and make ZERO_ITER_BB fall through into
   it.  The new block's immediate dominator is set to that of
   ZERO_ITER_BB.  */
7054 gsi_prev (&gsi);
7055 edge e = split_block (entry_bb, gsi_stmt (gsi));
7056 entry_bb = e->dest;
7057 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7058 gsi = gsi_last_bb (entry_bb);
7059 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7060 get_immediate_dominator (CDI_DOMINATORS,
7061 zero_iter_bb));
/* T0 and T1 become the start (n1) and end (n2) bounds converted to
   fd->iter_type.  */
7065 tree t0, t1;
7066 t1 = fd->loop.n2;
7067 t0 = fd->loop.n1;
7068 if (POINTER_TYPE_P (TREE_TYPE (t0))
7069 && TYPE_PRECISION (TREE_TYPE (t0))
7070 != TYPE_PRECISION (fd->iter_type))
7072 /* Avoid casting pointers to integer of a different size. */
7073 tree itype = signed_type_for (type);
7074 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7075 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7077 else
7079 t1 = fold_convert (fd->iter_type, t1);
7080 t0 = fold_convert (fd->iter_type, t0);
7082 if (bias)
7084 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7085 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
/* The first two _LOOPTEMP_ clauses on the task supply STARTVAR and
   ENDVAR; both clauses are required to exist.  */
7088 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7089 OMP_CLAUSE__LOOPTEMP_);
7090 gcc_assert (innerc);
7091 tree startvar = OMP_CLAUSE_DECL (innerc);
7092 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7093 gcc_assert (innerc);
7094 tree endvar = OMP_CLAUSE_DECL (innerc);
7095 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7097 innerc = find_lastprivate_looptemp (fd, innerc);
7098 if (innerc)
7100 /* If needed (inner taskloop has lastprivate clause), propagate
7101 down the total number of iterations. */
7102 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7103 NULL_TREE, false,
7104 GSI_CONTINUE_LINKING);
7105 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* Emit STARTVAR = T0 followed by ENDVAR = T1 after the current
   iterator position.  */
7110 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7111 GSI_CONTINUE_LINKING);
7112 assign_stmt = gimple_build_assign (startvar, t0);
7113 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7115 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7116 GSI_CONTINUE_LINKING);
7117 assign_stmt = gimple_build_assign (endvar, t1);
7118 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* For collapsed loops the per-loop variables are initialized as well;
   no nonrect bounds array is passed (NULL).  */
7119 if (fd->collapse > 1)
7120 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7122 /* Remove the GIMPLE_OMP_FOR statement. */
7123 gsi = gsi_for_stmt (for_stmt);
7124 gsi_remove (&gsi, true);
/* Remove the last non-debug statement of CONT_BB (presumably the
   GIMPLE_OMP_CONTINUE -- confirm).  */
7126 gsi = gsi_last_nondebug_bb (cont_bb);
7127 gsi_remove (&gsi, true);
/* Remove the last non-debug statement of EXIT_BB (presumably the
   GIMPLE_OMP_RETURN -- confirm).  */
7129 gsi = gsi_last_nondebug_bb (exit_bb);
7130 gsi_remove (&gsi, true);
/* Only the fallthru edges out of ENTRY_BB and CONT_BB survive, each
   with probability "always"; the branch edges are deleted and the
   affected dominator information is updated.  */
7132 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7133 remove_edge (BRANCH_EDGE (entry_bb));
7134 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7135 remove_edge (BRANCH_EDGE (cont_bb));
7136 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7137 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7138 recompute_dominator (CDI_DOMINATORS, region->entry));
7141 /* Taskloop construct is represented after gimplification with
7142 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7143 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7144 GOMP_taskloop{,_ull} function arranges for each task to be given just
7145 a single range of iterations. */
/* REGION provides the entry, continue and exit blocks; a NULL
   region->cont is treated as a "broken" loop.  FD describes the loop;
   its GIMPLE_OMP_FOR must carry two _LOOPTEMP_ clauses whose decls
   hold the start and end of the range to iterate.  INNER_STMT is read
   for its _LOOPTEMP_ clauses when the loop is combined, and is
   forwarded to expand_omp_for_init_vars for collapsed loops.
   NOTE(review): lines consisting only of a brace are absent from this
   listing (the embedded line numbers skip), so block extents have to be
   inferred when reading the body.  */
7147 static void
7148 expand_omp_taskloop_for_inner (struct omp_region *region,
7149 struct omp_for_data *fd,
7150 gimple *inner_stmt)
7152 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7153 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7154 basic_block fin_bb;
7155 gimple_stmt_iterator gsi;
7156 edge ep;
7157 bool broken_loop = region->cont == NULL;
7158 tree *counts = NULL;
7159 tree n1, n2, step;
/* ITYPE equals TYPE except for pointer loop variables, where it is the
   corresponding signed integer type.  */
7161 itype = type = TREE_TYPE (fd->loop.v);
7162 if (POINTER_TYPE_P (type))
7163 itype = signed_type_for (type);
7165 /* See if we need to bias by LLONG_MIN. */
7166 if (fd->iter_type == long_long_unsigned_type_node
7167 && TREE_CODE (type) == INTEGER_TYPE
7168 && !TYPE_UNSIGNED (type))
/* These N1/N2 shadow the function-level N1/N2 declared above; they
   are only used to decide whether BIAS is needed.  */
7170 tree n1, n2;
7172 if (fd->loop.cond_code == LT_EXPR)
7174 n1 = fd->loop.n1;
7175 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7177 else
7179 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7180 n2 = fd->loop.n1;
/* Bias by TYPE_MIN_VALUE (type) unless both bounds folded to
   INTEGER_CSTs and they agree on being negative or not.  */
7182 if (TREE_CODE (n1) != INTEGER_CST
7183 || TREE_CODE (n2) != INTEGER_CST
7184 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7185 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7188 entry_bb = region->entry;
7189 cont_bb = region->cont;
/* ENTRY_BB has two successors: its branch edge leads to FIN_BB and
   its fallthru edge to BODY_BB.  Unless the loop is broken, CONT_BB
   also has two successors, falling through to FIN_BB and branching
   back to BODY_BB.  */
7190 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7191 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7192 gcc_assert (broken_loop
7193 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7194 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7195 if (!broken_loop)
7197 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7198 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7200 exit_bb = region->exit;
7202 /* Iteration space partitioning goes in ENTRY_BB. */
7203 gsi = gsi_last_nondebug_bb (entry_bb);
7204 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7206 if (fd->collapse > 1)
7208 int first_zero_iter = -1, dummy = -1;
7209 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
/* COUNTS holds one tree per collapsed loop; FIN_BB is passed in the
   zero-iterations block position.  */
7211 counts = XALLOCAVEC (tree, fd->collapse);
7212 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7213 fin_bb, first_zero_iter,
7214 dummy_bb, dummy, l2_dom_bb);
/* NOTE(review): neither this value of T nor the one assigned in the
   else arm appears to be read before T is reassigned further down --
   confirm.  */
7215 t = NULL_TREE;
7217 else
7218 t = integer_one_node;
/* The range assigned to this task is read from the decls of the first
   two _LOOPTEMP_ clauses on the GIMPLE_OMP_FOR: N1 is the start and
   N2 the end.  */
7220 step = fd->loop.step;
7221 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7222 OMP_CLAUSE__LOOPTEMP_);
7223 gcc_assert (innerc);
7224 n1 = OMP_CLAUSE_DECL (innerc);
7225 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7226 gcc_assert (innerc);
7227 n2 = OMP_CLAUSE_DECL (innerc);
7228 if (bias)
7230 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7231 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
/* Gimplify the bounds and step before the GIMPLE_OMP_FOR.  N1 is
   converted to TYPE, whereas N2 and STEP are converted to ITYPE.  */
7233 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7234 true, NULL_TREE, true, GSI_SAME_STMT);
7235 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7236 true, NULL_TREE, true, GSI_SAME_STMT);
7237 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7238 true, NULL_TREE, true, GSI_SAME_STMT);
/* By default the start value is stored straight into the loop
   variable and there is no separate end variable.  */
7240 tree startvar = fd->loop.v;
7241 tree endvar = NULL_TREE;
/* For a combined construct the bounds are stored instead into the
   decls of the first two _LOOPTEMP_ clauses of INNER_STMT.  */
7243 if (gimple_omp_for_combined_p (fd->for_stmt))
7245 tree clauses = gimple_omp_for_clauses (inner_stmt);
7246 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7247 gcc_assert (innerc);
7248 startvar = OMP_CLAUSE_DECL (innerc);
7249 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7250 OMP_CLAUSE__LOOPTEMP_);
7251 gcc_assert (innerc);
7252 endvar = OMP_CLAUSE_DECL (innerc);
/* Emit STARTVAR = N1 after the current iterator position.  */
7254 t = fold_convert (TREE_TYPE (startvar), n1);
7255 t = force_gimple_operand_gsi (&gsi, t,
7256 DECL_P (startvar)
7257 && TREE_ADDRESSABLE (startvar),
7258 NULL_TREE, false, GSI_CONTINUE_LINKING);
7259 gimple *assign_stmt = gimple_build_assign (startvar, t);
7260 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* E is the gimplified end bound in STARTVAR's type; it is reused as
   the right-hand operand of the loop exit test below.  */
7262 t = fold_convert (TREE_TYPE (startvar), n2);
7263 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7264 false, GSI_CONTINUE_LINKING);
7265 if (endvar)
/* Store E into ENDVAR and into the loop variable, using an explicit
   NOP_EXPR conversion for the latter when the types are not trivially
   compatible.  */
7267 assign_stmt = gimple_build_assign (endvar, e);
7268 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7269 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7270 assign_stmt = gimple_build_assign (fd->loop.v, e);
7271 else
7272 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7273 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7276 tree *nonrect_bounds = NULL;
7277 if (fd->collapse > 1)
7279 if (fd->non_rect)
/* Zero-initialized array with fd->last_nonrect + 1 entries.  */
7281 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7282 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
/* The iterator must still be in ENTRY_BB before the call; afterwards
   ENTRY_BB is refreshed from the iterator's block.  */
7284 gcc_assert (gsi_bb (gsi) == entry_bb);
7285 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7286 startvar);
7287 entry_bb = gsi_bb (gsi);
7290 if (!broken_loop)
7292 /* The code controlling the sequential loop replaces the
7293 GIMPLE_OMP_CONTINUE. */
7294 gsi = gsi_last_nondebug_bb (cont_bb);
7295 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7296 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
/* VMAIN and VBACK are the control use and control def operands of the
   GIMPLE_OMP_CONTINUE.  */
7297 vmain = gimple_omp_continue_control_use (cont_stmt);
7298 vback = gimple_omp_continue_control_def (cont_stmt);
7300 if (!gimple_omp_for_combined_p (fd->for_stmt))
/* Emit VBACK = VMAIN + STEP (pointer-plus for pointer types) and then
   the exit test against E, both before the GIMPLE_OMP_CONTINUE.  */
7302 if (POINTER_TYPE_P (type))
7303 t = fold_build_pointer_plus (vmain, step);
7304 else
7305 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7306 t = force_gimple_operand_gsi (&gsi, t,
7307 DECL_P (vback)
7308 && TREE_ADDRESSABLE (vback),
7309 NULL_TREE, true, GSI_SAME_STMT);
7310 assign_stmt = gimple_build_assign (vback, t);
7311 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
/* When VBACK is an addressable decl the gimplified value T is compared
   rather than VBACK itself.  */
7313 t = build2 (fd->loop.cond_code, boolean_type_node,
7314 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7315 ? t : vback, e);
7316 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7319 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7320 gsi_remove (&gsi, true);
7322 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7323 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7324 cont_bb, body_bb);
7327 /* Remove the GIMPLE_OMP_FOR statement. */
7328 gsi = gsi_for_stmt (fd->for_stmt);
7329 gsi_remove (&gsi, true);
7331 /* Remove the GIMPLE_OMP_RETURN statement. */
7332 gsi = gsi_last_nondebug_bb (exit_bb);
7333 gsi_remove (&gsi, true);
/* The fallthru edge out of ENTRY_BB is always taken.  Its branch edge
   is removed; for a broken loop the blocks it dominates are removed
   too and the enclosing region's continue block is cleared.  */
7335 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7336 if (!broken_loop)
7337 remove_edge (BRANCH_EDGE (entry_bb));
7338 else
7340 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7341 region->outer->cont = NULL;
7344 /* Connect all the blocks. */
7345 if (!broken_loop)
/* EP is the back edge from CONT_BB.  It is dropped for a combined
   loop, redirected to COLLAPSE_BB for a collapsed one, and otherwise
   kept as the true edge of the exit test.  */
7347 ep = find_edge (cont_bb, body_bb);
7348 if (gimple_omp_for_combined_p (fd->for_stmt))
7350 remove_edge (ep);
7351 ep = NULL;
7353 else if (fd->collapse > 1)
7355 remove_edge (ep);
7356 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7358 else
7359 ep->flags = EDGE_TRUE_VALUE;
/* The CONT_BB -> FIN_BB edge is the false edge when a back edge
   remains, and a plain fallthru otherwise.  */
7360 find_edge (cont_bb, fin_bb)->flags
7361 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
/* Refresh dominator information for the blocks whose predecessors
   changed.  */
7364 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7365 recompute_dominator (CDI_DOMINATORS, body_bb));
7366 if (!broken_loop)
7367 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7368 recompute_dominator (CDI_DOMINATORS, fin_bb));
/* Register the new sequential loop, headed by BODY_BB, in the loop
   tree.  The latch is set to CONT_BB only when no COLLAPSE_BB was
   created; otherwise it is left unset here.  */
7370 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7372 class loop *loop = alloc_loop ();
7373 loop->header = body_bb;
7374 if (collapse_bb == NULL)
7375 loop->latch = cont_bb;
7376 add_loop (loop, body_bb->loop_father);
7380 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7381 partitioned loop. The lowering here is abstracted, in that the
7382 loop parameters are passed through internal functions, which are
7383 further lowered by oacc_device_lower, once we get to the target
7384 compiler. The loop is of the form:
7386 for (V = B; V LTGT E; V += S) {BODY}
7388 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7389 (constant 0 for no chunking) and we will have a GWV partitioning
7390 mask, specifying dimensions over which the loop is to be
7391 partitioned (see note below). We generate code that looks like
7392 (this ignores tiling):
7394 <entry_bb> [incoming FALL->body, BRANCH->exit]
7395 typedef signedintify (typeof (V)) T; // underlying signed integral type
7396 T range = E - B;
7397 T chunk_no = 0;
7398 T dir = LTGT == '<' ? +1 : -1;
7399 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7400 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7402 <head_bb> [created by splitting end of entry_bb]
7403 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7404 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7405 if (!(offset LTGT bound)) goto bottom_bb;
7407 <body_bb> [incoming]
7408 V = B + offset;
7409 {BODY}
7411 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7412 offset += step;
7413 if (offset LTGT bound) goto body_bb; [*]
7415 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7416 chunk_no++;
7417 if (chunk_no < chunk_max) goto head_bb;
7419 <exit_bb> [incoming]
7420 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7422 [*] Needed if V live at end of loop. */
7424 static void
7425 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7427 bool is_oacc_kernels_parallelized
7428 = (lookup_attribute ("oacc kernels parallelized",
7429 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7431 bool is_oacc_kernels
7432 = (lookup_attribute ("oacc kernels",
7433 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7434 if (is_oacc_kernels_parallelized)
7435 gcc_checking_assert (is_oacc_kernels);
7437 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7438 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7439 for SSA specifics, and some are for 'parloops' OpenACC
7440 'kernels'-parallelized specifics. */
7442 tree v = fd->loop.v;
7443 enum tree_code cond_code = fd->loop.cond_code;
7444 enum tree_code plus_code = PLUS_EXPR;
7446 tree chunk_size = integer_minus_one_node;
7447 tree gwv = integer_zero_node;
7448 tree iter_type = TREE_TYPE (v);
7449 tree diff_type = iter_type;
7450 tree plus_type = iter_type;
7451 struct oacc_collapse *counts = NULL;
7453 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7454 == GF_OMP_FOR_KIND_OACC_LOOP);
7455 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7456 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7458 if (POINTER_TYPE_P (iter_type))
7460 plus_code = POINTER_PLUS_EXPR;
7461 plus_type = sizetype;
7463 for (int ix = fd->collapse; ix--;)
7465 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7466 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7467 diff_type = diff_type2;
7469 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7470 diff_type = signed_type_for (diff_type);
7471 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7472 diff_type = integer_type_node;
7474 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7475 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7476 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7477 basic_block bottom_bb = NULL;
7479 /* entry_bb has two successors; the branch edge is to the exit
7480 block, fallthrough edge to body. */
7481 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7482 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7484 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7485 body_bb, or to a block whose only successor is the body_bb. Its
7486 fallthrough successor is the final block (same as the branch
7487 successor of the entry_bb). */
7488 if (cont_bb)
7490 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7491 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7493 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7494 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7496 else
7497 gcc_assert (!gimple_in_ssa_p (cfun));
7499 /* The exit block only has entry_bb and cont_bb as predecessors. */
7500 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7502 tree chunk_no;
7503 tree chunk_max = NULL_TREE;
7504 tree bound, offset;
7505 tree step = create_tmp_var (diff_type, ".step");
7506 bool up = cond_code == LT_EXPR;
7507 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7508 bool chunking = !gimple_in_ssa_p (cfun);
7509 bool negating;
7511 /* Tiling vars. */
7512 tree tile_size = NULL_TREE;
7513 tree element_s = NULL_TREE;
7514 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7515 basic_block elem_body_bb = NULL;
7516 basic_block elem_cont_bb = NULL;
7518 /* SSA instances. */
7519 tree offset_incr = NULL_TREE;
7520 tree offset_init = NULL_TREE;
7522 gimple_stmt_iterator gsi;
7523 gassign *ass;
7524 gcall *call;
7525 gimple *stmt;
7526 tree expr;
7527 location_t loc;
7528 edge split, be, fte;
7530 /* Split the end of entry_bb to create head_bb. */
7531 split = split_block (entry_bb, last_stmt (entry_bb));
7532 basic_block head_bb = split->dest;
7533 entry_bb = split->src;
7535 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7536 gsi = gsi_last_nondebug_bb (entry_bb);
7537 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7538 loc = gimple_location (for_stmt);
7540 if (gimple_in_ssa_p (cfun))
7542 offset_init = gimple_omp_for_index (for_stmt, 0);
7543 gcc_assert (integer_zerop (fd->loop.n1));
7544 /* The SSA parallelizer does gang parallelism. */
7545 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7548 if (fd->collapse > 1 || fd->tiling)
7550 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7551 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7552 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7553 TREE_TYPE (fd->loop.n2), loc);
7555 if (SSA_VAR_P (fd->loop.n2))
7557 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7558 true, GSI_SAME_STMT);
7559 ass = gimple_build_assign (fd->loop.n2, total);
7560 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7564 tree b = fd->loop.n1;
7565 tree e = fd->loop.n2;
7566 tree s = fd->loop.step;
7568 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7569 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7571 /* Convert the step, avoiding possible unsigned->signed overflow. */
7572 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7573 if (negating)
7574 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7575 s = fold_convert (diff_type, s);
7576 if (negating)
7577 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7578 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7580 if (!chunking)
7581 chunk_size = integer_zero_node;
7582 expr = fold_convert (diff_type, chunk_size);
7583 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7584 NULL_TREE, true, GSI_SAME_STMT);
7586 if (fd->tiling)
7588 /* Determine the tile size and element step,
7589 modify the outer loop step size. */
7590 tile_size = create_tmp_var (diff_type, ".tile_size");
7591 expr = build_int_cst (diff_type, 1);
7592 for (int ix = 0; ix < fd->collapse; ix++)
7593 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7594 expr = force_gimple_operand_gsi (&gsi, expr, true,
7595 NULL_TREE, true, GSI_SAME_STMT);
7596 ass = gimple_build_assign (tile_size, expr);
7597 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7599 element_s = create_tmp_var (diff_type, ".element_s");
7600 ass = gimple_build_assign (element_s, s);
7601 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7603 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7604 s = force_gimple_operand_gsi (&gsi, expr, true,
7605 NULL_TREE, true, GSI_SAME_STMT);
7608 /* Determine the range, avoiding possible unsigned->signed overflow. */
7609 negating = !up && TYPE_UNSIGNED (iter_type);
7610 expr = fold_build2 (MINUS_EXPR, plus_type,
7611 fold_convert (plus_type, negating ? b : e),
7612 fold_convert (plus_type, negating ? e : b));
7613 expr = fold_convert (diff_type, expr);
7614 if (negating)
7615 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7616 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7617 NULL_TREE, true, GSI_SAME_STMT);
7619 chunk_no = build_int_cst (diff_type, 0);
7620 if (chunking)
7622 gcc_assert (!gimple_in_ssa_p (cfun));
7624 expr = chunk_no;
7625 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7626 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7628 ass = gimple_build_assign (chunk_no, expr);
7629 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7631 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7632 build_int_cst (integer_type_node,
7633 IFN_GOACC_LOOP_CHUNKS),
7634 dir, range, s, chunk_size, gwv);
7635 gimple_call_set_lhs (call, chunk_max);
7636 gimple_set_location (call, loc);
7637 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7639 else
7640 chunk_size = chunk_no;
7642 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7643 build_int_cst (integer_type_node,
7644 IFN_GOACC_LOOP_STEP),
7645 dir, range, s, chunk_size, gwv);
7646 gimple_call_set_lhs (call, step);
7647 gimple_set_location (call, loc);
7648 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7650 /* Remove the GIMPLE_OMP_FOR. */
7651 gsi_remove (&gsi, true);
7653 /* Fixup edges from head_bb. */
7654 be = BRANCH_EDGE (head_bb);
7655 fte = FALLTHRU_EDGE (head_bb);
7656 be->flags |= EDGE_FALSE_VALUE;
7657 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7659 basic_block body_bb = fte->dest;
7661 if (gimple_in_ssa_p (cfun))
7663 gsi = gsi_last_nondebug_bb (cont_bb);
7664 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7666 offset = gimple_omp_continue_control_use (cont_stmt);
7667 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7669 else
7671 offset = create_tmp_var (diff_type, ".offset");
7672 offset_init = offset_incr = offset;
7674 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7676 /* Loop offset & bound go into head_bb. */
7677 gsi = gsi_start_bb (head_bb);
7679 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7680 build_int_cst (integer_type_node,
7681 IFN_GOACC_LOOP_OFFSET),
7682 dir, range, s,
7683 chunk_size, gwv, chunk_no);
7684 gimple_call_set_lhs (call, offset_init);
7685 gimple_set_location (call, loc);
7686 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7688 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7689 build_int_cst (integer_type_node,
7690 IFN_GOACC_LOOP_BOUND),
7691 dir, range, s,
7692 chunk_size, gwv, offset_init);
7693 gimple_call_set_lhs (call, bound);
7694 gimple_set_location (call, loc);
7695 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7697 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7698 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7699 GSI_CONTINUE_LINKING);
7701 /* V assignment goes into body_bb. */
7702 if (!gimple_in_ssa_p (cfun))
7704 gsi = gsi_start_bb (body_bb);
7706 expr = build2 (plus_code, iter_type, b,
7707 fold_convert (plus_type, offset));
7708 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7709 true, GSI_SAME_STMT);
7710 ass = gimple_build_assign (v, expr);
7711 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7713 if (fd->collapse > 1 || fd->tiling)
7714 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7716 if (fd->tiling)
7718 /* Determine the range of the element loop -- usually simply
7719 the tile_size, but could be smaller if the final
7720 iteration of the outer loop is a partial tile. */
7721 tree e_range = create_tmp_var (diff_type, ".e_range");
7723 expr = build2 (MIN_EXPR, diff_type,
7724 build2 (MINUS_EXPR, diff_type, bound, offset),
7725 build2 (MULT_EXPR, diff_type, tile_size,
7726 element_s));
7727 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7728 true, GSI_SAME_STMT);
7729 ass = gimple_build_assign (e_range, expr);
7730 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7732 /* Determine bound, offset & step of inner loop. */
7733 e_bound = create_tmp_var (diff_type, ".e_bound");
7734 e_offset = create_tmp_var (diff_type, ".e_offset");
7735 e_step = create_tmp_var (diff_type, ".e_step");
7737 /* Mark these as element loops. */
7738 tree t, e_gwv = integer_minus_one_node;
7739 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7741 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7742 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7743 element_s, chunk, e_gwv, chunk);
7744 gimple_call_set_lhs (call, e_offset);
7745 gimple_set_location (call, loc);
7746 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7748 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7749 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7750 element_s, chunk, e_gwv, e_offset);
7751 gimple_call_set_lhs (call, e_bound);
7752 gimple_set_location (call, loc);
7753 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7755 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7756 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7757 element_s, chunk, e_gwv);
7758 gimple_call_set_lhs (call, e_step);
7759 gimple_set_location (call, loc);
7760 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7762 /* Add test and split block. */
7763 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7764 stmt = gimple_build_cond_empty (expr);
7765 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7766 split = split_block (body_bb, stmt);
7767 elem_body_bb = split->dest;
7768 if (cont_bb == body_bb)
7769 cont_bb = elem_body_bb;
7770 body_bb = split->src;
7772 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7774 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7775 if (cont_bb == NULL)
7777 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7778 e->probability = profile_probability::even ();
7779 split->probability = profile_probability::even ();
7782 /* Initialize the user's loop vars. */
7783 gsi = gsi_start_bb (elem_body_bb);
7784 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7785 diff_type);
7789 /* Loop increment goes into cont_bb. If this is not a loop, we
7790 will have spawned threads as if it was, and each one will
7791 execute one iteration. The specification is not explicit about
7792 whether such constructs are ill-formed or not, and they can
7793 occur, especially when noreturn routines are involved. */
7794 if (cont_bb)
7796 gsi = gsi_last_nondebug_bb (cont_bb);
7797 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7798 loc = gimple_location (cont_stmt);
7800 if (fd->tiling)
7802 /* Insert element loop increment and test. */
7803 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7804 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7805 true, GSI_SAME_STMT);
7806 ass = gimple_build_assign (e_offset, expr);
7807 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7808 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7810 stmt = gimple_build_cond_empty (expr);
7811 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7812 split = split_block (cont_bb, stmt);
7813 elem_cont_bb = split->src;
7814 cont_bb = split->dest;
7816 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7817 split->probability = profile_probability::unlikely ().guessed ();
7818 edge latch_edge
7819 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7820 latch_edge->probability = profile_probability::likely ().guessed ();
7822 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7823 skip_edge->probability = profile_probability::unlikely ().guessed ();
7824 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7825 loop_entry_edge->probability
7826 = profile_probability::likely ().guessed ();
7828 gsi = gsi_for_stmt (cont_stmt);
7831 /* Increment offset. */
7832 if (gimple_in_ssa_p (cfun))
7833 expr = build2 (plus_code, iter_type, offset,
7834 fold_convert (plus_type, step));
7835 else
7836 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7837 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7838 true, GSI_SAME_STMT);
7839 ass = gimple_build_assign (offset_incr, expr);
7840 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7841 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7842 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7844 /* Remove the GIMPLE_OMP_CONTINUE. */
7845 gsi_remove (&gsi, true);
7847 /* Fixup edges from cont_bb. */
7848 be = BRANCH_EDGE (cont_bb);
7849 fte = FALLTHRU_EDGE (cont_bb);
7850 be->flags |= EDGE_TRUE_VALUE;
7851 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7853 if (chunking)
7855 /* Split the beginning of exit_bb to make bottom_bb. We
7856 need to insert a nop at the start, because splitting is
7857 after a stmt, not before. */
7858 gsi = gsi_start_bb (exit_bb);
7859 stmt = gimple_build_nop ();
7860 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7861 split = split_block (exit_bb, stmt);
7862 bottom_bb = split->src;
7863 exit_bb = split->dest;
7864 gsi = gsi_last_bb (bottom_bb);
7866 /* Chunk increment and test goes into bottom_bb. */
7867 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7868 build_int_cst (diff_type, 1));
7869 ass = gimple_build_assign (chunk_no, expr);
7870 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7872 /* Chunk test at end of bottom_bb. */
7873 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7874 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7875 GSI_CONTINUE_LINKING);
7877 /* Fixup edges from bottom_bb. */
7878 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7879 split->probability = profile_probability::unlikely ().guessed ();
7880 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7881 latch_edge->probability = profile_probability::likely ().guessed ();
7885 gsi = gsi_last_nondebug_bb (exit_bb);
7886 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7887 loc = gimple_location (gsi_stmt (gsi));
7889 if (!gimple_in_ssa_p (cfun))
7891 /* Insert the final value of V, in case it is live. This is the
7892 value for the only thread that survives past the join. */
7893 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7894 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7895 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7896 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7897 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7898 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7899 true, GSI_SAME_STMT);
7900 ass = gimple_build_assign (v, expr);
7901 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7904 /* Remove the OMP_RETURN. */
7905 gsi_remove (&gsi, true);
7907 if (cont_bb)
7909 /* We now have one, two or three nested loops. Update the loop
7910 structures. */
7911 class loop *parent = entry_bb->loop_father;
7912 class loop *body = body_bb->loop_father;
7914 if (chunking)
7916 class loop *chunk_loop = alloc_loop ();
7917 chunk_loop->header = head_bb;
7918 chunk_loop->latch = bottom_bb;
7919 add_loop (chunk_loop, parent);
7920 parent = chunk_loop;
7922 else if (parent != body)
7924 gcc_assert (body->header == body_bb);
7925 gcc_assert (body->latch == cont_bb
7926 || single_pred (body->latch) == cont_bb);
7927 parent = NULL;
7930 if (parent)
7932 class loop *body_loop = alloc_loop ();
7933 body_loop->header = body_bb;
7934 body_loop->latch = cont_bb;
7935 add_loop (body_loop, parent);
7937 if (fd->tiling)
7939 /* Insert tiling's element loop. */
7940 class loop *inner_loop = alloc_loop ();
7941 inner_loop->header = elem_body_bb;
7942 inner_loop->latch = elem_cont_bb;
7943 add_loop (inner_loop, body_loop);
7949 /* Expand the OMP loop defined by REGION. */
7951 static void
7952 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7954 struct omp_for_data fd;
7955 struct omp_for_data_loop *loops;
7957 loops = XALLOCAVEC (struct omp_for_data_loop,
7958 gimple_omp_for_collapse (last_stmt (region->entry)));
7959 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7960 &fd, loops);
7961 region->sched_kind = fd.sched_kind;
7962 region->sched_modifiers = fd.sched_modifiers;
7963 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7964 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7966 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7967 if ((loops[i].m1 || loops[i].m2)
7968 && (loops[i].m1 == NULL_TREE
7969 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7970 && (loops[i].m2 == NULL_TREE
7971 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7972 && TREE_CODE (loops[i].step) == INTEGER_CST
7973 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7975 tree t;
7976 tree itype = TREE_TYPE (loops[i].v);
7977 if (loops[i].m1 && loops[i].m2)
7978 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7979 else if (loops[i].m1)
7980 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7981 else
7982 t = loops[i].m2;
7983 t = fold_build2 (MULT_EXPR, itype, t,
7984 fold_convert (itype,
7985 loops[i - loops[i].outer].step));
7986 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7987 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7988 fold_build1 (NEGATE_EXPR, itype, t),
7989 fold_build1 (NEGATE_EXPR, itype,
7990 fold_convert (itype,
7991 loops[i].step)));
7992 else
7993 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7994 fold_convert (itype, loops[i].step));
7995 if (integer_nonzerop (t))
7996 error_at (gimple_location (fd.for_stmt),
7997 "invalid OpenMP non-rectangular loop step; "
7998 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7999 "step %qE",
8000 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8001 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8002 loops[i - loops[i].outer].step, i + 1,
8003 loops[i].step);
8007 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8008 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8009 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8010 if (region->cont)
8012 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8013 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8014 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8016 else
8017 /* If there isn't a continue then this is a degerate case where
8018 the introduction of abnormal edges during lowering will prevent
8019 original loops from being detected. Fix that up. */
8020 loops_state_set (LOOPS_NEED_FIXUP);
8022 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8023 expand_omp_simd (region, &fd);
8024 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8026 gcc_assert (!inner_stmt && !fd.non_rect);
8027 expand_oacc_for (region, &fd);
8029 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8031 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8032 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8033 else
8034 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8036 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8037 && !fd.have_ordered)
8039 if (fd.chunk_size == NULL)
8040 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8041 else
8042 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8044 else
8046 int fn_index, start_ix, next_ix;
8047 unsigned HOST_WIDE_INT sched = 0;
8048 tree sched_arg = NULL_TREE;
8050 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8051 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8052 if (fd.chunk_size == NULL
8053 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8054 fd.chunk_size = integer_zero_node;
8055 switch (fd.sched_kind)
8057 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8058 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8059 && fd.lastprivate_conditional == 0)
8061 gcc_assert (!fd.have_ordered);
8062 fn_index = 6;
8063 sched = 4;
8065 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8066 && !fd.have_ordered
8067 && fd.lastprivate_conditional == 0)
8068 fn_index = 7;
8069 else
8071 fn_index = 3;
8072 sched = (HOST_WIDE_INT_1U << 31);
8074 break;
8075 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8076 case OMP_CLAUSE_SCHEDULE_GUIDED:
8077 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8078 && !fd.have_ordered
8079 && fd.lastprivate_conditional == 0)
8081 fn_index = 3 + fd.sched_kind;
8082 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8083 break;
8085 fn_index = fd.sched_kind;
8086 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8087 sched += (HOST_WIDE_INT_1U << 31);
8088 break;
8089 case OMP_CLAUSE_SCHEDULE_STATIC:
8090 gcc_assert (fd.have_ordered);
8091 fn_index = 0;
8092 sched = (HOST_WIDE_INT_1U << 31) + 1;
8093 break;
8094 default:
8095 gcc_unreachable ();
8097 if (!fd.ordered)
8098 fn_index += fd.have_ordered * 8;
8099 if (fd.ordered)
8100 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8101 else
8102 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8103 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8104 if (fd.have_reductemp || fd.have_pointer_condtemp)
8106 if (fd.ordered)
8107 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8108 else if (fd.have_ordered)
8109 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8110 else
8111 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8112 sched_arg = build_int_cstu (long_integer_type_node, sched);
8113 if (!fd.chunk_size)
8114 fd.chunk_size = integer_zero_node;
8116 if (fd.iter_type == long_long_unsigned_type_node)
8118 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8119 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8120 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8121 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8123 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8124 (enum built_in_function) next_ix, sched_arg,
8125 inner_stmt);
8128 if (gimple_in_ssa_p (cfun))
8129 update_ssa (TODO_update_ssa_only_virtuals);
8132 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8134 v = GOMP_sections_start (n);
8136 switch (v)
8138 case 0:
8139 goto L2;
8140 case 1:
8141 section 1;
8142 goto L1;
8143 case 2:
8145 case n:
8147 default:
8148 abort ();
8151 v = GOMP_sections_next ();
8152 goto L0;
8154 reduction;
8156 If this is a combined parallel sections, replace the call to
8157 GOMP_sections_start with call to GOMP_sections_next. */
8159 static void
8160 expand_omp_sections (struct omp_region *region)
8162 tree t, u, vin = NULL, vmain, vnext, l2;
8163 unsigned len;
8164 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8165 gimple_stmt_iterator si, switch_si;
8166 gomp_sections *sections_stmt;
8167 gimple *stmt;
8168 gomp_continue *cont;
8169 edge_iterator ei;
8170 edge e;
8171 struct omp_region *inner;
8172 unsigned i, casei;
8173 bool exit_reachable = region->cont != NULL;
8175 gcc_assert (region->exit != NULL);
8176 entry_bb = region->entry;
8177 l0_bb = single_succ (entry_bb);
8178 l1_bb = region->cont;
8179 l2_bb = region->exit;
8180 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8181 l2 = gimple_block_label (l2_bb);
8182 else
8184 /* This can happen if there are reductions. */
8185 len = EDGE_COUNT (l0_bb->succs);
8186 gcc_assert (len > 0);
8187 e = EDGE_SUCC (l0_bb, len - 1);
8188 si = gsi_last_nondebug_bb (e->dest);
8189 l2 = NULL_TREE;
8190 if (gsi_end_p (si)
8191 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8192 l2 = gimple_block_label (e->dest);
8193 else
8194 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8196 si = gsi_last_nondebug_bb (e->dest);
8197 if (gsi_end_p (si)
8198 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8200 l2 = gimple_block_label (e->dest);
8201 break;
8205 if (exit_reachable)
8206 default_bb = create_empty_bb (l1_bb->prev_bb);
8207 else
8208 default_bb = create_empty_bb (l0_bb);
8210 /* We will build a switch() with enough cases for all the
8211 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8212 and a default case to abort if something goes wrong. */
8213 len = EDGE_COUNT (l0_bb->succs);
8215 /* Use vec::quick_push on label_vec throughout, since we know the size
8216 in advance. */
8217 auto_vec<tree> label_vec (len);
8219 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8220 GIMPLE_OMP_SECTIONS statement. */
8221 si = gsi_last_nondebug_bb (entry_bb);
8222 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8223 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8224 vin = gimple_omp_sections_control (sections_stmt);
8225 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8226 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8227 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8228 tree cond_var = NULL_TREE;
8229 if (reductmp || condtmp)
8231 tree reductions = null_pointer_node, mem = null_pointer_node;
8232 tree memv = NULL_TREE, condtemp = NULL_TREE;
8233 gimple_stmt_iterator gsi = gsi_none ();
8234 gimple *g = NULL;
8235 if (reductmp)
8237 reductions = OMP_CLAUSE_DECL (reductmp);
8238 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8239 g = SSA_NAME_DEF_STMT (reductions);
8240 reductions = gimple_assign_rhs1 (g);
8241 OMP_CLAUSE_DECL (reductmp) = reductions;
8242 gsi = gsi_for_stmt (g);
8244 else
8245 gsi = si;
8246 if (condtmp)
8248 condtemp = OMP_CLAUSE_DECL (condtmp);
8249 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8250 OMP_CLAUSE__CONDTEMP_);
8251 cond_var = OMP_CLAUSE_DECL (c);
8252 tree type = TREE_TYPE (condtemp);
8253 memv = create_tmp_var (type);
8254 TREE_ADDRESSABLE (memv) = 1;
8255 unsigned cnt = 0;
8256 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8257 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8258 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8259 ++cnt;
8260 unsigned HOST_WIDE_INT sz
8261 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8262 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8263 false);
8264 mem = build_fold_addr_expr (memv);
8266 t = build_int_cst (unsigned_type_node, len - 1);
8267 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8268 stmt = gimple_build_call (u, 3, t, reductions, mem);
8269 gimple_call_set_lhs (stmt, vin);
8270 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8271 if (condtmp)
8273 expand_omp_build_assign (&gsi, condtemp, memv, false);
8274 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8275 vin, build_one_cst (TREE_TYPE (cond_var)));
8276 expand_omp_build_assign (&gsi, cond_var, t, false);
8278 if (reductmp)
8280 gsi_remove (&gsi, true);
8281 release_ssa_name (gimple_assign_lhs (g));
8284 else if (!is_combined_parallel (region))
8286 /* If we are not inside a combined parallel+sections region,
8287 call GOMP_sections_start. */
8288 t = build_int_cst (unsigned_type_node, len - 1);
8289 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8290 stmt = gimple_build_call (u, 1, t);
8292 else
8294 /* Otherwise, call GOMP_sections_next. */
8295 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8296 stmt = gimple_build_call (u, 0);
8298 if (!reductmp && !condtmp)
8300 gimple_call_set_lhs (stmt, vin);
8301 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8303 gsi_remove (&si, true);
8305 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8306 L0_BB. */
8307 switch_si = gsi_last_nondebug_bb (l0_bb);
8308 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8309 if (exit_reachable)
8311 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8312 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8313 vmain = gimple_omp_continue_control_use (cont);
8314 vnext = gimple_omp_continue_control_def (cont);
8316 else
8318 vmain = vin;
8319 vnext = NULL_TREE;
8322 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8323 label_vec.quick_push (t);
8324 i = 1;
8326 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8327 for (inner = region->inner, casei = 1;
8328 inner;
8329 inner = inner->next, i++, casei++)
8331 basic_block s_entry_bb, s_exit_bb;
8333 /* Skip optional reduction region. */
8334 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8336 --i;
8337 --casei;
8338 continue;
8341 s_entry_bb = inner->entry;
8342 s_exit_bb = inner->exit;
8344 t = gimple_block_label (s_entry_bb);
8345 u = build_int_cst (unsigned_type_node, casei);
8346 u = build_case_label (u, NULL, t);
8347 label_vec.quick_push (u);
8349 si = gsi_last_nondebug_bb (s_entry_bb);
8350 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8351 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8352 gsi_remove (&si, true);
8353 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8355 if (s_exit_bb == NULL)
8356 continue;
8358 si = gsi_last_nondebug_bb (s_exit_bb);
8359 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8360 gsi_remove (&si, true);
8362 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8365 /* Error handling code goes in DEFAULT_BB. */
8366 t = gimple_block_label (default_bb);
8367 u = build_case_label (NULL, NULL, t);
8368 make_edge (l0_bb, default_bb, 0);
8369 add_bb_to_loop (default_bb, current_loops->tree_root);
8371 stmt = gimple_build_switch (vmain, u, label_vec);
8372 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8373 gsi_remove (&switch_si, true);
8375 si = gsi_start_bb (default_bb);
8376 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8377 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8379 if (exit_reachable)
8381 tree bfn_decl;
8383 /* Code to get the next section goes in L1_BB. */
8384 si = gsi_last_nondebug_bb (l1_bb);
8385 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8387 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8388 stmt = gimple_build_call (bfn_decl, 0);
8389 gimple_call_set_lhs (stmt, vnext);
8390 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8391 if (cond_var)
8393 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8394 vnext, build_one_cst (TREE_TYPE (cond_var)));
8395 expand_omp_build_assign (&si, cond_var, t, false);
8397 gsi_remove (&si, true);
8399 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8402 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8403 si = gsi_last_nondebug_bb (l2_bb);
8404 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8405 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8406 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8407 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8408 else
8409 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8410 stmt = gimple_build_call (t, 0);
8411 if (gimple_omp_return_lhs (gsi_stmt (si)))
8412 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8413 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8414 gsi_remove (&si, true);
8416 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8419 /* Expand code for an OpenMP single directive. We've already expanded
8420 much of the code, here we simply place the GOMP_barrier call. */
8422 static void
8423 expand_omp_single (struct omp_region *region)
8425 basic_block entry_bb, exit_bb;
8426 gimple_stmt_iterator si;
8428 entry_bb = region->entry;
8429 exit_bb = region->exit;
8431 si = gsi_last_nondebug_bb (entry_bb);
8432 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8433 gsi_remove (&si, true);
8434 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8436 si = gsi_last_nondebug_bb (exit_bb);
8437 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8439 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8440 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8442 gsi_remove (&si, true);
8443 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8446 /* Generic expansion for OpenMP synchronization directives: master,
8447 ordered and critical. All we need to do here is remove the entry
8448 and exit markers for REGION. */
8450 static void
8451 expand_omp_synch (struct omp_region *region)
8453 basic_block entry_bb, exit_bb;
8454 gimple_stmt_iterator si;
8456 entry_bb = region->entry;
8457 exit_bb = region->exit;
8459 si = gsi_last_nondebug_bb (entry_bb);
8460 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8461 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8462 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8463 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8464 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8465 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8466 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8467 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8469 expand_omp_taskreg (region);
8470 return;
8472 gsi_remove (&si, true);
8473 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8475 if (exit_bb)
8477 si = gsi_last_nondebug_bb (exit_bb);
8478 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8479 gsi_remove (&si, true);
8480 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8484 /* Translate enum omp_memory_order to enum memmodel. The two enums
8485 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8486 is 0. */
8488 static enum memmodel
8489 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8491 switch (mo)
8493 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8494 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8495 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8496 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8497 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8498 default: gcc_unreachable ();
8502 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8503 operation as a normal volatile load. */
8505 static bool
8506 expand_omp_atomic_load (basic_block load_bb, tree addr,
8507 tree loaded_val, int index)
8509 enum built_in_function tmpbase;
8510 gimple_stmt_iterator gsi;
8511 basic_block store_bb;
8512 location_t loc;
8513 gimple *stmt;
8514 tree decl, call, type, itype;
8516 gsi = gsi_last_nondebug_bb (load_bb);
8517 stmt = gsi_stmt (gsi);
8518 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8519 loc = gimple_location (stmt);
8521 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8522 is smaller than word size, then expand_atomic_load assumes that the load
8523 is atomic. We could avoid the builtin entirely in this case. */
8525 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8526 decl = builtin_decl_explicit (tmpbase);
8527 if (decl == NULL_TREE)
8528 return false;
8530 type = TREE_TYPE (loaded_val);
8531 itype = TREE_TYPE (TREE_TYPE (decl));
8533 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8534 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8535 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8536 if (!useless_type_conversion_p (type, itype))
8537 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8538 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8540 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8541 gsi_remove (&gsi, true);
8543 store_bb = single_succ (load_bb);
8544 gsi = gsi_last_nondebug_bb (store_bb);
8545 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8546 gsi_remove (&gsi, true);
8548 if (gimple_in_ssa_p (cfun))
8549 update_ssa (TODO_update_ssa_no_phi);
8551 return true;
8554 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8555 operation as a normal volatile store. */
8557 static bool
8558 expand_omp_atomic_store (basic_block load_bb, tree addr,
8559 tree loaded_val, tree stored_val, int index)
8561 enum built_in_function tmpbase;
8562 gimple_stmt_iterator gsi;
8563 basic_block store_bb = single_succ (load_bb);
8564 location_t loc;
8565 gimple *stmt;
8566 tree decl, call, type, itype;
8567 machine_mode imode;
8568 bool exchange;
8570 gsi = gsi_last_nondebug_bb (load_bb);
8571 stmt = gsi_stmt (gsi);
8572 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8574 /* If the load value is needed, then this isn't a store but an exchange. */
8575 exchange = gimple_omp_atomic_need_value_p (stmt);
8577 gsi = gsi_last_nondebug_bb (store_bb);
8578 stmt = gsi_stmt (gsi);
8579 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8580 loc = gimple_location (stmt);
8582 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8583 is smaller than word size, then expand_atomic_store assumes that the store
8584 is atomic. We could avoid the builtin entirely in this case. */
8586 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8587 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8588 decl = builtin_decl_explicit (tmpbase);
8589 if (decl == NULL_TREE)
8590 return false;
8592 type = TREE_TYPE (stored_val);
8594 /* Dig out the type of the function's second argument. */
8595 itype = TREE_TYPE (decl);
8596 itype = TYPE_ARG_TYPES (itype);
8597 itype = TREE_CHAIN (itype);
8598 itype = TREE_VALUE (itype);
8599 imode = TYPE_MODE (itype);
8601 if (exchange && !can_atomic_exchange_p (imode, true))
8602 return false;
8604 if (!useless_type_conversion_p (itype, type))
8605 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8606 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8607 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8608 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8609 if (exchange)
8611 if (!useless_type_conversion_p (type, itype))
8612 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8613 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8616 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8617 gsi_remove (&gsi, true);
8619 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8620 gsi = gsi_last_nondebug_bb (load_bb);
8621 gsi_remove (&gsi, true);
8623 if (gimple_in_ssa_p (cfun))
8624 update_ssa (TODO_update_ssa_no_phi);
8626 return true;
8629 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8630 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8631 size of the data type, and thus usable to find the index of the builtin
8632 decl. Returns false if the expression is not of the proper form. */
8634 static bool
8635 expand_omp_atomic_fetch_op (basic_block load_bb,
8636 tree addr, tree loaded_val,
8637 tree stored_val, int index)
8639 enum built_in_function oldbase, newbase, tmpbase;
8640 tree decl, itype, call;
8641 tree lhs, rhs;
8642 basic_block store_bb = single_succ (load_bb);
8643 gimple_stmt_iterator gsi;
8644 gimple *stmt;
8645 location_t loc;
8646 enum tree_code code;
8647 bool need_old, need_new;
8648 machine_mode imode;
8650 /* We expect to find the following sequences:
8652 load_bb:
8653 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8655 store_bb:
8656 val = tmp OP something; (or: something OP tmp)
8657 GIMPLE_OMP_STORE (val)
8659 ???FIXME: Allow a more flexible sequence.
8660 Perhaps use data flow to pick the statements.
8664 gsi = gsi_after_labels (store_bb);
8665 stmt = gsi_stmt (gsi);
8666 if (is_gimple_debug (stmt))
8668 gsi_next_nondebug (&gsi);
8669 if (gsi_end_p (gsi))
8670 return false;
8671 stmt = gsi_stmt (gsi);
8673 loc = gimple_location (stmt);
8674 if (!is_gimple_assign (stmt))
8675 return false;
8676 gsi_next_nondebug (&gsi);
8677 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8678 return false;
8679 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8680 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8681 enum omp_memory_order omo
8682 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8683 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8684 gcc_checking_assert (!need_old || !need_new);
8686 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8687 return false;
8689 /* Check for one of the supported fetch-op operations. */
8690 code = gimple_assign_rhs_code (stmt);
8691 switch (code)
8693 case PLUS_EXPR:
8694 case POINTER_PLUS_EXPR:
8695 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8696 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8697 break;
8698 case MINUS_EXPR:
8699 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8700 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8701 break;
8702 case BIT_AND_EXPR:
8703 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8704 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8705 break;
8706 case BIT_IOR_EXPR:
8707 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8708 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8709 break;
8710 case BIT_XOR_EXPR:
8711 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8712 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8713 break;
8714 default:
8715 return false;
8718 /* Make sure the expression is of the proper form. */
8719 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8720 rhs = gimple_assign_rhs2 (stmt);
8721 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8722 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8723 rhs = gimple_assign_rhs1 (stmt);
8724 else
8725 return false;
8727 tmpbase = ((enum built_in_function)
8728 ((need_new ? newbase : oldbase) + index + 1));
8729 decl = builtin_decl_explicit (tmpbase);
8730 if (decl == NULL_TREE)
8731 return false;
8732 itype = TREE_TYPE (TREE_TYPE (decl));
8733 imode = TYPE_MODE (itype);
8735 /* We could test all of the various optabs involved, but the fact of the
8736 matter is that (with the exception of i486 vs i586 and xadd) all targets
8737 that support any atomic operaton optab also implements compare-and-swap.
8738 Let optabs.c take care of expanding any compare-and-swap loop. */
8739 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8740 return false;
8742 gsi = gsi_last_nondebug_bb (load_bb);
8743 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8745 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8746 It only requires that the operation happen atomically. Thus we can
8747 use the RELAXED memory model. */
8748 call = build_call_expr_loc (loc, decl, 3, addr,
8749 fold_convert_loc (loc, itype, rhs),
8750 build_int_cst (NULL, mo));
8752 if (need_old || need_new)
8754 lhs = need_old ? loaded_val : stored_val;
8755 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8756 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8758 else
8759 call = fold_convert_loc (loc, void_type_node, call);
8760 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8761 gsi_remove (&gsi, true);
8763 gsi = gsi_last_nondebug_bb (store_bb);
8764 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8765 gsi_remove (&gsi, true);
8766 gsi = gsi_last_nondebug_bb (store_bb);
8767 stmt = gsi_stmt (gsi);
8768 gsi_remove (&gsi, true);
8770 if (gimple_in_ssa_p (cfun))
8772 release_defs (stmt);
8773 update_ssa (TODO_update_ssa_no_phi);
8776 return true;
8779 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8781 oldval = *addr;
8782 repeat:
8783 newval = rhs; // with oldval replacing *addr in rhs
8784 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8785 if (oldval != newval)
8786 goto repeat;
8788 INDEX is log2 of the size of the data type, and thus usable to find the
8789 index of the builtin decl. */
8791 static bool
8792 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8793 tree addr, tree loaded_val, tree stored_val,
8794 int index)
8796 tree loadedi, storedi, initial, new_storedi, old_vali;
8797 tree type, itype, cmpxchg, iaddr, atype;
8798 gimple_stmt_iterator si;
8799 basic_block loop_header = single_succ (load_bb);
8800 gimple *phi, *stmt;
8801 edge e;
8802 enum built_in_function fncode;
8804 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8805 order to use the RELAXED memory model effectively. */
8806 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8807 + index + 1);
8808 cmpxchg = builtin_decl_explicit (fncode);
8809 if (cmpxchg == NULL_TREE)
8810 return false;
8811 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8812 atype = type;
8813 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8815 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8816 || !can_atomic_load_p (TYPE_MODE (itype)))
8817 return false;
8819 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8820 si = gsi_last_nondebug_bb (load_bb);
8821 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8823 /* For floating-point values, we'll need to view-convert them to integers
8824 so that we can perform the atomic compare and swap. Simplify the
8825 following code by always setting up the "i"ntegral variables. */
8826 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8828 tree iaddr_val;
8830 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8831 true));
8832 atype = itype;
8833 iaddr_val
8834 = force_gimple_operand_gsi (&si,
8835 fold_convert (TREE_TYPE (iaddr), addr),
8836 false, NULL_TREE, true, GSI_SAME_STMT);
8837 stmt = gimple_build_assign (iaddr, iaddr_val);
8838 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8839 loadedi = create_tmp_var (itype);
8840 if (gimple_in_ssa_p (cfun))
8841 loadedi = make_ssa_name (loadedi);
8843 else
8845 iaddr = addr;
8846 loadedi = loaded_val;
8849 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8850 tree loaddecl = builtin_decl_explicit (fncode);
8851 if (loaddecl)
8852 initial
8853 = fold_convert (atype,
8854 build_call_expr (loaddecl, 2, iaddr,
8855 build_int_cst (NULL_TREE,
8856 MEMMODEL_RELAXED)));
8857 else
8859 tree off
8860 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8861 true), 0);
8862 initial = build2 (MEM_REF, atype, iaddr, off);
8865 initial
8866 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8867 GSI_SAME_STMT);
8869 /* Move the value to the LOADEDI temporary. */
8870 if (gimple_in_ssa_p (cfun))
8872 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8873 phi = create_phi_node (loadedi, loop_header);
8874 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8875 initial);
8877 else
8878 gsi_insert_before (&si,
8879 gimple_build_assign (loadedi, initial),
8880 GSI_SAME_STMT);
8881 if (loadedi != loaded_val)
8883 gimple_stmt_iterator gsi2;
8884 tree x;
8886 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8887 gsi2 = gsi_start_bb (loop_header);
8888 if (gimple_in_ssa_p (cfun))
8890 gassign *stmt;
8891 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8892 true, GSI_SAME_STMT);
8893 stmt = gimple_build_assign (loaded_val, x);
8894 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8896 else
8898 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8899 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8900 true, GSI_SAME_STMT);
8903 gsi_remove (&si, true);
8905 si = gsi_last_nondebug_bb (store_bb);
8906 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8908 if (iaddr == addr)
8909 storedi = stored_val;
8910 else
8911 storedi
8912 = force_gimple_operand_gsi (&si,
8913 build1 (VIEW_CONVERT_EXPR, itype,
8914 stored_val), true, NULL_TREE, true,
8915 GSI_SAME_STMT);
8917 /* Build the compare&swap statement. */
8918 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8919 new_storedi = force_gimple_operand_gsi (&si,
8920 fold_convert (TREE_TYPE (loadedi),
8921 new_storedi),
8922 true, NULL_TREE,
8923 true, GSI_SAME_STMT);
8925 if (gimple_in_ssa_p (cfun))
8926 old_vali = loadedi;
8927 else
8929 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8930 stmt = gimple_build_assign (old_vali, loadedi);
8931 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8933 stmt = gimple_build_assign (loadedi, new_storedi);
8934 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8937 /* Note that we always perform the comparison as an integer, even for
8938 floating point. This allows the atomic operation to properly
8939 succeed even with NaNs and -0.0. */
8940 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8941 stmt = gimple_build_cond_empty (ne);
8942 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8944 /* Update cfg. */
8945 e = single_succ_edge (store_bb);
8946 e->flags &= ~EDGE_FALLTHRU;
8947 e->flags |= EDGE_FALSE_VALUE;
8948 /* Expect no looping. */
8949 e->probability = profile_probability::guessed_always ();
8951 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8952 e->probability = profile_probability::guessed_never ();
8954 /* Copy the new value to loadedi (we already did that before the condition
8955 if we are not in SSA). */
8956 if (gimple_in_ssa_p (cfun))
8958 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8959 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8962 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8963 gsi_remove (&si, true);
8965 class loop *loop = alloc_loop ();
8966 loop->header = loop_header;
8967 loop->latch = store_bb;
8968 add_loop (loop, loop_header->loop_father);
8970 if (gimple_in_ssa_p (cfun))
8971 update_ssa (TODO_update_ssa_no_phi);
8973 return true;
8976 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8978 GOMP_atomic_start ();
8979 *addr = rhs;
8980 GOMP_atomic_end ();
8982 The result is not globally atomic, but works so long as all parallel
8983 references are within #pragma omp atomic directives. According to
8984 responses received from omp@openmp.org, appears to be within spec.
8985 Which makes sense, since that's how several other compilers handle
8986 this situation as well.
8987 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8988 expanding. STORED_VAL is the operand of the matching
8989 GIMPLE_OMP_ATOMIC_STORE.
8991 We replace
8992 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8993 loaded_val = *addr;
8995 and replace
8996 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8997 *addr = stored_val;
9000 static bool
9001 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9002 tree addr, tree loaded_val, tree stored_val)
9004 gimple_stmt_iterator si;
9005 gassign *stmt;
9006 tree t;
9008 si = gsi_last_nondebug_bb (load_bb);
9009 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9011 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9012 t = build_call_expr (t, 0);
9013 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9015 tree mem = build_simple_mem_ref (addr);
9016 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9017 TREE_OPERAND (mem, 1)
9018 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9019 true),
9020 TREE_OPERAND (mem, 1));
9021 stmt = gimple_build_assign (loaded_val, mem);
9022 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9023 gsi_remove (&si, true);
9025 si = gsi_last_nondebug_bb (store_bb);
9026 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9028 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9029 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9031 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9032 t = build_call_expr (t, 0);
9033 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9034 gsi_remove (&si, true);
9036 if (gimple_in_ssa_p (cfun))
9037 update_ssa (TODO_update_ssa_no_phi);
9038 return true;
9041 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
9042 using expand_omp_atomic_fetch_op. If it failed, we try to
9043 call expand_omp_atomic_pipeline, and if it fails too, the
9044 ultimate fallback is wrapping the operation in a mutex
9045 (expand_omp_atomic_mutex). REGION is the atomic region built
9046 by build_omp_regions_1(). */
9048 static void
9049 expand_omp_atomic (struct omp_region *region)
9051 basic_block load_bb = region->entry, store_bb = region->exit;
9052 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9053 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9054 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9055 tree addr = gimple_omp_atomic_load_rhs (load);
9056 tree stored_val = gimple_omp_atomic_store_val (store);
9057 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9058 HOST_WIDE_INT index;
9060 /* Make sure the type is one of the supported sizes. */
9061 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9062 index = exact_log2 (index);
9063 if (index >= 0 && index <= 4)
9065 unsigned int align = TYPE_ALIGN_UNIT (type);
9067 /* __sync builtins require strict data alignment. */
9068 if (exact_log2 (align) >= index)
9070 /* Atomic load. */
9071 scalar_mode smode;
9072 if (loaded_val == stored_val
9073 && (is_int_mode (TYPE_MODE (type), &smode)
9074 || is_float_mode (TYPE_MODE (type), &smode))
9075 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9076 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9077 return;
9079 /* Atomic store. */
9080 if ((is_int_mode (TYPE_MODE (type), &smode)
9081 || is_float_mode (TYPE_MODE (type), &smode))
9082 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9083 && store_bb == single_succ (load_bb)
9084 && first_stmt (store_bb) == store
9085 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9086 stored_val, index))
9087 return;
9089 /* When possible, use specialized atomic update functions. */
9090 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9091 && store_bb == single_succ (load_bb)
9092 && expand_omp_atomic_fetch_op (load_bb, addr,
9093 loaded_val, stored_val, index))
9094 return;
9096 /* If we don't have specialized __sync builtins, try and implement
9097 as a compare and swap loop. */
9098 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9099 loaded_val, stored_val, index))
9100 return;
9104 /* The ultimate fallback is wrapping the operation in a mutex. */
9105 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9108 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9109 at REGION_EXIT. */
9111 static void
9112 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9113 basic_block region_exit)
9115 class loop *outer = region_entry->loop_father;
9116 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9118 /* Don't parallelize the kernels region if it contains more than one outer
9119 loop. */
9120 unsigned int nr_outer_loops = 0;
9121 class loop *single_outer = NULL;
9122 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9124 gcc_assert (loop_outer (loop) == outer);
9126 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9127 continue;
9129 if (region_exit != NULL
9130 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9131 continue;
9133 nr_outer_loops++;
9134 single_outer = loop;
9136 if (nr_outer_loops != 1)
9137 return;
9139 for (class loop *loop = single_outer->inner;
9140 loop != NULL;
9141 loop = loop->inner)
9142 if (loop->next)
9143 return;
9145 /* Mark the loops in the region. */
9146 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9147 loop->in_oacc_kernels_region = true;
9150 /* Build target argument identifier from the DEVICE identifier, value
9151 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9153 static tree
9154 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9156 tree t = build_int_cst (integer_type_node, device);
9157 if (subseqent_param)
9158 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9159 build_int_cst (integer_type_node,
9160 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9161 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9162 build_int_cst (integer_type_node, id));
9163 return t;
9166 /* Like above but return it in type that can be directly stored as an element
9167 of the argument array. */
9169 static tree
9170 get_target_argument_identifier (int device, bool subseqent_param, int id)
9172 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9173 return fold_convert (ptr_type_node, t);
9176 /* Return a target argument consisting of DEVICE identifier, value identifier
9177 ID, and the actual VALUE. */
9179 static tree
9180 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9181 tree value)
9183 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9184 fold_convert (integer_type_node, value),
9185 build_int_cst (unsigned_type_node,
9186 GOMP_TARGET_ARG_VALUE_SHIFT));
9187 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9188 get_target_argument_identifier_1 (device, false, id));
9189 t = fold_convert (ptr_type_node, t);
9190 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9193 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9194 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9195 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9196 arguments. */
9198 static void
9199 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9200 int id, tree value, vec <tree> *args)
9202 if (tree_fits_shwi_p (value)
9203 && tree_to_shwi (value) > -(1 << 15)
9204 && tree_to_shwi (value) < (1 << 15))
9205 args->quick_push (get_target_argument_value (gsi, device, id, value));
9206 else
9208 args->quick_push (get_target_argument_identifier (device, true, id));
9209 value = fold_convert (ptr_type_node, value);
9210 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9211 GSI_SAME_STMT);
9212 args->quick_push (value);
9216 /* Create an array of arguments that is then passed to GOMP_target. */
9218 static tree
9219 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9221 auto_vec <tree, 6> args;
9222 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9223 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9224 if (c)
9225 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9226 else
9227 t = integer_minus_one_node;
9228 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9229 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9231 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9232 if (c)
9233 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9234 else
9235 t = integer_minus_one_node;
9236 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9237 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9238 &args);
9240 /* Produce more, perhaps device specific, arguments here. */
9242 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9243 args.length () + 1),
9244 ".omp_target_args");
9245 for (unsigned i = 0; i < args.length (); i++)
9247 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9248 build_int_cst (integer_type_node, i),
9249 NULL_TREE, NULL_TREE);
9250 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9251 GSI_SAME_STMT);
9253 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9254 build_int_cst (integer_type_node, args.length ()),
9255 NULL_TREE, NULL_TREE);
9256 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9257 GSI_SAME_STMT);
9258 TREE_ADDRESSABLE (argarray) = 1;
9259 return build_fold_addr_expr (argarray);
9262 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9264 static void
9265 expand_omp_target (struct omp_region *region)
9267 basic_block entry_bb, exit_bb, new_bb;
9268 struct function *child_cfun;
9269 tree child_fn, block, t;
9270 gimple_stmt_iterator gsi;
9271 gomp_target *entry_stmt;
9272 gimple *stmt;
9273 edge e;
9274 bool offloaded;
9275 int target_kind;
9277 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9278 target_kind = gimple_omp_target_kind (entry_stmt);
9279 new_bb = region->entry;
9281 offloaded = is_gimple_omp_offloaded (entry_stmt);
9282 switch (target_kind)
9284 case GF_OMP_TARGET_KIND_REGION:
9285 case GF_OMP_TARGET_KIND_UPDATE:
9286 case GF_OMP_TARGET_KIND_ENTER_DATA:
9287 case GF_OMP_TARGET_KIND_EXIT_DATA:
9288 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9289 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9290 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9291 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9292 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9293 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9294 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9295 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9296 case GF_OMP_TARGET_KIND_DATA:
9297 case GF_OMP_TARGET_KIND_OACC_DATA:
9298 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9299 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9300 break;
9301 default:
9302 gcc_unreachable ();
9305 child_fn = NULL_TREE;
9306 child_cfun = NULL;
9307 if (offloaded)
9309 child_fn = gimple_omp_target_child_fn (entry_stmt);
9310 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9313 /* Supported by expand_omp_taskreg, but not here. */
9314 if (child_cfun != NULL)
9315 gcc_checking_assert (!child_cfun->cfg);
9316 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9318 entry_bb = region->entry;
9319 exit_bb = region->exit;
9321 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9322 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9324 /* Going on, all OpenACC compute constructs are mapped to
9325 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9326 To distinguish between them, we attach attributes. */
9327 switch (target_kind)
9329 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9330 DECL_ATTRIBUTES (child_fn)
9331 = tree_cons (get_identifier ("oacc parallel"),
9332 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9333 break;
9334 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9335 DECL_ATTRIBUTES (child_fn)
9336 = tree_cons (get_identifier ("oacc kernels"),
9337 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9338 break;
9339 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9340 DECL_ATTRIBUTES (child_fn)
9341 = tree_cons (get_identifier ("oacc serial"),
9342 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9343 break;
9344 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9345 DECL_ATTRIBUTES (child_fn)
9346 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9347 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9348 break;
9349 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9350 DECL_ATTRIBUTES (child_fn)
9351 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9352 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9353 break;
9354 default:
9355 /* Make sure we don't miss any. */
9356 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9357 && is_gimple_omp_offloaded (entry_stmt)));
9358 break;
9361 if (offloaded)
9363 unsigned srcidx, dstidx, num;
9365 /* If the offloading region needs data sent from the parent
9366 function, then the very first statement (except possible
9367 tree profile counter updates) of the offloading body
9368 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9369 &.OMP_DATA_O is passed as an argument to the child function,
9370 we need to replace it with the argument as seen by the child
9371 function.
9373 In most cases, this will end up being the identity assignment
9374 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9375 a function call that has been inlined, the original PARM_DECL
9376 .OMP_DATA_I may have been converted into a different local
9377 variable. In which case, we need to keep the assignment. */
9378 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9379 if (data_arg)
9381 basic_block entry_succ_bb = single_succ (entry_bb);
9382 gimple_stmt_iterator gsi;
9383 tree arg;
9384 gimple *tgtcopy_stmt = NULL;
9385 tree sender = TREE_VEC_ELT (data_arg, 0);
9387 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9389 gcc_assert (!gsi_end_p (gsi));
9390 stmt = gsi_stmt (gsi);
9391 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9392 continue;
9394 if (gimple_num_ops (stmt) == 2)
9396 tree arg = gimple_assign_rhs1 (stmt);
9398 /* We're ignoring the subcode because we're
9399 effectively doing a STRIP_NOPS. */
9401 if (TREE_CODE (arg) == ADDR_EXPR
9402 && TREE_OPERAND (arg, 0) == sender)
9404 tgtcopy_stmt = stmt;
9405 break;
9410 gcc_assert (tgtcopy_stmt != NULL);
9411 arg = DECL_ARGUMENTS (child_fn);
9413 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9414 gsi_remove (&gsi, true);
9417 /* Declare local variables needed in CHILD_CFUN. */
9418 block = DECL_INITIAL (child_fn);
9419 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9420 /* The gimplifier could record temporaries in the offloading block
9421 rather than in containing function's local_decls chain,
9422 which would mean cgraph missed finalizing them. Do it now. */
9423 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9424 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9425 varpool_node::finalize_decl (t);
9426 DECL_SAVED_TREE (child_fn) = NULL;
9427 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9428 gimple_set_body (child_fn, NULL);
9429 TREE_USED (block) = 1;
9431 /* Reset DECL_CONTEXT on function arguments. */
9432 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9433 DECL_CONTEXT (t) = child_fn;
9435 /* Split ENTRY_BB at GIMPLE_*,
9436 so that it can be moved to the child function. */
9437 gsi = gsi_last_nondebug_bb (entry_bb);
9438 stmt = gsi_stmt (gsi);
9439 gcc_assert (stmt
9440 && gimple_code (stmt) == gimple_code (entry_stmt));
9441 e = split_block (entry_bb, stmt);
9442 gsi_remove (&gsi, true);
9443 entry_bb = e->dest;
9444 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9446 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9447 if (exit_bb)
9449 gsi = gsi_last_nondebug_bb (exit_bb);
9450 gcc_assert (!gsi_end_p (gsi)
9451 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9452 stmt = gimple_build_return (NULL);
9453 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9454 gsi_remove (&gsi, true);
9457 /* Move the offloading region into CHILD_CFUN. */
9459 block = gimple_block (entry_stmt);
9461 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9462 if (exit_bb)
9463 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9464 /* When the OMP expansion process cannot guarantee an up-to-date
9465 loop tree arrange for the child function to fixup loops. */
9466 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9467 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9469 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9470 num = vec_safe_length (child_cfun->local_decls);
9471 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9473 t = (*child_cfun->local_decls)[srcidx];
9474 if (DECL_CONTEXT (t) == cfun->decl)
9475 continue;
9476 if (srcidx != dstidx)
9477 (*child_cfun->local_decls)[dstidx] = t;
9478 dstidx++;
9480 if (dstidx != num)
9481 vec_safe_truncate (child_cfun->local_decls, dstidx);
9483 /* Inform the callgraph about the new function. */
9484 child_cfun->curr_properties = cfun->curr_properties;
9485 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9486 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9487 cgraph_node *node = cgraph_node::get_create (child_fn);
9488 node->parallelized_function = 1;
9489 cgraph_node::add_new_function (child_fn, true);
9491 /* Add the new function to the offload table. */
9492 if (ENABLE_OFFLOADING)
9494 if (in_lto_p)
9495 DECL_PRESERVE_P (child_fn) = 1;
9496 vec_safe_push (offload_funcs, child_fn);
9499 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9500 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9502 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9503 fixed in a following pass. */
9504 push_cfun (child_cfun);
9505 if (need_asm)
9506 assign_assembler_name_if_needed (child_fn);
9507 cgraph_edge::rebuild_edges ();
9509 /* Some EH regions might become dead, see PR34608. If
9510 pass_cleanup_cfg isn't the first pass to happen with the
9511 new child, these dead EH edges might cause problems.
9512 Clean them up now. */
9513 if (flag_exceptions)
9515 basic_block bb;
9516 bool changed = false;
9518 FOR_EACH_BB_FN (bb, cfun)
9519 changed |= gimple_purge_dead_eh_edges (bb);
9520 if (changed)
9521 cleanup_tree_cfg ();
9523 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9524 verify_loop_structure ();
9525 pop_cfun ();
9527 if (dump_file && !gimple_in_ssa_p (cfun))
9529 omp_any_child_fn_dumped = true;
9530 dump_function_header (dump_file, child_fn, dump_flags);
9531 dump_function_to_file (child_fn, dump_file, dump_flags);
9534 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9537 /* Emit a library call to launch the offloading region, or do data
9538 transfers. */
9539 tree t1, t2, t3, t4, depend, c, clauses;
9540 enum built_in_function start_ix;
9541 unsigned int flags_i = 0;
9543 switch (gimple_omp_target_kind (entry_stmt))
9545 case GF_OMP_TARGET_KIND_REGION:
9546 start_ix = BUILT_IN_GOMP_TARGET;
9547 break;
9548 case GF_OMP_TARGET_KIND_DATA:
9549 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9550 break;
9551 case GF_OMP_TARGET_KIND_UPDATE:
9552 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9553 break;
9554 case GF_OMP_TARGET_KIND_ENTER_DATA:
9555 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9556 break;
9557 case GF_OMP_TARGET_KIND_EXIT_DATA:
9558 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9559 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9560 break;
9561 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9562 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9563 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9564 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9565 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9566 start_ix = BUILT_IN_GOACC_PARALLEL;
9567 break;
9568 case GF_OMP_TARGET_KIND_OACC_DATA:
9569 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9570 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9571 start_ix = BUILT_IN_GOACC_DATA_START;
9572 break;
9573 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9574 start_ix = BUILT_IN_GOACC_UPDATE;
9575 break;
9576 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9577 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9578 break;
9579 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9580 start_ix = BUILT_IN_GOACC_DECLARE;
9581 break;
9582 default:
9583 gcc_unreachable ();
9586 clauses = gimple_omp_target_clauses (entry_stmt);
9588 tree device = NULL_TREE;
9589 location_t device_loc = UNKNOWN_LOCATION;
9590 tree goacc_flags = NULL_TREE;
9591 if (is_gimple_omp_oacc (entry_stmt))
9593 /* By default, no GOACC_FLAGs are set. */
9594 goacc_flags = integer_zero_node;
9596 else
9598 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9599 if (c)
9601 device = OMP_CLAUSE_DEVICE_ID (c);
9602 device_loc = OMP_CLAUSE_LOCATION (c);
9604 else
9606 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9607 library choose). */
9608 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9609 device_loc = gimple_location (entry_stmt);
9612 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9613 if (c)
9614 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9617 /* By default, there is no conditional. */
9618 tree cond = NULL_TREE;
9619 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9620 if (c)
9621 cond = OMP_CLAUSE_IF_EXPR (c);
9622 /* If we found the clause 'if (cond)', build:
9623 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9624 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9625 if (cond)
9627 tree *tp;
9628 if (is_gimple_omp_oacc (entry_stmt))
9629 tp = &goacc_flags;
9630 else
9632 /* Ensure 'device' is of the correct type. */
9633 device = fold_convert_loc (device_loc, integer_type_node, device);
9635 tp = &device;
9638 cond = gimple_boolify (cond);
9640 basic_block cond_bb, then_bb, else_bb;
9641 edge e;
9642 tree tmp_var;
9644 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9645 if (offloaded)
9646 e = split_block_after_labels (new_bb);
9647 else
9649 gsi = gsi_last_nondebug_bb (new_bb);
9650 gsi_prev (&gsi);
9651 e = split_block (new_bb, gsi_stmt (gsi));
9653 cond_bb = e->src;
9654 new_bb = e->dest;
9655 remove_edge (e);
9657 then_bb = create_empty_bb (cond_bb);
9658 else_bb = create_empty_bb (then_bb);
9659 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9660 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9662 stmt = gimple_build_cond_empty (cond);
9663 gsi = gsi_last_bb (cond_bb);
9664 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9666 gsi = gsi_start_bb (then_bb);
9667 stmt = gimple_build_assign (tmp_var, *tp);
9668 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9670 gsi = gsi_start_bb (else_bb);
9671 if (is_gimple_omp_oacc (entry_stmt))
9672 stmt = gimple_build_assign (tmp_var,
9673 BIT_IOR_EXPR,
9674 *tp,
9675 build_int_cst (integer_type_node,
9676 GOACC_FLAG_HOST_FALLBACK));
9677 else
9678 stmt = gimple_build_assign (tmp_var,
9679 build_int_cst (integer_type_node,
9680 GOMP_DEVICE_HOST_FALLBACK));
9681 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9683 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9684 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9685 add_bb_to_loop (then_bb, cond_bb->loop_father);
9686 add_bb_to_loop (else_bb, cond_bb->loop_father);
9687 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9688 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9690 *tp = tmp_var;
9692 gsi = gsi_last_nondebug_bb (new_bb);
9694 else
9696 gsi = gsi_last_nondebug_bb (new_bb);
9698 if (device != NULL_TREE)
9699 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9700 true, GSI_SAME_STMT);
9703 t = gimple_omp_target_data_arg (entry_stmt);
9704 if (t == NULL)
9706 t1 = size_zero_node;
9707 t2 = build_zero_cst (ptr_type_node);
9708 t3 = t2;
9709 t4 = t2;
9711 else
9713 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9714 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9715 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9716 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9717 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9720 gimple *g;
9721 bool tagging = false;
9722 /* The maximum number used by any start_ix, without varargs. */
9723 auto_vec<tree, 11> args;
9724 if (is_gimple_omp_oacc (entry_stmt))
9726 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9727 TREE_TYPE (goacc_flags), goacc_flags);
9728 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9729 NULL_TREE, true,
9730 GSI_SAME_STMT);
9731 args.quick_push (goacc_flags_m);
9733 else
9734 args.quick_push (device);
9735 if (offloaded)
9736 args.quick_push (build_fold_addr_expr (child_fn));
9737 args.quick_push (t1);
9738 args.quick_push (t2);
9739 args.quick_push (t3);
9740 args.quick_push (t4);
9741 switch (start_ix)
9743 case BUILT_IN_GOACC_DATA_START:
9744 case BUILT_IN_GOACC_DECLARE:
9745 case BUILT_IN_GOMP_TARGET_DATA:
9746 break;
9747 case BUILT_IN_GOMP_TARGET:
9748 case BUILT_IN_GOMP_TARGET_UPDATE:
9749 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9750 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9751 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9752 if (c)
9753 depend = OMP_CLAUSE_DECL (c);
9754 else
9755 depend = build_int_cst (ptr_type_node, 0);
9756 args.quick_push (depend);
9757 if (start_ix == BUILT_IN_GOMP_TARGET)
9758 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9759 break;
9760 case BUILT_IN_GOACC_PARALLEL:
9761 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9763 tree dims = NULL_TREE;
9764 unsigned int ix;
9766 /* For serial constructs we set all dimensions to 1. */
9767 for (ix = GOMP_DIM_MAX; ix--;)
9768 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9769 oacc_replace_fn_attrib (child_fn, dims);
9771 else
9772 oacc_set_fn_attrib (child_fn, clauses, &args);
9773 tagging = true;
9774 /* FALLTHRU */
9775 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9776 case BUILT_IN_GOACC_UPDATE:
9778 tree t_async = NULL_TREE;
9780 /* If present, use the value specified by the respective
9781 clause, making sure that is of the correct type. */
9782 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9783 if (c)
9784 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9785 integer_type_node,
9786 OMP_CLAUSE_ASYNC_EXPR (c));
9787 else if (!tagging)
9788 /* Default values for t_async. */
9789 t_async = fold_convert_loc (gimple_location (entry_stmt),
9790 integer_type_node,
9791 build_int_cst (integer_type_node,
9792 GOMP_ASYNC_SYNC));
9793 if (tagging && t_async)
9795 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9797 if (TREE_CODE (t_async) == INTEGER_CST)
9799 /* See if we can pack the async arg in to the tag's
9800 operand. */
9801 i_async = TREE_INT_CST_LOW (t_async);
9802 if (i_async < GOMP_LAUNCH_OP_MAX)
9803 t_async = NULL_TREE;
9804 else
9805 i_async = GOMP_LAUNCH_OP_MAX;
9807 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9808 i_async));
9810 if (t_async)
9811 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9812 NULL_TREE, true,
9813 GSI_SAME_STMT));
9815 /* Save the argument index, and ... */
9816 unsigned t_wait_idx = args.length ();
9817 unsigned num_waits = 0;
9818 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9819 if (!tagging || c)
9820 /* ... push a placeholder. */
9821 args.safe_push (integer_zero_node);
9823 for (; c; c = OMP_CLAUSE_CHAIN (c))
9824 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9826 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9827 integer_type_node,
9828 OMP_CLAUSE_WAIT_EXPR (c));
9829 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9830 GSI_SAME_STMT);
9831 args.safe_push (arg);
9832 num_waits++;
9835 if (!tagging || num_waits)
9837 tree len;
9839 /* Now that we know the number, update the placeholder. */
9840 if (tagging)
9841 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9842 else
9843 len = build_int_cst (integer_type_node, num_waits);
9844 len = fold_convert_loc (gimple_location (entry_stmt),
9845 unsigned_type_node, len);
9846 args[t_wait_idx] = len;
9849 break;
9850 default:
9851 gcc_unreachable ();
9853 if (tagging)
9854 /* Push terminal marker - zero. */
9855 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9857 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9858 gimple_set_location (g, gimple_location (entry_stmt));
9859 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9860 if (!offloaded)
9862 g = gsi_stmt (gsi);
9863 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9864 gsi_remove (&gsi, true);
9868 /* Expand the parallel region tree rooted at REGION. Expansion
9869 proceeds in depth-first order. Innermost regions are expanded
9870 first. This way, parallel regions that require a new function to
9871 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9872 internal dependencies in their body. */
9874 static void
9875 expand_omp (struct omp_region *region)
9877 omp_any_child_fn_dumped = false;
9878 while (region)
9880 location_t saved_location;
9881 gimple *inner_stmt = NULL;
9883 /* First, determine whether this is a combined parallel+workshare
9884 region. */
9885 if (region->type == GIMPLE_OMP_PARALLEL)
9886 determine_parallel_type (region);
9888 if (region->type == GIMPLE_OMP_FOR
9889 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9890 inner_stmt = last_stmt (region->inner->entry);
9892 if (region->inner)
9893 expand_omp (region->inner);
9895 saved_location = input_location;
9896 if (gimple_has_location (last_stmt (region->entry)))
9897 input_location = gimple_location (last_stmt (region->entry));
9899 switch (region->type)
9901 case GIMPLE_OMP_PARALLEL:
9902 case GIMPLE_OMP_TASK:
9903 expand_omp_taskreg (region);
9904 break;
9906 case GIMPLE_OMP_FOR:
9907 expand_omp_for (region, inner_stmt);
9908 break;
9910 case GIMPLE_OMP_SECTIONS:
9911 expand_omp_sections (region);
9912 break;
9914 case GIMPLE_OMP_SECTION:
9915 /* Individual omp sections are handled together with their
9916 parent GIMPLE_OMP_SECTIONS region. */
9917 break;
9919 case GIMPLE_OMP_SINGLE:
9920 expand_omp_single (region);
9921 break;
9923 case GIMPLE_OMP_ORDERED:
9925 gomp_ordered *ord_stmt
9926 = as_a <gomp_ordered *> (last_stmt (region->entry));
9927 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9928 OMP_CLAUSE_DEPEND))
9930 /* We'll expand these when expanding corresponding
9931 worksharing region with ordered(n) clause. */
9932 gcc_assert (region->outer
9933 && region->outer->type == GIMPLE_OMP_FOR);
9934 region->ord_stmt = ord_stmt;
9935 break;
9938 /* FALLTHRU */
9939 case GIMPLE_OMP_MASTER:
9940 case GIMPLE_OMP_TASKGROUP:
9941 case GIMPLE_OMP_CRITICAL:
9942 case GIMPLE_OMP_TEAMS:
9943 expand_omp_synch (region);
9944 break;
9946 case GIMPLE_OMP_ATOMIC_LOAD:
9947 expand_omp_atomic (region);
9948 break;
9950 case GIMPLE_OMP_TARGET:
9951 expand_omp_target (region);
9952 break;
9954 default:
9955 gcc_unreachable ();
9958 input_location = saved_location;
9959 region = region->next;
9961 if (omp_any_child_fn_dumped)
9963 if (dump_file)
9964 dump_function_header (dump_file, current_function_decl, dump_flags);
9965 omp_any_child_fn_dumped = false;
9969 /* Helper for build_omp_regions. Scan the dominator tree starting at
9970 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9971 true, the function ends once a single tree is built (otherwise, whole
9972 forest of OMP constructs may be built). */
9974 static void
9975 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9976 bool single_tree)
9978 gimple_stmt_iterator gsi;
9979 gimple *stmt;
9980 basic_block son;
9982 gsi = gsi_last_nondebug_bb (bb);
9983 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9985 struct omp_region *region;
9986 enum gimple_code code;
9988 stmt = gsi_stmt (gsi);
9989 code = gimple_code (stmt);
9990 if (code == GIMPLE_OMP_RETURN)
9992 /* STMT is the return point out of region PARENT. Mark it
9993 as the exit point and make PARENT the immediately
9994 enclosing region. */
9995 gcc_assert (parent);
9996 region = parent;
9997 region->exit = bb;
9998 parent = parent->outer;
10000 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10002 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10003 GIMPLE_OMP_RETURN, but matches with
10004 GIMPLE_OMP_ATOMIC_LOAD. */
10005 gcc_assert (parent);
10006 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10007 region = parent;
10008 region->exit = bb;
10009 parent = parent->outer;
10011 else if (code == GIMPLE_OMP_CONTINUE)
10013 gcc_assert (parent);
10014 parent->cont = bb;
10016 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10018 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10019 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10021 else
10023 region = new_omp_region (bb, code, parent);
10024 /* Otherwise... */
10025 if (code == GIMPLE_OMP_TARGET)
10027 switch (gimple_omp_target_kind (stmt))
10029 case GF_OMP_TARGET_KIND_REGION:
10030 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10031 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10032 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10033 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10034 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10035 break;
10036 case GF_OMP_TARGET_KIND_UPDATE:
10037 case GF_OMP_TARGET_KIND_ENTER_DATA:
10038 case GF_OMP_TARGET_KIND_EXIT_DATA:
10039 case GF_OMP_TARGET_KIND_DATA:
10040 case GF_OMP_TARGET_KIND_OACC_DATA:
10041 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10042 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10043 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10044 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10045 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10046 /* ..., other than for those stand-alone directives... */
10047 region = NULL;
10048 break;
10049 default:
10050 gcc_unreachable ();
10053 else if (code == GIMPLE_OMP_ORDERED
10054 && omp_find_clause (gimple_omp_ordered_clauses
10055 (as_a <gomp_ordered *> (stmt)),
10056 OMP_CLAUSE_DEPEND))
10057 /* #pragma omp ordered depend is also just a stand-alone
10058 directive. */
10059 region = NULL;
10060 else if (code == GIMPLE_OMP_TASK
10061 && gimple_omp_task_taskwait_p (stmt))
10062 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10063 region = NULL;
10064 /* ..., this directive becomes the parent for a new region. */
10065 if (region)
10066 parent = region;
10070 if (single_tree && !parent)
10071 return;
10073 for (son = first_dom_son (CDI_DOMINATORS, bb);
10074 son;
10075 son = next_dom_son (CDI_DOMINATORS, son))
10076 build_omp_regions_1 (son, parent, single_tree);
10079 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10080 root_omp_region. */
10082 static void
10083 build_omp_regions_root (basic_block root)
10085 gcc_assert (root_omp_region == NULL);
10086 build_omp_regions_1 (root, NULL, true);
10087 gcc_assert (root_omp_region != NULL);
10090 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10092 void
10093 omp_expand_local (basic_block head)
10095 build_omp_regions_root (head);
10096 if (dump_file && (dump_flags & TDF_DETAILS))
10098 fprintf (dump_file, "\nOMP region tree\n\n");
10099 dump_omp_region (dump_file, root_omp_region, 0);
10100 fprintf (dump_file, "\n");
10103 remove_exit_barriers (root_omp_region);
10104 expand_omp (root_omp_region);
10106 omp_free_regions ();
10109 /* Scan the CFG and build a tree of OMP regions. Return the root of
10110 the OMP region tree. */
10112 static void
10113 build_omp_regions (void)
10115 gcc_assert (root_omp_region == NULL);
10116 calculate_dominance_info (CDI_DOMINATORS);
10117 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10120 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10122 static unsigned int
10123 execute_expand_omp (void)
10125 build_omp_regions ();
10127 if (!root_omp_region)
10128 return 0;
10130 if (dump_file)
10132 fprintf (dump_file, "\nOMP region tree\n\n");
10133 dump_omp_region (dump_file, root_omp_region, 0);
10134 fprintf (dump_file, "\n");
10137 remove_exit_barriers (root_omp_region);
10139 expand_omp (root_omp_region);
10141 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10142 verify_loop_structure ();
10143 cleanup_tree_cfg ();
10145 omp_free_regions ();
10147 return 0;
10150 /* OMP expansion -- the default pass, run before creation of SSA form. */
10152 namespace {
10154 const pass_data pass_data_expand_omp =
10156 GIMPLE_PASS, /* type */
10157 "ompexp", /* name */
10158 OPTGROUP_OMP, /* optinfo_flags */
10159 TV_NONE, /* tv_id */
10160 PROP_gimple_any, /* properties_required */
10161 PROP_gimple_eomp, /* properties_provided */
10162 0, /* properties_destroyed */
10163 0, /* todo_flags_start */
10164 0, /* todo_flags_finish */
10167 class pass_expand_omp : public gimple_opt_pass
10169 public:
10170 pass_expand_omp (gcc::context *ctxt)
10171 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10174 /* opt_pass methods: */
10175 virtual unsigned int execute (function *)
10177 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10178 || flag_openmp_simd != 0)
10179 && !seen_error ());
10181 /* This pass always runs, to provide PROP_gimple_eomp.
10182 But often, there is nothing to do. */
10183 if (!gate)
10184 return 0;
10186 return execute_expand_omp ();
10189 }; // class pass_expand_omp
10191 } // anon namespace
10193 gimple_opt_pass *
10194 make_pass_expand_omp (gcc::context *ctxt)
10196 return new pass_expand_omp (ctxt);
10199 namespace {
10201 const pass_data pass_data_expand_omp_ssa =
10203 GIMPLE_PASS, /* type */
10204 "ompexpssa", /* name */
10205 OPTGROUP_OMP, /* optinfo_flags */
10206 TV_NONE, /* tv_id */
10207 PROP_cfg | PROP_ssa, /* properties_required */
10208 PROP_gimple_eomp, /* properties_provided */
10209 0, /* properties_destroyed */
10210 0, /* todo_flags_start */
10211 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10214 class pass_expand_omp_ssa : public gimple_opt_pass
10216 public:
10217 pass_expand_omp_ssa (gcc::context *ctxt)
10218 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10221 /* opt_pass methods: */
10222 virtual bool gate (function *fun)
10224 return !(fun->curr_properties & PROP_gimple_eomp);
10226 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10227 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10229 }; // class pass_expand_omp_ssa
10231 } // anon namespace
10233 gimple_opt_pass *
10234 make_pass_expand_omp_ssa (gcc::context *ctxt)
10236 return new pass_expand_omp_ssa (ctxt);
10239 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10240 GIMPLE_* codes. */
10242 bool
10243 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10244 int *region_idx)
10246 gimple *last = last_stmt (bb);
10247 enum gimple_code code = gimple_code (last);
10248 struct omp_region *cur_region = *region;
10249 bool fallthru = false;
10251 switch (code)
10253 case GIMPLE_OMP_PARALLEL:
10254 case GIMPLE_OMP_FOR:
10255 case GIMPLE_OMP_SINGLE:
10256 case GIMPLE_OMP_TEAMS:
10257 case GIMPLE_OMP_MASTER:
10258 case GIMPLE_OMP_TASKGROUP:
10259 case GIMPLE_OMP_CRITICAL:
10260 case GIMPLE_OMP_SECTION:
10261 cur_region = new_omp_region (bb, code, cur_region);
10262 fallthru = true;
10263 break;
10265 case GIMPLE_OMP_TASK:
10266 cur_region = new_omp_region (bb, code, cur_region);
10267 fallthru = true;
10268 if (gimple_omp_task_taskwait_p (last))
10269 cur_region = cur_region->outer;
10270 break;
10272 case GIMPLE_OMP_ORDERED:
10273 cur_region = new_omp_region (bb, code, cur_region);
10274 fallthru = true;
10275 if (omp_find_clause (gimple_omp_ordered_clauses
10276 (as_a <gomp_ordered *> (last)),
10277 OMP_CLAUSE_DEPEND))
10278 cur_region = cur_region->outer;
10279 break;
10281 case GIMPLE_OMP_TARGET:
10282 cur_region = new_omp_region (bb, code, cur_region);
10283 fallthru = true;
10284 switch (gimple_omp_target_kind (last))
10286 case GF_OMP_TARGET_KIND_REGION:
10287 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10288 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10289 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10290 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10291 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10292 break;
10293 case GF_OMP_TARGET_KIND_UPDATE:
10294 case GF_OMP_TARGET_KIND_ENTER_DATA:
10295 case GF_OMP_TARGET_KIND_EXIT_DATA:
10296 case GF_OMP_TARGET_KIND_DATA:
10297 case GF_OMP_TARGET_KIND_OACC_DATA:
10298 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10299 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10300 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10301 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10302 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10303 cur_region = cur_region->outer;
10304 break;
10305 default:
10306 gcc_unreachable ();
10308 break;
10310 case GIMPLE_OMP_SECTIONS:
10311 cur_region = new_omp_region (bb, code, cur_region);
10312 fallthru = true;
10313 break;
10315 case GIMPLE_OMP_SECTIONS_SWITCH:
10316 fallthru = false;
10317 break;
10319 case GIMPLE_OMP_ATOMIC_LOAD:
10320 case GIMPLE_OMP_ATOMIC_STORE:
10321 fallthru = true;
10322 break;
10324 case GIMPLE_OMP_RETURN:
10325 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10326 somewhere other than the next block. This will be
10327 created later. */
10328 cur_region->exit = bb;
10329 if (cur_region->type == GIMPLE_OMP_TASK)
10330 /* Add an edge corresponding to not scheduling the task
10331 immediately. */
10332 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10333 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10334 cur_region = cur_region->outer;
10335 break;
10337 case GIMPLE_OMP_CONTINUE:
10338 cur_region->cont = bb;
10339 switch (cur_region->type)
10341 case GIMPLE_OMP_FOR:
10342 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10343 succs edges as abnormal to prevent splitting
10344 them. */
10345 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10346 /* Make the loopback edge. */
10347 make_edge (bb, single_succ (cur_region->entry),
10348 EDGE_ABNORMAL);
10350 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10351 corresponds to the case that the body of the loop
10352 is not executed at all. */
10353 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10354 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10355 fallthru = false;
10356 break;
10358 case GIMPLE_OMP_SECTIONS:
10359 /* Wire up the edges into and out of the nested sections. */
10361 basic_block switch_bb = single_succ (cur_region->entry);
10363 struct omp_region *i;
10364 for (i = cur_region->inner; i ; i = i->next)
10366 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10367 make_edge (switch_bb, i->entry, 0);
10368 make_edge (i->exit, bb, EDGE_FALLTHRU);
10371 /* Make the loopback edge to the block with
10372 GIMPLE_OMP_SECTIONS_SWITCH. */
10373 make_edge (bb, switch_bb, 0);
10375 /* Make the edge from the switch to exit. */
10376 make_edge (switch_bb, bb->next_bb, 0);
10377 fallthru = false;
10379 break;
10381 case GIMPLE_OMP_TASK:
10382 fallthru = true;
10383 break;
10385 default:
10386 gcc_unreachable ();
10388 break;
10390 default:
10391 gcc_unreachable ();
10394 if (*region != cur_region)
10396 *region = cur_region;
10397 if (cur_region)
10398 *region_idx = cur_region->entry->index;
10399 else
10400 *region_idx = 0;
10403 return fallthru;