/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to
   the runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */
static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
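
/* A worked example of the rounding above (added commentary, not from
   the upstream source): omp_max_vf returns the vectorization factor,
   a power of two when constant, so the PLUS/BIT_AND pair rounds
   CHUNK_SIZE up to a multiple of VF; e.g. with vf == 8 and
   chunk_size == 10, (10 + 7) & -8 == 16.  */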
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
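
/* Aside (my gloss; double-check against libgomp): the arguments
   collected above get spliced into the combined call by
   expand_parallel_call, e.g.
     GOMP_parallel_loop_dynamic (fn, data, num_threads,
				 start, end, incr, chunk_size, flags);
   so the loop bounds travel as explicit scalars rather than through
   .omp_data_s.  */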
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
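
/* Illustration (mine, not from the source): given
     #pragma omp parallel for schedule (dynamic, 16)
   the two regions are fused here so that expand_parallel_call later
   emits a single GOMP_parallel_loop_dynamic call instead of
   GOMP_parallel plus a separate workshare loop inside the child.  */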
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}
/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}
/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}
/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
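
  /* Commentary (added, not upstream): the offset arithmetic above
     assumes the BUILT_IN_GOMP_PARALLEL_LOOP_* builtins are declared
     consecutively, in the order static, dynamic, guided, runtime,
     nonmonotonic_dynamic, nonmonotonic_guided, nonmonotonic_runtime,
     maybe_nonmonotonic_runtime, so START_IX2 indexes directly into
     that block of builtins.  */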
  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
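
/* Sketch of the end result (my own illustration, assuming the plain
   GOMP_parallel flavor): for
     #pragma omp parallel num_threads (4)
   the call built above amounts to
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);
   with T2, T1, VAL and FLAGS supplying the four arguments.  */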
/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
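
/* For orientation (my summary; verify against libgomp/task.c): the
   nine arguments passed to GOMP_task above line up with
     GOMP_task (fn, data, cpyfn, arg_size, arg_align,
		if_clause, flags, depend, priority);
   the taskloop variants additionally receive num_tasks and the
   start/end/step triple describing the iteration space.  */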
/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}
/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
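
/* The payoff (added note): once redirected to the const builtin,
   repeated calls such as
     a[omp_get_thread_num ()] = 0;
     b[omp_get_thread_num ()] = 0;
   within one parallel body can be CSEd into a single thread-number
   read by later passes.  */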
/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}
/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}
/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to it's default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}
      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
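
/* Net effect, informally (added summary): after expand_omp_taskreg,
     #pragma omp parallel
     { body; }
   has become a call such as
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);
   in the parent, with the body moved into the newly outlined child
   function.  */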
/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};
/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
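
/* Worked example of the iteration-count formula above (mine, not from
   the source): for an upward loop with b == 0, e == 10, s == 3 the
   range is 10 and iters == (10 - 1 + 3) / 3 == 4, matching the four
   iterations 0, 3, 6 and 9.  */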
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_inits.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
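
/* Decomposition example (added for illustration): for collapse(2) with
   inner iteration count N2, the combined index splits as
     inner = IVAR % N2;  outer = IVAR / N2;
   which is exactly the TRUNC_MOD/TRUNC_DIV pair built above, applied
   innermost first.  */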
/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula
   (TBD.), otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	      }
	  }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */
1759 static void
1760 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1761 basic_block &entry_bb, tree *counts,
1762 basic_block &zero_iter1_bb, int &first_zero_iter1,
1763 basic_block &zero_iter2_bb, int &first_zero_iter2,
1764 basic_block &l2_dom_bb)
1766 tree t, type = TREE_TYPE (fd->loop.v);
1767 edge e, ne;
1768 int i;
1770 /* Collapsed loops need work for expansion into SSA form. */
1771 gcc_assert (!gimple_in_ssa_p (cfun));
1773 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1774 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1776 gcc_assert (fd->ordered == 0);
1777 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1778 isn't supposed to be handled, as the inner loop doesn't
1779 use it. */
1780 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1781 OMP_CLAUSE__LOOPTEMP_);
1782 gcc_assert (innerc);
1783 for (i = 0; i < fd->collapse; i++)
1785 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 if (i)
1789 counts[i] = OMP_CLAUSE_DECL (innerc);
1790 else
1791 counts[0] = NULL_TREE;
1793 return;
1796 for (i = fd->collapse; i < fd->ordered; i++)
1798 tree itype = TREE_TYPE (fd->loops[i].v);
1799 counts[i] = NULL_TREE;
1800 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1801 fold_convert (itype, fd->loops[i].n1),
1802 fold_convert (itype, fd->loops[i].n2));
1803 if (t && integer_zerop (t))
1805 for (i = fd->collapse; i < fd->ordered; i++)
1806 counts[i] = build_int_cst (type, 0);
1807 break;
1810 bool rect_count_seen = false;
1811 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1813 tree itype = TREE_TYPE (fd->loops[i].v);
1815 if (i >= fd->collapse && counts[i])
1816 continue;
1817 if (fd->non_rect)
1819 /* Skip loops that use outer iterators in their expressions
1820 during this phase. */
1821 if (fd->loops[i].m1 || fd->loops[i].m2)
1823 counts[i] = build_zero_cst (type);
1824 continue;
1827 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1828 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1829 fold_convert (itype, fd->loops[i].n1),
1830 fold_convert (itype, fd->loops[i].n2)))
1831 == NULL_TREE || !integer_onep (t)))
1833 gcond *cond_stmt;
1834 tree n1, n2;
1835 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1836 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1837 true, GSI_SAME_STMT);
1838 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1839 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1840 true, GSI_SAME_STMT);
1841 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1842 NULL_TREE, NULL_TREE);
1843 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1844 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1845 expand_omp_regimplify_p, NULL, NULL)
1846 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1847 expand_omp_regimplify_p, NULL, NULL))
1849 *gsi = gsi_for_stmt (cond_stmt);
1850 gimple_regimplify_operands (cond_stmt, gsi);
1852 e = split_block (entry_bb, cond_stmt);
1853 basic_block &zero_iter_bb
1854 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1855 int &first_zero_iter
1856 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1857 if (zero_iter_bb == NULL)
1859 gassign *assign_stmt;
1860 first_zero_iter = i;
1861 zero_iter_bb = create_empty_bb (entry_bb);
1862 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1863 *gsi = gsi_after_labels (zero_iter_bb);
1864 if (i < fd->collapse)
1865 assign_stmt = gimple_build_assign (fd->loop.n2,
1866 build_zero_cst (type));
1867 else
1869 counts[i] = create_tmp_reg (type, ".count");
1870 assign_stmt
1871 = gimple_build_assign (counts[i], build_zero_cst (type));
1873 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1874 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1875 entry_bb);
1877 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1878 ne->probability = profile_probability::very_unlikely ();
1879 e->flags = EDGE_TRUE_VALUE;
1880 e->probability = ne->probability.invert ();
1881 if (l2_dom_bb == NULL)
1882 l2_dom_bb = entry_bb;
1883 entry_bb = e->dest;
1884 *gsi = gsi_last_nondebug_bb (entry_bb);
1887 if (POINTER_TYPE_P (itype))
1888 itype = signed_type_for (itype);
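/* Compute the iteration count as (N2 - N1 + STEP + ADJ) / STEP, where
   ADJ is -1 for < loops and +1 for > loops (whose STEP is negative),
   i.e. a division rounding up in the direction of the loop.  */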
1889 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1890 ? -1 : 1));
1891 t = fold_build2 (PLUS_EXPR, itype,
1892 fold_convert (itype, fd->loops[i].step), t);
1893 t = fold_build2 (PLUS_EXPR, itype, t,
1894 fold_convert (itype, fd->loops[i].n2));
1895 t = fold_build2 (MINUS_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].n1));
1897 /* ?? We could probably use CEIL_DIV_EXPR instead of
1898 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1899 generate the same code in the end because generically we
1900 don't know that the values involved must be negative for
1901 GT?? */
1902 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1904 fold_build1 (NEGATE_EXPR, itype, t),
1905 fold_build1 (NEGATE_EXPR, itype,
1906 fold_convert (itype,
1907 fd->loops[i].step)));
1908 else
1909 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1910 fold_convert (itype, fd->loops[i].step));
1911 t = fold_convert (type, t);
1912 if (TREE_CODE (t) == INTEGER_CST)
1913 counts[i] = t;
1914 else
1916 if (i < fd->collapse || i != first_zero_iter2)
1917 counts[i] = create_tmp_reg (type, ".count");
1918 expand_omp_build_assign (gsi, counts[i], t);
1920 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1922 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1923 continue;
1924 if (!rect_count_seen)
1926 t = counts[i];
1927 rect_count_seen = true;
1929 else
1930 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1931 expand_omp_build_assign (gsi, fd->loop.n2, t);
1934 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1936 gcc_assert (fd->last_nonrect != -1);
1938 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1939 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1940 build_zero_cst (type));
1941 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1942 if (fd->loops[i].m1
1943 || fd->loops[i].m2
1944 || fd->loops[i].non_rect_referenced)
1945 break;
1946 if (i == fd->last_nonrect
1947 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1948 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1950 int o = fd->first_nonrect;
1951 tree itype = TREE_TYPE (fd->loops[o].v);
1952 tree n1o = create_tmp_reg (itype, ".n1o");
1953 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1954 expand_omp_build_assign (gsi, n1o, t);
1955 tree n2o = create_tmp_reg (itype, ".n2o");
1956 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1957 expand_omp_build_assign (gsi, n2o, t);
1958 if (fd->loops[i].m1 && fd->loops[i].m2)
1959 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1960 unshare_expr (fd->loops[i].m1));
1961 else if (fd->loops[i].m1)
1962 t = fold_unary (NEGATE_EXPR, itype,
1963 unshare_expr (fd->loops[i].m1));
1964 else
1965 t = unshare_expr (fd->loops[i].m2);
1966 tree m2minusm1
1967 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1968 true, GSI_SAME_STMT);
1970 gimple_stmt_iterator gsi2 = *gsi;
1971 gsi_prev (&gsi2);
1972 e = split_block (entry_bb, gsi_stmt (gsi2));
1973 e = split_block (e->dest, (gimple *) NULL);
1974 basic_block bb1 = e->src;
1975 entry_bb = e->dest;
1976 *gsi = gsi_after_labels (entry_bb);
1978 gsi2 = gsi_after_labels (bb1);
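/* Compute the number of iterations of the sub-nest's outermost loop
   (outer_niters below) and the value its IV would have in the last
   iteration: last = n1o + (outer_niters - 1) * ostep.  */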
1979 tree ostep = fold_convert (itype, fd->loops[o].step);
1980 t = build_int_cst (itype, (fd->loops[o].cond_code
1981 == LT_EXPR ? -1 : 1));
1982 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
1983 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
1984 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
1985 if (TYPE_UNSIGNED (itype)
1986 && fd->loops[o].cond_code == GT_EXPR)
1987 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1988 fold_build1 (NEGATE_EXPR, itype, t),
1989 fold_build1 (NEGATE_EXPR, itype, ostep));
1990 else
1991 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
1992 tree outer_niters
1993 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
1994 true, GSI_SAME_STMT);
1995 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
1996 build_one_cst (itype));
1997 t = fold_build2 (MULT_EXPR, itype, t, ostep);
1998 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
1999 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2000 true, GSI_SAME_STMT);
2001 tree n1, n2, n1e, n2e;
2002 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2003 if (fd->loops[i].m1)
2005 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2006 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2007 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2009 else
2010 n1 = t;
2011 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2013 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2014 if (fd->loops[i].m2)
2016 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2017 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2018 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2020 else
2021 n2 = t;
2022 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2025 if (fd->loops[i].m1)
2027 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2028 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2029 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2031 else
2032 n1e = t;
2033 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2034 true, GSI_SAME_STMT);
2035 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2036 if (fd->loops[i].m2)
2038 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2039 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2040 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2042 else
2043 n2e = t;
2044 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2045 true, GSI_SAME_STMT);
2046 gcond *cond_stmt
2047 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2048 NULL_TREE, NULL_TREE);
2049 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2050 e = split_block (bb1, cond_stmt);
2051 e->flags = EDGE_TRUE_VALUE;
2052 e->probability = profile_probability::likely ().guessed ();
2053 basic_block bb2 = e->dest;
2054 gsi2 = gsi_after_labels (bb2);
2056 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2057 NULL_TREE, NULL_TREE);
2058 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2059 e = split_block (bb2, cond_stmt);
2060 e->flags = EDGE_TRUE_VALUE;
2061 e->probability = profile_probability::likely ().guessed ();
2062 gsi2 = gsi_after_labels (e->dest);
2064 tree step = fold_convert (itype, fd->loops[i].step);
2065 t = build_int_cst (itype, (fd->loops[i].cond_code
2066 == LT_EXPR ? -1 : 1));
2067 t = fold_build2 (PLUS_EXPR, itype, step, t);
2068 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2069 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2070 if (TYPE_UNSIGNED (itype)
2071 && fd->loops[i].cond_code == GT_EXPR)
2072 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2073 fold_build1 (NEGATE_EXPR, itype, t),
2074 fold_build1 (NEGATE_EXPR, itype, step));
2075 else
2076 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2077 tree first_inner_iterations
2078 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2081 if (TYPE_UNSIGNED (itype)
2082 && fd->loops[i].cond_code == GT_EXPR)
2083 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2084 fold_build1 (NEGATE_EXPR, itype, t),
2085 fold_build1 (NEGATE_EXPR, itype, step));
2086 else
2087 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2088 tree factor
2089 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2090 true, GSI_SAME_STMT);
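/* The inner iteration counts form an arithmetic progression starting
   at first_inner_iterations and changing by factor per outer
   iteration, so by Faulhaber's formula for the first power
   COUNT = outer_niters * first_inner_iterations
	   + factor * (outer_niters * (outer_niters - 1) / 2).  */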
2091 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2092 build_one_cst (itype));
2093 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2094 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2095 t = fold_build2 (MULT_EXPR, itype, factor, t);
2096 t = fold_build2 (PLUS_EXPR, itype,
2097 fold_build2 (MULT_EXPR, itype, outer_niters,
2098 first_inner_iterations), t);
2099 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2100 fold_convert (type, t));
2102 basic_block bb3 = create_empty_bb (bb1);
2103 add_bb_to_loop (bb3, bb1->loop_father);
2105 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2106 e->probability = profile_probability::unlikely ().guessed ();
2108 gsi2 = gsi_after_labels (bb3);
2109 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2110 NULL_TREE, NULL_TREE);
2111 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2112 e = split_block (bb3, cond_stmt);
2113 e->flags = EDGE_TRUE_VALUE;
2114 e->probability = profile_probability::likely ().guessed ();
2115 basic_block bb4 = e->dest;
2117 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2118 ne->probability = e->probability.invert ();
2120 basic_block bb5 = create_empty_bb (bb2);
2121 add_bb_to_loop (bb5, bb2->loop_father);
2123 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2124 ne->probability = profile_probability::unlikely ().guessed ();
2126 for (int j = 0; j < 2; j++)
2128 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2129 t = fold_build2 (MINUS_EXPR, itype,
2130 unshare_expr (fd->loops[i].n1),
2131 unshare_expr (fd->loops[i].n2));
2132 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2133 tree tem
2134 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2135 true, GSI_SAME_STMT);
2136 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2137 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2138 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2139 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2140 true, GSI_SAME_STMT);
2141 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2142 if (fd->loops[i].m1)
2144 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2145 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2146 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2148 else
2149 n1 = t;
2150 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2151 true, GSI_SAME_STMT);
2152 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2153 if (fd->loops[i].m2)
2155 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2156 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2157 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2159 else
2160 n2 = t;
2161 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2162 true, GSI_SAME_STMT);
2163 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2165 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2166 NULL_TREE, NULL_TREE);
2167 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2168 e = split_block (gsi_bb (gsi2), cond_stmt);
2169 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2170 e->probability = profile_probability::unlikely ().guessed ();
2171 ne = make_edge (e->src, bb1,
2172 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2173 ne->probability = e->probability.invert ();
2174 gsi2 = gsi_after_labels (e->dest);
2176 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2177 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2179 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2182 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2183 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2184 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2186 if (fd->first_nonrect + 1 == fd->last_nonrect)
2188 fd->first_inner_iterations = first_inner_iterations;
2189 fd->factor = factor;
2190 fd->adjn1 = n1o;
2193 else
2195 /* Fallback implementation. Evaluate the loops with m1/m2
2196 non-NULL as well as their outer loops at runtime using temporaries
2197 instead of the original iteration variables, and in the
2198 body just bump the counter. */
2199 gimple_stmt_iterator gsi2 = *gsi;
2200 gsi_prev (&gsi2);
2201 e = split_block (entry_bb, gsi_stmt (gsi2));
2202 e = split_block (e->dest, (gimple *) NULL);
2203 basic_block cur_bb = e->src;
2204 basic_block next_bb = e->dest;
2205 entry_bb = e->dest;
2206 *gsi = gsi_after_labels (entry_bb);
2208 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2209 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2211 for (i = 0; i <= fd->last_nonrect; i++)
2213 if (fd->loops[i].m1 == NULL_TREE
2214 && fd->loops[i].m2 == NULL_TREE
2215 && !fd->loops[i].non_rect_referenced)
2216 continue;
2218 tree itype = TREE_TYPE (fd->loops[i].v);
2220 gsi2 = gsi_after_labels (cur_bb);
2221 tree n1, n2;
2222 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2223 if (fd->loops[i].m1)
2225 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2226 n1 = fold_build2 (MULT_EXPR, itype,
2227 vs[i - fd->loops[i].outer], n1);
2228 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2230 else
2231 n1 = t;
2232 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2233 true, GSI_SAME_STMT);
2234 if (i < fd->last_nonrect)
2236 vs[i] = create_tmp_reg (itype, ".it");
2237 expand_omp_build_assign (&gsi2, vs[i], n1);
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2240 if (fd->loops[i].m2)
2242 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2243 n2 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n2);
2245 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2247 else
2248 n2 = t;
2249 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i == fd->last_nonrect)
2253 gcond *cond_stmt
2254 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2255 NULL_TREE, NULL_TREE);
2256 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2257 e = split_block (cur_bb, cond_stmt);
2258 e->flags = EDGE_TRUE_VALUE;
2259 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2260 e->probability = profile_probability::likely ().guessed ();
2261 ne->probability = e->probability.invert ();
2262 gsi2 = gsi_after_labels (e->dest);
2264 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2265 ? -1 : 1));
2266 t = fold_build2 (PLUS_EXPR, itype,
2267 fold_convert (itype, fd->loops[i].step), t);
2268 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2269 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2270 tree step = fold_convert (itype, fd->loops[i].step);
2271 if (TYPE_UNSIGNED (itype)
2272 && fd->loops[i].cond_code == GT_EXPR)
2273 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2274 fold_build1 (NEGATE_EXPR, itype, t),
2275 fold_build1 (NEGATE_EXPR, itype, step));
2276 else
2277 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2278 t = fold_convert (type, t);
2279 t = fold_build2 (PLUS_EXPR, type,
2280 counts[fd->last_nonrect], t);
2281 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2282 true, GSI_SAME_STMT);
2283 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2284 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2285 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2286 break;
2288 e = split_block (cur_bb, last_stmt (cur_bb));
2290 basic_block new_cur_bb = create_empty_bb (cur_bb);
2291 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2293 gsi2 = gsi_after_labels (e->dest);
2294 tree step = fold_convert (itype,
2295 unshare_expr (fd->loops[i].step));
2296 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2297 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2298 true, GSI_SAME_STMT);
2299 expand_omp_build_assign (&gsi2, vs[i], t);
2301 ne = split_block (e->dest, last_stmt (e->dest));
2302 gsi2 = gsi_after_labels (ne->dest);
2304 gcond *cond_stmt
2305 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2306 NULL_TREE, NULL_TREE);
2307 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2308 edge e3, e4;
2309 if (next_bb == entry_bb)
2311 e3 = find_edge (ne->dest, next_bb);
2312 e3->flags = EDGE_FALSE_VALUE;
2314 else
2315 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2316 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2317 e4->probability = profile_probability::likely ().guessed ();
2318 e3->probability = e4->probability.invert ();
2319 basic_block esrc = e->src;
2320 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2321 cur_bb = new_cur_bb;
2322 basic_block latch_bb = next_bb;
2323 next_bb = e->dest;
2324 remove_edge (e);
2325 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2326 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2327 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2330 t = NULL_TREE;
2331 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2332 if (!fd->loops[i].non_rect_referenced
2333 && fd->loops[i].m1 == NULL_TREE
2334 && fd->loops[i].m2 == NULL_TREE)
2336 if (t == NULL_TREE)
2337 t = counts[i];
2338 else
2339 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2341 if (t)
2343 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2344 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2346 if (!rect_count_seen)
2347 t = counts[fd->last_nonrect];
2348 else
2349 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2350 counts[fd->last_nonrect]);
2351 expand_omp_build_assign (gsi, fd->loop.n2, t);
2353 else if (fd->non_rect)
2355 tree t = fd->loop.n2;
2356 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2357 int non_rect_referenced = 0, non_rect = 0;
2358 for (i = 0; i < fd->collapse; i++)
2360 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2361 && !integer_zerop (counts[i]))
2362 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2363 if (fd->loops[i].non_rect_referenced)
2364 non_rect_referenced++;
2365 if (fd->loops[i].m1 || fd->loops[i].m2)
2366 non_rect++;
2368 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2369 counts[fd->last_nonrect] = t;
2373 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2374 T = V;
2375 V3 = N31 + (T % count3) * STEP3;
2376 T = T / count3;
2377 V2 = N21 + (T % count2) * STEP2;
2378 T = T / count2;
2379 V1 = N11 + T * STEP1;
2380 if this loop doesn't have an inner loop construct combined with it.
2381 If it does have an inner loop construct combined with it and the
2382 iteration count isn't known constant, store values from counts array
2383 into its _looptemp_ temporaries instead.
2384 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2385 inclusive), use the count of all those loops together, and either
2386 find quadratic etc. equation roots (TBD), or as a fallback, do:
2387 COUNT = 0;
2388 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2389 for (tmpj = M21 * tmpi + N21;
2390 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2392 int tmpk1 = M31 * tmpj + N31;
2393 int tmpk2 = M32 * tmpj + N32;
2394 if (tmpk1 COND3 tmpk2)
2396 if (COND3 is <)
2397 adj = STEP3 - 1;
2398 else
2399 adj = STEP3 + 1;
2400 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2401 if (COUNT + temp > T)
2403 V1 = tmpi;
2404 V2 = tmpj;
2405 V3 = tmpk1 + (T - COUNT) * STEP3;
2406 goto done;
2408 else
2409 COUNT += temp;
2412 done:;
2413 but for optional innermost or outermost rectangular loops that aren't
2414 referenced by other loop expressions keep doing the division/modulo. */
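/* A minimal compilable sketch of the rectangular decoding above
   (not part of GCC, guarded out; all names hypothetical).  */
#if 0
static void
decode_collapsed_iter (long t, /* logical iteration number V */
                       long count2, long count3,
                       long n11, long n21, long n31,
                       long step1, long step2, long step3,
                       long *v1, long *v2, long *v3)
{
  *v3 = n31 + (t % count3) * step3;
  t = t / count3;
  *v2 = n21 + (t % count2) * step2;
  t = t / count2;
  *v1 = n11 + t * step1;
}
#endif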
2416 static void
2417 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2418 tree *counts, tree *nonrect_bounds,
2419 gimple *inner_stmt, tree startvar)
2421 int i;
2422 if (gimple_omp_for_combined_p (fd->for_stmt))
2424 /* If fd->loop.n2 is constant, then no propagation of the counts
2425 is needed; they are constant. */
2426 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2427 return;
2429 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2430 ? gimple_omp_taskreg_clauses (inner_stmt)
2431 : gimple_omp_for_clauses (inner_stmt);
2432 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2433 isn't supposed to be handled, as the inner loop doesn't
2434 use it. */
2435 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2436 gcc_assert (innerc);
2437 for (i = 0; i < fd->collapse; i++)
2439 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2440 OMP_CLAUSE__LOOPTEMP_);
2441 gcc_assert (innerc);
2442 if (i)
2444 tree tem = OMP_CLAUSE_DECL (innerc);
2445 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
2446 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2447 false, GSI_CONTINUE_LINKING);
2448 gassign *stmt = gimple_build_assign (tem, t);
2449 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2452 return;
2455 tree type = TREE_TYPE (fd->loop.v);
2456 tree tem = create_tmp_reg (type, ".tem");
2457 gassign *stmt = gimple_build_assign (tem, startvar);
2458 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2460 for (i = fd->collapse - 1; i >= 0; i--)
2462 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2463 itype = vtype;
2464 if (POINTER_TYPE_P (vtype))
2465 itype = signed_type_for (vtype);
2466 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2467 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2468 else
2469 t = tem;
2470 if (i == fd->last_nonrect)
2472 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2473 false, GSI_CONTINUE_LINKING);
2474 tree stopval = t;
2475 tree idx = create_tmp_reg (type, ".count");
2476 expand_omp_build_assign (gsi, idx,
2477 build_zero_cst (type), true);
2478 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2479 if (fd->first_nonrect + 1 == fd->last_nonrect
2480 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2481 || (fd->first_inner_iterations
2482 /* For now. Later add clauses to propagate the
2483 values. */
2484 && !gimple_omp_for_combined_into_p (fd->for_stmt)))
2485 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2486 != CODE_FOR_nothing))
2488 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2489 tree itype = TREE_TYPE (fd->loops[i].v);
2490 tree first_inner_iterations = fd->first_inner_iterations;
2491 tree factor = fd->factor;
2492 gcond *cond_stmt
2493 = gimple_build_cond (NE_EXPR, factor,
2494 build_zero_cst (TREE_TYPE (factor)),
2495 NULL_TREE, NULL_TREE);
2496 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2497 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2498 basic_block bb0 = e->src;
2499 e->flags = EDGE_TRUE_VALUE;
2500 e->probability = profile_probability::likely ();
2501 bb_triang_dom = bb0;
2502 *gsi = gsi_after_labels (e->dest);
2503 tree slltype = long_long_integer_type_node;
2504 tree ulltype = long_long_unsigned_type_node;
2505 tree stopvalull = fold_convert (ulltype, stopval);
2506 stopvalull
2507 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2508 false, GSI_CONTINUE_LINKING);
2509 first_inner_iterations
2510 = fold_convert (slltype, first_inner_iterations);
2511 first_inner_iterations
2512 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2513 NULL_TREE, false,
2514 GSI_CONTINUE_LINKING);
2515 factor = fold_convert (slltype, factor);
2516 factor
2517 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2518 false, GSI_CONTINUE_LINKING);
2519 tree first_inner_iterationsd
2520 = fold_build1 (FLOAT_EXPR, double_type_node,
2521 first_inner_iterations);
2522 first_inner_iterationsd
2523 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2524 NULL_TREE, false,
2525 GSI_CONTINUE_LINKING);
2526 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2527 factor);
2528 factord = force_gimple_operand_gsi (gsi, factord, true,
2529 NULL_TREE, false,
2530 GSI_CONTINUE_LINKING);
2531 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2532 stopvalull);
2533 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2534 NULL_TREE, false,
2535 GSI_CONTINUE_LINKING);
2536 /* Temporarily disable flag_rounding_math; the values will be
2537 decimal numbers divided by 2, and worst-case imprecision
2538 due to too-large values ought to be caught later by the
2539 fallback checks. */
2540 int save_flag_rounding_math = flag_rounding_math;
2541 flag_rounding_math = 0;
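/* Find the candidate count C of whole outer iterations preceding
   STOPVAL by solving
   factor/2 * C^2 + (first_inner_iterations - factor/2) * C == stopval
   for C, i.e. with t3 = first_inner_iterations - factor / 2:
   C = (sqrt (t3 * t3 + 2 * factor * stopval) - t3) / factor.  */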
2542 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2543 build_real (double_type_node, dconst2));
2544 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2545 first_inner_iterationsd, t);
2546 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2547 GSI_CONTINUE_LINKING);
2548 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2549 build_real (double_type_node, dconst2));
2550 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2551 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2552 fold_build2 (MULT_EXPR, double_type_node,
2553 t3, t3));
2554 flag_rounding_math = save_flag_rounding_math;
2555 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2556 GSI_CONTINUE_LINKING);
2557 if (flag_exceptions
2558 && cfun->can_throw_non_call_exceptions
2559 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2561 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2562 build_zero_cst (double_type_node));
2563 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2564 false, GSI_CONTINUE_LINKING);
2565 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2566 boolean_false_node,
2567 NULL_TREE, NULL_TREE);
2569 else
2570 cond_stmt
2571 = gimple_build_cond (LT_EXPR, t,
2572 build_zero_cst (double_type_node),
2573 NULL_TREE, NULL_TREE);
2574 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2575 e = split_block (gsi_bb (*gsi), cond_stmt);
2576 basic_block bb1 = e->src;
2577 e->flags = EDGE_FALSE_VALUE;
2578 e->probability = profile_probability::very_likely ();
2579 *gsi = gsi_after_labels (e->dest);
2580 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2581 tree sqrtr = create_tmp_var (double_type_node);
2582 gimple_call_set_lhs (call, sqrtr);
2583 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2584 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2585 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2586 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2587 tree c = create_tmp_var (ulltype);
2588 tree d = create_tmp_var (ulltype);
2589 expand_omp_build_assign (gsi, c, t, true);
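/* C is the candidate number of complete outer iterations before
   STOPVAL; D below is the total number of logical iterations those
   C outer iterations execute.  */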
2590 t = fold_build2 (MINUS_EXPR, ulltype, c,
2591 build_one_cst (ulltype));
2592 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2593 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2594 t = fold_build2 (MULT_EXPR, ulltype,
2595 fold_convert (ulltype, fd->factor), t);
2596 tree t2
2597 = fold_build2 (MULT_EXPR, ulltype, c,
2598 fold_convert (ulltype,
2599 fd->first_inner_iterations));
2600 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2601 expand_omp_build_assign (gsi, d, t, true);
2602 t = fold_build2 (MULT_EXPR, ulltype,
2603 fold_convert (ulltype, fd->factor), c);
2604 t = fold_build2 (PLUS_EXPR, ulltype,
2605 t, fold_convert (ulltype,
2606 fd->first_inner_iterations));
2607 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2608 GSI_CONTINUE_LINKING);
2609 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2610 NULL_TREE, NULL_TREE);
2611 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2612 e = split_block (gsi_bb (*gsi), cond_stmt);
2613 basic_block bb2 = e->src;
2614 e->flags = EDGE_TRUE_VALUE;
2615 e->probability = profile_probability::very_likely ();
2616 *gsi = gsi_after_labels (e->dest);
2617 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2618 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2619 GSI_CONTINUE_LINKING);
2620 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2621 NULL_TREE, NULL_TREE);
2622 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2623 e = split_block (gsi_bb (*gsi), cond_stmt);
2624 basic_block bb3 = e->src;
2625 e->flags = EDGE_FALSE_VALUE;
2626 e->probability = profile_probability::very_likely ();
2627 *gsi = gsi_after_labels (e->dest);
2628 t = fold_convert (itype, c);
2629 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2630 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2631 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2632 GSI_CONTINUE_LINKING);
2633 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2634 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2635 t2 = fold_convert (itype, t2);
2636 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2637 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2638 if (fd->loops[i].m1)
2640 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2641 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2643 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2644 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2645 bb_triang = e->src;
2646 *gsi = gsi_after_labels (e->dest);
2647 remove_edge (e);
2648 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2649 e->probability = profile_probability::very_unlikely ();
2650 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2651 e->probability = profile_probability::very_unlikely ();
2652 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2653 e->probability = profile_probability::very_unlikely ();
2655 basic_block bb4 = create_empty_bb (bb0);
2656 add_bb_to_loop (bb4, bb0->loop_father);
2657 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2658 e->probability = profile_probability::unlikely ();
2659 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2660 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2661 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2662 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2663 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2664 counts[i], counts[i - 1]);
2665 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2666 GSI_CONTINUE_LINKING);
2667 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2668 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2669 t = fold_convert (itype, t);
2670 t2 = fold_convert (itype, t2);
2671 t = fold_build2 (MULT_EXPR, itype, t,
2672 fold_convert (itype, fd->loops[i].step));
2673 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2,
2675 fold_convert (itype, fd->loops[i - 1].step));
2676 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2677 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2678 false, GSI_CONTINUE_LINKING);
2679 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2680 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2681 if (fd->loops[i].m1)
2683 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2684 fd->loops[i - 1].v);
2685 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2687 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2688 false, GSI_CONTINUE_LINKING);
2689 stmt = gimple_build_assign (fd->loops[i].v, t);
2690 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2692 /* Fallback implementation. Evaluate the loops between
2693 fd->first_nonrect and fd->last_nonrect (inclusive) at
2694 runtime using temporaries instead of the original iteration
2695 variables; in the body just bump the counter and compare
2696 with the desired value. */
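/* I.e. keep advancing the temporaries and adding the whole iteration
   count of the innermost non-rectangular loop to IDX while
   IDX + count <= STOPVAL; the remainder STOPVAL - IDX then selects
   the position within that innermost loop.  */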
2697 gimple_stmt_iterator gsi2 = *gsi;
2698 basic_block entry_bb = gsi_bb (gsi2);
2699 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2700 e = split_block (e->dest, (gimple *) NULL);
2701 basic_block dom_bb = NULL;
2702 basic_block cur_bb = e->src;
2703 basic_block next_bb = e->dest;
2704 entry_bb = e->dest;
2705 *gsi = gsi_after_labels (entry_bb);
2707 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2708 tree n1 = NULL_TREE, n2 = NULL_TREE;
2709 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2711 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2713 tree itype = TREE_TYPE (fd->loops[j].v);
2714 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2715 && fd->loops[j].m2 == NULL_TREE
2716 && !fd->loops[j].non_rect_referenced);
2717 gsi2 = gsi_after_labels (cur_bb);
2718 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2719 if (fd->loops[j].m1)
2721 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2722 n1 = fold_build2 (MULT_EXPR, itype,
2723 vs[j - fd->loops[j].outer], n1);
2724 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2726 else if (rect_p)
2727 n1 = build_zero_cst (type);
2728 else
2729 n1 = t;
2730 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2731 true, GSI_SAME_STMT);
2732 if (j < fd->last_nonrect)
2734 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2735 expand_omp_build_assign (&gsi2, vs[j], n1);
2737 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2738 if (fd->loops[j].m2)
2740 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2741 n2 = fold_build2 (MULT_EXPR, itype,
2742 vs[j - fd->loops[j].outer], n2);
2743 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2745 else if (rect_p)
2746 n2 = counts[j];
2747 else
2748 n2 = t;
2749 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2750 true, GSI_SAME_STMT);
2751 if (j == fd->last_nonrect)
2753 gcond *cond_stmt
2754 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2755 NULL_TREE, NULL_TREE);
2756 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2757 e = split_block (cur_bb, cond_stmt);
2758 e->flags = EDGE_TRUE_VALUE;
2759 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2760 e->probability = profile_probability::likely ().guessed ();
2761 ne->probability = e->probability.invert ();
2762 gsi2 = gsi_after_labels (e->dest);
2764 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2765 ? -1 : 1));
2766 t = fold_build2 (PLUS_EXPR, itype,
2767 fold_convert (itype, fd->loops[j].step), t);
2768 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2769 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2770 tree step = fold_convert (itype, fd->loops[j].step);
2771 if (TYPE_UNSIGNED (itype)
2772 && fd->loops[j].cond_code == GT_EXPR)
2773 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2774 fold_build1 (NEGATE_EXPR, itype, t),
2775 fold_build1 (NEGATE_EXPR, itype, step));
2776 else
2777 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2778 t = fold_convert (type, t);
2779 t = fold_build2 (PLUS_EXPR, type, idx, t);
2780 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2781 true, GSI_SAME_STMT);
2782 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2783 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2784 cond_stmt
2785 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2786 NULL_TREE);
2787 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2788 e = split_block (gsi_bb (gsi2), cond_stmt);
2789 e->flags = EDGE_TRUE_VALUE;
2790 e->probability = profile_probability::likely ().guessed ();
2791 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2792 ne->probability = e->probability.invert ();
2793 gsi2 = gsi_after_labels (e->dest);
2794 expand_omp_build_assign (&gsi2, idx, t);
2795 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2796 break;
2798 e = split_block (cur_bb, last_stmt (cur_bb));
2800 basic_block new_cur_bb = create_empty_bb (cur_bb);
2801 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2803 gsi2 = gsi_after_labels (e->dest);
2804 if (rect_p)
2805 t = fold_build2 (PLUS_EXPR, type, vs[j],
2806 build_one_cst (type));
2807 else
2809 tree step
2810 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2811 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2813 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2814 true, GSI_SAME_STMT);
2815 expand_omp_build_assign (&gsi2, vs[j], t);
2817 edge ne = split_block (e->dest, last_stmt (e->dest));
2818 gsi2 = gsi_after_labels (ne->dest);
2820 gcond *cond_stmt;
2821 if (next_bb == entry_bb)
2822 /* No need to actually check the outermost condition. */
2823 cond_stmt
2824 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2825 boolean_true_node,
2826 NULL_TREE, NULL_TREE);
2827 else
2828 cond_stmt
2829 = gimple_build_cond (rect_p ? LT_EXPR
2830 : fd->loops[j].cond_code,
2831 vs[j], n2, NULL_TREE, NULL_TREE);
2832 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2833 edge e3, e4;
2834 if (next_bb == entry_bb)
2836 e3 = find_edge (ne->dest, next_bb);
2837 e3->flags = EDGE_FALSE_VALUE;
2838 dom_bb = ne->dest;
2840 else
2841 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2842 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2843 e4->probability = profile_probability::likely ().guessed ();
2844 e3->probability = e4->probability.invert ();
2845 basic_block esrc = e->src;
2846 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2847 cur_bb = new_cur_bb;
2848 basic_block latch_bb = next_bb;
2849 next_bb = e->dest;
2850 remove_edge (e);
2851 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2852 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2853 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2855 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2857 tree itype = TREE_TYPE (fd->loops[j].v);
2858 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2859 && fd->loops[j].m2 == NULL_TREE
2860 && !fd->loops[j].non_rect_referenced);
2861 if (j == fd->last_nonrect)
2863 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2864 t = fold_convert (itype, t);
2865 tree t2
2866 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2867 t = fold_build2 (MULT_EXPR, itype, t, t2);
2868 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2870 else if (rect_p)
2872 t = fold_convert (itype, vs[j]);
2873 t = fold_build2 (MULT_EXPR, itype, t,
2874 fold_convert (itype, fd->loops[j].step));
2875 if (POINTER_TYPE_P (vtype))
2876 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2877 else
2878 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2880 else
2881 t = vs[j];
2882 t = force_gimple_operand_gsi (gsi, t, false,
2883 NULL_TREE, true,
2884 GSI_SAME_STMT);
2885 stmt = gimple_build_assign (fd->loops[j].v, t);
2886 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2888 if (gsi_end_p (*gsi))
2889 *gsi = gsi_last_bb (gsi_bb (*gsi));
2890 else
2891 gsi_prev (gsi);
2892 if (bb_triang)
2894 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2895 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2896 *gsi = gsi_after_labels (e->dest);
2897 if (!gsi_end_p (*gsi))
2898 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2899 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2902 else
2904 t = fold_convert (itype, t);
2905 t = fold_build2 (MULT_EXPR, itype, t,
2906 fold_convert (itype, fd->loops[i].step));
2907 if (POINTER_TYPE_P (vtype))
2908 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2909 else
2910 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2911 t = force_gimple_operand_gsi (gsi, t,
2912 DECL_P (fd->loops[i].v)
2913 && TREE_ADDRESSABLE (fd->loops[i].v),
2914 NULL_TREE, false,
2915 GSI_CONTINUE_LINKING);
2916 stmt = gimple_build_assign (fd->loops[i].v, t);
2917 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2919 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2921 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2922 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2923 false, GSI_CONTINUE_LINKING);
2924 stmt = gimple_build_assign (tem, t);
2925 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2927 if (i == fd->last_nonrect)
2928 i = fd->first_nonrect;
2930 if (fd->non_rect)
2931 for (i = 0; i <= fd->last_nonrect; i++)
2932 if (fd->loops[i].m2)
2934 tree itype = TREE_TYPE (fd->loops[i].v);
2936 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2937 t = fold_build2 (MULT_EXPR, itype,
2938 fd->loops[i - fd->loops[i].outer].v, t);
2939 t = fold_build2 (PLUS_EXPR, itype, t,
2940 fold_convert (itype,
2941 unshare_expr (fd->loops[i].n2)));
2942 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2943 t = force_gimple_operand_gsi (gsi, t, false,
2944 NULL_TREE, false,
2945 GSI_CONTINUE_LINKING);
2946 stmt = gimple_build_assign (nonrect_bounds[i], t);
2947 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2951 /* Helper function for expand_omp_for_*. Generate code like:
2952 L10:
2953 V3 += STEP3;
2954 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2955 L11:
2956 V3 = N31;
2957 V2 += STEP2;
2958 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2959 L12:
2960 V2 = N21;
2961 V1 += STEP1;
2962 goto BODY_BB;
2963 For non-rectangular loops, use temporaries stored in nonrect_bounds
2964 for the upper bounds if M?2 multiplier is present. Given e.g.
2965 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2966 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2967 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2968 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2970 L10:
2971 V4 += STEP4;
2972 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2973 L11:
2974 V4 = N41 + M41 * V2; // This can be left out if the loop
2975 // refers to the immediate parent loop
2976 V3 += STEP3;
2977 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2978 L12:
2979 V3 = N31;
2980 V2 += STEP2;
2981 if (V2 cond2 N22) goto L120; else goto L13;
2982 L120:
2983 V4 = N41 + M41 * V2;
2984 NONRECT_BOUND4 = N42 + M42 * V2;
2985 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2986 L13:
2987 V2 = N21;
2988 V1 += STEP1;
2989 goto L120; */
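/* A minimal compilable restatement of the rectangular update cascade
   above (not part of GCC, guarded out; hypothetical names, with the
   outermost bound check folded in so the sketch is self-contained).  */
#if 0
static int /* returns 1 while iterations remain, i.e. "goto BODY_BB" */
next_iteration (long *v1, long *v2, long *v3,
                long n21, long n31,
                long n12, long n22, long n32,
                long step1, long step2, long step3)
{
  *v3 += step3;
  if (*v3 < n32)          /* V3 cond3 N32 */
    return 1;
  *v3 = n31;
  *v2 += step2;
  if (*v2 < n22)          /* V2 cond2 N22 */
    return 1;
  *v2 = n21;
  *v1 += step1;
  return *v1 < n12;       /* outermost check, done elsewhere in the real code */
}
#endif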
2991 static basic_block
2992 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
2993 basic_block cont_bb, basic_block body_bb)
2995 basic_block last_bb, bb, collapse_bb = NULL;
2996 int i;
2997 gimple_stmt_iterator gsi;
2998 edge e;
2999 tree t;
3000 gimple *stmt;
3002 last_bb = cont_bb;
3003 for (i = fd->collapse - 1; i >= 0; i--)
3005 tree vtype = TREE_TYPE (fd->loops[i].v);
3007 bb = create_empty_bb (last_bb);
3008 add_bb_to_loop (bb, last_bb->loop_father);
3009 gsi = gsi_start_bb (bb);
3011 if (i < fd->collapse - 1)
3013 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3014 e->probability
3015 = profile_probability::guessed_always ().apply_scale (1, 8);
3017 struct omp_for_data_loop *l = &fd->loops[i + 1];
3018 if (l->m1 == NULL_TREE || l->outer != 1)
3020 t = l->n1;
3021 if (l->m1)
3023 tree t2
3024 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3025 fd->loops[i + 1 - l->outer].v, l->m1);
3026 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3028 t = force_gimple_operand_gsi (&gsi, t,
3029 DECL_P (l->v)
3030 && TREE_ADDRESSABLE (l->v),
3031 NULL_TREE, false,
3032 GSI_CONTINUE_LINKING);
3033 stmt = gimple_build_assign (l->v, t);
3034 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3037 else
3038 collapse_bb = bb;
3040 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3042 if (POINTER_TYPE_P (vtype))
3043 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3044 else
3045 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3046 t = force_gimple_operand_gsi (&gsi, t,
3047 DECL_P (fd->loops[i].v)
3048 && TREE_ADDRESSABLE (fd->loops[i].v),
3049 NULL_TREE, false, GSI_CONTINUE_LINKING);
3050 stmt = gimple_build_assign (fd->loops[i].v, t);
3051 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3053 if (fd->loops[i].non_rect_referenced)
3055 basic_block update_bb = NULL, prev_bb = NULL;
3056 for (int j = i + 1; j <= fd->last_nonrect; j++)
3057 if (j - fd->loops[j].outer == i)
3059 tree n1, n2;
3060 struct omp_for_data_loop *l = &fd->loops[j];
3061 basic_block this_bb = create_empty_bb (last_bb);
3062 add_bb_to_loop (this_bb, last_bb->loop_father);
3063 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3064 if (prev_bb)
3066 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3067 e->probability
3068 = profile_probability::guessed_always ().apply_scale (7, 8);
3070 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3072 if (l->m1)
3074 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3075 fd->loops[i].v);
3076 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3077 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3078 false,
3079 GSI_CONTINUE_LINKING);
3080 stmt = gimple_build_assign (l->v, n1);
3081 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3082 n1 = l->v;
3084 else
3085 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3086 NULL_TREE, false,
3087 GSI_CONTINUE_LINKING);
3088 if (l->m2)
3090 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3091 fd->loops[i].v);
3092 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3093 t, unshare_expr (l->n2));
3094 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3095 false,
3096 GSI_CONTINUE_LINKING);
3097 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3098 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3099 n2 = nonrect_bounds[j];
3101 else
3102 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3103 true, NULL_TREE, false,
3104 GSI_CONTINUE_LINKING);
3105 gcond *cond_stmt
3106 = gimple_build_cond (l->cond_code, n1, n2,
3107 NULL_TREE, NULL_TREE);
3108 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3109 if (update_bb == NULL)
3110 update_bb = this_bb;
3111 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3112 e->probability
3113 = profile_probability::guessed_always ().apply_scale (1, 8);
3114 if (prev_bb == NULL)
3115 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3116 prev_bb = this_bb;
3118 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3119 e->probability
3120 = profile_probability::guessed_always ().apply_scale (7, 8);
3121 body_bb = update_bb;
3124 if (i > 0)
3126 if (fd->loops[i].m2)
3127 t = nonrect_bounds[i];
3128 else
3129 t = unshare_expr (fd->loops[i].n2);
3130 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3131 false, GSI_CONTINUE_LINKING);
3132 tree v = fd->loops[i].v;
3133 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3134 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3135 false, GSI_CONTINUE_LINKING);
3136 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3137 stmt = gimple_build_cond_empty (t);
3138 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3139 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3140 expand_omp_regimplify_p, NULL, NULL)
3141 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3142 expand_omp_regimplify_p, NULL, NULL))
3143 gimple_regimplify_operands (stmt, &gsi);
3144 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3145 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3147 else
3148 make_edge (bb, body_bb, EDGE_FALLTHRU);
3149 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3150 last_bb = bb;
3153 return collapse_bb;
3156 /* Expand #pragma omp ordered depend(source). */
3158 static void
3159 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3160 tree *counts, location_t loc)
3162 enum built_in_function source_ix
3163 = fd->iter_type == long_integer_type_node
3164 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3165 gimple *g
3166 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3167 build_fold_addr_expr (counts[fd->ordered]));
3168 gimple_set_location (g, loc);
3169 gsi_insert_before (gsi, g, GSI_SAME_STMT);
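/* For instance (illustrative only): with long iterators,
   #pragma omp ordered depend(source)
   expands to a call
   GOMP_doacross_post (&counts[fd->ordered]);
   posting the current logical iteration vector to libgomp.  */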
3172 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3174 static void
3175 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3176 tree *counts, tree c, location_t loc)
3178 auto_vec<tree, 10> args;
3179 enum built_in_function sink_ix
3180 = fd->iter_type == long_integer_type_node
3181 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3182 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3183 int i;
3184 gimple_stmt_iterator gsi2 = *gsi;
3185 bool warned_step = false;
3187 for (i = 0; i < fd->ordered; i++)
3189 tree step = NULL_TREE;
3190 off = TREE_PURPOSE (deps);
3191 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3193 step = TREE_OPERAND (off, 1);
3194 off = TREE_OPERAND (off, 0);
3196 if (!integer_zerop (off))
3198 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3199 || fd->loops[i].cond_code == GT_EXPR);
3200 bool forward = fd->loops[i].cond_code == LT_EXPR;
3201 if (step)
3203 /* Non-simple Fortran DO loops. If the step is variable,
3204 we don't know even the direction at compile time, so we
3205 can't warn. */
3206 if (TREE_CODE (step) != INTEGER_CST)
3207 break;
3208 forward = tree_int_cst_sgn (step) != -1;
3210 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3211 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3212 "waiting for lexically later iteration");
3213 break;
3215 deps = TREE_CHAIN (deps);
3217 /* If all offsets corresponding to the collapsed loops are zero,
3218 this depend clause can be ignored. FIXME: but a flush is still
3219 needed; we would need to emit one __sync_synchronize () for it
3220 (perhaps conditionally). Solve this together with the
3221 conservative dependence folding optimization.
3222 if (i >= fd->collapse)
3223 return; */
3225 deps = OMP_CLAUSE_DECL (c);
3226 gsi_prev (&gsi2);
3227 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3228 edge e2 = split_block_after_labels (e1->dest);
3230 gsi2 = gsi_after_labels (e1->dest);
3231 *gsi = gsi_last_bb (e1->src);
3232 for (i = 0; i < fd->ordered; i++)
3234 tree itype = TREE_TYPE (fd->loops[i].v);
3235 tree step = NULL_TREE;
3236 tree orig_off = NULL_TREE;
3237 if (POINTER_TYPE_P (itype))
3238 itype = sizetype;
3239 if (i)
3240 deps = TREE_CHAIN (deps);
3241 off = TREE_PURPOSE (deps);
3242 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3244 step = TREE_OPERAND (off, 1);
3245 off = TREE_OPERAND (off, 0);
3246 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3247 && integer_onep (fd->loops[i].step)
3248 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3250 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3251 if (step)
3253 off = fold_convert_loc (loc, itype, off);
3254 orig_off = off;
3255 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3258 if (integer_zerop (off))
3259 t = boolean_true_node;
3260 else
3262 tree a;
3263 tree co = fold_convert_loc (loc, itype, off);
3264 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3266 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3267 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3268 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3269 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3270 co);
3272 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3273 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3274 fd->loops[i].v, co);
3275 else
3276 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3277 fd->loops[i].v, co);
3278 if (step)
3280 tree t1, t2;
3281 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3282 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3283 fd->loops[i].n1);
3284 else
3285 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3286 fd->loops[i].n2);
3287 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3288 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3289 fd->loops[i].n2);
3290 else
3291 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3292 fd->loops[i].n1);
3293 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3294 step, build_int_cst (TREE_TYPE (step), 0));
3295 if (TREE_CODE (step) != INTEGER_CST)
3297 t1 = unshare_expr (t1);
3298 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3299 false, GSI_CONTINUE_LINKING);
3300 t2 = unshare_expr (t2);
3301 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3302 false, GSI_CONTINUE_LINKING);
3304 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3305 t, t2, t1);
3307 else if (fd->loops[i].cond_code == LT_EXPR)
3309 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3310 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3311 fd->loops[i].n1);
3312 else
3313 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3314 fd->loops[i].n2);
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3317 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3318 fd->loops[i].n2);
3319 else
3320 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3323 if (cond)
3324 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3325 else
3326 cond = t;
3328 off = fold_convert_loc (loc, itype, off);
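/* For non-unit steps the sink offset must be a multiple of the step
   to name an iteration that actually executes; if it provably isn't,
   warn, and otherwise fold the divisibility check into COND.  */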
3330 if (step
3331 || (fd->loops[i].cond_code == LT_EXPR
3332 ? !integer_onep (fd->loops[i].step)
3333 : !integer_minus_onep (fd->loops[i].step)))
3335 if (step == NULL_TREE
3336 && TYPE_UNSIGNED (itype)
3337 && fd->loops[i].cond_code == GT_EXPR)
3338 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3339 fold_build1_loc (loc, NEGATE_EXPR, itype,
3340 s));
3341 else
3342 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3343 orig_off ? orig_off : off, s);
3344 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3345 build_int_cst (itype, 0));
3346 if (integer_zerop (t) && !warned_step)
3348 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3349 "refers to iteration never in the iteration "
3350 "space");
3351 warned_step = true;
3353 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3354 cond, t);
3357 if (i <= fd->collapse - 1 && fd->collapse > 1)
3358 t = fd->loop.v;
3359 else if (counts[i])
3360 t = counts[i];
3361 else
3363 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3364 fd->loops[i].v, fd->loops[i].n1);
3365 t = fold_convert_loc (loc, fd->iter_type, t);
3367 if (step)
3368 /* We have already divided off by step earlier. */;
3369 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3370 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3371 fold_build1_loc (loc, NEGATE_EXPR, itype,
3372 s));
3373 else
3374 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3375 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3376 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3377 off = fold_convert_loc (loc, fd->iter_type, off);
3378 if (i <= fd->collapse - 1 && fd->collapse > 1)
3380 if (i)
3381 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3382 off);
3383 if (i < fd->collapse - 1)
3385 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3386 counts[i]);
3387 continue;
3390 off = unshare_expr (off);
3391 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3392 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3393 true, GSI_SAME_STMT);
3394 args.safe_push (t);
3396 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3397 gimple_set_location (g, loc);
3398 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3400 cond = unshare_expr (cond);
3401 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3402 GSI_CONTINUE_LINKING);
3403 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
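/* The false edge added below branches around the block containing the
   doacross wait call whenever COND shows the sink iteration cannot
   exist in the iteration space.  */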
3404 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3405 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3406 e1->probability = e3->probability.invert ();
3407 e1->flags = EDGE_TRUE_VALUE;
3408 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3410 *gsi = gsi_after_labels (e2->dest);
3413 /* Expand all #pragma omp ordered depend(source) and
3414 #pragma omp ordered depend(sink:...) constructs in the current
3415 #pragma omp for ordered(n) region. */
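/* An illustrative example (not taken from the source above):

     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
	   ...
	   #pragma omp ordered depend(source)
	 }

   Each depend(sink:) vector is expanded into a (conditionally guarded)
   doacross wait on the specified earlier iteration, and depend(source)
   into a post of the current iteration vector.  */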
3417 static void
3418 expand_omp_ordered_source_sink (struct omp_region *region,
3419 struct omp_for_data *fd, tree *counts,
3420 basic_block cont_bb)
3422 struct omp_region *inner;
3423 int i;
3424 for (i = fd->collapse - 1; i < fd->ordered; i++)
3425 if (i == fd->collapse - 1 && fd->collapse > 1)
3426 counts[i] = NULL_TREE;
3427 else if (i >= fd->collapse && !cont_bb)
3428 counts[i] = build_zero_cst (fd->iter_type);
3429 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3430 && integer_onep (fd->loops[i].step))
3431 counts[i] = NULL_TREE;
3432 else
3433 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
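/* counts[fd->ordered] is the array holding the current iteration
   vector published for depend(source): one element for the collapsed
   nest plus one for each remaining ordered dimension.  */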
3434 tree atype
3435 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3436 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3437 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3439 for (inner = region->inner; inner; inner = inner->next)
3440 if (inner->type == GIMPLE_OMP_ORDERED)
3442 gomp_ordered *ord_stmt = inner->ord_stmt;
3443 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3444 location_t loc = gimple_location (ord_stmt);
3445 tree c;
3446 for (c = gimple_omp_ordered_clauses (ord_stmt);
3447 c; c = OMP_CLAUSE_CHAIN (c))
3448 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3449 break;
3450 if (c)
3451 expand_omp_ordered_source (&gsi, fd, counts, loc);
3452 for (c = gimple_omp_ordered_clauses (ord_stmt);
3453 c; c = OMP_CLAUSE_CHAIN (c))
3454 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3455 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3456 gsi_remove (&gsi, true);
3460 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3461 collapsed. */
3463 static basic_block
3464 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3465 basic_block cont_bb, basic_block body_bb,
3466 bool ordered_lastprivate)
3468 if (fd->ordered == fd->collapse)
3469 return cont_bb;
3471 if (!cont_bb)
3473 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3474 for (int i = fd->collapse; i < fd->ordered; i++)
3476 tree type = TREE_TYPE (fd->loops[i].v);
3477 tree n1 = fold_convert (type, fd->loops[i].n1);
3478 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3479 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3480 size_int (i - fd->collapse + 1),
3481 NULL_TREE, NULL_TREE);
3482 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3484 return NULL;
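/* Otherwise wrap BODY_BB in one new loop per non-collapsed ordered
   dimension, building them from the innermost dimension outwards.  */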
3487 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3489 tree t, type = TREE_TYPE (fd->loops[i].v);
3490 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3491 expand_omp_build_assign (&gsi, fd->loops[i].v,
3492 fold_convert (type, fd->loops[i].n1));
3493 if (counts[i])
3494 expand_omp_build_assign (&gsi, counts[i],
3495 build_zero_cst (fd->iter_type));
3496 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3497 size_int (i - fd->collapse + 1),
3498 NULL_TREE, NULL_TREE);
3499 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3500 if (!gsi_end_p (gsi))
3501 gsi_prev (&gsi);
3502 else
3503 gsi = gsi_last_bb (body_bb);
3504 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3505 basic_block new_body = e1->dest;
3506 if (body_bb == cont_bb)
3507 cont_bb = new_body;
3508 edge e2 = NULL;
3509 basic_block new_header;
3510 if (EDGE_COUNT (cont_bb->preds) > 0)
3512 gsi = gsi_last_bb (cont_bb);
3513 if (POINTER_TYPE_P (type))
3514 t = fold_build_pointer_plus (fd->loops[i].v,
3515 fold_convert (sizetype,
3516 fd->loops[i].step));
3517 else
3518 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3519 fold_convert (type, fd->loops[i].step));
3520 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3521 if (counts[i])
3523 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3524 build_int_cst (fd->iter_type, 1));
3525 expand_omp_build_assign (&gsi, counts[i], t);
3526 t = counts[i];
3528 else
3530 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3531 fd->loops[i].v, fd->loops[i].n1);
3532 t = fold_convert (fd->iter_type, t);
3533 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 true, GSI_SAME_STMT);
3536 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3537 size_int (i - fd->collapse + 1),
3538 NULL_TREE, NULL_TREE);
3539 expand_omp_build_assign (&gsi, aref, t);
3540 gsi_prev (&gsi);
3541 e2 = split_block (cont_bb, gsi_stmt (gsi));
3542 new_header = e2->dest;
3544 else
3545 new_header = cont_bb;
3546 gsi = gsi_after_labels (new_header);
3547 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3548 true, GSI_SAME_STMT);
3549 tree n2
3550 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3551 true, NULL_TREE, true, GSI_SAME_STMT);
3552 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3553 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3554 edge e3 = split_block (new_header, gsi_stmt (gsi));
3555 cont_bb = e3->dest;
3556 remove_edge (e1);
3557 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3558 e3->flags = EDGE_FALSE_VALUE;
3559 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3560 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3561 e1->probability = e3->probability.invert ();
3563 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3564 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3566 if (e2)
3568 class loop *loop = alloc_loop ();
3569 loop->header = new_header;
3570 loop->latch = e2->src;
3571 add_loop (loop, body_bb->loop_father);
3575 /* If there are any lastprivate clauses and it is possible some loops
3576 might have zero iterations, ensure all the decls are initialized,
3577 otherwise we could crash evaluating C++ class iterators with lastprivate
3578 clauses. */
3579 bool need_inits = false;
3580 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3581 if (need_inits)
3583 tree type = TREE_TYPE (fd->loops[i].v);
3584 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3585 expand_omp_build_assign (&gsi, fd->loops[i].v,
3586 fold_convert (type, fd->loops[i].n1));
3588 else
3590 tree type = TREE_TYPE (fd->loops[i].v);
3591 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3592 boolean_type_node,
3593 fold_convert (type, fd->loops[i].n1),
3594 fold_convert (type, fd->loops[i].n2));
3595 if (!integer_onep (this_cond))
3596 need_inits = true;
3599 return cont_bb;
3602 /* A subroutine of expand_omp_for. Generate code for a parallel
3603 loop with any schedule. Given parameters:
3605 for (V = N1; V cond N2; V += STEP) BODY;
3607 where COND is "<" or ">", we generate pseudocode
3609 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3610 if (more) goto L0; else goto L3;
3611 L0:
3612 V = istart0;
3613 iend = iend0;
3614 L1:
3615 BODY;
3616 V += STEP;
3617 if (V cond iend) goto L1; else goto L2;
3618 L2:
3619 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3620 L3:
3622 If this is a combined omp parallel loop, instead of the call to
3623 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3624 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3625 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3626 inner GIMPLE_OMP_FOR and V += STEP; and
3627 if (V cond iend) goto L1; else goto L2; are removed.
3629 For collapsed loops, given parameters:
3630 collapse(3)
3631 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3632 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3633 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3634 BODY;
3636 we generate pseudocode
3638 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3639 if (cond3 is <)
3640 adj = STEP3 - 1;
3641 else
3642 adj = STEP3 + 1;
3643 count3 = (adj + N32 - N31) / STEP3;
3644 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3645 if (cond2 is <)
3646 adj = STEP2 - 1;
3647 else
3648 adj = STEP2 + 1;
3649 count2 = (adj + N22 - N21) / STEP2;
3650 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3651 if (cond1 is <)
3652 adj = STEP1 - 1;
3653 else
3654 adj = STEP1 + 1;
3655 count1 = (adj + N12 - N11) / STEP1;
3656 count = count1 * count2 * count3;
3657 goto Z1;
3658 Z0:
3659 count = 0;
3660 Z1:
3661 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3662 if (more) goto L0; else goto L3;
3663 L0:
3664 V = istart0;
3665 T = V;
3666 V3 = N31 + (T % count3) * STEP3;
3667 T = T / count3;
3668 V2 = N21 + (T % count2) * STEP2;
3669 T = T / count2;
3670 V1 = N11 + T * STEP1;
3671 iend = iend0;
3672 L1:
3673 BODY;
3674 V += 1;
3675 if (V < iend) goto L10; else goto L2;
3676 L10:
3677 V3 += STEP3;
3678 if (V3 cond3 N32) goto L1; else goto L11;
3679 L11:
3680 V3 = N31;
3681 V2 += STEP2;
3682 if (V2 cond2 N22) goto L1; else goto L12;
3683 L12:
3684 V2 = N21;
3685 V1 += STEP1;
3686 goto L1;
3687 L2:
3688 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3689 L3:
3691 */
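/* A worked instance of the decomposition above (illustrative numbers):
   with count3 = 4 and count2 = 5, logical iteration V = 17 yields
   17 % 4 = 1 for the innermost index, then T = 17 / 4 = 4,
   4 % 5 = 4 for the middle index, and T = 4 / 5 = 0 for the outermost;
   re-linearizing gives (0 * 5 + 4) * 4 + 1 = 17 again.  */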
3693 static void
3694 expand_omp_for_generic (struct omp_region *region,
3695 struct omp_for_data *fd,
3696 enum built_in_function start_fn,
3697 enum built_in_function next_fn,
3698 tree sched_arg,
3699 gimple *inner_stmt)
3701 tree type, istart0, iend0, iend;
3702 tree t, vmain, vback, bias = NULL_TREE;
3703 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3704 basic_block l2_bb = NULL, l3_bb = NULL;
3705 gimple_stmt_iterator gsi;
3706 gassign *assign_stmt;
3707 bool in_combined_parallel = is_combined_parallel (region);
3708 bool broken_loop = region->cont == NULL;
3709 edge e, ne;
3710 tree *counts = NULL;
3711 int i;
3712 bool ordered_lastprivate = false;
3714 gcc_assert (!broken_loop || !in_combined_parallel);
3715 gcc_assert (fd->iter_type == long_integer_type_node
3716 || !in_combined_parallel);
3718 entry_bb = region->entry;
3719 cont_bb = region->cont;
3720 collapse_bb = NULL;
3721 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3722 gcc_assert (broken_loop
3723 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3724 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3725 l1_bb = single_succ (l0_bb);
3726 if (!broken_loop)
3728 l2_bb = create_empty_bb (cont_bb);
3729 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3730 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3731 == l1_bb));
3732 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3734 else
3735 l2_bb = NULL;
3736 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3737 exit_bb = region->exit;
3739 gsi = gsi_last_nondebug_bb (entry_bb);
3741 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3742 if (fd->ordered
3743 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3744 OMP_CLAUSE_LASTPRIVATE))
3745 ordered_lastprivate = true;
3746 tree reductions = NULL_TREE;
3747 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3748 tree memv = NULL_TREE;
3749 if (fd->lastprivate_conditional)
3751 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3752 OMP_CLAUSE__CONDTEMP_);
3753 if (fd->have_pointer_condtemp)
3754 condtemp = OMP_CLAUSE_DECL (c);
3755 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3756 cond_var = OMP_CLAUSE_DECL (c);
3758 if (sched_arg)
3760 if (fd->have_reductemp)
3762 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3763 OMP_CLAUSE__REDUCTEMP_);
3764 reductions = OMP_CLAUSE_DECL (c);
3765 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3766 gimple *g = SSA_NAME_DEF_STMT (reductions);
3767 reductions = gimple_assign_rhs1 (g);
3768 OMP_CLAUSE_DECL (c) = reductions;
3769 entry_bb = gimple_bb (g);
3770 edge e = split_block (entry_bb, g);
3771 if (region->entry == entry_bb)
3772 region->entry = e->dest;
3773 gsi = gsi_last_bb (entry_bb);
3775 else
3776 reductions = null_pointer_node;
3777 if (fd->have_pointer_condtemp)
3779 tree type = TREE_TYPE (condtemp);
3780 memv = create_tmp_var (type);
3781 TREE_ADDRESSABLE (memv) = 1;
3782 unsigned HOST_WIDE_INT sz
3783 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3784 sz *= fd->lastprivate_conditional;
3785 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3786 false);
3787 mem = build_fold_addr_expr (memv);
3789 else
3790 mem = null_pointer_node;
3792 if (fd->collapse > 1 || fd->ordered)
3794 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3795 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3797 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3798 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3799 zero_iter1_bb, first_zero_iter1,
3800 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3802 if (zero_iter1_bb)
3804 /* Some counts[i] vars might be uninitialized if
3805 some loop has zero iterations. But the body shouldn't
3806 be executed in that case, so just avoid uninit warnings. */
3807 for (i = first_zero_iter1;
3808 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3809 if (SSA_VAR_P (counts[i]))
3810 TREE_NO_WARNING (counts[i]) = 1;
3811 gsi_prev (&gsi);
3812 e = split_block (entry_bb, gsi_stmt (gsi));
3813 entry_bb = e->dest;
3814 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3815 gsi = gsi_last_nondebug_bb (entry_bb);
3816 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3817 get_immediate_dominator (CDI_DOMINATORS,
3818 zero_iter1_bb));
3820 if (zero_iter2_bb)
3822 /* Some counts[i] vars might be uninitialized if
3823 some loop has zero iterations. But the body shouldn't
3824 be executed in that case, so just avoid uninit warnings. */
3825 for (i = first_zero_iter2; i < fd->ordered; i++)
3826 if (SSA_VAR_P (counts[i]))
3827 TREE_NO_WARNING (counts[i]) = 1;
3828 if (zero_iter1_bb)
3829 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3830 else
3832 gsi_prev (&gsi);
3833 e = split_block (entry_bb, gsi_stmt (gsi));
3834 entry_bb = e->dest;
3835 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3836 gsi = gsi_last_nondebug_bb (entry_bb);
3837 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3838 get_immediate_dominator
3839 (CDI_DOMINATORS, zero_iter2_bb));
3842 if (fd->collapse == 1)
3844 counts[0] = fd->loop.n2;
3845 fd->loop = fd->loops[0];
3849 type = TREE_TYPE (fd->loop.v);
3850 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3851 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3852 TREE_ADDRESSABLE (istart0) = 1;
3853 TREE_ADDRESSABLE (iend0) = 1;
3855 /* See if we need to bias by LLONG_MIN. */
3856 if (fd->iter_type == long_long_unsigned_type_node
3857 && TREE_CODE (type) == INTEGER_TYPE
3858 && !TYPE_UNSIGNED (type)
3859 && fd->ordered == 0)
3861 tree n1, n2;
3863 if (fd->loop.cond_code == LT_EXPR)
3865 n1 = fd->loop.n1;
3866 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3868 else
3870 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3871 n2 = fd->loop.n1;
3873 if (TREE_CODE (n1) != INTEGER_CST
3874 || TREE_CODE (n2) != INTEGER_CST
3875 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3876 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
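/* Adding the most negative value of TYPE maps the signed iteration
   range into the unsigned long long space the runtime works in; the
   bias is subtracted again when istart0/iend0 are copied back into V
   and iend below.  */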
3879 gimple_stmt_iterator gsif = gsi;
3880 gsi_prev (&gsif);
3882 tree arr = NULL_TREE;
3883 if (in_combined_parallel)
3885 gcc_assert (fd->ordered == 0);
3886 /* In a combined parallel loop, emit a call to
3887 GOMP_loop_foo_next. */
3888 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3889 build_fold_addr_expr (istart0),
3890 build_fold_addr_expr (iend0));
3892 else
3894 tree t0, t1, t2, t3, t4;
3895 /* If this is not a combined parallel loop, emit a call to
3896 GOMP_loop_foo_start in ENTRY_BB. */
3897 t4 = build_fold_addr_expr (iend0);
3898 t3 = build_fold_addr_expr (istart0);
3899 if (fd->ordered)
3901 t0 = build_int_cst (unsigned_type_node,
3902 fd->ordered - fd->collapse + 1);
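/* For doacross loops the GOMP_loop_doacross_*_start entrypoints take
   the number of dimensions and the address of an array of loop counts
   instead of separate N1/N2/STEP arguments.  */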
3903 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3904 fd->ordered
3905 - fd->collapse + 1),
3906 ".omp_counts");
3907 DECL_NAMELESS (arr) = 1;
3908 TREE_ADDRESSABLE (arr) = 1;
3909 TREE_STATIC (arr) = 1;
3910 vec<constructor_elt, va_gc> *v;
3911 vec_alloc (v, fd->ordered - fd->collapse + 1);
3912 int idx;
3914 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3916 tree c;
3917 if (idx == 0 && fd->collapse > 1)
3918 c = fd->loop.n2;
3919 else
3920 c = counts[idx + fd->collapse - 1];
3921 tree purpose = size_int (idx);
3922 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3923 if (TREE_CODE (c) != INTEGER_CST)
3924 TREE_STATIC (arr) = 0;
3927 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3928 if (!TREE_STATIC (arr))
3929 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3930 void_type_node, arr),
3931 true, NULL_TREE, true, GSI_SAME_STMT);
3932 t1 = build_fold_addr_expr (arr);
3933 t2 = NULL_TREE;
3935 else
3937 t2 = fold_convert (fd->iter_type, fd->loop.step);
3938 t1 = fd->loop.n2;
3939 t0 = fd->loop.n1;
3940 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3942 tree innerc
3943 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3944 OMP_CLAUSE__LOOPTEMP_);
3945 gcc_assert (innerc);
3946 t0 = OMP_CLAUSE_DECL (innerc);
3947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3948 OMP_CLAUSE__LOOPTEMP_);
3949 gcc_assert (innerc);
3950 t1 = OMP_CLAUSE_DECL (innerc);
3952 if (POINTER_TYPE_P (TREE_TYPE (t0))
3953 && TYPE_PRECISION (TREE_TYPE (t0))
3954 != TYPE_PRECISION (fd->iter_type))
3956 /* Avoid casting pointers to integer of a different size. */
3957 tree itype = signed_type_for (type);
3958 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3959 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3961 else
3963 t1 = fold_convert (fd->iter_type, t1);
3964 t0 = fold_convert (fd->iter_type, t0);
3966 if (bias)
3968 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3969 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3972 if (fd->iter_type == long_integer_type_node || fd->ordered)
3974 if (fd->chunk_size)
3976 t = fold_convert (fd->iter_type, fd->chunk_size);
3977 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3978 if (sched_arg)
3980 if (fd->ordered)
3981 t = build_call_expr (builtin_decl_explicit (start_fn),
3982 8, t0, t1, sched_arg, t, t3, t4,
3983 reductions, mem);
3984 else
3985 t = build_call_expr (builtin_decl_explicit (start_fn),
3986 9, t0, t1, t2, sched_arg, t, t3, t4,
3987 reductions, mem);
3989 else if (fd->ordered)
3990 t = build_call_expr (builtin_decl_explicit (start_fn),
3991 5, t0, t1, t, t3, t4);
3992 else
3993 t = build_call_expr (builtin_decl_explicit (start_fn),
3994 6, t0, t1, t2, t, t3, t4);
3996 else if (fd->ordered)
3997 t = build_call_expr (builtin_decl_explicit (start_fn),
3998 4, t0, t1, t3, t4);
3999 else
4000 t = build_call_expr (builtin_decl_explicit (start_fn),
4001 5, t0, t1, t2, t3, t4);
4003 else
4005 tree t5;
4006 tree c_bool_type;
4007 tree bfn_decl;
4009 /* The GOMP_loop_ull_*start functions have an additional boolean
4010 argument, true for < loops and false for > loops.
4011 In Fortran, the C bool type can be different from
4012 boolean_type_node. */
4013 bfn_decl = builtin_decl_explicit (start_fn);
4014 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4015 t5 = build_int_cst (c_bool_type,
4016 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4017 if (fd->chunk_size)
4019 tree bfn_decl = builtin_decl_explicit (start_fn);
4020 t = fold_convert (fd->iter_type, fd->chunk_size);
4021 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4022 if (sched_arg)
4023 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4024 t, t3, t4, reductions, mem);
4025 else
4026 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4028 else
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 6, t5, t0, t1, t2, t3, t4);
4033 if (TREE_TYPE (t) != boolean_type_node)
4034 t = fold_build2 (NE_EXPR, boolean_type_node,
4035 t, build_int_cst (TREE_TYPE (t), 0));
4036 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4037 true, GSI_SAME_STMT);
4038 if (arr && !TREE_STATIC (arr))
4040 tree clobber = build_clobber (TREE_TYPE (arr));
4041 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4042 GSI_SAME_STMT);
4044 if (fd->have_pointer_condtemp)
4045 expand_omp_build_assign (&gsi, condtemp, memv, false);
4046 if (fd->have_reductemp)
4048 gimple *g = gsi_stmt (gsi);
4049 gsi_remove (&gsi, true);
4050 release_ssa_name (gimple_assign_lhs (g));
4052 entry_bb = region->entry;
4053 gsi = gsi_last_nondebug_bb (entry_bb);
4055 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4057 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4059 /* Remove the GIMPLE_OMP_FOR statement. */
4060 gsi_remove (&gsi, true);
4062 if (gsi_end_p (gsif))
4063 gsif = gsi_after_labels (gsi_bb (gsif));
4064 else
4065 gsi_next (&gsif);
4066 /* Iteration setup for sequential loop goes in L0_BB. */
4067 tree startvar = fd->loop.v;
4068 tree endvar = NULL_TREE;
4070 if (gimple_omp_for_combined_p (fd->for_stmt))
4072 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4073 && gimple_omp_for_kind (inner_stmt)
4074 == GF_OMP_FOR_KIND_SIMD);
4075 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4076 OMP_CLAUSE__LOOPTEMP_);
4077 gcc_assert (innerc);
4078 startvar = OMP_CLAUSE_DECL (innerc);
4079 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4080 OMP_CLAUSE__LOOPTEMP_);
4081 gcc_assert (innerc);
4082 endvar = OMP_CLAUSE_DECL (innerc);
4085 gsi = gsi_start_bb (l0_bb);
4086 t = istart0;
4087 if (fd->ordered && fd->collapse == 1)
4088 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4089 fold_convert (fd->iter_type, fd->loop.step));
4090 else if (bias)
4091 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4092 if (fd->ordered && fd->collapse == 1)
4094 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4095 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4096 fd->loop.n1, fold_convert (sizetype, t));
4097 else
4099 t = fold_convert (TREE_TYPE (startvar), t);
4100 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4101 fd->loop.n1, t);
4104 else
4106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4107 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4108 t = fold_convert (TREE_TYPE (startvar), t);
4110 t = force_gimple_operand_gsi (&gsi, t,
4111 DECL_P (startvar)
4112 && TREE_ADDRESSABLE (startvar),
4113 NULL_TREE, false, GSI_CONTINUE_LINKING);
4114 assign_stmt = gimple_build_assign (startvar, t);
4115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4116 if (cond_var)
4118 tree itype = TREE_TYPE (cond_var);
4119 /* For lastprivate(conditional:) itervar, we need some iteration
4120 counter that starts at unsigned non-zero and increases.
4121 Prefer as few IVs as possible, so if we can use startvar
4122 itself, use that, or startvar + constant (those would be
4123 incremented with step), and as last resort use the istart0 + 1
4124 incremented by 1. */
4125 if ((fd->ordered && fd->collapse == 1)
4126 || bias
4127 || POINTER_TYPE_P (type)
4128 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4129 || fd->loop.cond_code != LT_EXPR)
4130 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4131 build_int_cst (itype, 1));
4132 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4133 t = fold_convert (itype, t);
4134 else
4136 tree c = fold_convert (itype, fd->loop.n1);
4137 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4138 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4140 t = force_gimple_operand_gsi (&gsi, t, false,
4141 NULL_TREE, false, GSI_CONTINUE_LINKING);
4142 assign_stmt = gimple_build_assign (cond_var, t);
4143 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4146 t = iend0;
4147 if (fd->ordered && fd->collapse == 1)
4148 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4149 fold_convert (fd->iter_type, fd->loop.step));
4150 else if (bias)
4151 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4152 if (fd->ordered && fd->collapse == 1)
4154 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4155 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4156 fd->loop.n1, fold_convert (sizetype, t));
4157 else
4159 t = fold_convert (TREE_TYPE (startvar), t);
4160 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4161 fd->loop.n1, t);
4164 else
4166 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4167 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4168 t = fold_convert (TREE_TYPE (startvar), t);
4170 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4171 false, GSI_CONTINUE_LINKING);
4172 if (endvar)
4174 assign_stmt = gimple_build_assign (endvar, iend);
4175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4176 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4177 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4178 else
4179 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4182 /* Handle linear clause adjustments. */
4183 tree itercnt = NULL_TREE;
4184 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4185 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4186 c; c = OMP_CLAUSE_CHAIN (c))
4187 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4188 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4190 tree d = OMP_CLAUSE_DECL (c);
4191 bool is_ref = omp_is_reference (d);
4192 tree t = d, a, dest;
4193 if (is_ref)
4194 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4195 tree type = TREE_TYPE (t);
4196 if (POINTER_TYPE_P (type))
4197 type = sizetype;
4198 dest = unshare_expr (t);
4199 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4200 expand_omp_build_assign (&gsif, v, t);
4201 if (itercnt == NULL_TREE)
4203 itercnt = startvar;
4204 tree n1 = fd->loop.n1;
4205 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4207 itercnt
4208 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4209 itercnt);
4210 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4212 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4213 itercnt, n1);
4214 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4215 itercnt, fd->loop.step);
4216 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4217 NULL_TREE, false,
4218 GSI_CONTINUE_LINKING);
4220 a = fold_build2 (MULT_EXPR, type,
4221 fold_convert (type, itercnt),
4222 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4223 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4224 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4225 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4226 false, GSI_CONTINUE_LINKING);
4227 assign_stmt = gimple_build_assign (dest, t);
4228 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4230 if (fd->collapse > 1)
4231 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4233 if (fd->ordered)
4235 /* Until now, the counts array contained the number of iterations of
4236 the ith loop, or the variable holding it. From now on, we need
4237 those counts only for collapsed loops, and only for the 2nd
4238 till the last collapsed one. Move those one element earlier;
4239 we'll use counts[fd->collapse - 1] for the first source/sink
4240 iteration counter and so on, and counts[fd->ordered]
4241 as the array holding the current counter values for
4242 depend(source). */
4243 if (fd->collapse > 1)
4244 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4245 if (broken_loop)
4247 int i;
4248 for (i = fd->collapse; i < fd->ordered; i++)
4250 tree type = TREE_TYPE (fd->loops[i].v);
4251 tree this_cond
4252 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4253 fold_convert (type, fd->loops[i].n1),
4254 fold_convert (type, fd->loops[i].n2));
4255 if (!integer_onep (this_cond))
4256 break;
4258 if (i < fd->ordered)
4260 cont_bb
4261 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4262 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4263 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4264 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4265 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4266 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4267 make_edge (cont_bb, l1_bb, 0);
4268 l2_bb = create_empty_bb (cont_bb);
4269 broken_loop = false;
4272 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4273 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4274 ordered_lastprivate);
4275 if (counts[fd->collapse - 1])
4277 gcc_assert (fd->collapse == 1);
4278 gsi = gsi_last_bb (l0_bb);
4279 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4280 istart0, true);
4281 gsi = gsi_last_bb (cont_bb);
4282 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4283 build_int_cst (fd->iter_type, 1));
4284 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4285 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4286 size_zero_node, NULL_TREE, NULL_TREE);
4287 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4288 t = counts[fd->collapse - 1];
4290 else if (fd->collapse > 1)
4291 t = fd->loop.v;
4292 else
4294 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4295 fd->loops[0].v, fd->loops[0].n1);
4296 t = fold_convert (fd->iter_type, t);
4298 gsi = gsi_last_bb (l0_bb);
4299 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4300 size_zero_node, NULL_TREE, NULL_TREE);
4301 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 expand_omp_build_assign (&gsi, aref, t, true);
4306 if (!broken_loop)
4308 /* Code to control the increment and predicate for the sequential
4309 loop goes in the CONT_BB. */
4310 gsi = gsi_last_nondebug_bb (cont_bb);
4311 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4312 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4313 vmain = gimple_omp_continue_control_use (cont_stmt);
4314 vback = gimple_omp_continue_control_def (cont_stmt);
4316 if (cond_var)
4318 tree itype = TREE_TYPE (cond_var);
4319 tree t2;
4320 if ((fd->ordered && fd->collapse == 1)
4321 || bias
4322 || POINTER_TYPE_P (type)
4323 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4324 || fd->loop.cond_code != LT_EXPR)
4325 t2 = build_int_cst (itype, 1);
4326 else
4327 t2 = fold_convert (itype, fd->loop.step);
4328 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4329 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4330 NULL_TREE, true, GSI_SAME_STMT);
4331 assign_stmt = gimple_build_assign (cond_var, t2);
4332 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4335 if (!gimple_omp_for_combined_p (fd->for_stmt))
4337 if (POINTER_TYPE_P (type))
4338 t = fold_build_pointer_plus (vmain, fd->loop.step);
4339 else
4340 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4341 t = force_gimple_operand_gsi (&gsi, t,
4342 DECL_P (vback)
4343 && TREE_ADDRESSABLE (vback),
4344 NULL_TREE, true, GSI_SAME_STMT);
4345 assign_stmt = gimple_build_assign (vback, t);
4346 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4348 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4350 tree tem;
4351 if (fd->collapse > 1)
4352 tem = fd->loop.v;
4353 else
4355 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4356 fd->loops[0].v, fd->loops[0].n1);
4357 tem = fold_convert (fd->iter_type, tem);
4359 tree aref = build4 (ARRAY_REF, fd->iter_type,
4360 counts[fd->ordered], size_zero_node,
4361 NULL_TREE, NULL_TREE);
4362 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4363 true, GSI_SAME_STMT);
4364 expand_omp_build_assign (&gsi, aref, tem);
4367 t = build2 (fd->loop.cond_code, boolean_type_node,
4368 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4369 iend);
4370 gcond *cond_stmt = gimple_build_cond_empty (t);
4371 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4374 /* Remove GIMPLE_OMP_CONTINUE. */
4375 gsi_remove (&gsi, true);
4377 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4378 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4380 /* Emit code to get the next parallel iteration in L2_BB. */
4381 gsi = gsi_start_bb (l2_bb);
4383 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4384 build_fold_addr_expr (istart0),
4385 build_fold_addr_expr (iend0));
4386 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4387 false, GSI_CONTINUE_LINKING);
4388 if (TREE_TYPE (t) != boolean_type_node)
4389 t = fold_build2 (NE_EXPR, boolean_type_node,
4390 t, build_int_cst (TREE_TYPE (t), 0));
4391 gcond *cond_stmt = gimple_build_cond_empty (t);
4392 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4395 /* Add the loop cleanup function. */
4396 gsi = gsi_last_nondebug_bb (exit_bb);
4397 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4398 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4399 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4400 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4401 else
4402 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4403 gcall *call_stmt = gimple_build_call (t, 0);
4404 if (fd->ordered)
4406 tree arr = counts[fd->ordered];
4407 tree clobber = build_clobber (TREE_TYPE (arr));
4408 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4409 GSI_SAME_STMT);
4411 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4413 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4414 if (fd->have_reductemp)
4416 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4417 gimple_call_lhs (call_stmt));
4418 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4421 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4422 gsi_remove (&gsi, true);
4424 /* Connect the new blocks. */
4425 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4426 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4428 if (!broken_loop)
4430 gimple_seq phis;
4432 e = find_edge (cont_bb, l3_bb);
4433 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4435 phis = phi_nodes (l3_bb);
4436 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4438 gimple *phi = gsi_stmt (gsi);
4439 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4440 PHI_ARG_DEF_FROM_EDGE (phi, e));
4442 remove_edge (e);
4444 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4445 e = find_edge (cont_bb, l1_bb);
4446 if (e == NULL)
4448 e = BRANCH_EDGE (cont_bb);
4449 gcc_assert (single_succ (e->dest) == l1_bb);
4451 if (gimple_omp_for_combined_p (fd->for_stmt))
4453 remove_edge (e);
4454 e = NULL;
4456 else if (fd->collapse > 1)
4458 remove_edge (e);
4459 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4461 else
4462 e->flags = EDGE_TRUE_VALUE;
4463 if (e)
4465 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4466 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4468 else
4470 e = find_edge (cont_bb, l2_bb);
4471 e->flags = EDGE_FALLTHRU;
4473 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4475 if (gimple_in_ssa_p (cfun))
4477 /* Add phis to the outer loop that connect to the phis in the inner,
4478 original loop, and move the loop entry value of the inner phi to
4479 the loop entry value of the outer phi. */
4480 gphi_iterator psi;
4481 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4483 location_t locus;
4484 gphi *nphi;
4485 gphi *exit_phi = psi.phi ();
4487 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4488 continue;
4490 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4491 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4493 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4494 edge latch_to_l1 = find_edge (latch, l1_bb);
4495 gphi *inner_phi
4496 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4498 tree t = gimple_phi_result (exit_phi);
4499 tree new_res = copy_ssa_name (t, NULL);
4500 nphi = create_phi_node (new_res, l0_bb);
4502 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4503 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4504 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4505 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4506 add_phi_arg (nphi, t, entry_to_l0, locus);
4508 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4509 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4511 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4515 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4516 recompute_dominator (CDI_DOMINATORS, l2_bb));
4517 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4518 recompute_dominator (CDI_DOMINATORS, l3_bb));
4519 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4520 recompute_dominator (CDI_DOMINATORS, l0_bb));
4521 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4522 recompute_dominator (CDI_DOMINATORS, l1_bb));
4524 /* We enter expand_omp_for_generic with a loop. This original loop may
4525 have its own loop struct, or it may be part of an outer loop struct
4526 (which may be the fake loop). */
4527 class loop *outer_loop = entry_bb->loop_father;
4528 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4530 add_bb_to_loop (l2_bb, outer_loop);
4532 /* We've added a new loop around the original loop. Allocate the
4533 corresponding loop struct. */
4534 class loop *new_loop = alloc_loop ();
4535 new_loop->header = l0_bb;
4536 new_loop->latch = l2_bb;
4537 add_loop (new_loop, outer_loop);
4539 /* Allocate a loop structure for the original loop unless we already
4540 had one. */
4541 if (!orig_loop_has_loop_struct
4542 && !gimple_omp_for_combined_p (fd->for_stmt))
4544 class loop *orig_loop = alloc_loop ();
4545 orig_loop->header = l1_bb;
4546 /* The loop may have multiple latches. */
4547 add_loop (orig_loop, new_loop);
4552 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4553 compute the needed allocation size: for !ALLOC, of the team
4554 allocations; for ALLOC, of the thread allocation. SZ is the initial needed size for
4555 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4556 CNT number of elements of each array, for !ALLOC this is
4557 omp_get_num_threads (), for ALLOC number of iterations handled by the
4558 current thread. If PTR is non-NULL, it is the start of the allocation
4559 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4560 clauses pointers to the corresponding arrays. */
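/* It is called twice per variant: first with PTR NULL to compute the
   size to allocate, then with PTR pointing at the allocated block to
   carve out and assign the individual arrays.  */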
4562 static tree
4563 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4564 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4565 gimple_stmt_iterator *gsi, bool alloc)
4567 tree eltsz = NULL_TREE;
4568 unsigned HOST_WIDE_INT preval = 0;
4569 if (ptr && sz)
4570 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4571 ptr, size_int (sz));
4572 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4573 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4574 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4575 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4577 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4578 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4579 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4581 unsigned HOST_WIDE_INT szl
4582 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4583 szl = least_bit_hwi (szl);
4584 if (szl)
4585 al = MIN (al, szl);
4587 if (ptr == NULL_TREE)
4589 if (eltsz == NULL_TREE)
4590 eltsz = TYPE_SIZE_UNIT (pointee_type);
4591 else
4592 eltsz = size_binop (PLUS_EXPR, eltsz,
4593 TYPE_SIZE_UNIT (pointee_type));
4595 if (preval == 0 && al <= alloc_align)
4597 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4598 sz += diff;
4599 if (diff && ptr)
4600 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4601 ptr, size_int (diff));
4603 else if (al > preval)
4605 if (ptr)
4607 ptr = fold_convert (pointer_sized_int_node, ptr);
4608 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4609 build_int_cst (pointer_sized_int_node,
4610 al - 1));
4611 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4612 build_int_cst (pointer_sized_int_node,
4613 -(HOST_WIDE_INT) al));
4614 ptr = fold_convert (ptr_type_node, ptr);
4616 else
4617 sz += al - 1;
4619 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4620 preval = al;
4621 else
4622 preval = 1;
4623 if (ptr)
4625 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4626 ptr = OMP_CLAUSE_DECL (c);
4627 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4628 size_binop (MULT_EXPR, cnt,
4629 TYPE_SIZE_UNIT (pointee_type)));
4633 if (ptr == NULL_TREE)
4635 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4636 if (sz)
4637 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4638 return eltsz;
4640 else
4641 return ptr;
4644 /* A subroutine of expand_omp_for. Generate code for a parallel
4645 loop with static schedule and no specified chunk size. Given
4646 parameters:
4648 for (V = N1; V cond N2; V += STEP) BODY;
4650 where COND is "<" or ">", we generate pseudocode
4652 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4653 if (cond is <)
4654 adj = STEP - 1;
4655 else
4656 adj = STEP + 1;
4657 if ((__typeof (V)) -1 > 0 && cond is >)
4658 n = -(adj + N2 - N1) / -STEP;
4659 else
4660 n = (adj + N2 - N1) / STEP;
4661 q = n / nthreads;
4662 tt = n % nthreads;
4663 if (threadid < tt) goto L3; else goto L4;
4664 L3:
4665 tt = 0;
4666 q = q + 1;
4667 L4:
4668 s0 = q * threadid + tt;
4669 e0 = s0 + q;
4670 V = s0 * STEP + N1;
4671 if (s0 >= e0) goto L2; else goto L0;
4672 L0:
4673 e = e0 * STEP + N1;
4674 L1:
4675 BODY;
4676 V += STEP;
4677 if (V cond e) goto L1;
4678 L2:
4680 */
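/* For instance (illustrative numbers): n = 10 iterations over
   nthreads = 4 give q = 2 and tt = 2, so threads 0 and 1 each run
   3 iterations ([0,3) and [3,6)) while threads 2 and 3 run 2
   ([6,8) and [8,10)).  */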
4681 static void
4682 expand_omp_for_static_nochunk (struct omp_region *region,
4683 struct omp_for_data *fd,
4684 gimple *inner_stmt)
4686 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4687 tree type, itype, vmain, vback;
4688 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4689 basic_block body_bb, cont_bb, collapse_bb = NULL;
4690 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4691 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4692 gimple_stmt_iterator gsi, gsip;
4693 edge ep;
4694 bool broken_loop = region->cont == NULL;
4695 tree *counts = NULL;
4696 tree n1, n2, step;
4697 tree reductions = NULL_TREE;
4698 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4700 itype = type = TREE_TYPE (fd->loop.v);
4701 if (POINTER_TYPE_P (type))
4702 itype = signed_type_for (type);
4704 entry_bb = region->entry;
4705 cont_bb = region->cont;
4706 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4707 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4708 gcc_assert (broken_loop
4709 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4710 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4711 body_bb = single_succ (seq_start_bb);
4712 if (!broken_loop)
4714 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4715 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4716 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4718 exit_bb = region->exit;
4720 /* Iteration space partitioning goes in ENTRY_BB. */
4721 gsi = gsi_last_nondebug_bb (entry_bb);
4722 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4723 gsip = gsi;
4724 gsi_prev (&gsip);
4726 if (fd->collapse > 1)
4728 int first_zero_iter = -1, dummy = -1;
4729 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4731 counts = XALLOCAVEC (tree, fd->collapse);
4732 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4733 fin_bb, first_zero_iter,
4734 dummy_bb, dummy, l2_dom_bb);
4735 t = NULL_TREE;
4737 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4738 t = integer_one_node;
4739 else
4740 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4741 fold_convert (type, fd->loop.n1),
4742 fold_convert (type, fd->loop.n2));
4743 if (fd->collapse == 1
4744 && TYPE_UNSIGNED (type)
4745 && (t == NULL_TREE || !integer_onep (t)))
4747 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4748 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4749 true, GSI_SAME_STMT);
4750 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4751 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4752 true, GSI_SAME_STMT);
4753 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4754 NULL_TREE, NULL_TREE);
4755 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4756 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4757 expand_omp_regimplify_p, NULL, NULL)
4758 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4759 expand_omp_regimplify_p, NULL, NULL))
4761 gsi = gsi_for_stmt (cond_stmt);
4762 gimple_regimplify_operands (cond_stmt, &gsi);
4764 ep = split_block (entry_bb, cond_stmt);
4765 ep->flags = EDGE_TRUE_VALUE;
4766 entry_bb = ep->dest;
4767 ep->probability = profile_probability::very_likely ();
4768 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4769 ep->probability = profile_probability::very_unlikely ();
4770 if (gimple_in_ssa_p (cfun))
4772 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4773 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4774 !gsi_end_p (gpi); gsi_next (&gpi))
4776 gphi *phi = gpi.phi ();
4777 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4778 ep, UNKNOWN_LOCATION);
4781 gsi = gsi_last_bb (entry_bb);
4784 if (fd->lastprivate_conditional)
4786 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4787 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4788 if (fd->have_pointer_condtemp)
4789 condtemp = OMP_CLAUSE_DECL (c);
4790 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4791 cond_var = OMP_CLAUSE_DECL (c);
4793 if (fd->have_reductemp
4794 /* For scan, we don't want to reinitialize condtemp before the
4795 second loop. */
4796 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4797 || fd->have_nonctrl_scantemp)
4799 tree t1 = build_int_cst (long_integer_type_node, 0);
4800 tree t2 = build_int_cst (long_integer_type_node, 1);
4801 tree t3 = build_int_cstu (long_integer_type_node,
4802 (HOST_WIDE_INT_1U << 31) + 1);
4803 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4804 gimple_stmt_iterator gsi2 = gsi_none ();
4805 gimple *g = NULL;
4806 tree mem = null_pointer_node, memv = NULL_TREE;
4807 unsigned HOST_WIDE_INT condtemp_sz = 0;
4808 unsigned HOST_WIDE_INT alloc_align = 0;
4809 if (fd->have_reductemp)
4811 gcc_assert (!fd->have_nonctrl_scantemp);
4812 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4813 reductions = OMP_CLAUSE_DECL (c);
4814 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4815 g = SSA_NAME_DEF_STMT (reductions);
4816 reductions = gimple_assign_rhs1 (g);
4817 OMP_CLAUSE_DECL (c) = reductions;
4818 gsi2 = gsi_for_stmt (g);
4820 else
4822 if (gsi_end_p (gsip))
4823 gsi2 = gsi_after_labels (region->entry);
4824 else
4825 gsi2 = gsip;
4826 reductions = null_pointer_node;
4828 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4830 tree type;
4831 if (fd->have_pointer_condtemp)
4832 type = TREE_TYPE (condtemp);
4833 else
4834 type = ptr_type_node;
4835 memv = create_tmp_var (type);
4836 TREE_ADDRESSABLE (memv) = 1;
4837 unsigned HOST_WIDE_INT sz = 0;
4838 tree size = NULL_TREE;
4839 if (fd->have_pointer_condtemp)
4841 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4842 sz *= fd->lastprivate_conditional;
4843 condtemp_sz = sz;
4845 if (fd->have_nonctrl_scantemp)
4847 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4848 gimple *g = gimple_build_call (nthreads, 0);
4849 nthreads = create_tmp_var (integer_type_node);
4850 gimple_call_set_lhs (g, nthreads);
4851 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4852 nthreads = fold_convert (sizetype, nthreads);
4853 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4854 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4855 alloc_align, nthreads, NULL,
4856 false);
4857 size = fold_convert (type, size);
4859 else
4860 size = build_int_cst (type, sz);
4861 expand_omp_build_assign (&gsi2, memv, size, false);
4862 mem = build_fold_addr_expr (memv);
4864 tree t
4865 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4866 9, t1, t2, t2, t3, t1, null_pointer_node,
4867 null_pointer_node, reductions, mem);
4868 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4869 true, GSI_SAME_STMT);
4870 if (fd->have_pointer_condtemp)
4871 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4872 if (fd->have_nonctrl_scantemp)
4874 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4875 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4876 alloc_align, nthreads, &gsi2, false);
4878 if (fd->have_reductemp)
4880 gsi_remove (&gsi2, true);
4881 release_ssa_name (gimple_assign_lhs (g));
4884 switch (gimple_omp_for_kind (fd->for_stmt))
4886 case GF_OMP_FOR_KIND_FOR:
4887 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4888 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4889 break;
4890 case GF_OMP_FOR_KIND_DISTRIBUTE:
4891 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4892 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4893 break;
4894 default:
4895 gcc_unreachable ();
4897 nthreads = build_call_expr (nthreads, 0);
4898 nthreads = fold_convert (itype, nthreads);
4899 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4900 true, GSI_SAME_STMT);
4901 threadid = build_call_expr (threadid, 0);
4902 threadid = fold_convert (itype, threadid);
4903 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4904 true, GSI_SAME_STMT);
4906 n1 = fd->loop.n1;
4907 n2 = fd->loop.n2;
4908 step = fd->loop.step;
4909 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4911 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4912 OMP_CLAUSE__LOOPTEMP_);
4913 gcc_assert (innerc);
4914 n1 = OMP_CLAUSE_DECL (innerc);
4915 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4916 OMP_CLAUSE__LOOPTEMP_);
4917 gcc_assert (innerc);
4918 n2 = OMP_CLAUSE_DECL (innerc);
4920 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4921 true, NULL_TREE, true, GSI_SAME_STMT);
4922 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4923 true, NULL_TREE, true, GSI_SAME_STMT);
4924 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4925 true, NULL_TREE, true, GSI_SAME_STMT);
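/* Compute the iteration count n = (adj + N2 - N1) / STEP as in the
   pseudocode above, negating both operands for unsigned downward
   loops so the division rounds in the right direction.  */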
4927 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4928 t = fold_build2 (PLUS_EXPR, itype, step, t);
4929 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4930 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4931 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4932 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4933 fold_build1 (NEGATE_EXPR, itype, t),
4934 fold_build1 (NEGATE_EXPR, itype, step));
4935 else
4936 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4937 t = fold_convert (itype, t);
4938 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4940 q = create_tmp_reg (itype, "q");
4941 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4942 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4943 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4945 tt = create_tmp_reg (itype, "tt");
4946 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
4947 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4948 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
4950 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
4951 gcond *cond_stmt = gimple_build_cond_empty (t);
4952 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4954 second_bb = split_block (entry_bb, cond_stmt)->dest;
4955 gsi = gsi_last_nondebug_bb (second_bb);
4956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4958 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
4959 GSI_SAME_STMT);
4960 gassign *assign_stmt
4961 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
4962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4964 third_bb = split_block (second_bb, assign_stmt)->dest;
4965 gsi = gsi_last_nondebug_bb (third_bb);
4966 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4968 if (fd->have_nonctrl_scantemp)
4970 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4971 tree controlp = NULL_TREE, controlb = NULL_TREE;
4972 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4973 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4974 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4976 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4977 controlb = OMP_CLAUSE_DECL (c);
4978 else
4979 controlp = OMP_CLAUSE_DECL (c);
4980 if (controlb && controlp)
4981 break;
4983 gcc_assert (controlp && controlb);
4984 tree cnt = create_tmp_var (sizetype);
4985 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
4986 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4987 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
4988 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
4989 alloc_align, cnt, NULL, true);
4990 tree size = create_tmp_var (sizetype);
4991 expand_omp_build_assign (&gsi, size, sz, false);
4992 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
4993 size, size_int (16384));
4994 expand_omp_build_assign (&gsi, controlb, cmp);
4995 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4996 NULL_TREE, NULL_TREE);
4997 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4998 fourth_bb = split_block (third_bb, g)->dest;
4999 gsi = gsi_last_nondebug_bb (fourth_bb);
5000 /* FIXME: Once we have allocators, this should use allocator. */
5001 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5002 gimple_call_set_lhs (g, controlp);
5003 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5004 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5005 &gsi, true);
5006 gsi_prev (&gsi);
5007 g = gsi_stmt (gsi);
5008 fifth_bb = split_block (fourth_bb, g)->dest;
5009 gsi = gsi_last_nondebug_bb (fifth_bb);
5011 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5012 gimple_call_set_lhs (g, controlp);
5013 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5014 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5015 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5016 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5017 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5019 tree tmp = create_tmp_var (sizetype);
5020 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5021 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5022 TYPE_SIZE_UNIT (pointee_type));
5023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5024 g = gimple_build_call (alloca_decl, 2, tmp,
5025 size_int (TYPE_ALIGN (pointee_type)));
5026 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5030 sixth_bb = split_block (fifth_bb, g)->dest;
5031 gsi = gsi_last_nondebug_bb (sixth_bb);
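/* Compute this thread's half-open iteration range [s0, e0): the
   first tt threads run q + 1 iterations each, the remaining ones
   run q (q and tt were adjusted above when threadid < tt).  */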
5034 t = build2 (MULT_EXPR, itype, q, threadid);
5035 t = build2 (PLUS_EXPR, itype, t, tt);
5036 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5038 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5039 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5041 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5042 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5044 /* Remove the GIMPLE_OMP_FOR statement. */
5045 gsi_remove (&gsi, true);
5047 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5048 gsi = gsi_start_bb (seq_start_bb);
5050 tree startvar = fd->loop.v;
5051 tree endvar = NULL_TREE;
5053 if (gimple_omp_for_combined_p (fd->for_stmt))
5055 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5056 ? gimple_omp_parallel_clauses (inner_stmt)
5057 : gimple_omp_for_clauses (inner_stmt);
5058 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5059 gcc_assert (innerc);
5060 startvar = OMP_CLAUSE_DECL (innerc);
5061 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5062 OMP_CLAUSE__LOOPTEMP_);
5063 gcc_assert (innerc);
5064 endvar = OMP_CLAUSE_DECL (innerc);
5065 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5066 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5068 int i;
5069 for (i = 1; i < fd->collapse; i++)
5071 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5072 OMP_CLAUSE__LOOPTEMP_);
5073 gcc_assert (innerc);
5075 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5076 OMP_CLAUSE__LOOPTEMP_);
5077 if (innerc)
5079 /* If needed (distribute parallel for with lastprivate),
5080 propagate down the total number of iterations. */
5081 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5082 fd->loop.n2);
5083 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5084 GSI_CONTINUE_LINKING);
5085 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5086 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5090 t = fold_convert (itype, s0);
5091 t = fold_build2 (MULT_EXPR, itype, t, step);
5092 if (POINTER_TYPE_P (type))
5094 t = fold_build_pointer_plus (n1, t);
5095 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5096 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5097 t = fold_convert (signed_type_for (type), t);
5099 else
5100 t = fold_build2 (PLUS_EXPR, type, t, n1);
5101 t = fold_convert (TREE_TYPE (startvar), t);
5102 t = force_gimple_operand_gsi (&gsi, t,
5103 DECL_P (startvar)
5104 && TREE_ADDRESSABLE (startvar),
5105 NULL_TREE, false, GSI_CONTINUE_LINKING);
5106 assign_stmt = gimple_build_assign (startvar, t);
5107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5108 if (cond_var)
5110 tree itype = TREE_TYPE (cond_var);
5111 /* For the lastprivate(conditional:) itervar we need an iteration
5112 counter that starts at a nonzero unsigned value and increases.
5113 Prefer as few IVs as possible, so if we can use startvar
5114 itself, use that, or startvar + constant (those would be
5115 incremented with step), and as a last resort use s0 + 1,
5116 incremented by 1 each iteration. */
5117 if (POINTER_TYPE_P (type)
5118 || TREE_CODE (n1) != INTEGER_CST
5119 || fd->loop.cond_code != LT_EXPR)
5120 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5121 build_int_cst (itype, 1));
5122 else if (tree_int_cst_sgn (n1) == 1)
5123 t = fold_convert (itype, t);
5124 else
5126 tree c = fold_convert (itype, n1);
5127 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5128 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5130 t = force_gimple_operand_gsi (&gsi, t, false,
5131 NULL_TREE, false, GSI_CONTINUE_LINKING);
5132 assign_stmt = gimple_build_assign (cond_var, t);
5133 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5136 t = fold_convert (itype, e0);
5137 t = fold_build2 (MULT_EXPR, itype, t, step);
5138 if (POINTER_TYPE_P (type))
5140 t = fold_build_pointer_plus (n1, t);
5141 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5142 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5143 t = fold_convert (signed_type_for (type), t);
5145 else
5146 t = fold_build2 (PLUS_EXPR, type, t, n1);
5147 t = fold_convert (TREE_TYPE (startvar), t);
5148 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5149 false, GSI_CONTINUE_LINKING);
5150 if (endvar)
5152 assign_stmt = gimple_build_assign (endvar, e);
5153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5154 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5155 assign_stmt = gimple_build_assign (fd->loop.v, e);
5156 else
5157 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5158 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160 /* Handle linear clause adjustments. */
5161 tree itercnt = NULL_TREE;
5162 tree *nonrect_bounds = NULL;
5163 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5164 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5165 c; c = OMP_CLAUSE_CHAIN (c))
5166 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5167 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5169 tree d = OMP_CLAUSE_DECL (c);
5170 bool is_ref = omp_is_reference (d);
5171 tree t = d, a, dest;
5172 if (is_ref)
5173 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5174 if (itercnt == NULL_TREE)
5176 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5178 itercnt = fold_build2 (MINUS_EXPR, itype,
5179 fold_convert (itype, n1),
5180 fold_convert (itype, fd->loop.n1));
5181 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5182 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5183 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5184 NULL_TREE, false,
5185 GSI_CONTINUE_LINKING);
5187 else
5188 itercnt = s0;
5190 tree type = TREE_TYPE (t);
5191 if (POINTER_TYPE_P (type))
5192 type = sizetype;
5193 a = fold_build2 (MULT_EXPR, type,
5194 fold_convert (type, itercnt),
5195 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5196 dest = unshare_expr (t);
5197 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5198 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5199 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5200 false, GSI_CONTINUE_LINKING);
5201 assign_stmt = gimple_build_assign (dest, t);
5202 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5204 if (fd->collapse > 1)
5206 if (fd->non_rect)
5208 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5209 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5211 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5212 startvar);
5215 if (!broken_loop)
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi = gsi_last_nondebug_bb (cont_bb);
5220 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5221 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5222 vmain = gimple_omp_continue_control_use (cont_stmt);
5223 vback = gimple_omp_continue_control_def (cont_stmt);
5225 if (cond_var)
5227 tree itype = TREE_TYPE (cond_var);
5228 tree t2;
5229 if (POINTER_TYPE_P (type)
5230 || TREE_CODE (n1) != INTEGER_CST
5231 || fd->loop.cond_code != LT_EXPR)
5232 t2 = build_int_cst (itype, 1);
5233 else
5234 t2 = fold_convert (itype, step);
5235 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5236 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5237 NULL_TREE, true, GSI_SAME_STMT);
5238 assign_stmt = gimple_build_assign (cond_var, t2);
5239 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5242 if (!gimple_omp_for_combined_p (fd->for_stmt))
5244 if (POINTER_TYPE_P (type))
5245 t = fold_build_pointer_plus (vmain, step);
5246 else
5247 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5248 t = force_gimple_operand_gsi (&gsi, t,
5249 DECL_P (vback)
5250 && TREE_ADDRESSABLE (vback),
5251 NULL_TREE, true, GSI_SAME_STMT);
5252 assign_stmt = gimple_build_assign (vback, t);
5253 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5255 t = build2 (fd->loop.cond_code, boolean_type_node,
5256 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5257 ? t : vback, e);
5258 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5261 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5262 gsi_remove (&gsi, true);
5264 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5265 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5266 cont_bb, body_bb);
5269 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5270 gsi = gsi_last_nondebug_bb (exit_bb);
5271 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5273 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5274 if (fd->have_reductemp
5275 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5276 && !fd->have_nonctrl_scantemp))
5278 tree fn;
5279 if (t)
5280 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5281 else
5282 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5283 gcall *g = gimple_build_call (fn, 0);
5284 if (t)
5286 gimple_call_set_lhs (g, t);
5287 if (fd->have_reductemp)
5288 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5289 NOP_EXPR, t),
5290 GSI_SAME_STMT);
5292 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5294 else
5295 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5297 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5298 && !fd->have_nonctrl_scantemp)
5300 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5301 gcall *g = gimple_build_call (fn, 0);
5302 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5304 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5306 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5307 tree controlp = NULL_TREE, controlb = NULL_TREE;
5308 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5309 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5310 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5312 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5313 controlb = OMP_CLAUSE_DECL (c);
5314 else
5315 controlp = OMP_CLAUSE_DECL (c);
5316 if (controlb && controlp)
5317 break;
5319 gcc_assert (controlp && controlb);
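/* CONTROLB records at run time which allocation strategy the entry
   code picked for the scan temporaries: free the malloc'ed block on
   the true path, undo the earlier stack save on the false path, and
   rejoin at what becomes exit3_bb below.  */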
5320 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5321 NULL_TREE, NULL_TREE);
5322 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5323 exit1_bb = split_block (exit_bb, g)->dest;
5324 gsi = gsi_after_labels (exit1_bb);
5325 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5326 controlp);
5327 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5328 exit2_bb = split_block (exit1_bb, g)->dest;
5329 gsi = gsi_after_labels (exit2_bb);
5330 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5331 controlp);
5332 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5333 exit3_bb = split_block (exit2_bb, g)->dest;
5334 gsi = gsi_after_labels (exit3_bb);
5336 gsi_remove (&gsi, true);
5338 /* Connect all the blocks. */
5339 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5340 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5341 ep = find_edge (entry_bb, second_bb);
5342 ep->flags = EDGE_TRUE_VALUE;
5343 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5344 if (fourth_bb)
5346 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5347 ep->probability
5348 = profile_probability::guessed_always ().apply_scale (1, 2);
5349 ep = find_edge (third_bb, fourth_bb);
5350 ep->flags = EDGE_TRUE_VALUE;
5351 ep->probability
5352 = profile_probability::guessed_always ().apply_scale (1, 2);
5353 ep = find_edge (fourth_bb, fifth_bb);
5354 redirect_edge_and_branch (ep, sixth_bb);
5356 else
5357 sixth_bb = third_bb;
5358 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5359 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5360 if (exit1_bb)
5362 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5363 ep->probability
5364 = profile_probability::guessed_always ().apply_scale (1, 2);
5365 ep = find_edge (exit_bb, exit1_bb);
5366 ep->flags = EDGE_TRUE_VALUE;
5367 ep->probability
5368 = profile_probability::guessed_always ().apply_scale (1, 2);
5369 ep = find_edge (exit1_bb, exit2_bb);
5370 redirect_edge_and_branch (ep, exit3_bb);
5373 if (!broken_loop)
5375 ep = find_edge (cont_bb, body_bb);
5376 if (ep == NULL)
5378 ep = BRANCH_EDGE (cont_bb);
5379 gcc_assert (single_succ (ep->dest) == body_bb);
5381 if (gimple_omp_for_combined_p (fd->for_stmt))
5383 remove_edge (ep);
5384 ep = NULL;
5386 else if (fd->collapse > 1)
5388 remove_edge (ep);
5389 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5391 else
5392 ep->flags = EDGE_TRUE_VALUE;
5393 find_edge (cont_bb, fin_bb)->flags
5394 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5397 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5398 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5399 if (fourth_bb)
5401 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5402 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5404 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5406 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5407 recompute_dominator (CDI_DOMINATORS, body_bb));
5408 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5409 recompute_dominator (CDI_DOMINATORS, fin_bb));
5410 if (exit1_bb)
5412 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5413 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5416 class loop *loop = body_bb->loop_father;
5417 if (loop != entry_bb->loop_father)
5419 gcc_assert (broken_loop || loop->header == body_bb);
5420 gcc_assert (broken_loop
5421 || loop->latch == region->cont
5422 || single_pred (loop->latch) == region->cont);
5423 return;
5426 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5428 loop = alloc_loop ();
5429 loop->header = body_bb;
5430 if (collapse_bb == NULL)
5431 loop->latch = cont_bb;
5432 add_loop (loop, body_bb->loop_father);
5436 /* Return the PHI node in E->DEST whose argument on edge E is ARG, or NULL. */
5438 static gphi *
5439 find_phi_with_arg_on_edge (tree arg, edge e)
5441 basic_block bb = e->dest;
5443 for (gphi_iterator gpi = gsi_start_phis (bb);
5444 !gsi_end_p (gpi);
5445 gsi_next (&gpi))
5447 gphi *phi = gpi.phi ();
5448 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5449 return phi;
5452 return NULL;
5455 /* A subroutine of expand_omp_for. Generate code for a parallel
5456 loop with static schedule and a specified chunk size. Given
5457 parameters:
5459 for (V = N1; V cond N2; V += STEP) BODY;
5461 where COND is "<" or ">", we generate pseudocode
5463 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5464 if (cond is <)
5465 adj = STEP - 1;
5466 else
5467 adj = STEP + 1;
5468 if ((__typeof (V)) -1 > 0 && cond is >)
5469 n = -(adj + N2 - N1) / -STEP;
5470 else
5471 n = (adj + N2 - N1) / STEP;
5472 trip = 0;
5473 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5474 here so that V is defined
5475 if the loop is not entered
5476 L0:
5477 s0 = (trip * nthreads + threadid) * CHUNK;
5478 e0 = min (s0 + CHUNK, n);
5479 if (s0 < n) goto L1; else goto L4;
5480 L1:
5481 V = s0 * STEP + N1;
5482 e = e0 * STEP + N1;
5483 L2:
5484 BODY;
5485 V += STEP;
5486 if (V cond e) goto L2; else goto L3;
5487 L3:
5488 trip += 1;
5489 goto L0;
5490 L4:
5491 */
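/* For illustration only (this is not emitted code): with nthreads = 4,
   CHUNK = 2 and n = 13, thread 1 executes chunks [2, 4) and [10, 12)
   on trips 0 and 1, and finds s0 = 18 >= n on trip 2, so it exits the
   chunking loop.  */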
5493 static void
5494 expand_omp_for_static_chunk (struct omp_region *region,
5495 struct omp_for_data *fd, gimple *inner_stmt)
5497 tree n, s0, e0, e, t;
5498 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5499 tree type, itype, vmain, vback, vextra;
5500 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5501 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5502 gimple_stmt_iterator gsi, gsip;
5503 edge se;
5504 bool broken_loop = region->cont == NULL;
5505 tree *counts = NULL;
5506 tree n1, n2, step;
5507 tree reductions = NULL_TREE;
5508 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5510 itype = type = TREE_TYPE (fd->loop.v);
5511 if (POINTER_TYPE_P (type))
5512 itype = signed_type_for (type);
5514 entry_bb = region->entry;
5515 se = split_block (entry_bb, last_stmt (entry_bb));
5516 entry_bb = se->src;
5517 iter_part_bb = se->dest;
5518 cont_bb = region->cont;
5519 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5520 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5521 gcc_assert (broken_loop
5522 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5523 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5524 body_bb = single_succ (seq_start_bb);
5525 if (!broken_loop)
5527 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5528 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5529 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5530 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5532 exit_bb = region->exit;
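/* The labels in the pseudocode above map roughly onto the CFG as:
   L0 = iter_part_bb, L1 = seq_start_bb, L2 = body_bb/cont_bb,
   L3 = trip_update_bb and L4 = fin_bb.  */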
5534 /* Trip and adjustment setup goes in ENTRY_BB. */
5535 gsi = gsi_last_nondebug_bb (entry_bb);
5536 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5537 gsip = gsi;
5538 gsi_prev (&gsip);
5540 if (fd->collapse > 1)
5542 int first_zero_iter = -1, dummy = -1;
5543 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5545 counts = XALLOCAVEC (tree, fd->collapse);
5546 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5547 fin_bb, first_zero_iter,
5548 dummy_bb, dummy, l2_dom_bb);
5549 t = NULL_TREE;
5551 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5552 t = integer_one_node;
5553 else
5554 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5555 fold_convert (type, fd->loop.n1),
5556 fold_convert (type, fd->loop.n2));
5557 if (fd->collapse == 1
5558 && TYPE_UNSIGNED (type)
5559 && (t == NULL_TREE || !integer_onep (t)))
5561 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5562 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5563 true, GSI_SAME_STMT);
5564 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5565 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5566 true, GSI_SAME_STMT);
5567 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5568 NULL_TREE, NULL_TREE);
5569 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5570 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5571 expand_omp_regimplify_p, NULL, NULL)
5572 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5573 expand_omp_regimplify_p, NULL, NULL))
5575 gsi = gsi_for_stmt (cond_stmt);
5576 gimple_regimplify_operands (cond_stmt, &gsi);
5578 se = split_block (entry_bb, cond_stmt);
5579 se->flags = EDGE_TRUE_VALUE;
5580 entry_bb = se->dest;
5581 se->probability = profile_probability::very_likely ();
5582 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5583 se->probability = profile_probability::very_unlikely ();
5584 if (gimple_in_ssa_p (cfun))
5586 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5587 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5588 !gsi_end_p (gpi); gsi_next (&gpi))
5590 gphi *phi = gpi.phi ();
5591 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5592 se, UNKNOWN_LOCATION);
5595 gsi = gsi_last_bb (entry_bb);
5598 if (fd->lastprivate_conditional)
5600 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5601 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5602 if (fd->have_pointer_condtemp)
5603 condtemp = OMP_CLAUSE_DECL (c);
5604 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5605 cond_var = OMP_CLAUSE_DECL (c);
5607 if (fd->have_reductemp || fd->have_pointer_condtemp)
5609 tree t1 = build_int_cst (long_integer_type_node, 0);
5610 tree t2 = build_int_cst (long_integer_type_node, 1);
5611 tree t3 = build_int_cstu (long_integer_type_node,
5612 (HOST_WIDE_INT_1U << 31) + 1);
5613 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5614 gimple_stmt_iterator gsi2 = gsi_none ();
5615 gimple *g = NULL;
5616 tree mem = null_pointer_node, memv = NULL_TREE;
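/* The GOMP_loop_start call emitted below is used only for its side
   effects: registering the reduction temporary and/or allocating the
   conditional lastprivate buffer MEM.  The leading arguments appear to
   describe a placeholder iteration space (start 0, end 1, step 1) plus
   a schedule constant; no iterations are actually requested here.  */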
5617 if (fd->have_reductemp)
5619 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5620 reductions = OMP_CLAUSE_DECL (c);
5621 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5622 g = SSA_NAME_DEF_STMT (reductions);
5623 reductions = gimple_assign_rhs1 (g);
5624 OMP_CLAUSE_DECL (c) = reductions;
5625 gsi2 = gsi_for_stmt (g);
5627 else
5629 if (gsi_end_p (gsip))
5630 gsi2 = gsi_after_labels (region->entry);
5631 else
5632 gsi2 = gsip;
5633 reductions = null_pointer_node;
5635 if (fd->have_pointer_condtemp)
5637 tree type = TREE_TYPE (condtemp);
5638 memv = create_tmp_var (type);
5639 TREE_ADDRESSABLE (memv) = 1;
5640 unsigned HOST_WIDE_INT sz
5641 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5642 sz *= fd->lastprivate_conditional;
5643 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5644 false);
5645 mem = build_fold_addr_expr (memv);
5647 tree t
5648 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5649 9, t1, t2, t2, t3, t1, null_pointer_node,
5650 null_pointer_node, reductions, mem);
5651 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5652 true, GSI_SAME_STMT);
5653 if (fd->have_pointer_condtemp)
5654 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5655 if (fd->have_reductemp)
5657 gsi_remove (&gsi2, true);
5658 release_ssa_name (gimple_assign_lhs (g));
5661 switch (gimple_omp_for_kind (fd->for_stmt))
5663 case GF_OMP_FOR_KIND_FOR:
5664 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5665 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5666 break;
5667 case GF_OMP_FOR_KIND_DISTRIBUTE:
5668 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5669 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5670 break;
5671 default:
5672 gcc_unreachable ();
5674 nthreads = build_call_expr (nthreads, 0);
5675 nthreads = fold_convert (itype, nthreads);
5676 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5677 true, GSI_SAME_STMT);
5678 threadid = build_call_expr (threadid, 0);
5679 threadid = fold_convert (itype, threadid);
5680 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5681 true, GSI_SAME_STMT);
5683 n1 = fd->loop.n1;
5684 n2 = fd->loop.n2;
5685 step = fd->loop.step;
5686 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5688 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5689 OMP_CLAUSE__LOOPTEMP_);
5690 gcc_assert (innerc);
5691 n1 = OMP_CLAUSE_DECL (innerc);
5692 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5693 OMP_CLAUSE__LOOPTEMP_);
5694 gcc_assert (innerc);
5695 n2 = OMP_CLAUSE_DECL (innerc);
5697 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5698 true, NULL_TREE, true, GSI_SAME_STMT);
5699 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5700 true, NULL_TREE, true, GSI_SAME_STMT);
5701 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5702 true, NULL_TREE, true, GSI_SAME_STMT);
5703 tree chunk_size = fold_convert (itype, fd->chunk_size);
5704 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5705 chunk_size
5706 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5707 GSI_SAME_STMT);
5709 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5710 t = fold_build2 (PLUS_EXPR, itype, step, t);
5711 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5712 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5713 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5714 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5715 fold_build1 (NEGATE_EXPR, itype, t),
5716 fold_build1 (NEGATE_EXPR, itype, step));
5717 else
5718 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5719 t = fold_convert (itype, t);
5720 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5721 true, GSI_SAME_STMT);
5723 trip_var = create_tmp_reg (itype, ".trip");
5724 if (gimple_in_ssa_p (cfun))
5726 trip_init = make_ssa_name (trip_var);
5727 trip_main = make_ssa_name (trip_var);
5728 trip_back = make_ssa_name (trip_var);
5730 else
5732 trip_init = trip_var;
5733 trip_main = trip_var;
5734 trip_back = trip_var;
5737 gassign *assign_stmt
5738 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5739 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5741 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5742 t = fold_build2 (MULT_EXPR, itype, t, step);
5743 if (POINTER_TYPE_P (type))
5744 t = fold_build_pointer_plus (n1, t);
5745 else
5746 t = fold_build2 (PLUS_EXPR, type, t, n1);
5747 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5748 true, GSI_SAME_STMT);
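/* VEXTRA is the value V would have at the start of this thread's first
   chunk; it stands in for fd->loop.v in the iter_part_bb PHI nodes
   created further below, where V has not been computed yet.  */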
5750 /* Remove the GIMPLE_OMP_FOR. */
5751 gsi_remove (&gsi, true);
5753 gimple_stmt_iterator gsif = gsi;
5755 /* Iteration space partitioning goes in ITER_PART_BB. */
5756 gsi = gsi_last_bb (iter_part_bb);
5758 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5759 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5760 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5761 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5762 false, GSI_CONTINUE_LINKING);
5764 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5765 t = fold_build2 (MIN_EXPR, itype, t, n);
5766 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5767 false, GSI_CONTINUE_LINKING);
5769 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5770 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5772 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5773 gsi = gsi_start_bb (seq_start_bb);
5775 tree startvar = fd->loop.v;
5776 tree endvar = NULL_TREE;
5778 if (gimple_omp_for_combined_p (fd->for_stmt))
5780 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5781 ? gimple_omp_parallel_clauses (inner_stmt)
5782 : gimple_omp_for_clauses (inner_stmt);
5783 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5784 gcc_assert (innerc);
5785 startvar = OMP_CLAUSE_DECL (innerc);
5786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5787 OMP_CLAUSE__LOOPTEMP_);
5788 gcc_assert (innerc);
5789 endvar = OMP_CLAUSE_DECL (innerc);
5790 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5791 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5793 int i;
5794 for (i = 1; i < fd->collapse; i++)
5796 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5797 OMP_CLAUSE__LOOPTEMP_);
5798 gcc_assert (innerc);
5800 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5801 OMP_CLAUSE__LOOPTEMP_);
5802 if (innerc)
5804 /* If needed (distribute parallel for with lastprivate),
5805 propagate down the total number of iterations. */
5806 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5807 fd->loop.n2);
5808 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5809 GSI_CONTINUE_LINKING);
5810 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5811 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5816 t = fold_convert (itype, s0);
5817 t = fold_build2 (MULT_EXPR, itype, t, step);
5818 if (POINTER_TYPE_P (type))
5820 t = fold_build_pointer_plus (n1, t);
5821 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5822 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5823 t = fold_convert (signed_type_for (type), t);
5825 else
5826 t = fold_build2 (PLUS_EXPR, type, t, n1);
5827 t = fold_convert (TREE_TYPE (startvar), t);
5828 t = force_gimple_operand_gsi (&gsi, t,
5829 DECL_P (startvar)
5830 && TREE_ADDRESSABLE (startvar),
5831 NULL_TREE, false, GSI_CONTINUE_LINKING);
5832 assign_stmt = gimple_build_assign (startvar, t);
5833 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5834 if (cond_var)
5836 tree itype = TREE_TYPE (cond_var);
5837 /* For the lastprivate(conditional:) itervar we need an iteration
5838 counter that starts at a nonzero unsigned value and increases.
5839 Prefer as few IVs as possible, so if we can use startvar
5840 itself, use that, or startvar + constant (those would be
5841 incremented with step), and as a last resort use s0 + 1,
5842 incremented by 1 each iteration. */
5843 if (POINTER_TYPE_P (type)
5844 || TREE_CODE (n1) != INTEGER_CST
5845 || fd->loop.cond_code != LT_EXPR)
5846 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5847 build_int_cst (itype, 1));
5848 else if (tree_int_cst_sgn (n1) == 1)
5849 t = fold_convert (itype, t);
5850 else
5852 tree c = fold_convert (itype, n1);
5853 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5854 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5856 t = force_gimple_operand_gsi (&gsi, t, false,
5857 NULL_TREE, false, GSI_CONTINUE_LINKING);
5858 assign_stmt = gimple_build_assign (cond_var, t);
5859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5862 t = fold_convert (itype, e0);
5863 t = fold_build2 (MULT_EXPR, itype, t, step);
5864 if (POINTER_TYPE_P (type))
5866 t = fold_build_pointer_plus (n1, t);
5867 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5868 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5869 t = fold_convert (signed_type_for (type), t);
5871 else
5872 t = fold_build2 (PLUS_EXPR, type, t, n1);
5873 t = fold_convert (TREE_TYPE (startvar), t);
5874 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5875 false, GSI_CONTINUE_LINKING);
5876 if (endvar)
5878 assign_stmt = gimple_build_assign (endvar, e);
5879 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5880 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5881 assign_stmt = gimple_build_assign (fd->loop.v, e);
5882 else
5883 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5884 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5886 /* Handle linear clause adjustments. */
5887 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5888 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5889 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5890 c; c = OMP_CLAUSE_CHAIN (c))
5891 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5892 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5894 tree d = OMP_CLAUSE_DECL (c);
5895 bool is_ref = omp_is_reference (d);
5896 tree t = d, a, dest;
5897 if (is_ref)
5898 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5899 tree type = TREE_TYPE (t);
5900 if (POINTER_TYPE_P (type))
5901 type = sizetype;
5902 dest = unshare_expr (t);
5903 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5904 expand_omp_build_assign (&gsif, v, t);
5905 if (itercnt == NULL_TREE)
5907 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5909 itercntbias
5910 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5911 fold_convert (itype, fd->loop.n1));
5912 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5913 itercntbias, step);
5914 itercntbias
5915 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5916 NULL_TREE, true,
5917 GSI_SAME_STMT);
5918 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5919 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5920 NULL_TREE, false,
5921 GSI_CONTINUE_LINKING);
5923 else
5924 itercnt = s0;
5926 a = fold_build2 (MULT_EXPR, type,
5927 fold_convert (type, itercnt),
5928 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5929 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5930 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5931 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5932 false, GSI_CONTINUE_LINKING);
5933 assign_stmt = gimple_build_assign (dest, t);
5934 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5936 if (fd->collapse > 1)
5937 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5939 if (!broken_loop)
5941 /* The code controlling the sequential loop goes in CONT_BB,
5942 replacing the GIMPLE_OMP_CONTINUE. */
5943 gsi = gsi_last_nondebug_bb (cont_bb);
5944 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5945 vmain = gimple_omp_continue_control_use (cont_stmt);
5946 vback = gimple_omp_continue_control_def (cont_stmt);
5948 if (cond_var)
5950 tree itype = TREE_TYPE (cond_var);
5951 tree t2;
5952 if (POINTER_TYPE_P (type)
5953 || TREE_CODE (n1) != INTEGER_CST
5954 || fd->loop.cond_code != LT_EXPR)
5955 t2 = build_int_cst (itype, 1);
5956 else
5957 t2 = fold_convert (itype, step);
5958 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5959 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5960 NULL_TREE, true, GSI_SAME_STMT);
5961 assign_stmt = gimple_build_assign (cond_var, t2);
5962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5965 if (!gimple_omp_for_combined_p (fd->for_stmt))
5967 if (POINTER_TYPE_P (type))
5968 t = fold_build_pointer_plus (vmain, step);
5969 else
5970 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5971 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
5972 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5973 true, GSI_SAME_STMT);
5974 assign_stmt = gimple_build_assign (vback, t);
5975 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
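/* With a compile-time chunk size of 1 every chunk is a single
   iteration, so the back edge of the sequential loop can never be
   taken; emit a constant-false condition (0 == 1) instead of the
   usual V cond E test.  */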
5977 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
5978 t = build2 (EQ_EXPR, boolean_type_node,
5979 build_int_cst (itype, 0),
5980 build_int_cst (itype, 1));
5981 else
5982 t = build2 (fd->loop.cond_code, boolean_type_node,
5983 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5984 ? t : vback, e);
5985 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5988 /* Remove GIMPLE_OMP_CONTINUE. */
5989 gsi_remove (&gsi, true);
5991 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5992 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
5994 /* Trip update code goes into TRIP_UPDATE_BB. */
5995 gsi = gsi_start_bb (trip_update_bb);
5997 t = build_int_cst (itype, 1);
5998 t = build2 (PLUS_EXPR, itype, trip_main, t);
5999 assign_stmt = gimple_build_assign (trip_back, t);
6000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6003 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6004 gsi = gsi_last_nondebug_bb (exit_bb);
6005 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6007 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6008 if (fd->have_reductemp || fd->have_pointer_condtemp)
6010 tree fn;
6011 if (t)
6012 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6013 else
6014 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6015 gcall *g = gimple_build_call (fn, 0);
6016 if (t)
6018 gimple_call_set_lhs (g, t);
6019 if (fd->have_reductemp)
6020 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6021 NOP_EXPR, t),
6022 GSI_SAME_STMT);
6024 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6026 else
6027 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6029 else if (fd->have_pointer_condtemp)
6031 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6032 gcall *g = gimple_build_call (fn, 0);
6033 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6035 gsi_remove (&gsi, true);
6037 /* Connect the new blocks. */
6038 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6039 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6041 if (!broken_loop)
6043 se = find_edge (cont_bb, body_bb);
6044 if (se == NULL)
6046 se = BRANCH_EDGE (cont_bb);
6047 gcc_assert (single_succ (se->dest) == body_bb);
6049 if (gimple_omp_for_combined_p (fd->for_stmt))
6051 remove_edge (se);
6052 se = NULL;
6054 else if (fd->collapse > 1)
6056 remove_edge (se);
6057 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6059 else
6060 se->flags = EDGE_TRUE_VALUE;
6061 find_edge (cont_bb, trip_update_bb)->flags
6062 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6064 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6065 iter_part_bb);
6068 if (gimple_in_ssa_p (cfun))
6070 gphi_iterator psi;
6071 gphi *phi;
6072 edge re, ene;
6073 edge_var_map *vm;
6074 size_t i;
6076 gcc_assert (fd->collapse == 1 && !broken_loop);
6078 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6079 remove arguments of the phi nodes in fin_bb. We need to create
6080 appropriate phi nodes in iter_part_bb instead. */
6081 se = find_edge (iter_part_bb, fin_bb);
6082 re = single_succ_edge (trip_update_bb);
6083 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6084 ene = single_succ_edge (entry_bb);
6086 psi = gsi_start_phis (fin_bb);
6087 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6088 gsi_next (&psi), ++i)
6090 gphi *nphi;
6091 location_t locus;
6093 phi = psi.phi ();
6094 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6095 redirect_edge_var_map_def (vm), 0))
6096 continue;
6098 t = gimple_phi_result (phi);
6099 gcc_assert (t == redirect_edge_var_map_result (vm));
6101 if (!single_pred_p (fin_bb))
6102 t = copy_ssa_name (t, phi);
6104 nphi = create_phi_node (t, iter_part_bb);
6106 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6107 locus = gimple_phi_arg_location_from_edge (phi, se);
6109 /* A special case -- fd->loop.v is not yet computed in
6110 iter_part_bb; we need to use vextra instead. */
6111 if (t == fd->loop.v)
6112 t = vextra;
6113 add_phi_arg (nphi, t, ene, locus);
6114 locus = redirect_edge_var_map_location (vm);
6115 tree back_arg = redirect_edge_var_map_def (vm);
6116 add_phi_arg (nphi, back_arg, re, locus);
6117 edge ce = find_edge (cont_bb, body_bb);
6118 if (ce == NULL)
6120 ce = BRANCH_EDGE (cont_bb);
6121 gcc_assert (single_succ (ce->dest) == body_bb);
6122 ce = single_succ_edge (ce->dest);
6124 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6125 gcc_assert (inner_loop_phi != NULL);
6126 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6127 find_edge (seq_start_bb, body_bb), locus);
6129 if (!single_pred_p (fin_bb))
6130 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6132 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6133 redirect_edge_var_map_clear (re);
6134 if (single_pred_p (fin_bb))
6135 while (1)
6137 psi = gsi_start_phis (fin_bb);
6138 if (gsi_end_p (psi))
6139 break;
6140 remove_phi_node (&psi, false);
6143 /* Make phi node for trip. */
6144 phi = create_phi_node (trip_main, iter_part_bb);
6145 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6146 UNKNOWN_LOCATION);
6147 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6148 UNKNOWN_LOCATION);
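/* TRIP_MAIN thus merges TRIP_INIT (zero, from the entry block) with
   TRIP_BACK (trip_main + 1, from the trip update block), forming the
   induction variable of the outer chunking loop.  */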
6151 if (!broken_loop)
6152 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6153 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6154 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6155 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6156 recompute_dominator (CDI_DOMINATORS, fin_bb));
6157 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6158 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6159 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6160 recompute_dominator (CDI_DOMINATORS, body_bb));
6162 if (!broken_loop)
6164 class loop *loop = body_bb->loop_father;
6165 class loop *trip_loop = alloc_loop ();
6166 trip_loop->header = iter_part_bb;
6167 trip_loop->latch = trip_update_bb;
6168 add_loop (trip_loop, iter_part_bb->loop_father);
6170 if (loop != entry_bb->loop_father)
6172 gcc_assert (loop->header == body_bb);
6173 gcc_assert (loop->latch == region->cont
6174 || single_pred (loop->latch) == region->cont);
6175 trip_loop->inner = loop;
6176 return;
6179 if (!gimple_omp_for_combined_p (fd->for_stmt))
6181 loop = alloc_loop ();
6182 loop->header = body_bb;
6183 if (collapse_bb == NULL)
6184 loop->latch = cont_bb;
6185 add_loop (loop, trip_loop);
6190 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6191 loop. Given parameters:
6193 for (V = N1; V cond N2; V += STEP) BODY;
6195 where COND is "<" or ">", we generate pseudocode
6197 V = N1;
6198 goto L1;
6199 L0:
6200 BODY;
6201 V += STEP;
6202 L1:
6203 if (V cond N2) goto L0; else goto L2;
6204 L2:
6206 For collapsed loops, given parameters:
6207 collapse(3)
6208 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
6209 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
6210 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
6211 BODY;
6213 we generate pseudocode
6215 if (cond3 is <)
6216 adj = STEP3 - 1;
6217 else
6218 adj = STEP3 + 1;
6219 count3 = (adj + N32 - N31) / STEP3;
6220 if (cond2 is <)
6221 adj = STEP2 - 1;
6222 else
6223 adj = STEP2 + 1;
6224 count2 = (adj + N22 - N21) / STEP2;
6225 if (cond1 is <)
6226 adj = STEP1 - 1;
6227 else
6228 adj = STEP1 + 1;
6229 count1 = (adj + N12 - N11) / STEP1;
6230 count = count1 * count2 * count3;
6231 V = 0;
6232 V1 = N11;
6233 V2 = N21;
6234 V3 = N31;
6235 goto L1;
6236 L0:
6237 BODY;
6238 V += 1;
6239 V3 += STEP3;
6240 V2 += (V3 cond3 N32) ? 0 : STEP2;
6241 V3 = (V3 cond3 N32) ? V3 : N31;
6242 V1 += (V2 cond2 N22) ? 0 : STEP1;
6243 V2 = (V2 cond2 N22) ? V2 : N21;
6244 L1:
6245 if (V < count) goto L0; else goto L2;
6246 L2:
6248 */
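/* In other words, the collapsed IVs behave like the digits of a
   multi-digit counter: when an inner IV runs past its bound it is
   reset to its start value and the next outer IV steps once, while V
   counts the total number of iterations executed.  */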
6250 static void
6251 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6253 tree type, t;
6254 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6255 gimple_stmt_iterator gsi;
6256 gimple *stmt;
6257 gcond *cond_stmt;
6258 bool broken_loop = region->cont == NULL;
6259 edge e, ne;
6260 tree *counts = NULL;
6261 int i;
6262 int safelen_int = INT_MAX;
6263 bool dont_vectorize = false;
6264 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6265 OMP_CLAUSE_SAFELEN);
6266 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6267 OMP_CLAUSE__SIMDUID_);
6268 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6269 OMP_CLAUSE_IF);
6270 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6271 OMP_CLAUSE_SIMDLEN);
6272 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6273 OMP_CLAUSE__CONDTEMP_);
6274 tree n1, n2;
6275 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6277 if (safelen)
6279 poly_uint64 val;
6280 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6281 if (!poly_int_tree_p (safelen, &val))
6282 safelen_int = 0;
6283 else
6284 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6285 if (safelen_int == 1)
6286 safelen_int = 0;
6288 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6289 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6291 safelen_int = 0;
6292 dont_vectorize = true;
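/* I.e. an if(0) or simdlen(1) clause degrades the construct to a plain
   sequential loop: drop the safelen hint and remember to mark the loop
   dont_vectorize below.  */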
6294 type = TREE_TYPE (fd->loop.v);
6295 entry_bb = region->entry;
6296 cont_bb = region->cont;
6297 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6298 gcc_assert (broken_loop
6299 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6300 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6301 if (!broken_loop)
6303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6304 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6305 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6306 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6308 else
6310 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6311 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6312 l2_bb = single_succ (l1_bb);
6314 exit_bb = region->exit;
6315 l2_dom_bb = NULL;
6317 gsi = gsi_last_nondebug_bb (entry_bb);
6319 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6320 /* Not needed in SSA form right now. */
6321 gcc_assert (!gimple_in_ssa_p (cfun));
6322 if (fd->collapse > 1)
6324 int first_zero_iter = -1, dummy = -1;
6325 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6327 counts = XALLOCAVEC (tree, fd->collapse);
6328 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6329 zero_iter_bb, first_zero_iter,
6330 dummy_bb, dummy, l2_dom_bb);
6332 if (l2_dom_bb == NULL)
6333 l2_dom_bb = l1_bb;
6335 n1 = fd->loop.n1;
6336 n2 = fd->loop.n2;
6337 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6339 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6340 OMP_CLAUSE__LOOPTEMP_);
6341 gcc_assert (innerc);
6342 n1 = OMP_CLAUSE_DECL (innerc);
6343 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6344 OMP_CLAUSE__LOOPTEMP_);
6345 gcc_assert (innerc);
6346 n2 = OMP_CLAUSE_DECL (innerc);
6348 tree step = fd->loop.step;
6350 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6351 OMP_CLAUSE__SIMT_);
6352 if (is_simt)
6354 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6355 is_simt = safelen_int > 1;
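/* A _SIMT_ clause marks loops to be transformed for SIMT execution;
   clearing PROP_gimple_lomp_dev should make the later OMP device
   lowering pass revisit this function.  SIMT expansion is only
   worthwhile when safelen allows more than one lane.  */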
6357 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6358 if (is_simt)
6360 simt_lane = create_tmp_var (unsigned_type_node);
6361 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6362 gimple_call_set_lhs (g, simt_lane);
6363 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6364 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6365 fold_convert (TREE_TYPE (step), simt_lane));
6366 n1 = fold_convert (type, n1);
6367 if (POINTER_TYPE_P (type))
6368 n1 = fold_build_pointer_plus (n1, offset);
6369 else
6370 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6372 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6373 if (fd->collapse > 1)
6374 simt_maxlane = build_one_cst (unsigned_type_node);
6375 else if (safelen_int < omp_max_simt_vf ())
6376 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6377 tree vf
6378 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6379 unsigned_type_node, 0);
6380 if (simt_maxlane)
6381 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6382 vf = fold_convert (TREE_TYPE (step), vf);
6383 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
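/* After this, lane L iterates V = N1 + L * STEP, N1 + (L + VF) * STEP,
   ...; i.e. the lanes interleave the iteration space with stride
   STEP * VF, and SIMT_MAXLANE (if set) caps the number of active
   lanes.  */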
6386 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6387 if (fd->collapse > 1)
6389 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6391 gsi_prev (&gsi);
6392 expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1);
6393 gsi_next (&gsi);
6395 else
6396 for (i = 0; i < fd->collapse; i++)
6398 tree itype = TREE_TYPE (fd->loops[i].v);
6399 if (POINTER_TYPE_P (itype))
6400 itype = signed_type_for (itype);
6401 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6402 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6405 if (cond_var)
6407 if (POINTER_TYPE_P (type)
6408 || TREE_CODE (n1) != INTEGER_CST
6409 || fd->loop.cond_code != LT_EXPR
6410 || tree_int_cst_sgn (n1) != 1)
6411 expand_omp_build_assign (&gsi, cond_var,
6412 build_one_cst (TREE_TYPE (cond_var)));
6413 else
6414 expand_omp_build_assign (&gsi, cond_var,
6415 fold_convert (TREE_TYPE (cond_var), n1));
6418 /* Remove the GIMPLE_OMP_FOR statement. */
6419 gsi_remove (&gsi, true);
6421 if (!broken_loop)
6423 /* Code to control the increment goes in the CONT_BB. */
6424 gsi = gsi_last_nondebug_bb (cont_bb);
6425 stmt = gsi_stmt (gsi);
6426 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6428 if (POINTER_TYPE_P (type))
6429 t = fold_build_pointer_plus (fd->loop.v, step);
6430 else
6431 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6432 expand_omp_build_assign (&gsi, fd->loop.v, t);
6434 if (fd->collapse > 1)
6436 i = fd->collapse - 1;
6437 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6439 t = fold_convert (sizetype, fd->loops[i].step);
6440 t = fold_build_pointer_plus (fd->loops[i].v, t);
6442 else
6444 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6445 fd->loops[i].step);
6446 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6447 fd->loops[i].v, t);
6449 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6451 for (i = fd->collapse - 1; i > 0; i--)
6453 tree itype = TREE_TYPE (fd->loops[i].v);
6454 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
6455 if (POINTER_TYPE_P (itype2))
6456 itype2 = signed_type_for (itype2);
6457 t = fold_convert (itype2, fd->loops[i - 1].step);
6458 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6459 GSI_SAME_STMT);
6460 t = build3 (COND_EXPR, itype2,
6461 build2 (fd->loops[i].cond_code, boolean_type_node,
6462 fd->loops[i].v,
6463 fold_convert (itype, fd->loops[i].n2)),
6464 build_int_cst (itype2, 0), t);
6465 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
6466 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
6467 else
6468 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
6469 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
6471 t = fold_convert (itype, fd->loops[i].n1);
6472 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6473 GSI_SAME_STMT);
6474 t = build3 (COND_EXPR, itype,
6475 build2 (fd->loops[i].cond_code, boolean_type_node,
6476 fd->loops[i].v,
6477 fold_convert (itype, fd->loops[i].n2)),
6478 fd->loops[i].v, t);
6479 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6482 if (cond_var)
6484 if (POINTER_TYPE_P (type)
6485 || TREE_CODE (n1) != INTEGER_CST
6486 || fd->loop.cond_code != LT_EXPR
6487 || tree_int_cst_sgn (n1) != 1)
6488 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6489 build_one_cst (TREE_TYPE (cond_var)));
6490 else
6491 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6492 fold_convert (TREE_TYPE (cond_var), step));
6493 expand_omp_build_assign (&gsi, cond_var, t);
6496 /* Remove GIMPLE_OMP_CONTINUE. */
6497 gsi_remove (&gsi, true);
6500 /* Emit the condition in L1_BB. */
6501 gsi = gsi_start_bb (l1_bb);
6503 t = fold_convert (type, n2);
6504 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6505 false, GSI_CONTINUE_LINKING);
6506 tree v = fd->loop.v;
6507 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6508 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6509 false, GSI_CONTINUE_LINKING);
6510 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6511 cond_stmt = gimple_build_cond_empty (t);
6512 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6513 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6514 NULL, NULL)
6515 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6516 NULL, NULL))
6518 gsi = gsi_for_stmt (cond_stmt);
6519 gimple_regimplify_operands (cond_stmt, &gsi);
6522 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6523 if (is_simt)
6525 gsi = gsi_start_bb (l2_bb);
6526 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6527 if (POINTER_TYPE_P (type))
6528 t = fold_build_pointer_plus (fd->loop.v, step);
6529 else
6530 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6531 expand_omp_build_assign (&gsi, fd->loop.v, t);
6534 /* Remove GIMPLE_OMP_RETURN. */
6535 gsi = gsi_last_nondebug_bb (exit_bb);
6536 gsi_remove (&gsi, true);
6538 /* Connect the new blocks. */
6539 remove_edge (FALLTHRU_EDGE (entry_bb));
6541 if (!broken_loop)
6543 remove_edge (BRANCH_EDGE (entry_bb));
6544 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6546 e = BRANCH_EDGE (l1_bb);
6547 ne = FALLTHRU_EDGE (l1_bb);
6548 e->flags = EDGE_TRUE_VALUE;
6550 else
6552 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6554 ne = single_succ_edge (l1_bb);
6555 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6558 ne->flags = EDGE_FALSE_VALUE;
6559 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6560 ne->probability = e->probability.invert ();
6562 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6563 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6565 if (simt_maxlane)
6567 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6568 NULL_TREE, NULL_TREE);
6569 gsi = gsi_last_bb (entry_bb);
6570 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6571 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6572 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6573 FALLTHRU_EDGE (entry_bb)->probability
6574 = profile_probability::guessed_always ().apply_scale (7, 8);
6575 BRANCH_EDGE (entry_bb)->probability
6576 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6577 l2_dom_bb = entry_bb;
6579 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6581 if (!broken_loop)
6583 class loop *loop = alloc_loop ();
6584 loop->header = l1_bb;
6585 loop->latch = cont_bb;
6586 add_loop (loop, l1_bb->loop_father);
6587 loop->safelen = safelen_int;
6588 if (simduid)
6590 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6591 cfun->has_simduid_loops = true;
6593 /* Unless -fno-tree-loop-vectorize was explicitly specified, hint
6594 that we want to vectorize the loop. */
6595 if ((flag_tree_loop_vectorize
6596 || !global_options_set.x_flag_tree_loop_vectorize)
6597 && flag_tree_loop_optimize
6598 && loop->safelen > 1)
6600 loop->force_vectorize = true;
6601 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6603 unsigned HOST_WIDE_INT v
6604 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6605 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6606 loop->simdlen = v;
6608 cfun->has_force_vectorize_loops = true;
6610 else if (dont_vectorize)
6611 loop->dont_vectorize = true;
6613 else if (simduid)
6614 cfun->has_simduid_loops = true;
6617 /* A taskloop construct is represented after gimplification as
6618 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6619 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6620 which just needs to compute all the loop temporaries required
6621 by the GIMPLE_OMP_TASK. */
6623 static void
6624 expand_omp_taskloop_for_outer (struct omp_region *region,
6625 struct omp_for_data *fd,
6626 gimple *inner_stmt)
6628 tree type, bias = NULL_TREE;
6629 basic_block entry_bb, cont_bb, exit_bb;
6630 gimple_stmt_iterator gsi;
6631 gassign *assign_stmt;
6632 tree *counts = NULL;
6633 int i;
6635 gcc_assert (inner_stmt);
6636 gcc_assert (region->cont);
6637 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6638 && gimple_omp_task_taskloop_p (inner_stmt));
6639 type = TREE_TYPE (fd->loop.v);
6641 /* See if we need to bias by LLONG_MIN. */
6642 if (fd->iter_type == long_long_unsigned_type_node
6643 && TREE_CODE (type) == INTEGER_TYPE
6644 && !TYPE_UNSIGNED (type))
6646 tree n1, n2;
6648 if (fd->loop.cond_code == LT_EXPR)
6650 n1 = fd->loop.n1;
6651 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6653 else
6655 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6656 n2 = fd->loop.n1;
6658 if (TREE_CODE (n1) != INTEGER_CST
6659 || TREE_CODE (n2) != INTEGER_CST
6660 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6661 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
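/* On two's complement targets adding (unsigned long long) LLONG_MIN
   maps the signed range monotonically onto the unsigned range, so the
   unsigned comparisons done by the GOMP_taskloop_ull entry point still
   see the iteration bounds in the right order.  */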
6664 entry_bb = region->entry;
6665 cont_bb = region->cont;
6666 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6667 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6668 exit_bb = region->exit;
6670 gsi = gsi_last_nondebug_bb (entry_bb);
6671 gimple *for_stmt = gsi_stmt (gsi);
6672 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
6673 if (fd->collapse > 1)
6675 int first_zero_iter = -1, dummy = -1;
6676 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
6678 counts = XALLOCAVEC (tree, fd->collapse);
6679 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6680 zero_iter_bb, first_zero_iter,
6681 dummy_bb, dummy, l2_dom_bb);
6683 if (zero_iter_bb)
6685 /* Some counts[i] vars might be uninitialized if
6686 some loop has zero iterations. But the body shouldn't
6687 be executed in that case, so just avoid uninit warnings. */
6688 for (i = first_zero_iter; i < fd->collapse; i++)
6689 if (SSA_VAR_P (counts[i]))
6690 TREE_NO_WARNING (counts[i]) = 1;
6691 gsi_prev (&gsi);
6692 edge e = split_block (entry_bb, gsi_stmt (gsi));
6693 entry_bb = e->dest;
6694 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
6695 gsi = gsi_last_bb (entry_bb);
6696 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
6697 get_immediate_dominator (CDI_DOMINATORS,
6698 zero_iter_bb));
6702 tree t0, t1;
6703 t1 = fd->loop.n2;
6704 t0 = fd->loop.n1;
6705 if (POINTER_TYPE_P (TREE_TYPE (t0))
6706 && TYPE_PRECISION (TREE_TYPE (t0))
6707 != TYPE_PRECISION (fd->iter_type))
6709 /* Avoid casting pointers to integers of a different size. */
6710 tree itype = signed_type_for (type);
6711 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
6712 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
6714 else
6716 t1 = fold_convert (fd->iter_type, t1);
6717 t0 = fold_convert (fd->iter_type, t0);
6719 if (bias)
6721 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
6722 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
6725 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
6726 OMP_CLAUSE__LOOPTEMP_);
6727 gcc_assert (innerc);
6728 tree startvar = OMP_CLAUSE_DECL (innerc);
6729 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6730 gcc_assert (innerc);
6731 tree endvar = OMP_CLAUSE_DECL (innerc);
6732 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
6734 gcc_assert (innerc);
6735 for (i = 1; i < fd->collapse; i++)
6737 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6738 OMP_CLAUSE__LOOPTEMP_);
6739 gcc_assert (innerc);
6741 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6742 OMP_CLAUSE__LOOPTEMP_);
6743 if (innerc)
6745 /* If needed (inner taskloop has lastprivate clause), propagate
6746 down the total number of iterations. */
6747 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
6748 NULL_TREE, false,
6749 GSI_CONTINUE_LINKING);
6750 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6751 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6755 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
6756 GSI_CONTINUE_LINKING);
6757 assign_stmt = gimple_build_assign (startvar, t0);
6758 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6760 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
6761 GSI_CONTINUE_LINKING);
6762 assign_stmt = gimple_build_assign (endvar, t1);
6763 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6764 if (fd->collapse > 1)
6765 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6767 /* Remove the GIMPLE_OMP_FOR statement. */
6768 gsi = gsi_for_stmt (for_stmt);
6769 gsi_remove (&gsi, true);
6771 gsi = gsi_last_nondebug_bb (cont_bb);
6772 gsi_remove (&gsi, true);
6774 gsi = gsi_last_nondebug_bb (exit_bb);
6775 gsi_remove (&gsi, true);
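/* With the OMP statements gone the outer taskloop region is
   straight-line code that merely computes the loop temporaries, so
   drop the now-dead branch edges and recompute dominators.  */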
6777 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6778 remove_edge (BRANCH_EDGE (entry_bb));
6779 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
6780 remove_edge (BRANCH_EDGE (cont_bb));
6781 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
6782 set_immediate_dominator (CDI_DOMINATORS, region->entry,
6783 recompute_dominator (CDI_DOMINATORS, region->entry));
6786 /* A taskloop construct is represented after gimplification as
6787 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6788 in between them. This routine expands the inner GIMPLE_OMP_FOR;
6789 the GOMP_taskloop{,_ull} function arranges for each task to be
6790 given just a single range of iterations. */
6792 static void
6793 expand_omp_taskloop_for_inner (struct omp_region *region,
6794 struct omp_for_data *fd,
6795 gimple *inner_stmt)
6797 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
6798 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
6799 basic_block fin_bb;
6800 gimple_stmt_iterator gsi;
6801 edge ep;
6802 bool broken_loop = region->cont == NULL;
6803 tree *counts = NULL;
6804 tree n1, n2, step;
6806 itype = type = TREE_TYPE (fd->loop.v);
6807 if (POINTER_TYPE_P (type))
6808 itype = signed_type_for (type);
6810 /* See if we need to bias by LLONG_MIN. */
6811 if (fd->iter_type == long_long_unsigned_type_node
6812 && TREE_CODE (type) == INTEGER_TYPE
6813 && !TYPE_UNSIGNED (type))
6815 tree n1, n2;
6817 if (fd->loop.cond_code == LT_EXPR)
6819 n1 = fd->loop.n1;
6820 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6822 else
6824 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6825 n2 = fd->loop.n1;
6827 if (TREE_CODE (n1) != INTEGER_CST
6828 || TREE_CODE (n2) != INTEGER_CST
6829 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6830 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6833 entry_bb = region->entry;
6834 cont_bb = region->cont;
6835 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6836 fin_bb = BRANCH_EDGE (entry_bb)->dest;
6837 gcc_assert (broken_loop
6838 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
6839 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6840 if (!broken_loop)
6842 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
6843 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6845 exit_bb = region->exit;
6847 /* Iteration space partitioning goes in ENTRY_BB. */
6848 gsi = gsi_last_nondebug_bb (entry_bb);
6849 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6851 if (fd->collapse > 1)
6853 int first_zero_iter = -1, dummy = -1;
6854 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
6856 counts = XALLOCAVEC (tree, fd->collapse);
6857 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6858 fin_bb, first_zero_iter,
6859 dummy_bb, dummy, l2_dom_bb);
6860 t = NULL_TREE;
6862 else
6863 t = integer_one_node;
6865 step = fd->loop.step;
6866 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6867 OMP_CLAUSE__LOOPTEMP_);
6868 gcc_assert (innerc);
6869 n1 = OMP_CLAUSE_DECL (innerc);
6870 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6871 gcc_assert (innerc);
6872 n2 = OMP_CLAUSE_DECL (innerc);
6873 if (bias)
6875 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
6876 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
6878 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
6879 true, NULL_TREE, true, GSI_SAME_STMT);
6880 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
6881 true, NULL_TREE, true, GSI_SAME_STMT);
6882 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
6883 true, NULL_TREE, true, GSI_SAME_STMT);
6885 tree startvar = fd->loop.v;
6886 tree endvar = NULL_TREE;
6888 if (gimple_omp_for_combined_p (fd->for_stmt))
6890 tree clauses = gimple_omp_for_clauses (inner_stmt);
6891 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6892 gcc_assert (innerc);
6893 startvar = OMP_CLAUSE_DECL (innerc);
6894 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6895 OMP_CLAUSE__LOOPTEMP_);
6896 gcc_assert (innerc);
6897 endvar = OMP_CLAUSE_DECL (innerc);
6899 t = fold_convert (TREE_TYPE (startvar), n1);
6900 t = force_gimple_operand_gsi (&gsi, t,
6901 DECL_P (startvar)
6902 && TREE_ADDRESSABLE (startvar),
6903 NULL_TREE, false, GSI_CONTINUE_LINKING);
6904 gimple *assign_stmt = gimple_build_assign (startvar, t);
6905 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6907 t = fold_convert (TREE_TYPE (startvar), n2);
6908 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6909 false, GSI_CONTINUE_LINKING);
6910 if (endvar)
6912 assign_stmt = gimple_build_assign (endvar, e);
6913 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6914 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6915 assign_stmt = gimple_build_assign (fd->loop.v, e);
6916 else
6917 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6918 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6921 tree *nonrect_bounds = NULL;
6922 if (fd->collapse > 1)
6924 if (fd->non_rect)
6926 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6927 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
6929 gcc_assert (gsi_bb (gsi) == entry_bb);
6930 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
6931 startvar);
6932 entry_bb = gsi_bb (gsi);
6935 if (!broken_loop)
6937 /* The code controlling the sequential loop replaces the
6938 GIMPLE_OMP_CONTINUE. */
6939 gsi = gsi_last_nondebug_bb (cont_bb);
6940 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6941 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
6942 vmain = gimple_omp_continue_control_use (cont_stmt);
6943 vback = gimple_omp_continue_control_def (cont_stmt);
6945 if (!gimple_omp_for_combined_p (fd->for_stmt))
6947 if (POINTER_TYPE_P (type))
6948 t = fold_build_pointer_plus (vmain, step);
6949 else
6950 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6951 t = force_gimple_operand_gsi (&gsi, t,
6952 DECL_P (vback)
6953 && TREE_ADDRESSABLE (vback),
6954 NULL_TREE, true, GSI_SAME_STMT);
6955 assign_stmt = gimple_build_assign (vback, t);
6956 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6958 t = build2 (fd->loop.cond_code, boolean_type_node,
6959 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6960 ? t : vback, e);
6961 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6964 /* Remove the GIMPLE_OMP_CONTINUE statement. */
6965 gsi_remove (&gsi, true);
6967 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6968 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
6969 cont_bb, body_bb);
6972 /* Remove the GIMPLE_OMP_FOR statement. */
6973 gsi = gsi_for_stmt (fd->for_stmt);
6974 gsi_remove (&gsi, true);
6976 /* Remove the GIMPLE_OMP_RETURN statement. */
6977 gsi = gsi_last_nondebug_bb (exit_bb);
6978 gsi_remove (&gsi, true);
6980 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6981 if (!broken_loop)
6982 remove_edge (BRANCH_EDGE (entry_bb));
6983 else
6985 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
6986 region->outer->cont = NULL;
6989 /* Connect all the blocks. */
6990 if (!broken_loop)
6992 ep = find_edge (cont_bb, body_bb);
6993 if (gimple_omp_for_combined_p (fd->for_stmt))
6995 remove_edge (ep);
6996 ep = NULL;
6998 else if (fd->collapse > 1)
7000 remove_edge (ep);
7001 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7003 else
7004 ep->flags = EDGE_TRUE_VALUE;
7005 find_edge (cont_bb, fin_bb)->flags
7006 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7009 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7010 recompute_dominator (CDI_DOMINATORS, body_bb));
7011 if (!broken_loop)
7012 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7013 recompute_dominator (CDI_DOMINATORS, fin_bb));
7015 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7017 class loop *loop = alloc_loop ();
7018 loop->header = body_bb;
7019 if (collapse_bb == NULL)
7020 loop->latch = cont_bb;
7021 add_loop (loop, body_bb->loop_father);
7025 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7026 partitioned loop. The lowering here is abstracted, in that the
7027 loop parameters are passed through internal functions, which are
7028 further lowered by oacc_device_lower, once we get to the target
7029 compiler. The loop is of the form:
7031 for (V = B; V LTGT E; V += S) {BODY}
7033 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7034 (constant 0 for no chunking) and we will have a GWV partitioning
7035 mask, specifying dimensions over which the loop is to be
7036 partitioned (see note below). We generate code that looks like
7037 (this ignores tiling):
7039 <entry_bb> [incoming FALL->body, BRANCH->exit]
7040 typedef signedintify (typeof (V)) T; // underlying signed integral type
7041 T range = E - B;
7042 T chunk_no = 0;
7043 T DIR = LTGT == '<' ? +1 : -1;
7044 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7045 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7047 <head_bb> [created by splitting end of entry_bb]
7048 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7049 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7050 if (!(offset LTGT bound)) goto bottom_bb;
7052 <body_bb> [incoming]
7053 V = B + offset;
7054 {BODY}
7056 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7057 offset += step;
7058 if (offset LTGT bound) goto body_bb; [*]
7060 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7061 chunk_no++;
7062 if (chunk_no < chunk_max) goto head_bb;
7064 <exit_bb> [incoming]
7065 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7067 [*] Needed if V live at end of loop. */
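/* As an illustrative example, a loop such as

     #pragma acc parallel loop gang
     for (int i = 0; i < n; i++)
       a[i] = b[i];

   has B = 0, E = n, S = 1 and LTGT = '<', so range = n and DIR = +1.
   The GOACC_LOOP_* internal-function calls emitted below stay abstract
   at this point; oacc_device_lower later specializes them for the
   gang/worker/vector partitioning actually chosen for the target.  */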
7069 static void
7070 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7072 tree v = fd->loop.v;
7073 enum tree_code cond_code = fd->loop.cond_code;
7074 enum tree_code plus_code = PLUS_EXPR;
7076 tree chunk_size = integer_minus_one_node;
7077 tree gwv = integer_zero_node;
7078 tree iter_type = TREE_TYPE (v);
7079 tree diff_type = iter_type;
7080 tree plus_type = iter_type;
7081 struct oacc_collapse *counts = NULL;
7083 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7084 == GF_OMP_FOR_KIND_OACC_LOOP);
7085 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7086 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7088 if (POINTER_TYPE_P (iter_type))
7090 plus_code = POINTER_PLUS_EXPR;
7091 plus_type = sizetype;
7093 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7094 diff_type = signed_type_for (diff_type);
7095 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7096 diff_type = integer_type_node;
7098 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7099 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7100 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7101 basic_block bottom_bb = NULL;
7103 /* entry_bb has two successors; the branch edge is to the exit
7104 block, fallthrough edge to body. */
7105 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7106 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7108 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7109 body_bb, or to a block whose only successor is the body_bb. Its
7110 fallthrough successor is the final block (same as the branch
7111 successor of the entry_bb). */
7112 if (cont_bb)
7114 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7115 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7117 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7118 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7120 else
7121 gcc_assert (!gimple_in_ssa_p (cfun));
7123 /* The exit block only has entry_bb and cont_bb as predecessors. */
7124 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7126 tree chunk_no;
7127 tree chunk_max = NULL_TREE;
7128 tree bound, offset;
7129 tree step = create_tmp_var (diff_type, ".step");
7130 bool up = cond_code == LT_EXPR;
7131 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7132 bool chunking = !gimple_in_ssa_p (cfun);
7133 bool negating;
7135 /* Tiling vars. */
7136 tree tile_size = NULL_TREE;
7137 tree element_s = NULL_TREE;
7138 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7139 basic_block elem_body_bb = NULL;
7140 basic_block elem_cont_bb = NULL;
7142 /* SSA instances. */
7143 tree offset_incr = NULL_TREE;
7144 tree offset_init = NULL_TREE;
7146 gimple_stmt_iterator gsi;
7147 gassign *ass;
7148 gcall *call;
7149 gimple *stmt;
7150 tree expr;
7151 location_t loc;
7152 edge split, be, fte;
7154 /* Split the end of entry_bb to create head_bb. */
7155 split = split_block (entry_bb, last_stmt (entry_bb));
7156 basic_block head_bb = split->dest;
7157 entry_bb = split->src;
7159 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7160 gsi = gsi_last_nondebug_bb (entry_bb);
7161 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7162 loc = gimple_location (for_stmt);
7164 if (gimple_in_ssa_p (cfun))
7166 offset_init = gimple_omp_for_index (for_stmt, 0);
7167 gcc_assert (integer_zerop (fd->loop.n1));
7168 /* The SSA parallelizer does gang parallelism. */
7169 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7172 if (fd->collapse > 1 || fd->tiling)
7174 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7175 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7176 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7177 TREE_TYPE (fd->loop.n2), loc);
7179 if (SSA_VAR_P (fd->loop.n2))
7181 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7182 true, GSI_SAME_STMT);
7183 ass = gimple_build_assign (fd->loop.n2, total);
7184 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7188 tree b = fd->loop.n1;
7189 tree e = fd->loop.n2;
7190 tree s = fd->loop.step;
7192 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7193 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7195 /* Convert the step, avoiding possible unsigned->signed overflow. */
7196 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7197 if (negating)
7198 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7199 s = fold_convert (diff_type, s);
7200 if (negating)
7201 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7202 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7204 if (!chunking)
7205 chunk_size = integer_zero_node;
7206 expr = fold_convert (diff_type, chunk_size);
7207 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7208 NULL_TREE, true, GSI_SAME_STMT);
7210 if (fd->tiling)
7212 /* Determine the tile size and element step,
7213 modify the outer loop step size. */
7214 tile_size = create_tmp_var (diff_type, ".tile_size");
7215 expr = build_int_cst (diff_type, 1);
7216 for (int ix = 0; ix < fd->collapse; ix++)
7217 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7218 expr = force_gimple_operand_gsi (&gsi, expr, true,
7219 NULL_TREE, true, GSI_SAME_STMT);
7220 ass = gimple_build_assign (tile_size, expr);
7221 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7223 element_s = create_tmp_var (diff_type, ".element_s");
7224 ass = gimple_build_assign (element_s, s);
7225 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7227 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7228 s = force_gimple_operand_gsi (&gsi, expr, true,
7229 NULL_TREE, true, GSI_SAME_STMT);
7232 /* Determine the range, avoiding possible unsigned->signed overflow. */
7233 negating = !up && TYPE_UNSIGNED (iter_type);
7234 expr = fold_build2 (MINUS_EXPR, plus_type,
7235 fold_convert (plus_type, negating ? b : e),
7236 fold_convert (plus_type, negating ? e : b));
7237 expr = fold_convert (diff_type, expr);
7238 if (negating)
7239 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7240 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7241 NULL_TREE, true, GSI_SAME_STMT);
7243 chunk_no = build_int_cst (diff_type, 0);
7244 if (chunking)
7246 gcc_assert (!gimple_in_ssa_p (cfun));
7248 expr = chunk_no;
7249 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7250 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7252 ass = gimple_build_assign (chunk_no, expr);
7253 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7255 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7256 build_int_cst (integer_type_node,
7257 IFN_GOACC_LOOP_CHUNKS),
7258 dir, range, s, chunk_size, gwv);
7259 gimple_call_set_lhs (call, chunk_max);
7260 gimple_set_location (call, loc);
7261 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7263 else
7264 chunk_size = chunk_no;
7266 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7267 build_int_cst (integer_type_node,
7268 IFN_GOACC_LOOP_STEP),
7269 dir, range, s, chunk_size, gwv);
7270 gimple_call_set_lhs (call, step);
7271 gimple_set_location (call, loc);
7272 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7274 /* Remove the GIMPLE_OMP_FOR. */
7275 gsi_remove (&gsi, true);
7277 /* Fixup edges from head_bb. */
7278 be = BRANCH_EDGE (head_bb);
7279 fte = FALLTHRU_EDGE (head_bb);
7280 be->flags |= EDGE_FALSE_VALUE;
7281 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7283 basic_block body_bb = fte->dest;
7285 if (gimple_in_ssa_p (cfun))
7287 gsi = gsi_last_nondebug_bb (cont_bb);
7288 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7290 offset = gimple_omp_continue_control_use (cont_stmt);
7291 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7293 else
7295 offset = create_tmp_var (diff_type, ".offset");
7296 offset_init = offset_incr = offset;
7298 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7300 /* Loop offset & bound go into head_bb. */
7301 gsi = gsi_start_bb (head_bb);
7303 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7304 build_int_cst (integer_type_node,
7305 IFN_GOACC_LOOP_OFFSET),
7306 dir, range, s,
7307 chunk_size, gwv, chunk_no);
7308 gimple_call_set_lhs (call, offset_init);
7309 gimple_set_location (call, loc);
7310 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7312 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7313 build_int_cst (integer_type_node,
7314 IFN_GOACC_LOOP_BOUND),
7315 dir, range, s,
7316 chunk_size, gwv, offset_init);
7317 gimple_call_set_lhs (call, bound);
7318 gimple_set_location (call, loc);
7319 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7321 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7322 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7323 GSI_CONTINUE_LINKING);
7325 /* V assignment goes into body_bb. */
7326 if (!gimple_in_ssa_p (cfun))
7328 gsi = gsi_start_bb (body_bb);
7330 expr = build2 (plus_code, iter_type, b,
7331 fold_convert (plus_type, offset));
7332 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7333 true, GSI_SAME_STMT);
7334 ass = gimple_build_assign (v, expr);
7335 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7337 if (fd->collapse > 1 || fd->tiling)
7338 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7340 if (fd->tiling)
7342 /* Determine the range of the element loop -- usually simply
7343 the tile_size, but could be smaller if the final
7344 iteration of the outer loop is a partial tile. */
7345 tree e_range = create_tmp_var (diff_type, ".e_range");
7347 expr = build2 (MIN_EXPR, diff_type,
7348 build2 (MINUS_EXPR, diff_type, bound, offset),
7349 build2 (MULT_EXPR, diff_type, tile_size,
7350 element_s));
7351 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7352 true, GSI_SAME_STMT);
7353 ass = gimple_build_assign (e_range, expr);
7354 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7356 /* Determine bound, offset & step of inner loop. */
7357 e_bound = create_tmp_var (diff_type, ".e_bound");
7358 e_offset = create_tmp_var (diff_type, ".e_offset");
7359 e_step = create_tmp_var (diff_type, ".e_step");
7361 /* Mark these as element loops. */
7362 tree t, e_gwv = integer_minus_one_node;
7363 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7365 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7366 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7367 element_s, chunk, e_gwv, chunk);
7368 gimple_call_set_lhs (call, e_offset);
7369 gimple_set_location (call, loc);
7370 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7372 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7373 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7374 element_s, chunk, e_gwv, e_offset);
7375 gimple_call_set_lhs (call, e_bound);
7376 gimple_set_location (call, loc);
7377 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7379 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7380 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7381 element_s, chunk, e_gwv);
7382 gimple_call_set_lhs (call, e_step);
7383 gimple_set_location (call, loc);
7384 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7386 /* Add test and split block. */
7387 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7388 stmt = gimple_build_cond_empty (expr);
7389 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7390 split = split_block (body_bb, stmt);
7391 elem_body_bb = split->dest;
7392 if (cont_bb == body_bb)
7393 cont_bb = elem_body_bb;
7394 body_bb = split->src;
7396 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7398 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7399 if (cont_bb == NULL)
7401 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7402 e->probability = profile_probability::even ();
7403 split->probability = profile_probability::even ();
7406 /* Initialize the user's loop vars. */
7407 gsi = gsi_start_bb (elem_body_bb);
7408 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7412 /* Loop increment goes into cont_bb. If this is not a loop, we
7413 will have spawned threads as if it was, and each one will
7414 execute one iteration. The specification is not explicit about
7415 whether such constructs are ill-formed or not, and they can
7416 occur, especially when noreturn routines are involved. */
7417 if (cont_bb)
7419 gsi = gsi_last_nondebug_bb (cont_bb);
7420 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7421 loc = gimple_location (cont_stmt);
7423 if (fd->tiling)
7425 /* Insert element loop increment and test. */
7426 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7427 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7428 true, GSI_SAME_STMT);
7429 ass = gimple_build_assign (e_offset, expr);
7430 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7431 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7433 stmt = gimple_build_cond_empty (expr);
7434 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7435 split = split_block (cont_bb, stmt);
7436 elem_cont_bb = split->src;
7437 cont_bb = split->dest;
7439 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7440 split->probability = profile_probability::unlikely ().guessed ();
7441 edge latch_edge
7442 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7443 latch_edge->probability = profile_probability::likely ().guessed ();
7445 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7446 skip_edge->probability = profile_probability::unlikely ().guessed ();
7447 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7448 loop_entry_edge->probability
7449 = profile_probability::likely ().guessed ();
7451 gsi = gsi_for_stmt (cont_stmt);
7454 /* Increment offset. */
7455 if (gimple_in_ssa_p (cfun))
7456 expr = build2 (plus_code, iter_type, offset,
7457 fold_convert (plus_type, step));
7458 else
7459 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7460 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7461 true, GSI_SAME_STMT);
7462 ass = gimple_build_assign (offset_incr, expr);
7463 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7464 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7465 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7467 /* Remove the GIMPLE_OMP_CONTINUE. */
7468 gsi_remove (&gsi, true);
7470 /* Fixup edges from cont_bb. */
7471 be = BRANCH_EDGE (cont_bb);
7472 fte = FALLTHRU_EDGE (cont_bb);
7473 be->flags |= EDGE_TRUE_VALUE;
7474 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7476 if (chunking)
7478 /* Split the beginning of exit_bb to make bottom_bb. We
7479 need to insert a nop at the start, because splitting is
7480 after a stmt, not before. */
7481 gsi = gsi_start_bb (exit_bb);
7482 stmt = gimple_build_nop ();
7483 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7484 split = split_block (exit_bb, stmt);
7485 bottom_bb = split->src;
7486 exit_bb = split->dest;
7487 gsi = gsi_last_bb (bottom_bb);
7489 /* Chunk increment and test goes into bottom_bb. */
7490 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7491 build_int_cst (diff_type, 1));
7492 ass = gimple_build_assign (chunk_no, expr);
7493 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7495 /* Chunk test at end of bottom_bb. */
7496 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7497 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7498 GSI_CONTINUE_LINKING);
7500 /* Fixup edges from bottom_bb. */
7501 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7502 split->probability = profile_probability::unlikely ().guessed ();
7503 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7504 latch_edge->probability = profile_probability::likely ().guessed ();
7508 gsi = gsi_last_nondebug_bb (exit_bb);
7509 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7510 loc = gimple_location (gsi_stmt (gsi));
7512 if (!gimple_in_ssa_p (cfun))
7514 /* Insert the final value of V, in case it is live. This is the
7515 value for the only thread that survives past the join. */
7516 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7517 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7518 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7519 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7520 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7521 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7522 true, GSI_SAME_STMT);
7523 ass = gimple_build_assign (v, expr);
7524 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7527 /* Remove the OMP_RETURN. */
7528 gsi_remove (&gsi, true);
7530 if (cont_bb)
7532 /* We now have one, two or three nested loops. Update the loop
7533 structures. */
7534 class loop *parent = entry_bb->loop_father;
7535 class loop *body = body_bb->loop_father;
7537 if (chunking)
7539 class loop *chunk_loop = alloc_loop ();
7540 chunk_loop->header = head_bb;
7541 chunk_loop->latch = bottom_bb;
7542 add_loop (chunk_loop, parent);
7543 parent = chunk_loop;
7545 else if (parent != body)
7547 gcc_assert (body->header == body_bb);
7548 gcc_assert (body->latch == cont_bb
7549 || single_pred (body->latch) == cont_bb);
7550 parent = NULL;
7553 if (parent)
7555 class loop *body_loop = alloc_loop ();
7556 body_loop->header = body_bb;
7557 body_loop->latch = cont_bb;
7558 add_loop (body_loop, parent);
7560 if (fd->tiling)
7562 /* Insert tiling's element loop. */
7563 class loop *inner_loop = alloc_loop ();
7564 inner_loop->header = elem_body_bb;
7565 inner_loop->latch = elem_cont_bb;
7566 add_loop (inner_loop, body_loop);
7572 /* Expand the OMP loop defined by REGION. */
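/* Orientation only (a simplified view of the dispatch below): simd loops
   go to expand_omp_simd, OpenACC loops to expand_oacc_for, taskloops to
   the taskloop expanders, and a static schedule without an ordered clause
   uses expand_omp_for_static_{nochunk,chunk}.  Something like

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) ...

   falls through to expand_omp_for_generic with libgomp entry points such
   as GOMP_loop_dynamic_start / GOMP_loop_dynamic_next (or their
   nonmonotonic and _ull variants, as the clause modifiers and the
   iteration type dictate).  */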
7574 static void
7575 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7577 struct omp_for_data fd;
7578 struct omp_for_data_loop *loops;
7580 loops = XALLOCAVEC (struct omp_for_data_loop,
7581 gimple_omp_for_collapse (last_stmt (region->entry)));
7582 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7583 &fd, loops);
7584 region->sched_kind = fd.sched_kind;
7585 region->sched_modifiers = fd.sched_modifiers;
7586 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7587 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7589 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7590 if ((loops[i].m1 || loops[i].m2)
7591 && (loops[i].m1 == NULL_TREE
7592 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7593 && (loops[i].m2 == NULL_TREE
7594 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7595 && TREE_CODE (loops[i].step) == INTEGER_CST
7596 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7598 tree t;
7599 tree itype = TREE_TYPE (loops[i].v);
7600 if (loops[i].m1 && loops[i].m2)
7601 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7602 else if (loops[i].m1)
7603 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7604 else
7605 t = loops[i].m2;
7606 t = fold_build2 (MULT_EXPR, itype, t,
7607 fold_convert (itype,
7608 loops[i - loops[i].outer].step));
7609 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7610 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7611 fold_build1 (NEGATE_EXPR, itype, t),
7612 fold_build1 (NEGATE_EXPR, itype,
7613 fold_convert (itype,
7614 loops[i].step)));
7615 else
7616 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7617 fold_convert (itype, loops[i].step));
7618 if (integer_nonzerop (t))
7619 error_at (gimple_location (fd.for_stmt),
7620 "invalid OpenMP non-rectangular loop step; "
7621 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7622 "step %qE",
7623 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7624 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7625 loops[i - loops[i].outer].step, i + 1,
7626 loops[i].step);
7630 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7631 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7632 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7633 if (region->cont)
7635 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7636 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7637 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7639 else
7640 /* If there isn't a continue then this is a degenerate case where
7641 the introduction of abnormal edges during lowering will prevent
7642 original loops from being detected. Fix that up. */
7643 loops_state_set (LOOPS_NEED_FIXUP);
7645 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7647 if (fd.non_rect)
7648 sorry_at (gimple_location (fd.for_stmt),
7649 "non-rectangular %<simd%> not supported yet");
7650 expand_omp_simd (region, &fd);
7652 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7654 gcc_assert (!inner_stmt && !fd.non_rect);
7655 expand_oacc_for (region, &fd);
7657 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7659 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7660 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7661 else
7662 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
7664 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
7665 && !fd.have_ordered)
7667 if (fd.chunk_size == NULL)
7668 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
7669 else
7670 expand_omp_for_static_chunk (region, &fd, inner_stmt);
7672 else
7674 int fn_index, start_ix, next_ix;
7675 unsigned HOST_WIDE_INT sched = 0;
7676 tree sched_arg = NULL_TREE;
7678 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
7679 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
7680 if (fd.chunk_size == NULL
7681 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
7682 fd.chunk_size = integer_zero_node;
7683 switch (fd.sched_kind)
7685 case OMP_CLAUSE_SCHEDULE_RUNTIME:
7686 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
7687 && fd.lastprivate_conditional == 0)
7689 gcc_assert (!fd.have_ordered);
7690 fn_index = 6;
7691 sched = 4;
7693 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7694 && !fd.have_ordered
7695 && fd.lastprivate_conditional == 0)
7696 fn_index = 7;
7697 else
7699 fn_index = 3;
7700 sched = (HOST_WIDE_INT_1U << 31);
7702 break;
7703 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
7704 case OMP_CLAUSE_SCHEDULE_GUIDED:
7705 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7706 && !fd.have_ordered
7707 && fd.lastprivate_conditional == 0)
7709 fn_index = 3 + fd.sched_kind;
7710 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7711 break;
7713 fn_index = fd.sched_kind;
7714 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7715 sched += (HOST_WIDE_INT_1U << 31);
7716 break;
7717 case OMP_CLAUSE_SCHEDULE_STATIC:
7718 gcc_assert (fd.have_ordered);
7719 fn_index = 0;
7720 sched = (HOST_WIDE_INT_1U << 31) + 1;
7721 break;
7722 default:
7723 gcc_unreachable ();
7725 if (!fd.ordered)
7726 fn_index += fd.have_ordered * 8;
7727 if (fd.ordered)
7728 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
7729 else
7730 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
7731 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
7732 if (fd.have_reductemp || fd.have_pointer_condtemp)
7734 if (fd.ordered)
7735 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
7736 else if (fd.have_ordered)
7737 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
7738 else
7739 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
7740 sched_arg = build_int_cstu (long_integer_type_node, sched);
7741 if (!fd.chunk_size)
7742 fd.chunk_size = integer_zero_node;
7744 if (fd.iter_type == long_long_unsigned_type_node)
7746 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
7747 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
7748 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
7749 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
7751 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
7752 (enum built_in_function) next_ix, sched_arg,
7753 inner_stmt);
7756 if (gimple_in_ssa_p (cfun))
7757 update_ssa (TODO_update_ssa_only_virtuals);
7760 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
7762 v = GOMP_sections_start (n);
7763 L0:
7764 switch (v)
7765 {
7766 case 0:
7767 goto L2;
7768 case 1:
7769 section 1;
7770 goto L1;
7771 case 2:
7772 ...
7773 case n:
7774 ...
7775 default:
7776 abort ();
7777 }
7778 L1:
7779 v = GOMP_sections_next ();
7780 goto L0;
7781 L2:
7782 reduction;
7784 If this is a combined parallel sections, replace the call to
7785 GOMP_sections_start with a call to GOMP_sections_next. */
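/* For example (illustrative), for

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   n is 2, case 1 runs foo (), case 2 runs bar (), and a return value
   of 0 from GOMP_sections_start/GOMP_sections_next means no section
   is left for this thread.  */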
7787 static void
7788 expand_omp_sections (struct omp_region *region)
7790 tree t, u, vin = NULL, vmain, vnext, l2;
7791 unsigned len;
7792 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
7793 gimple_stmt_iterator si, switch_si;
7794 gomp_sections *sections_stmt;
7795 gimple *stmt;
7796 gomp_continue *cont;
7797 edge_iterator ei;
7798 edge e;
7799 struct omp_region *inner;
7800 unsigned i, casei;
7801 bool exit_reachable = region->cont != NULL;
7803 gcc_assert (region->exit != NULL);
7804 entry_bb = region->entry;
7805 l0_bb = single_succ (entry_bb);
7806 l1_bb = region->cont;
7807 l2_bb = region->exit;
7808 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
7809 l2 = gimple_block_label (l2_bb);
7810 else
7812 /* This can happen if there are reductions. */
7813 len = EDGE_COUNT (l0_bb->succs);
7814 gcc_assert (len > 0);
7815 e = EDGE_SUCC (l0_bb, len - 1);
7816 si = gsi_last_nondebug_bb (e->dest);
7817 l2 = NULL_TREE;
7818 if (gsi_end_p (si)
7819 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7820 l2 = gimple_block_label (e->dest);
7821 else
7822 FOR_EACH_EDGE (e, ei, l0_bb->succs)
7824 si = gsi_last_nondebug_bb (e->dest);
7825 if (gsi_end_p (si)
7826 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7828 l2 = gimple_block_label (e->dest);
7829 break;
7833 if (exit_reachable)
7834 default_bb = create_empty_bb (l1_bb->prev_bb);
7835 else
7836 default_bb = create_empty_bb (l0_bb);
7838 /* We will build a switch() with enough cases for all the
7839 GIMPLE_OMP_SECTION regions, a '0' case for when no more work remains,
7840 and a default case to abort if something goes wrong. */
7841 len = EDGE_COUNT (l0_bb->succs);
7843 /* Use vec::quick_push on label_vec throughout, since we know the size
7844 in advance. */
7845 auto_vec<tree> label_vec (len);
7847 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
7848 GIMPLE_OMP_SECTIONS statement. */
7849 si = gsi_last_nondebug_bb (entry_bb);
7850 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
7851 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
7852 vin = gimple_omp_sections_control (sections_stmt);
7853 tree clauses = gimple_omp_sections_clauses (sections_stmt);
7854 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
7855 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
7856 tree cond_var = NULL_TREE;
7857 if (reductmp || condtmp)
7859 tree reductions = null_pointer_node, mem = null_pointer_node;
7860 tree memv = NULL_TREE, condtemp = NULL_TREE;
7861 gimple_stmt_iterator gsi = gsi_none ();
7862 gimple *g = NULL;
7863 if (reductmp)
7865 reductions = OMP_CLAUSE_DECL (reductmp);
7866 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
7867 g = SSA_NAME_DEF_STMT (reductions);
7868 reductions = gimple_assign_rhs1 (g);
7869 OMP_CLAUSE_DECL (reductmp) = reductions;
7870 gsi = gsi_for_stmt (g);
7872 else
7873 gsi = si;
7874 if (condtmp)
7876 condtemp = OMP_CLAUSE_DECL (condtmp);
7877 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
7878 OMP_CLAUSE__CONDTEMP_);
7879 cond_var = OMP_CLAUSE_DECL (c);
7880 tree type = TREE_TYPE (condtemp);
7881 memv = create_tmp_var (type);
7882 TREE_ADDRESSABLE (memv) = 1;
7883 unsigned cnt = 0;
7884 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
7885 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
7886 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
7887 ++cnt;
7888 unsigned HOST_WIDE_INT sz
7889 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
7890 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
7891 false);
7892 mem = build_fold_addr_expr (memv);
7894 t = build_int_cst (unsigned_type_node, len - 1);
7895 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
7896 stmt = gimple_build_call (u, 3, t, reductions, mem);
7897 gimple_call_set_lhs (stmt, vin);
7898 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7899 if (condtmp)
7901 expand_omp_build_assign (&gsi, condtemp, memv, false);
7902 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
7903 vin, build_one_cst (TREE_TYPE (cond_var)));
7904 expand_omp_build_assign (&gsi, cond_var, t, false);
7906 if (reductmp)
7908 gsi_remove (&gsi, true);
7909 release_ssa_name (gimple_assign_lhs (g));
7912 else if (!is_combined_parallel (region))
7914 /* If we are not inside a combined parallel+sections region,
7915 call GOMP_sections_start. */
7916 t = build_int_cst (unsigned_type_node, len - 1);
7917 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
7918 stmt = gimple_build_call (u, 1, t);
7920 else
7922 /* Otherwise, call GOMP_sections_next. */
7923 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
7924 stmt = gimple_build_call (u, 0);
7926 if (!reductmp && !condtmp)
7928 gimple_call_set_lhs (stmt, vin);
7929 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
7931 gsi_remove (&si, true);
7933 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
7934 L0_BB. */
7935 switch_si = gsi_last_nondebug_bb (l0_bb);
7936 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
7937 if (exit_reachable)
7939 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
7940 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
7941 vmain = gimple_omp_continue_control_use (cont);
7942 vnext = gimple_omp_continue_control_def (cont);
7944 else
7946 vmain = vin;
7947 vnext = NULL_TREE;
7950 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
7951 label_vec.quick_push (t);
7952 i = 1;
7954 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
7955 for (inner = region->inner, casei = 1;
7956 inner;
7957 inner = inner->next, i++, casei++)
7959 basic_block s_entry_bb, s_exit_bb;
7961 /* Skip optional reduction region. */
7962 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
7964 --i;
7965 --casei;
7966 continue;
7969 s_entry_bb = inner->entry;
7970 s_exit_bb = inner->exit;
7972 t = gimple_block_label (s_entry_bb);
7973 u = build_int_cst (unsigned_type_node, casei);
7974 u = build_case_label (u, NULL, t);
7975 label_vec.quick_push (u);
7977 si = gsi_last_nondebug_bb (s_entry_bb);
7978 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
7979 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
7980 gsi_remove (&si, true);
7981 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
7983 if (s_exit_bb == NULL)
7984 continue;
7986 si = gsi_last_nondebug_bb (s_exit_bb);
7987 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7988 gsi_remove (&si, true);
7990 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
7993 /* Error handling code goes in DEFAULT_BB. */
7994 t = gimple_block_label (default_bb);
7995 u = build_case_label (NULL, NULL, t);
7996 make_edge (l0_bb, default_bb, 0);
7997 add_bb_to_loop (default_bb, current_loops->tree_root);
7999 stmt = gimple_build_switch (vmain, u, label_vec);
8000 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8001 gsi_remove (&switch_si, true);
8003 si = gsi_start_bb (default_bb);
8004 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8005 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8007 if (exit_reachable)
8009 tree bfn_decl;
8011 /* Code to get the next section goes in L1_BB. */
8012 si = gsi_last_nondebug_bb (l1_bb);
8013 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8015 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8016 stmt = gimple_build_call (bfn_decl, 0);
8017 gimple_call_set_lhs (stmt, vnext);
8018 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8019 if (cond_var)
8021 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8022 vnext, build_one_cst (TREE_TYPE (cond_var)));
8023 expand_omp_build_assign (&si, cond_var, t, false);
8025 gsi_remove (&si, true);
8027 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8030 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8031 si = gsi_last_nondebug_bb (l2_bb);
8032 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8033 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8034 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8035 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8036 else
8037 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8038 stmt = gimple_build_call (t, 0);
8039 if (gimple_omp_return_lhs (gsi_stmt (si)))
8040 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8041 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8042 gsi_remove (&si, true);
8044 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8047 /* Expand code for an OpenMP single directive. We've already expanded
8048 much of the code; here we simply place the GOMP_barrier call. */
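/* Illustrative sketch (assuming the usual lowering): for

     #pragma omp single
     foo ();

   the earlier lowering pass already guards foo () with
   GOMP_single_start (), so the only thing left to emit here is the
   implicit barrier closing the construct, unless nowait was given.  */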
8050 static void
8051 expand_omp_single (struct omp_region *region)
8053 basic_block entry_bb, exit_bb;
8054 gimple_stmt_iterator si;
8056 entry_bb = region->entry;
8057 exit_bb = region->exit;
8059 si = gsi_last_nondebug_bb (entry_bb);
8060 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8061 gsi_remove (&si, true);
8062 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8064 si = gsi_last_nondebug_bb (exit_bb);
8065 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8067 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8068 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8070 gsi_remove (&si, true);
8071 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8074 /* Generic expansion for OpenMP synchronization directives: master,
8075 ordered and critical. All we need to do here is remove the entry
8076 and exit markers for REGION. */
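/* Sketch, assuming the usual lowering: for

     #pragma omp critical
     x++;

   the lowering pass has already bracketed the increment with
   GOMP_critical_start () / GOMP_critical_end () calls, so deleting the
   GIMPLE_OMP_CRITICAL and GIMPLE_OMP_RETURN markers here leaves plain
   straight-line code.  */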
8078 static void
8079 expand_omp_synch (struct omp_region *region)
8081 basic_block entry_bb, exit_bb;
8082 gimple_stmt_iterator si;
8084 entry_bb = region->entry;
8085 exit_bb = region->exit;
8087 si = gsi_last_nondebug_bb (entry_bb);
8088 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8089 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8090 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8091 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8092 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8093 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8094 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8095 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8097 expand_omp_taskreg (region);
8098 return;
8100 gsi_remove (&si, true);
8101 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8103 if (exit_bb)
8105 si = gsi_last_nondebug_bb (exit_bb);
8106 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8107 gsi_remove (&si, true);
8108 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8112 /* Translate enum omp_memory_order to enum memmodel. The two enums
8113 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8114 is 0. */
8116 static enum memmodel
8117 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8119 switch (mo)
8121 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8122 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8123 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8124 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8125 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8126 default: gcc_unreachable ();
8130 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8131 operation as a normal volatile load. */
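/* E.g. (illustrative), for a 4-byte int (INDEX == 2) the mapping is
   roughly

     #pragma omp atomic read
     v = x;

   ==>  v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   with the memory model taken from the directive's memory-order
   clause when one is present.  */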
8133 static bool
8134 expand_omp_atomic_load (basic_block load_bb, tree addr,
8135 tree loaded_val, int index)
8137 enum built_in_function tmpbase;
8138 gimple_stmt_iterator gsi;
8139 basic_block store_bb;
8140 location_t loc;
8141 gimple *stmt;
8142 tree decl, call, type, itype;
8144 gsi = gsi_last_nondebug_bb (load_bb);
8145 stmt = gsi_stmt (gsi);
8146 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8147 loc = gimple_location (stmt);
8149 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8150 is smaller than word size, then expand_atomic_load assumes that the load
8151 is atomic. We could avoid the builtin entirely in this case. */
8153 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8154 decl = builtin_decl_explicit (tmpbase);
8155 if (decl == NULL_TREE)
8156 return false;
8158 type = TREE_TYPE (loaded_val);
8159 itype = TREE_TYPE (TREE_TYPE (decl));
8161 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8162 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8163 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8164 if (!useless_type_conversion_p (type, itype))
8165 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8166 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8168 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8169 gsi_remove (&gsi, true);
8171 store_bb = single_succ (load_bb);
8172 gsi = gsi_last_nondebug_bb (store_bb);
8173 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8174 gsi_remove (&gsi, true);
8176 if (gimple_in_ssa_p (cfun))
8177 update_ssa (TODO_update_ssa_no_phi);
8179 return true;
8182 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8183 operation as a normal volatile store. */
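/* Illustrative mapping (a sketch) for a 4-byte int:

     #pragma omp atomic write
     x = expr;

   ==>  __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   and when the old value is also needed, the store becomes an
   exchange:

     v = __atomic_exchange_4 (&x, expr, MEMMODEL_RELAXED);  */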
8185 static bool
8186 expand_omp_atomic_store (basic_block load_bb, tree addr,
8187 tree loaded_val, tree stored_val, int index)
8189 enum built_in_function tmpbase;
8190 gimple_stmt_iterator gsi;
8191 basic_block store_bb = single_succ (load_bb);
8192 location_t loc;
8193 gimple *stmt;
8194 tree decl, call, type, itype;
8195 machine_mode imode;
8196 bool exchange;
8198 gsi = gsi_last_nondebug_bb (load_bb);
8199 stmt = gsi_stmt (gsi);
8200 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8202 /* If the load value is needed, then this isn't a store but an exchange. */
8203 exchange = gimple_omp_atomic_need_value_p (stmt);
8205 gsi = gsi_last_nondebug_bb (store_bb);
8206 stmt = gsi_stmt (gsi);
8207 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8208 loc = gimple_location (stmt);
8210 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8211 is smaller than word size, then expand_atomic_store assumes that the store
8212 is atomic. We could avoid the builtin entirely in this case. */
8214 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8215 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8216 decl = builtin_decl_explicit (tmpbase);
8217 if (decl == NULL_TREE)
8218 return false;
8220 type = TREE_TYPE (stored_val);
8222 /* Dig out the type of the function's second argument. */
8223 itype = TREE_TYPE (decl);
8224 itype = TYPE_ARG_TYPES (itype);
8225 itype = TREE_CHAIN (itype);
8226 itype = TREE_VALUE (itype);
8227 imode = TYPE_MODE (itype);
8229 if (exchange && !can_atomic_exchange_p (imode, true))
8230 return false;
8232 if (!useless_type_conversion_p (itype, type))
8233 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8234 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8235 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8236 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8237 if (exchange)
8239 if (!useless_type_conversion_p (type, itype))
8240 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8241 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8244 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8245 gsi_remove (&gsi, true);
8247 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8248 gsi = gsi_last_nondebug_bb (load_bb);
8249 gsi_remove (&gsi, true);
8251 if (gimple_in_ssa_p (cfun))
8252 update_ssa (TODO_update_ssa_no_phi);
8254 return true;
8257 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8258 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8259 size of the data type, and thus usable to find the index of the builtin
8260 decl. Returns false if the expression is not of the proper form. */
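/* For example (a sketch), with a 4-byte int:

     #pragma omp atomic
     x += inc;

   ==>  __atomic_fetch_add_4 (&x, inc, MO);

   where MO encodes the directive's memory order; capturing the updated
   value selects the *_FETCH variant instead, e.g.
   v = __atomic_add_fetch_4 (&x, inc, MO);  */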
8262 static bool
8263 expand_omp_atomic_fetch_op (basic_block load_bb,
8264 tree addr, tree loaded_val,
8265 tree stored_val, int index)
8267 enum built_in_function oldbase, newbase, tmpbase;
8268 tree decl, itype, call;
8269 tree lhs, rhs;
8270 basic_block store_bb = single_succ (load_bb);
8271 gimple_stmt_iterator gsi;
8272 gimple *stmt;
8273 location_t loc;
8274 enum tree_code code;
8275 bool need_old, need_new;
8276 machine_mode imode;
8278 /* We expect to find the following sequences:
8280 load_bb:
8281 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8283 store_bb:
8284 val = tmp OP something; (or: something OP tmp)
8285 GIMPLE_OMP_ATOMIC_STORE (val)
8287 ???FIXME: Allow a more flexible sequence.
8288 Perhaps use data flow to pick the statements. */
8292 gsi = gsi_after_labels (store_bb);
8293 stmt = gsi_stmt (gsi);
8294 if (is_gimple_debug (stmt))
8296 gsi_next_nondebug (&gsi);
8297 if (gsi_end_p (gsi))
8298 return false;
8299 stmt = gsi_stmt (gsi);
8301 loc = gimple_location (stmt);
8302 if (!is_gimple_assign (stmt))
8303 return false;
8304 gsi_next_nondebug (&gsi);
8305 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8306 return false;
8307 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8308 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8309 enum omp_memory_order omo
8310 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8311 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8312 gcc_checking_assert (!need_old || !need_new);
8314 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8315 return false;
8317 /* Check for one of the supported fetch-op operations. */
8318 code = gimple_assign_rhs_code (stmt);
8319 switch (code)
8321 case PLUS_EXPR:
8322 case POINTER_PLUS_EXPR:
8323 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8324 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8325 break;
8326 case MINUS_EXPR:
8327 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8328 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8329 break;
8330 case BIT_AND_EXPR:
8331 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8332 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8333 break;
8334 case BIT_IOR_EXPR:
8335 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8336 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8337 break;
8338 case BIT_XOR_EXPR:
8339 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8340 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8341 break;
8342 default:
8343 return false;
8346 /* Make sure the expression is of the proper form. */
8347 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8348 rhs = gimple_assign_rhs2 (stmt);
8349 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8350 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8351 rhs = gimple_assign_rhs1 (stmt);
8352 else
8353 return false;
8355 tmpbase = ((enum built_in_function)
8356 ((need_new ? newbase : oldbase) + index + 1));
8357 decl = builtin_decl_explicit (tmpbase);
8358 if (decl == NULL_TREE)
8359 return false;
8360 itype = TREE_TYPE (TREE_TYPE (decl));
8361 imode = TYPE_MODE (itype);
8363 /* We could test all of the various optabs involved, but the fact of the
8364 matter is that (with the exception of i486 vs i586 and xadd) all targets
8365 that support any atomic operation optab also implement compare-and-swap.
8366 Let optabs.c take care of expanding any compare-and-swap loop. */
8367 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8368 return false;
8370 gsi = gsi_last_nondebug_bb (load_bb);
8371 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8373 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8374 It only requires that the operation happen atomically. Thus we can
8375 use the RELAXED memory model. */
8376 call = build_call_expr_loc (loc, decl, 3, addr,
8377 fold_convert_loc (loc, itype, rhs),
8378 build_int_cst (NULL, mo));
8380 if (need_old || need_new)
8382 lhs = need_old ? loaded_val : stored_val;
8383 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8384 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8386 else
8387 call = fold_convert_loc (loc, void_type_node, call);
8388 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8389 gsi_remove (&gsi, true);
8391 gsi = gsi_last_nondebug_bb (store_bb);
8392 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8393 gsi_remove (&gsi, true);
8394 gsi = gsi_last_nondebug_bb (store_bb);
8395 stmt = gsi_stmt (gsi);
8396 gsi_remove (&gsi, true);
8398 if (gimple_in_ssa_p (cfun))
8400 release_defs (stmt);
8401 update_ssa (TODO_update_ssa_no_phi);
8404 return true;
8407 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8409 oldval = *addr;
8410 repeat:
8411 newval = rhs; // with oldval replacing *addr in rhs
8412 actual = __sync_val_compare_and_swap (addr, oldval, newval);
8413 if (actual != oldval)
8414 { oldval = actual; goto repeat; }
8416 INDEX is log2 of the size of the data type, and thus usable to find the
8417 index of the builtin decl. */
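/* Illustrative sketch for a float operand, where the compare-and-swap
   has to operate on an integer view of the bits:

     int expected = __atomic_load_4 ((int *) &f, MEMMODEL_RELAXED);
     for (;;)
       {
         float tmp = VIEW_CONVERT (float, expected) OP rhs;
         int desired = VIEW_CONVERT (int, tmp);
         int actual = __sync_val_compare_and_swap ((int *) &f,
                                                   expected, desired);
         if (actual == expected)
           break;
         expected = actual;
       }

   Comparing as integers rather than as floats lets the loop terminate
   even in the presence of NaNs and -0.0.  */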
8419 static bool
8420 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8421 tree addr, tree loaded_val, tree stored_val,
8422 int index)
8424 tree loadedi, storedi, initial, new_storedi, old_vali;
8425 tree type, itype, cmpxchg, iaddr, atype;
8426 gimple_stmt_iterator si;
8427 basic_block loop_header = single_succ (load_bb);
8428 gimple *phi, *stmt;
8429 edge e;
8430 enum built_in_function fncode;
8432 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8433 order to use the RELAXED memory model effectively. */
8434 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8435 + index + 1);
8436 cmpxchg = builtin_decl_explicit (fncode);
8437 if (cmpxchg == NULL_TREE)
8438 return false;
8439 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8440 atype = type;
8441 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8443 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8444 || !can_atomic_load_p (TYPE_MODE (itype)))
8445 return false;
8447 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8448 si = gsi_last_nondebug_bb (load_bb);
8449 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8451 /* For floating-point values, we'll need to view-convert them to integers
8452 so that we can perform the atomic compare and swap. Simplify the
8453 following code by always setting up the "i"ntegral variables. */
8454 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8456 tree iaddr_val;
8458 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8459 true));
8460 atype = itype;
8461 iaddr_val
8462 = force_gimple_operand_gsi (&si,
8463 fold_convert (TREE_TYPE (iaddr), addr),
8464 false, NULL_TREE, true, GSI_SAME_STMT);
8465 stmt = gimple_build_assign (iaddr, iaddr_val);
8466 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8467 loadedi = create_tmp_var (itype);
8468 if (gimple_in_ssa_p (cfun))
8469 loadedi = make_ssa_name (loadedi);
8471 else
8473 iaddr = addr;
8474 loadedi = loaded_val;
8477 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8478 tree loaddecl = builtin_decl_explicit (fncode);
8479 if (loaddecl)
8480 initial
8481 = fold_convert (atype,
8482 build_call_expr (loaddecl, 2, iaddr,
8483 build_int_cst (NULL_TREE,
8484 MEMMODEL_RELAXED)));
8485 else
8487 tree off
8488 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8489 true), 0);
8490 initial = build2 (MEM_REF, atype, iaddr, off);
8493 initial
8494 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8495 GSI_SAME_STMT);
8497 /* Move the value to the LOADEDI temporary. */
8498 if (gimple_in_ssa_p (cfun))
8500 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8501 phi = create_phi_node (loadedi, loop_header);
8502 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8503 initial);
8505 else
8506 gsi_insert_before (&si,
8507 gimple_build_assign (loadedi, initial),
8508 GSI_SAME_STMT);
8509 if (loadedi != loaded_val)
8511 gimple_stmt_iterator gsi2;
8512 tree x;
8514 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8515 gsi2 = gsi_start_bb (loop_header);
8516 if (gimple_in_ssa_p (cfun))
8518 gassign *stmt;
8519 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8520 true, GSI_SAME_STMT);
8521 stmt = gimple_build_assign (loaded_val, x);
8522 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8524 else
8526 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8527 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8528 true, GSI_SAME_STMT);
8531 gsi_remove (&si, true);
8533 si = gsi_last_nondebug_bb (store_bb);
8534 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8536 if (iaddr == addr)
8537 storedi = stored_val;
8538 else
8539 storedi
8540 = force_gimple_operand_gsi (&si,
8541 build1 (VIEW_CONVERT_EXPR, itype,
8542 stored_val), true, NULL_TREE, true,
8543 GSI_SAME_STMT);
8545 /* Build the compare&swap statement. */
8546 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8547 new_storedi = force_gimple_operand_gsi (&si,
8548 fold_convert (TREE_TYPE (loadedi),
8549 new_storedi),
8550 true, NULL_TREE,
8551 true, GSI_SAME_STMT);
8553 if (gimple_in_ssa_p (cfun))
8554 old_vali = loadedi;
8555 else
8557 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8558 stmt = gimple_build_assign (old_vali, loadedi);
8559 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8561 stmt = gimple_build_assign (loadedi, new_storedi);
8562 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8565 /* Note that we always perform the comparison as an integer, even for
8566 floating point. This allows the atomic operation to properly succeed even
8567 with NaNs and -0.0: a floating-point comparison would loop forever on a NaN (NaN never compares equal to itself) and could exit prematurely for -0.0 vs. +0.0, which compare equal even though their bit patterns, and therefore the compare-and-swap outcome, differ. */
8568 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8569 stmt = gimple_build_cond_empty (ne);
8570 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8572 /* Update cfg. */
8573 e = single_succ_edge (store_bb);
8574 e->flags &= ~EDGE_FALLTHRU;
8575 e->flags |= EDGE_FALSE_VALUE;
8576 /* Expect no looping. */
8577 e->probability = profile_probability::guessed_always ();
8579 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8580 e->probability = profile_probability::guessed_never ();
8582 /* Copy the new value to loadedi (we already did that before the condition
8583 if we are not in SSA). */
8584 if (gimple_in_ssa_p (cfun))
8586 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8587 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8590 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8591 gsi_remove (&si, true);
8593 class loop *loop = alloc_loop ();
8594 loop->header = loop_header;
8595 loop->latch = store_bb;
8596 add_loop (loop, loop_header->loop_father);
8598 if (gimple_in_ssa_p (cfun))
8599 update_ssa (TODO_update_ssa_no_phi);
8601 return true;
8604 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8606 GOMP_atomic_start ();
8607 *addr = rhs;
8608 GOMP_atomic_end ();
8610 The result is not globally atomic, but works so long as all parallel
8611 references are within #pragma omp atomic directives. According to
8612 responses received from omp@openmp.org, this appears to be within
8613 spec. That makes sense, since that is how several other compilers
8614 handle this situation as well.
8615 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8616 expanding. STORED_VAL is the operand of the matching
8617 GIMPLE_OMP_ATOMIC_STORE.
8619 We replace
8620 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8621 loaded_val = *addr;
8623 and replace
8624 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8625 *addr = stored_val;
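/* Note that, at least in libgomp's default implementation,
   GOMP_atomic_start and GOMP_atomic_end acquire and release a single
   global mutex, so every atomic expanded this way serializes against
   all the others in the program.  */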
8628 static bool
8629 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8630 tree addr, tree loaded_val, tree stored_val)
8632 gimple_stmt_iterator si;
8633 gassign *stmt;
8634 tree t;
8636 si = gsi_last_nondebug_bb (load_bb);
8637 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8639 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8640 t = build_call_expr (t, 0);
8641 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8643 tree mem = build_simple_mem_ref (addr);
8644 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8645 TREE_OPERAND (mem, 1)
8646 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8647 true),
8648 TREE_OPERAND (mem, 1));
8649 stmt = gimple_build_assign (loaded_val, mem);
8650 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8651 gsi_remove (&si, true);
8653 si = gsi_last_nondebug_bb (store_bb);
8654 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8656 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8657 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8659 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8660 t = build_call_expr (t, 0);
8661 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8662 gsi_remove (&si, true);
8664 if (gimple_in_ssa_p (cfun))
8665 update_ssa (TODO_update_ssa_no_phi);
8666 return true;
8669 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
8670 using expand_omp_atomic_fetch_op. If that fails, we try
8671 expand_omp_atomic_pipeline, and if that fails too, the
8672 ultimate fallback is wrapping the operation in a mutex
8673 (expand_omp_atomic_mutex). REGION is the atomic region built
8674 by build_omp_regions_1 (). */
8676 static void
8677 expand_omp_atomic (struct omp_region *region)
8679 basic_block load_bb = region->entry, store_bb = region->exit;
8680 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
8681 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
8682 tree loaded_val = gimple_omp_atomic_load_lhs (load);
8683 tree addr = gimple_omp_atomic_load_rhs (load);
8684 tree stored_val = gimple_omp_atomic_store_val (store);
8685 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8686 HOST_WIDE_INT index;
8688 /* Make sure the type is one of the supported sizes. */
8689 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
8690 index = exact_log2 (index);
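/* For instance, a 4-byte int has TYPE_SIZE_UNIT 4 and thus index 2; the
   supported sizes are 1, 2, 4, 8 and 16 bytes (indices 0 through 4).  */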
8691 if (index >= 0 && index <= 4)
8693 unsigned int align = TYPE_ALIGN_UNIT (type);
8695 /* __sync builtins require strict data alignment. */
8696 if (exact_log2 (align) >= index)
8698 /* Atomic load. */
8699 scalar_mode smode;
8700 if (loaded_val == stored_val
8701 && (is_int_mode (TYPE_MODE (type), &smode)
8702 || is_float_mode (TYPE_MODE (type), &smode))
8703 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8704 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
8705 return;
8707 /* Atomic store. */
8708 if ((is_int_mode (TYPE_MODE (type), &smode)
8709 || is_float_mode (TYPE_MODE (type), &smode))
8710 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8711 && store_bb == single_succ (load_bb)
8712 && first_stmt (store_bb) == store
8713 && expand_omp_atomic_store (load_bb, addr, loaded_val,
8714 stored_val, index))
8715 return;
8717 /* When possible, use specialized atomic update functions. */
8718 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
8719 && store_bb == single_succ (load_bb)
8720 && expand_omp_atomic_fetch_op (load_bb, addr,
8721 loaded_val, stored_val, index))
8722 return;
8724 /* If we don't have specialized __sync builtins, try to implement
8725 it as a compare-and-swap loop. */
8726 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
8727 loaded_val, stored_val, index))
8728 return;
8732 /* The ultimate fallback is wrapping the operation in a mutex. */
8733 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
8736 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
8737 at REGION_EXIT. */
8739 static void
8740 mark_loops_in_oacc_kernels_region (basic_block region_entry,
8741 basic_block region_exit)
8743 class loop *outer = region_entry->loop_father;
8744 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
8746 /* Don't parallelize the kernels region if it contains more than one outer
8747 loop. */
8748 unsigned int nr_outer_loops = 0;
8749 class loop *single_outer = NULL;
8750 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
8752 gcc_assert (loop_outer (loop) == outer);
8754 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
8755 continue;
8757 if (region_exit != NULL
8758 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
8759 continue;
8761 nr_outer_loops++;
8762 single_outer = loop;
8764 if (nr_outer_loops != 1)
8765 return;
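/* Likewise give up if, further down the nest, any level contains more
   than one sibling loop.  */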
8767 for (class loop *loop = single_outer->inner;
8768 loop != NULL;
8769 loop = loop->inner)
8770 if (loop->next)
8771 return;
8773 /* Mark the loops in the region. */
8774 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
8775 loop->in_oacc_kernels_region = true;
8778 /* Build a target argument identifier from the DEVICE identifier, the value
8779 identifier ID, and whether the element also has a SUBSEQUENT_PARAM. */
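/* The three components are simply OR'd together; an embedded VALUE, when
   one is used, is shifted left by GOMP_TARGET_ARG_VALUE_SHIFT, and the
   +/- 2^15 limit enforced by push_target_argument_according_to_value
   below ensures the shifted value still fits alongside the identifier.  */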
8781 static tree
8782 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
8784 tree t = build_int_cst (integer_type_node, device);
8785 if (subsequent_param)
8786 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8787 build_int_cst (integer_type_node,
8788 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
8789 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8790 build_int_cst (integer_type_node, id));
8791 return t;
8794 /* Like above, but return it in a type that can be directly stored as an
8795 element of the argument array. */
8797 static tree
8798 get_target_argument_identifier (int device, bool subsequent_param, int id)
8800 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
8801 return fold_convert (ptr_type_node, t);
8804 /* Return a target argument consisting of DEVICE identifier, value identifier
8805 ID, and the actual VALUE. */
8807 static tree
8808 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
8809 tree value)
8811 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
8812 fold_convert (integer_type_node, value),
8813 build_int_cst (unsigned_type_node,
8814 GOMP_TARGET_ARG_VALUE_SHIFT));
8815 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8816 get_target_argument_identifier_1 (device, false, id));
8817 t = fold_convert (ptr_type_node, t);
8818 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
8821 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
8822 push a single argument to ARGS with the DEVICE, ID and VALUE all embedded
8823 in it; otherwise push an identifier (with DEVICE and ID) and the VALUE as
8824 two separate arguments. */
8826 static void
8827 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
8828 int id, tree value, vec <tree> *args)
8830 if (tree_fits_shwi_p (value)
8831 && tree_to_shwi (value) > -(1 << 15)
8832 && tree_to_shwi (value) < (1 << 15))
8833 args->quick_push (get_target_argument_value (gsi, device, id, value));
8834 else
8836 args->quick_push (get_target_argument_identifier (device, true, id));
8837 value = fold_convert (ptr_type_node, value);
8838 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
8839 GSI_SAME_STMT);
8840 args->quick_push (value);
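/* For example, a literal num_teams (4) fits into the +/-2^15 window and
   is passed as a single encoded element, whereas a thread_limit computed
   only at run time is passed as a GOMP_TARGET_ARG_SUBSEQUENT_PARAM
   identifier followed by the value itself.  */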
8844 /* Create an array of arguments that is then passed to GOMP_target. */
8846 static tree
8847 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
8849 auto_vec <tree, 6> args;
8850 tree clauses = gimple_omp_target_clauses (tgt_stmt);
8851 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
8852 if (c)
8853 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
8854 else
8855 t = integer_minus_one_node;
8856 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8857 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
8859 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
8860 if (c)
8861 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
8862 else
8863 t = integer_minus_one_node;
8864 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8865 GOMP_TARGET_ARG_THREAD_LIMIT, t,
8866 &args);
8868 /* Produce more, perhaps device-specific, arguments here. */
8870 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
8871 args.length () + 1),
8872 ".omp_target_args");
8873 for (unsigned i = 0; i < args.length (); i++)
8875 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8876 build_int_cst (integer_type_node, i),
8877 NULL_TREE, NULL_TREE);
8878 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
8879 GSI_SAME_STMT);
8881 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8882 build_int_cst (integer_type_node, args.length ()),
8883 NULL_TREE, NULL_TREE);
8884 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
8885 GSI_SAME_STMT);
8886 TREE_ADDRESSABLE (argarray) = 1;
8887 return build_fold_addr_expr (argarray);
8890 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
8892 static void
8893 expand_omp_target (struct omp_region *region)
8895 basic_block entry_bb, exit_bb, new_bb;
8896 struct function *child_cfun;
8897 tree child_fn, block, t;
8898 gimple_stmt_iterator gsi;
8899 gomp_target *entry_stmt;
8900 gimple *stmt;
8901 edge e;
8902 bool offloaded, data_region;
8903 int target_kind;
8905 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
8906 target_kind = gimple_omp_target_kind (entry_stmt);
8907 new_bb = region->entry;
8909 offloaded = is_gimple_omp_offloaded (entry_stmt);
8910 switch (target_kind)
8912 case GF_OMP_TARGET_KIND_REGION:
8913 case GF_OMP_TARGET_KIND_UPDATE:
8914 case GF_OMP_TARGET_KIND_ENTER_DATA:
8915 case GF_OMP_TARGET_KIND_EXIT_DATA:
8916 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8917 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8918 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8919 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8920 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8921 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8922 data_region = false;
8923 break;
8924 case GF_OMP_TARGET_KIND_DATA:
8925 case GF_OMP_TARGET_KIND_OACC_DATA:
8926 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8927 data_region = true;
8928 break;
8929 default:
8930 gcc_unreachable ();
8933 child_fn = NULL_TREE;
8934 child_cfun = NULL;
8935 if (offloaded)
8937 child_fn = gimple_omp_target_child_fn (entry_stmt);
8938 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
8941 /* Supported by expand_omp_taskreg, but not here. */
8942 if (child_cfun != NULL)
8943 gcc_checking_assert (!child_cfun->cfg);
8944 gcc_checking_assert (!gimple_in_ssa_p (cfun));
8946 entry_bb = region->entry;
8947 exit_bb = region->exit;
8949 switch (target_kind)
8951 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8952 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
8954 /* Further down, all OpenACC compute constructs will be mapped to
8955 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8956 is an "oacc kernels" attribute set for OpenACC kernels. */
8957 DECL_ATTRIBUTES (child_fn)
8958 = tree_cons (get_identifier ("oacc kernels"),
8959 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8960 break;
8961 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8962 /* Further down, all OpenACC compute constructs will be mapped to
8963 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8964 is an "oacc serial" attribute set for OpenACC serial. */
8965 DECL_ATTRIBUTES (child_fn)
8966 = tree_cons (get_identifier ("oacc serial"),
8967 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8968 break;
8969 default:
8970 break;
8973 if (offloaded)
8975 unsigned srcidx, dstidx, num;
8977 /* If the offloading region needs data sent from the parent
8978 function, then the very first statement (except for possible
8979 tree profile counter updates) of the offloading body
8980 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
8981 &.OMP_DATA_O is passed as an argument to the child function,
8982 we need to replace it with the argument as seen by the child
8983 function.
8985 In most cases, this will end up being the identity assignment
8986 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
8987 a function call that has been inlined, the original PARM_DECL
8988 .OMP_DATA_I may have been converted into a different local
8989 variable, in which case we need to keep the assignment. */
8990 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8991 if (data_arg)
8993 basic_block entry_succ_bb = single_succ (entry_bb);
8994 gimple_stmt_iterator gsi;
8995 tree arg;
8996 gimple *tgtcopy_stmt = NULL;
8997 tree sender = TREE_VEC_ELT (data_arg, 0);
8999 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9001 gcc_assert (!gsi_end_p (gsi));
9002 stmt = gsi_stmt (gsi);
9003 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9004 continue;
9006 if (gimple_num_ops (stmt) == 2)
9008 tree arg = gimple_assign_rhs1 (stmt);
9010 /* We're ignoring the subcode because we're
9011 effectively doing a STRIP_NOPS. */
9013 if (TREE_CODE (arg) == ADDR_EXPR
9014 && TREE_OPERAND (arg, 0) == sender)
9016 tgtcopy_stmt = stmt;
9017 break;
9022 gcc_assert (tgtcopy_stmt != NULL);
9023 arg = DECL_ARGUMENTS (child_fn);
9025 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9026 gsi_remove (&gsi, true);
9029 /* Declare local variables needed in CHILD_CFUN. */
9030 block = DECL_INITIAL (child_fn);
9031 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9032 /* The gimplifier could record temporaries in the offloading block
9033 rather than in the containing function's local_decls chain,
9034 in which case cgraph would miss finalizing them. Do it now. */
9035 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9036 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9037 varpool_node::finalize_decl (t);
9038 DECL_SAVED_TREE (child_fn) = NULL;
9039 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9040 gimple_set_body (child_fn, NULL);
9041 TREE_USED (block) = 1;
9043 /* Reset DECL_CONTEXT on function arguments. */
9044 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9045 DECL_CONTEXT (t) = child_fn;
9047 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9048 so that the region can be moved to the child function. */
9049 gsi = gsi_last_nondebug_bb (entry_bb);
9050 stmt = gsi_stmt (gsi);
9051 gcc_assert (stmt
9052 && gimple_code (stmt) == gimple_code (entry_stmt));
9053 e = split_block (entry_bb, stmt);
9054 gsi_remove (&gsi, true);
9055 entry_bb = e->dest;
9056 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9058 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9059 if (exit_bb)
9061 gsi = gsi_last_nondebug_bb (exit_bb);
9062 gcc_assert (!gsi_end_p (gsi)
9063 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9064 stmt = gimple_build_return (NULL);
9065 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9066 gsi_remove (&gsi, true);
9069 /* Move the offloading region into CHILD_CFUN. */
9071 block = gimple_block (entry_stmt);
9073 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9074 if (exit_bb)
9075 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9076 /* When the OMP expansion process cannot guarantee an up-to-date
9077 loop tree, arrange for the child function to fix up its loops. */
9078 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9079 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9081 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9082 num = vec_safe_length (child_cfun->local_decls);
9083 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9085 t = (*child_cfun->local_decls)[srcidx];
9086 if (DECL_CONTEXT (t) == cfun->decl)
9087 continue;
9088 if (srcidx != dstidx)
9089 (*child_cfun->local_decls)[dstidx] = t;
9090 dstidx++;
9092 if (dstidx != num)
9093 vec_safe_truncate (child_cfun->local_decls, dstidx);
9095 /* Inform the callgraph about the new function. */
9096 child_cfun->curr_properties = cfun->curr_properties;
9097 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9098 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9099 cgraph_node *node = cgraph_node::get_create (child_fn);
9100 node->parallelized_function = 1;
9101 cgraph_node::add_new_function (child_fn, true);
9103 /* Add the new function to the offload table. */
9104 if (ENABLE_OFFLOADING)
9106 if (in_lto_p)
9107 DECL_PRESERVE_P (child_fn) = 1;
9108 vec_safe_push (offload_funcs, child_fn);
9111 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9112 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9114 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9115 fixed in a following pass. */
9116 push_cfun (child_cfun);
9117 if (need_asm)
9118 assign_assembler_name_if_needed (child_fn);
9119 cgraph_edge::rebuild_edges ();
9121 /* Some EH regions might become dead, see PR34608. If
9122 pass_cleanup_cfg isn't the first pass to happen with the
9123 new child, these dead EH edges might cause problems.
9124 Clean them up now. */
9125 if (flag_exceptions)
9127 basic_block bb;
9128 bool changed = false;
9130 FOR_EACH_BB_FN (bb, cfun)
9131 changed |= gimple_purge_dead_eh_edges (bb);
9132 if (changed)
9133 cleanup_tree_cfg ();
9135 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9136 verify_loop_structure ();
9137 pop_cfun ();
9139 if (dump_file && !gimple_in_ssa_p (cfun))
9141 omp_any_child_fn_dumped = true;
9142 dump_function_header (dump_file, child_fn, dump_flags);
9143 dump_function_to_file (child_fn, dump_file, dump_flags);
9146 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9149 /* Emit a library call to launch the offloading region, or do data
9150 transfers. */
9151 tree t1, t2, t3, t4, depend, c, clauses;
9152 enum built_in_function start_ix;
9153 unsigned int flags_i = 0;
9155 switch (gimple_omp_target_kind (entry_stmt))
9157 case GF_OMP_TARGET_KIND_REGION:
9158 start_ix = BUILT_IN_GOMP_TARGET;
9159 break;
9160 case GF_OMP_TARGET_KIND_DATA:
9161 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9162 break;
9163 case GF_OMP_TARGET_KIND_UPDATE:
9164 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9165 break;
9166 case GF_OMP_TARGET_KIND_ENTER_DATA:
9167 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9168 break;
9169 case GF_OMP_TARGET_KIND_EXIT_DATA:
9170 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9171 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9172 break;
9173 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9174 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9175 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9176 start_ix = BUILT_IN_GOACC_PARALLEL;
9177 break;
9178 case GF_OMP_TARGET_KIND_OACC_DATA:
9179 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9180 start_ix = BUILT_IN_GOACC_DATA_START;
9181 break;
9182 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9183 start_ix = BUILT_IN_GOACC_UPDATE;
9184 break;
9185 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9186 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9187 break;
9188 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9189 start_ix = BUILT_IN_GOACC_DECLARE;
9190 break;
9191 default:
9192 gcc_unreachable ();
9195 clauses = gimple_omp_target_clauses (entry_stmt);
9197 tree device = NULL_TREE;
9198 location_t device_loc = UNKNOWN_LOCATION;
9199 tree goacc_flags = NULL_TREE;
9200 if (is_gimple_omp_oacc (entry_stmt))
9202 /* By default, no GOACC_FLAGs are set. */
9203 goacc_flags = integer_zero_node;
9205 else
9207 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9208 if (c)
9210 device = OMP_CLAUSE_DEVICE_ID (c);
9211 device_loc = OMP_CLAUSE_LOCATION (c);
9213 else
9215 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
9216 runtime library choose). */
9217 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9218 device_loc = gimple_location (entry_stmt);
9221 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9222 if (c)
9223 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9226 /* By default, there is no conditional. */
9227 tree cond = NULL_TREE;
9228 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9229 if (c)
9230 cond = OMP_CLAUSE_IF_EXPR (c);
9231 /* If we found the clause 'if (cond)', build:
9232 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9233 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9234 if (cond)
9236 tree *tp;
9237 if (is_gimple_omp_oacc (entry_stmt))
9238 tp = &goacc_flags;
9239 else
9241 /* Ensure 'device' is of the correct type. */
9242 device = fold_convert_loc (device_loc, integer_type_node, device);
9244 tp = &device;
9247 cond = gimple_boolify (cond);
9249 basic_block cond_bb, then_bb, else_bb;
9250 edge e;
9251 tree tmp_var;
9253 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9254 if (offloaded)
9255 e = split_block_after_labels (new_bb);
9256 else
9258 gsi = gsi_last_nondebug_bb (new_bb);
9259 gsi_prev (&gsi);
9260 e = split_block (new_bb, gsi_stmt (gsi));
9262 cond_bb = e->src;
9263 new_bb = e->dest;
9264 remove_edge (e);
9266 then_bb = create_empty_bb (cond_bb);
9267 else_bb = create_empty_bb (then_bb);
9268 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9269 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9271 stmt = gimple_build_cond_empty (cond);
9272 gsi = gsi_last_bb (cond_bb);
9273 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9275 gsi = gsi_start_bb (then_bb);
9276 stmt = gimple_build_assign (tmp_var, *tp);
9277 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9279 gsi = gsi_start_bb (else_bb);
9280 if (is_gimple_omp_oacc (entry_stmt))
9281 stmt = gimple_build_assign (tmp_var,
9282 BIT_IOR_EXPR,
9283 *tp,
9284 build_int_cst (integer_type_node,
9285 GOACC_FLAG_HOST_FALLBACK));
9286 else
9287 stmt = gimple_build_assign (tmp_var,
9288 build_int_cst (integer_type_node,
9289 GOMP_DEVICE_HOST_FALLBACK));
9290 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9292 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9293 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9294 add_bb_to_loop (then_bb, cond_bb->loop_father);
9295 add_bb_to_loop (else_bb, cond_bb->loop_father);
9296 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9297 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9299 *tp = tmp_var;
9301 gsi = gsi_last_nondebug_bb (new_bb);
9303 else
9305 gsi = gsi_last_nondebug_bb (new_bb);
9307 if (device != NULL_TREE)
9308 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9309 true, GSI_SAME_STMT);
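/* The data argument, when present, is a TREE_VEC of the three map arrays
   prepared during lowering; judging from the uses below, element 0 holds
   the host addresses, element 1 the sizes (its array domain also yields
   the map count T1), and element 2 the map kinds.  */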
9312 t = gimple_omp_target_data_arg (entry_stmt);
9313 if (t == NULL)
9315 t1 = size_zero_node;
9316 t2 = build_zero_cst (ptr_type_node);
9317 t3 = t2;
9318 t4 = t2;
9320 else
9322 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9323 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9324 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9325 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9326 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9329 gimple *g;
9330 bool tagging = false;
9331 /* The maximum number of arguments used by any start_ix, without varargs. */
9332 auto_vec<tree, 11> args;
9333 if (is_gimple_omp_oacc (entry_stmt))
9335 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9336 TREE_TYPE (goacc_flags), goacc_flags);
9337 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9338 NULL_TREE, true,
9339 GSI_SAME_STMT);
9340 args.quick_push (goacc_flags_m);
9342 else
9343 args.quick_push (device);
9344 if (offloaded)
9345 args.quick_push (build_fold_addr_expr (child_fn));
9346 args.quick_push (t1);
9347 args.quick_push (t2);
9348 args.quick_push (t3);
9349 args.quick_push (t4);
9350 switch (start_ix)
9352 case BUILT_IN_GOACC_DATA_START:
9353 case BUILT_IN_GOACC_DECLARE:
9354 case BUILT_IN_GOMP_TARGET_DATA:
9355 break;
9356 case BUILT_IN_GOMP_TARGET:
9357 case BUILT_IN_GOMP_TARGET_UPDATE:
9358 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9359 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9360 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9361 if (c)
9362 depend = OMP_CLAUSE_DECL (c);
9363 else
9364 depend = build_int_cst (ptr_type_node, 0);
9365 args.quick_push (depend);
9366 if (start_ix == BUILT_IN_GOMP_TARGET)
9367 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9368 break;
9369 case BUILT_IN_GOACC_PARALLEL:
9370 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9372 tree dims = NULL_TREE;
9373 unsigned int ix;
9375 /* For serial constructs we set all dimensions to 1. */
9376 for (ix = GOMP_DIM_MAX; ix--;)
9377 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9378 oacc_replace_fn_attrib (child_fn, dims);
9380 else
9381 oacc_set_fn_attrib (child_fn, clauses, &args);
9382 tagging = true;
9383 /* FALLTHRU */
9384 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9385 case BUILT_IN_GOACC_UPDATE:
9387 tree t_async = NULL_TREE;
9389 /* If present, use the value specified by the respective
9390 clause, making sure it is of the correct type. */
9391 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9392 if (c)
9393 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9394 integer_type_node,
9395 OMP_CLAUSE_ASYNC_EXPR (c));
9396 else if (!tagging)
9397 /* Default values for t_async. */
9398 t_async = fold_convert_loc (gimple_location (entry_stmt),
9399 integer_type_node,
9400 build_int_cst (integer_type_node,
9401 GOMP_ASYNC_SYNC));
9402 if (tagging && t_async)
9404 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9406 if (TREE_CODE (t_async) == INTEGER_CST)
9408 /* See if we can pack the async arg into the tag's
9409 operand. */
9410 i_async = TREE_INT_CST_LOW (t_async);
9411 if (i_async < GOMP_LAUNCH_OP_MAX)
9412 t_async = NULL_TREE;
9413 else
9414 i_async = GOMP_LAUNCH_OP_MAX;
9416 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9417 i_async));
9419 if (t_async)
9420 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9421 NULL_TREE, true,
9422 GSI_SAME_STMT));
9424 /* Save the argument index, and ... */
9425 unsigned t_wait_idx = args.length ();
9426 unsigned num_waits = 0;
9427 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9428 if (!tagging || c)
9429 /* ... push a placeholder. */
9430 args.safe_push (integer_zero_node);
9432 for (; c; c = OMP_CLAUSE_CHAIN (c))
9433 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9435 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9436 integer_type_node,
9437 OMP_CLAUSE_WAIT_EXPR (c));
9438 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9439 GSI_SAME_STMT);
9440 args.safe_push (arg);
9441 num_waits++;
9444 if (!tagging || num_waits)
9446 tree len;
9448 /* Now that we know the number, update the placeholder. */
9449 if (tagging)
9450 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9451 else
9452 len = build_int_cst (integer_type_node, num_waits);
9453 len = fold_convert_loc (gimple_location (entry_stmt),
9454 unsigned_type_node, len);
9455 args[t_wait_idx] = len;
9458 break;
9459 default:
9460 gcc_unreachable ();
9462 if (tagging)
9463 /* Push terminal marker - zero. */
9464 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9466 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9467 gimple_set_location (g, gimple_location (entry_stmt));
9468 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9469 if (!offloaded)
9471 g = gsi_stmt (gsi);
9472 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9473 gsi_remove (&gsi, true);
9475 if (data_region && region->exit)
9477 gsi = gsi_last_nondebug_bb (region->exit);
9478 g = gsi_stmt (gsi);
9479 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9480 gsi_remove (&gsi, true);
9484 /* Expand the parallel region tree rooted at REGION. Expansion
9485 proceeds in depth-first order. Innermost regions are expanded
9486 first. This way, parallel regions that require a new function to
9487 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9488 internal dependencies in their body. */
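/* For example, for a '#pragma omp parallel' containing a '#pragma omp for',
   the inner GIMPLE_OMP_FOR region is expanded into explicit loop code and
   libgomp calls first, and only then is the enclosing GIMPLE_OMP_PARALLEL
   region outlined into its child function.  */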
9490 static void
9491 expand_omp (struct omp_region *region)
9493 omp_any_child_fn_dumped = false;
9494 while (region)
9496 location_t saved_location;
9497 gimple *inner_stmt = NULL;
9499 /* First, determine whether this is a combined parallel+workshare
9500 region. */
9501 if (region->type == GIMPLE_OMP_PARALLEL)
9502 determine_parallel_type (region);
9504 if (region->type == GIMPLE_OMP_FOR
9505 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9506 inner_stmt = last_stmt (region->inner->entry);
9508 if (region->inner)
9509 expand_omp (region->inner);
9511 saved_location = input_location;
9512 if (gimple_has_location (last_stmt (region->entry)))
9513 input_location = gimple_location (last_stmt (region->entry));
9515 switch (region->type)
9517 case GIMPLE_OMP_PARALLEL:
9518 case GIMPLE_OMP_TASK:
9519 expand_omp_taskreg (region);
9520 break;
9522 case GIMPLE_OMP_FOR:
9523 expand_omp_for (region, inner_stmt);
9524 break;
9526 case GIMPLE_OMP_SECTIONS:
9527 expand_omp_sections (region);
9528 break;
9530 case GIMPLE_OMP_SECTION:
9531 /* Individual omp sections are handled together with their
9532 parent GIMPLE_OMP_SECTIONS region. */
9533 break;
9535 case GIMPLE_OMP_SINGLE:
9536 expand_omp_single (region);
9537 break;
9539 case GIMPLE_OMP_ORDERED:
9541 gomp_ordered *ord_stmt
9542 = as_a <gomp_ordered *> (last_stmt (region->entry));
9543 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9544 OMP_CLAUSE_DEPEND))
9546 /* We'll expand these when expanding the corresponding
9547 worksharing region with an ordered(n) clause. */
9548 gcc_assert (region->outer
9549 && region->outer->type == GIMPLE_OMP_FOR);
9550 region->ord_stmt = ord_stmt;
9551 break;
9554 /* FALLTHRU */
9555 case GIMPLE_OMP_MASTER:
9556 case GIMPLE_OMP_TASKGROUP:
9557 case GIMPLE_OMP_CRITICAL:
9558 case GIMPLE_OMP_TEAMS:
9559 expand_omp_synch (region);
9560 break;
9562 case GIMPLE_OMP_ATOMIC_LOAD:
9563 expand_omp_atomic (region);
9564 break;
9566 case GIMPLE_OMP_TARGET:
9567 expand_omp_target (region);
9568 break;
9570 default:
9571 gcc_unreachable ();
9574 input_location = saved_location;
9575 region = region->next;
9577 if (omp_any_child_fn_dumped)
9579 if (dump_file)
9580 dump_function_header (dump_file, current_function_decl, dump_flags);
9581 omp_any_child_fn_dumped = false;
9585 /* Helper for build_omp_regions. Scan the dominator tree starting at
9586 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9587 true, the function ends once a single tree is built (otherwise, a whole
9588 forest of OMP constructs may be built). */
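/* E.g. a '#pragma omp parallel' enclosing a '#pragma omp for' yields a
   GIMPLE_OMP_PARALLEL region whose 'inner' child is the GIMPLE_OMP_FOR
   region, while stand-alone directives such as 'omp target update' create
   no region at all (see below).  */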
9590 static void
9591 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9592 bool single_tree)
9594 gimple_stmt_iterator gsi;
9595 gimple *stmt;
9596 basic_block son;
9598 gsi = gsi_last_nondebug_bb (bb);
9599 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9601 struct omp_region *region;
9602 enum gimple_code code;
9604 stmt = gsi_stmt (gsi);
9605 code = gimple_code (stmt);
9606 if (code == GIMPLE_OMP_RETURN)
9608 /* STMT is the return point out of region PARENT. Mark it
9609 as the exit point and make PARENT the immediately
9610 enclosing region. */
9611 gcc_assert (parent);
9612 region = parent;
9613 region->exit = bb;
9614 parent = parent->outer;
9616 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9618 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9619 GIMPLE_OMP_RETURN, but matches with
9620 GIMPLE_OMP_ATOMIC_LOAD. */
9621 gcc_assert (parent);
9622 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9623 region = parent;
9624 region->exit = bb;
9625 parent = parent->outer;
9627 else if (code == GIMPLE_OMP_CONTINUE)
9629 gcc_assert (parent);
9630 parent->cont = bb;
9632 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9634 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9635 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9637 else
9639 region = new_omp_region (bb, code, parent);
9640 /* Otherwise... */
9641 if (code == GIMPLE_OMP_TARGET)
9643 switch (gimple_omp_target_kind (stmt))
9645 case GF_OMP_TARGET_KIND_REGION:
9646 case GF_OMP_TARGET_KIND_DATA:
9647 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9648 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9649 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9650 case GF_OMP_TARGET_KIND_OACC_DATA:
9651 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9652 break;
9653 case GF_OMP_TARGET_KIND_UPDATE:
9654 case GF_OMP_TARGET_KIND_ENTER_DATA:
9655 case GF_OMP_TARGET_KIND_EXIT_DATA:
9656 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9657 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9658 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9659 /* ..., other than for those stand-alone directives... */
9660 region = NULL;
9661 break;
9662 default:
9663 gcc_unreachable ();
9666 else if (code == GIMPLE_OMP_ORDERED
9667 && omp_find_clause (gimple_omp_ordered_clauses
9668 (as_a <gomp_ordered *> (stmt)),
9669 OMP_CLAUSE_DEPEND))
9670 /* #pragma omp ordered depend is also just a stand-alone
9671 directive. */
9672 region = NULL;
9673 else if (code == GIMPLE_OMP_TASK
9674 && gimple_omp_task_taskwait_p (stmt))
9675 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
9676 region = NULL;
9677 /* ..., this directive becomes the parent for a new region. */
9678 if (region)
9679 parent = region;
9683 if (single_tree && !parent)
9684 return;
9686 for (son = first_dom_son (CDI_DOMINATORS, bb);
9687 son;
9688 son = next_dom_son (CDI_DOMINATORS, son))
9689 build_omp_regions_1 (son, parent, single_tree);
9692 /* Builds the tree of OMP regions rooted at ROOT, storing it in
9693 root_omp_region. */
9695 static void
9696 build_omp_regions_root (basic_block root)
9698 gcc_assert (root_omp_region == NULL);
9699 build_omp_regions_1 (root, NULL, true);
9700 gcc_assert (root_omp_region != NULL);
9703 /* Expands the omp construct (and its subconstructs) starting at HEAD. */
9705 void
9706 omp_expand_local (basic_block head)
9708 build_omp_regions_root (head);
9709 if (dump_file && (dump_flags & TDF_DETAILS))
9711 fprintf (dump_file, "\nOMP region tree\n\n");
9712 dump_omp_region (dump_file, root_omp_region, 0);
9713 fprintf (dump_file, "\n");
9716 remove_exit_barriers (root_omp_region);
9717 expand_omp (root_omp_region);
9719 omp_free_regions ();
9722 /* Scan the CFG and build a tree of OMP regions, storing the root in
9723 root_omp_region. */
9725 static void
9726 build_omp_regions (void)
9728 gcc_assert (root_omp_region == NULL);
9729 calculate_dominance_info (CDI_DOMINATORS);
9730 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
9733 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
9735 static unsigned int
9736 execute_expand_omp (void)
9738 build_omp_regions ();
9740 if (!root_omp_region)
9741 return 0;
9743 if (dump_file)
9745 fprintf (dump_file, "\nOMP region tree\n\n");
9746 dump_omp_region (dump_file, root_omp_region, 0);
9747 fprintf (dump_file, "\n");
9750 remove_exit_barriers (root_omp_region);
9752 expand_omp (root_omp_region);
9754 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9755 verify_loop_structure ();
9756 cleanup_tree_cfg ();
9758 omp_free_regions ();
9760 return 0;
9763 /* OMP expansion -- the default pass, run before creation of SSA form. */
9765 namespace {
9767 const pass_data pass_data_expand_omp =
9769 GIMPLE_PASS, /* type */
9770 "ompexp", /* name */
9771 OPTGROUP_OMP, /* optinfo_flags */
9772 TV_NONE, /* tv_id */
9773 PROP_gimple_any, /* properties_required */
9774 PROP_gimple_eomp, /* properties_provided */
9775 0, /* properties_destroyed */
9776 0, /* todo_flags_start */
9777 0, /* todo_flags_finish */
9780 class pass_expand_omp : public gimple_opt_pass
9782 public:
9783 pass_expand_omp (gcc::context *ctxt)
9784 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9787 /* opt_pass methods: */
9788 virtual unsigned int execute (function *)
9790 bool gate = ((flag_openacc != 0 || flag_openmp != 0
9791 || flag_openmp_simd != 0)
9792 && !seen_error ());
9794 /* This pass always runs, to provide PROP_gimple_eomp.
9795 But often, there is nothing to do. */
9796 if (!gate)
9797 return 0;
9799 return execute_expand_omp ();
9802 }; // class pass_expand_omp
9804 } // anon namespace
9806 gimple_opt_pass *
9807 make_pass_expand_omp (gcc::context *ctxt)
9809 return new pass_expand_omp (ctxt);
9812 namespace {
9814 const pass_data pass_data_expand_omp_ssa =
9816 GIMPLE_PASS, /* type */
9817 "ompexpssa", /* name */
9818 OPTGROUP_OMP, /* optinfo_flags */
9819 TV_NONE, /* tv_id */
9820 PROP_cfg | PROP_ssa, /* properties_required */
9821 PROP_gimple_eomp, /* properties_provided */
9822 0, /* properties_destroyed */
9823 0, /* todo_flags_start */
9824 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
9827 class pass_expand_omp_ssa : public gimple_opt_pass
9829 public:
9830 pass_expand_omp_ssa (gcc::context *ctxt)
9831 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
9834 /* opt_pass methods: */
9835 virtual bool gate (function *fun)
9837 return !(fun->curr_properties & PROP_gimple_eomp);
9839 virtual unsigned int execute (function *) { return execute_expand_omp (); }
9840 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
9842 }; // class pass_expand_omp_ssa
9844 } // anon namespace
9846 gimple_opt_pass *
9847 make_pass_expand_omp_ssa (gcc::context *ctxt)
9849 return new pass_expand_omp_ssa (ctxt);
9852 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
9853 GIMPLE_* codes. */
9855 bool
9856 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
9857 int *region_idx)
9859 gimple *last = last_stmt (bb);
9860 enum gimple_code code = gimple_code (last);
9861 struct omp_region *cur_region = *region;
9862 bool fallthru = false;
9864 switch (code)
9866 case GIMPLE_OMP_PARALLEL:
9867 case GIMPLE_OMP_FOR:
9868 case GIMPLE_OMP_SINGLE:
9869 case GIMPLE_OMP_TEAMS:
9870 case GIMPLE_OMP_MASTER:
9871 case GIMPLE_OMP_TASKGROUP:
9872 case GIMPLE_OMP_CRITICAL:
9873 case GIMPLE_OMP_SECTION:
9874 cur_region = new_omp_region (bb, code, cur_region);
9875 fallthru = true;
9876 break;
9878 case GIMPLE_OMP_TASK:
9879 cur_region = new_omp_region (bb, code, cur_region);
9880 fallthru = true;
9881 if (gimple_omp_task_taskwait_p (last))
9882 cur_region = cur_region->outer;
9883 break;
9885 case GIMPLE_OMP_ORDERED:
9886 cur_region = new_omp_region (bb, code, cur_region);
9887 fallthru = true;
9888 if (omp_find_clause (gimple_omp_ordered_clauses
9889 (as_a <gomp_ordered *> (last)),
9890 OMP_CLAUSE_DEPEND))
9891 cur_region = cur_region->outer;
9892 break;
9894 case GIMPLE_OMP_TARGET:
9895 cur_region = new_omp_region (bb, code, cur_region);
9896 fallthru = true;
9897 switch (gimple_omp_target_kind (last))
9899 case GF_OMP_TARGET_KIND_REGION:
9900 case GF_OMP_TARGET_KIND_DATA:
9901 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9902 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9903 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9904 case GF_OMP_TARGET_KIND_OACC_DATA:
9905 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9906 break;
9907 case GF_OMP_TARGET_KIND_UPDATE:
9908 case GF_OMP_TARGET_KIND_ENTER_DATA:
9909 case GF_OMP_TARGET_KIND_EXIT_DATA:
9910 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9911 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9912 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9913 cur_region = cur_region->outer;
9914 break;
9915 default:
9916 gcc_unreachable ();
9918 break;
9920 case GIMPLE_OMP_SECTIONS:
9921 cur_region = new_omp_region (bb, code, cur_region);
9922 fallthru = true;
9923 break;
9925 case GIMPLE_OMP_SECTIONS_SWITCH:
9926 fallthru = false;
9927 break;
9929 case GIMPLE_OMP_ATOMIC_LOAD:
9930 case GIMPLE_OMP_ATOMIC_STORE:
9931 fallthru = true;
9932 break;
9934 case GIMPLE_OMP_RETURN:
9935 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
9936 somewhere other than the next block. This will be
9937 created later. */
9938 cur_region->exit = bb;
9939 if (cur_region->type == GIMPLE_OMP_TASK)
9940 /* Add an edge corresponding to not scheduling the task
9941 immediately. */
9942 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
9943 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
9944 cur_region = cur_region->outer;
9945 break;
9947 case GIMPLE_OMP_CONTINUE:
9948 cur_region->cont = bb;
9949 switch (cur_region->type)
9951 case GIMPLE_OMP_FOR:
9952 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
9953 successor edges as abnormal to prevent splitting
9954 them. */
9955 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
9956 /* Make the loopback edge. */
9957 make_edge (bb, single_succ (cur_region->entry),
9958 EDGE_ABNORMAL);
9960 /* Create an edge from GIMPLE_OMP_FOR to exit, which
9961 corresponds to the case that the body of the loop
9962 is not executed at all. */
9963 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
9964 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
9965 fallthru = false;
9966 break;
9968 case GIMPLE_OMP_SECTIONS:
9969 /* Wire up the edges into and out of the nested sections. */
9971 basic_block switch_bb = single_succ (cur_region->entry);
9973 struct omp_region *i;
9974 for (i = cur_region->inner; i ; i = i->next)
9976 gcc_assert (i->type == GIMPLE_OMP_SECTION);
9977 make_edge (switch_bb, i->entry, 0);
9978 make_edge (i->exit, bb, EDGE_FALLTHRU);
9981 /* Make the loopback edge to the block with
9982 GIMPLE_OMP_SECTIONS_SWITCH. */
9983 make_edge (bb, switch_bb, 0);
9985 /* Make the edge from the switch to exit. */
9986 make_edge (switch_bb, bb->next_bb, 0);
9987 fallthru = false;
9989 break;
9991 case GIMPLE_OMP_TASK:
9992 fallthru = true;
9993 break;
9995 default:
9996 gcc_unreachable ();
9998 break;
10000 default:
10001 gcc_unreachable ();
10004 if (*region != cur_region)
10006 *region = cur_region;
10007 if (cur_region)
10008 *region_idx = cur_region->entry->index;
10009 else
10010 *region_idx = 0;
10013 return fallthru;