1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
120 /* Return true if REGION is a combined parallel+workshare region. */
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
125 return region->is_combined_parallel;
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
142 Is lowered into:
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
201 return true;
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
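/* Illustrative note (not part of the original source): assuming omp_max_vf
   returns a power-of-two vectorization factor VF, the folding above amounts
   to
       chunk_size = (chunk_size + (VF - 1)) & -VF;
   i.e. the chunk size is rounded up to a whole multiple of VF so that each
   chunk can be processed in full SIMD iterations.  */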
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded.  PAR_STMT is the parallel directive it is combined with. */
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237 struct omp_for_data fd;
238 tree n1, n2;
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
244 if (gimple_omp_for_combined_into_p (for_stmt))
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
268 if (fd.chunk_size)
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
275 return ws_args;
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
289 gcc_unreachable ();
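/* Illustrative note (not part of the original source): for a combined
   parallel loop the WS_ARGS collected above (n1, n2, step and optionally
   the chunk size) are later spliced into the combined library call built
   by expand_parallel_call, roughly
       GOMP_parallel_loop_dynamic (fn, data, num_threads,
                                   n1, n2, step, chunk_size, flags);
   for combined parallel sections the single extra argument is the section
   count passed to GOMP_parallel_sections.  */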
292 /* Discover whether REGION is a combined parallel+workshare region. */
294 static void
295 determine_parallel_type (struct omp_region *region)
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
318 /* Give up for task reductions on the parallel; while it is implementable,
319 adding another big set of APIs or slowing down the normal paths is
320 not acceptable. */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
335 if (region->inner->type == GIMPLE_OMP_FOR)
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
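/* Illustrative note (not part of the original source): a region that is
   typically marked combined here is
       #pragma omp parallel for schedule (dynamic, 16)
       for (i = 0; i < n; i++)
         body;
   which can then be expanded as one GOMP_parallel_loop_dynamic call
   instead of a GOMP_parallel call whose body performs separate
   GOMP_loop_dynamic_start/next calls.  */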
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
375 /* Dump the parallel region tree rooted at REGION. */
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
386 if (region->cont)
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
405 dump_omp_region (stderr, region, 0);
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
411 dump_omp_region (stderr, root_omp_region, 0);
414 /* Create a new parallel region starting at STMT inside region PARENT. */
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
420 struct omp_region *region = XCNEW (struct omp_region);
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
426 if (parent)
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
433 else
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
441 return region;
444 /* Release the memory associated with the region tree rooted at REGION. */
446 static void
447 free_omp_region_1 (struct omp_region *region)
449 struct omp_region *i, *n;
451 for (i = region->inner; i ; i = n)
453 n = i->next;
454 free_omp_region_1 (i);
457 free (region);
460 /* Release the memory for the entire omp region tree. */
462 void
463 omp_free_regions (void)
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
468 n = r->next;
469 free_omp_region_1 (r);
471 root_omp_region = NULL;
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
477 static gcond *
478 gimple_build_cond_empty (tree cond)
480 enum tree_code pred_code;
481 tree lhs, rhs;
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
495 static void
496 adjust_context_and_scope (struct omp_region *region, tree entry_block,
497 tree child_fndecl)
499 tree parent_fndecl = NULL_TREE;
500 gimple *entry_stmt;
501 /* OMP expansion expands inner regions before outer ones, so if
502 we e.g. have explicit task region nested in parallel region, when
503 expanding the task region current_function_decl will be the original
504 source function, but we actually want to use as context the child
505 function of the parallel. */
506 for (region = region->outer;
507 region && parent_fndecl == NULL_TREE; region = region->outer)
508 switch (region->type)
510 case GIMPLE_OMP_PARALLEL:
511 case GIMPLE_OMP_TASK:
512 case GIMPLE_OMP_TEAMS:
513 entry_stmt = last_stmt (region->entry);
514 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
515 break;
516 case GIMPLE_OMP_TARGET:
517 entry_stmt = last_stmt (region->entry);
518 parent_fndecl
519 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
520 break;
521 default:
522 break;
525 if (parent_fndecl == NULL_TREE)
526 parent_fndecl = current_function_decl;
527 DECL_CONTEXT (child_fndecl) = parent_fndecl;
529 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531 tree b = BLOCK_SUPERCONTEXT (entry_block);
532 if (TREE_CODE (b) == BLOCK)
534 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
535 BLOCK_VARS (b) = child_fndecl;
540 /* Build the function calls to GOMP_parallel etc to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where to insert the code. WS_ARGS
543 will be set if this is a call to a combined parallel+workshare
544 construct, it contains the list of additional arguments needed by
545 the workshare construct. */
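/* Illustrative sketch (not part of the original source): in the simplest
   case the code emitted into BB is just
       GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);
   where num_threads is 0 (runtime choice) unless a num_threads clause is
   present; for combined regions the WS_ARGS values are spliced in before
   the flags argument, and for _reductemp_ the call's result is stored into
   the reduction temporary.  */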
547 static void
548 expand_parallel_call (struct omp_region *region, basic_block bb,
549 gomp_parallel *entry_stmt,
550 vec<tree, va_gc> *ws_args)
552 tree t, t1, t2, val, cond, c, clauses, flags;
553 gimple_stmt_iterator gsi;
554 gimple *stmt;
555 enum built_in_function start_ix;
556 int start_ix2;
557 location_t clause_loc;
558 vec<tree, va_gc> *args;
560 clauses = gimple_omp_parallel_clauses (entry_stmt);
562 /* Determine what flavor of GOMP_parallel we will be
563 emitting. */
564 start_ix = BUILT_IN_GOMP_PARALLEL;
565 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
566 if (rtmp)
567 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
568 else if (is_combined_parallel (region))
570 switch (region->inner->type)
572 case GIMPLE_OMP_FOR:
573 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
574 switch (region->inner->sched_kind)
576 case OMP_CLAUSE_SCHEDULE_RUNTIME:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region->inner->has_lastprivate_conditional != 0)
580 start_ix2 = 3;
581 else if ((region->inner->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
583 start_ix2 = 6;
584 else if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
586 start_ix2 = 7;
587 else
588 start_ix2 = 3;
589 break;
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
591 case OMP_CLAUSE_SCHEDULE_GUIDED:
592 if ((region->inner->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
594 && !region->inner->has_lastprivate_conditional)
596 start_ix2 = 3 + region->inner->sched_kind;
597 break;
599 /* FALLTHRU */
600 default:
601 start_ix2 = region->inner->sched_kind;
602 break;
604 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
605 start_ix = (enum built_in_function) start_ix2;
606 break;
607 case GIMPLE_OMP_SECTIONS:
608 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
609 break;
610 default:
611 gcc_unreachable ();
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
617 cond = NULL_TREE;
618 val = build_int_cst (unsigned_type_node, 0);
619 flags = build_int_cst (unsigned_type_node, 0);
621 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
622 if (c)
623 cond = OMP_CLAUSE_IF_EXPR (c);
625 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
626 if (c)
628 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
629 clause_loc = OMP_CLAUSE_LOCATION (c);
631 else
632 clause_loc = gimple_location (entry_stmt);
634 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
635 if (c)
636 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638 /* Ensure 'val' is of the correct type. */
639 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641 /* If we found the clause 'if (cond)', build either
642 (cond != 0) or (cond ? val : 1u). */
643 if (cond)
645 cond = gimple_boolify (cond);
647 if (integer_zerop (val))
648 val = fold_build2_loc (clause_loc,
649 EQ_EXPR, unsigned_type_node, cond,
650 build_int_cst (TREE_TYPE (cond), 0));
651 else
653 basic_block cond_bb, then_bb, else_bb;
654 edge e, e_then, e_else;
655 tree tmp_then, tmp_else, tmp_join, tmp_var;
657 tmp_var = create_tmp_var (TREE_TYPE (val));
658 if (gimple_in_ssa_p (cfun))
660 tmp_then = make_ssa_name (tmp_var);
661 tmp_else = make_ssa_name (tmp_var);
662 tmp_join = make_ssa_name (tmp_var);
664 else
666 tmp_then = tmp_var;
667 tmp_else = tmp_var;
668 tmp_join = tmp_var;
671 e = split_block_after_labels (bb);
672 cond_bb = e->src;
673 bb = e->dest;
674 remove_edge (e);
676 then_bb = create_empty_bb (cond_bb);
677 else_bb = create_empty_bb (then_bb);
678 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
679 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681 stmt = gimple_build_cond_empty (cond);
682 gsi = gsi_start_bb (cond_bb);
683 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685 gsi = gsi_start_bb (then_bb);
686 expand_omp_build_assign (&gsi, tmp_then, val, true);
688 gsi = gsi_start_bb (else_bb);
689 expand_omp_build_assign (&gsi, tmp_else,
690 build_int_cst (unsigned_type_node, 1),
691 true);
693 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
694 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
695 add_bb_to_loop (then_bb, cond_bb->loop_father);
696 add_bb_to_loop (else_bb, cond_bb->loop_father);
697 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
698 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
700 if (gimple_in_ssa_p (cfun))
702 gphi *phi = create_phi_node (tmp_join, bb);
703 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
704 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
707 val = tmp_join;
710 gsi = gsi_start_bb (bb);
711 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
712 false, GSI_CONTINUE_LINKING);
715 gsi = gsi_last_nondebug_bb (bb);
716 t = gimple_omp_parallel_data_arg (entry_stmt);
717 if (t == NULL)
718 t1 = null_pointer_node;
719 else
720 t1 = build_fold_addr_expr (t);
721 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
722 t2 = build_fold_addr_expr (child_fndecl);
724 vec_alloc (args, 4 + vec_safe_length (ws_args));
725 args->quick_push (t2);
726 args->quick_push (t1);
727 args->quick_push (val);
728 if (ws_args)
729 args->splice (*ws_args);
730 args->quick_push (flags);
732 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
733 builtin_decl_explicit (start_ix), args);
735 if (rtmp)
737 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
738 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
739 fold_convert (type,
740 fold_convert (pointer_sized_int_node, t)));
742 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where to insert the code. */
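/* Illustrative sketch (not part of the original source): for a plain task
   the call built below has roughly the shape
       GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                  if_cond, flags, depend, priority, detach);
   taskloops instead call GOMP_taskloop or GOMP_taskloop_ull and pass
   num_tasks, priority and the start/end/step values of the loop.  */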
749 static void
750 expand_task_call (struct omp_region *region, basic_block bb,
751 gomp_task *entry_stmt)
753 tree t1, t2, t3;
754 gimple_stmt_iterator gsi;
755 location_t loc = gimple_location (entry_stmt);
757 tree clauses = gimple_omp_task_clauses (entry_stmt);
759 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
760 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
761 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
762 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
763 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
764 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
765 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
767 unsigned int iflags
768 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
769 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
770 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
773 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
774 tree num_tasks = NULL_TREE;
775 bool ull = false;
776 if (taskloop_p)
778 gimple *g = last_stmt (region->outer->entry);
779 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
780 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
781 struct omp_for_data fd;
782 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
783 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
784 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
785 OMP_CLAUSE__LOOPTEMP_);
786 startvar = OMP_CLAUSE_DECL (startvar);
787 endvar = OMP_CLAUSE_DECL (endvar);
788 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
789 if (fd.loop.cond_code == LT_EXPR)
790 iflags |= GOMP_TASK_FLAG_UP;
791 tree tclauses = gimple_omp_for_clauses (g);
792 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
793 if (num_tasks)
795 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
796 iflags |= GOMP_TASK_FLAG_STRICT;
797 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
799 else
801 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
802 if (num_tasks)
804 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
805 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
806 iflags |= GOMP_TASK_FLAG_STRICT;
807 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
809 else
810 num_tasks = integer_zero_node;
812 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
813 if (ifc == NULL_TREE)
814 iflags |= GOMP_TASK_FLAG_IF;
815 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
816 iflags |= GOMP_TASK_FLAG_NOGROUP;
817 ull = fd.iter_type == long_long_unsigned_type_node;
818 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
819 iflags |= GOMP_TASK_FLAG_REDUCTION;
821 else
823 if (priority)
824 iflags |= GOMP_TASK_FLAG_PRIORITY;
825 if (detach)
826 iflags |= GOMP_TASK_FLAG_DETACH;
829 tree flags = build_int_cst (unsigned_type_node, iflags);
831 tree cond = boolean_true_node;
832 if (ifc)
834 if (taskloop_p)
836 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
837 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
838 build_int_cst (unsigned_type_node,
839 GOMP_TASK_FLAG_IF),
840 build_int_cst (unsigned_type_node, 0));
841 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
842 flags, t);
844 else
845 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
848 if (finalc)
850 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
851 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
852 build_int_cst (unsigned_type_node,
853 GOMP_TASK_FLAG_FINAL),
854 build_int_cst (unsigned_type_node, 0));
855 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
857 if (depend)
858 depend = OMP_CLAUSE_DECL (depend);
859 else
860 depend = build_int_cst (ptr_type_node, 0);
861 if (priority)
862 priority = fold_convert (integer_type_node,
863 OMP_CLAUSE_PRIORITY_EXPR (priority));
864 else
865 priority = integer_zero_node;
867 gsi = gsi_last_nondebug_bb (bb);
869 detach = (detach
870 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
871 : null_pointer_node);
873 tree t = gimple_omp_task_data_arg (entry_stmt);
874 if (t == NULL)
875 t2 = null_pointer_node;
876 else
877 t2 = build_fold_addr_expr_loc (loc, t);
878 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
879 t = gimple_omp_task_copy_fn (entry_stmt);
880 if (t == NULL)
881 t3 = null_pointer_node;
882 else
883 t3 = build_fold_addr_expr_loc (loc, t);
885 if (taskloop_p)
886 t = build_call_expr (ull
887 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
888 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
889 11, t1, t2, t3,
890 gimple_omp_task_arg_size (entry_stmt),
891 gimple_omp_task_arg_align (entry_stmt), flags,
892 num_tasks, priority, startvar, endvar, step);
893 else
894 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
895 10, t1, t2, t3,
896 gimple_omp_task_arg_size (entry_stmt),
897 gimple_omp_task_arg_align (entry_stmt), cond, flags,
898 depend, priority, detach);
900 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
901 false, GSI_CONTINUE_LINKING);
904 /* Build the function call to GOMP_taskwait_depend to actually
905 generate the taskwait operation. BB is the block where to insert the
906 code. */
908 static void
909 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
911 tree clauses = gimple_omp_task_clauses (entry_stmt);
912 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
913 if (depend == NULL_TREE)
914 return;
916 depend = OMP_CLAUSE_DECL (depend);
918 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
919 tree t
920 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
921 1, depend);
923 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
924 false, GSI_CONTINUE_LINKING);
927 /* Build the function call to GOMP_teams_reg to actually
928 generate the host teams operation. REGION is the teams region
929 being expanded. BB is the block where to insert the code. */
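/* Illustrative sketch (not part of the original source): the call built
   below is roughly
       GOMP_teams_reg (child_fn, &.omp_data_o, num_teams, thread_limit, 0);
   where num_teams and thread_limit default to 0 when the respective
   clauses are absent and the trailing flags argument is currently always
   zero.  */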
931 static void
932 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
934 tree clauses = gimple_omp_teams_clauses (entry_stmt);
935 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
936 if (num_teams == NULL_TREE)
937 num_teams = build_int_cst (unsigned_type_node, 0);
938 else
940 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
941 num_teams = fold_convert (unsigned_type_node, num_teams);
943 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
944 if (thread_limit == NULL_TREE)
945 thread_limit = build_int_cst (unsigned_type_node, 0);
946 else
948 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
949 thread_limit = fold_convert (unsigned_type_node, thread_limit);
952 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
953 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
954 if (t == NULL)
955 t1 = null_pointer_node;
956 else
957 t1 = build_fold_addr_expr (t);
958 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
959 tree t2 = build_fold_addr_expr (child_fndecl);
961 vec<tree, va_gc> *args;
962 vec_alloc (args, 5);
963 args->quick_push (t2);
964 args->quick_push (t1);
965 args->quick_push (num_teams);
966 args->quick_push (thread_limit);
967 /* For future extensibility. */
968 args->quick_push (build_zero_cst (unsigned_type_node));
970 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
971 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
972 args);
974 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
975 false, GSI_CONTINUE_LINKING);
978 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
980 static tree
981 vec2chain (vec<tree, va_gc> *v)
983 tree chain = NULL_TREE, t;
984 unsigned ix;
986 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
988 DECL_CHAIN (t) = chain;
989 chain = t;
992 return chain;
995 /* Remove barriers in REGION->EXIT's block. Note that this is only
996 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
997 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
998 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
999 removed. */
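/* Illustrative note (not part of the original source): in
       #pragma omp parallel
       {
         #pragma omp for
         for (i = 0; i < n; i++)
           body;
       }
   the implicit barrier at the end of the for construct is immediately
   followed by the implicit barrier at the end of the parallel, so the
   workshare's GIMPLE_OMP_RETURN can usually be marked nowait; the
   addressable-variable check below guards the case where queued tasks
   might still rely on the inner barrier.  */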
1001 static void
1002 remove_exit_barrier (struct omp_region *region)
1004 gimple_stmt_iterator gsi;
1005 basic_block exit_bb;
1006 edge_iterator ei;
1007 edge e;
1008 gimple *stmt;
1009 int any_addressable_vars = -1;
1011 exit_bb = region->exit;
1013 /* If the parallel region doesn't return, we don't have REGION->EXIT
1014 block at all. */
1015 if (! exit_bb)
1016 return;
1018 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1019 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1020 statements that can appear in between are extremely limited -- no
1021 memory operations at all. Here, we allow nothing at all, so the
1022 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1023 gsi = gsi_last_nondebug_bb (exit_bb);
1024 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1025 gsi_prev_nondebug (&gsi);
1026 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1027 return;
1029 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1031 gsi = gsi_last_nondebug_bb (e->src);
1032 if (gsi_end_p (gsi))
1033 continue;
1034 stmt = gsi_stmt (gsi);
1035 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1036 && !gimple_omp_return_nowait_p (stmt))
1038 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1039 in many cases. If there could be tasks queued, the barrier
1040 might be needed to let the tasks run before some local
1041 variable of the parallel that the task uses as shared
1042 runs out of scope. The task can be spawned either
1043 from within current function (this would be easy to check)
1044 or from some function it calls and gets passed an address
1045 of such a variable. */
1046 if (any_addressable_vars < 0)
1048 gomp_parallel *parallel_stmt
1049 = as_a <gomp_parallel *> (last_stmt (region->entry));
1050 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1051 tree local_decls, block, decl;
1052 unsigned ix;
1054 any_addressable_vars = 0;
1055 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1056 if (TREE_ADDRESSABLE (decl))
1058 any_addressable_vars = 1;
1059 break;
1061 for (block = gimple_block (stmt);
1062 !any_addressable_vars
1063 && block
1064 && TREE_CODE (block) == BLOCK;
1065 block = BLOCK_SUPERCONTEXT (block))
1067 for (local_decls = BLOCK_VARS (block);
1068 local_decls;
1069 local_decls = DECL_CHAIN (local_decls))
1070 if (TREE_ADDRESSABLE (local_decls))
1072 any_addressable_vars = 1;
1073 break;
1075 if (block == gimple_block (parallel_stmt))
1076 break;
1079 if (!any_addressable_vars)
1080 gimple_omp_return_set_nowait (stmt);
1085 static void
1086 remove_exit_barriers (struct omp_region *region)
1088 if (region->type == GIMPLE_OMP_PARALLEL)
1089 remove_exit_barrier (region);
1091 if (region->inner)
1093 region = region->inner;
1094 remove_exit_barriers (region);
1095 while (region->next)
1097 region = region->next;
1098 remove_exit_barriers (region);
1103 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1104 calls. These can't be declared as const functions, but
1105 within one parallel body they are constant, so they can be
1106 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1107 which are declared const. Similarly for task body, except
1108 that in untied task omp_get_thread_num () can change at any task
1109 scheduling point. */
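/* For example (illustrative, not part of the original source), within one
   parallel body
       a = omp_get_num_threads ();
       ...
       b = omp_get_num_threads ();
   both calls return the same value, so rewriting them to the const
   __builtin_omp_get_num_threads () lets later passes CSE them into a
   single call.  */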
1111 static void
1112 optimize_omp_library_calls (gimple *entry_stmt)
1114 basic_block bb;
1115 gimple_stmt_iterator gsi;
1116 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1117 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1118 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1119 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1120 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1121 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1122 OMP_CLAUSE_UNTIED) != NULL);
1124 FOR_EACH_BB_FN (bb, cfun)
1125 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1127 gimple *call = gsi_stmt (gsi);
1128 tree decl;
1130 if (is_gimple_call (call)
1131 && (decl = gimple_call_fndecl (call))
1132 && DECL_EXTERNAL (decl)
1133 && TREE_PUBLIC (decl)
1134 && DECL_INITIAL (decl) == NULL)
1136 tree built_in;
1138 if (DECL_NAME (decl) == thr_num_id)
1140 /* In #pragma omp task untied omp_get_thread_num () can change
1141 during the execution of the task region. */
1142 if (untied_task)
1143 continue;
1144 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1146 else if (DECL_NAME (decl) == num_thr_id)
1147 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1148 else
1149 continue;
1151 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1152 || gimple_call_num_args (call) != 0)
1153 continue;
1155 if (flag_exceptions && !TREE_NOTHROW (decl))
1156 continue;
1158 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1159 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1160 TREE_TYPE (TREE_TYPE (built_in))))
1161 continue;
1163 gimple_call_set_fndecl (call, built_in);
1168 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1169 regimplified. */
1171 static tree
1172 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1174 tree t = *tp;
1176 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1177 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1178 return t;
1180 if (TREE_CODE (t) == ADDR_EXPR)
1181 recompute_tree_invariant_for_addr_expr (t);
1183 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1184 return NULL_TREE;
1187 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1189 static void
1190 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1191 bool after)
1193 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1194 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1195 !after, after ? GSI_CONTINUE_LINKING
1196 : GSI_SAME_STMT);
1197 gimple *stmt = gimple_build_assign (to, from);
1198 if (after)
1199 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1200 else
1201 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1202 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1203 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1205 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 gimple_regimplify_operands (stmt, &gsi);
1210 /* Expand the OpenMP parallel or task directive starting at REGION. */
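/* Illustrative overview (not part of the original source): for
       #pragma omp parallel
         body;
   the GIMPLE_OMP_PARALLEL and GIMPLE_OMP_RETURN markers are removed, the
   body blocks are moved into the pre-created child function via
   move_sese_region_to_fn, the child is registered with the callgraph, and
   a call to the matching GOMP_parallel / GOMP_task / GOMP_teams_reg entry
   point is emitted where the region used to be.  */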
1212 static void
1213 expand_omp_taskreg (struct omp_region *region)
1215 basic_block entry_bb, exit_bb, new_bb;
1216 struct function *child_cfun;
1217 tree child_fn, block, t;
1218 gimple_stmt_iterator gsi;
1219 gimple *entry_stmt, *stmt;
1220 edge e;
1221 vec<tree, va_gc> *ws_args;
1223 entry_stmt = last_stmt (region->entry);
1224 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1225 && gimple_omp_task_taskwait_p (entry_stmt))
1227 new_bb = region->entry;
1228 gsi = gsi_last_nondebug_bb (region->entry);
1229 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1230 gsi_remove (&gsi, true);
1231 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1232 return;
1235 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1236 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1238 entry_bb = region->entry;
1239 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1240 exit_bb = region->cont;
1241 else
1242 exit_bb = region->exit;
1244 if (is_combined_parallel (region))
1245 ws_args = region->ws_args;
1246 else
1247 ws_args = NULL;
1249 if (child_cfun->cfg)
1251 /* Due to inlining, it may happen that we have already outlined
1252 the region, in which case all we need to do is make the
1253 sub-graph unreachable and emit the parallel call. */
1254 edge entry_succ_e, exit_succ_e;
1256 entry_succ_e = single_succ_edge (entry_bb);
1258 gsi = gsi_last_nondebug_bb (entry_bb);
1259 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1260 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1261 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1262 gsi_remove (&gsi, true);
1264 new_bb = entry_bb;
1265 if (exit_bb)
1267 exit_succ_e = single_succ_edge (exit_bb);
1268 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1270 remove_edge_and_dominated_blocks (entry_succ_e);
1272 else
1274 unsigned srcidx, dstidx, num;
1276 /* If the parallel region needs data sent from the parent
1277 function, then the very first statement (except possible
1278 tree profile counter updates) of the parallel body
1279 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1280 &.OMP_DATA_O is passed as an argument to the child function,
1281 we need to replace it with the argument as seen by the child
1282 function.
1284 In most cases, this will end up being the identity assignment
1285 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1286 a function call that has been inlined, the original PARM_DECL
1287 .OMP_DATA_I may have been converted into a different local
1288 variable. In which case, we need to keep the assignment. */
1289 if (gimple_omp_taskreg_data_arg (entry_stmt))
1291 basic_block entry_succ_bb
1292 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1293 : FALLTHRU_EDGE (entry_bb)->dest;
1294 tree arg;
1295 gimple *parcopy_stmt = NULL;
1297 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1299 gimple *stmt;
1301 gcc_assert (!gsi_end_p (gsi));
1302 stmt = gsi_stmt (gsi);
1303 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1304 continue;
1306 if (gimple_num_ops (stmt) == 2)
1308 tree arg = gimple_assign_rhs1 (stmt);
1310 /* We're ignoring the subcode because we're
1311 effectively doing a STRIP_NOPS. */
1313 if (TREE_CODE (arg) == ADDR_EXPR
1314 && (TREE_OPERAND (arg, 0)
1315 == gimple_omp_taskreg_data_arg (entry_stmt)))
1317 parcopy_stmt = stmt;
1318 break;
1323 gcc_assert (parcopy_stmt != NULL);
1324 arg = DECL_ARGUMENTS (child_fn);
1326 if (!gimple_in_ssa_p (cfun))
1328 if (gimple_assign_lhs (parcopy_stmt) == arg)
1329 gsi_remove (&gsi, true);
1330 else
1332 /* ?? Is setting the subcode really necessary ?? */
1333 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1334 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1337 else
1339 tree lhs = gimple_assign_lhs (parcopy_stmt);
1340 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1341 /* We'd like to set the rhs to the default def in the child_fn,
1342 but it's too early to create ssa names in the child_fn.
1343 Instead, we set the rhs to the parm. In
1344 move_sese_region_to_fn, we introduce a default def for the
1345 parm, map the parm to its default def, and once we encounter
1346 this stmt, replace the parm with the default def. */
1347 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1348 update_stmt (parcopy_stmt);
1352 /* Declare local variables needed in CHILD_CFUN. */
1353 block = DECL_INITIAL (child_fn);
1354 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1355 /* The gimplifier could record temporaries in parallel/task block
1356 rather than in containing function's local_decls chain,
1357 which would mean cgraph missed finalizing them. Do it now. */
1358 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1359 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1360 varpool_node::finalize_decl (t);
1361 DECL_SAVED_TREE (child_fn) = NULL;
1362 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1363 gimple_set_body (child_fn, NULL);
1364 TREE_USED (block) = 1;
1366 /* Reset DECL_CONTEXT on function arguments. */
1367 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1368 DECL_CONTEXT (t) = child_fn;
1370 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1371 so that it can be moved to the child function. */
1372 gsi = gsi_last_nondebug_bb (entry_bb);
1373 stmt = gsi_stmt (gsi);
1374 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1375 || gimple_code (stmt) == GIMPLE_OMP_TASK
1376 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1377 e = split_block (entry_bb, stmt);
1378 gsi_remove (&gsi, true);
1379 entry_bb = e->dest;
1380 edge e2 = NULL;
1381 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1382 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1383 else
1385 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1386 gcc_assert (e2->dest == region->exit);
1387 remove_edge (BRANCH_EDGE (entry_bb));
1388 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1389 gsi = gsi_last_nondebug_bb (region->exit);
1390 gcc_assert (!gsi_end_p (gsi)
1391 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1392 gsi_remove (&gsi, true);
1395 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1396 if (exit_bb)
1398 gsi = gsi_last_nondebug_bb (exit_bb);
1399 gcc_assert (!gsi_end_p (gsi)
1400 && (gimple_code (gsi_stmt (gsi))
1401 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1402 stmt = gimple_build_return (NULL);
1403 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1404 gsi_remove (&gsi, true);
1407 /* Move the parallel region into CHILD_CFUN. */
1409 if (gimple_in_ssa_p (cfun))
1411 init_tree_ssa (child_cfun);
1412 init_ssa_operands (child_cfun);
1413 child_cfun->gimple_df->in_ssa_p = true;
1414 block = NULL_TREE;
1416 else
1417 block = gimple_block (entry_stmt);
1419 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1420 if (exit_bb)
1421 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1422 if (e2)
1424 basic_block dest_bb = e2->dest;
1425 if (!exit_bb)
1426 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1427 remove_edge (e2);
1428 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1430 /* When the OMP expansion process cannot guarantee an up-to-date
1431 loop tree, arrange for the child function to fix up loops. */
1432 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1433 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1435 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1436 num = vec_safe_length (child_cfun->local_decls);
1437 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1439 t = (*child_cfun->local_decls)[srcidx];
1440 if (DECL_CONTEXT (t) == cfun->decl)
1441 continue;
1442 if (srcidx != dstidx)
1443 (*child_cfun->local_decls)[dstidx] = t;
1444 dstidx++;
1446 if (dstidx != num)
1447 vec_safe_truncate (child_cfun->local_decls, dstidx);
1449 /* Inform the callgraph about the new function. */
1450 child_cfun->curr_properties = cfun->curr_properties;
1451 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1452 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1453 cgraph_node *node = cgraph_node::get_create (child_fn);
1454 node->parallelized_function = 1;
1455 cgraph_node::add_new_function (child_fn, true);
1457 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1458 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1460 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1461 fixed in a following pass. */
1462 push_cfun (child_cfun);
1463 if (need_asm)
1464 assign_assembler_name_if_needed (child_fn);
1466 if (optimize)
1467 optimize_omp_library_calls (entry_stmt);
1468 update_max_bb_count ();
1469 cgraph_edge::rebuild_edges ();
1471 /* Some EH regions might become dead, see PR34608. If
1472 pass_cleanup_cfg isn't the first pass to happen with the
1473 new child, these dead EH edges might cause problems.
1474 Clean them up now. */
1475 if (flag_exceptions)
1477 basic_block bb;
1478 bool changed = false;
1480 FOR_EACH_BB_FN (bb, cfun)
1481 changed |= gimple_purge_dead_eh_edges (bb);
1482 if (changed)
1483 cleanup_tree_cfg ();
1485 if (gimple_in_ssa_p (cfun))
1486 update_ssa (TODO_update_ssa);
1487 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1488 verify_loop_structure ();
1489 pop_cfun ();
1491 if (dump_file && !gimple_in_ssa_p (cfun))
1493 omp_any_child_fn_dumped = true;
1494 dump_function_header (dump_file, child_fn, dump_flags);
1495 dump_function_to_file (child_fn, dump_file, dump_flags);
1499 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1501 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1502 expand_parallel_call (region, new_bb,
1503 as_a <gomp_parallel *> (entry_stmt), ws_args);
1504 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1505 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1506 else
1507 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1508 if (gimple_in_ssa_p (cfun))
1509 update_ssa (TODO_update_ssa_only_virtuals);
1512 /* Information about members of an OpenACC collapsed loop nest. */
1514 struct oacc_collapse
1516 tree base; /* Base value. */
1517 tree iters; /* Number of steps. */
1518 tree step; /* Step size. */
1519 tree tile; /* Tile increment (if tiled). */
1520 tree outer; /* Tile iterator var. */
1523 /* Helper for expand_oacc_for. Determine collapsed loop information.
1524 Fill in COUNTS array. Emit any initialization code before GSI.
1525 Return the calculated outer loop bound of BOUND_TYPE. */
1527 static tree
1528 expand_oacc_collapse_init (const struct omp_for_data *fd,
1529 gimple_stmt_iterator *gsi,
1530 oacc_collapse *counts, tree diff_type,
1531 tree bound_type, location_t loc)
1533 tree tiling = fd->tiling;
1534 tree total = build_int_cst (bound_type, 1);
1535 int ix;
1537 gcc_assert (integer_onep (fd->loop.step));
1538 gcc_assert (integer_zerop (fd->loop.n1));
1540 /* When tiling, the first operand of the tile clause applies to the
1541 innermost loop, and we work outwards from there. Seems
1542 backwards, but whatever. */
1543 for (ix = fd->collapse; ix--;)
1545 const omp_for_data_loop *loop = &fd->loops[ix];
1547 tree iter_type = TREE_TYPE (loop->v);
1548 tree plus_type = iter_type;
1550 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1552 if (POINTER_TYPE_P (iter_type))
1553 plus_type = sizetype;
1555 if (tiling)
1557 tree num = build_int_cst (integer_type_node, fd->collapse);
1558 tree loop_no = build_int_cst (integer_type_node, ix);
1559 tree tile = TREE_VALUE (tiling);
1560 gcall *call
1561 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1562 /* gwv-outer=*/integer_zero_node,
1563 /* gwv-inner=*/integer_zero_node);
1565 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1566 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1567 gimple_call_set_lhs (call, counts[ix].tile);
1568 gimple_set_location (call, loc);
1569 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1571 tiling = TREE_CHAIN (tiling);
1573 else
1575 counts[ix].tile = NULL;
1576 counts[ix].outer = loop->v;
1579 tree b = loop->n1;
1580 tree e = loop->n2;
1581 tree s = loop->step;
1582 bool up = loop->cond_code == LT_EXPR;
1583 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1584 bool negating;
1585 tree expr;
1587 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1588 true, GSI_SAME_STMT);
1589 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1590 true, GSI_SAME_STMT);
1592 /* Convert the step, avoiding possible unsigned->signed overflow. */
1593 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1594 if (negating)
1595 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1596 s = fold_convert (diff_type, s);
1597 if (negating)
1598 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1599 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1600 true, GSI_SAME_STMT);
1602 /* Determine the range, avoiding possible unsigned->signed overflow. */
1603 negating = !up && TYPE_UNSIGNED (iter_type);
1604 expr = fold_build2 (MINUS_EXPR, plus_type,
1605 fold_convert (plus_type, negating ? b : e),
1606 fold_convert (plus_type, negating ? e : b));
1607 expr = fold_convert (diff_type, expr);
1608 if (negating)
1609 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1610 tree range = force_gimple_operand_gsi
1611 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1613 /* Determine number of iterations. */
1614 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1615 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1616 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1618 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1619 true, GSI_SAME_STMT);
1621 counts[ix].base = b;
1622 counts[ix].iters = iters;
1623 counts[ix].step = s;
1625 total = fold_build2 (MULT_EXPR, bound_type, total,
1626 fold_convert (bound_type, iters));
1629 return total;
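/* Illustrative note (not part of the original source): for an upward
   counting member loop
       for (v = B; v < E; v += S)
   the computation above amounts to
       range = E - B;
       iters = (range - 1 + S) / S;
   i.e. the iteration count rounded up, and the returned TOTAL is the
   product of the per-loop counts over the whole collapsed nest.  */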
1632 /* Emit initializers for collapsed loop members. INNER is true if
1633 this is for the element loop of a TILE. IVAR is the outer
1634 loop iteration variable, from which collapsed loop iteration values
1635 are calculated. COUNTS array has been initialized by
1636 expand_oacc_collapse_init. */
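/* Illustrative note (not part of the original source): for a collapse of
   two loops with iteration counts N1 (outer) and N2 (inner), the loop
   below recovers the member indices from the single counter IVAR as
       inner = IVAR % N2;  outer = IVAR / N2;
   and each loop variable is then set to its BASE (or tile outer var, for
   the element loop of a tile) plus index * STEP.  */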
1638 static void
1639 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1640 gimple_stmt_iterator *gsi,
1641 const oacc_collapse *counts, tree ivar,
1642 tree diff_type)
1644 tree ivar_type = TREE_TYPE (ivar);
1646 /* The most rapidly changing iteration variable is the innermost
1647 one. */
1648 for (int ix = fd->collapse; ix--;)
1650 const omp_for_data_loop *loop = &fd->loops[ix];
1651 const oacc_collapse *collapse = &counts[ix];
1652 tree v = inner ? loop->v : collapse->outer;
1653 tree iter_type = TREE_TYPE (v);
1654 tree plus_type = iter_type;
1655 enum tree_code plus_code = PLUS_EXPR;
1656 tree expr;
1658 if (POINTER_TYPE_P (iter_type))
1660 plus_code = POINTER_PLUS_EXPR;
1661 plus_type = sizetype;
1664 expr = ivar;
1665 if (ix)
1667 tree mod = fold_convert (ivar_type, collapse->iters);
1668 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1669 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1670 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1671 true, GSI_SAME_STMT);
1674 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1675 fold_convert (diff_type, collapse->step));
1676 expr = fold_build2 (plus_code, iter_type,
1677 inner ? collapse->outer : collapse->base,
1678 fold_convert (plus_type, expr));
1679 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1680 true, GSI_SAME_STMT);
1681 gassign *ass = gimple_build_assign (v, expr);
1682 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1686 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1687 of the combined collapse > 1 loop constructs, generate code like:
1688 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1689 if (cond3 is <)
1690 adj = STEP3 - 1;
1691 else
1692 adj = STEP3 + 1;
1693 count3 = (adj + N32 - N31) / STEP3;
1694 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1695 if (cond2 is <)
1696 adj = STEP2 - 1;
1697 else
1698 adj = STEP2 + 1;
1699 count2 = (adj + N22 - N21) / STEP2;
1700 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1701 if (cond1 is <)
1702 adj = STEP1 - 1;
1703 else
1704 adj = STEP1 + 1;
1705 count1 = (adj + N12 - N11) / STEP1;
1706 count = count1 * count2 * count3;
1707 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1708 count = 0;
1709 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1710 of the combined loop constructs, just initialize COUNTS array
1711 from the _looptemp_ clauses. For loop nests with non-rectangular
1712 loops, do this only for the rectangular loops. Then pick
1713 the loops which reference outer vars in their bound expressions
1714 and the loops which they refer to and for this sub-nest compute
1715 number of iterations. For triangular loops use Faulhaber's formula,
1716 otherwise as a fallback, compute by iterating the loops.
1717 If e.g. the sub-nest is
1718 for (I = N11; I COND1 N12; I += STEP1)
1719 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1720 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1722 COUNT = 0;
1723 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1724 for (tmpj = M21 * tmpi + N21;
1725 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1727 int tmpk1 = M31 * tmpj + N31;
1728 int tmpk2 = M32 * tmpj + N32;
1729 if (tmpk1 COND3 tmpk2)
1731 if (COND3 is <)
1732 adj = STEP3 - 1;
1733 else
1734 adj = STEP3 + 1;
1735 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1738 and finally multiply the counts of the rectangular loops not
1739 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1740 store number of iterations of the loops from fd->first_nonrect
1741 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1742 by the counts of rectangular loops not referenced in any non-rectangular
1743 loops sandwiched in between those. */
1745 /* NOTE: It *could* be better to moosh all of the BBs together,
1746 creating one larger BB with all the computation and the unexpected
1747 jump at the end. I.e.
1749 bool zero3, zero2, zero1, zero;
1751 zero3 = N32 c3 N31;
1752 count3 = (N32 - N31) /[cl] STEP3;
1753 zero2 = N22 c2 N21;
1754 count2 = (N22 - N21) /[cl] STEP2;
1755 zero1 = N12 c1 N11;
1756 count1 = (N12 - N11) /[cl] STEP1;
1757 zero = zero3 || zero2 || zero1;
1758 count = count1 * count2 * count3;
1759 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1761 After all, we expect the zero=false, and thus we expect to have to
1762 evaluate all of the comparison expressions, so short-circuiting
1763 oughtn't be a win. Since the condition isn't protecting a
1764 denominator, we're not concerned about divide-by-zero, so we can
1765 fully evaluate count even if a numerator turned out to be wrong.
1767 It seems like putting this all together would create much better
1768 scheduling opportunities, and less pressure on the chip's branch
1769 predictor. */
1771 static void
1772 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1773 basic_block &entry_bb, tree *counts,
1774 basic_block &zero_iter1_bb, int &first_zero_iter1,
1775 basic_block &zero_iter2_bb, int &first_zero_iter2,
1776 basic_block &l2_dom_bb)
1778 tree t, type = TREE_TYPE (fd->loop.v);
1779 edge e, ne;
1780 int i;
1782 /* Collapsed loops need work for expansion into SSA form. */
1783 gcc_assert (!gimple_in_ssa_p (cfun));
1785 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1786 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1788 gcc_assert (fd->ordered == 0);
1789 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1790 isn't supposed to be handled, as the inner loop doesn't
1791 use it. */
1792 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1793 OMP_CLAUSE__LOOPTEMP_);
1794 gcc_assert (innerc);
1795 for (i = 0; i < fd->collapse; i++)
1797 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1798 OMP_CLAUSE__LOOPTEMP_);
1799 gcc_assert (innerc);
1800 if (i)
1801 counts[i] = OMP_CLAUSE_DECL (innerc);
1802 else
1803 counts[0] = NULL_TREE;
1805 if (fd->non_rect
1806 && fd->last_nonrect == fd->first_nonrect + 1
1807 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1809 tree c[4];
1810 for (i = 0; i < 4; i++)
1812 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1813 OMP_CLAUSE__LOOPTEMP_);
1814 gcc_assert (innerc);
1815 c[i] = OMP_CLAUSE_DECL (innerc);
1817 counts[0] = c[0];
1818 fd->first_inner_iterations = c[1];
1819 fd->factor = c[2];
1820 fd->adjn1 = c[3];
1822 return;
1825 for (i = fd->collapse; i < fd->ordered; i++)
1827 tree itype = TREE_TYPE (fd->loops[i].v);
1828 counts[i] = NULL_TREE;
1829 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1830 fold_convert (itype, fd->loops[i].n1),
1831 fold_convert (itype, fd->loops[i].n2));
1832 if (t && integer_zerop (t))
1834 for (i = fd->collapse; i < fd->ordered; i++)
1835 counts[i] = build_int_cst (type, 0);
1836 break;
1839 bool rect_count_seen = false;
1840 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1842 tree itype = TREE_TYPE (fd->loops[i].v);
1844 if (i >= fd->collapse && counts[i])
1845 continue;
1846 if (fd->non_rect)
1848 /* Skip loops that use outer iterators in their expressions
1849 during this phase. */
1850 if (fd->loops[i].m1 || fd->loops[i].m2)
1852 counts[i] = build_zero_cst (type);
1853 continue;
1856 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1857 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1858 fold_convert (itype, fd->loops[i].n1),
1859 fold_convert (itype, fd->loops[i].n2)))
1860 == NULL_TREE || !integer_onep (t)))
1862 gcond *cond_stmt;
1863 tree n1, n2;
1864 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1865 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1866 true, GSI_SAME_STMT);
1867 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1868 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1869 true, GSI_SAME_STMT);
1870 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1871 NULL_TREE, NULL_TREE);
1872 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1873 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1874 expand_omp_regimplify_p, NULL, NULL)
1875 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1876 expand_omp_regimplify_p, NULL, NULL))
1878 *gsi = gsi_for_stmt (cond_stmt);
1879 gimple_regimplify_operands (cond_stmt, gsi);
1881 e = split_block (entry_bb, cond_stmt);
1882 basic_block &zero_iter_bb
1883 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1884 int &first_zero_iter
1885 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1886 if (zero_iter_bb == NULL)
1888 gassign *assign_stmt;
1889 first_zero_iter = i;
1890 zero_iter_bb = create_empty_bb (entry_bb);
1891 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1892 *gsi = gsi_after_labels (zero_iter_bb);
1893 if (i < fd->collapse)
1894 assign_stmt = gimple_build_assign (fd->loop.n2,
1895 build_zero_cst (type));
1896 else
1898 counts[i] = create_tmp_reg (type, ".count");
1899 assign_stmt
1900 = gimple_build_assign (counts[i], build_zero_cst (type));
1902 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1903 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1904 entry_bb);
1906 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1907 ne->probability = profile_probability::very_unlikely ();
1908 e->flags = EDGE_TRUE_VALUE;
1909 e->probability = ne->probability.invert ();
1910 if (l2_dom_bb == NULL)
1911 l2_dom_bb = entry_bb;
1912 entry_bb = e->dest;
1913 *gsi = gsi_last_nondebug_bb (entry_bb);
1916 if (POINTER_TYPE_P (itype))
1917 itype = signed_type_for (itype);
1918 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1919 ? -1 : 1));
1920 t = fold_build2 (PLUS_EXPR, itype,
1921 fold_convert (itype, fd->loops[i].step), t);
1922 t = fold_build2 (PLUS_EXPR, itype, t,
1923 fold_convert (itype, fd->loops[i].n2));
1924 t = fold_build2 (MINUS_EXPR, itype, t,
1925 fold_convert (itype, fd->loops[i].n1));
1926 /* ?? We could probably use CEIL_DIV_EXPR instead of
1927 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1928 generate the same code in the end because generically we
1929 don't know that the values involved must be negative for
1930 GT?? */
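/* (Editorial worked example of the trip count formula built below.)
   For
     for (i = 0; i < 10; i += 3)
   with cond_code == LT_EXPR this computes
     (STEP - 1 + N2 - N1) / STEP = (3 - 1 + 10 - 0) / 3 = 4
   which matches the four iterations i = 0, 3, 6, 9.  */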
1931 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1932 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1933 fold_build1 (NEGATE_EXPR, itype, t),
1934 fold_build1 (NEGATE_EXPR, itype,
1935 fold_convert (itype,
1936 fd->loops[i].step)));
1937 else
1938 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].step));
1940 t = fold_convert (type, t);
1941 if (TREE_CODE (t) == INTEGER_CST)
1942 counts[i] = t;
1943 else
1945 if (i < fd->collapse || i != first_zero_iter2)
1946 counts[i] = create_tmp_reg (type, ".count");
1947 expand_omp_build_assign (gsi, counts[i], t);
1949 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1951 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1952 continue;
1953 if (!rect_count_seen)
1955 t = counts[i];
1956 rect_count_seen = true;
1958 else
1959 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1960 expand_omp_build_assign (gsi, fd->loop.n2, t);
1963 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1965 gcc_assert (fd->last_nonrect != -1);
1967 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1968 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1969 build_zero_cst (type));
1970 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1971 if (fd->loops[i].m1
1972 || fd->loops[i].m2
1973 || fd->loops[i].non_rect_referenced)
1974 break;
1975 if (i == fd->last_nonrect
1976 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1977 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1979 int o = fd->first_nonrect;
1980 tree itype = TREE_TYPE (fd->loops[o].v);
1981 tree n1o = create_tmp_reg (itype, ".n1o");
1982 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1983 expand_omp_build_assign (gsi, n1o, t);
1984 tree n2o = create_tmp_reg (itype, ".n2o");
1985 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1986 expand_omp_build_assign (gsi, n2o, t);
1987 if (fd->loops[i].m1 && fd->loops[i].m2)
1988 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1989 unshare_expr (fd->loops[i].m1));
1990 else if (fd->loops[i].m1)
1991 t = fold_unary (NEGATE_EXPR, itype,
1992 unshare_expr (fd->loops[i].m1));
1993 else
1994 t = unshare_expr (fd->loops[i].m2);
1995 tree m2minusm1
1996 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1997 true, GSI_SAME_STMT);
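/* (Editorial note.)  With inner bounds M1 * outer + N1 and
   M2 * outer + N2, the inner range is (M2 - M1) * outer + (N2 - N1),
   so M2MINUSM1 is how fast that range grows per unit increase of the
   outer iterator; multiplied by OSTEP and divided by the inner STEP
   further below it yields FACTOR, the per-outer-iteration change of
   the inner trip count.  */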
1999 gimple_stmt_iterator gsi2 = *gsi;
2000 gsi_prev (&gsi2);
2001 e = split_block (entry_bb, gsi_stmt (gsi2));
2002 e = split_block (e->dest, (gimple *) NULL);
2003 basic_block bb1 = e->src;
2004 entry_bb = e->dest;
2005 *gsi = gsi_after_labels (entry_bb);
2007 gsi2 = gsi_after_labels (bb1);
2008 tree ostep = fold_convert (itype, fd->loops[o].step);
2009 t = build_int_cst (itype, (fd->loops[o].cond_code
2010 == LT_EXPR ? -1 : 1));
2011 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2012 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2013 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2014 if (TYPE_UNSIGNED (itype)
2015 && fd->loops[o].cond_code == GT_EXPR)
2016 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2017 fold_build1 (NEGATE_EXPR, itype, t),
2018 fold_build1 (NEGATE_EXPR, itype, ostep));
2019 else
2020 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2021 tree outer_niters
2022 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2025 build_one_cst (itype));
2026 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2027 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2028 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2029 true, GSI_SAME_STMT);
2030 tree n1, n2, n1e, n2e;
2031 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2032 if (fd->loops[i].m1)
2034 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2035 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2036 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2038 else
2039 n1 = t;
2040 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2041 true, GSI_SAME_STMT);
2042 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2043 if (fd->loops[i].m2)
2045 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2046 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2047 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2049 else
2050 n2 = t;
2051 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2052 true, GSI_SAME_STMT);
2053 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2054 if (fd->loops[i].m1)
2056 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2057 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2058 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2060 else
2061 n1e = t;
2062 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2063 true, GSI_SAME_STMT);
2064 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2065 if (fd->loops[i].m2)
2067 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2068 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2069 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2071 else
2072 n2e = t;
2073 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2074 true, GSI_SAME_STMT);
2075 gcond *cond_stmt
2076 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2077 NULL_TREE, NULL_TREE);
2078 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2079 e = split_block (bb1, cond_stmt);
2080 e->flags = EDGE_TRUE_VALUE;
2081 e->probability = profile_probability::likely ().guessed ();
2082 basic_block bb2 = e->dest;
2083 gsi2 = gsi_after_labels (bb2);
2085 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2086 NULL_TREE, NULL_TREE);
2087 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2088 e = split_block (bb2, cond_stmt);
2089 e->flags = EDGE_TRUE_VALUE;
2090 e->probability = profile_probability::likely ().guessed ();
2091 gsi2 = gsi_after_labels (e->dest);
2093 tree step = fold_convert (itype, fd->loops[i].step);
2094 t = build_int_cst (itype, (fd->loops[i].cond_code
2095 == LT_EXPR ? -1 : 1));
2096 t = fold_build2 (PLUS_EXPR, itype, step, t);
2097 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2098 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2099 if (TYPE_UNSIGNED (itype)
2100 && fd->loops[i].cond_code == GT_EXPR)
2101 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2102 fold_build1 (NEGATE_EXPR, itype, t),
2103 fold_build1 (NEGATE_EXPR, itype, step));
2104 else
2105 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2106 tree first_inner_iterations
2107 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2108 true, GSI_SAME_STMT);
2109 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2110 if (TYPE_UNSIGNED (itype)
2111 && fd->loops[i].cond_code == GT_EXPR)
2112 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2113 fold_build1 (NEGATE_EXPR, itype, t),
2114 fold_build1 (NEGATE_EXPR, itype, step));
2115 else
2116 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2117 tree factor
2118 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2119 true, GSI_SAME_STMT);
2120 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2121 build_one_cst (itype));
2122 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2123 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2124 t = fold_build2 (MULT_EXPR, itype, factor, t);
2125 t = fold_build2 (PLUS_EXPR, itype,
2126 fold_build2 (MULT_EXPR, itype, outer_niters,
2127 first_inner_iterations), t);
2128 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2129 fold_convert (type, t));
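/* I.e. (an editorial restatement of the computation just above)
     counts[fd->last_nonrect]
       = OUTER_NITERS * FIRST_INNER_ITERATIONS
	 + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2,
   the sum of the arithmetic progression of inner trip counts.  */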
2131 basic_block bb3 = create_empty_bb (bb1);
2132 add_bb_to_loop (bb3, bb1->loop_father);
2134 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2135 e->probability = profile_probability::unlikely ().guessed ();
2137 gsi2 = gsi_after_labels (bb3);
2138 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2139 NULL_TREE, NULL_TREE);
2140 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2141 e = split_block (bb3, cond_stmt);
2142 e->flags = EDGE_TRUE_VALUE;
2143 e->probability = profile_probability::likely ().guessed ();
2144 basic_block bb4 = e->dest;
2146 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2147 ne->probability = e->probability.invert ();
2149 basic_block bb5 = create_empty_bb (bb2);
2150 add_bb_to_loop (bb5, bb2->loop_father);
2152 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2153 ne->probability = profile_probability::unlikely ().guessed ();
2155 for (int j = 0; j < 2; j++)
2157 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2158 t = fold_build2 (MINUS_EXPR, itype,
2159 unshare_expr (fd->loops[i].n1),
2160 unshare_expr (fd->loops[i].n2));
2161 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2162 tree tem
2163 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2164 true, GSI_SAME_STMT);
2165 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2166 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2167 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2168 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2169 true, GSI_SAME_STMT);
2170 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2171 if (fd->loops[i].m1)
2173 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2174 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2175 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2177 else
2178 n1 = t;
2179 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2180 true, GSI_SAME_STMT);
2181 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2182 if (fd->loops[i].m2)
2184 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2185 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2186 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2188 else
2189 n2 = t;
2190 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2191 true, GSI_SAME_STMT);
2192 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2194 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2195 NULL_TREE, NULL_TREE);
2196 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2197 e = split_block (gsi_bb (gsi2), cond_stmt);
2198 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2199 e->probability = profile_probability::unlikely ().guessed ();
2200 ne = make_edge (e->src, bb1,
2201 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2202 ne->probability = e->probability.invert ();
2203 gsi2 = gsi_after_labels (e->dest);
2205 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2206 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2208 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2211 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2212 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2213 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2215 if (fd->first_nonrect + 1 == fd->last_nonrect)
2217 fd->first_inner_iterations = first_inner_iterations;
2218 fd->factor = factor;
2219 fd->adjn1 = n1o;
2222 else
2224 /* Fallback implementation. Evaluate the loops with m1/m2
2225 non-NULL as well as their outer loops at runtime using temporaries
2226 instead of the original iteration variables, and in the
2227 body just bump the counter. */
2228 gimple_stmt_iterator gsi2 = *gsi;
2229 gsi_prev (&gsi2);
2230 e = split_block (entry_bb, gsi_stmt (gsi2));
2231 e = split_block (e->dest, (gimple *) NULL);
2232 basic_block cur_bb = e->src;
2233 basic_block next_bb = e->dest;
2234 entry_bb = e->dest;
2235 *gsi = gsi_after_labels (entry_bb);
2237 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2238 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2240 for (i = 0; i <= fd->last_nonrect; i++)
2242 if (fd->loops[i].m1 == NULL_TREE
2243 && fd->loops[i].m2 == NULL_TREE
2244 && !fd->loops[i].non_rect_referenced)
2245 continue;
2247 tree itype = TREE_TYPE (fd->loops[i].v);
2249 gsi2 = gsi_after_labels (cur_bb);
2250 tree n1, n2;
2251 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2252 if (fd->loops[i].m1)
2254 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2255 n1 = fold_build2 (MULT_EXPR, itype,
2256 vs[i - fd->loops[i].outer], n1);
2257 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2259 else
2260 n1 = t;
2261 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2262 true, GSI_SAME_STMT);
2263 if (i < fd->last_nonrect)
2265 vs[i] = create_tmp_reg (itype, ".it");
2266 expand_omp_build_assign (&gsi2, vs[i], n1);
2268 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2269 if (fd->loops[i].m2)
2271 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2272 n2 = fold_build2 (MULT_EXPR, itype,
2273 vs[i - fd->loops[i].outer], n2);
2274 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2276 else
2277 n2 = t;
2278 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2279 true, GSI_SAME_STMT);
2280 if (i == fd->last_nonrect)
2282 gcond *cond_stmt
2283 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2284 NULL_TREE, NULL_TREE);
2285 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2286 e = split_block (cur_bb, cond_stmt);
2287 e->flags = EDGE_TRUE_VALUE;
2288 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2289 e->probability = profile_probability::likely ().guessed ();
2290 ne->probability = e->probability.invert ();
2291 gsi2 = gsi_after_labels (e->dest);
2293 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2294 ? -1 : 1));
2295 t = fold_build2 (PLUS_EXPR, itype,
2296 fold_convert (itype, fd->loops[i].step), t);
2297 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2298 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2299 tree step = fold_convert (itype, fd->loops[i].step);
2300 if (TYPE_UNSIGNED (itype)
2301 && fd->loops[i].cond_code == GT_EXPR)
2302 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2303 fold_build1 (NEGATE_EXPR, itype, t),
2304 fold_build1 (NEGATE_EXPR, itype, step));
2305 else
2306 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2307 t = fold_convert (type, t);
2308 t = fold_build2 (PLUS_EXPR, type,
2309 counts[fd->last_nonrect], t);
2310 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2312 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2313 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2314 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2315 break;
2317 e = split_block (cur_bb, last_stmt (cur_bb));
2319 basic_block new_cur_bb = create_empty_bb (cur_bb);
2320 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2322 gsi2 = gsi_after_labels (e->dest);
2323 tree step = fold_convert (itype,
2324 unshare_expr (fd->loops[i].step));
2325 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2326 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2327 true, GSI_SAME_STMT);
2328 expand_omp_build_assign (&gsi2, vs[i], t);
2330 ne = split_block (e->dest, last_stmt (e->dest));
2331 gsi2 = gsi_after_labels (ne->dest);
2333 gcond *cond_stmt
2334 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2335 NULL_TREE, NULL_TREE);
2336 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2337 edge e3, e4;
2338 if (next_bb == entry_bb)
2340 e3 = find_edge (ne->dest, next_bb);
2341 e3->flags = EDGE_FALSE_VALUE;
2343 else
2344 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2345 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2346 e4->probability = profile_probability::likely ().guessed ();
2347 e3->probability = e4->probability.invert ();
2348 basic_block esrc = e->src;
2349 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2350 cur_bb = new_cur_bb;
2351 basic_block latch_bb = next_bb;
2352 next_bb = e->dest;
2353 remove_edge (e);
2354 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2355 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2356 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2359 t = NULL_TREE;
2360 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2361 if (!fd->loops[i].non_rect_referenced
2362 && fd->loops[i].m1 == NULL_TREE
2363 && fd->loops[i].m2 == NULL_TREE)
2365 if (t == NULL_TREE)
2366 t = counts[i];
2367 else
2368 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2370 if (t)
2372 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2373 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2375 if (!rect_count_seen)
2376 t = counts[fd->last_nonrect];
2377 else
2378 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2379 counts[fd->last_nonrect]);
2380 expand_omp_build_assign (gsi, fd->loop.n2, t);
2382 else if (fd->non_rect)
2384 tree t = fd->loop.n2;
2385 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2386 int non_rect_referenced = 0, non_rect = 0;
2387 for (i = 0; i < fd->collapse; i++)
2389 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2390 && !integer_zerop (counts[i]))
2391 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2392 if (fd->loops[i].non_rect_referenced)
2393 non_rect_referenced++;
2394 if (fd->loops[i].m1 || fd->loops[i].m2)
2395 non_rect++;
2397 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2398 counts[fd->last_nonrect] = t;
2402 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2403 T = V;
2404 V3 = N31 + (T % count3) * STEP3;
2405 T = T / count3;
2406 V2 = N21 + (T % count2) * STEP2;
2407 T = T / count2;
2408 V1 = N11 + T * STEP1;
2409 if this loop doesn't have an inner loop construct combined with it.
2410 If it does have an inner loop construct combined with it and the
2411 iteration count isn't a known constant, store values from the counts
2412 into its _looptemp_ temporaries instead.
2413 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2414 inclusive), use the count of all those loops together, and either
2415 find quadratic etc. equation roots, or as a fallback, do:
2416 COUNT = 0;
2417 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2418 for (tmpj = M21 * tmpi + N21;
2419 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2421 int tmpk1 = M31 * tmpj + N31;
2422 int tmpk2 = M32 * tmpj + N32;
2423 if (tmpk1 COND3 tmpk2)
2425 if (COND3 is <)
2426 adj = STEP3 - 1;
2427 else
2428 adj = STEP3 + 1;
2429 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2430 if (COUNT + temp > T)
2432 V1 = tmpi;
2433 V2 = tmpj;
2434 V3 = tmpk1 + (T - COUNT) * STEP3;
2435 goto done;
2437 else
2438 COUNT += temp;
2441 done:;
2442 but for optional innermost or outermost rectangular loops that aren't
2443 referenced by other loop expressions keep doing the division/modulo. */
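/* (Editorial worked example of the division/modulo scheme above.)
   With (count1, count2, count3) = (4, 3, 5) and logical iteration
   number T = 37:
     T % count3 = 2, T /= count3  ->  T = 7
     T % count2 = 1, T /= count2  ->  T = 2
   so the per-loop indices are (2, 1, 2), consistent with
   2 * 3 * 5 + 1 * 5 + 2 = 37.  */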
2445 static void
2446 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2447 tree *counts, tree *nonrect_bounds,
2448 gimple *inner_stmt, tree startvar)
2450 int i;
2451 if (gimple_omp_for_combined_p (fd->for_stmt))
2453 /* If fd->loop.n2 is constant, then no propagation of the counts
2454 is needed, they are constant. */
2455 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2456 return;
2458 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2459 ? gimple_omp_taskreg_clauses (inner_stmt)
2460 : gimple_omp_for_clauses (inner_stmt);
2461 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2462 isn't supposed to be handled, as the inner loop doesn't
2463 use it. */
2464 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2465 gcc_assert (innerc);
2466 int count = 0;
2467 if (fd->non_rect
2468 && fd->last_nonrect == fd->first_nonrect + 1
2469 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2470 count = 4;
2471 for (i = 0; i < fd->collapse + count; i++)
2473 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2474 OMP_CLAUSE__LOOPTEMP_);
2475 gcc_assert (innerc);
2476 if (i)
2478 tree tem = OMP_CLAUSE_DECL (innerc);
2479 tree t;
2480 if (i < fd->collapse)
2481 t = counts[i];
2482 else
2483 switch (i - fd->collapse)
2485 case 0: t = counts[0]; break;
2486 case 1: t = fd->first_inner_iterations; break;
2487 case 2: t = fd->factor; break;
2488 case 3: t = fd->adjn1; break;
2489 default: gcc_unreachable ();
2491 t = fold_convert (TREE_TYPE (tem), t);
2492 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2493 false, GSI_CONTINUE_LINKING);
2494 gassign *stmt = gimple_build_assign (tem, t);
2495 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2498 return;
2501 tree type = TREE_TYPE (fd->loop.v);
2502 tree tem = create_tmp_reg (type, ".tem");
2503 gassign *stmt = gimple_build_assign (tem, startvar);
2504 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2506 for (i = fd->collapse - 1; i >= 0; i--)
2508 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2509 itype = vtype;
2510 if (POINTER_TYPE_P (vtype))
2511 itype = signed_type_for (vtype);
2512 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2513 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2514 else
2515 t = tem;
2516 if (i == fd->last_nonrect)
2518 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2519 false, GSI_CONTINUE_LINKING);
2520 tree stopval = t;
2521 tree idx = create_tmp_reg (type, ".count");
2522 expand_omp_build_assign (gsi, idx,
2523 build_zero_cst (type), true);
2524 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2525 if (fd->first_nonrect + 1 == fd->last_nonrect
2526 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2527 || fd->first_inner_iterations)
2528 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2529 != CODE_FOR_nothing)
2530 && !integer_zerop (fd->loop.n2))
2532 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2533 tree itype = TREE_TYPE (fd->loops[i].v);
2534 tree first_inner_iterations = fd->first_inner_iterations;
2535 tree factor = fd->factor;
2536 gcond *cond_stmt
2537 = gimple_build_cond (NE_EXPR, factor,
2538 build_zero_cst (TREE_TYPE (factor)),
2539 NULL_TREE, NULL_TREE);
2540 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2541 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2542 basic_block bb0 = e->src;
2543 e->flags = EDGE_TRUE_VALUE;
2544 e->probability = profile_probability::likely ();
2545 bb_triang_dom = bb0;
2546 *gsi = gsi_after_labels (e->dest);
2547 tree slltype = long_long_integer_type_node;
2548 tree ulltype = long_long_unsigned_type_node;
2549 tree stopvalull = fold_convert (ulltype, stopval);
2550 stopvalull
2551 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2552 false, GSI_CONTINUE_LINKING);
2553 first_inner_iterations
2554 = fold_convert (slltype, first_inner_iterations);
2555 first_inner_iterations
2556 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2557 NULL_TREE, false,
2558 GSI_CONTINUE_LINKING);
2559 factor = fold_convert (slltype, factor);
2560 factor
2561 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2562 false, GSI_CONTINUE_LINKING);
2563 tree first_inner_iterationsd
2564 = fold_build1 (FLOAT_EXPR, double_type_node,
2565 first_inner_iterations);
2566 first_inner_iterationsd
2567 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2568 NULL_TREE, false,
2569 GSI_CONTINUE_LINKING);
2570 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2571 factor);
2572 factord = force_gimple_operand_gsi (gsi, factord, true,
2573 NULL_TREE, false,
2574 GSI_CONTINUE_LINKING);
2575 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2576 stopvalull);
2577 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2578 NULL_TREE, false,
2579 GSI_CONTINUE_LINKING);
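/* (Editorial sketch of the math evaluated below.)  The first C outer
   iterations of the triangular sub-nest execute
     D (C) = FACTOR * C * (C - 1) / 2 + FIRST_INNER_ITERATIONS * C
   inner iterations, so the largest C with D (C) <= STOPVAL is the
   positive root of
     FACTOR / 2 * x * x + (FIRST_INNER_ITERATIONS - FACTOR / 2) * x
     - STOPVAL = 0
   i.e. x = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3) / FACTOR
   with T3 = FIRST_INNER_ITERATIONS - FACTOR / 2; the floating point
   code below computes this, truncates it, and then verifies it with
   the exact D-based comparisons, falling back to the iterative search
   if they fail.  */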
2580 /* Temporarily disable flag_rounding_math; the values will be
2581 decimal numbers divided by 2, and worst-case imprecision
2582 due to too large values ought to be caught later by the
2583 fallback checks. */
2584 int save_flag_rounding_math = flag_rounding_math;
2585 flag_rounding_math = 0;
2586 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2587 build_real (double_type_node, dconst2));
2588 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2589 first_inner_iterationsd, t);
2590 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2591 GSI_CONTINUE_LINKING);
2592 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2593 build_real (double_type_node, dconst2));
2594 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2595 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2596 fold_build2 (MULT_EXPR, double_type_node,
2597 t3, t3));
2598 flag_rounding_math = save_flag_rounding_math;
2599 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2600 GSI_CONTINUE_LINKING);
2601 if (flag_exceptions
2602 && cfun->can_throw_non_call_exceptions
2603 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2605 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2606 build_zero_cst (double_type_node));
2607 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2608 false, GSI_CONTINUE_LINKING);
2609 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2610 boolean_false_node,
2611 NULL_TREE, NULL_TREE);
2613 else
2614 cond_stmt
2615 = gimple_build_cond (LT_EXPR, t,
2616 build_zero_cst (double_type_node),
2617 NULL_TREE, NULL_TREE);
2618 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2619 e = split_block (gsi_bb (*gsi), cond_stmt);
2620 basic_block bb1 = e->src;
2621 e->flags = EDGE_FALSE_VALUE;
2622 e->probability = profile_probability::very_likely ();
2623 *gsi = gsi_after_labels (e->dest);
2624 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2625 tree sqrtr = create_tmp_var (double_type_node);
2626 gimple_call_set_lhs (call, sqrtr);
2627 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2628 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2629 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2630 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2631 tree c = create_tmp_var (ulltype);
2632 tree d = create_tmp_var (ulltype);
2633 expand_omp_build_assign (gsi, c, t, true);
2634 t = fold_build2 (MINUS_EXPR, ulltype, c,
2635 build_one_cst (ulltype));
2636 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2637 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2638 t = fold_build2 (MULT_EXPR, ulltype,
2639 fold_convert (ulltype, fd->factor), t);
2640 tree t2
2641 = fold_build2 (MULT_EXPR, ulltype, c,
2642 fold_convert (ulltype,
2643 fd->first_inner_iterations));
2644 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2645 expand_omp_build_assign (gsi, d, t, true);
2646 t = fold_build2 (MULT_EXPR, ulltype,
2647 fold_convert (ulltype, fd->factor), c);
2648 t = fold_build2 (PLUS_EXPR, ulltype,
2649 t, fold_convert (ulltype,
2650 fd->first_inner_iterations));
2651 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2652 GSI_CONTINUE_LINKING);
2653 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2654 NULL_TREE, NULL_TREE);
2655 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2656 e = split_block (gsi_bb (*gsi), cond_stmt);
2657 basic_block bb2 = e->src;
2658 e->flags = EDGE_TRUE_VALUE;
2659 e->probability = profile_probability::very_likely ();
2660 *gsi = gsi_after_labels (e->dest);
2661 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2662 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2663 GSI_CONTINUE_LINKING);
2664 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2665 NULL_TREE, NULL_TREE);
2666 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2667 e = split_block (gsi_bb (*gsi), cond_stmt);
2668 basic_block bb3 = e->src;
2669 e->flags = EDGE_FALSE_VALUE;
2670 e->probability = profile_probability::very_likely ();
2671 *gsi = gsi_after_labels (e->dest);
2672 t = fold_convert (itype, c);
2673 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2674 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2675 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 GSI_CONTINUE_LINKING);
2677 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2678 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2679 t2 = fold_convert (itype, t2);
2680 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2681 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2682 if (fd->loops[i].m1)
2684 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2685 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2687 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2688 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2689 bb_triang = e->src;
2690 *gsi = gsi_after_labels (e->dest);
2691 remove_edge (e);
2692 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2693 e->probability = profile_probability::very_unlikely ();
2694 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2695 e->probability = profile_probability::very_unlikely ();
2696 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2697 e->probability = profile_probability::very_unlikely ();
2699 basic_block bb4 = create_empty_bb (bb0);
2700 add_bb_to_loop (bb4, bb0->loop_father);
2701 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2702 e->probability = profile_probability::unlikely ();
2703 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2704 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2705 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2706 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2707 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2708 counts[i], counts[i - 1]);
2709 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2710 GSI_CONTINUE_LINKING);
2711 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2712 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2713 t = fold_convert (itype, t);
2714 t2 = fold_convert (itype, t2);
2715 t = fold_build2 (MULT_EXPR, itype, t,
2716 fold_convert (itype, fd->loops[i].step));
2717 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2718 t2 = fold_build2 (MULT_EXPR, itype, t2,
2719 fold_convert (itype, fd->loops[i - 1].step));
2720 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2721 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2722 false, GSI_CONTINUE_LINKING);
2723 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2724 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2725 if (fd->loops[i].m1)
2727 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2728 fd->loops[i - 1].v);
2729 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2731 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2732 false, GSI_CONTINUE_LINKING);
2733 stmt = gimple_build_assign (fd->loops[i].v, t);
2734 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2736 /* Fallback implementation. Evaluate the loops in between
2737 (inclusive) fd->first_nonrect and fd->last_nonrect at
2738 runtime using temporaries instead of the original iteration
2739 variables, and in the body just bump the counter and compare
2740 with the desired value. */
2741 gimple_stmt_iterator gsi2 = *gsi;
2742 basic_block entry_bb = gsi_bb (gsi2);
2743 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2744 e = split_block (e->dest, (gimple *) NULL);
2745 basic_block dom_bb = NULL;
2746 basic_block cur_bb = e->src;
2747 basic_block next_bb = e->dest;
2748 entry_bb = e->dest;
2749 *gsi = gsi_after_labels (entry_bb);
2751 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2752 tree n1 = NULL_TREE, n2 = NULL_TREE;
2753 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2755 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2757 tree itype = TREE_TYPE (fd->loops[j].v);
2758 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2759 && fd->loops[j].m2 == NULL_TREE
2760 && !fd->loops[j].non_rect_referenced);
2761 gsi2 = gsi_after_labels (cur_bb);
2762 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2763 if (fd->loops[j].m1)
2765 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2766 n1 = fold_build2 (MULT_EXPR, itype,
2767 vs[j - fd->loops[j].outer], n1);
2768 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2770 else if (rect_p)
2771 n1 = build_zero_cst (type);
2772 else
2773 n1 = t;
2774 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2775 true, GSI_SAME_STMT);
2776 if (j < fd->last_nonrect)
2778 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2779 expand_omp_build_assign (&gsi2, vs[j], n1);
2781 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2782 if (fd->loops[j].m2)
2784 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2785 n2 = fold_build2 (MULT_EXPR, itype,
2786 vs[j - fd->loops[j].outer], n2);
2787 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2789 else if (rect_p)
2790 n2 = counts[j];
2791 else
2792 n2 = t;
2793 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2794 true, GSI_SAME_STMT);
2795 if (j == fd->last_nonrect)
2797 gcond *cond_stmt
2798 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2799 NULL_TREE, NULL_TREE);
2800 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2801 e = split_block (cur_bb, cond_stmt);
2802 e->flags = EDGE_TRUE_VALUE;
2803 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2804 e->probability = profile_probability::likely ().guessed ();
2805 ne->probability = e->probability.invert ();
2806 gsi2 = gsi_after_labels (e->dest);
2808 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2809 ? -1 : 1));
2810 t = fold_build2 (PLUS_EXPR, itype,
2811 fold_convert (itype, fd->loops[j].step), t);
2812 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2813 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2814 tree step = fold_convert (itype, fd->loops[j].step);
2815 if (TYPE_UNSIGNED (itype)
2816 && fd->loops[j].cond_code == GT_EXPR)
2817 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2818 fold_build1 (NEGATE_EXPR, itype, t),
2819 fold_build1 (NEGATE_EXPR, itype, step));
2820 else
2821 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2822 t = fold_convert (type, t);
2823 t = fold_build2 (PLUS_EXPR, type, idx, t);
2824 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2825 true, GSI_SAME_STMT);
2826 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2827 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2828 cond_stmt
2829 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2830 NULL_TREE);
2831 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2832 e = split_block (gsi_bb (gsi2), cond_stmt);
2833 e->flags = EDGE_TRUE_VALUE;
2834 e->probability = profile_probability::likely ().guessed ();
2835 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2836 ne->probability = e->probability.invert ();
2837 gsi2 = gsi_after_labels (e->dest);
2838 expand_omp_build_assign (&gsi2, idx, t);
2839 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2840 break;
2842 e = split_block (cur_bb, last_stmt (cur_bb));
2844 basic_block new_cur_bb = create_empty_bb (cur_bb);
2845 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2847 gsi2 = gsi_after_labels (e->dest);
2848 if (rect_p)
2849 t = fold_build2 (PLUS_EXPR, type, vs[j],
2850 build_one_cst (type));
2851 else
2853 tree step
2854 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2855 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2857 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2858 true, GSI_SAME_STMT);
2859 expand_omp_build_assign (&gsi2, vs[j], t);
2861 edge ne = split_block (e->dest, last_stmt (e->dest));
2862 gsi2 = gsi_after_labels (ne->dest);
2864 gcond *cond_stmt;
2865 if (next_bb == entry_bb)
2866 /* No need to actually check the outermost condition. */
2867 cond_stmt
2868 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2869 boolean_true_node,
2870 NULL_TREE, NULL_TREE);
2871 else
2872 cond_stmt
2873 = gimple_build_cond (rect_p ? LT_EXPR
2874 : fd->loops[j].cond_code,
2875 vs[j], n2, NULL_TREE, NULL_TREE);
2876 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2877 edge e3, e4;
2878 if (next_bb == entry_bb)
2880 e3 = find_edge (ne->dest, next_bb);
2881 e3->flags = EDGE_FALSE_VALUE;
2882 dom_bb = ne->dest;
2884 else
2885 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2886 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2887 e4->probability = profile_probability::likely ().guessed ();
2888 e3->probability = e4->probability.invert ();
2889 basic_block esrc = e->src;
2890 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2891 cur_bb = new_cur_bb;
2892 basic_block latch_bb = next_bb;
2893 next_bb = e->dest;
2894 remove_edge (e);
2895 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2896 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2897 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2899 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2901 tree itype = TREE_TYPE (fd->loops[j].v);
2902 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2903 && fd->loops[j].m2 == NULL_TREE
2904 && !fd->loops[j].non_rect_referenced);
2905 if (j == fd->last_nonrect)
2907 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2908 t = fold_convert (itype, t);
2909 tree t2
2910 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2911 t = fold_build2 (MULT_EXPR, itype, t, t2);
2912 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2914 else if (rect_p)
2916 t = fold_convert (itype, vs[j]);
2917 t = fold_build2 (MULT_EXPR, itype, t,
2918 fold_convert (itype, fd->loops[j].step));
2919 if (POINTER_TYPE_P (vtype))
2920 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2921 else
2922 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2924 else
2925 t = vs[j];
2926 t = force_gimple_operand_gsi (gsi, t, false,
2927 NULL_TREE, true,
2928 GSI_SAME_STMT);
2929 stmt = gimple_build_assign (fd->loops[j].v, t);
2930 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2932 if (gsi_end_p (*gsi))
2933 *gsi = gsi_last_bb (gsi_bb (*gsi));
2934 else
2935 gsi_prev (gsi);
2936 if (bb_triang)
2938 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2939 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2940 *gsi = gsi_after_labels (e->dest);
2941 if (!gsi_end_p (*gsi))
2942 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2943 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2946 else
2948 t = fold_convert (itype, t);
2949 t = fold_build2 (MULT_EXPR, itype, t,
2950 fold_convert (itype, fd->loops[i].step));
2951 if (POINTER_TYPE_P (vtype))
2952 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2953 else
2954 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2955 t = force_gimple_operand_gsi (gsi, t,
2956 DECL_P (fd->loops[i].v)
2957 && TREE_ADDRESSABLE (fd->loops[i].v),
2958 NULL_TREE, false,
2959 GSI_CONTINUE_LINKING);
2960 stmt = gimple_build_assign (fd->loops[i].v, t);
2961 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2963 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2965 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2967 false, GSI_CONTINUE_LINKING);
2968 stmt = gimple_build_assign (tem, t);
2969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2971 if (i == fd->last_nonrect)
2972 i = fd->first_nonrect;
2974 if (fd->non_rect)
2975 for (i = 0; i <= fd->last_nonrect; i++)
2976 if (fd->loops[i].m2)
2978 tree itype = TREE_TYPE (fd->loops[i].v);
2980 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2981 t = fold_build2 (MULT_EXPR, itype,
2982 fd->loops[i - fd->loops[i].outer].v, t);
2983 t = fold_build2 (PLUS_EXPR, itype, t,
2984 fold_convert (itype,
2985 unshare_expr (fd->loops[i].n2)));
2986 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2987 t = force_gimple_operand_gsi (gsi, t, false,
2988 NULL_TREE, false,
2989 GSI_CONTINUE_LINKING);
2990 stmt = gimple_build_assign (nonrect_bounds[i], t);
2991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2995 /* Helper function for expand_omp_for_*. Generate code like:
2996 L10:
2997 V3 += STEP3;
2998 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2999 L11:
3000 V3 = N31;
3001 V2 += STEP2;
3002 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3003 L12:
3004 V2 = N21;
3005 V1 += STEP1;
3006 goto BODY_BB;
3007 For non-rectangular loops, use temporaries stored in nonrect_bounds
3008 for the upper bounds if M?2 multiplier is present. Given e.g.
3009 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3010 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3011 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3012 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3014 L10:
3015 V4 += STEP4;
3016 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3017 L11:
3018 V4 = N41 + M41 * V2; // This can be left out if the loop
3019 // refers to the immediate parent loop
3020 V3 += STEP3;
3021 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3022 L12:
3023 V3 = N31;
3024 V2 += STEP2;
3025 if (V2 cond2 N22) goto L120; else goto L13;
3026 L120:
3027 V4 = N41 + M41 * V2;
3028 NONRECT_BOUND4 = N42 + M42 * V2;
3029 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3030 L13:
3031 V2 = N21;
3032 V1 += STEP1;
3033 goto L120; */
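/* (Editorial sketch.)  For a plain doubly collapsed rectangular nest
   the block built below is just the "carry" logic of an odometer:
     V2 += STEP2;
     if (V2 cond2 N22) goto BODY_BB;
     V2 = N21;
     V1 += STEP1;
     goto BODY_BB;
   The non-rectangular variants only differ in recomputing dependent
   lower bounds and the NONRECT_BOUND upper bounds whenever a referenced
   outer iterator changes.  */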
3035 static basic_block
3036 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3037 basic_block cont_bb, basic_block body_bb)
3039 basic_block last_bb, bb, collapse_bb = NULL;
3040 int i;
3041 gimple_stmt_iterator gsi;
3042 edge e;
3043 tree t;
3044 gimple *stmt;
3046 last_bb = cont_bb;
3047 for (i = fd->collapse - 1; i >= 0; i--)
3049 tree vtype = TREE_TYPE (fd->loops[i].v);
3051 bb = create_empty_bb (last_bb);
3052 add_bb_to_loop (bb, last_bb->loop_father);
3053 gsi = gsi_start_bb (bb);
3055 if (i < fd->collapse - 1)
3057 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3058 e->probability
3059 = profile_probability::guessed_always ().apply_scale (1, 8);
3061 struct omp_for_data_loop *l = &fd->loops[i + 1];
3062 if (l->m1 == NULL_TREE || l->outer != 1)
3064 t = l->n1;
3065 if (l->m1)
3067 tree t2
3068 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3069 fd->loops[i + 1 - l->outer].v, l->m1);
3070 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3072 t = force_gimple_operand_gsi (&gsi, t,
3073 DECL_P (l->v)
3074 && TREE_ADDRESSABLE (l->v),
3075 NULL_TREE, false,
3076 GSI_CONTINUE_LINKING);
3077 stmt = gimple_build_assign (l->v, t);
3078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3081 else
3082 collapse_bb = bb;
3084 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3086 if (POINTER_TYPE_P (vtype))
3087 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3088 else
3089 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3090 t = force_gimple_operand_gsi (&gsi, t,
3091 DECL_P (fd->loops[i].v)
3092 && TREE_ADDRESSABLE (fd->loops[i].v),
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 stmt = gimple_build_assign (fd->loops[i].v, t);
3095 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3097 if (fd->loops[i].non_rect_referenced)
3099 basic_block update_bb = NULL, prev_bb = NULL;
3100 for (int j = i + 1; j <= fd->last_nonrect; j++)
3101 if (j - fd->loops[j].outer == i)
3103 tree n1, n2;
3104 struct omp_for_data_loop *l = &fd->loops[j];
3105 basic_block this_bb = create_empty_bb (last_bb);
3106 add_bb_to_loop (this_bb, last_bb->loop_father);
3107 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3108 if (prev_bb)
3110 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3111 e->probability
3112 = profile_probability::guessed_always ().apply_scale (7,
3114 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3116 if (l->m1)
3118 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3119 fd->loops[i].v);
3120 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3121 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3122 false,
3123 GSI_CONTINUE_LINKING);
3124 stmt = gimple_build_assign (l->v, n1);
3125 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3126 n1 = l->v;
3128 else
3129 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3130 NULL_TREE, false,
3131 GSI_CONTINUE_LINKING);
3132 if (l->m2)
3134 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3135 fd->loops[i].v);
3136 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3137 t, unshare_expr (l->n2));
3138 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3139 false,
3140 GSI_CONTINUE_LINKING);
3141 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3142 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3143 n2 = nonrect_bounds[j];
3145 else
3146 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3147 true, NULL_TREE, false,
3148 GSI_CONTINUE_LINKING);
3149 gcond *cond_stmt
3150 = gimple_build_cond (l->cond_code, n1, n2,
3151 NULL_TREE, NULL_TREE);
3152 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3153 if (update_bb == NULL)
3154 update_bb = this_bb;
3155 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3156 e->probability
3157 = profile_probability::guessed_always ().apply_scale (1, 8);
3158 if (prev_bb == NULL)
3159 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3160 prev_bb = this_bb;
3162 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3163 e->probability
3164 = profile_probability::guessed_always ().apply_scale (7, 8);
3165 body_bb = update_bb;
3168 if (i > 0)
3170 if (fd->loops[i].m2)
3171 t = nonrect_bounds[i];
3172 else
3173 t = unshare_expr (fd->loops[i].n2);
3174 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3175 false, GSI_CONTINUE_LINKING);
3176 tree v = fd->loops[i].v;
3177 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3178 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3179 false, GSI_CONTINUE_LINKING);
3180 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3181 stmt = gimple_build_cond_empty (t);
3182 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3183 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3184 expand_omp_regimplify_p, NULL, NULL)
3185 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3186 expand_omp_regimplify_p, NULL, NULL))
3187 gimple_regimplify_operands (stmt, &gsi);
3188 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3189 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3191 else
3192 make_edge (bb, body_bb, EDGE_FALLTHRU);
3193 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3194 last_bb = bb;
3197 return collapse_bb;
3200 /* Expand #pragma omp ordered depend(source). */
3202 static void
3203 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3204 tree *counts, location_t loc)
3206 enum built_in_function source_ix
3207 = fd->iter_type == long_integer_type_node
3208 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3209 gimple *g
3210 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3211 build_fold_addr_expr (counts[fd->ordered]));
3212 gimple_set_location (g, loc);
3213 gsi_insert_before (gsi, g, GSI_SAME_STMT);
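/* (Editorial sketch, not the exact libgomp calling convention.)  The
   call emitted above is roughly
     GOMP_doacross_post (&.orditera[0]);
   where .orditera (counts[fd->ordered]) is the array holding the
   current iteration vector of the ordered (N) loops; the _ULL builtin
   is used when the iteration type is unsigned long long.  */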
3216 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3218 static void
3219 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3220 tree *counts, tree c, location_t loc)
3222 auto_vec<tree, 10> args;
3223 enum built_in_function sink_ix
3224 = fd->iter_type == long_integer_type_node
3225 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3226 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3227 int i;
3228 gimple_stmt_iterator gsi2 = *gsi;
3229 bool warned_step = false;
3231 for (i = 0; i < fd->ordered; i++)
3233 tree step = NULL_TREE;
3234 off = TREE_PURPOSE (deps);
3235 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3237 step = TREE_OPERAND (off, 1);
3238 off = TREE_OPERAND (off, 0);
3240 if (!integer_zerop (off))
3242 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3243 || fd->loops[i].cond_code == GT_EXPR);
3244 bool forward = fd->loops[i].cond_code == LT_EXPR;
3245 if (step)
3247 /* Non-simple Fortran DO loops. If step is variable,
2248 we don't know even the direction at compile time, so
2249 we can't warn. */
3250 if (TREE_CODE (step) != INTEGER_CST)
3251 break;
3252 forward = tree_int_cst_sgn (step) != -1;
3254 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3255 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3256 "waiting for lexically later iteration");
3257 break;
3259 deps = TREE_CHAIN (deps);
3261 /* If all offsets corresponding to the collapsed loops are zero,
3262 this depend clause can be ignored. FIXME: but there is still a
3263 flush needed. We need to emit one __sync_synchronize () for it
3264 though (perhaps conditionally)? Solve this together with the
3265 conservative dependence folding optimization.
3266 if (i >= fd->collapse)
3267 return; */
3269 deps = OMP_CLAUSE_DECL (c);
3270 gsi_prev (&gsi2);
3271 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3272 edge e2 = split_block_after_labels (e1->dest);
3274 gsi2 = gsi_after_labels (e1->dest);
3275 *gsi = gsi_last_bb (e1->src);
3276 for (i = 0; i < fd->ordered; i++)
3278 tree itype = TREE_TYPE (fd->loops[i].v);
3279 tree step = NULL_TREE;
3280 tree orig_off = NULL_TREE;
3281 if (POINTER_TYPE_P (itype))
3282 itype = sizetype;
3283 if (i)
3284 deps = TREE_CHAIN (deps);
3285 off = TREE_PURPOSE (deps);
3286 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3288 step = TREE_OPERAND (off, 1);
3289 off = TREE_OPERAND (off, 0);
3290 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3291 && integer_onep (fd->loops[i].step)
3292 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3294 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3295 if (step)
3297 off = fold_convert_loc (loc, itype, off);
3298 orig_off = off;
3299 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3302 if (integer_zerop (off))
3303 t = boolean_true_node;
3304 else
3306 tree a;
3307 tree co = fold_convert_loc (loc, itype, off);
3308 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3310 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3311 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3312 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3313 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3314 co);
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3317 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3318 fd->loops[i].v, co);
3319 else
3320 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3321 fd->loops[i].v, co);
3322 if (step)
3324 tree t1, t2;
3325 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3326 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3327 fd->loops[i].n1);
3328 else
3329 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3330 fd->loops[i].n2);
3331 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3332 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3333 fd->loops[i].n2);
3334 else
3335 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3336 fd->loops[i].n1);
3337 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3338 step, build_int_cst (TREE_TYPE (step), 0));
3339 if (TREE_CODE (step) != INTEGER_CST)
3341 t1 = unshare_expr (t1);
3342 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3343 false, GSI_CONTINUE_LINKING);
3344 t2 = unshare_expr (t2);
3345 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3346 false, GSI_CONTINUE_LINKING);
3348 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3349 t, t2, t1);
3351 else if (fd->loops[i].cond_code == LT_EXPR)
3353 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3354 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3355 fd->loops[i].n1);
3356 else
3357 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3358 fd->loops[i].n2);
3360 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3361 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3362 fd->loops[i].n2);
3363 else
3364 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3365 fd->loops[i].n1);
3367 if (cond)
3368 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3369 else
3370 cond = t;
3372 off = fold_convert_loc (loc, itype, off);
3374 if (step
3375 || (fd->loops[i].cond_code == LT_EXPR
3376 ? !integer_onep (fd->loops[i].step)
3377 : !integer_minus_onep (fd->loops[i].step)))
3379 if (step == NULL_TREE
3380 && TYPE_UNSIGNED (itype)
3381 && fd->loops[i].cond_code == GT_EXPR)
3382 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3383 fold_build1_loc (loc, NEGATE_EXPR, itype,
3384 s));
3385 else
3386 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3387 orig_off ? orig_off : off, s);
3388 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3389 build_int_cst (itype, 0));
3390 if (integer_zerop (t) && !warned_step)
3392 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3393 "refers to iteration never in the iteration "
3394 "space");
3395 warned_step = true;
3397 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3398 cond, t);
3401 if (i <= fd->collapse - 1 && fd->collapse > 1)
3402 t = fd->loop.v;
3403 else if (counts[i])
3404 t = counts[i];
3405 else
3407 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3408 fd->loops[i].v, fd->loops[i].n1);
3409 t = fold_convert_loc (loc, fd->iter_type, t);
3411 if (step)
3412 /* We have divided off by step already earlier. */;
3413 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3414 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3415 fold_build1_loc (loc, NEGATE_EXPR, itype,
3416 s));
3417 else
3418 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3419 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3420 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3421 off = fold_convert_loc (loc, fd->iter_type, off);
3422 if (i <= fd->collapse - 1 && fd->collapse > 1)
3424 if (i)
3425 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3426 off);
3427 if (i < fd->collapse - 1)
3429 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3430 counts[i]);
3431 continue;
3434 off = unshare_expr (off);
3435 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3436 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3437 true, GSI_SAME_STMT);
3438 args.safe_push (t);
3440 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3441 gimple_set_location (g, loc);
3442 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3444 cond = unshare_expr (cond);
3445 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3446 GSI_CONTINUE_LINKING);
3447 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3448 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3449 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3450 e1->probability = e3->probability.invert ();
3451 e1->flags = EDGE_TRUE_VALUE;
3452 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3454 *gsi = gsi_after_labels (e2->dest);
3457 /* Expand all #pragma omp ordered depend(source) and
3458 #pragma omp ordered depend(sink:...) constructs in the current
3459 #pragma omp for ordered(n) region. */
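/* A hedged illustration (not part of the original source; f below is just a
   placeholder): for a doacross loop such as

     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
	   a[i][j] = f (a[i-1][j], a[i][j-1]);
	   #pragma omp ordered depend(source)
	 }

   each depend(sink:) clause is expanded by expand_omp_ordered_sink into a
   conditional call to the libgomp doacross wait entry point, and the
   depend(source) clause by expand_omp_ordered_source into the matching post
   call (GOMP_doacross_wait/GOMP_doacross_post or their _ull variants,
   assuming the usual libgomp interface).  */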
3461 static void
3462 expand_omp_ordered_source_sink (struct omp_region *region,
3463 struct omp_for_data *fd, tree *counts,
3464 basic_block cont_bb)
3466 struct omp_region *inner;
3467 int i;
3468 for (i = fd->collapse - 1; i < fd->ordered; i++)
3469 if (i == fd->collapse - 1 && fd->collapse > 1)
3470 counts[i] = NULL_TREE;
3471 else if (i >= fd->collapse && !cont_bb)
3472 counts[i] = build_zero_cst (fd->iter_type);
3473 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3474 && integer_onep (fd->loops[i].step))
3475 counts[i] = NULL_TREE;
3476 else
3477 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3478 tree atype
3479 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3480 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3481 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3483 for (inner = region->inner; inner; inner = inner->next)
3484 if (inner->type == GIMPLE_OMP_ORDERED)
3486 gomp_ordered *ord_stmt = inner->ord_stmt;
3487 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3488 location_t loc = gimple_location (ord_stmt);
3489 tree c;
3490 for (c = gimple_omp_ordered_clauses (ord_stmt);
3491 c; c = OMP_CLAUSE_CHAIN (c))
3492 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3493 break;
3494 if (c)
3495 expand_omp_ordered_source (&gsi, fd, counts, loc);
3496 for (c = gimple_omp_ordered_clauses (ord_stmt);
3497 c; c = OMP_CLAUSE_CHAIN (c))
3498 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3499 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3500 gsi_remove (&gsi, true);
3504 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3505 collapsed. */
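/* A hedged illustration (not in the original source): with
   #pragma omp for ordered(2) and no collapse clause on

     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 BODY;

   fd->collapse == 1 and fd->ordered == 2, so only the i loop is divided
   among the threads.  This function rebuilds the inner j loop around BODY
   within each thread's chunk: the initialization of j, a new header with
   the j < m test, a latch incrementing j, and the .orditer/.orditera
   bookkeeping consumed by the source/sink expansion above.  */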
3507 static basic_block
3508 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3509 basic_block cont_bb, basic_block body_bb,
3510 bool ordered_lastprivate)
3512 if (fd->ordered == fd->collapse)
3513 return cont_bb;
3515 if (!cont_bb)
3517 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3518 for (int i = fd->collapse; i < fd->ordered; i++)
3520 tree type = TREE_TYPE (fd->loops[i].v);
3521 tree n1 = fold_convert (type, fd->loops[i].n1);
3522 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3523 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3524 size_int (i - fd->collapse + 1),
3525 NULL_TREE, NULL_TREE);
3526 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3528 return NULL;
3531 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3533 tree t, type = TREE_TYPE (fd->loops[i].v);
3534 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3535 expand_omp_build_assign (&gsi, fd->loops[i].v,
3536 fold_convert (type, fd->loops[i].n1));
3537 if (counts[i])
3538 expand_omp_build_assign (&gsi, counts[i],
3539 build_zero_cst (fd->iter_type));
3540 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3541 size_int (i - fd->collapse + 1),
3542 NULL_TREE, NULL_TREE);
3543 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3544 if (!gsi_end_p (gsi))
3545 gsi_prev (&gsi);
3546 else
3547 gsi = gsi_last_bb (body_bb);
3548 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3549 basic_block new_body = e1->dest;
3550 if (body_bb == cont_bb)
3551 cont_bb = new_body;
3552 edge e2 = NULL;
3553 basic_block new_header;
3554 if (EDGE_COUNT (cont_bb->preds) > 0)
3556 gsi = gsi_last_bb (cont_bb);
3557 if (POINTER_TYPE_P (type))
3558 t = fold_build_pointer_plus (fd->loops[i].v,
3559 fold_convert (sizetype,
3560 fd->loops[i].step));
3561 else
3562 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3563 fold_convert (type, fd->loops[i].step));
3564 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3565 if (counts[i])
3567 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3568 build_int_cst (fd->iter_type, 1));
3569 expand_omp_build_assign (&gsi, counts[i], t);
3570 t = counts[i];
3572 else
3574 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3575 fd->loops[i].v, fd->loops[i].n1);
3576 t = fold_convert (fd->iter_type, t);
3577 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3578 true, GSI_SAME_STMT);
3580 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3581 size_int (i - fd->collapse + 1),
3582 NULL_TREE, NULL_TREE);
3583 expand_omp_build_assign (&gsi, aref, t);
3584 gsi_prev (&gsi);
3585 e2 = split_block (cont_bb, gsi_stmt (gsi));
3586 new_header = e2->dest;
3588 else
3589 new_header = cont_bb;
3590 gsi = gsi_after_labels (new_header);
3591 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3592 true, GSI_SAME_STMT);
3593 tree n2
3594 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3595 true, NULL_TREE, true, GSI_SAME_STMT);
3596 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3597 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3598 edge e3 = split_block (new_header, gsi_stmt (gsi));
3599 cont_bb = e3->dest;
3600 remove_edge (e1);
3601 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3602 e3->flags = EDGE_FALSE_VALUE;
3603 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3604 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3605 e1->probability = e3->probability.invert ();
3607 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3608 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3610 if (e2)
3612 class loop *loop = alloc_loop ();
3613 loop->header = new_header;
3614 loop->latch = e2->src;
3615 add_loop (loop, body_bb->loop_father);
3619 /* If there are any lastprivate clauses and it is possible some loops
3620 might have zero iterations, ensure all the decls are initialized;
3621 otherwise we could crash evaluating C++ class iterators with lastprivate
3622 clauses. */
3623 bool need_inits = false;
3624 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3625 if (need_inits)
3627 tree type = TREE_TYPE (fd->loops[i].v);
3628 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3629 expand_omp_build_assign (&gsi, fd->loops[i].v,
3630 fold_convert (type, fd->loops[i].n1));
3632 else
3634 tree type = TREE_TYPE (fd->loops[i].v);
3635 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3636 boolean_type_node,
3637 fold_convert (type, fd->loops[i].n1),
3638 fold_convert (type, fd->loops[i].n2));
3639 if (!integer_onep (this_cond))
3640 need_inits = true;
3643 return cont_bb;
3646 /* A subroutine of expand_omp_for. Generate code for a parallel
3647 loop with any schedule. Given parameters:
3649 for (V = N1; V cond N2; V += STEP) BODY;
3651 where COND is "<" or ">", we generate pseudocode
3653 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3654 if (more) goto L0; else goto L3;
3656 V = istart0;
3657 iend = iend0;
3659 BODY;
3660 V += STEP;
3661 if (V cond iend) goto L1; else goto L2;
3663 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3666 If this is a combined omp parallel loop, instead of the call to
3667 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3668 If this is gimple_omp_for_combined_p loop, then instead of assigning
3669 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3670 inner GIMPLE_OMP_FOR and V += STEP; and
3671 if (V cond iend) goto L1; else goto L2; are removed.
3673 For collapsed loops, given parameters:
3674 collapse(3)
3675 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3676 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3677 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3678 BODY;
3680 we generate pseudocode
3682 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3683 if (cond3 is <)
3684 adj = STEP3 - 1;
3685 else
3686 adj = STEP3 + 1;
3687 count3 = (adj + N32 - N31) / STEP3;
3688 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3689 if (cond2 is <)
3690 adj = STEP2 - 1;
3691 else
3692 adj = STEP2 + 1;
3693 count2 = (adj + N22 - N21) / STEP2;
3694 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3695 if (cond1 is <)
3696 adj = STEP1 - 1;
3697 else
3698 adj = STEP1 + 1;
3699 count1 = (adj + N12 - N11) / STEP1;
3700 count = count1 * count2 * count3;
3701 goto Z1;
3703 count = 0;
3705 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3706 if (more) goto L0; else goto L3;
3708 V = istart0;
3709 T = V;
3710 V3 = N31 + (T % count3) * STEP3;
3711 T = T / count3;
3712 V2 = N21 + (T % count2) * STEP2;
3713 T = T / count2;
3714 V1 = N11 + T * STEP1;
3715 iend = iend0;
3717 BODY;
3718 V += 1;
3719 if (V < iend) goto L10; else goto L2;
3720 L10:
3721 V3 += STEP3;
3722 if (V3 cond3 N32) goto L1; else goto L11;
3723 L11:
3724 V3 = N31;
3725 V2 += STEP2;
3726 if (V2 cond2 N22) goto L1; else goto L12;
3727 L12:
3728 V2 = N21;
3729 V1 += STEP1;
3730 goto L1;
3732 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
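/* A hedged concrete instance (not in the original source): for
   #pragma omp for schedule(dynamic, 4) with a plain long iteration type,
   START_FN/NEXT_FN as chosen by the caller are typically the libgomp
   entry points GOMP_loop_dynamic_start/GOMP_loop_dynamic_next (the exact
   names depend on the schedule kind and modifiers), so the generated code
   corresponds roughly to

     if (GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0))
       do
	 {
	   for (V = istart0; V cond iend0; V += STEP)
	     BODY;
	 }
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   with GOMP_loop_end{,_nowait,_cancel} emitted from the exit block, as can
   be seen further down in this function.  */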
3737 static void
3738 expand_omp_for_generic (struct omp_region *region,
3739 struct omp_for_data *fd,
3740 enum built_in_function start_fn,
3741 enum built_in_function next_fn,
3742 tree sched_arg,
3743 gimple *inner_stmt)
3745 tree type, istart0, iend0, iend;
3746 tree t, vmain, vback, bias = NULL_TREE;
3747 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3748 basic_block l2_bb = NULL, l3_bb = NULL;
3749 gimple_stmt_iterator gsi;
3750 gassign *assign_stmt;
3751 bool in_combined_parallel = is_combined_parallel (region);
3752 bool broken_loop = region->cont == NULL;
3753 edge e, ne;
3754 tree *counts = NULL;
3755 int i;
3756 bool ordered_lastprivate = false;
3758 gcc_assert (!broken_loop || !in_combined_parallel);
3759 gcc_assert (fd->iter_type == long_integer_type_node
3760 || !in_combined_parallel);
3762 entry_bb = region->entry;
3763 cont_bb = region->cont;
3764 collapse_bb = NULL;
3765 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3766 gcc_assert (broken_loop
3767 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3768 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3769 l1_bb = single_succ (l0_bb);
3770 if (!broken_loop)
3772 l2_bb = create_empty_bb (cont_bb);
3773 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3774 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3775 == l1_bb));
3776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3778 else
3779 l2_bb = NULL;
3780 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3781 exit_bb = region->exit;
3783 gsi = gsi_last_nondebug_bb (entry_bb);
3785 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3786 if (fd->ordered
3787 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3788 OMP_CLAUSE_LASTPRIVATE))
3789 ordered_lastprivate = true;
3790 tree reductions = NULL_TREE;
3791 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3792 tree memv = NULL_TREE;
3793 if (fd->lastprivate_conditional)
3795 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3796 OMP_CLAUSE__CONDTEMP_);
3797 if (fd->have_pointer_condtemp)
3798 condtemp = OMP_CLAUSE_DECL (c);
3799 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3800 cond_var = OMP_CLAUSE_DECL (c);
3802 if (sched_arg)
3804 if (fd->have_reductemp)
3806 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3807 OMP_CLAUSE__REDUCTEMP_);
3808 reductions = OMP_CLAUSE_DECL (c);
3809 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3810 gimple *g = SSA_NAME_DEF_STMT (reductions);
3811 reductions = gimple_assign_rhs1 (g);
3812 OMP_CLAUSE_DECL (c) = reductions;
3813 entry_bb = gimple_bb (g);
3814 edge e = split_block (entry_bb, g);
3815 if (region->entry == entry_bb)
3816 region->entry = e->dest;
3817 gsi = gsi_last_bb (entry_bb);
3819 else
3820 reductions = null_pointer_node;
3821 if (fd->have_pointer_condtemp)
3823 tree type = TREE_TYPE (condtemp);
3824 memv = create_tmp_var (type);
3825 TREE_ADDRESSABLE (memv) = 1;
3826 unsigned HOST_WIDE_INT sz
3827 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3828 sz *= fd->lastprivate_conditional;
3829 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3830 false);
3831 mem = build_fold_addr_expr (memv);
3833 else
3834 mem = null_pointer_node;
3836 if (fd->collapse > 1 || fd->ordered)
3838 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3839 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3841 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3842 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3843 zero_iter1_bb, first_zero_iter1,
3844 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3846 if (zero_iter1_bb)
3848 /* Some counts[i] vars might be uninitialized if
3849 some loop has zero iterations. But the body shouldn't
3850 be executed in that case, so just avoid uninit warnings. */
3851 for (i = first_zero_iter1;
3852 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3853 if (SSA_VAR_P (counts[i]))
3854 suppress_warning (counts[i], OPT_Wuninitialized);
3855 gsi_prev (&gsi);
3856 e = split_block (entry_bb, gsi_stmt (gsi));
3857 entry_bb = e->dest;
3858 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3859 gsi = gsi_last_nondebug_bb (entry_bb);
3860 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3861 get_immediate_dominator (CDI_DOMINATORS,
3862 zero_iter1_bb));
3864 if (zero_iter2_bb)
3866 /* Some counts[i] vars might be uninitialized if
3867 some loop has zero iterations. But the body shouldn't
3868 be executed in that case, so just avoid uninit warnings. */
3869 for (i = first_zero_iter2; i < fd->ordered; i++)
3870 if (SSA_VAR_P (counts[i]))
3871 suppress_warning (counts[i], OPT_Wuninitialized);
3872 if (zero_iter1_bb)
3873 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3874 else
3876 gsi_prev (&gsi);
3877 e = split_block (entry_bb, gsi_stmt (gsi));
3878 entry_bb = e->dest;
3879 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3880 gsi = gsi_last_nondebug_bb (entry_bb);
3881 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3882 get_immediate_dominator
3883 (CDI_DOMINATORS, zero_iter2_bb));
3886 if (fd->collapse == 1)
3888 counts[0] = fd->loop.n2;
3889 fd->loop = fd->loops[0];
3893 type = TREE_TYPE (fd->loop.v);
3894 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3895 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3896 TREE_ADDRESSABLE (istart0) = 1;
3897 TREE_ADDRESSABLE (iend0) = 1;
3899 /* See if we need to bias by LLONG_MIN. */
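/* A hedged numeric illustration: when the iteration variable is a signed
   64-bit type but the runtime interface uses unsigned long long, the bias
   is (unsigned long long) LLONG_MIN == 0x8000000000000000.  Adding it maps
   e.g. N1 = -8 to 0x7ffffffffffffff8 and N2 = 8 to 0x8000000000000008, so
   the signed range is shifted monotonically into the unsigned iterator
   space; the bias is subtracted again below when V is recomputed from
   istart0/iend0.  */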
3900 if (fd->iter_type == long_long_unsigned_type_node
3901 && TREE_CODE (type) == INTEGER_TYPE
3902 && !TYPE_UNSIGNED (type)
3903 && fd->ordered == 0)
3905 tree n1, n2;
3907 if (fd->loop.cond_code == LT_EXPR)
3909 n1 = fd->loop.n1;
3910 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3912 else
3914 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3915 n2 = fd->loop.n1;
3917 if (TREE_CODE (n1) != INTEGER_CST
3918 || TREE_CODE (n2) != INTEGER_CST
3919 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3920 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3923 gimple_stmt_iterator gsif = gsi;
3924 gsi_prev (&gsif);
3926 tree arr = NULL_TREE;
3927 if (in_combined_parallel)
3929 gcc_assert (fd->ordered == 0);
3930 /* In a combined parallel loop, emit a call to
3931 GOMP_loop_foo_next. */
3932 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3933 build_fold_addr_expr (istart0),
3934 build_fold_addr_expr (iend0));
3936 else
3938 tree t0, t1, t2, t3, t4;
3939 /* If this is not a combined parallel loop, emit a call to
3940 GOMP_loop_foo_start in ENTRY_BB. */
3941 t4 = build_fold_addr_expr (iend0);
3942 t3 = build_fold_addr_expr (istart0);
3943 if (fd->ordered)
3945 t0 = build_int_cst (unsigned_type_node,
3946 fd->ordered - fd->collapse + 1);
3947 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3948 fd->ordered
3949 - fd->collapse + 1),
3950 ".omp_counts");
3951 DECL_NAMELESS (arr) = 1;
3952 TREE_ADDRESSABLE (arr) = 1;
3953 TREE_STATIC (arr) = 1;
3954 vec<constructor_elt, va_gc> *v;
3955 vec_alloc (v, fd->ordered - fd->collapse + 1);
3956 int idx;
3958 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3960 tree c;
3961 if (idx == 0 && fd->collapse > 1)
3962 c = fd->loop.n2;
3963 else
3964 c = counts[idx + fd->collapse - 1];
3965 tree purpose = size_int (idx);
3966 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3967 if (TREE_CODE (c) != INTEGER_CST)
3968 TREE_STATIC (arr) = 0;
3971 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3972 if (!TREE_STATIC (arr))
3973 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3974 void_type_node, arr),
3975 true, NULL_TREE, true, GSI_SAME_STMT);
3976 t1 = build_fold_addr_expr (arr);
3977 t2 = NULL_TREE;
3979 else
3981 t2 = fold_convert (fd->iter_type, fd->loop.step);
3982 t1 = fd->loop.n2;
3983 t0 = fd->loop.n1;
3984 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3986 tree innerc
3987 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3988 OMP_CLAUSE__LOOPTEMP_);
3989 gcc_assert (innerc);
3990 t0 = OMP_CLAUSE_DECL (innerc);
3991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3992 OMP_CLAUSE__LOOPTEMP_);
3993 gcc_assert (innerc);
3994 t1 = OMP_CLAUSE_DECL (innerc);
3996 if (POINTER_TYPE_P (TREE_TYPE (t0))
3997 && TYPE_PRECISION (TREE_TYPE (t0))
3998 != TYPE_PRECISION (fd->iter_type))
4000 /* Avoid casting pointers to integer of a different size. */
4001 tree itype = signed_type_for (type);
4002 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4003 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4005 else
4007 t1 = fold_convert (fd->iter_type, t1);
4008 t0 = fold_convert (fd->iter_type, t0);
4010 if (bias)
4012 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4013 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4016 if (fd->iter_type == long_integer_type_node || fd->ordered)
4018 if (fd->chunk_size)
4020 t = fold_convert (fd->iter_type, fd->chunk_size);
4021 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4022 if (sched_arg)
4024 if (fd->ordered)
4025 t = build_call_expr (builtin_decl_explicit (start_fn),
4026 8, t0, t1, sched_arg, t, t3, t4,
4027 reductions, mem);
4028 else
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 9, t0, t1, t2, sched_arg, t, t3, t4,
4031 reductions, mem);
4033 else if (fd->ordered)
4034 t = build_call_expr (builtin_decl_explicit (start_fn),
4035 5, t0, t1, t, t3, t4);
4036 else
4037 t = build_call_expr (builtin_decl_explicit (start_fn),
4038 6, t0, t1, t2, t, t3, t4);
4040 else if (fd->ordered)
4041 t = build_call_expr (builtin_decl_explicit (start_fn),
4042 4, t0, t1, t3, t4);
4043 else
4044 t = build_call_expr (builtin_decl_explicit (start_fn),
4045 5, t0, t1, t2, t3, t4);
4047 else
4049 tree t5;
4050 tree c_bool_type;
4051 tree bfn_decl;
4053 /* The GOMP_loop_ull_*start functions have an additional boolean
4054 argument, true for < loops and false for > loops.
4055 In Fortran, the C bool type can be different from
4056 boolean_type_node. */
4057 bfn_decl = builtin_decl_explicit (start_fn);
4058 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4059 t5 = build_int_cst (c_bool_type,
4060 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4061 if (fd->chunk_size)
4063 tree bfn_decl = builtin_decl_explicit (start_fn);
4064 t = fold_convert (fd->iter_type, fd->chunk_size);
4065 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4066 if (sched_arg)
4067 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4068 t, t3, t4, reductions, mem);
4069 else
4070 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4072 else
4073 t = build_call_expr (builtin_decl_explicit (start_fn),
4074 6, t5, t0, t1, t2, t3, t4);
4077 if (TREE_TYPE (t) != boolean_type_node)
4078 t = fold_build2 (NE_EXPR, boolean_type_node,
4079 t, build_int_cst (TREE_TYPE (t), 0));
4080 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4081 true, GSI_SAME_STMT);
4082 if (arr && !TREE_STATIC (arr))
4084 tree clobber = build_clobber (TREE_TYPE (arr));
4085 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4086 GSI_SAME_STMT);
4088 if (fd->have_pointer_condtemp)
4089 expand_omp_build_assign (&gsi, condtemp, memv, false);
4090 if (fd->have_reductemp)
4092 gimple *g = gsi_stmt (gsi);
4093 gsi_remove (&gsi, true);
4094 release_ssa_name (gimple_assign_lhs (g));
4096 entry_bb = region->entry;
4097 gsi = gsi_last_nondebug_bb (entry_bb);
4099 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4101 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4103 /* Remove the GIMPLE_OMP_FOR statement. */
4104 gsi_remove (&gsi, true);
4106 if (gsi_end_p (gsif))
4107 gsif = gsi_after_labels (gsi_bb (gsif));
4108 gsi_next (&gsif);
4110 /* Iteration setup for sequential loop goes in L0_BB. */
4111 tree startvar = fd->loop.v;
4112 tree endvar = NULL_TREE;
4114 if (gimple_omp_for_combined_p (fd->for_stmt))
4116 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4117 && gimple_omp_for_kind (inner_stmt)
4118 == GF_OMP_FOR_KIND_SIMD);
4119 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4120 OMP_CLAUSE__LOOPTEMP_);
4121 gcc_assert (innerc);
4122 startvar = OMP_CLAUSE_DECL (innerc);
4123 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4124 OMP_CLAUSE__LOOPTEMP_);
4125 gcc_assert (innerc);
4126 endvar = OMP_CLAUSE_DECL (innerc);
4129 gsi = gsi_start_bb (l0_bb);
4130 t = istart0;
4131 if (fd->ordered && fd->collapse == 1)
4132 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4133 fold_convert (fd->iter_type, fd->loop.step));
4134 else if (bias)
4135 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4136 if (fd->ordered && fd->collapse == 1)
4138 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4139 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4140 fd->loop.n1, fold_convert (sizetype, t));
4141 else
4143 t = fold_convert (TREE_TYPE (startvar), t);
4144 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4145 fd->loop.n1, t);
4148 else
4150 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4151 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4152 t = fold_convert (TREE_TYPE (startvar), t);
4154 t = force_gimple_operand_gsi (&gsi, t,
4155 DECL_P (startvar)
4156 && TREE_ADDRESSABLE (startvar),
4157 NULL_TREE, false, GSI_CONTINUE_LINKING);
4158 assign_stmt = gimple_build_assign (startvar, t);
4159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4160 if (cond_var)
4162 tree itype = TREE_TYPE (cond_var);
4163 /* For lastprivate(conditional:) itervar, we need some iteration
4164 counter that starts at unsigned non-zero and increases.
4165 Prefer as few IVs as possible, so if we can use startvar
4166 itself, use that, or startvar + constant (those would be
4167 incremented with step), and as a last resort use a separate counter
4168 starting at s0 + 1 and incremented by 1. */
4169 if ((fd->ordered && fd->collapse == 1)
4170 || bias
4171 || POINTER_TYPE_P (type)
4172 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4173 || fd->loop.cond_code != LT_EXPR)
4174 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4175 build_int_cst (itype, 1));
4176 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4177 t = fold_convert (itype, t);
4178 else
4180 tree c = fold_convert (itype, fd->loop.n1);
4181 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4182 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4184 t = force_gimple_operand_gsi (&gsi, t, false,
4185 NULL_TREE, false, GSI_CONTINUE_LINKING);
4186 assign_stmt = gimple_build_assign (cond_var, t);
4187 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4190 t = iend0;
4191 if (fd->ordered && fd->collapse == 1)
4192 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4193 fold_convert (fd->iter_type, fd->loop.step));
4194 else if (bias)
4195 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4196 if (fd->ordered && fd->collapse == 1)
4198 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4199 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4200 fd->loop.n1, fold_convert (sizetype, t));
4201 else
4203 t = fold_convert (TREE_TYPE (startvar), t);
4204 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4205 fd->loop.n1, t);
4208 else
4210 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4211 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4212 t = fold_convert (TREE_TYPE (startvar), t);
4214 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4215 false, GSI_CONTINUE_LINKING);
4216 if (endvar)
4218 assign_stmt = gimple_build_assign (endvar, iend);
4219 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4220 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4221 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4222 else
4223 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4224 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4226 /* Handle linear clause adjustments. */
4227 tree itercnt = NULL_TREE;
4228 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4229 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4230 c; c = OMP_CLAUSE_CHAIN (c))
4231 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4232 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4234 tree d = OMP_CLAUSE_DECL (c);
4235 tree t = d, a, dest;
4236 if (omp_privatize_by_reference (t))
4237 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4238 tree type = TREE_TYPE (t);
4239 if (POINTER_TYPE_P (type))
4240 type = sizetype;
4241 dest = unshare_expr (t);
4242 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4243 expand_omp_build_assign (&gsif, v, t);
4244 if (itercnt == NULL_TREE)
4246 itercnt = startvar;
4247 tree n1 = fd->loop.n1;
4248 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4250 itercnt
4251 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4252 itercnt);
4253 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4255 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4256 itercnt, n1);
4257 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4258 itercnt, fd->loop.step);
4259 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4260 NULL_TREE, false,
4261 GSI_CONTINUE_LINKING);
4263 a = fold_build2 (MULT_EXPR, type,
4264 fold_convert (type, itercnt),
4265 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4266 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4267 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4268 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4269 false, GSI_CONTINUE_LINKING);
4270 expand_omp_build_assign (&gsi, dest, t, true);
4272 if (fd->collapse > 1)
4273 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4275 if (fd->ordered)
4277 /* Until now, the counts array contained the number of iterations (or
4278 a variable containing it) for the ith loop. From now on, we need
4279 those counts only for the collapsed loops, and only for the 2nd
4280 through the last collapsed one. Move those one element earlier;
4281 we'll use counts[fd->collapse - 1] for the first source/sink
4282 iteration counter and so on, and counts[fd->ordered]
4283 as the array holding the current counter values for
4284 depend(source). */
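/* For instance (a sketch, not from the original source): with
   fd->collapse == 2 and fd->ordered == 3, the count of the 2nd collapsed
   loop moves from counts[1] to counts[0]; counts[1] then becomes the
   iteration counter for the first source/sink dimension, counts[2] (when
   needed) the counter for the remaining non-collapsed ordered loop, and
   counts[3] the .orditera array read by the depend(source) expansion.  */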
4285 if (fd->collapse > 1)
4286 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4287 if (broken_loop)
4289 int i;
4290 for (i = fd->collapse; i < fd->ordered; i++)
4292 tree type = TREE_TYPE (fd->loops[i].v);
4293 tree this_cond
4294 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4295 fold_convert (type, fd->loops[i].n1),
4296 fold_convert (type, fd->loops[i].n2));
4297 if (!integer_onep (this_cond))
4298 break;
4300 if (i < fd->ordered)
4302 cont_bb
4303 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4304 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4305 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4306 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4307 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4308 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4309 make_edge (cont_bb, l1_bb, 0);
4310 l2_bb = create_empty_bb (cont_bb);
4311 broken_loop = false;
4314 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4315 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4316 ordered_lastprivate);
4317 if (counts[fd->collapse - 1])
4319 gcc_assert (fd->collapse == 1);
4320 gsi = gsi_last_bb (l0_bb);
4321 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4322 istart0, true);
4323 if (cont_bb)
4325 gsi = gsi_last_bb (cont_bb);
4326 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4327 counts[fd->collapse - 1],
4328 build_int_cst (fd->iter_type, 1));
4329 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4330 tree aref = build4 (ARRAY_REF, fd->iter_type,
4331 counts[fd->ordered], size_zero_node,
4332 NULL_TREE, NULL_TREE);
4333 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4335 t = counts[fd->collapse - 1];
4337 else if (fd->collapse > 1)
4338 t = fd->loop.v;
4339 else
4341 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4342 fd->loops[0].v, fd->loops[0].n1);
4343 t = fold_convert (fd->iter_type, t);
4345 gsi = gsi_last_bb (l0_bb);
4346 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4347 size_zero_node, NULL_TREE, NULL_TREE);
4348 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4349 false, GSI_CONTINUE_LINKING);
4350 expand_omp_build_assign (&gsi, aref, t, true);
4353 if (!broken_loop)
4355 /* Code to control the increment and predicate for the sequential
4356 loop goes in the CONT_BB. */
4357 gsi = gsi_last_nondebug_bb (cont_bb);
4358 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4359 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4360 vmain = gimple_omp_continue_control_use (cont_stmt);
4361 vback = gimple_omp_continue_control_def (cont_stmt);
4363 if (cond_var)
4365 tree itype = TREE_TYPE (cond_var);
4366 tree t2;
4367 if ((fd->ordered && fd->collapse == 1)
4368 || bias
4369 || POINTER_TYPE_P (type)
4370 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4371 || fd->loop.cond_code != LT_EXPR)
4372 t2 = build_int_cst (itype, 1);
4373 else
4374 t2 = fold_convert (itype, fd->loop.step);
4375 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4376 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4377 NULL_TREE, true, GSI_SAME_STMT);
4378 assign_stmt = gimple_build_assign (cond_var, t2);
4379 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4382 if (!gimple_omp_for_combined_p (fd->for_stmt))
4384 if (POINTER_TYPE_P (type))
4385 t = fold_build_pointer_plus (vmain, fd->loop.step);
4386 else
4387 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4388 t = force_gimple_operand_gsi (&gsi, t,
4389 DECL_P (vback)
4390 && TREE_ADDRESSABLE (vback),
4391 NULL_TREE, true, GSI_SAME_STMT);
4392 assign_stmt = gimple_build_assign (vback, t);
4393 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4395 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4397 tree tem;
4398 if (fd->collapse > 1)
4399 tem = fd->loop.v;
4400 else
4402 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4403 fd->loops[0].v, fd->loops[0].n1);
4404 tem = fold_convert (fd->iter_type, tem);
4406 tree aref = build4 (ARRAY_REF, fd->iter_type,
4407 counts[fd->ordered], size_zero_node,
4408 NULL_TREE, NULL_TREE);
4409 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4410 true, GSI_SAME_STMT);
4411 expand_omp_build_assign (&gsi, aref, tem);
4414 t = build2 (fd->loop.cond_code, boolean_type_node,
4415 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4416 iend);
4417 gcond *cond_stmt = gimple_build_cond_empty (t);
4418 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4421 /* Remove GIMPLE_OMP_CONTINUE. */
4422 gsi_remove (&gsi, true);
4424 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4425 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4427 /* Emit code to get the next parallel iteration in L2_BB. */
4428 gsi = gsi_start_bb (l2_bb);
4430 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4431 build_fold_addr_expr (istart0),
4432 build_fold_addr_expr (iend0));
4433 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4434 false, GSI_CONTINUE_LINKING);
4435 if (TREE_TYPE (t) != boolean_type_node)
4436 t = fold_build2 (NE_EXPR, boolean_type_node,
4437 t, build_int_cst (TREE_TYPE (t), 0));
4438 gcond *cond_stmt = gimple_build_cond_empty (t);
4439 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4442 /* Add the loop cleanup function. */
4443 gsi = gsi_last_nondebug_bb (exit_bb);
4444 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4445 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4446 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4447 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4448 else
4449 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4450 gcall *call_stmt = gimple_build_call (t, 0);
4451 if (fd->ordered)
4453 tree arr = counts[fd->ordered];
4454 tree clobber = build_clobber (TREE_TYPE (arr));
4455 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4456 GSI_SAME_STMT);
4458 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4460 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4461 if (fd->have_reductemp)
4463 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4464 gimple_call_lhs (call_stmt));
4465 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4468 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4469 gsi_remove (&gsi, true);
4471 /* Connect the new blocks. */
4472 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4473 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4475 if (!broken_loop)
4477 gimple_seq phis;
4479 e = find_edge (cont_bb, l3_bb);
4480 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4482 phis = phi_nodes (l3_bb);
4483 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4485 gimple *phi = gsi_stmt (gsi);
4486 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4487 PHI_ARG_DEF_FROM_EDGE (phi, e));
4489 remove_edge (e);
4491 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4492 e = find_edge (cont_bb, l1_bb);
4493 if (e == NULL)
4495 e = BRANCH_EDGE (cont_bb);
4496 gcc_assert (single_succ (e->dest) == l1_bb);
4498 if (gimple_omp_for_combined_p (fd->for_stmt))
4500 remove_edge (e);
4501 e = NULL;
4503 else if (fd->collapse > 1)
4505 remove_edge (e);
4506 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4508 else
4509 e->flags = EDGE_TRUE_VALUE;
4510 if (e)
4512 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4513 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4515 else
4517 e = find_edge (cont_bb, l2_bb);
4518 e->flags = EDGE_FALLTHRU;
4520 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4522 if (gimple_in_ssa_p (cfun))
4524 /* Add phis to the outer loop that connect to the phis in the inner,
4525 original loop, and move the loop entry value of the inner phi to
4526 the loop entry value of the outer phi. */
4527 gphi_iterator psi;
4528 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4530 location_t locus;
4531 gphi *nphi;
4532 gphi *exit_phi = psi.phi ();
4534 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4535 continue;
4537 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4538 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4540 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4541 edge latch_to_l1 = find_edge (latch, l1_bb);
4542 gphi *inner_phi
4543 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4545 tree t = gimple_phi_result (exit_phi);
4546 tree new_res = copy_ssa_name (t, NULL);
4547 nphi = create_phi_node (new_res, l0_bb);
4549 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4550 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4551 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4552 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4553 add_phi_arg (nphi, t, entry_to_l0, locus);
4555 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4556 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4558 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4562 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4563 recompute_dominator (CDI_DOMINATORS, l2_bb));
4564 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4565 recompute_dominator (CDI_DOMINATORS, l3_bb));
4566 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4567 recompute_dominator (CDI_DOMINATORS, l0_bb));
4568 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4569 recompute_dominator (CDI_DOMINATORS, l1_bb));
4571 /* We enter expand_omp_for_generic with a loop. This original loop may
4572 have its own loop struct, or it may be part of an outer loop struct
4573 (which may be the fake loop). */
4574 class loop *outer_loop = entry_bb->loop_father;
4575 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4577 add_bb_to_loop (l2_bb, outer_loop);
4579 /* We've added a new loop around the original loop. Allocate the
4580 corresponding loop struct. */
4581 class loop *new_loop = alloc_loop ();
4582 new_loop->header = l0_bb;
4583 new_loop->latch = l2_bb;
4584 add_loop (new_loop, outer_loop);
4586 /* Allocate a loop structure for the original loop unless we already
4587 had one. */
4588 if (!orig_loop_has_loop_struct
4589 && !gimple_omp_for_combined_p (fd->for_stmt))
4591 class loop *orig_loop = alloc_loop ();
4592 orig_loop->header = l1_bb;
4593 /* The loop may have multiple latches. */
4594 add_loop (orig_loop, new_loop);
4599 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4600 compute the needed allocation size: for !ALLOC that of the team
4601 allocations, for ALLOC that of the thread allocation. SZ is the initial
4602 size needed for other purposes, ALLOC_ALIGN the guaranteed alignment of
4603 the allocation in bytes, and CNT the number of elements of each array:
4604 for !ALLOC this is omp_get_num_threads (), for ALLOC the number of
4605 iterations handled by the current thread. If PTR is non-NULL, it is the
4606 start of the allocation and this routine assigns to OMP_CLAUSE_DECL (c)
4607 of the _scantemp_ clauses pointers to the corresponding arrays. */
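/* A hedged usage sketch, based on the calls in expand_omp_for_static_nochunk
   below: the needed size is first computed with PTR == NULL, e.g.

     size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, alloc_align,
				       nthreads, NULL, false);

   and once the block has been allocated the routine is called again with
   PTR pointing at it, so that each _scantemp_ clause decl receives its
   slice of the allocation:

     expand_omp_scantemp_alloc (clauses, ptr, sz, alloc_align, nthreads,
				&gsi, false);  */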
4609 static tree
4610 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4611 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4612 gimple_stmt_iterator *gsi, bool alloc)
4614 tree eltsz = NULL_TREE;
4615 unsigned HOST_WIDE_INT preval = 0;
4616 if (ptr && sz)
4617 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4618 ptr, size_int (sz));
4619 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4620 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4621 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4622 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4624 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4625 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4626 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4628 unsigned HOST_WIDE_INT szl
4629 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4630 szl = least_bit_hwi (szl);
4631 if (szl)
4632 al = MIN (al, szl);
4634 if (ptr == NULL_TREE)
4636 if (eltsz == NULL_TREE)
4637 eltsz = TYPE_SIZE_UNIT (pointee_type);
4638 else
4639 eltsz = size_binop (PLUS_EXPR, eltsz,
4640 TYPE_SIZE_UNIT (pointee_type));
4642 if (preval == 0 && al <= alloc_align)
4644 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4645 sz += diff;
4646 if (diff && ptr)
4647 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4648 ptr, size_int (diff));
4650 else if (al > preval)
4652 if (ptr)
4654 ptr = fold_convert (pointer_sized_int_node, ptr);
4655 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4656 build_int_cst (pointer_sized_int_node,
4657 al - 1));
4658 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4659 build_int_cst (pointer_sized_int_node,
4660 -(HOST_WIDE_INT) al));
4661 ptr = fold_convert (ptr_type_node, ptr);
4663 else
4664 sz += al - 1;
4666 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4667 preval = al;
4668 else
4669 preval = 1;
4670 if (ptr)
4672 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4673 ptr = OMP_CLAUSE_DECL (c);
4674 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4675 size_binop (MULT_EXPR, cnt,
4676 TYPE_SIZE_UNIT (pointee_type)));
4680 if (ptr == NULL_TREE)
4682 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4683 if (sz)
4684 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4685 return eltsz;
4687 else
4688 return ptr;
4691 /* Return the last _looptemp_ clause if one has been created for
4692 lastprivate on distribute parallel for{, simd} or taskloop.
4693 FD is the loop data and INNERC should be the second _looptemp_
4694 clause (the one holding the end of the range).
4695 This is followed by collapse - 1 _looptemp_ clauses for the
4696 counts[1] and up, and for triangular loops followed by 4
4697 further _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4698 one for factor and one for adjn1). After this there is optionally one
4699 _looptemp_ clause that this function returns. */
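/* A hedged example: for a rectangular collapse(3) taskloop the clause chain
   is

     _looptemp_ (start), _looptemp_ (end)   <-- INNERC passed in,
     _looptemp_ (counts[1]), _looptemp_ (counts[2]),

   and this function then returns the following _looptemp_ clause, if
   present, i.e. the one created for lastprivate handling.  */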
4701 static tree
4702 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4704 gcc_assert (innerc);
4705 int count = fd->collapse - 1;
4706 if (fd->non_rect
4707 && fd->last_nonrect == fd->first_nonrect + 1
4708 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4709 count += 4;
4710 for (int i = 0; i < count; i++)
4712 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4713 OMP_CLAUSE__LOOPTEMP_);
4714 gcc_assert (innerc);
4716 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4717 OMP_CLAUSE__LOOPTEMP_);
4720 /* A subroutine of expand_omp_for. Generate code for a parallel
4721 loop with static schedule and no specified chunk size. Given
4722 parameters:
4724 for (V = N1; V cond N2; V += STEP) BODY;
4726 where COND is "<" or ">", we generate pseudocode
4728 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4729 if (cond is <)
4730 adj = STEP - 1;
4731 else
4732 adj = STEP + 1;
4733 if ((__typeof (V)) -1 > 0 && cond is >)
4734 n = -(adj + N2 - N1) / -STEP;
4735 else
4736 n = (adj + N2 - N1) / STEP;
4737 q = n / nthreads;
4738 tt = n % nthreads;
4739 if (threadid < tt) goto L3; else goto L4;
4741 tt = 0;
4742 q = q + 1;
4744 s0 = q * threadid + tt;
4745 e0 = s0 + q;
4746 V = s0 * STEP + N1;
4747 if (s0 >= e0) goto L2; else goto L0;
4749 e = e0 * STEP + N1;
4751 BODY;
4752 V += STEP;
4753 if (V cond e) goto L1;
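/* A worked example of the partitioning above (illustrative only): with
   n = 10 iterations and nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2,
   so threads 0 and 1 (threadid < tt) execute q + 1 = 3 iterations each and
   threads 2 and 3 execute 2 each:

     thread 0: s0 = 0, e0 = 3	thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8	thread 3: s0 = 8, e0 = 10

   covering all 10 iterations exactly once.  */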
4757 static void
4758 expand_omp_for_static_nochunk (struct omp_region *region,
4759 struct omp_for_data *fd,
4760 gimple *inner_stmt)
4762 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4763 tree type, itype, vmain, vback;
4764 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4765 basic_block body_bb, cont_bb, collapse_bb = NULL;
4766 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4767 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4768 gimple_stmt_iterator gsi, gsip;
4769 edge ep;
4770 bool broken_loop = region->cont == NULL;
4771 tree *counts = NULL;
4772 tree n1, n2, step;
4773 tree reductions = NULL_TREE;
4774 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4776 itype = type = TREE_TYPE (fd->loop.v);
4777 if (POINTER_TYPE_P (type))
4778 itype = signed_type_for (type);
4780 entry_bb = region->entry;
4781 cont_bb = region->cont;
4782 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4783 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4784 gcc_assert (broken_loop
4785 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4786 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4787 body_bb = single_succ (seq_start_bb);
4788 if (!broken_loop)
4790 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4791 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4792 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4794 exit_bb = region->exit;
4796 /* Iteration space partitioning goes in ENTRY_BB. */
4797 gsi = gsi_last_nondebug_bb (entry_bb);
4798 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4799 gsip = gsi;
4800 gsi_prev (&gsip);
4802 if (fd->collapse > 1)
4804 int first_zero_iter = -1, dummy = -1;
4805 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4807 counts = XALLOCAVEC (tree, fd->collapse);
4808 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4809 fin_bb, first_zero_iter,
4810 dummy_bb, dummy, l2_dom_bb);
4811 t = NULL_TREE;
4813 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4814 t = integer_one_node;
4815 else
4816 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4817 fold_convert (type, fd->loop.n1),
4818 fold_convert (type, fd->loop.n2));
4819 if (fd->collapse == 1
4820 && TYPE_UNSIGNED (type)
4821 && (t == NULL_TREE || !integer_onep (t)))
4823 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4824 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4825 true, GSI_SAME_STMT);
4826 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4827 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4828 true, GSI_SAME_STMT);
4829 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4830 NULL_TREE, NULL_TREE);
4831 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4832 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4833 expand_omp_regimplify_p, NULL, NULL)
4834 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4835 expand_omp_regimplify_p, NULL, NULL))
4837 gsi = gsi_for_stmt (cond_stmt);
4838 gimple_regimplify_operands (cond_stmt, &gsi);
4840 ep = split_block (entry_bb, cond_stmt);
4841 ep->flags = EDGE_TRUE_VALUE;
4842 entry_bb = ep->dest;
4843 ep->probability = profile_probability::very_likely ();
4844 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4845 ep->probability = profile_probability::very_unlikely ();
4846 if (gimple_in_ssa_p (cfun))
4848 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4849 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4850 !gsi_end_p (gpi); gsi_next (&gpi))
4852 gphi *phi = gpi.phi ();
4853 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4854 ep, UNKNOWN_LOCATION);
4857 gsi = gsi_last_bb (entry_bb);
4860 if (fd->lastprivate_conditional)
4862 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4863 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4864 if (fd->have_pointer_condtemp)
4865 condtemp = OMP_CLAUSE_DECL (c);
4866 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4867 cond_var = OMP_CLAUSE_DECL (c);
4869 if (fd->have_reductemp
4870 /* For scan, we don't want to reinitialize condtemp before the
4871 second loop. */
4872 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4873 || fd->have_nonctrl_scantemp)
4875 tree t1 = build_int_cst (long_integer_type_node, 0);
4876 tree t2 = build_int_cst (long_integer_type_node, 1);
4877 tree t3 = build_int_cstu (long_integer_type_node,
4878 (HOST_WIDE_INT_1U << 31) + 1);
4879 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4880 gimple_stmt_iterator gsi2 = gsi_none ();
4881 gimple *g = NULL;
4882 tree mem = null_pointer_node, memv = NULL_TREE;
4883 unsigned HOST_WIDE_INT condtemp_sz = 0;
4884 unsigned HOST_WIDE_INT alloc_align = 0;
4885 if (fd->have_reductemp)
4887 gcc_assert (!fd->have_nonctrl_scantemp);
4888 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4889 reductions = OMP_CLAUSE_DECL (c);
4890 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4891 g = SSA_NAME_DEF_STMT (reductions);
4892 reductions = gimple_assign_rhs1 (g);
4893 OMP_CLAUSE_DECL (c) = reductions;
4894 gsi2 = gsi_for_stmt (g);
4896 else
4898 if (gsi_end_p (gsip))
4899 gsi2 = gsi_after_labels (region->entry);
4900 else
4901 gsi2 = gsip;
4902 reductions = null_pointer_node;
4904 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4906 tree type;
4907 if (fd->have_pointer_condtemp)
4908 type = TREE_TYPE (condtemp);
4909 else
4910 type = ptr_type_node;
4911 memv = create_tmp_var (type);
4912 TREE_ADDRESSABLE (memv) = 1;
4913 unsigned HOST_WIDE_INT sz = 0;
4914 tree size = NULL_TREE;
4915 if (fd->have_pointer_condtemp)
4917 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4918 sz *= fd->lastprivate_conditional;
4919 condtemp_sz = sz;
4921 if (fd->have_nonctrl_scantemp)
4923 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4924 gimple *g = gimple_build_call (nthreads, 0);
4925 nthreads = create_tmp_var (integer_type_node);
4926 gimple_call_set_lhs (g, nthreads);
4927 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4928 nthreads = fold_convert (sizetype, nthreads);
4929 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4930 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4931 alloc_align, nthreads, NULL,
4932 false);
4933 size = fold_convert (type, size);
4935 else
4936 size = build_int_cst (type, sz);
4937 expand_omp_build_assign (&gsi2, memv, size, false);
4938 mem = build_fold_addr_expr (memv);
4940 tree t
4941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4942 9, t1, t2, t2, t3, t1, null_pointer_node,
4943 null_pointer_node, reductions, mem);
4944 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4945 true, GSI_SAME_STMT);
4946 if (fd->have_pointer_condtemp)
4947 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4948 if (fd->have_nonctrl_scantemp)
4950 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4951 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4952 alloc_align, nthreads, &gsi2, false);
4954 if (fd->have_reductemp)
4956 gsi_remove (&gsi2, true);
4957 release_ssa_name (gimple_assign_lhs (g));
4960 switch (gimple_omp_for_kind (fd->for_stmt))
4962 case GF_OMP_FOR_KIND_FOR:
4963 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4964 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4965 break;
4966 case GF_OMP_FOR_KIND_DISTRIBUTE:
4967 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4968 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4969 break;
4970 default:
4971 gcc_unreachable ();
4973 nthreads = build_call_expr (nthreads, 0);
4974 nthreads = fold_convert (itype, nthreads);
4975 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4976 true, GSI_SAME_STMT);
4977 threadid = build_call_expr (threadid, 0);
4978 threadid = fold_convert (itype, threadid);
4979 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4980 true, GSI_SAME_STMT);
4982 n1 = fd->loop.n1;
4983 n2 = fd->loop.n2;
4984 step = fd->loop.step;
4985 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4987 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4988 OMP_CLAUSE__LOOPTEMP_);
4989 gcc_assert (innerc);
4990 n1 = OMP_CLAUSE_DECL (innerc);
4991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 OMP_CLAUSE__LOOPTEMP_);
4993 gcc_assert (innerc);
4994 n2 = OMP_CLAUSE_DECL (innerc);
4996 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4997 true, NULL_TREE, true, GSI_SAME_STMT);
4998 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4999 true, NULL_TREE, true, GSI_SAME_STMT);
5000 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5001 true, NULL_TREE, true, GSI_SAME_STMT);
5003 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5004 t = fold_build2 (PLUS_EXPR, itype, step, t);
5005 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5006 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5007 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5008 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5009 fold_build1 (NEGATE_EXPR, itype, t),
5010 fold_build1 (NEGATE_EXPR, itype, step));
5011 else
5012 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5013 t = fold_convert (itype, t);
5014 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5016 q = create_tmp_reg (itype, "q");
5017 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5018 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5019 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5021 tt = create_tmp_reg (itype, "tt");
5022 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5023 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5024 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5026 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5027 gcond *cond_stmt = gimple_build_cond_empty (t);
5028 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5030 second_bb = split_block (entry_bb, cond_stmt)->dest;
5031 gsi = gsi_last_nondebug_bb (second_bb);
5032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5034 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5035 GSI_SAME_STMT);
5036 gassign *assign_stmt
5037 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5038 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5040 third_bb = split_block (second_bb, assign_stmt)->dest;
5041 gsi = gsi_last_nondebug_bb (third_bb);
5042 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5044 if (fd->have_nonctrl_scantemp)
5046 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5047 tree controlp = NULL_TREE, controlb = NULL_TREE;
5048 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5049 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5050 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5052 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5053 controlb = OMP_CLAUSE_DECL (c);
5054 else
5055 controlp = OMP_CLAUSE_DECL (c);
5056 if (controlb && controlp)
5057 break;
5059 gcc_assert (controlp && controlb);
5060 tree cnt = create_tmp_var (sizetype);
5061 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5062 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5063 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5064 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5065 alloc_align, cnt, NULL, true);
5066 tree size = create_tmp_var (sizetype);
5067 expand_omp_build_assign (&gsi, size, sz, false);
5068 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5069 size, size_int (16384));
5070 expand_omp_build_assign (&gsi, controlb, cmp);
5071 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5072 NULL_TREE, NULL_TREE);
5073 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5074 fourth_bb = split_block (third_bb, g)->dest;
5075 gsi = gsi_last_nondebug_bb (fourth_bb);
5076 /* FIXME: Once we have allocators, this should use allocator. */
5077 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5078 gimple_call_set_lhs (g, controlp);
5079 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5080 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5081 &gsi, true);
5082 gsi_prev (&gsi);
5083 g = gsi_stmt (gsi);
5084 fifth_bb = split_block (fourth_bb, g)->dest;
5085 gsi = gsi_last_nondebug_bb (fifth_bb);
5087 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5088 gimple_call_set_lhs (g, controlp);
5089 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5090 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5091 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5092 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5093 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5095 tree tmp = create_tmp_var (sizetype);
5096 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5097 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5098 TYPE_SIZE_UNIT (pointee_type));
5099 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5100 g = gimple_build_call (alloca_decl, 2, tmp,
5101 size_int (TYPE_ALIGN (pointee_type)));
5102 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5103 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5106 sixth_bb = split_block (fifth_bb, g)->dest;
5107 gsi = gsi_last_nondebug_bb (sixth_bb);
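/* Compute the half-open range [s0, e0) of iterations assigned to this
   thread: the first tt threads get q + 1 iterations each (tt was zeroed
   and q bumped above for them), the remaining threads get q. For example,
   with n = 10 and nthreads = 4 this gives q = 2, tt = 2, so threads 0 and 1
   run [0, 3) and [3, 6) while threads 2 and 3 run [6, 8) and [8, 10). */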
5110 t = build2 (MULT_EXPR, itype, q, threadid);
5111 t = build2 (PLUS_EXPR, itype, t, tt);
5112 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5114 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5115 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5117 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5118 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5120 /* Remove the GIMPLE_OMP_FOR statement. */
5121 gsi_remove (&gsi, true);
5123 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5124 gsi = gsi_start_bb (seq_start_bb);
5126 tree startvar = fd->loop.v;
5127 tree endvar = NULL_TREE;
5129 if (gimple_omp_for_combined_p (fd->for_stmt))
5131 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5132 ? gimple_omp_parallel_clauses (inner_stmt)
5133 : gimple_omp_for_clauses (inner_stmt);
5134 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5135 gcc_assert (innerc);
5136 startvar = OMP_CLAUSE_DECL (innerc);
5137 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5138 OMP_CLAUSE__LOOPTEMP_);
5139 gcc_assert (innerc);
5140 endvar = OMP_CLAUSE_DECL (innerc);
5141 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5142 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5144 innerc = find_lastprivate_looptemp (fd, innerc);
5145 if (innerc)
5147 /* If needed (distribute parallel for with lastprivate),
5148 propagate down the total number of iterations. */
5149 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5150 fd->loop.n2);
5151 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5152 GSI_CONTINUE_LINKING);
5153 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5154 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5158 t = fold_convert (itype, s0);
5159 t = fold_build2 (MULT_EXPR, itype, t, step);
5160 if (POINTER_TYPE_P (type))
5162 t = fold_build_pointer_plus (n1, t);
5163 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5164 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5165 t = fold_convert (signed_type_for (type), t);
5167 else
5168 t = fold_build2 (PLUS_EXPR, type, t, n1);
5169 t = fold_convert (TREE_TYPE (startvar), t);
5170 t = force_gimple_operand_gsi (&gsi, t,
5171 DECL_P (startvar)
5172 && TREE_ADDRESSABLE (startvar),
5173 NULL_TREE, false, GSI_CONTINUE_LINKING);
5174 assign_stmt = gimple_build_assign (startvar, t);
5175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5176 if (cond_var)
5178 tree itype = TREE_TYPE (cond_var);
5179 /* For lastprivate(conditional:) itervar, we need some iteration
5180 counter that starts at unsigned non-zero and increases.
5181 Prefer as few IVs as possible, so if we can use startvar
5182 itself, use that, or startvar + constant (those would be
5183 incremented with step), and as a last resort use s0 + 1,
5184 incremented by 1 each iteration. */
5185 if (POINTER_TYPE_P (type)
5186 || TREE_CODE (n1) != INTEGER_CST
5187 || fd->loop.cond_code != LT_EXPR)
5188 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5189 build_int_cst (itype, 1));
5190 else if (tree_int_cst_sgn (n1) == 1)
5191 t = fold_convert (itype, t);
5192 else
5194 tree c = fold_convert (itype, n1);
5195 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5196 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5198 t = force_gimple_operand_gsi (&gsi, t, false,
5199 NULL_TREE, false, GSI_CONTINUE_LINKING);
5200 assign_stmt = gimple_build_assign (cond_var, t);
5201 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5204 t = fold_convert (itype, e0);
5205 t = fold_build2 (MULT_EXPR, itype, t, step);
5206 if (POINTER_TYPE_P (type))
5208 t = fold_build_pointer_plus (n1, t);
5209 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5210 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5211 t = fold_convert (signed_type_for (type), t);
5213 else
5214 t = fold_build2 (PLUS_EXPR, type, t, n1);
5215 t = fold_convert (TREE_TYPE (startvar), t);
5216 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5217 false, GSI_CONTINUE_LINKING);
5218 if (endvar)
5220 assign_stmt = gimple_build_assign (endvar, e);
5221 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5222 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5223 assign_stmt = gimple_build_assign (fd->loop.v, e);
5224 else
5225 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5226 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5228 /* Handle linear clause adjustments. */
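/* Each linear clause variable that needs copy-in is advanced by
   itercnt * OMP_CLAUSE_LINEAR_STEP before this thread's chunk starts, where
   itercnt is the logical iteration number corresponding to s0. E.g. for
   linear(x:2) a thread whose chunk starts at logical iteration 5 would
   enter with x advanced by 10 (illustrative example, not from the original
   code). */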
5229 tree itercnt = NULL_TREE;
5230 tree *nonrect_bounds = NULL;
5231 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5232 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5233 c; c = OMP_CLAUSE_CHAIN (c))
5234 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5235 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5237 tree d = OMP_CLAUSE_DECL (c);
5238 tree t = d, a, dest;
5239 if (omp_privatize_by_reference (t))
5240 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5241 if (itercnt == NULL_TREE)
5243 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5245 itercnt = fold_build2 (MINUS_EXPR, itype,
5246 fold_convert (itype, n1),
5247 fold_convert (itype, fd->loop.n1));
5248 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5249 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5250 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5251 NULL_TREE, false,
5252 GSI_CONTINUE_LINKING);
5254 else
5255 itercnt = s0;
5257 tree type = TREE_TYPE (t);
5258 if (POINTER_TYPE_P (type))
5259 type = sizetype;
5260 a = fold_build2 (MULT_EXPR, type,
5261 fold_convert (type, itercnt),
5262 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5263 dest = unshare_expr (t);
5264 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5265 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5266 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5267 false, GSI_CONTINUE_LINKING);
5268 expand_omp_build_assign (&gsi, dest, t, true);
5270 if (fd->collapse > 1)
5272 if (fd->non_rect)
5274 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5275 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5277 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5278 startvar);
5281 if (!broken_loop)
5283 /* The code controlling the sequential loop replaces the
5284 GIMPLE_OMP_CONTINUE. */
5285 gsi = gsi_last_nondebug_bb (cont_bb);
5286 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5287 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5288 vmain = gimple_omp_continue_control_use (cont_stmt);
5289 vback = gimple_omp_continue_control_def (cont_stmt);
5291 if (cond_var)
5293 tree itype = TREE_TYPE (cond_var);
5294 tree t2;
5295 if (POINTER_TYPE_P (type)
5296 || TREE_CODE (n1) != INTEGER_CST
5297 || fd->loop.cond_code != LT_EXPR)
5298 t2 = build_int_cst (itype, 1);
5299 else
5300 t2 = fold_convert (itype, step);
5301 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5302 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5303 NULL_TREE, true, GSI_SAME_STMT);
5304 assign_stmt = gimple_build_assign (cond_var, t2);
5305 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5308 if (!gimple_omp_for_combined_p (fd->for_stmt))
5310 if (POINTER_TYPE_P (type))
5311 t = fold_build_pointer_plus (vmain, step);
5312 else
5313 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5314 t = force_gimple_operand_gsi (&gsi, t,
5315 DECL_P (vback)
5316 && TREE_ADDRESSABLE (vback),
5317 NULL_TREE, true, GSI_SAME_STMT);
5318 assign_stmt = gimple_build_assign (vback, t);
5319 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5321 t = build2 (fd->loop.cond_code, boolean_type_node,
5322 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5323 ? t : vback, e);
5324 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5327 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5328 gsi_remove (&gsi, true);
5330 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5331 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5332 cont_bb, body_bb);
5335 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5336 gsi = gsi_last_nondebug_bb (exit_bb);
5337 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5339 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5340 if (fd->have_reductemp
5341 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5342 && !fd->have_nonctrl_scantemp))
5344 tree fn;
5345 if (t)
5346 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5347 else
5348 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5349 gcall *g = gimple_build_call (fn, 0);
5350 if (t)
5352 gimple_call_set_lhs (g, t);
5353 if (fd->have_reductemp)
5354 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5355 NOP_EXPR, t),
5356 GSI_SAME_STMT);
5358 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5360 else
5361 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5363 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5364 && !fd->have_nonctrl_scantemp)
5366 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5367 gcall *g = gimple_build_call (fn, 0);
5368 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5370 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5372 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5373 tree controlp = NULL_TREE, controlb = NULL_TREE;
5374 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5375 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5376 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5378 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5379 controlb = OMP_CLAUSE_DECL (c);
5380 else
5381 controlp = OMP_CLAUSE_DECL (c);
5382 if (controlb && controlp)
5383 break;
5385 gcc_assert (controlp && controlb);
5386 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5387 NULL_TREE, NULL_TREE);
5388 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5389 exit1_bb = split_block (exit_bb, g)->dest;
5390 gsi = gsi_after_labels (exit1_bb);
5391 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5392 controlp);
5393 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5394 exit2_bb = split_block (exit1_bb, g)->dest;
5395 gsi = gsi_after_labels (exit2_bb);
5396 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5397 controlp);
5398 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5399 exit3_bb = split_block (exit2_bb, g)->dest;
5400 gsi = gsi_after_labels (exit3_bb);
5402 gsi_remove (&gsi, true);
5404 /* Connect all the blocks. */
5405 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5406 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5407 ep = find_edge (entry_bb, second_bb);
5408 ep->flags = EDGE_TRUE_VALUE;
5409 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5410 if (fourth_bb)
5412 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (third_bb, fourth_bb);
5416 ep->flags = EDGE_TRUE_VALUE;
5417 ep->probability
5418 = profile_probability::guessed_always ().apply_scale (1, 2);
5419 ep = find_edge (fourth_bb, fifth_bb);
5420 redirect_edge_and_branch (ep, sixth_bb);
5422 else
5423 sixth_bb = third_bb;
5424 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5425 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5426 if (exit1_bb)
5428 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5429 ep->probability
5430 = profile_probability::guessed_always ().apply_scale (1, 2);
5431 ep = find_edge (exit_bb, exit1_bb);
5432 ep->flags = EDGE_TRUE_VALUE;
5433 ep->probability
5434 = profile_probability::guessed_always ().apply_scale (1, 2);
5435 ep = find_edge (exit1_bb, exit2_bb);
5436 redirect_edge_and_branch (ep, exit3_bb);
5439 if (!broken_loop)
5441 ep = find_edge (cont_bb, body_bb);
5442 if (ep == NULL)
5444 ep = BRANCH_EDGE (cont_bb);
5445 gcc_assert (single_succ (ep->dest) == body_bb);
5447 if (gimple_omp_for_combined_p (fd->for_stmt))
5449 remove_edge (ep);
5450 ep = NULL;
5452 else if (fd->collapse > 1)
5454 remove_edge (ep);
5455 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5457 else
5458 ep->flags = EDGE_TRUE_VALUE;
5459 find_edge (cont_bb, fin_bb)->flags
5460 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5463 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5465 if (fourth_bb)
5467 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5468 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5470 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5472 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5473 recompute_dominator (CDI_DOMINATORS, body_bb));
5474 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5475 recompute_dominator (CDI_DOMINATORS, fin_bb));
5476 if (exit1_bb)
5478 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5479 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5482 class loop *loop = body_bb->loop_father;
5483 if (loop != entry_bb->loop_father)
5485 gcc_assert (broken_loop || loop->header == body_bb);
5486 gcc_assert (broken_loop
5487 || loop->latch == region->cont
5488 || single_pred (loop->latch) == region->cont);
5489 return;
5492 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5494 loop = alloc_loop ();
5495 loop->header = body_bb;
5496 if (collapse_bb == NULL)
5497 loop->latch = cont_bb;
5498 add_loop (loop, body_bb->loop_father);
5502 /* Return the PHI node in E->DEST whose argument on edge E is ARG, or NULL if there is none. */
5504 static gphi *
5505 find_phi_with_arg_on_edge (tree arg, edge e)
5507 basic_block bb = e->dest;
5509 for (gphi_iterator gpi = gsi_start_phis (bb);
5510 !gsi_end_p (gpi);
5511 gsi_next (&gpi))
5513 gphi *phi = gpi.phi ();
5514 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5515 return phi;
5518 return NULL;
5521 /* A subroutine of expand_omp_for. Generate code for a parallel
5522 loop with static schedule and a specified chunk size. Given
5523 parameters:
5525 for (V = N1; V cond N2; V += STEP) BODY;
5527 where COND is "<" or ">", we generate pseudocode
5529 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5530 if (cond is <)
5531 adj = STEP - 1;
5532 else
5533 adj = STEP + 1;
5534 if ((__typeof (V)) -1 > 0 && cond is >)
5535 n = -(adj + N2 - N1) / -STEP;
5536 else
5537 n = (adj + N2 - N1) / STEP;
5538 trip = 0;
5539 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5540 here so that V is defined
5541 if the loop is not entered
5542 L0:
5543 s0 = (trip * nthreads + threadid) * CHUNK;
5544 e0 = min (s0 + CHUNK, n);
5545 if (s0 < n) goto L1; else goto L4;
5546 L1:
5547 V = s0 * STEP + N1;
5548 e = e0 * STEP + N1;
5549 L2:
5550 BODY;
5551 V += STEP;
5552 if (V cond e) goto L2; else goto L3;
5553 L3:
5554 trip += 1;
5555 goto L0;
5556 L4:
5557 */
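/* For instance, with nthreads = 2, CHUNK = 3 and n = 10 the threads
   alternate chunks: trip 0 assigns [0, 3) to thread 0 and [3, 6) to
   thread 1, trip 1 assigns [6, 9) and [9, 10), and trip 2 finds s0 >= n
   so both threads leave the loop (illustrative example only). */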
5559 static void
5560 expand_omp_for_static_chunk (struct omp_region *region,
5561 struct omp_for_data *fd, gimple *inner_stmt)
5563 tree n, s0, e0, e, t;
5564 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5565 tree type, itype, vmain, vback, vextra;
5566 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5567 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5568 gimple_stmt_iterator gsi, gsip;
5569 edge se;
5570 bool broken_loop = region->cont == NULL;
5571 tree *counts = NULL;
5572 tree n1, n2, step;
5573 tree reductions = NULL_TREE;
5574 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5576 itype = type = TREE_TYPE (fd->loop.v);
5577 if (POINTER_TYPE_P (type))
5578 itype = signed_type_for (type);
5580 entry_bb = region->entry;
5581 se = split_block (entry_bb, last_stmt (entry_bb));
5582 entry_bb = se->src;
5583 iter_part_bb = se->dest;
5584 cont_bb = region->cont;
5585 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5586 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5587 gcc_assert (broken_loop
5588 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5589 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5590 body_bb = single_succ (seq_start_bb);
5591 if (!broken_loop)
5593 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5594 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5595 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5596 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5598 exit_bb = region->exit;
5600 /* Trip and adjustment setup goes in ENTRY_BB. */
5601 gsi = gsi_last_nondebug_bb (entry_bb);
5602 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5603 gsip = gsi;
5604 gsi_prev (&gsip);
5606 if (fd->collapse > 1)
5608 int first_zero_iter = -1, dummy = -1;
5609 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5611 counts = XALLOCAVEC (tree, fd->collapse);
5612 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5613 fin_bb, first_zero_iter,
5614 dummy_bb, dummy, l2_dom_bb);
5615 t = NULL_TREE;
5617 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5618 t = integer_one_node;
5619 else
5620 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5621 fold_convert (type, fd->loop.n1),
5622 fold_convert (type, fd->loop.n2));
5623 if (fd->collapse == 1
5624 && TYPE_UNSIGNED (type)
5625 && (t == NULL_TREE || !integer_onep (t)))
5627 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5628 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5629 true, GSI_SAME_STMT);
5630 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5631 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5632 true, GSI_SAME_STMT);
5633 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5634 NULL_TREE, NULL_TREE);
5635 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5636 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5637 expand_omp_regimplify_p, NULL, NULL)
5638 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5639 expand_omp_regimplify_p, NULL, NULL))
5641 gsi = gsi_for_stmt (cond_stmt);
5642 gimple_regimplify_operands (cond_stmt, &gsi);
5644 se = split_block (entry_bb, cond_stmt);
5645 se->flags = EDGE_TRUE_VALUE;
5646 entry_bb = se->dest;
5647 se->probability = profile_probability::very_likely ();
5648 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5649 se->probability = profile_probability::very_unlikely ();
5650 if (gimple_in_ssa_p (cfun))
5652 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5653 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5654 !gsi_end_p (gpi); gsi_next (&gpi))
5656 gphi *phi = gpi.phi ();
5657 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5658 se, UNKNOWN_LOCATION);
5661 gsi = gsi_last_bb (entry_bb);
5664 if (fd->lastprivate_conditional)
5666 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5667 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5668 if (fd->have_pointer_condtemp)
5669 condtemp = OMP_CLAUSE_DECL (c);
5670 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5671 cond_var = OMP_CLAUSE_DECL (c);
5673 if (fd->have_reductemp || fd->have_pointer_condtemp)
5675 tree t1 = build_int_cst (long_integer_type_node, 0);
5676 tree t2 = build_int_cst (long_integer_type_node, 1);
5677 tree t3 = build_int_cstu (long_integer_type_node,
5678 (HOST_WIDE_INT_1U << 31) + 1);
5679 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5680 gimple_stmt_iterator gsi2 = gsi_none ();
5681 gimple *g = NULL;
5682 tree mem = null_pointer_node, memv = NULL_TREE;
5683 if (fd->have_reductemp)
5685 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5686 reductions = OMP_CLAUSE_DECL (c);
5687 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5688 g = SSA_NAME_DEF_STMT (reductions);
5689 reductions = gimple_assign_rhs1 (g);
5690 OMP_CLAUSE_DECL (c) = reductions;
5691 gsi2 = gsi_for_stmt (g);
5693 else
5695 if (gsi_end_p (gsip))
5696 gsi2 = gsi_after_labels (region->entry);
5697 else
5698 gsi2 = gsip;
5699 reductions = null_pointer_node;
5701 if (fd->have_pointer_condtemp)
5703 tree type = TREE_TYPE (condtemp);
5704 memv = create_tmp_var (type);
5705 TREE_ADDRESSABLE (memv) = 1;
5706 unsigned HOST_WIDE_INT sz
5707 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5708 sz *= fd->lastprivate_conditional;
5709 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5710 false);
5711 mem = build_fold_addr_expr (memv);
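/* The constant bounds passed to GOMP_LOOP_START below (with NULL istart and
   iend pointers) look like dummies; the call appears to be made only so that
   libgomp can set up the reduction buffer and/or the conditional lastprivate
   scratch memory passed via REDUCTIONS and MEM. The precise contract is
   defined by libgomp, not here. */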
5713 tree t
5714 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5715 9, t1, t2, t2, t3, t1, null_pointer_node,
5716 null_pointer_node, reductions, mem);
5717 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5718 true, GSI_SAME_STMT);
5719 if (fd->have_pointer_condtemp)
5720 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5721 if (fd->have_reductemp)
5723 gsi_remove (&gsi2, true);
5724 release_ssa_name (gimple_assign_lhs (g));
5727 switch (gimple_omp_for_kind (fd->for_stmt))
5729 case GF_OMP_FOR_KIND_FOR:
5730 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5731 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5732 break;
5733 case GF_OMP_FOR_KIND_DISTRIBUTE:
5734 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5735 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5736 break;
5737 default:
5738 gcc_unreachable ();
5740 nthreads = build_call_expr (nthreads, 0);
5741 nthreads = fold_convert (itype, nthreads);
5742 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5743 true, GSI_SAME_STMT);
5744 threadid = build_call_expr (threadid, 0);
5745 threadid = fold_convert (itype, threadid);
5746 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5747 true, GSI_SAME_STMT);
5749 n1 = fd->loop.n1;
5750 n2 = fd->loop.n2;
5751 step = fd->loop.step;
5752 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5754 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5755 OMP_CLAUSE__LOOPTEMP_);
5756 gcc_assert (innerc);
5757 n1 = OMP_CLAUSE_DECL (innerc);
5758 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5759 OMP_CLAUSE__LOOPTEMP_);
5760 gcc_assert (innerc);
5761 n2 = OMP_CLAUSE_DECL (innerc);
5763 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5764 true, NULL_TREE, true, GSI_SAME_STMT);
5765 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5766 true, NULL_TREE, true, GSI_SAME_STMT);
5767 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5768 true, NULL_TREE, true, GSI_SAME_STMT);
5769 tree chunk_size = fold_convert (itype, fd->chunk_size);
5770 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5771 chunk_size
5772 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5773 GSI_SAME_STMT);
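/* Compute the iteration count n = (adj + N2 - N1) / STEP from the pseudocode
   in the function comment above. For a descending (>) loop over an unsigned
   type, both the numerator and STEP are negated first so that the unsigned
   division operates on the non-negative magnitudes. */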
5775 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5776 t = fold_build2 (PLUS_EXPR, itype, step, t);
5777 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5778 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5779 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5780 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5781 fold_build1 (NEGATE_EXPR, itype, t),
5782 fold_build1 (NEGATE_EXPR, itype, step));
5783 else
5784 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5785 t = fold_convert (itype, t);
5786 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5787 true, GSI_SAME_STMT);
5789 trip_var = create_tmp_reg (itype, ".trip");
5790 if (gimple_in_ssa_p (cfun))
5792 trip_init = make_ssa_name (trip_var);
5793 trip_main = make_ssa_name (trip_var);
5794 trip_back = make_ssa_name (trip_var);
5796 else
5798 trip_init = trip_var;
5799 trip_main = trip_var;
5800 trip_back = trip_var;
5803 gassign *assign_stmt
5804 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5805 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5807 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5808 t = fold_build2 (MULT_EXPR, itype, t, step);
5809 if (POINTER_TYPE_P (type))
5810 t = fold_build_pointer_plus (n1, t);
5811 else
5812 t = fold_build2 (PLUS_EXPR, type, t, n1);
5813 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5814 true, GSI_SAME_STMT);
5816 /* Remove the GIMPLE_OMP_FOR. */
5817 gsi_remove (&gsi, true);
5819 gimple_stmt_iterator gsif = gsi;
5821 /* Iteration space partitioning goes in ITER_PART_BB. */
5822 gsi = gsi_last_bb (iter_part_bb);
5824 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5825 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5826 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5827 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5828 false, GSI_CONTINUE_LINKING);
5830 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5831 t = fold_build2 (MIN_EXPR, itype, t, n);
5832 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5833 false, GSI_CONTINUE_LINKING);
5835 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5836 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5838 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5839 gsi = gsi_start_bb (seq_start_bb);
5841 tree startvar = fd->loop.v;
5842 tree endvar = NULL_TREE;
5844 if (gimple_omp_for_combined_p (fd->for_stmt))
5846 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5847 ? gimple_omp_parallel_clauses (inner_stmt)
5848 : gimple_omp_for_clauses (inner_stmt);
5849 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5850 gcc_assert (innerc);
5851 startvar = OMP_CLAUSE_DECL (innerc);
5852 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5853 OMP_CLAUSE__LOOPTEMP_);
5854 gcc_assert (innerc);
5855 endvar = OMP_CLAUSE_DECL (innerc);
5856 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5857 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5859 innerc = find_lastprivate_looptemp (fd, innerc);
5860 if (innerc)
5862 /* If needed (distribute parallel for with lastprivate),
5863 propagate down the total number of iterations. */
5864 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5865 fd->loop.n2);
5866 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5867 GSI_CONTINUE_LINKING);
5868 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5869 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5874 t = fold_convert (itype, s0);
5875 t = fold_build2 (MULT_EXPR, itype, t, step);
5876 if (POINTER_TYPE_P (type))
5878 t = fold_build_pointer_plus (n1, t);
5879 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5880 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5881 t = fold_convert (signed_type_for (type), t);
5883 else
5884 t = fold_build2 (PLUS_EXPR, type, t, n1);
5885 t = fold_convert (TREE_TYPE (startvar), t);
5886 t = force_gimple_operand_gsi (&gsi, t,
5887 DECL_P (startvar)
5888 && TREE_ADDRESSABLE (startvar),
5889 NULL_TREE, false, GSI_CONTINUE_LINKING);
5890 assign_stmt = gimple_build_assign (startvar, t);
5891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5892 if (cond_var)
5894 tree itype = TREE_TYPE (cond_var);
5895 /* For lastprivate(conditional:) itervar, we need some iteration
5896 counter that starts at unsigned non-zero and increases.
5897 Prefer as few IVs as possible, so if we can use startvar
5898 itself, use that, or startvar + constant (those would be
5899 incremented with step), and as a last resort use s0 + 1,
5900 incremented by 1 each iteration. */
5901 if (POINTER_TYPE_P (type)
5902 || TREE_CODE (n1) != INTEGER_CST
5903 || fd->loop.cond_code != LT_EXPR)
5904 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5905 build_int_cst (itype, 1));
5906 else if (tree_int_cst_sgn (n1) == 1)
5907 t = fold_convert (itype, t);
5908 else
5910 tree c = fold_convert (itype, n1);
5911 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5912 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5914 t = force_gimple_operand_gsi (&gsi, t, false,
5915 NULL_TREE, false, GSI_CONTINUE_LINKING);
5916 assign_stmt = gimple_build_assign (cond_var, t);
5917 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5920 t = fold_convert (itype, e0);
5921 t = fold_build2 (MULT_EXPR, itype, t, step);
5922 if (POINTER_TYPE_P (type))
5924 t = fold_build_pointer_plus (n1, t);
5925 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5926 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5927 t = fold_convert (signed_type_for (type), t);
5929 else
5930 t = fold_build2 (PLUS_EXPR, type, t, n1);
5931 t = fold_convert (TREE_TYPE (startvar), t);
5932 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5933 false, GSI_CONTINUE_LINKING);
5934 if (endvar)
5936 assign_stmt = gimple_build_assign (endvar, e);
5937 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5938 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5939 assign_stmt = gimple_build_assign (fd->loop.v, e);
5940 else
5941 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5944 /* Handle linear clause adjustments. */
5945 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5946 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5947 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5948 c; c = OMP_CLAUSE_CHAIN (c))
5949 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5950 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5952 tree d = OMP_CLAUSE_DECL (c);
5953 tree t = d, a, dest;
5954 if (omp_privatize_by_reference (t))
5955 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5956 tree type = TREE_TYPE (t);
5957 if (POINTER_TYPE_P (type))
5958 type = sizetype;
5959 dest = unshare_expr (t);
5960 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5961 expand_omp_build_assign (&gsif, v, t);
5962 if (itercnt == NULL_TREE)
5964 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5966 itercntbias
5967 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5968 fold_convert (itype, fd->loop.n1));
5969 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5970 itercntbias, step);
5971 itercntbias
5972 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5973 NULL_TREE, true,
5974 GSI_SAME_STMT);
5975 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5976 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5977 NULL_TREE, false,
5978 GSI_CONTINUE_LINKING);
5980 else
5981 itercnt = s0;
5983 a = fold_build2 (MULT_EXPR, type,
5984 fold_convert (type, itercnt),
5985 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5986 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5987 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5988 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5989 false, GSI_CONTINUE_LINKING);
5990 expand_omp_build_assign (&gsi, dest, t, true);
5992 if (fd->collapse > 1)
5993 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5995 if (!broken_loop)
5997 /* The code controlling the sequential loop goes in CONT_BB,
5998 replacing the GIMPLE_OMP_CONTINUE. */
5999 gsi = gsi_last_nondebug_bb (cont_bb);
6000 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6001 vmain = gimple_omp_continue_control_use (cont_stmt);
6002 vback = gimple_omp_continue_control_def (cont_stmt);
6004 if (cond_var)
6006 tree itype = TREE_TYPE (cond_var);
6007 tree t2;
6008 if (POINTER_TYPE_P (type)
6009 || TREE_CODE (n1) != INTEGER_CST
6010 || fd->loop.cond_code != LT_EXPR)
6011 t2 = build_int_cst (itype, 1);
6012 else
6013 t2 = fold_convert (itype, step);
6014 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6015 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6016 NULL_TREE, true, GSI_SAME_STMT);
6017 assign_stmt = gimple_build_assign (cond_var, t2);
6018 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6021 if (!gimple_omp_for_combined_p (fd->for_stmt))
6023 if (POINTER_TYPE_P (type))
6024 t = fold_build_pointer_plus (vmain, step);
6025 else
6026 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6027 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6028 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6029 true, GSI_SAME_STMT);
6030 assign_stmt = gimple_build_assign (vback, t);
6031 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6033 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6034 t = build2 (EQ_EXPR, boolean_type_node,
6035 build_int_cst (itype, 0),
6036 build_int_cst (itype, 1));
6037 else
6038 t = build2 (fd->loop.cond_code, boolean_type_node,
6039 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6040 ? t : vback, e);
6041 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6044 /* Remove GIMPLE_OMP_CONTINUE. */
6045 gsi_remove (&gsi, true);
6047 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6048 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6050 /* Trip update code goes into TRIP_UPDATE_BB. */
6051 gsi = gsi_start_bb (trip_update_bb);
6053 t = build_int_cst (itype, 1);
6054 t = build2 (PLUS_EXPR, itype, trip_main, t);
6055 assign_stmt = gimple_build_assign (trip_back, t);
6056 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6059 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6060 gsi = gsi_last_nondebug_bb (exit_bb);
6061 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6063 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6064 if (fd->have_reductemp || fd->have_pointer_condtemp)
6066 tree fn;
6067 if (t)
6068 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6069 else
6070 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6071 gcall *g = gimple_build_call (fn, 0);
6072 if (t)
6074 gimple_call_set_lhs (g, t);
6075 if (fd->have_reductemp)
6076 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6077 NOP_EXPR, t),
6078 GSI_SAME_STMT);
6080 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6082 else
6083 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6085 else if (fd->have_pointer_condtemp)
6087 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6088 gcall *g = gimple_build_call (fn, 0);
6089 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6091 gsi_remove (&gsi, true);
6093 /* Connect the new blocks. */
6094 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6095 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6097 if (!broken_loop)
6099 se = find_edge (cont_bb, body_bb);
6100 if (se == NULL)
6102 se = BRANCH_EDGE (cont_bb);
6103 gcc_assert (single_succ (se->dest) == body_bb);
6105 if (gimple_omp_for_combined_p (fd->for_stmt))
6107 remove_edge (se);
6108 se = NULL;
6110 else if (fd->collapse > 1)
6112 remove_edge (se);
6113 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6115 else
6116 se->flags = EDGE_TRUE_VALUE;
6117 find_edge (cont_bb, trip_update_bb)->flags
6118 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6120 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6121 iter_part_bb);
6124 if (gimple_in_ssa_p (cfun))
6126 gphi_iterator psi;
6127 gphi *phi;
6128 edge re, ene;
6129 edge_var_map *vm;
6130 size_t i;
6132 gcc_assert (fd->collapse == 1 && !broken_loop);
6134 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6135 remove arguments of the phi nodes in fin_bb. We need to create
6136 appropriate phi nodes in iter_part_bb instead. */
6137 se = find_edge (iter_part_bb, fin_bb);
6138 re = single_succ_edge (trip_update_bb);
6139 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6140 ene = single_succ_edge (entry_bb);
6142 psi = gsi_start_phis (fin_bb);
6143 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6144 gsi_next (&psi), ++i)
6146 gphi *nphi;
6147 location_t locus;
6149 phi = psi.phi ();
6150 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6151 redirect_edge_var_map_def (vm), 0))
6152 continue;
6154 t = gimple_phi_result (phi);
6155 gcc_assert (t == redirect_edge_var_map_result (vm));
6157 if (!single_pred_p (fin_bb))
6158 t = copy_ssa_name (t, phi);
6160 nphi = create_phi_node (t, iter_part_bb);
6162 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6163 locus = gimple_phi_arg_location_from_edge (phi, se);
5165 /* A special case -- fd->loop.v is not yet computed in
5166 iter_part_bb, so we need to use vextra instead. */
6167 if (t == fd->loop.v)
6168 t = vextra;
6169 add_phi_arg (nphi, t, ene, locus);
6170 locus = redirect_edge_var_map_location (vm);
6171 tree back_arg = redirect_edge_var_map_def (vm);
6172 add_phi_arg (nphi, back_arg, re, locus);
6173 edge ce = find_edge (cont_bb, body_bb);
6174 if (ce == NULL)
6176 ce = BRANCH_EDGE (cont_bb);
6177 gcc_assert (single_succ (ce->dest) == body_bb);
6178 ce = single_succ_edge (ce->dest);
6180 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6181 gcc_assert (inner_loop_phi != NULL);
6182 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6183 find_edge (seq_start_bb, body_bb), locus);
6185 if (!single_pred_p (fin_bb))
6186 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6188 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6189 redirect_edge_var_map_clear (re);
6190 if (single_pred_p (fin_bb))
6191 while (1)
6193 psi = gsi_start_phis (fin_bb);
6194 if (gsi_end_p (psi))
6195 break;
6196 remove_phi_node (&psi, false);
6199 /* Make phi node for trip. */
6200 phi = create_phi_node (trip_main, iter_part_bb);
6201 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6202 UNKNOWN_LOCATION);
6203 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6204 UNKNOWN_LOCATION);
6207 if (!broken_loop)
6208 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6209 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6210 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6211 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6212 recompute_dominator (CDI_DOMINATORS, fin_bb));
6213 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6214 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6215 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6216 recompute_dominator (CDI_DOMINATORS, body_bb));
6218 if (!broken_loop)
6220 class loop *loop = body_bb->loop_father;
6221 class loop *trip_loop = alloc_loop ();
6222 trip_loop->header = iter_part_bb;
6223 trip_loop->latch = trip_update_bb;
6224 add_loop (trip_loop, iter_part_bb->loop_father);
6226 if (loop != entry_bb->loop_father)
6228 gcc_assert (loop->header == body_bb);
6229 gcc_assert (loop->latch == region->cont
6230 || single_pred (loop->latch) == region->cont);
6231 trip_loop->inner = loop;
6232 return;
6235 if (!gimple_omp_for_combined_p (fd->for_stmt))
6237 loop = alloc_loop ();
6238 loop->header = body_bb;
6239 if (collapse_bb == NULL)
6240 loop->latch = cont_bb;
6241 add_loop (loop, trip_loop);
6246 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6247 loop. Given parameters:
6249 for (V = N1; V cond N2; V += STEP) BODY;
6251 where COND is "<" or ">", we generate pseudocode
6253 V = N1;
6254 goto L1;
6255 L0:
6256 BODY;
6257 V += STEP;
6258 L1:
6259 if (V cond N2) goto L0; else goto L2;
6260 L2:
6262 For collapsed loops, emit the outer loops as scalar
6263 and only try to vectorize the innermost loop. */
6265 static void
6266 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6268 tree type, t;
6269 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6270 gimple_stmt_iterator gsi;
6271 gimple *stmt;
6272 gcond *cond_stmt;
6273 bool broken_loop = region->cont == NULL;
6274 edge e, ne;
6275 tree *counts = NULL;
6276 int i;
6277 int safelen_int = INT_MAX;
6278 bool dont_vectorize = false;
6279 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6280 OMP_CLAUSE_SAFELEN);
6281 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6282 OMP_CLAUSE__SIMDUID_);
6283 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6284 OMP_CLAUSE_IF);
6285 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6286 OMP_CLAUSE_SIMDLEN);
6287 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6288 OMP_CLAUSE__CONDTEMP_);
6289 tree n1, n2;
6290 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6292 if (safelen)
6294 poly_uint64 val;
6295 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6296 if (!poly_int_tree_p (safelen, &val))
6297 safelen_int = 0;
6298 else
6299 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
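/* An explicit safelen(1) grants no extra vectorization freedom, so it is
   handled below like the absence of a safelen clause (a reading of this
   code, not a statement about the OpenMP spec). */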
6300 if (safelen_int == 1)
6301 safelen_int = 0;
6303 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6304 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6306 safelen_int = 0;
6307 dont_vectorize = true;
6309 type = TREE_TYPE (fd->loop.v);
6310 entry_bb = region->entry;
6311 cont_bb = region->cont;
6312 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6313 gcc_assert (broken_loop
6314 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6315 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6316 if (!broken_loop)
6318 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6319 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6320 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6321 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6323 else
6325 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6326 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6327 l2_bb = single_succ (l1_bb);
6329 exit_bb = region->exit;
6330 l2_dom_bb = NULL;
6332 gsi = gsi_last_nondebug_bb (entry_bb);
6334 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6335 /* Not needed in SSA form right now. */
6336 gcc_assert (!gimple_in_ssa_p (cfun));
6337 if (fd->collapse > 1
6338 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6339 || broken_loop))
6341 int first_zero_iter = -1, dummy = -1;
6342 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6344 counts = XALLOCAVEC (tree, fd->collapse);
6345 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6346 zero_iter_bb, first_zero_iter,
6347 dummy_bb, dummy, l2_dom_bb);
6349 if (l2_dom_bb == NULL)
6350 l2_dom_bb = l1_bb;
6352 n1 = fd->loop.n1;
6353 n2 = fd->loop.n2;
6354 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6356 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6357 OMP_CLAUSE__LOOPTEMP_);
6358 gcc_assert (innerc);
6359 n1 = OMP_CLAUSE_DECL (innerc);
6360 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6361 OMP_CLAUSE__LOOPTEMP_);
6362 gcc_assert (innerc);
6363 n2 = OMP_CLAUSE_DECL (innerc);
6365 tree step = fd->loop.step;
6366 tree orig_step = step; /* May be different from step if is_simt. */
6368 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6369 OMP_CLAUSE__SIMT_);
6370 if (is_simt)
6372 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6373 is_simt = safelen_int > 1;
6375 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6376 if (is_simt)
6378 simt_lane = create_tmp_var (unsigned_type_node);
6379 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6380 gimple_call_set_lhs (g, simt_lane);
6381 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6382 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6383 fold_convert (TREE_TYPE (step), simt_lane));
6384 n1 = fold_convert (type, n1);
6385 if (POINTER_TYPE_P (type))
6386 n1 = fold_build_pointer_plus (n1, offset);
6387 else
6388 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6390 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6391 if (fd->collapse > 1)
6392 simt_maxlane = build_one_cst (unsigned_type_node);
6393 else if (safelen_int < omp_max_simt_vf ())
6394 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6395 tree vf
6396 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6397 unsigned_type_node, 0);
6398 if (simt_maxlane)
6399 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6400 vf = fold_convert (TREE_TYPE (step), vf);
6401 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
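/* Under SIMT each lane starts at n1 + simt_lane * orig_step and advances by
   vf * orig_step, so the lanes cover interleaved slices of the iteration
   space; orig_step is kept so the per-lane offset can be undone after the
   loop (see the 'V -= STEP * (SIMT_VF - 1)' adjustment below). */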
6404 tree n2var = NULL_TREE;
6405 tree n2v = NULL_TREE;
6406 tree *nonrect_bounds = NULL;
6407 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6408 if (fd->collapse > 1)
6410 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6412 if (fd->non_rect)
6414 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6415 memset (nonrect_bounds, 0,
6416 sizeof (tree) * (fd->last_nonrect + 1));
6418 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6419 gcc_assert (entry_bb == gsi_bb (gsi));
6420 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6421 gsi_prev (&gsi);
6422 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6423 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6424 NULL, n1);
6425 gsi = gsi_for_stmt (fd->for_stmt);
6427 if (broken_loop)
6429 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6431 /* Compute in n2var the limit for the first innermost loop,
6432 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6433 where cnt is how many iterations the loop would have if all
6434 further iterations were assigned to the current task. */
6435 n2var = create_tmp_var (type);
6436 i = fd->collapse - 1;
6437 tree itype = TREE_TYPE (fd->loops[i].v);
6438 if (POINTER_TYPE_P (itype))
6439 itype = signed_type_for (itype);
6440 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6441 ? -1 : 1));
6442 t = fold_build2 (PLUS_EXPR, itype,
6443 fold_convert (itype, fd->loops[i].step), t);
6444 t = fold_build2 (PLUS_EXPR, itype, t,
6445 fold_convert (itype, fd->loops[i].n2));
6446 if (fd->loops[i].m2)
6448 tree t2 = fold_convert (itype,
6449 fd->loops[i - fd->loops[i].outer].v);
6450 tree t3 = fold_convert (itype, fd->loops[i].m2);
6451 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6452 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6454 t = fold_build2 (MINUS_EXPR, itype, t,
6455 fold_convert (itype, fd->loops[i].v));
6456 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6457 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6458 fold_build1 (NEGATE_EXPR, itype, t),
6459 fold_build1 (NEGATE_EXPR, itype,
6460 fold_convert (itype,
6461 fd->loops[i].step)));
6462 else
6463 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6464 fold_convert (itype, fd->loops[i].step));
6465 t = fold_convert (type, t);
6466 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6467 min_arg1 = create_tmp_var (type);
6468 expand_omp_build_assign (&gsi, min_arg1, t2);
6469 min_arg2 = create_tmp_var (type);
6470 expand_omp_build_assign (&gsi, min_arg2, t);
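/* min_arg1 corresponds to "n2 - fd->loop.v" and min_arg2 to "cnt" in the
   formula above; the minimum of the two, added to the IV, is what later
   forms n2var. */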
6472 else
6474 if (TREE_CODE (n2) == INTEGER_CST)
6476 /* Indicate for lastprivate handling that at least one iteration
6477 has been performed, without wasting runtime. */
6478 if (integer_nonzerop (n2))
6479 expand_omp_build_assign (&gsi, fd->loop.v,
6480 fold_convert (type, n2));
6481 else
6482 /* Indicate that no iteration has been performed. */
6483 expand_omp_build_assign (&gsi, fd->loop.v,
6484 build_one_cst (type));
6486 else
6488 expand_omp_build_assign (&gsi, fd->loop.v,
6489 build_zero_cst (type));
6490 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6492 for (i = 0; i < fd->collapse; i++)
6494 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6495 if (fd->loops[i].m1)
6497 tree t2
6498 = fold_convert (TREE_TYPE (t),
6499 fd->loops[i - fd->loops[i].outer].v);
6500 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6501 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6502 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6504 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6505 /* For normal non-combined collapsed loops just initialize
6506 the outermost iterator in the entry_bb. */
6507 if (!broken_loop)
6508 break;
6512 else
6513 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6514 tree altv = NULL_TREE, altn2 = NULL_TREE;
6515 if (fd->collapse == 1
6516 && !broken_loop
6517 && TREE_CODE (orig_step) != INTEGER_CST)
6519 /* The vectorizer currently punts on loops with non-constant steps
6520 for the main IV (it can't compute the number of iterations and gives
6521 up because of that). Since for OpenMP loops it is always possible to
6522 compute the number of iterations upfront, use an alternate IV
6523 as the loop iterator:
6524 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6525 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6526 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6527 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6528 tree itype = TREE_TYPE (fd->loop.v);
6529 if (POINTER_TYPE_P (itype))
6530 itype = signed_type_for (itype);
6531 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6532 t = fold_build2 (PLUS_EXPR, itype,
6533 fold_convert (itype, step), t);
6534 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6535 t = fold_build2 (MINUS_EXPR, itype, t,
6536 fold_convert (itype, fd->loop.v));
6537 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6538 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6539 fold_build1 (NEGATE_EXPR, itype, t),
6540 fold_build1 (NEGATE_EXPR, itype,
6541 fold_convert (itype, step)));
6542 else
6543 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6544 fold_convert (itype, step));
6545 t = fold_convert (TREE_TYPE (altv), t);
6546 altn2 = create_tmp_var (TREE_TYPE (altv));
6547 expand_omp_build_assign (&gsi, altn2, t);
6548 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6549 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6550 true, GSI_SAME_STMT);
6551 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6552 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6553 build_zero_cst (TREE_TYPE (altv)));
6554 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6556 else if (fd->collapse > 1
6557 && !broken_loop
6558 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6559 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6561 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6562 altn2 = create_tmp_var (TREE_TYPE (altv));
6564 if (cond_var)
6566 if (POINTER_TYPE_P (type)
6567 || TREE_CODE (n1) != INTEGER_CST
6568 || fd->loop.cond_code != LT_EXPR
6569 || tree_int_cst_sgn (n1) != 1)
6570 expand_omp_build_assign (&gsi, cond_var,
6571 build_one_cst (TREE_TYPE (cond_var)));
6572 else
6573 expand_omp_build_assign (&gsi, cond_var,
6574 fold_convert (TREE_TYPE (cond_var), n1));
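/* The conditional lastprivate counter must start at a non-zero value: when
   n1 is a positive integer constant and the loop counts upwards with <, the
   IV value itself can serve as the counter, otherwise it starts at 1 and is
   bumped by 1 per iteration in cont_bb below. */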
6577 /* Remove the GIMPLE_OMP_FOR statement. */
6578 gsi_remove (&gsi, true);
6580 if (!broken_loop)
6582 /* Code to control the increment goes in the CONT_BB. */
6583 gsi = gsi_last_nondebug_bb (cont_bb);
6584 stmt = gsi_stmt (gsi);
6585 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6587 if (fd->collapse == 1
6588 || gimple_omp_for_combined_into_p (fd->for_stmt))
6590 if (POINTER_TYPE_P (type))
6591 t = fold_build_pointer_plus (fd->loop.v, step);
6592 else
6593 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6594 expand_omp_build_assign (&gsi, fd->loop.v, t);
6596 else if (TREE_CODE (n2) != INTEGER_CST)
6597 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6598 if (altv)
6600 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6601 build_one_cst (TREE_TYPE (altv)));
6602 expand_omp_build_assign (&gsi, altv, t);
6605 if (fd->collapse > 1)
6607 i = fd->collapse - 1;
6608 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6610 t = fold_convert (sizetype, fd->loops[i].step);
6611 t = fold_build_pointer_plus (fd->loops[i].v, t);
6613 else
6615 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6616 fd->loops[i].step);
6617 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6618 fd->loops[i].v, t);
6620 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6622 if (cond_var)
6624 if (POINTER_TYPE_P (type)
6625 || TREE_CODE (n1) != INTEGER_CST
6626 || fd->loop.cond_code != LT_EXPR
6627 || tree_int_cst_sgn (n1) != 1)
6628 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6629 build_one_cst (TREE_TYPE (cond_var)));
6630 else
6631 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6632 fold_convert (TREE_TYPE (cond_var), step));
6633 expand_omp_build_assign (&gsi, cond_var, t);
6636 /* Remove GIMPLE_OMP_CONTINUE. */
6637 gsi_remove (&gsi, true);
6640 /* Emit the condition in L1_BB. */
6641 gsi = gsi_start_bb (l1_bb);
6643 if (altv)
6644 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6645 else if (fd->collapse > 1
6646 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6647 && !broken_loop)
6649 i = fd->collapse - 1;
6650 tree itype = TREE_TYPE (fd->loops[i].v);
6651 if (fd->loops[i].m2)
6652 t = n2v = create_tmp_var (itype);
6653 else
6654 t = fold_convert (itype, fd->loops[i].n2);
6655 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6656 false, GSI_CONTINUE_LINKING);
6657 tree v = fd->loops[i].v;
6658 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6659 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6660 false, GSI_CONTINUE_LINKING);
6661 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6663 else
6665 if (fd->collapse > 1 && !broken_loop)
6666 t = n2var;
6667 else
6668 t = fold_convert (type, n2);
6669 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6670 false, GSI_CONTINUE_LINKING);
6671 tree v = fd->loop.v;
6672 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6673 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6674 false, GSI_CONTINUE_LINKING);
6675 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6677 cond_stmt = gimple_build_cond_empty (t);
6678 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6679 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6680 NULL, NULL)
6681 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6682 NULL, NULL))
6684 gsi = gsi_for_stmt (cond_stmt);
6685 gimple_regimplify_operands (cond_stmt, &gsi);
6688 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6689 if (is_simt)
6691 gsi = gsi_start_bb (l2_bb);
6692 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6693 if (POINTER_TYPE_P (type))
6694 t = fold_build_pointer_plus (fd->loop.v, step);
6695 else
6696 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6697 expand_omp_build_assign (&gsi, fd->loop.v, t);
6700 /* Remove GIMPLE_OMP_RETURN. */
6701 gsi = gsi_last_nondebug_bb (exit_bb);
6702 gsi_remove (&gsi, true);
6704 /* Connect the new blocks. */
6705 remove_edge (FALLTHRU_EDGE (entry_bb));
6707 if (!broken_loop)
6709 remove_edge (BRANCH_EDGE (entry_bb));
6710 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6712 e = BRANCH_EDGE (l1_bb);
6713 ne = FALLTHRU_EDGE (l1_bb);
6714 e->flags = EDGE_TRUE_VALUE;
6716 else
6718 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6720 ne = single_succ_edge (l1_bb);
6721 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6724 ne->flags = EDGE_FALSE_VALUE;
6725 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6726 ne->probability = e->probability.invert ();
6728 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6729 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6731 if (simt_maxlane)
6733 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6734 NULL_TREE, NULL_TREE);
6735 gsi = gsi_last_bb (entry_bb);
6736 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6737 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6738 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6739 FALLTHRU_EDGE (entry_bb)->probability
6740 = profile_probability::guessed_always ().apply_scale (7, 8);
6741 BRANCH_EDGE (entry_bb)->probability
6742 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6743 l2_dom_bb = entry_bb;
6745 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6747 if (!broken_loop && fd->collapse > 1)
6749 basic_block last_bb = l1_bb;
6750 basic_block init_bb = NULL;
6751 for (i = fd->collapse - 2; i >= 0; i--)
6753 tree nextn2v = NULL_TREE;
6754 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6755 e = EDGE_SUCC (last_bb, 0);
6756 else
6757 e = EDGE_SUCC (last_bb, 1);
6758 basic_block bb = split_edge (e);
6759 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6761 t = fold_convert (sizetype, fd->loops[i].step);
6762 t = fold_build_pointer_plus (fd->loops[i].v, t);
6764 else
6766 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6767 fd->loops[i].step);
6768 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6769 fd->loops[i].v, t);
6771 gsi = gsi_after_labels (bb);
6772 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6774 bb = split_block (bb, last_stmt (bb))->dest;
6775 gsi = gsi_start_bb (bb);
6776 tree itype = TREE_TYPE (fd->loops[i].v);
6777 if (fd->loops[i].m2)
6778 t = nextn2v = create_tmp_var (itype);
6779 else
6780 t = fold_convert (itype, fd->loops[i].n2);
6781 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6782 false, GSI_CONTINUE_LINKING);
6783 tree v = fd->loops[i].v;
6784 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6785 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6786 false, GSI_CONTINUE_LINKING);
6787 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6788 cond_stmt = gimple_build_cond_empty (t);
6789 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6790 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6791 expand_omp_regimplify_p, NULL, NULL)
6792 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6793 expand_omp_regimplify_p, NULL, NULL))
6795 gsi = gsi_for_stmt (cond_stmt);
6796 gimple_regimplify_operands (cond_stmt, &gsi);
6798 ne = single_succ_edge (bb);
6799 ne->flags = EDGE_FALSE_VALUE;
6801 init_bb = create_empty_bb (bb);
6802 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6803 add_bb_to_loop (init_bb, bb->loop_father);
6804 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6805 e->probability
6806 = profile_probability::guessed_always ().apply_scale (7, 8);
6807 ne->probability = e->probability.invert ();
6809 gsi = gsi_after_labels (init_bb);
6810 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6811 fd->loops[i + 1].n1);
6812 if (fd->loops[i + 1].m1)
6814 tree t2 = fold_convert (TREE_TYPE (t),
6815 fd->loops[i + 1
6816 - fd->loops[i + 1].outer].v);
6817 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6818 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6819 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6821 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6822 if (fd->loops[i + 1].m2)
6824 if (i + 2 == fd->collapse && (n2var || altv))
6826 gcc_assert (n2v == NULL_TREE);
6827 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6829 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6830 fd->loops[i + 1].n2);
6831 tree t2 = fold_convert (TREE_TYPE (t),
6832 fd->loops[i + 1
6833 - fd->loops[i + 1].outer].v);
6834 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6835 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6836 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6837 expand_omp_build_assign (&gsi, n2v, t);
6839 if (i + 2 == fd->collapse && n2var)
6841 /* For composite simd, n2 is the first iteration the current
6842 task shouldn't already handle, so we effectively want to use
6843 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6844 as the vectorized loop. Except the vectorizer will not
6845 vectorize that, so instead compute N2VAR as
6846 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6847 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6848 as the loop to vectorize. */
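/* For example (illustrative numbers only): with V = 7, N2 = 10 and
   COUNTS3 = 5 remaining inner iterations, N2VAR = 7 + MIN (10 - 7, 5) = 10,
   i.e. the vectorized inner loop is clipped to the end of the range this
   task was handed.  */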
6849 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6850 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6852 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6853 == LT_EXPR ? -1 : 1));
6854 t = fold_build2 (PLUS_EXPR, itype,
6855 fold_convert (itype,
6856 fd->loops[i + 1].step), t);
6857 if (fd->loops[i + 1].m2)
6858 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6859 else
6860 t = fold_build2 (PLUS_EXPR, itype, t,
6861 fold_convert (itype,
6862 fd->loops[i + 1].n2));
6863 t = fold_build2 (MINUS_EXPR, itype, t,
6864 fold_convert (itype, fd->loops[i + 1].v));
6865 tree step = fold_convert (itype, fd->loops[i + 1].step);
6866 if (TYPE_UNSIGNED (itype)
6867 && fd->loops[i + 1].cond_code == GT_EXPR)
6868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6869 fold_build1 (NEGATE_EXPR, itype, t),
6870 fold_build1 (NEGATE_EXPR, itype, step));
6871 else
6872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6873 t = fold_convert (type, t);
6875 else
6876 t = counts[i + 1];
6877 expand_omp_build_assign (&gsi, min_arg1, t2);
6878 expand_omp_build_assign (&gsi, min_arg2, t);
6879 e = split_block (init_bb, last_stmt (init_bb));
6880 gsi = gsi_after_labels (e->dest);
6881 init_bb = e->dest;
6882 remove_edge (FALLTHRU_EDGE (entry_bb));
6883 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6884 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6885 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6886 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6887 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6888 expand_omp_build_assign (&gsi, n2var, t);
6890 if (i + 2 == fd->collapse && altv)
6892 /* The vectorizer currently punts on loops with non-constant
6893 steps for the main IV (can't compute number of iterations
6894 and gives up because of that). Since for OpenMP loops it is
6895 always possible to compute the number of iterations upfront,
6896 use an alternate IV as the loop iterator. */
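/* Sketch of the intent (descriptive only): ALTV runs from 0 up to the
   iteration count ALTN2 computed below (forced to 0 when the guarding
   condition fails), so the loop the vectorizer sees has a simple
   constant-step IV even though the user's IV may not.  */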
6897 expand_omp_build_assign (&gsi, altv,
6898 build_zero_cst (TREE_TYPE (altv)));
6899 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6900 if (POINTER_TYPE_P (itype))
6901 itype = signed_type_for (itype);
6902 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6903 ? -1 : 1));
6904 t = fold_build2 (PLUS_EXPR, itype,
6905 fold_convert (itype, fd->loops[i + 1].step), t);
6906 t = fold_build2 (PLUS_EXPR, itype, t,
6907 fold_convert (itype,
6908 fd->loops[i + 1].m2
6909 ? n2v : fd->loops[i + 1].n2));
6910 t = fold_build2 (MINUS_EXPR, itype, t,
6911 fold_convert (itype, fd->loops[i + 1].v));
6912 tree step = fold_convert (itype, fd->loops[i + 1].step);
6913 if (TYPE_UNSIGNED (itype)
6914 && fd->loops[i + 1].cond_code == GT_EXPR)
6915 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6916 fold_build1 (NEGATE_EXPR, itype, t),
6917 fold_build1 (NEGATE_EXPR, itype, step));
6918 else
6919 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6920 t = fold_convert (TREE_TYPE (altv), t);
6921 expand_omp_build_assign (&gsi, altn2, t);
6922 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6923 fd->loops[i + 1].m2
6924 ? n2v : fd->loops[i + 1].n2);
6925 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6926 true, GSI_SAME_STMT);
6927 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6928 fd->loops[i + 1].v, t2);
6929 gassign *g
6930 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6931 build_zero_cst (TREE_TYPE (altv)));
6932 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6934 n2v = nextn2v;
6936 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6937 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6939 e = find_edge (entry_bb, last_bb);
6940 redirect_edge_succ (e, bb);
6941 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6942 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6945 last_bb = bb;
6948 if (!broken_loop)
6950 class loop *loop = alloc_loop ();
6951 loop->header = l1_bb;
6952 loop->latch = cont_bb;
6953 add_loop (loop, l1_bb->loop_father);
6954 loop->safelen = safelen_int;
6955 if (simduid)
6957 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6958 cfun->has_simduid_loops = true;
6960 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6961 the loop. */
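/* E.g. a hypothetical '#pragma omp simd simdlen(8) safelen(16)' reaches
   this point with loop->safelen == 16; assuming loop vectorization is not
   disabled, force_vectorize is set and loop->simdlen becomes 8 because it
   does not exceed the safelen bound checked below.  */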
6962 if ((flag_tree_loop_vectorize
6963 || !global_options_set.x_flag_tree_loop_vectorize)
6964 && flag_tree_loop_optimize
6965 && loop->safelen > 1)
6967 loop->force_vectorize = true;
6968 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6970 unsigned HOST_WIDE_INT v
6971 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6972 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6973 loop->simdlen = v;
6975 cfun->has_force_vectorize_loops = true;
6977 else if (dont_vectorize)
6978 loop->dont_vectorize = true;
6980 else if (simduid)
6981 cfun->has_simduid_loops = true;
6984 /* The taskloop construct is represented after gimplification with
6985 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6986 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6987 which should just compute all the needed loop temporaries
6988 for GIMPLE_OMP_TASK. */
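/* Illustrative sketch (not a literal GIMPLE dump): for
     #pragma omp taskloop
     for (i = 0; i < n; i++) body;
   gimplification produces roughly
     GIMPLE_OMP_FOR            <- outer, expanded by this function
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR        <- inner, expanded separately
           body
   and the expansion below only fills the _looptemp_ variables (the start
   and end of the whole iteration space) that the task consumes.  */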
6990 static void
6991 expand_omp_taskloop_for_outer (struct omp_region *region,
6992 struct omp_for_data *fd,
6993 gimple *inner_stmt)
6995 tree type, bias = NULL_TREE;
6996 basic_block entry_bb, cont_bb, exit_bb;
6997 gimple_stmt_iterator gsi;
6998 gassign *assign_stmt;
6999 tree *counts = NULL;
7000 int i;
7002 gcc_assert (inner_stmt);
7003 gcc_assert (region->cont);
7004 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7005 && gimple_omp_task_taskloop_p (inner_stmt));
7006 type = TREE_TYPE (fd->loop.v);
7008 /* See if we need to bias by LLONG_MIN. */
7009 if (fd->iter_type == long_long_unsigned_type_node
7010 && TREE_CODE (type) == INTEGER_TYPE
7011 && !TYPE_UNSIGNED (type))
7013 tree n1, n2;
7015 if (fd->loop.cond_code == LT_EXPR)
7017 n1 = fd->loop.n1;
7018 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7020 else
7022 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7023 n2 = fd->loop.n1;
7025 if (TREE_CODE (n1) != INTEGER_CST
7026 || TREE_CODE (n2) != INTEGER_CST
7027 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7028 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
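/* Descriptive note: the bias is the most negative value of the iterator
   type reinterpreted in the unsigned 64-bit iteration type; adding it to
   both bounds shifts the signed range into the unsigned range while
   preserving order, so the *_ull runtime entry points remain usable.  */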
7031 entry_bb = region->entry;
7032 cont_bb = region->cont;
7033 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7034 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7035 exit_bb = region->exit;
7037 gsi = gsi_last_nondebug_bb (entry_bb);
7038 gimple *for_stmt = gsi_stmt (gsi);
7039 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7040 if (fd->collapse > 1)
7042 int first_zero_iter = -1, dummy = -1;
7043 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7045 counts = XALLOCAVEC (tree, fd->collapse);
7046 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7047 zero_iter_bb, first_zero_iter,
7048 dummy_bb, dummy, l2_dom_bb);
7050 if (zero_iter_bb)
7052 /* Some counts[i] vars might be uninitialized if
7053 some loop has zero iterations. But the body shouldn't
7054 be executed in that case, so just avoid uninit warnings. */
7055 for (i = first_zero_iter; i < fd->collapse; i++)
7056 if (SSA_VAR_P (counts[i]))
7057 suppress_warning (counts[i], OPT_Wuninitialized);
7058 gsi_prev (&gsi);
7059 edge e = split_block (entry_bb, gsi_stmt (gsi));
7060 entry_bb = e->dest;
7061 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7062 gsi = gsi_last_bb (entry_bb);
7063 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7064 get_immediate_dominator (CDI_DOMINATORS,
7065 zero_iter_bb));
7069 tree t0, t1;
7070 t1 = fd->loop.n2;
7071 t0 = fd->loop.n1;
7072 if (POINTER_TYPE_P (TREE_TYPE (t0))
7073 && TYPE_PRECISION (TREE_TYPE (t0))
7074 != TYPE_PRECISION (fd->iter_type))
7076 /* Avoid casting pointers to an integer of a different size. */
7077 tree itype = signed_type_for (type);
7078 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7079 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7081 else
7083 t1 = fold_convert (fd->iter_type, t1);
7084 t0 = fold_convert (fd->iter_type, t0);
7086 if (bias)
7088 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7089 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7092 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7093 OMP_CLAUSE__LOOPTEMP_);
7094 gcc_assert (innerc);
7095 tree startvar = OMP_CLAUSE_DECL (innerc);
7096 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7097 gcc_assert (innerc);
7098 tree endvar = OMP_CLAUSE_DECL (innerc);
7099 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7101 innerc = find_lastprivate_looptemp (fd, innerc);
7102 if (innerc)
7104 /* If needed (inner taskloop has lastprivate clause), propagate
7105 down the total number of iterations. */
7106 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7107 NULL_TREE, false,
7108 GSI_CONTINUE_LINKING);
7109 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7110 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7114 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7115 GSI_CONTINUE_LINKING);
7116 assign_stmt = gimple_build_assign (startvar, t0);
7117 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7119 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7120 GSI_CONTINUE_LINKING);
7121 assign_stmt = gimple_build_assign (endvar, t1);
7122 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7123 if (fd->collapse > 1)
7124 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7126 /* Remove the GIMPLE_OMP_FOR statement. */
7127 gsi = gsi_for_stmt (for_stmt);
7128 gsi_remove (&gsi, true);
7130 gsi = gsi_last_nondebug_bb (cont_bb);
7131 gsi_remove (&gsi, true);
7133 gsi = gsi_last_nondebug_bb (exit_bb);
7134 gsi_remove (&gsi, true);
7136 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7137 remove_edge (BRANCH_EDGE (entry_bb));
7138 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7139 remove_edge (BRANCH_EDGE (cont_bb));
7140 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7141 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7142 recompute_dominator (CDI_DOMINATORS, region->entry));
7145 /* The taskloop construct is represented after gimplification with
7146 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7147 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7148 The GOMP_taskloop{,_ull} function arranges for each task to be given just
7149 a single range of iterations. */
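/* Illustrative sketch: the inner GIMPLE_OMP_FOR expanded here no longer
   walks the user's whole range; the two _looptemp_ clauses looked up below
   carry the [start, end) slice that GOMP_taskloop{,_ull} assigned to this
   particular task, and the generated sequential loop iterates over just
   that slice.  */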
7151 static void
7152 expand_omp_taskloop_for_inner (struct omp_region *region,
7153 struct omp_for_data *fd,
7154 gimple *inner_stmt)
7156 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7157 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7158 basic_block fin_bb;
7159 gimple_stmt_iterator gsi;
7160 edge ep;
7161 bool broken_loop = region->cont == NULL;
7162 tree *counts = NULL;
7163 tree n1, n2, step;
7165 itype = type = TREE_TYPE (fd->loop.v);
7166 if (POINTER_TYPE_P (type))
7167 itype = signed_type_for (type);
7169 /* See if we need to bias by LLONG_MIN. */
7170 if (fd->iter_type == long_long_unsigned_type_node
7171 && TREE_CODE (type) == INTEGER_TYPE
7172 && !TYPE_UNSIGNED (type))
7174 tree n1, n2;
7176 if (fd->loop.cond_code == LT_EXPR)
7178 n1 = fd->loop.n1;
7179 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7181 else
7183 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7184 n2 = fd->loop.n1;
7186 if (TREE_CODE (n1) != INTEGER_CST
7187 || TREE_CODE (n2) != INTEGER_CST
7188 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7189 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7192 entry_bb = region->entry;
7193 cont_bb = region->cont;
7194 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7195 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7196 gcc_assert (broken_loop
7197 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7198 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7199 if (!broken_loop)
7201 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7202 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7204 exit_bb = region->exit;
7206 /* Iteration space partitioning goes in ENTRY_BB. */
7207 gsi = gsi_last_nondebug_bb (entry_bb);
7208 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7210 if (fd->collapse > 1)
7212 int first_zero_iter = -1, dummy = -1;
7213 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7215 counts = XALLOCAVEC (tree, fd->collapse);
7216 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7217 fin_bb, first_zero_iter,
7218 dummy_bb, dummy, l2_dom_bb);
7219 t = NULL_TREE;
7221 else
7222 t = integer_one_node;
7224 step = fd->loop.step;
7225 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7226 OMP_CLAUSE__LOOPTEMP_);
7227 gcc_assert (innerc);
7228 n1 = OMP_CLAUSE_DECL (innerc);
7229 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7230 gcc_assert (innerc);
7231 n2 = OMP_CLAUSE_DECL (innerc);
7232 if (bias)
7234 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7235 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7237 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7238 true, NULL_TREE, true, GSI_SAME_STMT);
7239 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7240 true, NULL_TREE, true, GSI_SAME_STMT);
7241 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7242 true, NULL_TREE, true, GSI_SAME_STMT);
7244 tree startvar = fd->loop.v;
7245 tree endvar = NULL_TREE;
7247 if (gimple_omp_for_combined_p (fd->for_stmt))
7249 tree clauses = gimple_omp_for_clauses (inner_stmt);
7250 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7251 gcc_assert (innerc);
7252 startvar = OMP_CLAUSE_DECL (innerc);
7253 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7254 OMP_CLAUSE__LOOPTEMP_);
7255 gcc_assert (innerc);
7256 endvar = OMP_CLAUSE_DECL (innerc);
7258 t = fold_convert (TREE_TYPE (startvar), n1);
7259 t = force_gimple_operand_gsi (&gsi, t,
7260 DECL_P (startvar)
7261 && TREE_ADDRESSABLE (startvar),
7262 NULL_TREE, false, GSI_CONTINUE_LINKING);
7263 gimple *assign_stmt = gimple_build_assign (startvar, t);
7264 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7266 t = fold_convert (TREE_TYPE (startvar), n2);
7267 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7268 false, GSI_CONTINUE_LINKING);
7269 if (endvar)
7271 assign_stmt = gimple_build_assign (endvar, e);
7272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7273 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7274 assign_stmt = gimple_build_assign (fd->loop.v, e);
7275 else
7276 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7277 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7280 tree *nonrect_bounds = NULL;
7281 if (fd->collapse > 1)
7283 if (fd->non_rect)
7285 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7286 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7288 gcc_assert (gsi_bb (gsi) == entry_bb);
7289 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7290 startvar);
7291 entry_bb = gsi_bb (gsi);
7294 if (!broken_loop)
7296 /* The code controlling the sequential loop replaces the
7297 GIMPLE_OMP_CONTINUE. */
7298 gsi = gsi_last_nondebug_bb (cont_bb);
7299 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7300 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7301 vmain = gimple_omp_continue_control_use (cont_stmt);
7302 vback = gimple_omp_continue_control_def (cont_stmt);
7304 if (!gimple_omp_for_combined_p (fd->for_stmt))
7306 if (POINTER_TYPE_P (type))
7307 t = fold_build_pointer_plus (vmain, step);
7308 else
7309 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7310 t = force_gimple_operand_gsi (&gsi, t,
7311 DECL_P (vback)
7312 && TREE_ADDRESSABLE (vback),
7313 NULL_TREE, true, GSI_SAME_STMT);
7314 assign_stmt = gimple_build_assign (vback, t);
7315 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7317 t = build2 (fd->loop.cond_code, boolean_type_node,
7318 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7319 ? t : vback, e);
7320 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7323 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7324 gsi_remove (&gsi, true);
7326 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7327 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7328 cont_bb, body_bb);
7331 /* Remove the GIMPLE_OMP_FOR statement. */
7332 gsi = gsi_for_stmt (fd->for_stmt);
7333 gsi_remove (&gsi, true);
7335 /* Remove the GIMPLE_OMP_RETURN statement. */
7336 gsi = gsi_last_nondebug_bb (exit_bb);
7337 gsi_remove (&gsi, true);
7339 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7340 if (!broken_loop)
7341 remove_edge (BRANCH_EDGE (entry_bb));
7342 else
7344 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7345 region->outer->cont = NULL;
7348 /* Connect all the blocks. */
7349 if (!broken_loop)
7351 ep = find_edge (cont_bb, body_bb);
7352 if (gimple_omp_for_combined_p (fd->for_stmt))
7354 remove_edge (ep);
7355 ep = NULL;
7357 else if (fd->collapse > 1)
7359 remove_edge (ep);
7360 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7362 else
7363 ep->flags = EDGE_TRUE_VALUE;
7364 find_edge (cont_bb, fin_bb)->flags
7365 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7368 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7369 recompute_dominator (CDI_DOMINATORS, body_bb));
7370 if (!broken_loop)
7371 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7372 recompute_dominator (CDI_DOMINATORS, fin_bb));
7374 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7376 class loop *loop = alloc_loop ();
7377 loop->header = body_bb;
7378 if (collapse_bb == NULL)
7379 loop->latch = cont_bb;
7380 add_loop (loop, body_bb->loop_father);
7384 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7385 partitioned loop. The lowering here is abstracted, in that the
7386 loop parameters are passed through internal functions, which are
7387 further lowered by oacc_device_lower, once we get to the target
7388 compiler. The loop is of the form:
7390 for (V = B; V LTGT E; V += S) {BODY}
7392 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7393 (constant 0 for no chunking) and we will have a GWV partitioning
7394 mask, specifying dimensions over which the loop is to be
7395 partitioned (see note below). We generate code that looks like
7396 (this ignores tiling):
7398 <entry_bb> [incoming FALL->body, BRANCH->exit]
7399 typedef signedintify (typeof (V)) T; // underlying signed integral type
7400 T range = E - B;
7401 T chunk_no = 0;
7402 T DIR = LTGT == '<' ? +1 : -1;
7403 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7404 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7406 <head_bb> [created by splitting end of entry_bb]
7407 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7408 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7409 if (!(offset LTGT bound)) goto bottom_bb;
7411 <body_bb> [incoming]
7412 V = B + offset;
7413 {BODY}
7415 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7416 offset += step;
7417 if (offset LTGT bound) goto body_bb; [*]
7419 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7420 chunk_no++;
7421 if (chunk_no < chunk_max) goto head_bb;
7423 <exit_bb> [incoming]
7424 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7426 [*] Needed if V live at end of loop. */
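/* Reading aid (descriptive only): the GOACC_LOOP_{CHUNKS,STEP,OFFSET,BOUND}
   operations above are emitted as IFN_GOACC_LOOP internal calls whose first
   argument selects the operation.  GWV is a mask built from
   GOMP_DIM_MASK (GOMP_DIM_*) values naming the partitioned dimensions (the
   SSA parallelizer below uses just the gang dimension); the calls stay
   opaque until oacc_device_lower rewrites them for the offload target.  */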
7428 static void
7429 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7431 bool is_oacc_kernels_parallelized
7432 = (lookup_attribute ("oacc kernels parallelized",
7433 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7435 bool is_oacc_kernels
7436 = (lookup_attribute ("oacc kernels",
7437 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7438 if (is_oacc_kernels_parallelized)
7439 gcc_checking_assert (is_oacc_kernels);
7441 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7442 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7443 for SSA specifics, and some are for 'parloops' OpenACC
7444 'kernels'-parallelized specifics. */
7446 tree v = fd->loop.v;
7447 enum tree_code cond_code = fd->loop.cond_code;
7448 enum tree_code plus_code = PLUS_EXPR;
7450 tree chunk_size = integer_minus_one_node;
7451 tree gwv = integer_zero_node;
7452 tree iter_type = TREE_TYPE (v);
7453 tree diff_type = iter_type;
7454 tree plus_type = iter_type;
7455 struct oacc_collapse *counts = NULL;
7457 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7458 == GF_OMP_FOR_KIND_OACC_LOOP);
7459 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7460 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7462 if (POINTER_TYPE_P (iter_type))
7464 plus_code = POINTER_PLUS_EXPR;
7465 plus_type = sizetype;
7467 for (int ix = fd->collapse; ix--;)
7469 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7470 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7471 diff_type = diff_type2;
7473 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7474 diff_type = signed_type_for (diff_type);
7475 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7476 diff_type = integer_type_node;
7478 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7479 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7480 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7481 basic_block bottom_bb = NULL;
7483 /* entry_bb has two successors; the branch edge is to the exit
7484 block, fallthrough edge to body. */
7485 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7486 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7488 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7489 body_bb, or to a block whose only successor is the body_bb. Its
7490 fallthrough successor is the final block (same as the branch
7491 successor of the entry_bb). */
7492 if (cont_bb)
7494 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7495 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7497 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7498 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7500 else
7501 gcc_assert (!gimple_in_ssa_p (cfun));
7503 /* The exit block only has entry_bb and cont_bb as predecessors. */
7504 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7506 tree chunk_no;
7507 tree chunk_max = NULL_TREE;
7508 tree bound, offset;
7509 tree step = create_tmp_var (diff_type, ".step");
7510 bool up = cond_code == LT_EXPR;
7511 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7512 bool chunking = !gimple_in_ssa_p (cfun);
7513 bool negating;
7515 /* Tiling vars. */
7516 tree tile_size = NULL_TREE;
7517 tree element_s = NULL_TREE;
7518 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7519 basic_block elem_body_bb = NULL;
7520 basic_block elem_cont_bb = NULL;
7522 /* SSA instances. */
7523 tree offset_incr = NULL_TREE;
7524 tree offset_init = NULL_TREE;
7526 gimple_stmt_iterator gsi;
7527 gassign *ass;
7528 gcall *call;
7529 gimple *stmt;
7530 tree expr;
7531 location_t loc;
7532 edge split, be, fte;
7534 /* Split the end of entry_bb to create head_bb. */
7535 split = split_block (entry_bb, last_stmt (entry_bb));
7536 basic_block head_bb = split->dest;
7537 entry_bb = split->src;
7539 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7540 gsi = gsi_last_nondebug_bb (entry_bb);
7541 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7542 loc = gimple_location (for_stmt);
7544 if (gimple_in_ssa_p (cfun))
7546 offset_init = gimple_omp_for_index (for_stmt, 0);
7547 gcc_assert (integer_zerop (fd->loop.n1));
7548 /* The SSA parallelizer does gang parallelism. */
7549 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7552 if (fd->collapse > 1 || fd->tiling)
7554 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7555 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7556 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7557 TREE_TYPE (fd->loop.n2), loc);
7559 if (SSA_VAR_P (fd->loop.n2))
7561 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7562 true, GSI_SAME_STMT);
7563 ass = gimple_build_assign (fd->loop.n2, total);
7564 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7568 tree b = fd->loop.n1;
7569 tree e = fd->loop.n2;
7570 tree s = fd->loop.step;
7572 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7573 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7575 /* Convert the step, avoiding possible unsigned->signed overflow. */
7576 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7577 if (negating)
7578 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7579 s = fold_convert (diff_type, s);
7580 if (negating)
7581 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7582 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7584 if (!chunking)
7585 chunk_size = integer_zero_node;
7586 expr = fold_convert (diff_type, chunk_size);
7587 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7588 NULL_TREE, true, GSI_SAME_STMT);
7590 if (fd->tiling)
7592 /* Determine the tile size and element step,
7593 modify the outer loop step size. */
7594 tile_size = create_tmp_var (diff_type, ".tile_size");
7595 expr = build_int_cst (diff_type, 1);
7596 for (int ix = 0; ix < fd->collapse; ix++)
7597 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7598 expr = force_gimple_operand_gsi (&gsi, expr, true,
7599 NULL_TREE, true, GSI_SAME_STMT);
7600 ass = gimple_build_assign (tile_size, expr);
7601 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7603 element_s = create_tmp_var (diff_type, ".element_s");
7604 ass = gimple_build_assign (element_s, s);
7605 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7607 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7608 s = force_gimple_operand_gsi (&gsi, expr, true,
7609 NULL_TREE, true, GSI_SAME_STMT);
7612 /* Determine the range, avoiding possible unsigned->signed overflow. */
7613 negating = !up && TYPE_UNSIGNED (iter_type);
7614 expr = fold_build2 (MINUS_EXPR, plus_type,
7615 fold_convert (plus_type, negating ? b : e),
7616 fold_convert (plus_type, negating ? e : b));
7617 expr = fold_convert (diff_type, expr);
7618 if (negating)
7619 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7620 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7621 NULL_TREE, true, GSI_SAME_STMT);
7623 chunk_no = build_int_cst (diff_type, 0);
7624 if (chunking)
7626 gcc_assert (!gimple_in_ssa_p (cfun));
7628 expr = chunk_no;
7629 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7630 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7632 ass = gimple_build_assign (chunk_no, expr);
7633 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7635 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7636 build_int_cst (integer_type_node,
7637 IFN_GOACC_LOOP_CHUNKS),
7638 dir, range, s, chunk_size, gwv);
7639 gimple_call_set_lhs (call, chunk_max);
7640 gimple_set_location (call, loc);
7641 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7643 else
7644 chunk_size = chunk_no;
7646 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7647 build_int_cst (integer_type_node,
7648 IFN_GOACC_LOOP_STEP),
7649 dir, range, s, chunk_size, gwv);
7650 gimple_call_set_lhs (call, step);
7651 gimple_set_location (call, loc);
7652 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7654 /* Remove the GIMPLE_OMP_FOR. */
7655 gsi_remove (&gsi, true);
7657 /* Fixup edges from head_bb. */
7658 be = BRANCH_EDGE (head_bb);
7659 fte = FALLTHRU_EDGE (head_bb);
7660 be->flags |= EDGE_FALSE_VALUE;
7661 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7663 basic_block body_bb = fte->dest;
7665 if (gimple_in_ssa_p (cfun))
7667 gsi = gsi_last_nondebug_bb (cont_bb);
7668 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7670 offset = gimple_omp_continue_control_use (cont_stmt);
7671 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7673 else
7675 offset = create_tmp_var (diff_type, ".offset");
7676 offset_init = offset_incr = offset;
7678 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7680 /* Loop offset & bound go into head_bb. */
7681 gsi = gsi_start_bb (head_bb);
7683 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7684 build_int_cst (integer_type_node,
7685 IFN_GOACC_LOOP_OFFSET),
7686 dir, range, s,
7687 chunk_size, gwv, chunk_no);
7688 gimple_call_set_lhs (call, offset_init);
7689 gimple_set_location (call, loc);
7690 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7692 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7693 build_int_cst (integer_type_node,
7694 IFN_GOACC_LOOP_BOUND),
7695 dir, range, s,
7696 chunk_size, gwv, offset_init);
7697 gimple_call_set_lhs (call, bound);
7698 gimple_set_location (call, loc);
7699 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7701 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7702 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7703 GSI_CONTINUE_LINKING);
7705 /* V assignment goes into body_bb. */
7706 if (!gimple_in_ssa_p (cfun))
7708 gsi = gsi_start_bb (body_bb);
7710 expr = build2 (plus_code, iter_type, b,
7711 fold_convert (plus_type, offset));
7712 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7713 true, GSI_SAME_STMT);
7714 ass = gimple_build_assign (v, expr);
7715 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7717 if (fd->collapse > 1 || fd->tiling)
7718 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7720 if (fd->tiling)
7722 /* Determine the range of the element loop -- usually simply
7723 the tile_size, but could be smaller if the final
7724 iteration of the outer loop is a partial tile. */
7725 tree e_range = create_tmp_var (diff_type, ".e_range");
7727 expr = build2 (MIN_EXPR, diff_type,
7728 build2 (MINUS_EXPR, diff_type, bound, offset),
7729 build2 (MULT_EXPR, diff_type, tile_size,
7730 element_s));
7731 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7732 true, GSI_SAME_STMT);
7733 ass = gimple_build_assign (e_range, expr);
7734 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7736 /* Determine bound, offset & step of inner loop. */
7737 e_bound = create_tmp_var (diff_type, ".e_bound");
7738 e_offset = create_tmp_var (diff_type, ".e_offset");
7739 e_step = create_tmp_var (diff_type, ".e_step");
7741 /* Mark these as element loops. */
7742 tree t, e_gwv = integer_minus_one_node;
7743 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7745 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7746 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7747 element_s, chunk, e_gwv, chunk);
7748 gimple_call_set_lhs (call, e_offset);
7749 gimple_set_location (call, loc);
7750 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7752 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7753 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7754 element_s, chunk, e_gwv, e_offset);
7755 gimple_call_set_lhs (call, e_bound);
7756 gimple_set_location (call, loc);
7757 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7759 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7760 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7761 element_s, chunk, e_gwv);
7762 gimple_call_set_lhs (call, e_step);
7763 gimple_set_location (call, loc);
7764 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7766 /* Add test and split block. */
7767 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7768 stmt = gimple_build_cond_empty (expr);
7769 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7770 split = split_block (body_bb, stmt);
7771 elem_body_bb = split->dest;
7772 if (cont_bb == body_bb)
7773 cont_bb = elem_body_bb;
7774 body_bb = split->src;
7776 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7778 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7779 if (cont_bb == NULL)
7781 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7782 e->probability = profile_probability::even ();
7783 split->probability = profile_probability::even ();
7786 /* Initialize the user's loop vars. */
7787 gsi = gsi_start_bb (elem_body_bb);
7788 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7789 diff_type);
7793 /* Loop increment goes into cont_bb. If this is not a loop, we
7794 will have spawned threads as if it was, and each one will
7795 execute one iteration. The specification is not explicit about
7796 whether such constructs are ill-formed or not, and they can
7797 occur, especially when noreturn routines are involved. */
7798 if (cont_bb)
7800 gsi = gsi_last_nondebug_bb (cont_bb);
7801 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7802 loc = gimple_location (cont_stmt);
7804 if (fd->tiling)
7806 /* Insert element loop increment and test. */
7807 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7808 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7809 true, GSI_SAME_STMT);
7810 ass = gimple_build_assign (e_offset, expr);
7811 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7812 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7814 stmt = gimple_build_cond_empty (expr);
7815 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7816 split = split_block (cont_bb, stmt);
7817 elem_cont_bb = split->src;
7818 cont_bb = split->dest;
7820 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7821 split->probability = profile_probability::unlikely ().guessed ();
7822 edge latch_edge
7823 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7824 latch_edge->probability = profile_probability::likely ().guessed ();
7826 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7827 skip_edge->probability = profile_probability::unlikely ().guessed ();
7828 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7829 loop_entry_edge->probability
7830 = profile_probability::likely ().guessed ();
7832 gsi = gsi_for_stmt (cont_stmt);
7835 /* Increment offset. */
7836 if (gimple_in_ssa_p (cfun))
7837 expr = build2 (plus_code, iter_type, offset,
7838 fold_convert (plus_type, step));
7839 else
7840 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7841 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7842 true, GSI_SAME_STMT);
7843 ass = gimple_build_assign (offset_incr, expr);
7844 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7845 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7846 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7848 /* Remove the GIMPLE_OMP_CONTINUE. */
7849 gsi_remove (&gsi, true);
7851 /* Fixup edges from cont_bb. */
7852 be = BRANCH_EDGE (cont_bb);
7853 fte = FALLTHRU_EDGE (cont_bb);
7854 be->flags |= EDGE_TRUE_VALUE;
7855 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7857 if (chunking)
7859 /* Split the beginning of exit_bb to make bottom_bb. We
7860 need to insert a nop at the start, because splitting is
7861 after a stmt, not before. */
7862 gsi = gsi_start_bb (exit_bb);
7863 stmt = gimple_build_nop ();
7864 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7865 split = split_block (exit_bb, stmt);
7866 bottom_bb = split->src;
7867 exit_bb = split->dest;
7868 gsi = gsi_last_bb (bottom_bb);
7870 /* Chunk increment and test goes into bottom_bb. */
7871 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7872 build_int_cst (diff_type, 1));
7873 ass = gimple_build_assign (chunk_no, expr);
7874 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7876 /* Chunk test at end of bottom_bb. */
7877 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7878 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7879 GSI_CONTINUE_LINKING);
7881 /* Fixup edges from bottom_bb. */
7882 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7883 split->probability = profile_probability::unlikely ().guessed ();
7884 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7885 latch_edge->probability = profile_probability::likely ().guessed ();
7889 gsi = gsi_last_nondebug_bb (exit_bb);
7890 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7891 loc = gimple_location (gsi_stmt (gsi));
7893 if (!gimple_in_ssa_p (cfun))
7895 /* Insert the final value of V, in case it is live. This is the
7896 value for the only thread that survives past the join. */
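/* E.g. (illustrative numbers) for B = 0, E = 10, S = 3 and '<' as the
   condition: range = 10, dir = +1, so the computation below yields
   ((10 - 1 + 3) / 3) * 3 = 12 and V is set to 0 + 12, exactly the value
   the plain sequential loop would leave in V on exit.  */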
7897 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7898 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7899 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7900 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7901 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7902 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7903 true, GSI_SAME_STMT);
7904 ass = gimple_build_assign (v, expr);
7905 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7908 /* Remove the OMP_RETURN. */
7909 gsi_remove (&gsi, true);
7911 if (cont_bb)
7913 /* We now have one, two or three nested loops. Update the loop
7914 structures. */
7915 class loop *parent = entry_bb->loop_father;
7916 class loop *body = body_bb->loop_father;
7918 if (chunking)
7920 class loop *chunk_loop = alloc_loop ();
7921 chunk_loop->header = head_bb;
7922 chunk_loop->latch = bottom_bb;
7923 add_loop (chunk_loop, parent);
7924 parent = chunk_loop;
7926 else if (parent != body)
7928 gcc_assert (body->header == body_bb);
7929 gcc_assert (body->latch == cont_bb
7930 || single_pred (body->latch) == cont_bb);
7931 parent = NULL;
7934 if (parent)
7936 class loop *body_loop = alloc_loop ();
7937 body_loop->header = body_bb;
7938 body_loop->latch = cont_bb;
7939 add_loop (body_loop, parent);
7941 if (fd->tiling)
7943 /* Insert tiling's element loop. */
7944 class loop *inner_loop = alloc_loop ();
7945 inner_loop->header = elem_body_bb;
7946 inner_loop->latch = elem_cont_bb;
7947 add_loop (inner_loop, body_loop);
7953 /* Expand the OMP loop defined by REGION. */
7955 static void
7956 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7958 struct omp_for_data fd;
7959 struct omp_for_data_loop *loops;
7961 loops = XALLOCAVEC (struct omp_for_data_loop,
7962 gimple_omp_for_collapse (last_stmt (region->entry)));
7963 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7964 &fd, loops);
7965 region->sched_kind = fd.sched_kind;
7966 region->sched_modifiers = fd.sched_modifiers;
7967 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
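/* Descriptive note on the check below: for non-rectangular loops with
   constant factors, (m2 - m1) times the outer step must be a multiple of
   the inner step.  E.g. the hypothetical source
     for (i = 0; i < n; i++)
       for (j = i; j < 2 * i + 8; j += 3)
   has m1 = 1, m2 = 2 and outer step 1, so (2 - 1) * 1 is not a multiple
   of 3 and is rejected with the error below.  */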
7968 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7970 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7971 if ((loops[i].m1 || loops[i].m2)
7972 && (loops[i].m1 == NULL_TREE
7973 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7974 && (loops[i].m2 == NULL_TREE
7975 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7976 && TREE_CODE (loops[i].step) == INTEGER_CST
7977 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7979 tree t;
7980 tree itype = TREE_TYPE (loops[i].v);
7981 if (loops[i].m1 && loops[i].m2)
7982 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7983 else if (loops[i].m1)
7984 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7985 else
7986 t = loops[i].m2;
7987 t = fold_build2 (MULT_EXPR, itype, t,
7988 fold_convert (itype,
7989 loops[i - loops[i].outer].step));
7990 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7991 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7992 fold_build1 (NEGATE_EXPR, itype, t),
7993 fold_build1 (NEGATE_EXPR, itype,
7994 fold_convert (itype,
7995 loops[i].step)));
7996 else
7997 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7998 fold_convert (itype, loops[i].step));
7999 if (integer_nonzerop (t))
8000 error_at (gimple_location (fd.for_stmt),
8001 "invalid OpenMP non-rectangular loop step; "
8002 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8003 "step %qE",
8004 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8005 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8006 loops[i - loops[i].outer].step, i + 1,
8007 loops[i].step);
8011 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8012 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8013 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8014 if (region->cont)
8016 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8017 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8018 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8020 else
8021 /* If there isn't a continue then this is a degenerate case where
8022 the introduction of abnormal edges during lowering will prevent
8023 original loops from being detected. Fix that up. */
8024 loops_state_set (LOOPS_NEED_FIXUP);
8026 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8027 expand_omp_simd (region, &fd);
8028 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8030 gcc_assert (!inner_stmt && !fd.non_rect);
8031 expand_oacc_for (region, &fd);
8033 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8035 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8036 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8037 else
8038 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8040 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8041 && !fd.have_ordered)
8043 if (fd.chunk_size == NULL)
8044 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8045 else
8046 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8048 else
8050 int fn_index, start_ix, next_ix;
8051 unsigned HOST_WIDE_INT sched = 0;
8052 tree sched_arg = NULL_TREE;
8054 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8055 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8056 if (fd.chunk_size == NULL
8057 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8058 fd.chunk_size = integer_zero_node;
8059 switch (fd.sched_kind)
8061 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8062 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8063 && fd.lastprivate_conditional == 0)
8065 gcc_assert (!fd.have_ordered);
8066 fn_index = 6;
8067 sched = 4;
8069 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8070 && !fd.have_ordered
8071 && fd.lastprivate_conditional == 0)
8072 fn_index = 7;
8073 else
8075 fn_index = 3;
8076 sched = (HOST_WIDE_INT_1U << 31);
8078 break;
8079 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8080 case OMP_CLAUSE_SCHEDULE_GUIDED:
8081 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8082 && !fd.have_ordered
8083 && fd.lastprivate_conditional == 0)
8085 fn_index = 3 + fd.sched_kind;
8086 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8087 break;
8089 fn_index = fd.sched_kind;
8090 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8091 sched += (HOST_WIDE_INT_1U << 31);
8092 break;
8093 case OMP_CLAUSE_SCHEDULE_STATIC:
8094 gcc_assert (fd.have_ordered);
8095 fn_index = 0;
8096 sched = (HOST_WIDE_INT_1U << 31) + 1;
8097 break;
8098 default:
8099 gcc_unreachable ();
8101 if (!fd.ordered)
8102 fn_index += fd.have_ordered * 8;
8103 if (fd.ordered)
8104 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8105 else
8106 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8107 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8108 if (fd.have_reductemp || fd.have_pointer_condtemp)
8110 if (fd.ordered)
8111 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8112 else if (fd.have_ordered)
8113 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8114 else
8115 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8116 sched_arg = build_int_cstu (long_integer_type_node, sched);
8117 if (!fd.chunk_size)
8118 fd.chunk_size = integer_zero_node;
8120 if (fd.iter_type == long_long_unsigned_type_node)
8122 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8123 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8124 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8125 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8127 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8128 (enum built_in_function) next_ix, sched_arg,
8129 inner_stmt);
8132 if (gimple_in_ssa_p (cfun))
8133 update_ssa (TODO_update_ssa_only_virtuals);
8136 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8138 v = GOMP_sections_start (n);
8140 switch (v)
8142 case 0:
8143 goto L2;
8144 case 1:
8145 section 1;
8146 goto L1;
8147 case 2:
8149 case n:
8151 default:
8152 abort ();
8155 v = GOMP_sections_next ();
8156 goto L0;
8158 reduction;
8160 If this is a combined parallel sections, replace the call to
8161 GOMP_sections_start with call to GOMP_sections_next. */
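/* Illustrative mapping (not literal output): for
     #pragma omp sections
     { #pragma omp section A (); #pragma omp section B (); }
   the switch built below gets case 1 -> A's block, case 2 -> B's block,
   case 0 -> the exit label L2 and a default that traps, with
   GOMP_sections_next supplying the next case value on each pass.  */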
8163 static void
8164 expand_omp_sections (struct omp_region *region)
8166 tree t, u, vin = NULL, vmain, vnext, l2;
8167 unsigned len;
8168 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8169 gimple_stmt_iterator si, switch_si;
8170 gomp_sections *sections_stmt;
8171 gimple *stmt;
8172 gomp_continue *cont;
8173 edge_iterator ei;
8174 edge e;
8175 struct omp_region *inner;
8176 unsigned i, casei;
8177 bool exit_reachable = region->cont != NULL;
8179 gcc_assert (region->exit != NULL);
8180 entry_bb = region->entry;
8181 l0_bb = single_succ (entry_bb);
8182 l1_bb = region->cont;
8183 l2_bb = region->exit;
8184 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8185 l2 = gimple_block_label (l2_bb);
8186 else
8188 /* This can happen if there are reductions. */
8189 len = EDGE_COUNT (l0_bb->succs);
8190 gcc_assert (len > 0);
8191 e = EDGE_SUCC (l0_bb, len - 1);
8192 si = gsi_last_nondebug_bb (e->dest);
8193 l2 = NULL_TREE;
8194 if (gsi_end_p (si)
8195 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8196 l2 = gimple_block_label (e->dest);
8197 else
8198 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8200 si = gsi_last_nondebug_bb (e->dest);
8201 if (gsi_end_p (si)
8202 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8204 l2 = gimple_block_label (e->dest);
8205 break;
8209 if (exit_reachable)
8210 default_bb = create_empty_bb (l1_bb->prev_bb);
8211 else
8212 default_bb = create_empty_bb (l0_bb);
8214 /* We will build a switch() with enough cases for all the
8215 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work
8216 and a default case to abort if something goes wrong. */
8217 len = EDGE_COUNT (l0_bb->succs);
8219 /* Use vec::quick_push on label_vec throughout, since we know the size
8220 in advance. */
8221 auto_vec<tree> label_vec (len);
8223 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8224 GIMPLE_OMP_SECTIONS statement. */
8225 si = gsi_last_nondebug_bb (entry_bb);
8226 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8227 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8228 vin = gimple_omp_sections_control (sections_stmt);
8229 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8230 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8231 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8232 tree cond_var = NULL_TREE;
8233 if (reductmp || condtmp)
8235 tree reductions = null_pointer_node, mem = null_pointer_node;
8236 tree memv = NULL_TREE, condtemp = NULL_TREE;
8237 gimple_stmt_iterator gsi = gsi_none ();
8238 gimple *g = NULL;
8239 if (reductmp)
8241 reductions = OMP_CLAUSE_DECL (reductmp);
8242 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8243 g = SSA_NAME_DEF_STMT (reductions);
8244 reductions = gimple_assign_rhs1 (g);
8245 OMP_CLAUSE_DECL (reductmp) = reductions;
8246 gsi = gsi_for_stmt (g);
8248 else
8249 gsi = si;
8250 if (condtmp)
8252 condtemp = OMP_CLAUSE_DECL (condtmp);
8253 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8254 OMP_CLAUSE__CONDTEMP_);
8255 cond_var = OMP_CLAUSE_DECL (c);
8256 tree type = TREE_TYPE (condtemp);
8257 memv = create_tmp_var (type);
8258 TREE_ADDRESSABLE (memv) = 1;
8259 unsigned cnt = 0;
8260 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8261 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8262 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8263 ++cnt;
8264 unsigned HOST_WIDE_INT sz
8265 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8266 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8267 false);
8268 mem = build_fold_addr_expr (memv);
8270 t = build_int_cst (unsigned_type_node, len - 1);
8271 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8272 stmt = gimple_build_call (u, 3, t, reductions, mem);
8273 gimple_call_set_lhs (stmt, vin);
8274 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8275 if (condtmp)
8277 expand_omp_build_assign (&gsi, condtemp, memv, false);
8278 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8279 vin, build_one_cst (TREE_TYPE (cond_var)));
8280 expand_omp_build_assign (&gsi, cond_var, t, false);
8282 if (reductmp)
8284 gsi_remove (&gsi, true);
8285 release_ssa_name (gimple_assign_lhs (g));
8288 else if (!is_combined_parallel (region))
8290 /* If we are not inside a combined parallel+sections region,
8291 call GOMP_sections_start. */
8292 t = build_int_cst (unsigned_type_node, len - 1);
8293 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8294 stmt = gimple_build_call (u, 1, t);
8296 else
8298 /* Otherwise, call GOMP_sections_next. */
8299 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8300 stmt = gimple_build_call (u, 0);
8302 if (!reductmp && !condtmp)
8304 gimple_call_set_lhs (stmt, vin);
8305 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8307 gsi_remove (&si, true);
8309 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8310 L0_BB. */
8311 switch_si = gsi_last_nondebug_bb (l0_bb);
8312 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8313 if (exit_reachable)
8315 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8316 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8317 vmain = gimple_omp_continue_control_use (cont);
8318 vnext = gimple_omp_continue_control_def (cont);
8320 else
8322 vmain = vin;
8323 vnext = NULL_TREE;
8326 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8327 label_vec.quick_push (t);
8328 i = 1;
8330 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8331 for (inner = region->inner, casei = 1;
8332 inner;
8333 inner = inner->next, i++, casei++)
8335 basic_block s_entry_bb, s_exit_bb;
8337 /* Skip optional reduction region. */
8338 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8340 --i;
8341 --casei;
8342 continue;
8345 s_entry_bb = inner->entry;
8346 s_exit_bb = inner->exit;
8348 t = gimple_block_label (s_entry_bb);
8349 u = build_int_cst (unsigned_type_node, casei);
8350 u = build_case_label (u, NULL, t);
8351 label_vec.quick_push (u);
8353 si = gsi_last_nondebug_bb (s_entry_bb);
8354 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8355 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8356 gsi_remove (&si, true);
8357 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8359 if (s_exit_bb == NULL)
8360 continue;
8362 si = gsi_last_nondebug_bb (s_exit_bb);
8363 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8364 gsi_remove (&si, true);
8366 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8369 /* Error handling code goes in DEFAULT_BB. */
8370 t = gimple_block_label (default_bb);
8371 u = build_case_label (NULL, NULL, t);
8372 make_edge (l0_bb, default_bb, 0);
8373 add_bb_to_loop (default_bb, current_loops->tree_root);
8375 stmt = gimple_build_switch (vmain, u, label_vec);
8376 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8377 gsi_remove (&switch_si, true);
8379 si = gsi_start_bb (default_bb);
8380 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8381 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8383 if (exit_reachable)
8385 tree bfn_decl;
8387 /* Code to get the next section goes in L1_BB. */
8388 si = gsi_last_nondebug_bb (l1_bb);
8389 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8391 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8392 stmt = gimple_build_call (bfn_decl, 0);
8393 gimple_call_set_lhs (stmt, vnext);
8394 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8395 if (cond_var)
8397 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8398 vnext, build_one_cst (TREE_TYPE (cond_var)));
8399 expand_omp_build_assign (&si, cond_var, t, false);
8401 gsi_remove (&si, true);
8403 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8406 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8407 si = gsi_last_nondebug_bb (l2_bb);
8408 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8409 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8410 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8411 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8412 else
8413 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8414 stmt = gimple_build_call (t, 0);
8415 if (gimple_omp_return_lhs (gsi_stmt (si)))
8416 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8417 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8418 gsi_remove (&si, true);
8420 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8423 /* Expand code for an OpenMP single or scope directive. We've already expanded
8424 much of the code; here we simply place the GOMP_barrier call. */
8426 static void
8427 expand_omp_single (struct omp_region *region)
8429 basic_block entry_bb, exit_bb;
8430 gimple_stmt_iterator si;
8432 entry_bb = region->entry;
8433 exit_bb = region->exit;
8435 si = gsi_last_nondebug_bb (entry_bb);
8436 enum gimple_code code = gimple_code (gsi_stmt (si));
8437 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8438 gsi_remove (&si, true);
8439 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8441 if (exit_bb == NULL)
8443 gcc_assert (code == GIMPLE_OMP_SCOPE);
8444 return;
8447 si = gsi_last_nondebug_bb (exit_bb);
8448 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8450 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8451 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8453 gsi_remove (&si, true);
8454 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8457 /* Generic expansion for OpenMP synchronization directives: master, masked,
8458 taskgroup, ordered and critical. All we need to do here is remove the entry
8459 and exit markers for REGION. */
8461 static void
8462 expand_omp_synch (struct omp_region *region)
8464 basic_block entry_bb, exit_bb;
8465 gimple_stmt_iterator si;
8467 entry_bb = region->entry;
8468 exit_bb = region->exit;
8470 si = gsi_last_nondebug_bb (entry_bb);
8471 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8472 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8473 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8474 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8475 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8476 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8477 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8478 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8479 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8481 expand_omp_taskreg (region);
8482 return;
8484 gsi_remove (&si, true);
8485 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8487 if (exit_bb)
8489 si = gsi_last_nondebug_bb (exit_bb);
8490 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8491 gsi_remove (&si, true);
8492 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8496 /* Translate the fail clause embedded in an enum omp_memory_order value
8497 to the corresponding enum memmodel.  */
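/* For example (a sketch of the mapping): with

     #pragma omp atomic compare capture fail(acquire)

   the fail ordering is MEMMODEL_ACQUIRE.  When no fail clause was given,
   the fail ordering is derived from the main one, weakened to what a
   plain load may use: release maps to relaxed and acq_rel to acquire,
   while relaxed, acquire and seq_cst map to themselves.  */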
8499 static enum memmodel
8500 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8502 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8504 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8505 switch (mo & OMP_MEMORY_ORDER_MASK)
8507 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8508 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8509 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8510 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8511 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8512 default: break;
8514 gcc_unreachable ();
8515 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8516 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8517 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8518 default: gcc_unreachable ();
8522 /* Translate enum omp_memory_order to enum memmodel. The two enums
8523 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8524 is 0 and omp_memory_order has the fail mode encoded in it too. */
8526 static enum memmodel
8527 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8529 enum memmodel ret, fail_ret;
8530 switch (mo & OMP_MEMORY_ORDER_MASK)
8532 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8533 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8534 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8535 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8536 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8537 default: gcc_unreachable ();
8539 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8540 we can just return ret here unconditionally. Otherwise, work around
8541 it here and make sure fail memmodel is not stronger. */
8542 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8543 return ret;
8544 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8545 if (fail_ret > ret)
8546 return fail_ret;
8547 return ret;
8550 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8551 operation as a normal volatile load. */
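/* For illustration (a sketch; INDEX selects the builtin width): with a
   4-byte integer X,

     #pragma omp atomic read
       v = x;

   becomes roughly

     v = __atomic_load_4 (&x, mo);

   where MO comes from the directive's memory-order clause (typically
   relaxed if none was specified).  */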
8553 static bool
8554 expand_omp_atomic_load (basic_block load_bb, tree addr,
8555 tree loaded_val, int index)
8557 enum built_in_function tmpbase;
8558 gimple_stmt_iterator gsi;
8559 basic_block store_bb;
8560 location_t loc;
8561 gimple *stmt;
8562 tree decl, call, type, itype;
8564 gsi = gsi_last_nondebug_bb (load_bb);
8565 stmt = gsi_stmt (gsi);
8566 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8567 loc = gimple_location (stmt);
8569 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8570 is smaller than word size, then expand_atomic_load assumes that the load
8571 is atomic. We could avoid the builtin entirely in this case. */
8573 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8574 decl = builtin_decl_explicit (tmpbase);
8575 if (decl == NULL_TREE)
8576 return false;
8578 type = TREE_TYPE (loaded_val);
8579 itype = TREE_TYPE (TREE_TYPE (decl));
8581 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8582 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8583 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8584 if (!useless_type_conversion_p (type, itype))
8585 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8586 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8588 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8589 gsi_remove (&gsi, true);
8591 store_bb = single_succ (load_bb);
8592 gsi = gsi_last_nondebug_bb (store_bb);
8593 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8594 gsi_remove (&gsi, true);
8596 if (gimple_in_ssa_p (cfun))
8597 update_ssa (TODO_update_ssa_no_phi);
8599 return true;
8602 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8603 operation as a normal volatile store. */
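/* Likewise a sketch: for a 4-byte integer X,

     #pragma omp atomic write
       x = expr;

   becomes roughly __atomic_store_4 (&x, expr, mo), while a capturing
   form whose new value does not depend on the old one, e.g.

     #pragma omp atomic capture
       { v = x; x = expr; }

   becomes v = __atomic_exchange_4 (&x, expr, mo), provided the target
   can do an atomic exchange in that mode.  */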
8605 static bool
8606 expand_omp_atomic_store (basic_block load_bb, tree addr,
8607 tree loaded_val, tree stored_val, int index)
8609 enum built_in_function tmpbase;
8610 gimple_stmt_iterator gsi;
8611 basic_block store_bb = single_succ (load_bb);
8612 location_t loc;
8613 gimple *stmt;
8614 tree decl, call, type, itype;
8615 machine_mode imode;
8616 bool exchange;
8618 gsi = gsi_last_nondebug_bb (load_bb);
8619 stmt = gsi_stmt (gsi);
8620 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8622 /* If the load value is needed, then this isn't a store but an exchange. */
8623 exchange = gimple_omp_atomic_need_value_p (stmt);
8625 gsi = gsi_last_nondebug_bb (store_bb);
8626 stmt = gsi_stmt (gsi);
8627 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8628 loc = gimple_location (stmt);
8630 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8631 is smaller than word size, then expand_atomic_store assumes that the store
8632 is atomic. We could avoid the builtin entirely in this case. */
8634 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8635 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8636 decl = builtin_decl_explicit (tmpbase);
8637 if (decl == NULL_TREE)
8638 return false;
8640 type = TREE_TYPE (stored_val);
8642 /* Dig out the type of the function's second argument. */
8643 itype = TREE_TYPE (decl);
8644 itype = TYPE_ARG_TYPES (itype);
8645 itype = TREE_CHAIN (itype);
8646 itype = TREE_VALUE (itype);
8647 imode = TYPE_MODE (itype);
8649 if (exchange && !can_atomic_exchange_p (imode, true))
8650 return false;
8652 if (!useless_type_conversion_p (itype, type))
8653 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8654 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8655 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8656 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8657 if (exchange)
8659 if (!useless_type_conversion_p (type, itype))
8660 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8661 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8664 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8665 gsi_remove (&gsi, true);
8667 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8668 gsi = gsi_last_nondebug_bb (load_bb);
8669 gsi_remove (&gsi, true);
8671 if (gimple_in_ssa_p (cfun))
8672 update_ssa (TODO_update_ssa_no_phi);
8674 return true;
8677 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8678 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8679 size of the data type, and thus usable to find the index of the builtin
8680 decl. Returns false if the expression is not of the proper form. */
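/* For example (a sketch, assuming a 4-byte integer X and the default,
   relaxed, ordering):

     #pragma omp atomic
       x += n;

   expands to a single call

     __atomic_fetch_add_4 (&x, n, __ATOMIC_RELAXED);

   A capturing form instead selects the fetch-op or op-fetch variant,
   depending on whether the old or the new value is needed.  */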
8682 static bool
8683 expand_omp_atomic_fetch_op (basic_block load_bb,
8684 tree addr, tree loaded_val,
8685 tree stored_val, int index)
8687 enum built_in_function oldbase, newbase, tmpbase;
8688 tree decl, itype, call;
8689 tree lhs, rhs;
8690 basic_block store_bb = single_succ (load_bb);
8691 gimple_stmt_iterator gsi;
8692 gimple *stmt;
8693 location_t loc;
8694 enum tree_code code;
8695 bool need_old, need_new;
8696 machine_mode imode;
8698 /* We expect to find the following sequences:
8700 load_bb:
8701 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8703 store_bb:
8704 val = tmp OP something; (or: something OP tmp)
8705 GIMPLE_OMP_ATOMIC_STORE (val)
8707 ???FIXME: Allow a more flexible sequence.
8708 Perhaps use data flow to pick the statements.  */
8712 gsi = gsi_after_labels (store_bb);
8713 stmt = gsi_stmt (gsi);
8714 if (is_gimple_debug (stmt))
8716 gsi_next_nondebug (&gsi);
8717 if (gsi_end_p (gsi))
8718 return false;
8719 stmt = gsi_stmt (gsi);
8721 loc = gimple_location (stmt);
8722 if (!is_gimple_assign (stmt))
8723 return false;
8724 gsi_next_nondebug (&gsi);
8725 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8726 return false;
8727 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8728 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8729 enum omp_memory_order omo
8730 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8731 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8732 gcc_checking_assert (!need_old || !need_new);
8734 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8735 return false;
8737 /* Check for one of the supported fetch-op operations. */
8738 code = gimple_assign_rhs_code (stmt);
8739 switch (code)
8741 case PLUS_EXPR:
8742 case POINTER_PLUS_EXPR:
8743 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8744 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8745 break;
8746 case MINUS_EXPR:
8747 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8748 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8749 break;
8750 case BIT_AND_EXPR:
8751 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8752 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8753 break;
8754 case BIT_IOR_EXPR:
8755 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8756 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8757 break;
8758 case BIT_XOR_EXPR:
8759 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8760 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8761 break;
8762 default:
8763 return false;
8766 /* Make sure the expression is of the proper form. */
8767 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8768 rhs = gimple_assign_rhs2 (stmt);
8769 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8770 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8771 rhs = gimple_assign_rhs1 (stmt);
8772 else
8773 return false;
8775 tmpbase = ((enum built_in_function)
8776 ((need_new ? newbase : oldbase) + index + 1));
8777 decl = builtin_decl_explicit (tmpbase);
8778 if (decl == NULL_TREE)
8779 return false;
8780 itype = TREE_TYPE (TREE_TYPE (decl));
8781 imode = TYPE_MODE (itype);
8783 /* We could test all of the various optabs involved, but the fact of the
8784 matter is that (with the exception of i486 vs i586 and xadd) all targets
8785 that support any atomic operation optab also implement compare-and-swap.
8786 Let optabs.c take care of expanding any compare-and-swap loop. */
8787 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8788 return false;
8790 gsi = gsi_last_nondebug_bb (load_bb);
8791 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8793 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8794 It only requires that the operation happen atomically, so unless the
8795 directive carries a stronger memory-order clause, the RELAXED memory model is used.  */
8796 call = build_call_expr_loc (loc, decl, 3, addr,
8797 fold_convert_loc (loc, itype, rhs),
8798 build_int_cst (NULL, mo));
8800 if (need_old || need_new)
8802 lhs = need_old ? loaded_val : stored_val;
8803 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8804 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8806 else
8807 call = fold_convert_loc (loc, void_type_node, call);
8808 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8809 gsi_remove (&gsi, true);
8811 gsi = gsi_last_nondebug_bb (store_bb);
8812 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8813 gsi_remove (&gsi, true);
8814 gsi = gsi_last_nondebug_bb (store_bb);
8815 stmt = gsi_stmt (gsi);
8816 gsi_remove (&gsi, true);
8818 if (gimple_in_ssa_p (cfun))
8820 release_defs (stmt);
8821 update_ssa (TODO_update_ssa_no_phi);
8824 return true;
8827 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8828 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8829 Returns false if the expression is not of the proper form. */
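/* For instance (a sketch): the OpenMP 5.1 form

     #pragma omp atomic compare
       x = x == e ? d : x;

   is turned into a call to the internal function

     .ATOMIC_COMPARE_EXCHANGE (&x, e, d, flag, mo, fail_mo)

   whose complex result holds the old value of X in its real part and
   the success flag in its imaginary part; those parts feed any captured
   variables below.  */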
8831 static bool
8832 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8833 tree loaded_val, tree stored_val, int index)
8835 /* We expect to find the following sequences:
8837 load_bb:
8838 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8840 store_bb:
8841 val = tmp == e ? d : tmp;
8842 GIMPLE_OMP_ATOMIC_STORE (val)
8844 or in store_bb instead:
8845 tmp2 = tmp == e;
8846 val = tmp2 ? d : tmp;
8847 GIMPLE_OMP_ATOMIC_STORE (val)
8850 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8851 val = e == tmp3 ? d : tmp;
8852 GIMPLE_OMP_ATOMIC_STORE (val)
8854 etc. */
8857 basic_block store_bb = single_succ (load_bb);
8858 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8859 gimple *store_stmt = gsi_stmt (gsi);
8860 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8861 return false;
8862 gsi_prev_nondebug (&gsi);
8863 if (gsi_end_p (gsi))
8864 return false;
8865 gimple *condexpr_stmt = gsi_stmt (gsi);
8866 if (!is_gimple_assign (condexpr_stmt)
8867 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8868 return false;
8869 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8870 return false;
8871 gimple *cond_stmt = NULL;
8872 gimple *vce_stmt = NULL;
8873 gsi_prev_nondebug (&gsi);
8874 if (!gsi_end_p (gsi))
8876 cond_stmt = gsi_stmt (gsi);
8877 if (!is_gimple_assign (cond_stmt))
8878 return false;
8879 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8881 gsi_prev_nondebug (&gsi);
8882 if (!gsi_end_p (gsi))
8884 vce_stmt = gsi_stmt (gsi);
8885 if (!is_gimple_assign (vce_stmt)
8886 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8887 return false;
8890 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8891 std::swap (vce_stmt, cond_stmt);
8892 else
8893 return false;
8894 if (vce_stmt)
8896 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8897 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8898 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8899 return false;
8900 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8901 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8902 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8903 TYPE_SIZE (TREE_TYPE (loaded_val))))
8904 return false;
8905 gsi_prev_nondebug (&gsi);
8906 if (!gsi_end_p (gsi))
8907 return false;
8910 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8911 tree cond_op1, cond_op2;
8912 if (cond_stmt)
8914 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8915 return false;
8916 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8917 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8919 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8920 return false;
8921 else
8923 cond_op1 = TREE_OPERAND (cond, 0);
8924 cond_op2 = TREE_OPERAND (cond, 1);
8926 tree d;
8927 if (TREE_CODE (cond) == NE_EXPR)
8929 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
8930 return false;
8931 d = gimple_assign_rhs3 (condexpr_stmt);
8933 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
8934 return false;
8935 else
8936 d = gimple_assign_rhs2 (condexpr_stmt);
8937 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
8938 if (operand_equal_p (e, cond_op1))
8939 e = cond_op2;
8940 else if (operand_equal_p (e, cond_op2))
8941 e = cond_op1;
8942 else
8943 return false;
8945 location_t loc = gimple_location (store_stmt);
8946 gimple *load_stmt = last_stmt (load_bb);
8947 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
8948 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
8949 bool weak = gimple_omp_atomic_weak_p (load_stmt);
8950 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
8951 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8952 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
8953 gcc_checking_assert (!need_old || !need_new);
8955 enum built_in_function fncode
8956 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8957 + index + 1);
8958 tree cmpxchg = builtin_decl_explicit (fncode);
8959 if (cmpxchg == NULL_TREE)
8960 return false;
8961 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8963 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8964 || !can_atomic_load_p (TYPE_MODE (itype)))
8965 return false;
8967 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8968 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
8969 return false;
8971 gsi = gsi_for_stmt (store_stmt);
8972 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
8974 tree ne = create_tmp_reg (itype);
8975 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
8976 gimple_set_location (g, loc);
8977 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8978 e = ne;
8980 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
8982 tree nd = create_tmp_reg (itype);
8983 enum tree_code code;
8984 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
8986 code = VIEW_CONVERT_EXPR;
8987 d = build1 (VIEW_CONVERT_EXPR, itype, d);
8989 else
8990 code = NOP_EXPR;
8991 gimple *g = gimple_build_assign (nd, code, d);
8992 gimple_set_location (g, loc);
8993 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8994 d = nd;
8997 tree ctype = build_complex_type (itype);
8998 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
8999 gimple *g
9000 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9001 build_int_cst (integer_type_node, flag),
9002 mo, fmo);
9003 tree cres = create_tmp_reg (ctype);
9004 gimple_call_set_lhs (g, cres);
9005 gimple_set_location (g, loc);
9006 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9008 if (cond_stmt || need_old || need_new)
9010 tree im = create_tmp_reg (itype);
9011 g = gimple_build_assign (im, IMAGPART_EXPR,
9012 build1 (IMAGPART_EXPR, itype, cres));
9013 gimple_set_location (g, loc);
9014 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9016 tree re = NULL_TREE;
9017 if (need_old || need_new)
9019 re = create_tmp_reg (itype);
9020 g = gimple_build_assign (re, REALPART_EXPR,
9021 build1 (REALPART_EXPR, itype, cres));
9022 gimple_set_location (g, loc);
9023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9026 if (cond_stmt)
9028 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9029 NOP_EXPR, im);
9030 gimple_set_location (g, loc);
9031 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9033 else if (need_new)
9035 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9036 build2 (NE_EXPR, boolean_type_node,
9037 im, build_zero_cst (itype)),
9038 d, re);
9039 gimple_set_location (g, loc);
9040 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9041 re = gimple_assign_lhs (g);
9044 if (need_old || need_new)
9046 tree v = need_old ? loaded_val : stored_val;
9047 enum tree_code code;
9048 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9050 code = VIEW_CONVERT_EXPR;
9051 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9053 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9054 code = NOP_EXPR;
9055 else
9056 code = TREE_CODE (re);
9057 g = gimple_build_assign (v, code, re);
9058 gimple_set_location (g, loc);
9059 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9063 gsi_remove (&gsi, true);
9064 gsi = gsi_for_stmt (load_stmt);
9065 gsi_remove (&gsi, true);
9066 gsi = gsi_for_stmt (condexpr_stmt);
9067 gsi_remove (&gsi, true);
9068 if (cond_stmt)
9070 gsi = gsi_for_stmt (cond_stmt);
9071 gsi_remove (&gsi, true);
9073 if (vce_stmt)
9075 gsi = gsi_for_stmt (vce_stmt);
9076 gsi_remove (&gsi, true);
9079 return true;
9082 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9084 oldval = *addr;
9085 repeat:
9086 newval = rhs; // with oldval replacing *addr in rhs
9087 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9088 if (oldval != newval)
9089 goto repeat;
9091 INDEX is log2 of the size of the data type, and thus usable to find the
9092 index of the builtin decl. */
9094 static bool
9095 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9096 tree addr, tree loaded_val, tree stored_val,
9097 int index)
9099 tree loadedi, storedi, initial, new_storedi, old_vali;
9100 tree type, itype, cmpxchg, iaddr, atype;
9101 gimple_stmt_iterator si;
9102 basic_block loop_header = single_succ (load_bb);
9103 gimple *phi, *stmt;
9104 edge e;
9105 enum built_in_function fncode;
9107 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9108 + index + 1);
9109 cmpxchg = builtin_decl_explicit (fncode);
9110 if (cmpxchg == NULL_TREE)
9111 return false;
9112 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9113 atype = type;
9114 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9116 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9117 || !can_atomic_load_p (TYPE_MODE (itype)))
9118 return false;
9120 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9121 si = gsi_last_nondebug_bb (load_bb);
9122 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9123 location_t loc = gimple_location (gsi_stmt (si));
9124 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9125 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9126 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9128 /* For floating-point values, we'll need to view-convert them to integers
9129 so that we can perform the atomic compare and swap. Simplify the
9130 following code by always setting up the "i"ntegral variables. */
9131 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9133 tree iaddr_val;
9135 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9136 true));
9137 atype = itype;
9138 iaddr_val
9139 = force_gimple_operand_gsi (&si,
9140 fold_convert (TREE_TYPE (iaddr), addr),
9141 false, NULL_TREE, true, GSI_SAME_STMT);
9142 stmt = gimple_build_assign (iaddr, iaddr_val);
9143 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9144 loadedi = create_tmp_var (itype);
9145 if (gimple_in_ssa_p (cfun))
9146 loadedi = make_ssa_name (loadedi);
9148 else
9150 iaddr = addr;
9151 loadedi = loaded_val;
9154 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9155 tree loaddecl = builtin_decl_explicit (fncode);
9156 if (loaddecl)
9157 initial
9158 = fold_convert (atype,
9159 build_call_expr (loaddecl, 2, iaddr,
9160 build_int_cst (NULL_TREE,
9161 MEMMODEL_RELAXED)));
9162 else
9164 tree off
9165 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9166 true), 0);
9167 initial = build2 (MEM_REF, atype, iaddr, off);
9170 initial
9171 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9172 GSI_SAME_STMT);
9174 /* Move the value to the LOADEDI temporary. */
9175 if (gimple_in_ssa_p (cfun))
9177 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9178 phi = create_phi_node (loadedi, loop_header);
9179 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9180 initial);
9182 else
9183 gsi_insert_before (&si,
9184 gimple_build_assign (loadedi, initial),
9185 GSI_SAME_STMT);
9186 if (loadedi != loaded_val)
9188 gimple_stmt_iterator gsi2;
9189 tree x;
9191 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9192 gsi2 = gsi_start_bb (loop_header);
9193 if (gimple_in_ssa_p (cfun))
9195 gassign *stmt;
9196 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9197 true, GSI_SAME_STMT);
9198 stmt = gimple_build_assign (loaded_val, x);
9199 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9201 else
9203 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9204 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9205 true, GSI_SAME_STMT);
9208 gsi_remove (&si, true);
9210 si = gsi_last_nondebug_bb (store_bb);
9211 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9213 if (iaddr == addr)
9214 storedi = stored_val;
9215 else
9216 storedi
9217 = force_gimple_operand_gsi (&si,
9218 build1 (VIEW_CONVERT_EXPR, itype,
9219 stored_val), true, NULL_TREE, true,
9220 GSI_SAME_STMT);
9222 /* Build the compare&swap statement. */
9223 tree ctype = build_complex_type (itype);
9224 int flag = int_size_in_bytes (itype);
9225 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9226 ctype, 6, iaddr, loadedi,
9227 storedi,
9228 build_int_cst (integer_type_node,
9229 flag),
9230 mo, fmo);
9231 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9232 new_storedi = force_gimple_operand_gsi (&si,
9233 fold_convert (TREE_TYPE (loadedi),
9234 new_storedi),
9235 true, NULL_TREE,
9236 true, GSI_SAME_STMT);
9238 if (gimple_in_ssa_p (cfun))
9239 old_vali = loadedi;
9240 else
9242 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9243 stmt = gimple_build_assign (old_vali, loadedi);
9244 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9246 stmt = gimple_build_assign (loadedi, new_storedi);
9247 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9250 /* Note that we always perform the comparison as an integer, even for
9251 floating point. This allows the atomic operation to properly
9252 succeed even with NaNs and -0.0. */
9253 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9254 stmt = gimple_build_cond_empty (ne);
9255 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9257 /* Update cfg. */
9258 e = single_succ_edge (store_bb);
9259 e->flags &= ~EDGE_FALLTHRU;
9260 e->flags |= EDGE_FALSE_VALUE;
9261 /* Expect no looping. */
9262 e->probability = profile_probability::guessed_always ();
9264 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9265 e->probability = profile_probability::guessed_never ();
9267 /* Copy the new value to loadedi (we already did that before the condition
9268 if we are not in SSA). */
9269 if (gimple_in_ssa_p (cfun))
9271 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9272 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9275 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9276 gsi_remove (&si, true);
9278 class loop *loop = alloc_loop ();
9279 loop->header = loop_header;
9280 loop->latch = store_bb;
9281 add_loop (loop, loop_header->loop_father);
9283 if (gimple_in_ssa_p (cfun))
9284 update_ssa (TODO_update_ssa_no_phi);
9286 return true;
9289 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9291 GOMP_atomic_start ();
9292 *addr = rhs;
9293 GOMP_atomic_end ();
9295 The result is not globally atomic, but works so long as all parallel
9296 references are within #pragma omp atomic directives. According to
9297 responses received from omp@openmp.org, this appears to be within spec,
9298 which makes sense, since that's how several other compilers handle
9299 this situation as well.
9300 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9301 expanding. STORED_VAL is the operand of the matching
9302 GIMPLE_OMP_ATOMIC_STORE.
9304 We replace
9305 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9306 loaded_val = *addr;
9308 and replace
9309 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9310 *addr = stored_val;  */
9313 static bool
9314 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9315 tree addr, tree loaded_val, tree stored_val)
9317 gimple_stmt_iterator si;
9318 gassign *stmt;
9319 tree t;
9321 si = gsi_last_nondebug_bb (load_bb);
9322 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9324 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9325 t = build_call_expr (t, 0);
9326 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9328 tree mem = build_simple_mem_ref (addr);
9329 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9330 TREE_OPERAND (mem, 1)
9331 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9332 true),
9333 TREE_OPERAND (mem, 1));
9334 stmt = gimple_build_assign (loaded_val, mem);
9335 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9336 gsi_remove (&si, true);
9338 si = gsi_last_nondebug_bb (store_bb);
9339 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9341 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9342 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9344 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9345 t = build_call_expr (t, 0);
9346 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9347 gsi_remove (&si, true);
9349 if (gimple_in_ssa_p (cfun))
9350 update_ssa (TODO_update_ssa_no_phi);
9351 return true;
9354 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
9355 using expand_omp_atomic_fetch_op.  If that fails, we try to
9356 call expand_omp_atomic_pipeline, and if it fails too, the
9357 ultimate fallback is wrapping the operation in a mutex
9358 (expand_omp_atomic_mutex). REGION is the atomic region built
9359 by build_omp_regions_1().  */
9361 static void
9362 expand_omp_atomic (struct omp_region *region)
9364 basic_block load_bb = region->entry, store_bb = region->exit;
9365 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9366 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9367 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9368 tree addr = gimple_omp_atomic_load_rhs (load);
9369 tree stored_val = gimple_omp_atomic_store_val (store);
9370 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9371 HOST_WIDE_INT index;
9373 /* Make sure the type is one of the supported sizes. */
9374 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9375 index = exact_log2 (index);
9376 if (index >= 0 && index <= 4)
9378 unsigned int align = TYPE_ALIGN_UNIT (type);
9380 /* __sync builtins require strict data alignment. */
9381 if (exact_log2 (align) >= index)
9383 /* Atomic load. */
9384 scalar_mode smode;
9385 if (loaded_val == stored_val
9386 && (is_int_mode (TYPE_MODE (type), &smode)
9387 || is_float_mode (TYPE_MODE (type), &smode))
9388 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9389 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9390 return;
9392 /* Atomic store. */
9393 if ((is_int_mode (TYPE_MODE (type), &smode)
9394 || is_float_mode (TYPE_MODE (type), &smode))
9395 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9396 && store_bb == single_succ (load_bb)
9397 && first_stmt (store_bb) == store
9398 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9399 stored_val, index))
9400 return;
9402 /* When possible, use specialized atomic update functions. */
9403 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9404 && store_bb == single_succ (load_bb)
9405 && expand_omp_atomic_fetch_op (load_bb, addr,
9406 loaded_val, stored_val, index))
9407 return;
9409 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9410 if (store_bb == single_succ (load_bb)
9411 && !gimple_in_ssa_p (cfun)
9412 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9413 index))
9414 return;
9416 /* If we don't have specialized __sync builtins, try to implement
9417 the operation as a compare-and-swap loop.  */
9418 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9419 loaded_val, stored_val, index))
9420 return;
9424 /* The ultimate fallback is wrapping the operation in a mutex. */
9425 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9428 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9429 at REGION_EXIT. */
9431 static void
9432 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9433 basic_block region_exit)
9435 class loop *outer = region_entry->loop_father;
9436 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9438 /* Don't parallelize the kernels region if it contains more than one outer
9439 loop. */
9440 unsigned int nr_outer_loops = 0;
9441 class loop *single_outer = NULL;
9442 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9444 gcc_assert (loop_outer (loop) == outer);
9446 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9447 continue;
9449 if (region_exit != NULL
9450 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9451 continue;
9453 nr_outer_loops++;
9454 single_outer = loop;
9456 if (nr_outer_loops != 1)
9457 return;
9459 for (class loop *loop = single_outer->inner;
9460 loop != NULL;
9461 loop = loop->inner)
9462 if (loop->next)
9463 return;
9465 /* Mark the loops in the region. */
9466 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9467 loop->in_oacc_kernels_region = true;
9470 /* Build a target argument identifier from the DEVICE identifier, the value
9471 identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
9473 static tree
9474 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9476 tree t = build_int_cst (integer_type_node, device);
9477 if (subseqent_param)
9478 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9479 build_int_cst (integer_type_node,
9480 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9481 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9482 build_int_cst (integer_type_node, id));
9483 return t;
9486 /* Like above, but return it in a type that can be directly stored as an element
9487 of the argument array. */
9489 static tree
9490 get_target_argument_identifier (int device, bool subseqent_param, int id)
9492 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9493 return fold_convert (ptr_type_node, t);
9496 /* Return a target argument consisting of DEVICE identifier, value identifier
9497 ID, and the actual VALUE. */
9499 static tree
9500 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9501 tree value)
9503 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9504 fold_convert (integer_type_node, value),
9505 build_int_cst (unsigned_type_node,
9506 GOMP_TARGET_ARG_VALUE_SHIFT));
9507 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9508 get_target_argument_identifier_1 (device, false, id));
9509 t = fold_convert (ptr_type_node, t);
9510 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9513 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9514 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9515 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9516 separate arguments.  */
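/* E.g. (a sketch): a num_teams clause with the constant 4 fits in the
   +/-2^15 window and is pushed as one encoded pointer-sized word, while
   a runtime expression pushes an identifier word (marked as having a
   subsequent parameter) followed by the value itself.  */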
9518 static void
9519 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9520 int id, tree value, vec <tree> *args)
9522 if (tree_fits_shwi_p (value)
9523 && tree_to_shwi (value) > -(1 << 15)
9524 && tree_to_shwi (value) < (1 << 15))
9525 args->quick_push (get_target_argument_value (gsi, device, id, value));
9526 else
9528 args->quick_push (get_target_argument_identifier (device, true, id));
9529 value = fold_convert (ptr_type_node, value);
9530 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9531 GSI_SAME_STMT);
9532 args->quick_push (value);
9536 /* Create an array of arguments that is then passed to GOMP_target. */
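/* In its current form (a simplification) the array holds an encoded
   NUM_TEAMS entry and an encoded THREAD_LIMIT entry, each possibly
   split into an identifier/value pair as described above, terminated
   by a NULL pointer; its address becomes the last argument of the
   target launch call built below.  */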
9538 static tree
9539 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9541 auto_vec <tree, 6> args;
9542 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9543 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9544 if (c)
9545 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9546 else
9547 t = integer_minus_one_node;
9548 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9549 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9551 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9552 if (c)
9553 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9554 else
9555 t = integer_minus_one_node;
9556 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9557 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9558 &args);
9560 /* Produce more, perhaps device specific, arguments here. */
9562 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9563 args.length () + 1),
9564 ".omp_target_args");
9565 for (unsigned i = 0; i < args.length (); i++)
9567 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9568 build_int_cst (integer_type_node, i),
9569 NULL_TREE, NULL_TREE);
9570 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9571 GSI_SAME_STMT);
9573 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9574 build_int_cst (integer_type_node, args.length ()),
9575 NULL_TREE, NULL_TREE);
9576 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9577 GSI_SAME_STMT);
9578 TREE_ADDRESSABLE (argarray) = 1;
9579 return build_fold_addr_expr (argarray);
9582 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
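/* In rough terms (a sketch): for an offloaded construct such as

     #pragma omp target map(tofrom: x)
       x++;

   the body has been outlined into a child function earlier; what is
   built here is the host-side launch, conceptually

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);

   while non-offloading kinds (data, update, enter/exit data and the
   OpenACC variants) only emit their corresponding runtime call.  */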
9584 static void
9585 expand_omp_target (struct omp_region *region)
9587 basic_block entry_bb, exit_bb, new_bb;
9588 struct function *child_cfun;
9589 tree child_fn, block, t;
9590 gimple_stmt_iterator gsi;
9591 gomp_target *entry_stmt;
9592 gimple *stmt;
9593 edge e;
9594 bool offloaded;
9595 int target_kind;
9597 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9598 target_kind = gimple_omp_target_kind (entry_stmt);
9599 new_bb = region->entry;
9601 offloaded = is_gimple_omp_offloaded (entry_stmt);
9602 switch (target_kind)
9604 case GF_OMP_TARGET_KIND_REGION:
9605 case GF_OMP_TARGET_KIND_UPDATE:
9606 case GF_OMP_TARGET_KIND_ENTER_DATA:
9607 case GF_OMP_TARGET_KIND_EXIT_DATA:
9608 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9609 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9610 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9611 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9612 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9613 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9614 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9615 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9616 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9617 case GF_OMP_TARGET_KIND_DATA:
9618 case GF_OMP_TARGET_KIND_OACC_DATA:
9619 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9620 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9621 break;
9622 default:
9623 gcc_unreachable ();
9626 child_fn = NULL_TREE;
9627 child_cfun = NULL;
9628 if (offloaded)
9630 child_fn = gimple_omp_target_child_fn (entry_stmt);
9631 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9634 /* Supported by expand_omp_taskreg, but not here. */
9635 if (child_cfun != NULL)
9636 gcc_checking_assert (!child_cfun->cfg);
9637 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9639 entry_bb = region->entry;
9640 exit_bb = region->exit;
9642 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9643 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9645 /* Going on, all OpenACC compute constructs are mapped to
9646 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9647 To distinguish between them, we attach attributes. */
9648 switch (target_kind)
9650 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9651 DECL_ATTRIBUTES (child_fn)
9652 = tree_cons (get_identifier ("oacc parallel"),
9653 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9654 break;
9655 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9656 DECL_ATTRIBUTES (child_fn)
9657 = tree_cons (get_identifier ("oacc kernels"),
9658 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9659 break;
9660 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9661 DECL_ATTRIBUTES (child_fn)
9662 = tree_cons (get_identifier ("oacc serial"),
9663 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9664 break;
9665 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9666 DECL_ATTRIBUTES (child_fn)
9667 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9668 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9669 break;
9670 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9671 DECL_ATTRIBUTES (child_fn)
9672 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9673 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9674 break;
9675 default:
9676 /* Make sure we don't miss any. */
9677 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9678 && is_gimple_omp_offloaded (entry_stmt)));
9679 break;
9682 if (offloaded)
9684 unsigned srcidx, dstidx, num;
9686 /* If the offloading region needs data sent from the parent
9687 function, then the very first statement (except possible
9688 tree profile counter updates) of the offloading body
9689 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9690 &.OMP_DATA_O is passed as an argument to the child function,
9691 we need to replace it with the argument as seen by the child
9692 function.
9694 In most cases, this will end up being the identity assignment
9695 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9696 a function call that has been inlined, the original PARM_DECL
9697 .OMP_DATA_I may have been converted into a different local
9698 variable. In which case, we need to keep the assignment. */
9699 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9700 if (data_arg)
9702 basic_block entry_succ_bb = single_succ (entry_bb);
9703 gimple_stmt_iterator gsi;
9704 tree arg;
9705 gimple *tgtcopy_stmt = NULL;
9706 tree sender = TREE_VEC_ELT (data_arg, 0);
9708 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9710 gcc_assert (!gsi_end_p (gsi));
9711 stmt = gsi_stmt (gsi);
9712 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9713 continue;
9715 if (gimple_num_ops (stmt) == 2)
9717 tree arg = gimple_assign_rhs1 (stmt);
9719 /* We're ignoring the subcode because we're
9720 effectively doing a STRIP_NOPS. */
9722 if (TREE_CODE (arg) == ADDR_EXPR
9723 && TREE_OPERAND (arg, 0) == sender)
9725 tgtcopy_stmt = stmt;
9726 break;
9731 gcc_assert (tgtcopy_stmt != NULL);
9732 arg = DECL_ARGUMENTS (child_fn);
9734 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9735 gsi_remove (&gsi, true);
9738 /* Declare local variables needed in CHILD_CFUN. */
9739 block = DECL_INITIAL (child_fn);
9740 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9741 /* The gimplifier could record temporaries in the offloading block
9742 rather than in the containing function's local_decls chain,
9743 which would mean cgraph missed finalizing them. Do it now. */
9744 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9745 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9746 varpool_node::finalize_decl (t);
9747 DECL_SAVED_TREE (child_fn) = NULL;
9748 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9749 gimple_set_body (child_fn, NULL);
9750 TREE_USED (block) = 1;
9752 /* Reset DECL_CONTEXT on function arguments. */
9753 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9754 DECL_CONTEXT (t) = child_fn;
9756 /* Split ENTRY_BB at GIMPLE_*,
9757 so that it can be moved to the child function. */
9758 gsi = gsi_last_nondebug_bb (entry_bb);
9759 stmt = gsi_stmt (gsi);
9760 gcc_assert (stmt
9761 && gimple_code (stmt) == gimple_code (entry_stmt));
9762 e = split_block (entry_bb, stmt);
9763 gsi_remove (&gsi, true);
9764 entry_bb = e->dest;
9765 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9767 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9768 if (exit_bb)
9770 gsi = gsi_last_nondebug_bb (exit_bb);
9771 gcc_assert (!gsi_end_p (gsi)
9772 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9773 stmt = gimple_build_return (NULL);
9774 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9775 gsi_remove (&gsi, true);
9778 /* Move the offloading region into CHILD_CFUN. */
9780 block = gimple_block (entry_stmt);
9782 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9783 if (exit_bb)
9784 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9785 /* When the OMP expansion process cannot guarantee an up-to-date
9786 loop tree, arrange for the child function to fix up loops.  */
9787 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9788 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9790 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9791 num = vec_safe_length (child_cfun->local_decls);
9792 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9794 t = (*child_cfun->local_decls)[srcidx];
9795 if (DECL_CONTEXT (t) == cfun->decl)
9796 continue;
9797 if (srcidx != dstidx)
9798 (*child_cfun->local_decls)[dstidx] = t;
9799 dstidx++;
9801 if (dstidx != num)
9802 vec_safe_truncate (child_cfun->local_decls, dstidx);
9804 /* Inform the callgraph about the new function. */
9805 child_cfun->curr_properties = cfun->curr_properties;
9806 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9807 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9808 cgraph_node *node = cgraph_node::get_create (child_fn);
9809 node->parallelized_function = 1;
9810 cgraph_node::add_new_function (child_fn, true);
9812 /* Add the new function to the offload table. */
9813 if (ENABLE_OFFLOADING)
9815 if (in_lto_p)
9816 DECL_PRESERVE_P (child_fn) = 1;
9817 vec_safe_push (offload_funcs, child_fn);
9820 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9821 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9823 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9824 fixed in a following pass. */
9825 push_cfun (child_cfun);
9826 if (need_asm)
9827 assign_assembler_name_if_needed (child_fn);
9828 cgraph_edge::rebuild_edges ();
9830 /* Some EH regions might become dead, see PR34608. If
9831 pass_cleanup_cfg isn't the first pass to happen with the
9832 new child, these dead EH edges might cause problems.
9833 Clean them up now. */
9834 if (flag_exceptions)
9836 basic_block bb;
9837 bool changed = false;
9839 FOR_EACH_BB_FN (bb, cfun)
9840 changed |= gimple_purge_dead_eh_edges (bb);
9841 if (changed)
9842 cleanup_tree_cfg ();
9844 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9845 verify_loop_structure ();
9846 pop_cfun ();
9848 if (dump_file && !gimple_in_ssa_p (cfun))
9850 omp_any_child_fn_dumped = true;
9851 dump_function_header (dump_file, child_fn, dump_flags);
9852 dump_function_to_file (child_fn, dump_file, dump_flags);
9855 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9858 /* Emit a library call to launch the offloading region, or do data
9859 transfers. */
9860 tree t1, t2, t3, t4, depend, c, clauses;
9861 enum built_in_function start_ix;
9862 unsigned int flags_i = 0;
9864 switch (gimple_omp_target_kind (entry_stmt))
9866 case GF_OMP_TARGET_KIND_REGION:
9867 start_ix = BUILT_IN_GOMP_TARGET;
9868 break;
9869 case GF_OMP_TARGET_KIND_DATA:
9870 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9871 break;
9872 case GF_OMP_TARGET_KIND_UPDATE:
9873 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9874 break;
9875 case GF_OMP_TARGET_KIND_ENTER_DATA:
9876 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9877 break;
9878 case GF_OMP_TARGET_KIND_EXIT_DATA:
9879 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9880 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9881 break;
9882 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9883 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9884 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9885 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9886 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9887 start_ix = BUILT_IN_GOACC_PARALLEL;
9888 break;
9889 case GF_OMP_TARGET_KIND_OACC_DATA:
9890 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9891 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9892 start_ix = BUILT_IN_GOACC_DATA_START;
9893 break;
9894 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9895 start_ix = BUILT_IN_GOACC_UPDATE;
9896 break;
9897 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9898 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9899 break;
9900 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9901 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9902 break;
9903 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9904 start_ix = BUILT_IN_GOACC_DECLARE;
9905 break;
9906 default:
9907 gcc_unreachable ();
9910 clauses = gimple_omp_target_clauses (entry_stmt);
9912 tree device = NULL_TREE;
9913 location_t device_loc = UNKNOWN_LOCATION;
9914 tree goacc_flags = NULL_TREE;
9915 if (is_gimple_omp_oacc (entry_stmt))
9917 /* By default, no GOACC_FLAGs are set. */
9918 goacc_flags = integer_zero_node;
9920 else
9922 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9923 if (c)
9925 device = OMP_CLAUSE_DEVICE_ID (c);
9926 device_loc = OMP_CLAUSE_LOCATION (c);
9927 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
9928 sorry_at (device_loc, "%<ancestor%> not yet supported");
9930 else
9932 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9933 library choose). */
9934 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9935 device_loc = gimple_location (entry_stmt);
9938 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9939 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
9940 nowait doesn't appear.  */
9941 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
9942 c = NULL;
9943 if (c)
9944 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9947 /* By default, there is no conditional. */
9948 tree cond = NULL_TREE;
9949 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9950 if (c)
9951 cond = OMP_CLAUSE_IF_EXPR (c);
9952 /* If we found the clause 'if (cond)', build:
9953 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9954 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9955 if (cond)
9957 tree *tp;
9958 if (is_gimple_omp_oacc (entry_stmt))
9959 tp = &goacc_flags;
9960 else
9962 /* Ensure 'device' is of the correct type. */
9963 device = fold_convert_loc (device_loc, integer_type_node, device);
9965 tp = &device;
9968 cond = gimple_boolify (cond);
9970 basic_block cond_bb, then_bb, else_bb;
9971 edge e;
9972 tree tmp_var;
9974 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9975 if (offloaded)
9976 e = split_block_after_labels (new_bb);
9977 else
9979 gsi = gsi_last_nondebug_bb (new_bb);
9980 gsi_prev (&gsi);
9981 e = split_block (new_bb, gsi_stmt (gsi));
9983 cond_bb = e->src;
9984 new_bb = e->dest;
9985 remove_edge (e);
9987 then_bb = create_empty_bb (cond_bb);
9988 else_bb = create_empty_bb (then_bb);
9989 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9990 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9992 stmt = gimple_build_cond_empty (cond);
9993 gsi = gsi_last_bb (cond_bb);
9994 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9996 gsi = gsi_start_bb (then_bb);
9997 stmt = gimple_build_assign (tmp_var, *tp);
9998 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10000 gsi = gsi_start_bb (else_bb);
10001 if (is_gimple_omp_oacc (entry_stmt))
10002 stmt = gimple_build_assign (tmp_var,
10003 BIT_IOR_EXPR,
10004 *tp,
10005 build_int_cst (integer_type_node,
10006 GOACC_FLAG_HOST_FALLBACK));
10007 else
10008 stmt = gimple_build_assign (tmp_var,
10009 build_int_cst (integer_type_node,
10010 GOMP_DEVICE_HOST_FALLBACK));
10011 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10013 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10014 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10015 add_bb_to_loop (then_bb, cond_bb->loop_father);
10016 add_bb_to_loop (else_bb, cond_bb->loop_father);
10017 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10018 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10020 *tp = tmp_var;
10022 gsi = gsi_last_nondebug_bb (new_bb);
10024 else
10026 gsi = gsi_last_nondebug_bb (new_bb);
10028 if (device != NULL_TREE)
10029 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10030 true, GSI_SAME_STMT);
10033 t = gimple_omp_target_data_arg (entry_stmt);
10034 if (t == NULL)
10036 t1 = size_zero_node;
10037 t2 = build_zero_cst (ptr_type_node);
10038 t3 = t2;
10039 t4 = t2;
10041 else
10043 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10044 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10045 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10046 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10047 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10050 gimple *g;
10051 bool tagging = false;
10052 /* The maximum number of arguments used by any start_ix, without varargs.  */
10053 auto_vec<tree, 11> args;
10054 if (is_gimple_omp_oacc (entry_stmt))
10056 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10057 TREE_TYPE (goacc_flags), goacc_flags);
10058 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10059 NULL_TREE, true,
10060 GSI_SAME_STMT);
10061 args.quick_push (goacc_flags_m);
10063 else
10064 args.quick_push (device);
10065 if (offloaded)
10066 args.quick_push (build_fold_addr_expr (child_fn));
10067 args.quick_push (t1);
10068 args.quick_push (t2);
10069 args.quick_push (t3);
10070 args.quick_push (t4);
10071 switch (start_ix)
10073 case BUILT_IN_GOACC_DATA_START:
10074 case BUILT_IN_GOACC_DECLARE:
10075 case BUILT_IN_GOMP_TARGET_DATA:
10076 break;
10077 case BUILT_IN_GOMP_TARGET:
10078 case BUILT_IN_GOMP_TARGET_UPDATE:
10079 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10080 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10081 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10082 if (c)
10083 depend = OMP_CLAUSE_DECL (c);
10084 else
10085 depend = build_int_cst (ptr_type_node, 0);
10086 args.quick_push (depend);
10087 if (start_ix == BUILT_IN_GOMP_TARGET)
10088 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10089 break;
10090 case BUILT_IN_GOACC_PARALLEL:
10091 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10093 tree dims = NULL_TREE;
10094 unsigned int ix;
10096 /* For serial constructs we set all dimensions to 1. */
10097 for (ix = GOMP_DIM_MAX; ix--;)
10098 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10099 oacc_replace_fn_attrib (child_fn, dims);
10101 else
10102 oacc_set_fn_attrib (child_fn, clauses, &args);
10103 tagging = true;
10104 /* FALLTHRU */
10105 case BUILT_IN_GOACC_ENTER_DATA:
10106 case BUILT_IN_GOACC_EXIT_DATA:
10107 case BUILT_IN_GOACC_UPDATE:
10109 tree t_async = NULL_TREE;
10111 /* If present, use the value specified by the respective
10112 clause, making sure it is of the correct type.  */
10113 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10114 if (c)
10115 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10116 integer_type_node,
10117 OMP_CLAUSE_ASYNC_EXPR (c));
10118 else if (!tagging)
10119 /* Default values for t_async. */
10120 t_async = fold_convert_loc (gimple_location (entry_stmt),
10121 integer_type_node,
10122 build_int_cst (integer_type_node,
10123 GOMP_ASYNC_SYNC));
10124 if (tagging && t_async)
10126 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10128 if (TREE_CODE (t_async) == INTEGER_CST)
10130 /* See if we can pack the async arg into the tag's
10131 operand. */
10132 i_async = TREE_INT_CST_LOW (t_async);
10133 if (i_async < GOMP_LAUNCH_OP_MAX)
10134 t_async = NULL_TREE;
10135 else
10136 i_async = GOMP_LAUNCH_OP_MAX;
10138 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10139 i_async));
10141 if (t_async)
10142 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10143 NULL_TREE, true,
10144 GSI_SAME_STMT));
10146 /* Save the argument index, and ... */
10147 unsigned t_wait_idx = args.length ();
10148 unsigned num_waits = 0;
10149 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10150 if (!tagging || c)
10151 /* ... push a placeholder. */
10152 args.safe_push (integer_zero_node);
10154 for (; c; c = OMP_CLAUSE_CHAIN (c))
10155 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10157 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10158 integer_type_node,
10159 OMP_CLAUSE_WAIT_EXPR (c));
10160 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10161 GSI_SAME_STMT);
10162 args.safe_push (arg);
10163 num_waits++;
10166 if (!tagging || num_waits)
10168 tree len;
10170 /* Now that we know the number, update the placeholder. */
10171 if (tagging)
10172 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10173 else
10174 len = build_int_cst (integer_type_node, num_waits);
10175 len = fold_convert_loc (gimple_location (entry_stmt),
10176 unsigned_type_node, len);
10177 args[t_wait_idx] = len;
10180 break;
10181 default:
10182 gcc_unreachable ();
10184 if (tagging)
10185 /* Push terminal marker - zero. */
10186 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10188 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10189 gimple_set_location (g, gimple_location (entry_stmt));
10190 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10191 if (!offloaded)
10193 g = gsi_stmt (gsi);
10194 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10195 gsi_remove (&gsi, true);
10199 /* Expand the parallel region tree rooted at REGION. Expansion
10200 proceeds in depth-first order. Innermost regions are expanded
10201 first. This way, parallel regions that require a new function to
10202 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10203 internal dependencies in their body. */
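/* For instance (illustrative):

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
     }

   yields a GIMPLE_OMP_PARALLEL region whose inner child is the
   GIMPLE_OMP_FOR region; the loop region is expanded first, so when the
   parallel is outlined into its child function that function already
   contains the expanded loop code.  */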
10205 static void
10206 expand_omp (struct omp_region *region)
10207 {
10208 omp_any_child_fn_dumped = false;
10209 while (region)
10210 {
10211 location_t saved_location;
10212 gimple *inner_stmt = NULL;
10213
10214 /* First, determine whether this is a combined parallel+workshare
10215 region. */
10216 if (region->type == GIMPLE_OMP_PARALLEL)
10217 determine_parallel_type (region);
10218
10219 if (region->type == GIMPLE_OMP_FOR
10220 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10221 inner_stmt = last_stmt (region->inner->entry);
10222
10223 if (region->inner)
10224 expand_omp (region->inner);
10225
10226 saved_location = input_location;
10227 if (gimple_has_location (last_stmt (region->entry)))
10228 input_location = gimple_location (last_stmt (region->entry));
10229
10230 switch (region->type)
10231 {
10232 case GIMPLE_OMP_PARALLEL:
10233 case GIMPLE_OMP_TASK:
10234 expand_omp_taskreg (region);
10235 break;
10237 case GIMPLE_OMP_FOR:
10238 expand_omp_for (region, inner_stmt);
10239 break;
10241 case GIMPLE_OMP_SECTIONS:
10242 expand_omp_sections (region);
10243 break;
10245 case GIMPLE_OMP_SECTION:
10246 /* Individual omp sections are handled together with their
10247 parent GIMPLE_OMP_SECTIONS region. */
10248 break;
10250 case GIMPLE_OMP_SINGLE:
10251 case GIMPLE_OMP_SCOPE:
10252 expand_omp_single (region);
10253 break;
10255 case GIMPLE_OMP_ORDERED:
10256 {
10257 gomp_ordered *ord_stmt
10258 = as_a <gomp_ordered *> (last_stmt (region->entry));
10259 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10260 OMP_CLAUSE_DEPEND))
10261 {
10262 /* We'll expand these when expanding corresponding
10263 worksharing region with ordered(n) clause. */
10264 gcc_assert (region->outer
10265 && region->outer->type == GIMPLE_OMP_FOR);
10266 region->ord_stmt = ord_stmt;
10267 break;
10268 }
10269 }
10270 /* FALLTHRU */
10271 case GIMPLE_OMP_MASTER:
10272 case GIMPLE_OMP_MASKED:
10273 case GIMPLE_OMP_TASKGROUP:
10274 case GIMPLE_OMP_CRITICAL:
10275 case GIMPLE_OMP_TEAMS:
10276 expand_omp_synch (region);
10277 break;
10279 case GIMPLE_OMP_ATOMIC_LOAD:
10280 expand_omp_atomic (region);
10281 break;
10283 case GIMPLE_OMP_TARGET:
10284 expand_omp_target (region);
10285 break;
10287 default:
10288 gcc_unreachable ();
10289 }
10290
10291 input_location = saved_location;
10292 region = region->next;
10293 }
10294 if (omp_any_child_fn_dumped)
10295 {
10296 if (dump_file)
10297 dump_function_header (dump_file, current_function_decl, dump_flags);
10298 omp_any_child_fn_dumped = false;
10299 }
10300 }
10301
10302 /* Helper for build_omp_regions. Scan the dominator tree starting at
10303 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10304 true, the function ends once a single tree is built (otherwise, the
10305 whole forest of OMP constructs may be built). */
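/* Illustrative sketch of the mapping: a block ending in GIMPLE_OMP_PARALLEL
   opens a region that becomes PARENT for the blocks it dominates, a nested
   GIMPLE_OMP_SINGLE opens a child region, and each matching
   GIMPLE_OMP_RETURN closes the innermost open region again, so directive
   nesting turns into omp_region nesting.  Stand-alone directives (e.g.
   "#pragma omp target update") do not get a region at all; see the cases
   below where REGION is reset to NULL.  */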
10307 static void
10308 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10309 bool single_tree)
10310 {
10311 gimple_stmt_iterator gsi;
10312 gimple *stmt;
10313 basic_block son;
10314
10315 gsi = gsi_last_nondebug_bb (bb);
10316 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10317 {
10318 struct omp_region *region;
10319 enum gimple_code code;
10320
10321 stmt = gsi_stmt (gsi);
10322 code = gimple_code (stmt);
10323 if (code == GIMPLE_OMP_RETURN)
10324 {
10325 /* STMT is the return point out of region PARENT. Mark it
10326 as the exit point and make PARENT the immediately
10327 enclosing region. */
10328 gcc_assert (parent);
10329 region = parent;
10330 region->exit = bb;
10331 parent = parent->outer;
10332 }
10333 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10334 {
10335 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10336 GIMPLE_OMP_RETURN, but matches with
10337 GIMPLE_OMP_ATOMIC_LOAD. */
10338 gcc_assert (parent);
10339 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10340 region = parent;
10341 region->exit = bb;
10342 parent = parent->outer;
10343 }
10344 else if (code == GIMPLE_OMP_CONTINUE)
10345 {
10346 gcc_assert (parent);
10347 parent->cont = bb;
10348 }
10349 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10350 {
10351 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10352 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10353 }
10354 else
10355 {
10356 region = new_omp_region (bb, code, parent);
10357 /* Otherwise... */
10358 if (code == GIMPLE_OMP_TARGET)
10359 {
10360 switch (gimple_omp_target_kind (stmt))
10361 {
10362 case GF_OMP_TARGET_KIND_REGION:
10363 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10364 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10365 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10366 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10367 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10368 break;
10369 case GF_OMP_TARGET_KIND_UPDATE:
10370 case GF_OMP_TARGET_KIND_ENTER_DATA:
10371 case GF_OMP_TARGET_KIND_EXIT_DATA:
10372 case GF_OMP_TARGET_KIND_DATA:
10373 case GF_OMP_TARGET_KIND_OACC_DATA:
10374 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10375 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10376 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10377 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10378 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10379 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10380 /* ..., other than for those stand-alone directives... */
10381 region = NULL;
10382 break;
10383 default:
10384 gcc_unreachable ();
10385 }
10386 }
10387 else if (code == GIMPLE_OMP_ORDERED
10388 && omp_find_clause (gimple_omp_ordered_clauses
10389 (as_a <gomp_ordered *> (stmt)),
10390 OMP_CLAUSE_DEPEND))
10391 /* #pragma omp ordered depend is also just a stand-alone
10392 directive. */
10393 region = NULL;
10394 else if (code == GIMPLE_OMP_TASK
10395 && gimple_omp_task_taskwait_p (stmt))
10396 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10397 region = NULL;
10398 /* ..., this directive becomes the parent for a new region. */
10399 if (region)
10400 parent = region;
10401 }
10402 }
10403
10404 if (single_tree && !parent)
10405 return;
10407 for (son = first_dom_son (CDI_DOMINATORS, bb);
10408 son;
10409 son = next_dom_son (CDI_DOMINATORS, son))
10410 build_omp_regions_1 (son, parent, single_tree);
10411 }
10412
10413 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10414 root_omp_region. */
10416 static void
10417 build_omp_regions_root (basic_block root)
10418 {
10419 gcc_assert (root_omp_region == NULL);
10420 build_omp_regions_1 (root, NULL, true);
10421 gcc_assert (root_omp_region != NULL);
10422 }
10424 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10426 void
10427 omp_expand_local (basic_block head)
10428 {
10429 build_omp_regions_root (head);
10430 if (dump_file && (dump_flags & TDF_DETAILS))
10431 {
10432 fprintf (dump_file, "\nOMP region tree\n\n");
10433 dump_omp_region (dump_file, root_omp_region, 0);
10434 fprintf (dump_file, "\n");
10435 }
10436
10437 remove_exit_barriers (root_omp_region);
10438 expand_omp (root_omp_region);
10439
10440 omp_free_regions ();
10441 }
10443 /* Scan the CFG and build a tree of OMP regions, storing the root of
10444 the OMP region tree in root_omp_region. */
10446 static void
10447 build_omp_regions (void)
10448 {
10449 gcc_assert (root_omp_region == NULL);
10450 calculate_dominance_info (CDI_DOMINATORS);
10451 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10452 }
10454 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10456 static unsigned int
10457 execute_expand_omp (void)
10458 {
10459 build_omp_regions ();
10460
10461 if (!root_omp_region)
10462 return 0;
10463
10464 if (dump_file)
10465 {
10466 fprintf (dump_file, "\nOMP region tree\n\n");
10467 dump_omp_region (dump_file, root_omp_region, 0);
10468 fprintf (dump_file, "\n");
10469 }
10470
10471 remove_exit_barriers (root_omp_region);
10472
10473 expand_omp (root_omp_region);
10474
10475 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10476 verify_loop_structure ();
10477 cleanup_tree_cfg ();
10478
10479 omp_free_regions ();
10480
10481 return 0;
10482 }
10484 /* OMP expansion -- the default pass, run before creation of SSA form. */
10486 namespace {
10488 const pass_data pass_data_expand_omp =
10490 GIMPLE_PASS, /* type */
10491 "ompexp", /* name */
10492 OPTGROUP_OMP, /* optinfo_flags */
10493 TV_NONE, /* tv_id */
10494 PROP_gimple_any, /* properties_required */
10495 PROP_gimple_eomp, /* properties_provided */
10496 0, /* properties_destroyed */
10497 0, /* todo_flags_start */
10498 0, /* todo_flags_finish */
10501 class pass_expand_omp : public gimple_opt_pass
10503 public:
10504 pass_expand_omp (gcc::context *ctxt)
10505 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10508 /* opt_pass methods: */
10509 virtual unsigned int execute (function *)
10511 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10512 || flag_openmp_simd != 0)
10513 && !seen_error ());
10515 /* This pass always runs, to provide PROP_gimple_eomp.
10516 But often, there is nothing to do. */
10517 if (!gate)
10518 return 0;
10520 return execute_expand_omp ();
10523 }; // class pass_expand_omp
10525 } // anon namespace
10527 gimple_opt_pass *
10528 make_pass_expand_omp (gcc::context *ctxt)
10530 return new pass_expand_omp (ctxt);
10533 namespace {
10535 const pass_data pass_data_expand_omp_ssa =
10537 GIMPLE_PASS, /* type */
10538 "ompexpssa", /* name */
10539 OPTGROUP_OMP, /* optinfo_flags */
10540 TV_NONE, /* tv_id */
10541 PROP_cfg | PROP_ssa, /* properties_required */
10542 PROP_gimple_eomp, /* properties_provided */
10543 0, /* properties_destroyed */
10544 0, /* todo_flags_start */
10545 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10548 class pass_expand_omp_ssa : public gimple_opt_pass
10550 public:
10551 pass_expand_omp_ssa (gcc::context *ctxt)
10552 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10555 /* opt_pass methods: */
10556 virtual bool gate (function *fun)
10558 return !(fun->curr_properties & PROP_gimple_eomp);
10560 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10561 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10563 }; // class pass_expand_omp_ssa
10565 } // anon namespace
10567 gimple_opt_pass *
10568 make_pass_expand_omp_ssa (gcc::context *ctxt)
10570 return new pass_expand_omp_ssa (ctxt);
10573 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10574 GIMPLE_* codes. */
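/* Rough illustration: for a loop region the GIMPLE_OMP_FOR block opens a
   new region and falls through into the body, the GIMPLE_OMP_CONTINUE
   block gains loopback and exit edges (marked abnormal so they are not
   split), and the GIMPLE_OMP_RETURN block records the region exit; the
   individual cases below spell out the exact edges.  */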
10576 bool
10577 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10578 int *region_idx)
10579 {
10580 gimple *last = last_stmt (bb);
10581 enum gimple_code code = gimple_code (last);
10582 struct omp_region *cur_region = *region;
10583 bool fallthru = false;
10584
10585 switch (code)
10586 {
10587 case GIMPLE_OMP_PARALLEL:
10588 case GIMPLE_OMP_FOR:
10589 case GIMPLE_OMP_SINGLE:
10590 case GIMPLE_OMP_TEAMS:
10591 case GIMPLE_OMP_MASTER:
10592 case GIMPLE_OMP_MASKED:
10593 case GIMPLE_OMP_SCOPE:
10594 case GIMPLE_OMP_TASKGROUP:
10595 case GIMPLE_OMP_CRITICAL:
10596 case GIMPLE_OMP_SECTION:
10597 cur_region = new_omp_region (bb, code, cur_region);
10598 fallthru = true;
10599 break;
10601 case GIMPLE_OMP_TASK:
10602 cur_region = new_omp_region (bb, code, cur_region);
10603 fallthru = true;
10604 if (gimple_omp_task_taskwait_p (last))
10605 cur_region = cur_region->outer;
10606 break;
10608 case GIMPLE_OMP_ORDERED:
10609 cur_region = new_omp_region (bb, code, cur_region);
10610 fallthru = true;
10611 if (omp_find_clause (gimple_omp_ordered_clauses
10612 (as_a <gomp_ordered *> (last)),
10613 OMP_CLAUSE_DEPEND))
10614 cur_region = cur_region->outer;
10615 break;
10617 case GIMPLE_OMP_TARGET:
10618 cur_region = new_omp_region (bb, code, cur_region);
10619 fallthru = true;
10620 switch (gimple_omp_target_kind (last))
10621 {
10622 case GF_OMP_TARGET_KIND_REGION:
10623 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10624 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10625 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10626 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10627 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10628 break;
10629 case GF_OMP_TARGET_KIND_UPDATE:
10630 case GF_OMP_TARGET_KIND_ENTER_DATA:
10631 case GF_OMP_TARGET_KIND_EXIT_DATA:
10632 case GF_OMP_TARGET_KIND_DATA:
10633 case GF_OMP_TARGET_KIND_OACC_DATA:
10634 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10635 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10636 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10637 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10638 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10639 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10640 cur_region = cur_region->outer;
10641 break;
10642 default:
10643 gcc_unreachable ();
10644 }
10645 break;
10647 case GIMPLE_OMP_SECTIONS:
10648 cur_region = new_omp_region (bb, code, cur_region);
10649 fallthru = true;
10650 break;
10652 case GIMPLE_OMP_SECTIONS_SWITCH:
10653 fallthru = false;
10654 break;
10656 case GIMPLE_OMP_ATOMIC_LOAD:
10657 case GIMPLE_OMP_ATOMIC_STORE:
10658 fallthru = true;
10659 break;
10661 case GIMPLE_OMP_RETURN:
10662 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10663 somewhere other than the next block. This will be
10664 created later. */
10665 cur_region->exit = bb;
10666 if (cur_region->type == GIMPLE_OMP_TASK)
10667 /* Add an edge corresponding to not scheduling the task
10668 immediately. */
10669 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10670 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10671 cur_region = cur_region->outer;
10672 break;
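/* Sketch for the task case handled just above: besides the normal path
   through the task body, the EDGE_ABNORMAL edge from the GIMPLE_OMP_TASK
   entry block to this GIMPLE_OMP_RETURN block models the runtime deferring
   the task instead of executing it immediately.  */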
10674 case GIMPLE_OMP_CONTINUE:
10675 cur_region->cont = bb;
10676 switch (cur_region->type)
10677 {
10678 case GIMPLE_OMP_FOR:
10679 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10680 successor edges as abnormal to prevent splitting
10681 them. */
10682 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10683 /* Make the loopback edge. */
10684 make_edge (bb, single_succ (cur_region->entry),
10685 EDGE_ABNORMAL);
10686
10687 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10688 corresponds to the case that the body of the loop
10689 is not executed at all. */
10690 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10691 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10692 fallthru = false;
10693 break;
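/* Illustrative summary of the loop wiring above: the edge from the
   GIMPLE_OMP_FOR entry into the body is kept (marked abnormal), this
   GIMPLE_OMP_CONTINUE block loops back to the body abnormally, and both
   the entry and this block also get an edge to bb->next_bb so that a loop
   whose body never runs still reaches the exit.  */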
10695 case GIMPLE_OMP_SECTIONS:
10696 /* Wire up the edges into and out of the nested sections. */
10697 {
10698 basic_block switch_bb = single_succ (cur_region->entry);
10699
10700 struct omp_region *i;
10701 for (i = cur_region->inner; i ; i = i->next)
10702 {
10703 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10704 make_edge (switch_bb, i->entry, 0);
10705 make_edge (i->exit, bb, EDGE_FALLTHRU);
10706 }
10707
10708 /* Make the loopback edge to the block with
10709 GIMPLE_OMP_SECTIONS_SWITCH. */
10710 make_edge (bb, switch_bb, 0);
10711
10712 /* Make the edge from the switch to exit. */
10713 make_edge (switch_bb, bb->next_bb, 0);
10714 fallthru = false;
10715 }
10716 break;
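/* Roughly, for two sections the wiring above produces:
     sections entry -> switch_bb -> section1.entry / section2.entry
     sectionN.exit -> this block -> switch_bb (loopback)
     switch_bb -> bb->next_bb (exit)
   so control keeps returning to the GIMPLE_OMP_SECTIONS_SWITCH block until
   no section remains to dispatch.  */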
10718 case GIMPLE_OMP_TASK:
10719 fallthru = true;
10720 break;
10721
10722 default:
10723 gcc_unreachable ();
10724 }
10725 break;
10726
10727 default:
10728 gcc_unreachable ();
10729 }
10730
10731 if (*region != cur_region)
10732 {
10733 *region = cur_region;
10734 if (cur_region)
10735 *region_idx = cur_region->entry->index;
10736 else
10737 *region_idx = 0;
10738 }
10739
10740 return fallthru;