Don't warn when alignment of global common data exceeds maximum alignment.
[official-gcc.git] / gcc / omp-expand.c
blobc868b8c3d3c1b51c7dba10a243a0ff7ffcddff37
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
/* Root of the omp_region tree built for the current function; NULL when
   no region tree has been built (or after omp_free_regions).  */
static struct omp_region *root_omp_region;
/* Set when a child function of any region has been dumped, so the
   dump machinery elsewhere in this file can react.  */
static bool omp_any_child_fn_dumped;

/* Forward declarations for mutually-referencing expanders below.  */
static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  /* Sections never carry loop bounds, so there is nothing to check.  */
  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a loop combined into the enclosing parallel, the real
	     bounds live in the _looptemp_ clauses of the parallel;
	     the first clause holds N1, the second N2.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);
375 /* Dump the parallel region tree rooted at REGION. */
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
386 if (region->cont)
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
/* Dump REGION and its descendants to stderr; for use from a debugger.  */

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}
/* Dump the entire region tree of the current function to stderr;
   for use from a debugger.  */

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
414 /* Create a new parallel region starting at STMT inside region PARENT. */
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
420 struct omp_region *region = XCNEW (struct omp_region);
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
426 if (parent)
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
433 else
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
441 return region;
444 /* Release the memory associated with the region tree rooted at REGION. */
446 static void
447 free_omp_region_1 (struct omp_region *region)
449 struct omp_region *i, *n;
451 for (i = region->inner; i ; i = n)
453 n = i->next;
454 free_omp_region_1 (i);
457 free (region);
460 /* Release the memory for the entire omp region tree. */
462 void
463 omp_free_regions (void)
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
468 n = r->next;
469 free_omp_region_1 (r);
471 root_omp_region = NULL;
/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  /* Decompose COND into a comparison code and its two operands, then
     build a GIMPLE_COND with no true/false labels.  */
  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      /* Chain CHILD_FNDECL into the vars of the enclosing lexical
	 block so the debug info records the correct scope.  */
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}
/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  /* The offsets above select among the GOMP_parallel_loop_*
	     entry points relative to the _STATIC one.  */
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Build a diamond in the CFG computing
	     val = cond ? num_threads : 1.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  /* Assemble the argument list: fn, data, num_threads,
     [workshare args,] flags.  */
  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      /* GOMP_parallel_reductions returns a value; store it into the
	 _reductemp_ decl.  */
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  /* Accumulate the compile-time-known GOMP_TASK_FLAG_* bits.  */
  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For a taskloop, the surrounding GIMPLE_OMP_FOR supplies the
	 iteration bounds (via _looptemp_ clauses), step, direction and
	 grainsize/num_tasks information.  */
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* GOMP_taskloop has no COND argument; fold the if clause into
	     the FLAGS word at run time instead.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  /* Without a depend clause this degenerates to a plain taskwait,
     handled elsewhere; nothing to emit here.  */
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  /* Zero means "let the runtime choose" for both NUM_TEAMS and
     THREAD_LIMIT.  */
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
972 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
974 static tree
975 vec2chain (vec<tree, va_gc> *v)
977 tree chain = NULL_TREE, t;
978 unsigned ix;
980 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
982 DECL_CHAIN (t) = chain;
983 chain = t;
986 return chain;
/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* Lazily-computed tristate: -1 unknown, 0 none, 1 found.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* Scan the child function's locals ...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ... and the lexical blocks from the workshare's return
		 up to (and including) the parallel's own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
1079 static void
1080 remove_exit_barriers (struct omp_region *region)
1082 if (region->type == GIMPLE_OMP_PARALLEL)
1083 remove_exit_barrier (region);
1085 if (region->inner)
1087 region = region->inner;
1088 remove_exit_barriers (region);
1089 while (region->next)
1091 region = region->next;
1092 remove_exit_barriers (region);
1097 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1098 calls. These can't be declared as const functions, but
1099 within one parallel body they are constant, so they can be
1100 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1101 which are declared const. Similarly for task body, except
1102 that in untied task omp_get_thread_num () can change at any task
1103 scheduling point. */
1105 static void
1106 optimize_omp_library_calls (gimple *entry_stmt)
1108 basic_block bb;
1109 gimple_stmt_iterator gsi;
1110 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1111 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1112 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1113 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1114 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1115 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1116 OMP_CLAUSE_UNTIED) != NULL);
1118 FOR_EACH_BB_FN (bb, cfun)
1119 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1121 gimple *call = gsi_stmt (gsi);
1122 tree decl;
/* Only consider calls to external, public declarations without a
   body, i.e. plain references to the library routines.  */
1124 if (is_gimple_call (call)
1125 && (decl = gimple_call_fndecl (call))
1126 && DECL_EXTERNAL (decl)
1127 && TREE_PUBLIC (decl)
1128 && DECL_INITIAL (decl) == NULL)
1130 tree built_in;
1132 if (DECL_NAME (decl) == thr_num_id)
1134 /* In #pragma omp task untied omp_get_thread_num () can change
1135 during the execution of the task region. */
1136 if (untied_task)
1137 continue;
1138 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1140 else if (DECL_NAME (decl) == num_thr_id)
1141 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1142 else
1143 continue;
/* Be conservative: the declaration must match the builtin's
   assembler name and the call must take no arguments.  */
1145 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1146 || gimple_call_num_args (call) != 0)
1147 continue;
/* With exceptions enabled, only replace declarations known not to
   throw.  */
1149 if (flag_exceptions && !TREE_NOTHROW (decl))
1150 continue;
/* The declared return type must be compatible with the builtin's.  */
1152 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1153 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1154 TREE_TYPE (TREE_TYPE (built_in))))
1155 continue;
1157 gimple_call_set_fndecl (call, built_in);
1162 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1163 regimplified. */
1165 static tree
1166 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1168 tree t = *tp;
1170 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1171 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1172 return t;
1174 if (TREE_CODE (t) == ADDR_EXPR)
1175 recompute_tree_invariant_for_addr_expr (t);
1177 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1178 return NULL_TREE;
1181 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1183 static void
1184 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1185 bool after)
1187 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1188 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1189 !after, after ? GSI_CONTINUE_LINKING
1190 : GSI_SAME_STMT);
1191 gimple *stmt = gimple_build_assign (to, from);
1192 if (after)
1193 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1194 else
1195 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1196 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1197 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1199 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1200 gimple_regimplify_operands (stmt, &gsi);
1204 /* Expand the OpenMP parallel or task directive starting at REGION. */
1206 static void
1207 expand_omp_taskreg (struct omp_region *region)
1209 basic_block entry_bb, exit_bb, new_bb;
1210 struct function *child_cfun;
1211 tree child_fn, block, t;
1212 gimple_stmt_iterator gsi;
1213 gimple *entry_stmt, *stmt;
1214 edge e;
1215 vec<tree, va_gc> *ws_args;
1217 entry_stmt = last_stmt (region->entry);
/* A taskwait-only GIMPLE_OMP_TASK has no body to outline; expand it
   directly and return.  */
1218 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1219 && gimple_omp_task_taskwait_p (entry_stmt))
1221 new_bb = region->entry;
1222 gsi = gsi_last_nondebug_bb (region->entry);
1223 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1224 gsi_remove (&gsi, true);
1225 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1226 return;
1229 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1230 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1232 entry_bb = region->entry;
1233 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1234 exit_bb = region->cont;
1235 else
1236 exit_bb = region->exit;
1238 if (is_combined_parallel (region))
1239 ws_args = region->ws_args;
1240 else
1241 ws_args = NULL;
1243 if (child_cfun->cfg)
1245 /* Due to inlining, it may happen that we have already outlined
1246 the region, in which case all we need to do is make the
1247 sub-graph unreachable and emit the parallel call. */
1248 edge entry_succ_e, exit_succ_e;
1250 entry_succ_e = single_succ_edge (entry_bb);
1252 gsi = gsi_last_nondebug_bb (entry_bb);
1253 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1255 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1256 gsi_remove (&gsi, true);
1258 new_bb = entry_bb;
1259 if (exit_bb)
1261 exit_succ_e = single_succ_edge (exit_bb);
1262 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1264 remove_edge_and_dominated_blocks (entry_succ_e);
1266 else
1268 unsigned srcidx, dstidx, num;
1270 /* If the parallel region needs data sent from the parent
1271 function, then the very first statement (except possible
1272 tree profile counter updates) of the parallel body
1273 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1274 &.OMP_DATA_O is passed as an argument to the child function,
1275 we need to replace it with the argument as seen by the child
1276 function.
1278 In most cases, this will end up being the identity assignment
1279 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1280 a function call that has been inlined, the original PARM_DECL
1281 .OMP_DATA_I may have been converted into a different local
1282 variable. In which case, we need to keep the assignment. */
1283 if (gimple_omp_taskreg_data_arg (entry_stmt))
1285 basic_block entry_succ_bb
1286 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1287 : FALLTHRU_EDGE (entry_bb)->dest;
1288 tree arg;
1289 gimple *parcopy_stmt = NULL;
1291 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1293 gimple *stmt;
1295 gcc_assert (!gsi_end_p (gsi));
1296 stmt = gsi_stmt (gsi);
1297 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1298 continue;
1300 if (gimple_num_ops (stmt) == 2)
1302 tree arg = gimple_assign_rhs1 (stmt);
1304 /* We're ignoring the subcode because we're
1305 effectively doing a STRIP_NOPS. */
1307 if (TREE_CODE (arg) == ADDR_EXPR
1308 && (TREE_OPERAND (arg, 0)
1309 == gimple_omp_taskreg_data_arg (entry_stmt)))
1311 parcopy_stmt = stmt;
1312 break;
1317 gcc_assert (parcopy_stmt != NULL);
1318 arg = DECL_ARGUMENTS (child_fn);
1320 if (!gimple_in_ssa_p (cfun))
1322 if (gimple_assign_lhs (parcopy_stmt) == arg)
1323 gsi_remove (&gsi, true);
1324 else
1326 /* ?? Is setting the subcode really necessary ?? */
1327 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1328 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1331 else
1333 tree lhs = gimple_assign_lhs (parcopy_stmt);
1334 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1335 /* We'd like to set the rhs to the default def in the child_fn,
1336 but it's too early to create ssa names in the child_fn.
1337 Instead, we set the rhs to the parm. In
1338 move_sese_region_to_fn, we introduce a default def for the
1339 parm, map the parm to its default def, and once we encounter
1340 this stmt, replace the parm with the default def. */
1341 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1342 update_stmt (parcopy_stmt);
1346 /* Declare local variables needed in CHILD_CFUN. */
1347 block = DECL_INITIAL (child_fn);
1348 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1349 /* The gimplifier could record temporaries in parallel/task block
1350 rather than in containing function's local_decls chain,
1351 which would mean cgraph missed finalizing them. Do it now. */
1352 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1353 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1354 varpool_node::finalize_decl (t);
1355 DECL_SAVED_TREE (child_fn) = NULL;
1356 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1357 gimple_set_body (child_fn, NULL);
1358 TREE_USED (block) = 1;
1360 /* Reset DECL_CONTEXT on function arguments. */
1361 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1362 DECL_CONTEXT (t) = child_fn;
1364 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1365 so that it can be moved to the child function. */
1366 gsi = gsi_last_nondebug_bb (entry_bb);
1367 stmt = gsi_stmt (gsi);
1368 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1369 || gimple_code (stmt) == GIMPLE_OMP_TASK
1370 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1371 e = split_block (entry_bb, stmt);
1372 gsi_remove (&gsi, true);
1373 entry_bb = e->dest;
1374 edge e2 = NULL;
1375 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1376 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1377 else
1379 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1380 gcc_assert (e2->dest == region->exit);
1381 remove_edge (BRANCH_EDGE (entry_bb));
1382 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1383 gsi = gsi_last_nondebug_bb (region->exit);
1384 gcc_assert (!gsi_end_p (gsi)
1385 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1386 gsi_remove (&gsi, true);
1389 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1390 if (exit_bb)
1392 gsi = gsi_last_nondebug_bb (exit_bb);
1393 gcc_assert (!gsi_end_p (gsi)
1394 && (gimple_code (gsi_stmt (gsi))
1395 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1396 stmt = gimple_build_return (NULL);
1397 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1398 gsi_remove (&gsi, true);
1401 /* Move the parallel region into CHILD_CFUN. */
1403 if (gimple_in_ssa_p (cfun))
1405 init_tree_ssa (child_cfun);
1406 init_ssa_operands (child_cfun);
1407 child_cfun->gimple_df->in_ssa_p = true;
1408 block = NULL_TREE;
1410 else
1411 block = gimple_block (entry_stmt);
1413 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1414 if (exit_bb)
1415 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1416 if (e2)
1418 basic_block dest_bb = e2->dest;
1419 if (!exit_bb)
1420 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1421 remove_edge (e2);
1422 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1424 /* When the OMP expansion process cannot guarantee an up-to-date
1425 loop tree arrange for the child function to fixup loops. */
1426 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1427 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1429 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1430 num = vec_safe_length (child_cfun->local_decls);
1431 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1433 t = (*child_cfun->local_decls)[srcidx];
1434 if (DECL_CONTEXT (t) == cfun->decl)
1435 continue;
1436 if (srcidx != dstidx)
1437 (*child_cfun->local_decls)[dstidx] = t;
1438 dstidx++;
1440 if (dstidx != num)
1441 vec_safe_truncate (child_cfun->local_decls, dstidx);
1443 /* Inform the callgraph about the new function. */
1444 child_cfun->curr_properties = cfun->curr_properties;
1445 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1446 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1447 cgraph_node *node = cgraph_node::get_create (child_fn);
1448 node->parallelized_function = 1;
1449 cgraph_node::add_new_function (child_fn, true);
1451 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1452 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1454 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1455 fixed in a following pass. */
1456 push_cfun (child_cfun);
1457 if (need_asm)
1458 assign_assembler_name_if_needed (child_fn);
1460 if (optimize)
1461 optimize_omp_library_calls (entry_stmt);
1462 update_max_bb_count ();
1463 cgraph_edge::rebuild_edges ();
1465 /* Some EH regions might become dead, see PR34608. If
1466 pass_cleanup_cfg isn't the first pass to happen with the
1467 new child, these dead EH edges might cause problems.
1468 Clean them up now. */
1469 if (flag_exceptions)
1471 basic_block bb;
1472 bool changed = false;
1474 FOR_EACH_BB_FN (bb, cfun)
1475 changed |= gimple_purge_dead_eh_edges (bb);
1476 if (changed)
1477 cleanup_tree_cfg ();
1479 if (gimple_in_ssa_p (cfun))
1480 update_ssa (TODO_update_ssa);
1481 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1482 verify_loop_structure ();
1483 pop_cfun ();
1485 if (dump_file && !gimple_in_ssa_p (cfun))
1487 omp_any_child_fn_dumped = true;
1488 dump_function_header (dump_file, child_fn, dump_flags);
1489 dump_function_to_file (child_fn, dump_file, dump_flags);
1493 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
/* Finally, emit the library call that launches the outlined body.  */
1495 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1496 expand_parallel_call (region, new_bb,
1497 as_a <gomp_parallel *> (entry_stmt), ws_args);
1498 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1499 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1500 else
1501 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1502 if (gimple_in_ssa_p (cfun))
1503 update_ssa (TODO_update_ssa_only_virtuals);
1506 /* Per-loop information about one member of an OpenACC collapsed loop
   nest, filled in by expand_oacc_collapse_init. */
1508 struct oacc_collapse
1510 tree base; /* Base (starting) value of the iteration variable. */
1511 tree iters; /* Number of iterations of this loop. */
1512 tree step; /* Step size, converted to the difference type. */
1513 tree tile; /* Tile increment (NULL if not tiled). */
1514 tree outer; /* Tile iterator var (the loop's own var if not tiled). */
1517 /* Helper for expand_oacc_for. Determine collapsed loop information.
1518 Fill in COUNTS array. Emit any initialization code before GSI.
1519 Return the calculated outer loop bound of BOUND_TYPE. */
1521 static tree
1522 expand_oacc_collapse_init (const struct omp_for_data *fd,
1523 gimple_stmt_iterator *gsi,
1524 oacc_collapse *counts, tree diff_type,
1525 tree bound_type, location_t loc)
1527 tree tiling = fd->tiling;
1528 tree total = build_int_cst (bound_type, 1);
1529 int ix;
/* The collapsed outer loop is expected in canonical form: unit step
   starting at zero.  */
1531 gcc_assert (integer_onep (fd->loop.step));
1532 gcc_assert (integer_zerop (fd->loop.n1));
1534 /* When tiling, the first operand of the tile clause applies to the
1535 innermost loop, and we work outwards from there. Seems
1536 backwards, but whatever. */
1537 for (ix = fd->collapse; ix--;)
1539 const omp_for_data_loop *loop = &fd->loops[ix];
1541 tree iter_type = TREE_TYPE (loop->v);
1542 tree plus_type = iter_type;
1544 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1546 if (POINTER_TYPE_P (iter_type))
1547 plus_type = sizetype;
1549 if (tiling)
1551 tree num = build_int_cst (integer_type_node, fd->collapse);
1552 tree loop_no = build_int_cst (integer_type_node, ix);
1553 tree tile = TREE_VALUE (tiling);
1554 gcall *call
1555 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1556 /* gwv-outer=*/integer_zero_node,
1557 /* gwv-inner=*/integer_zero_node);
1559 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1560 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1561 gimple_call_set_lhs (call, counts[ix].tile);
1562 gimple_set_location (call, loc);
1563 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1565 tiling = TREE_CHAIN (tiling);
1567 else
1569 counts[ix].tile = NULL;
1570 counts[ix].outer = loop->v;
1573 tree b = loop->n1;
1574 tree e = loop->n2;
1575 tree s = loop->step;
1576 bool up = loop->cond_code == LT_EXPR;
1577 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1578 bool negating;
1579 tree expr;
1581 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1582 true, GSI_SAME_STMT);
1583 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1584 true, GSI_SAME_STMT);
1586 /* Convert the step, avoiding possible unsigned->signed overflow. */
1587 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1588 if (negating)
1589 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1590 s = fold_convert (diff_type, s);
1591 if (negating)
1592 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1593 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1594 true, GSI_SAME_STMT);
1596 /* Determine the range, avoiding possible unsigned->signed overflow. */
1597 negating = !up && TYPE_UNSIGNED (iter_type);
1598 expr = fold_build2 (MINUS_EXPR, plus_type,
1599 fold_convert (plus_type, negating ? b : e),
1600 fold_convert (plus_type, negating ? e : b));
1601 expr = fold_convert (diff_type, expr);
1602 if (negating)
1603 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1604 tree range = force_gimple_operand_gsi
1605 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1607 /* Determine number of iterations: (range - dir + s) / s,
   i.e. the division rounded away from zero in the loop direction. */
1608 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1609 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1610 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1612 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1615 counts[ix].base = b;
1616 counts[ix].iters = iters;
1617 counts[ix].step = s;
/* Accumulate the product of all member iteration counts; this is the
   collapsed loop's bound.  */
1619 total = fold_build2 (MULT_EXPR, bound_type, total,
1620 fold_convert (bound_type, iters));
1623 return total;
1626 /* Emit initializers for collapsed loop members. INNER is true if
1627 this is for the element loop of a TILE. IVAR is the outer
1628 loop iteration variable, from which collapsed loop iteration values
1629 are calculated. COUNTS array has been initialized by
1630 expand_oacc_collapse_init. */
1632 static void
1633 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1634 gimple_stmt_iterator *gsi,
1635 const oacc_collapse *counts, tree ivar,
1636 tree diff_type)
1638 tree ivar_type = TREE_TYPE (ivar);
1640 /* The most rapidly changing iteration variable is the innermost
1641 one. */
1642 for (int ix = fd->collapse; ix--;)
1644 const omp_for_data_loop *loop = &fd->loops[ix];
1645 const oacc_collapse *collapse = &counts[ix];
1646 tree v = inner ? loop->v : collapse->outer;
1647 tree iter_type = TREE_TYPE (v);
1648 tree plus_type = iter_type;
1649 enum tree_code plus_code = PLUS_EXPR;
1650 tree expr;
1652 if (POINTER_TYPE_P (iter_type))
1654 plus_code = POINTER_PLUS_EXPR;
1655 plus_type = sizetype;
/* Split IVAR into this member's index (the remainder) and the part
   left for the enclosing members (the quotient).  */
1658 expr = ivar;
1659 if (ix)
1661 tree mod = fold_convert (ivar_type, collapse->iters);
1662 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1663 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1664 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1665 true, GSI_SAME_STMT);
/* v = base (or outer tile var) + index * step.  */
1668 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1669 fold_convert (diff_type, collapse->step));
1670 expr = fold_build2 (plus_code, iter_type,
1671 inner ? collapse->outer : collapse->base,
1672 fold_convert (plus_type, expr));
1673 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1674 true, GSI_SAME_STMT);
1675 gassign *ass = gimple_build_assign (v, expr);
1676 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1680 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1681 of the combined collapse > 1 loop constructs, generate code like:
1682 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1683 if (cond3 is <)
1684 adj = STEP3 - 1;
1685 else
1686 adj = STEP3 + 1;
1687 count3 = (adj + N32 - N31) / STEP3;
1688 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1689 if (cond2 is <)
1690 adj = STEP2 - 1;
1691 else
1692 adj = STEP2 + 1;
1693 count2 = (adj + N22 - N21) / STEP2;
1694 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1695 if (cond1 is <)
1696 adj = STEP1 - 1;
1697 else
1698 adj = STEP1 + 1;
1699 count1 = (adj + N12 - N11) / STEP1;
1700 count = count1 * count2 * count3;
1701 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1702 count = 0;
1703 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1704 of the combined loop constructs, just initialize COUNTS array
1705 from the _looptemp_ clauses. For loop nests with non-rectangular
1706 loops, do this only for the rectangular loops. Then pick
1707 the loops which reference outer vars in their bound expressions
1708 and the loops which they refer to and for this sub-nest compute
1709 number of iterations. For triangular loops use Faulhaber's formula,
1710 otherwise as a fallback, compute by iterating the loops.
1711 If e.g. the sub-nest is
1712 for (I = N11; I COND1 N12; I += STEP1)
1713 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1714 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1716 COUNT = 0;
1717 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1718 for (tmpj = M21 * tmpi + N21;
1719 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1721 int tmpk1 = M31 * tmpj + N31;
1722 int tmpk2 = M32 * tmpj + N32;
1723 if (tmpk1 COND3 tmpk2)
1725 if (COND3 is <)
1726 adj = STEP3 - 1;
1727 else
1728 adj = STEP3 + 1;
1729 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1732 and finally multiply the counts of the rectangular loops not
1733 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1734 store number of iterations of the loops from fd->first_nonrect
1735 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1736 by the counts of rectangular loops not referenced in any non-rectangular
1737 loops sandwiched in between those. */
1739 /* NOTE: It *could* be better to moosh all of the BBs together,
1740 creating one larger BB with all the computation and the unexpected
1741 jump at the end. I.e.
1743 bool zero3, zero2, zero1, zero;
1745 zero3 = N32 c3 N31;
1746 count3 = (N32 - N31) /[cl] STEP3;
1747 zero2 = N22 c2 N21;
1748 count2 = (N22 - N21) /[cl] STEP2;
1749 zero1 = N12 c1 N11;
1750 count1 = (N12 - N11) /[cl] STEP1;
1751 zero = zero3 || zero2 || zero1;
1752 count = count1 * count2 * count3;
1753 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1755 After all, we expect the zero=false, and thus we expect to have to
1756 evaluate all of the comparison expressions, so short-circuiting
1757 oughtn't be a win. Since the condition isn't protecting a
1758 denominator, we're not concerned about divide-by-zero, so we can
1759 fully evaluate count even if a numerator turned out to be wrong.
1761 It seems like putting this all together would create much better
1762 scheduling opportunities, and less pressure on the chip's branch
1763 predictor. */
1765 static void
1766 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1767 basic_block &entry_bb, tree *counts,
1768 basic_block &zero_iter1_bb, int &first_zero_iter1,
1769 basic_block &zero_iter2_bb, int &first_zero_iter2,
1770 basic_block &l2_dom_bb)
1772 tree t, type = TREE_TYPE (fd->loop.v);
1773 edge e, ne;
1774 int i;
1776 /* Collapsed loops need work for expansion into SSA form. */
1777 gcc_assert (!gimple_in_ssa_p (cfun));
1779 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1780 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1782 gcc_assert (fd->ordered == 0);
1783 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1784 isn't supposed to be handled, as the inner loop doesn't
1785 use it. */
1786 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1787 OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 counts[i] = OMP_CLAUSE_DECL (innerc);
1796 else
1797 counts[0] = NULL_TREE;
1799 if (fd->non_rect
1800 && fd->last_nonrect == fd->first_nonrect + 1
1801 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1803 tree c[4];
1804 for (i = 0; i < 4; i++)
1806 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1807 OMP_CLAUSE__LOOPTEMP_);
1808 gcc_assert (innerc);
1809 c[i] = OMP_CLAUSE_DECL (innerc);
1811 counts[0] = c[0];
1812 fd->first_inner_iterations = c[1];
1813 fd->factor = c[2];
1814 fd->adjn1 = c[3];
1816 return;
1819 for (i = fd->collapse; i < fd->ordered; i++)
1821 tree itype = TREE_TYPE (fd->loops[i].v);
1822 counts[i] = NULL_TREE;
1823 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1824 fold_convert (itype, fd->loops[i].n1),
1825 fold_convert (itype, fd->loops[i].n2));
1826 if (t && integer_zerop (t))
1828 for (i = fd->collapse; i < fd->ordered; i++)
1829 counts[i] = build_int_cst (type, 0);
1830 break;
1833 bool rect_count_seen = false;
1834 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1836 tree itype = TREE_TYPE (fd->loops[i].v);
1838 if (i >= fd->collapse && counts[i])
1839 continue;
1840 if (fd->non_rect)
1842 /* Skip loops that use outer iterators in their expressions
1843 during this phase. */
1844 if (fd->loops[i].m1 || fd->loops[i].m2)
1846 counts[i] = build_zero_cst (type);
1847 continue;
1850 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1851 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1852 fold_convert (itype, fd->loops[i].n1),
1853 fold_convert (itype, fd->loops[i].n2)))
1854 == NULL_TREE || !integer_onep (t)))
1856 gcond *cond_stmt;
1857 tree n1, n2;
1858 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1859 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1860 true, GSI_SAME_STMT);
1861 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1862 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1863 true, GSI_SAME_STMT);
1864 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1865 NULL_TREE, NULL_TREE);
1866 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1867 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1868 expand_omp_regimplify_p, NULL, NULL)
1869 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1870 expand_omp_regimplify_p, NULL, NULL))
1872 *gsi = gsi_for_stmt (cond_stmt);
1873 gimple_regimplify_operands (cond_stmt, gsi);
1875 e = split_block (entry_bb, cond_stmt);
1876 basic_block &zero_iter_bb
1877 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1878 int &first_zero_iter
1879 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1880 if (zero_iter_bb == NULL)
1882 gassign *assign_stmt;
1883 first_zero_iter = i;
1884 zero_iter_bb = create_empty_bb (entry_bb);
1885 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1886 *gsi = gsi_after_labels (zero_iter_bb);
1887 if (i < fd->collapse)
1888 assign_stmt = gimple_build_assign (fd->loop.n2,
1889 build_zero_cst (type));
1890 else
1892 counts[i] = create_tmp_reg (type, ".count");
1893 assign_stmt
1894 = gimple_build_assign (counts[i], build_zero_cst (type));
1896 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1897 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1898 entry_bb);
1900 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1901 ne->probability = profile_probability::very_unlikely ();
1902 e->flags = EDGE_TRUE_VALUE;
1903 e->probability = ne->probability.invert ();
1904 if (l2_dom_bb == NULL)
1905 l2_dom_bb = entry_bb;
1906 entry_bb = e->dest;
1907 *gsi = gsi_last_nondebug_bb (entry_bb);
1910 if (POINTER_TYPE_P (itype))
1911 itype = signed_type_for (itype);
1912 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1913 ? -1 : 1));
1914 t = fold_build2 (PLUS_EXPR, itype,
1915 fold_convert (itype, fd->loops[i].step), t);
1916 t = fold_build2 (PLUS_EXPR, itype, t,
1917 fold_convert (itype, fd->loops[i].n2));
1918 t = fold_build2 (MINUS_EXPR, itype, t,
1919 fold_convert (itype, fd->loops[i].n1));
1920 /* ?? We could probably use CEIL_DIV_EXPR instead of
1921 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1922 generate the same code in the end because generically we
1923 don't know that the values involved must be negative for
1924 GT?? */
1925 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1927 fold_build1 (NEGATE_EXPR, itype, t),
1928 fold_build1 (NEGATE_EXPR, itype,
1929 fold_convert (itype,
1930 fd->loops[i].step)));
1931 else
1932 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1933 fold_convert (itype, fd->loops[i].step));
1934 t = fold_convert (type, t);
1935 if (TREE_CODE (t) == INTEGER_CST)
1936 counts[i] = t;
1937 else
1939 if (i < fd->collapse || i != first_zero_iter2)
1940 counts[i] = create_tmp_reg (type, ".count");
1941 expand_omp_build_assign (gsi, counts[i], t);
1943 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1945 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1946 continue;
1947 if (!rect_count_seen)
1949 t = counts[i];
1950 rect_count_seen = true;
1952 else
1953 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1954 expand_omp_build_assign (gsi, fd->loop.n2, t);
1957 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1959 gcc_assert (fd->last_nonrect != -1);
1961 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1962 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1963 build_zero_cst (type));
1964 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1965 if (fd->loops[i].m1
1966 || fd->loops[i].m2
1967 || fd->loops[i].non_rect_referenced)
1968 break;
1969 if (i == fd->last_nonrect
1970 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1971 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1973 int o = fd->first_nonrect;
1974 tree itype = TREE_TYPE (fd->loops[o].v);
1975 tree n1o = create_tmp_reg (itype, ".n1o");
1976 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1977 expand_omp_build_assign (gsi, n1o, t);
1978 tree n2o = create_tmp_reg (itype, ".n2o");
1979 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1980 expand_omp_build_assign (gsi, n2o, t);
1981 if (fd->loops[i].m1 && fd->loops[i].m2)
1982 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1983 unshare_expr (fd->loops[i].m1));
1984 else if (fd->loops[i].m1)
1985 t = fold_unary (NEGATE_EXPR, itype,
1986 unshare_expr (fd->loops[i].m1));
1987 else
1988 t = unshare_expr (fd->loops[i].m2);
1989 tree m2minusm1
1990 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1991 true, GSI_SAME_STMT);
1993 gimple_stmt_iterator gsi2 = *gsi;
1994 gsi_prev (&gsi2);
1995 e = split_block (entry_bb, gsi_stmt (gsi2));
1996 e = split_block (e->dest, (gimple *) NULL);
1997 basic_block bb1 = e->src;
1998 entry_bb = e->dest;
1999 *gsi = gsi_after_labels (entry_bb);
2001 gsi2 = gsi_after_labels (bb1);
2002 tree ostep = fold_convert (itype, fd->loops[o].step);
2003 t = build_int_cst (itype, (fd->loops[o].cond_code
2004 == LT_EXPR ? -1 : 1));
2005 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2006 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2007 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2008 if (TYPE_UNSIGNED (itype)
2009 && fd->loops[o].cond_code == GT_EXPR)
2010 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2011 fold_build1 (NEGATE_EXPR, itype, t),
2012 fold_build1 (NEGATE_EXPR, itype, ostep));
2013 else
2014 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2015 tree outer_niters
2016 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2019 build_one_cst (itype));
2020 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2021 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2022 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 tree n1, n2, n1e, n2e;
2025 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2026 if (fd->loops[i].m1)
2028 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2029 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2030 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2032 else
2033 n1 = t;
2034 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2035 true, GSI_SAME_STMT);
2036 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2037 if (fd->loops[i].m2)
2039 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2040 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2041 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2043 else
2044 n2 = t;
2045 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2048 if (fd->loops[i].m1)
2050 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2051 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2052 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2054 else
2055 n1e = t;
2056 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2059 if (fd->loops[i].m2)
2061 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2062 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2063 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2065 else
2066 n2e = t;
2067 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2068 true, GSI_SAME_STMT);
2069 gcond *cond_stmt
2070 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2071 NULL_TREE, NULL_TREE);
2072 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2073 e = split_block (bb1, cond_stmt);
2074 e->flags = EDGE_TRUE_VALUE;
2075 e->probability = profile_probability::likely ().guessed ();
2076 basic_block bb2 = e->dest;
2077 gsi2 = gsi_after_labels (bb2);
2079 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2080 NULL_TREE, NULL_TREE);
2081 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2082 e = split_block (bb2, cond_stmt);
2083 e->flags = EDGE_TRUE_VALUE;
2084 e->probability = profile_probability::likely ().guessed ();
2085 gsi2 = gsi_after_labels (e->dest);
2087 tree step = fold_convert (itype, fd->loops[i].step);
2088 t = build_int_cst (itype, (fd->loops[i].cond_code
2089 == LT_EXPR ? -1 : 1));
2090 t = fold_build2 (PLUS_EXPR, itype, step, t);
2091 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2092 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2093 if (TYPE_UNSIGNED (itype)
2094 && fd->loops[i].cond_code == GT_EXPR)
2095 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2096 fold_build1 (NEGATE_EXPR, itype, t),
2097 fold_build1 (NEGATE_EXPR, itype, step));
2098 else
2099 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2100 tree first_inner_iterations
2101 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2102 true, GSI_SAME_STMT);
2103 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2104 if (TYPE_UNSIGNED (itype)
2105 && fd->loops[i].cond_code == GT_EXPR)
2106 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2107 fold_build1 (NEGATE_EXPR, itype, t),
2108 fold_build1 (NEGATE_EXPR, itype, step));
2109 else
2110 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2111 tree factor
2112 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2113 true, GSI_SAME_STMT);
2114 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2115 build_one_cst (itype));
2116 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2117 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2118 t = fold_build2 (MULT_EXPR, itype, factor, t);
2119 t = fold_build2 (PLUS_EXPR, itype,
2120 fold_build2 (MULT_EXPR, itype, outer_niters,
2121 first_inner_iterations), t);
2122 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2123 fold_convert (type, t));
2125 basic_block bb3 = create_empty_bb (bb1);
2126 add_bb_to_loop (bb3, bb1->loop_father);
2128 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2129 e->probability = profile_probability::unlikely ().guessed ();
2131 gsi2 = gsi_after_labels (bb3);
2132 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2133 NULL_TREE, NULL_TREE);
2134 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2135 e = split_block (bb3, cond_stmt);
2136 e->flags = EDGE_TRUE_VALUE;
2137 e->probability = profile_probability::likely ().guessed ();
2138 basic_block bb4 = e->dest;
2140 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2141 ne->probability = e->probability.invert ();
2143 basic_block bb5 = create_empty_bb (bb2);
2144 add_bb_to_loop (bb5, bb2->loop_father);
2146 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2147 ne->probability = profile_probability::unlikely ().guessed ();
2149 for (int j = 0; j < 2; j++)
2151 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2152 t = fold_build2 (MINUS_EXPR, itype,
2153 unshare_expr (fd->loops[i].n1),
2154 unshare_expr (fd->loops[i].n2));
2155 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2156 tree tem
2157 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2158 true, GSI_SAME_STMT);
2159 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2160 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2161 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2162 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2163 true, GSI_SAME_STMT);
2164 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2165 if (fd->loops[i].m1)
2167 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2168 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2169 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2171 else
2172 n1 = t;
2173 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2174 true, GSI_SAME_STMT);
2175 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2176 if (fd->loops[i].m2)
2178 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2179 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2180 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2182 else
2183 n2 = t;
2184 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2185 true, GSI_SAME_STMT);
2186 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2188 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2189 NULL_TREE, NULL_TREE);
2190 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2191 e = split_block (gsi_bb (gsi2), cond_stmt);
2192 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2193 e->probability = profile_probability::unlikely ().guessed ();
2194 ne = make_edge (e->src, bb1,
2195 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2196 ne->probability = e->probability.invert ();
2197 gsi2 = gsi_after_labels (e->dest);
2199 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2200 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2202 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2205 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2206 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2207 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2209 if (fd->first_nonrect + 1 == fd->last_nonrect)
2211 fd->first_inner_iterations = first_inner_iterations;
2212 fd->factor = factor;
2213 fd->adjn1 = n1o;
2216 else
2218 /* Fallback implementation. Evaluate the loops with m1/m2
2219 non-NULL as well as their outer loops at runtime using temporaries
2220 instead of the original iteration variables, and in the
2221 body just bump the counter. */
2222 gimple_stmt_iterator gsi2 = *gsi;
2223 gsi_prev (&gsi2);
2224 e = split_block (entry_bb, gsi_stmt (gsi2));
2225 e = split_block (e->dest, (gimple *) NULL);
2226 basic_block cur_bb = e->src;
2227 basic_block next_bb = e->dest;
2228 entry_bb = e->dest;
2229 *gsi = gsi_after_labels (entry_bb);
2231 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2232 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2234 for (i = 0; i <= fd->last_nonrect; i++)
2236 if (fd->loops[i].m1 == NULL_TREE
2237 && fd->loops[i].m2 == NULL_TREE
2238 && !fd->loops[i].non_rect_referenced)
2239 continue;
2241 tree itype = TREE_TYPE (fd->loops[i].v);
2243 gsi2 = gsi_after_labels (cur_bb);
2244 tree n1, n2;
2245 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2246 if (fd->loops[i].m1)
2248 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2249 n1 = fold_build2 (MULT_EXPR, itype,
2250 vs[i - fd->loops[i].outer], n1);
2251 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2253 else
2254 n1 = t;
2255 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2256 true, GSI_SAME_STMT);
2257 if (i < fd->last_nonrect)
2259 vs[i] = create_tmp_reg (itype, ".it");
2260 expand_omp_build_assign (&gsi2, vs[i], n1);
2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2263 if (fd->loops[i].m2)
2265 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2266 n2 = fold_build2 (MULT_EXPR, itype,
2267 vs[i - fd->loops[i].outer], n2);
2268 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2270 else
2271 n2 = t;
2272 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2273 true, GSI_SAME_STMT);
2274 if (i == fd->last_nonrect)
2276 gcond *cond_stmt
2277 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2278 NULL_TREE, NULL_TREE);
2279 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2280 e = split_block (cur_bb, cond_stmt);
2281 e->flags = EDGE_TRUE_VALUE;
2282 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2283 e->probability = profile_probability::likely ().guessed ();
2284 ne->probability = e->probability.invert ();
2285 gsi2 = gsi_after_labels (e->dest);
2287 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2288 ? -1 : 1));
2289 t = fold_build2 (PLUS_EXPR, itype,
2290 fold_convert (itype, fd->loops[i].step), t);
2291 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2292 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2293 tree step = fold_convert (itype, fd->loops[i].step);
2294 if (TYPE_UNSIGNED (itype)
2295 && fd->loops[i].cond_code == GT_EXPR)
2296 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2297 fold_build1 (NEGATE_EXPR, itype, t),
2298 fold_build1 (NEGATE_EXPR, itype, step));
2299 else
2300 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2301 t = fold_convert (type, t);
2302 t = fold_build2 (PLUS_EXPR, type,
2303 counts[fd->last_nonrect], t);
2304 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2305 true, GSI_SAME_STMT);
2306 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2307 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2308 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2309 break;
2311 e = split_block (cur_bb, last_stmt (cur_bb));
2313 basic_block new_cur_bb = create_empty_bb (cur_bb);
2314 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2316 gsi2 = gsi_after_labels (e->dest);
2317 tree step = fold_convert (itype,
2318 unshare_expr (fd->loops[i].step));
2319 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2320 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2321 true, GSI_SAME_STMT);
2322 expand_omp_build_assign (&gsi2, vs[i], t);
2324 ne = split_block (e->dest, last_stmt (e->dest));
2325 gsi2 = gsi_after_labels (ne->dest);
2327 gcond *cond_stmt
2328 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2329 NULL_TREE, NULL_TREE);
2330 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2331 edge e3, e4;
2332 if (next_bb == entry_bb)
2334 e3 = find_edge (ne->dest, next_bb);
2335 e3->flags = EDGE_FALSE_VALUE;
2337 else
2338 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2339 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2340 e4->probability = profile_probability::likely ().guessed ();
2341 e3->probability = e4->probability.invert ();
2342 basic_block esrc = e->src;
2343 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2344 cur_bb = new_cur_bb;
2345 basic_block latch_bb = next_bb;
2346 next_bb = e->dest;
2347 remove_edge (e);
2348 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2349 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2350 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2353 t = NULL_TREE;
2354 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2355 if (!fd->loops[i].non_rect_referenced
2356 && fd->loops[i].m1 == NULL_TREE
2357 && fd->loops[i].m2 == NULL_TREE)
2359 if (t == NULL_TREE)
2360 t = counts[i];
2361 else
2362 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2364 if (t)
2366 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2367 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2369 if (!rect_count_seen)
2370 t = counts[fd->last_nonrect];
2371 else
2372 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2373 counts[fd->last_nonrect]);
2374 expand_omp_build_assign (gsi, fd->loop.n2, t);
2376 else if (fd->non_rect)
2378 tree t = fd->loop.n2;
2379 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2380 int non_rect_referenced = 0, non_rect = 0;
2381 for (i = 0; i < fd->collapse; i++)
2383 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2384 && !integer_zerop (counts[i]))
2385 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2386 if (fd->loops[i].non_rect_referenced)
2387 non_rect_referenced++;
2388 if (fd->loops[i].m1 || fd->loops[i].m2)
2389 non_rect++;
2391 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2392 counts[fd->last_nonrect] = t;
2396 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2397 T = V;
2398 V3 = N31 + (T % count3) * STEP3;
2399 T = T / count3;
2400 V2 = N21 + (T % count2) * STEP2;
2401 T = T / count2;
2402 V1 = N11 + T * STEP1;
2403 if this loop doesn't have an inner loop construct combined with it.
2404 If it does have an inner loop construct combined with it and the
2405 iteration count isn't known constant, store values from counts array
2406 into its _looptemp_ temporaries instead.
2407 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2408 inclusive), use the count of all those loops together, and either
2409 find quadratic etc. equation roots, or as a fallback, do:
2410 COUNT = 0;
2411 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2412 for (tmpj = M21 * tmpi + N21;
2413 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2415 int tmpk1 = M31 * tmpj + N31;
2416 int tmpk2 = M32 * tmpj + N32;
2417 if (tmpk1 COND3 tmpk2)
2419 if (COND3 is <)
2420 adj = STEP3 - 1;
2421 else
2422 adj = STEP3 + 1;
2423 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2424 if (COUNT + temp > T)
2426 V1 = tmpi;
2427 V2 = tmpj;
2428 V3 = tmpk1 + (T - COUNT) * STEP3;
2429 goto done;
2431 else
2432 COUNT += temp;
2435 done:;
2436 but for optional innermost or outermost rectangular loops that aren't
2437 referenced by other loop expressions keep doing the division/modulo. */
2439 static void
2440 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2441 tree *counts, tree *nonrect_bounds,
2442 gimple *inner_stmt, tree startvar)
2444 int i;
2445 if (gimple_omp_for_combined_p (fd->for_stmt))
2447 /* If fd->loop.n2 is constant, then no propagation of the counts
2448 is needed, they are constant. */
2449 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2450 return;
2452 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2453 ? gimple_omp_taskreg_clauses (inner_stmt)
2454 : gimple_omp_for_clauses (inner_stmt);
2455 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2456 isn't supposed to be handled, as the inner loop doesn't
2457 use it. */
2458 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2459 gcc_assert (innerc);
2460 int count = 0;
2461 if (fd->non_rect
2462 && fd->last_nonrect == fd->first_nonrect + 1
2463 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2464 count = 4;
2465 for (i = 0; i < fd->collapse + count; i++)
2467 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2468 OMP_CLAUSE__LOOPTEMP_);
2469 gcc_assert (innerc);
2470 if (i)
2472 tree tem = OMP_CLAUSE_DECL (innerc);
2473 tree t;
2474 if (i < fd->collapse)
2475 t = counts[i];
2476 else
2477 switch (i - fd->collapse)
2479 case 0: t = counts[0]; break;
2480 case 1: t = fd->first_inner_iterations; break;
2481 case 2: t = fd->factor; break;
2482 case 3: t = fd->adjn1; break;
2483 default: gcc_unreachable ();
2485 t = fold_convert (TREE_TYPE (tem), t);
2486 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2487 false, GSI_CONTINUE_LINKING);
2488 gassign *stmt = gimple_build_assign (tem, t);
2489 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2492 return;
2495 tree type = TREE_TYPE (fd->loop.v);
2496 tree tem = create_tmp_reg (type, ".tem");
2497 gassign *stmt = gimple_build_assign (tem, startvar);
2498 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2500 for (i = fd->collapse - 1; i >= 0; i--)
2502 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2503 itype = vtype;
2504 if (POINTER_TYPE_P (vtype))
2505 itype = signed_type_for (vtype);
2506 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2507 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2508 else
2509 t = tem;
2510 if (i == fd->last_nonrect)
2512 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2513 false, GSI_CONTINUE_LINKING);
2514 tree stopval = t;
2515 tree idx = create_tmp_reg (type, ".count");
2516 expand_omp_build_assign (gsi, idx,
2517 build_zero_cst (type), true);
2518 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2519 if (fd->first_nonrect + 1 == fd->last_nonrect
2520 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2521 || fd->first_inner_iterations)
2522 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2523 != CODE_FOR_nothing)
2524 && !integer_zerop (fd->loop.n2))
2526 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2527 tree itype = TREE_TYPE (fd->loops[i].v);
2528 tree first_inner_iterations = fd->first_inner_iterations;
2529 tree factor = fd->factor;
2530 gcond *cond_stmt
2531 = gimple_build_cond (NE_EXPR, factor,
2532 build_zero_cst (TREE_TYPE (factor)),
2533 NULL_TREE, NULL_TREE);
2534 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2535 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2536 basic_block bb0 = e->src;
2537 e->flags = EDGE_TRUE_VALUE;
2538 e->probability = profile_probability::likely ();
2539 bb_triang_dom = bb0;
2540 *gsi = gsi_after_labels (e->dest);
2541 tree slltype = long_long_integer_type_node;
2542 tree ulltype = long_long_unsigned_type_node;
2543 tree stopvalull = fold_convert (ulltype, stopval);
2544 stopvalull
2545 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2546 false, GSI_CONTINUE_LINKING);
2547 first_inner_iterations
2548 = fold_convert (slltype, first_inner_iterations);
2549 first_inner_iterations
2550 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2551 NULL_TREE, false,
2552 GSI_CONTINUE_LINKING);
2553 factor = fold_convert (slltype, factor);
2554 factor
2555 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2556 false, GSI_CONTINUE_LINKING);
2557 tree first_inner_iterationsd
2558 = fold_build1 (FLOAT_EXPR, double_type_node,
2559 first_inner_iterations);
2560 first_inner_iterationsd
2561 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2562 NULL_TREE, false,
2563 GSI_CONTINUE_LINKING);
2564 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2565 factor);
2566 factord = force_gimple_operand_gsi (gsi, factord, true,
2567 NULL_TREE, false,
2568 GSI_CONTINUE_LINKING);
2569 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2570 stopvalull);
2571 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2572 NULL_TREE, false,
2573 GSI_CONTINUE_LINKING);
2574 /* Temporarily disable flag_rounding_math, values will be
2575 decimal numbers divided by 2 and worst case imprecisions
2576 due to too large values ought to be caught later by the
2577 checks for fallback. */
2578 int save_flag_rounding_math = flag_rounding_math;
2579 flag_rounding_math = 0;
2580 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2581 build_real (double_type_node, dconst2));
2582 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2583 first_inner_iterationsd, t);
2584 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2585 GSI_CONTINUE_LINKING);
2586 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2587 build_real (double_type_node, dconst2));
2588 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2589 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2590 fold_build2 (MULT_EXPR, double_type_node,
2591 t3, t3));
2592 flag_rounding_math = save_flag_rounding_math;
2593 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2594 GSI_CONTINUE_LINKING);
2595 if (flag_exceptions
2596 && cfun->can_throw_non_call_exceptions
2597 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2599 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2600 build_zero_cst (double_type_node));
2601 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2602 false, GSI_CONTINUE_LINKING);
2603 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2604 boolean_false_node,
2605 NULL_TREE, NULL_TREE);
2607 else
2608 cond_stmt
2609 = gimple_build_cond (LT_EXPR, t,
2610 build_zero_cst (double_type_node),
2611 NULL_TREE, NULL_TREE);
2612 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2613 e = split_block (gsi_bb (*gsi), cond_stmt);
2614 basic_block bb1 = e->src;
2615 e->flags = EDGE_FALSE_VALUE;
2616 e->probability = profile_probability::very_likely ();
2617 *gsi = gsi_after_labels (e->dest);
2618 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2619 tree sqrtr = create_tmp_var (double_type_node);
2620 gimple_call_set_lhs (call, sqrtr);
2621 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2622 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2623 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2624 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2625 tree c = create_tmp_var (ulltype);
2626 tree d = create_tmp_var (ulltype);
2627 expand_omp_build_assign (gsi, c, t, true);
2628 t = fold_build2 (MINUS_EXPR, ulltype, c,
2629 build_one_cst (ulltype));
2630 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2631 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2632 t = fold_build2 (MULT_EXPR, ulltype,
2633 fold_convert (ulltype, fd->factor), t);
2634 tree t2
2635 = fold_build2 (MULT_EXPR, ulltype, c,
2636 fold_convert (ulltype,
2637 fd->first_inner_iterations));
2638 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2639 expand_omp_build_assign (gsi, d, t, true);
2640 t = fold_build2 (MULT_EXPR, ulltype,
2641 fold_convert (ulltype, fd->factor), c);
2642 t = fold_build2 (PLUS_EXPR, ulltype,
2643 t, fold_convert (ulltype,
2644 fd->first_inner_iterations));
2645 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2646 GSI_CONTINUE_LINKING);
2647 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2648 NULL_TREE, NULL_TREE);
2649 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2650 e = split_block (gsi_bb (*gsi), cond_stmt);
2651 basic_block bb2 = e->src;
2652 e->flags = EDGE_TRUE_VALUE;
2653 e->probability = profile_probability::very_likely ();
2654 *gsi = gsi_after_labels (e->dest);
2655 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2656 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2657 GSI_CONTINUE_LINKING);
2658 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2659 NULL_TREE, NULL_TREE);
2660 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2661 e = split_block (gsi_bb (*gsi), cond_stmt);
2662 basic_block bb3 = e->src;
2663 e->flags = EDGE_FALSE_VALUE;
2664 e->probability = profile_probability::very_likely ();
2665 *gsi = gsi_after_labels (e->dest);
2666 t = fold_convert (itype, c);
2667 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2668 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2669 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2670 GSI_CONTINUE_LINKING);
2671 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2672 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2673 t2 = fold_convert (itype, t2);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2675 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2676 if (fd->loops[i].m1)
2678 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2679 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2681 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2682 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2683 bb_triang = e->src;
2684 *gsi = gsi_after_labels (e->dest);
2685 remove_edge (e);
2686 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2687 e->probability = profile_probability::very_unlikely ();
2688 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2689 e->probability = profile_probability::very_unlikely ();
2690 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2691 e->probability = profile_probability::very_unlikely ();
2693 basic_block bb4 = create_empty_bb (bb0);
2694 add_bb_to_loop (bb4, bb0->loop_father);
2695 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2696 e->probability = profile_probability::unlikely ();
2697 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2698 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2699 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2700 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2701 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2702 counts[i], counts[i - 1]);
2703 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2704 GSI_CONTINUE_LINKING);
2705 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2706 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2707 t = fold_convert (itype, t);
2708 t2 = fold_convert (itype, t2);
2709 t = fold_build2 (MULT_EXPR, itype, t,
2710 fold_convert (itype, fd->loops[i].step));
2711 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2712 t2 = fold_build2 (MULT_EXPR, itype, t2,
2713 fold_convert (itype, fd->loops[i - 1].step));
2714 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2715 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2716 false, GSI_CONTINUE_LINKING);
2717 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2718 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2719 if (fd->loops[i].m1)
2721 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2722 fd->loops[i - 1].v);
2723 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2725 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2726 false, GSI_CONTINUE_LINKING);
2727 stmt = gimple_build_assign (fd->loops[i].v, t);
2728 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2730 /* Fallback implementation. Evaluate the loops in between
2731 (inclusive) fd->first_nonrect and fd->last_nonrect at
2732 runtime using temporaries instead of the original iteration
2733 variables, in the body just bump the counter and compare
2734 with the desired value. */
2735 gimple_stmt_iterator gsi2 = *gsi;
2736 basic_block entry_bb = gsi_bb (gsi2);
2737 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2738 e = split_block (e->dest, (gimple *) NULL);
2739 basic_block dom_bb = NULL;
2740 basic_block cur_bb = e->src;
2741 basic_block next_bb = e->dest;
2742 entry_bb = e->dest;
2743 *gsi = gsi_after_labels (entry_bb);
2745 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2746 tree n1 = NULL_TREE, n2 = NULL_TREE;
2747 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2749 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2751 tree itype = TREE_TYPE (fd->loops[j].v);
2752 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2753 && fd->loops[j].m2 == NULL_TREE
2754 && !fd->loops[j].non_rect_referenced);
2755 gsi2 = gsi_after_labels (cur_bb);
2756 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2757 if (fd->loops[j].m1)
2759 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2760 n1 = fold_build2 (MULT_EXPR, itype,
2761 vs[j - fd->loops[j].outer], n1);
2762 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2764 else if (rect_p)
2765 n1 = build_zero_cst (type);
2766 else
2767 n1 = t;
2768 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2769 true, GSI_SAME_STMT);
2770 if (j < fd->last_nonrect)
2772 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2773 expand_omp_build_assign (&gsi2, vs[j], n1);
2775 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2776 if (fd->loops[j].m2)
2778 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2779 n2 = fold_build2 (MULT_EXPR, itype,
2780 vs[j - fd->loops[j].outer], n2);
2781 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2783 else if (rect_p)
2784 n2 = counts[j];
2785 else
2786 n2 = t;
2787 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2788 true, GSI_SAME_STMT);
2789 if (j == fd->last_nonrect)
2791 gcond *cond_stmt
2792 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2793 NULL_TREE, NULL_TREE);
2794 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2795 e = split_block (cur_bb, cond_stmt);
2796 e->flags = EDGE_TRUE_VALUE;
2797 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2798 e->probability = profile_probability::likely ().guessed ();
2799 ne->probability = e->probability.invert ();
2800 gsi2 = gsi_after_labels (e->dest);
2802 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2803 ? -1 : 1));
2804 t = fold_build2 (PLUS_EXPR, itype,
2805 fold_convert (itype, fd->loops[j].step), t);
2806 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2807 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2808 tree step = fold_convert (itype, fd->loops[j].step);
2809 if (TYPE_UNSIGNED (itype)
2810 && fd->loops[j].cond_code == GT_EXPR)
2811 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2812 fold_build1 (NEGATE_EXPR, itype, t),
2813 fold_build1 (NEGATE_EXPR, itype, step));
2814 else
2815 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2816 t = fold_convert (type, t);
2817 t = fold_build2 (PLUS_EXPR, type, idx, t);
2818 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2819 true, GSI_SAME_STMT);
2820 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2821 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2822 cond_stmt
2823 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2824 NULL_TREE);
2825 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2826 e = split_block (gsi_bb (gsi2), cond_stmt);
2827 e->flags = EDGE_TRUE_VALUE;
2828 e->probability = profile_probability::likely ().guessed ();
2829 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2830 ne->probability = e->probability.invert ();
2831 gsi2 = gsi_after_labels (e->dest);
2832 expand_omp_build_assign (&gsi2, idx, t);
2833 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2834 break;
2836 e = split_block (cur_bb, last_stmt (cur_bb));
2838 basic_block new_cur_bb = create_empty_bb (cur_bb);
2839 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2841 gsi2 = gsi_after_labels (e->dest);
2842 if (rect_p)
2843 t = fold_build2 (PLUS_EXPR, type, vs[j],
2844 build_one_cst (type));
2845 else
2847 tree step
2848 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2849 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2851 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2852 true, GSI_SAME_STMT);
2853 expand_omp_build_assign (&gsi2, vs[j], t);
2855 edge ne = split_block (e->dest, last_stmt (e->dest));
2856 gsi2 = gsi_after_labels (ne->dest);
2858 gcond *cond_stmt;
2859 if (next_bb == entry_bb)
2860 /* No need to actually check the outermost condition. */
2861 cond_stmt
2862 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2863 boolean_true_node,
2864 NULL_TREE, NULL_TREE);
2865 else
2866 cond_stmt
2867 = gimple_build_cond (rect_p ? LT_EXPR
2868 : fd->loops[j].cond_code,
2869 vs[j], n2, NULL_TREE, NULL_TREE);
2870 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2871 edge e3, e4;
2872 if (next_bb == entry_bb)
2874 e3 = find_edge (ne->dest, next_bb);
2875 e3->flags = EDGE_FALSE_VALUE;
2876 dom_bb = ne->dest;
2878 else
2879 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2880 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2881 e4->probability = profile_probability::likely ().guessed ();
2882 e3->probability = e4->probability.invert ();
2883 basic_block esrc = e->src;
2884 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2885 cur_bb = new_cur_bb;
2886 basic_block latch_bb = next_bb;
2887 next_bb = e->dest;
2888 remove_edge (e);
2889 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2890 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2891 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2893 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2895 tree itype = TREE_TYPE (fd->loops[j].v);
2896 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2897 && fd->loops[j].m2 == NULL_TREE
2898 && !fd->loops[j].non_rect_referenced);
2899 if (j == fd->last_nonrect)
2901 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2902 t = fold_convert (itype, t);
2903 tree t2
2904 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2905 t = fold_build2 (MULT_EXPR, itype, t, t2);
2906 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2908 else if (rect_p)
2910 t = fold_convert (itype, vs[j]);
2911 t = fold_build2 (MULT_EXPR, itype, t,
2912 fold_convert (itype, fd->loops[j].step));
2913 if (POINTER_TYPE_P (vtype))
2914 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2915 else
2916 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2918 else
2919 t = vs[j];
2920 t = force_gimple_operand_gsi (gsi, t, false,
2921 NULL_TREE, true,
2922 GSI_SAME_STMT);
2923 stmt = gimple_build_assign (fd->loops[j].v, t);
2924 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2926 if (gsi_end_p (*gsi))
2927 *gsi = gsi_last_bb (gsi_bb (*gsi));
2928 else
2929 gsi_prev (gsi);
2930 if (bb_triang)
2932 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2933 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2934 *gsi = gsi_after_labels (e->dest);
2935 if (!gsi_end_p (*gsi))
2936 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2937 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2940 else
2942 t = fold_convert (itype, t);
2943 t = fold_build2 (MULT_EXPR, itype, t,
2944 fold_convert (itype, fd->loops[i].step));
2945 if (POINTER_TYPE_P (vtype))
2946 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2947 else
2948 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2949 t = force_gimple_operand_gsi (gsi, t,
2950 DECL_P (fd->loops[i].v)
2951 && TREE_ADDRESSABLE (fd->loops[i].v),
2952 NULL_TREE, false,
2953 GSI_CONTINUE_LINKING);
2954 stmt = gimple_build_assign (fd->loops[i].v, t);
2955 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2957 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2959 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2960 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2961 false, GSI_CONTINUE_LINKING);
2962 stmt = gimple_build_assign (tem, t);
2963 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2965 if (i == fd->last_nonrect)
2966 i = fd->first_nonrect;
2968 if (fd->non_rect)
2969 for (i = 0; i <= fd->last_nonrect; i++)
2970 if (fd->loops[i].m2)
2972 tree itype = TREE_TYPE (fd->loops[i].v);
2974 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2975 t = fold_build2 (MULT_EXPR, itype,
2976 fd->loops[i - fd->loops[i].outer].v, t);
2977 t = fold_build2 (PLUS_EXPR, itype, t,
2978 fold_convert (itype,
2979 unshare_expr (fd->loops[i].n2)));
2980 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2981 t = force_gimple_operand_gsi (gsi, t, false,
2982 NULL_TREE, false,
2983 GSI_CONTINUE_LINKING);
2984 stmt = gimple_build_assign (nonrect_bounds[i], t);
2985 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2989 /* Helper function for expand_omp_for_*. Generate code like:
2990 L10:
2991 V3 += STEP3;
2992 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2993 L11:
2994 V3 = N31;
2995 V2 += STEP2;
2996 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2997 L12:
2998 V2 = N21;
2999 V1 += STEP1;
3000 goto BODY_BB;
3001 For non-rectangular loops, use temporaries stored in nonrect_bounds
3002 for the upper bounds if M?2 multiplier is present. Given e.g.
3003 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3004 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3005 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3006 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3008 L10:
3009 V4 += STEP4;
3010 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3011 L11:
3012 V4 = N41 + M41 * V2; // This can be left out if the loop
3013 // refers to the immediate parent loop
3014 V3 += STEP3;
3015 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3016 L12:
3017 V3 = N31;
3018 V2 += STEP2;
3019 if (V2 cond2 N22) goto L120; else goto L13;
3020 L120:
3021 V4 = N41 + M41 * V2;
3022 NONRECT_BOUND4 = N42 + M42 * V2;
3023 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3024 L13:
3025 V2 = N21;
3026 V1 += STEP1;
3027 goto L120; */
3029 static basic_block
3030 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3031 basic_block cont_bb, basic_block body_bb)
/* Build the per-collapsed-loop increment/wrap-around blocks described in
   the pseudocode comment above this function, walking from the innermost
   loop (fd->collapse - 1) outwards.  Returns the block for the outermost
   collapsed loop's increment (COLLAPSE_BB), or NULL if fd->collapse == 1.  */
3033 basic_block last_bb, bb, collapse_bb = NULL;
3034 int i;
3035 gimple_stmt_iterator gsi;
3036 edge e;
3037 tree t;
3038 gimple *stmt;
3040 last_bb = cont_bb;
3041 for (i = fd->collapse - 1; i >= 0; i--)
3043 tree vtype = TREE_TYPE (fd->loops[i].v);
/* A fresh empty block per loop level; statements are appended via GSI.  */
3045 bb = create_empty_bb (last_bb);
3046 add_bb_to_loop (bb, last_bb->loop_father);
3047 gsi = gsi_start_bb (bb);
3049 if (i < fd->collapse - 1)
/* Falling out of the inner loop: reset the inner IV to its lower bound
   (possibly V = N1 + M1 * outer-V for non-rectangular loops) before
   bumping this level's IV.  */
3051 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3052 e->probability
3053 = profile_probability::guessed_always ().apply_scale (1, 8);
3055 struct omp_for_data_loop *l = &fd->loops[i + 1];
3056 if (l->m1 == NULL_TREE || l->outer != 1)
3058 t = l->n1;
3059 if (l->m1)
3061 tree t2
3062 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3063 fd->loops[i + 1 - l->outer].v, l->m1);
3064 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3066 t = force_gimple_operand_gsi (&gsi, t,
3067 DECL_P (l->v)
3068 && TREE_ADDRESSABLE (l->v),
3069 NULL_TREE, false,
3070 GSI_CONTINUE_LINKING);
3071 stmt = gimple_build_assign (l->v, t);
3072 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3075 else
/* Outermost level: remember its increment block for the caller.  */
3076 collapse_bb = bb;
3078 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
/* V += STEP for this level; pointer IVs use POINTER_PLUS.  */
3080 if (POINTER_TYPE_P (vtype))
3081 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3082 else
3083 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3084 t = force_gimple_operand_gsi (&gsi, t,
3085 DECL_P (fd->loops[i].v)
3086 && TREE_ADDRESSABLE (fd->loops[i].v),
3087 NULL_TREE, false, GSI_CONTINUE_LINKING);
3088 stmt = gimple_build_assign (fd->loops[i].v, t);
3089 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3091 if (fd->loops[i].non_rect_referenced)
/* Some inner non-rectangular loop j has its bounds expressed in terms
   of this loop's IV.  For each such j, emit a block that recomputes
   the inner lower bound (and cached upper bound in nonrect_bounds[j])
   and re-tests the inner loop condition, chaining the blocks with
   TRUE edges (see the L120 block in the pseudocode above).  */
3093 basic_block update_bb = NULL, prev_bb = NULL;
3094 for (int j = i + 1; j <= fd->last_nonrect; j++)
3095 if (j - fd->loops[j].outer == i)
3097 tree n1, n2;
3098 struct omp_for_data_loop *l = &fd->loops[j];
3099 basic_block this_bb = create_empty_bb (last_bb);
3100 add_bb_to_loop (this_bb, last_bb->loop_father);
3101 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3102 if (prev_bb)
3104 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3105 e->probability
3106 = profile_probability::guessed_always ().apply_scale (7,
3108 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
/* N1 side: V_j = N1_j + M1_j * V_i when M1 is present.  */
3110 if (l->m1)
3112 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3113 fd->loops[i].v);
3114 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3115 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3116 false,
3117 GSI_CONTINUE_LINKING);
3118 stmt = gimple_build_assign (l->v, n1);
3119 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3120 n1 = l->v;
3122 else
3123 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3124 NULL_TREE, false,
3125 GSI_CONTINUE_LINKING);
/* N2 side: refresh the cached bound nonrect_bounds[j] when M2 is
   present, otherwise just gimplify N2.  */
3126 if (l->m2)
3128 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3129 fd->loops[i].v);
3130 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3131 t, unshare_expr (l->n2));
3132 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3133 false,
3134 GSI_CONTINUE_LINKING);
3135 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3136 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3137 n2 = nonrect_bounds[j];
3139 else
3140 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3141 true, NULL_TREE, false,
3142 GSI_CONTINUE_LINKING);
3143 gcond *cond_stmt
3144 = gimple_build_cond (l->cond_code, n1, n2,
3145 NULL_TREE, NULL_TREE);
3146 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3147 if (update_bb == NULL)
3148 update_bb = this_bb;
/* FALSE: inner loop empty for this V_i, go bump this level again.  */
3149 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3150 e->probability
3151 = profile_probability::guessed_always ().apply_scale (1, 8);
3152 if (prev_bb == NULL)
3153 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3154 prev_bb = this_bb;
3156 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3157 e->probability
3158 = profile_probability::guessed_always ().apply_scale (7, 8);
/* Inner levels should branch into the re-test chain, not directly
   into the body.  */
3159 body_bb = update_bb;
3162 if (i > 0)
/* Non-outermost level: test V cond N2 (or the cached non-rect bound)
   and branch back to the body on success.  */
3164 if (fd->loops[i].m2)
3165 t = nonrect_bounds[i];
3166 else
3167 t = unshare_expr (fd->loops[i].n2);
3168 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3169 false, GSI_CONTINUE_LINKING);
3170 tree v = fd->loops[i].v;
3171 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3172 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3173 false, GSI_CONTINUE_LINKING);
3174 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3175 stmt = gimple_build_cond_empty (t);
3176 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Re-gimplify operands that folding may have turned invalid.  */
3177 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3178 expand_omp_regimplify_p, NULL, NULL)
3179 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3180 expand_omp_regimplify_p, NULL, NULL))
3181 gimple_regimplify_operands (stmt, &gsi);
3182 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3183 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3185 else
/* Outermost level: unconditional fallthrough to the body.  */
3186 make_edge (bb, body_bb, EDGE_FALLTHRU);
3187 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3188 last_bb = bb;
3191 return collapse_bb;
3194 /* Expand #pragma omp ordered depend(source). */
3196 static void
3197 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3198 tree *counts, location_t loc)
/* Emit a libgomp doacross "post" call before *GSI, passing the address of
   the iteration-counts array COUNTS[FD->ordered].  The long vs. unsigned
   long long builtin is selected from FD->iter_type.  */
3200 enum built_in_function source_ix
3201 = fd->iter_type == long_integer_type_node
3202 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3203 gimple *g
3204 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3205 build_fold_addr_expr (counts[fd->ordered]));
/* LOC is the location of the ordered construct being expanded.  */
3206 gimple_set_location (g, loc);
3207 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3210 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3212 static void
3213 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3214 tree *counts, tree c, location_t loc)
/* Expand the depend(sink: v1 +/- off1, ...) clause C into a guarded call to
   the libgomp doacross "wait" builtin.  The per-dimension offsets come from
   TREE_PURPOSE of the OMP_CLAUSE_DECL chain; the whole call is wrapped in a
   runtime condition COND ensuring the referenced iteration actually exists
   in the iteration space.  */
3216 auto_vec<tree, 10> args;
3217 enum built_in_function sink_ix
3218 = fd->iter_type == long_integer_type_node
3219 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3220 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3221 int i;
3222 gimple_stmt_iterator gsi2 = *gsi;
3223 bool warned_step = false;
/* First pass: purely diagnostic.  Warn when the first nonzero offset points
   at a lexically later iteration (a wait that could deadlock).  Offsets
   encoded as TRUNC_DIV_EXPR carry an explicit step (Fortran DO loops).  */
3225 for (i = 0; i < fd->ordered; i++)
3227 tree step = NULL_TREE;
3228 off = TREE_PURPOSE (deps);
3229 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3231 step = TREE_OPERAND (off, 1);
3232 off = TREE_OPERAND (off, 0);
3234 if (!integer_zerop (off))
3236 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3237 || fd->loops[i].cond_code == GT_EXPR);
3238 bool forward = fd->loops[i].cond_code == LT_EXPR;
3239 if (step)
3241 /* Non-simple Fortran DO loops. If step is variable,
3242 we don't know at compile even the direction, so can't
3243 warn. */
3244 if (TREE_CODE (step) != INTEGER_CST)
3245 break;
3246 forward = tree_int_cst_sgn (step) != -1;
3248 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3249 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3250 "waiting for lexically later iteration");
3251 break;
3253 deps = TREE_CHAIN (deps);
3255 /* If all offsets corresponding to the collapsed loops are zero,
3256 this depend clause can be ignored. FIXME: but there is still a
3257 flush needed. We need to emit one __sync_synchronize () for it
3258 though (perhaps conditionally)? Solve this together with the
3259 conservative dependence folding optimization.
3260 if (i >= fd->collapse)
3261 return; */
/* Split the block so the wait call can be conditionally skipped: the call
   is built in E1->dest (via GSI2) and the guarding condition is later
   inserted at the end of E1->src.  */
3263 deps = OMP_CLAUSE_DECL (c);
3264 gsi_prev (&gsi2);
3265 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3266 edge e2 = split_block_after_labels (e1->dest);
3268 gsi2 = gsi_after_labels (e1->dest);
3269 *gsi = gsi_last_bb (e1->src);
/* Second pass: for every ordered dimension build (a) the in-iteration-space
   check folded into COND and (b) the argument V - N1 +/- OFF (divided by
   the step where needed) for the wait call.  */
3270 for (i = 0; i < fd->ordered; i++)
3272 tree itype = TREE_TYPE (fd->loops[i].v);
3273 tree step = NULL_TREE;
3274 tree orig_off = NULL_TREE;
3275 if (POINTER_TYPE_P (itype))
3276 itype = sizetype;
3277 if (i)
3278 deps = TREE_CHAIN (deps);
3279 off = TREE_PURPOSE (deps);
3280 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3282 step = TREE_OPERAND (off, 1);
3283 off = TREE_OPERAND (off, 0);
3284 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3285 && integer_onep (fd->loops[i].step)
3286 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3288 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3289 if (step)
/* Keep the undivided offset for the modulo check below.  */
3291 off = fold_convert_loc (loc, itype, off);
3292 orig_off = off;
3293 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3296 if (integer_zerop (off))
3297 t = boolean_true_node;
3298 else
/* A = V +/- OFF; then test that A still lies between N1 and N2 for
   this dimension, choosing the comparison by loop direction.  */
3300 tree a;
3301 tree co = fold_convert_loc (loc, itype, off);
3302 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3304 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3305 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3306 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3307 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3308 co);
3310 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3311 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3312 fd->loops[i].v, co);
3313 else
3314 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3315 fd->loops[i].v, co);
3316 if (step)
/* Variable step: direction unknown at compile time, so select
   between the two bound checks with a COND_EXPR on step < 0.  */
3318 tree t1, t2;
3319 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3320 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3322 else
3323 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3324 fd->loops[i].n2);
3325 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3326 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3327 fd->loops[i].n2);
3328 else
3329 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3330 fd->loops[i].n1);
3331 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3332 step, build_int_cst (TREE_TYPE (step), 0));
3333 if (TREE_CODE (step) != INTEGER_CST)
3335 t1 = unshare_expr (t1);
3336 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3337 false, GSI_CONTINUE_LINKING);
3338 t2 = unshare_expr (t2);
3339 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3340 false, GSI_CONTINUE_LINKING);
3342 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3343 t, t2, t1);
3345 else if (fd->loops[i].cond_code == LT_EXPR)
3347 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3348 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3349 fd->loops[i].n1);
3350 else
3351 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3352 fd->loops[i].n2);
3354 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3355 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3356 fd->loops[i].n2);
3357 else
3358 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3359 fd->loops[i].n1);
3361 if (cond)
3362 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3363 else
3364 cond = t;
3366 off = fold_convert_loc (loc, itype, off);
/* For non-unit steps, also require OFF to be a multiple of the step;
   otherwise the sink refers to an iteration never executed.  */
3368 if (step
3369 || (fd->loops[i].cond_code == LT_EXPR
3370 ? !integer_onep (fd->loops[i].step)
3371 : !integer_minus_onep (fd->loops[i].step)))
3373 if (step == NULL_TREE
3374 && TYPE_UNSIGNED (itype)
3375 && fd->loops[i].cond_code == GT_EXPR)
3376 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3377 fold_build1_loc (loc, NEGATE_EXPR, itype,
3378 s));
3379 else
3380 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3381 orig_off ? orig_off : off, s);
3382 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3383 build_int_cst (itype, 0));
3384 if (integer_zerop (t) && !warned_step)
3386 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3387 "refers to iteration never in the iteration "
3388 "space");
3389 warned_step = true;
3391 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3392 cond, t);
/* Now build the wait-call argument for this dimension.  Collapsed
   dimensions share the single flattened IV fd->loop.v and accumulate
   their offsets into COFF.  */
3395 if (i <= fd->collapse - 1 && fd->collapse > 1)
3396 t = fd->loop.v;
3397 else if (counts[i])
3398 t = counts[i];
3399 else
3401 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3402 fd->loops[i].v, fd->loops[i].n1);
3403 t = fold_convert_loc (loc, fd->iter_type, t);
3405 if (step)
3406 /* We have divided off by step already earlier. */;
3407 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3408 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3409 fold_build1_loc (loc, NEGATE_EXPR, itype,
3410 s));
3411 else
3412 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3413 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3414 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3415 off = fold_convert_loc (loc, fd->iter_type, off);
3416 if (i <= fd->collapse - 1 && fd->collapse > 1)
3418 if (i)
3419 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3420 off);
3421 if (i < fd->collapse - 1)
3423 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3424 counts[i]);
3425 continue;
3428 off = unshare_expr (off);
3429 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3430 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3431 true, GSI_SAME_STMT);
3432 args.safe_push (t);
3434 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3435 gimple_set_location (g, loc);
3436 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
/* Finally guard the wait block: TRUE edge executes the call, FALSE edge
   (unlikely, 1/8) skips it; fix up the dominator of the join block.  */
3438 cond = unshare_expr (cond);
3439 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3440 GSI_CONTINUE_LINKING);
3441 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3442 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3443 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3444 e1->probability = e3->probability.invert ();
3445 e1->flags = EDGE_TRUE_VALUE;
3446 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3448 *gsi = gsi_after_labels (e2->dest);
3451 /* Expand all #pragma omp ordered depend(source) and
3452 #pragma omp ordered depend(sink:...) constructs in the current
3453 #pragma omp for ordered(n) region. */
3455 static void
3456 expand_omp_ordered_source_sink (struct omp_region *region,
3457 struct omp_for_data *fd, tree *counts,
3458 basic_block cont_bb)
/* Driver: set up the COUNTS slots for the ordered(n) dimensions, then walk
   the inner GIMPLE_OMP_ORDERED regions of REGION and expand each
   depend(source) / depend(sink:...) clause via the helpers above, removing
   the ordered statement afterwards.  */
3460 struct omp_region *inner;
3461 int i;
/* counts[i] == NULL_TREE means "derive the value from the IV directly";
   a zero constant is used for loops known not to iterate (no cont_bb);
   otherwise a fresh .orditer counter variable is created.  */
3462 for (i = fd->collapse - 1; i < fd->ordered; i++)
3463 if (i == fd->collapse - 1 && fd->collapse > 1)
3464 counts[i] = NULL_TREE;
3465 else if (i >= fd->collapse && !cont_bb)
3466 counts[i] = build_zero_cst (fd->iter_type);
3467 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3468 && integer_onep (fd->loops[i].step))
3469 counts[i] = NULL_TREE;
3470 else
3471 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
/* counts[fd->ordered] is the addressable .orditera array passed by address
   to the GOMP_doacross_* builtins.  */
3472 tree atype
3473 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3474 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3475 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3477 for (inner = region->inner; inner; inner = inner->next)
3478 if (inner->type == GIMPLE_OMP_ORDERED)
3480 gomp_ordered *ord_stmt = inner->ord_stmt;
3481 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3482 location_t loc = gimple_location (ord_stmt);
3483 tree c;
/* At most one source clause per ordered construct; expand it first.  */
3484 for (c = gimple_omp_ordered_clauses (ord_stmt);
3485 c; c = OMP_CLAUSE_CHAIN (c))
3486 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3487 break;
3488 if (c)
3489 expand_omp_ordered_source (&gsi, fd, counts, loc);
3490 for (c = gimple_omp_ordered_clauses (ord_stmt);
3491 c; c = OMP_CLAUSE_CHAIN (c))
3492 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3493 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
/* The ordered statement itself is fully replaced by the calls above.  */
3494 gsi_remove (&gsi, true);
3498 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3499 collapsed. */
3501 static basic_block
3502 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3503 basic_block cont_bb, basic_block body_bb,
3504 bool ordered_lastprivate)
/* Wrap BODY_BB into fd->ordered - fd->collapse explicit loops (the ordered
   dimensions beyond the collapsed ones), maintaining the .orditer counters
   and the .orditera array slots as the loops advance.  Returns the possibly
   updated continue block, or NULL when there is no loop to build.  */
3506 if (fd->ordered == fd->collapse)
3507 return cont_bb;
3509 if (!cont_bb)
/* Broken loop (no continue edge): the body runs at most once, so just
   initialize each IV to its lower bound and zero its array slot.  */
3511 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3512 for (int i = fd->collapse; i < fd->ordered; i++)
3514 tree type = TREE_TYPE (fd->loops[i].v);
3515 tree n1 = fold_convert (type, fd->loops[i].n1);
3516 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3517 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3518 size_int (i - fd->collapse + 1),
3519 NULL_TREE, NULL_TREE);
3520 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3522 return NULL;
/* Build loops from the innermost ordered dimension outwards.  For each one:
   pre-header assignments in BODY_BB, increment + counter update in CONT_BB,
   and a new header with the exit test.  */
3525 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3527 tree t, type = TREE_TYPE (fd->loops[i].v);
3528 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3529 expand_omp_build_assign (&gsi, fd->loops[i].v,
3530 fold_convert (type, fd->loops[i].n1));
3531 if (counts[i])
3532 expand_omp_build_assign (&gsi, counts[i],
3533 build_zero_cst (fd->iter_type));
3534 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3535 size_int (i - fd->collapse + 1),
3536 NULL_TREE, NULL_TREE);
3537 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3538 if (!gsi_end_p (gsi))
3539 gsi_prev (&gsi);
3540 else
3541 gsi = gsi_last_bb (body_bb);
3542 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3543 basic_block new_body = e1->dest;
3544 if (body_bb == cont_bb)
3545 cont_bb = new_body;
3546 edge e2 = NULL;
3547 basic_block new_header;
3548 if (EDGE_COUNT (cont_bb->preds) > 0)
/* Latch side: V += STEP, bump the per-dimension counter (or recompute
   V - N1) and store it into the .orditera array slot.  */
3550 gsi = gsi_last_bb (cont_bb);
3551 if (POINTER_TYPE_P (type))
3552 t = fold_build_pointer_plus (fd->loops[i].v,
3553 fold_convert (sizetype,
3554 fd->loops[i].step));
3555 else
3556 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3557 fold_convert (type, fd->loops[i].step));
3558 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3559 if (counts[i])
3561 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3562 build_int_cst (fd->iter_type, 1));
3563 expand_omp_build_assign (&gsi, counts[i], t);
3564 t = counts[i];
3566 else
3568 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3569 fd->loops[i].v, fd->loops[i].n1);
3570 t = fold_convert (fd->iter_type, t);
3571 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3572 true, GSI_SAME_STMT);
3574 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3575 size_int (i - fd->collapse + 1),
3576 NULL_TREE, NULL_TREE);
3577 expand_omp_build_assign (&gsi, aref, t);
3578 gsi_prev (&gsi);
3579 e2 = split_block (cont_bb, gsi_stmt (gsi));
3580 new_header = e2->dest;
3582 else
3583 new_header = cont_bb;
/* Header: test V cond N2; TRUE (7/8) continues into the body, FALSE
   exits this dimension's loop.  */
3584 gsi = gsi_after_labels (new_header);
3585 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3586 true, GSI_SAME_STMT);
3587 tree n2
3588 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3589 true, NULL_TREE, true, GSI_SAME_STMT);
3590 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3591 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3592 edge e3 = split_block (new_header, gsi_stmt (gsi));
3593 cont_bb = e3->dest;
3594 remove_edge (e1);
3595 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3596 e3->flags = EDGE_FALSE_VALUE;
3597 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3598 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3599 e1->probability = e3->probability.invert ();
3601 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3602 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
/* Register the natural loop with the loop tree when a latch exists.  */
3604 if (e2)
3606 class loop *loop = alloc_loop ();
3607 loop->header = new_header;
3608 loop->latch = e2->src;
3609 add_loop (loop, body_bb->loop_father);
3613 /* If there are any lastprivate clauses and it is possible some loops
3614 might have zero iterations, ensure all the decls are initialized,
3615 otherwise we could crash evaluating C++ class iterators with lastprivate
3616 clauses. */
3617 bool need_inits = false;
/* Once one dimension may iterate zero times, every inner IV must be
   initialized unconditionally (need_inits stays true from then on).  */
3618 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3619 if (need_inits)
3621 tree type = TREE_TYPE (fd->loops[i].v);
3622 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3623 expand_omp_build_assign (&gsi, fd->loops[i].v,
3624 fold_convert (type, fd->loops[i].n1));
3626 else
3628 tree type = TREE_TYPE (fd->loops[i].v);
3629 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3630 boolean_type_node,
3631 fold_convert (type, fd->loops[i].n1),
3632 fold_convert (type, fd->loops[i].n2));
/* N1 cond N2 folding to true proves at least one iteration.  */
3633 if (!integer_onep (this_cond))
3634 need_inits = true;
3637 return cont_bb;
3640 /* A subroutine of expand_omp_for. Generate code for a parallel
3641 loop with any schedule. Given parameters:
3643 for (V = N1; V cond N2; V += STEP) BODY;
3645 where COND is "<" or ">", we generate pseudocode
3647 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3648 if (more) goto L0; else goto L3;
3650 V = istart0;
3651 iend = iend0;
3653 BODY;
3654 V += STEP;
3655 if (V cond iend) goto L1; else goto L2;
3657 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3660 If this is a combined omp parallel loop, instead of the call to
3661 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3662 If this is gimple_omp_for_combined_p loop, then instead of assigning
3663 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3664 inner GIMPLE_OMP_FOR and V += STEP; and
3665 if (V cond iend) goto L1; else goto L2; are removed.
3667 For collapsed loops, given parameters:
3668 collapse(3)
3669 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3670 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3671 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3672 BODY;
3674 we generate pseudocode
3676 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3677 if (cond3 is <)
3678 adj = STEP3 - 1;
3679 else
3680 adj = STEP3 + 1;
3681 count3 = (adj + N32 - N31) / STEP3;
3682 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3683 if (cond2 is <)
3684 adj = STEP2 - 1;
3685 else
3686 adj = STEP2 + 1;
3687 count2 = (adj + N22 - N21) / STEP2;
3688 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3689 if (cond1 is <)
3690 adj = STEP1 - 1;
3691 else
3692 adj = STEP1 + 1;
3693 count1 = (adj + N12 - N11) / STEP1;
3694 count = count1 * count2 * count3;
3695 goto Z1;
3697 count = 0;
3699 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3700 if (more) goto L0; else goto L3;
3702 V = istart0;
3703 T = V;
3704 V3 = N31 + (T % count3) * STEP3;
3705 T = T / count3;
3706 V2 = N21 + (T % count2) * STEP2;
3707 T = T / count2;
3708 V1 = N11 + T * STEP1;
3709 iend = iend0;
3711 BODY;
3712 V += 1;
3713 if (V < iend) goto L10; else goto L2;
3714 L10:
3715 V3 += STEP3;
3716 if (V3 cond3 N32) goto L1; else goto L11;
3717 L11:
3718 V3 = N31;
3719 V2 += STEP2;
3720 if (V2 cond2 N22) goto L1; else goto L12;
3721 L12:
3722 V2 = N21;
3723 V1 += STEP1;
3724 goto L1;
3726 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3731 static void
3732 expand_omp_for_generic (struct omp_region *region,
3733 struct omp_for_data *fd,
3734 enum built_in_function start_fn,
3735 enum built_in_function next_fn,
3736 tree sched_arg,
3737 gimple *inner_stmt)
3739 tree type, istart0, iend0, iend;
3740 tree t, vmain, vback, bias = NULL_TREE;
3741 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3742 basic_block l2_bb = NULL, l3_bb = NULL;
3743 gimple_stmt_iterator gsi;
3744 gassign *assign_stmt;
3745 bool in_combined_parallel = is_combined_parallel (region);
3746 bool broken_loop = region->cont == NULL;
3747 edge e, ne;
3748 tree *counts = NULL;
3749 int i;
3750 bool ordered_lastprivate = false;
3752 gcc_assert (!broken_loop || !in_combined_parallel);
3753 gcc_assert (fd->iter_type == long_integer_type_node
3754 || !in_combined_parallel);
3756 entry_bb = region->entry;
3757 cont_bb = region->cont;
3758 collapse_bb = NULL;
3759 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3760 gcc_assert (broken_loop
3761 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3762 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3763 l1_bb = single_succ (l0_bb);
3764 if (!broken_loop)
3766 l2_bb = create_empty_bb (cont_bb);
3767 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3768 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3769 == l1_bb));
3770 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3772 else
3773 l2_bb = NULL;
3774 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3775 exit_bb = region->exit;
3777 gsi = gsi_last_nondebug_bb (entry_bb);
3779 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3780 if (fd->ordered
3781 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3782 OMP_CLAUSE_LASTPRIVATE))
3783 ordered_lastprivate = false;
3784 tree reductions = NULL_TREE;
3785 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3786 tree memv = NULL_TREE;
3787 if (fd->lastprivate_conditional)
3789 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3790 OMP_CLAUSE__CONDTEMP_);
3791 if (fd->have_pointer_condtemp)
3792 condtemp = OMP_CLAUSE_DECL (c);
3793 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3794 cond_var = OMP_CLAUSE_DECL (c);
3796 if (sched_arg)
3798 if (fd->have_reductemp)
3800 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3801 OMP_CLAUSE__REDUCTEMP_);
3802 reductions = OMP_CLAUSE_DECL (c);
3803 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3804 gimple *g = SSA_NAME_DEF_STMT (reductions);
3805 reductions = gimple_assign_rhs1 (g);
3806 OMP_CLAUSE_DECL (c) = reductions;
3807 entry_bb = gimple_bb (g);
3808 edge e = split_block (entry_bb, g);
3809 if (region->entry == entry_bb)
3810 region->entry = e->dest;
3811 gsi = gsi_last_bb (entry_bb);
3813 else
3814 reductions = null_pointer_node;
3815 if (fd->have_pointer_condtemp)
3817 tree type = TREE_TYPE (condtemp);
3818 memv = create_tmp_var (type);
3819 TREE_ADDRESSABLE (memv) = 1;
3820 unsigned HOST_WIDE_INT sz
3821 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3822 sz *= fd->lastprivate_conditional;
3823 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3824 false);
3825 mem = build_fold_addr_expr (memv);
3827 else
3828 mem = null_pointer_node;
3830 if (fd->collapse > 1 || fd->ordered)
3832 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3833 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3835 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3836 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3837 zero_iter1_bb, first_zero_iter1,
3838 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3840 if (zero_iter1_bb)
3842 /* Some counts[i] vars might be uninitialized if
3843 some loop has zero iterations. But the body shouldn't
3844 be executed in that case, so just avoid uninit warnings. */
3845 for (i = first_zero_iter1;
3846 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3847 if (SSA_VAR_P (counts[i]))
3848 suppress_warning (counts[i], OPT_Wuninitialized);
3849 gsi_prev (&gsi);
3850 e = split_block (entry_bb, gsi_stmt (gsi));
3851 entry_bb = e->dest;
3852 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3853 gsi = gsi_last_nondebug_bb (entry_bb);
3854 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3855 get_immediate_dominator (CDI_DOMINATORS,
3856 zero_iter1_bb));
3858 if (zero_iter2_bb)
3860 /* Some counts[i] vars might be uninitialized if
3861 some loop has zero iterations. But the body shouldn't
3862 be executed in that case, so just avoid uninit warnings. */
3863 for (i = first_zero_iter2; i < fd->ordered; i++)
3864 if (SSA_VAR_P (counts[i]))
3865 suppress_warning (counts[i], OPT_Wuninitialized);
3866 if (zero_iter1_bb)
3867 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3868 else
3870 gsi_prev (&gsi);
3871 e = split_block (entry_bb, gsi_stmt (gsi));
3872 entry_bb = e->dest;
3873 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3874 gsi = gsi_last_nondebug_bb (entry_bb);
3875 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3876 get_immediate_dominator
3877 (CDI_DOMINATORS, zero_iter2_bb));
3880 if (fd->collapse == 1)
3882 counts[0] = fd->loop.n2;
3883 fd->loop = fd->loops[0];
3887 type = TREE_TYPE (fd->loop.v);
3888 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3889 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3890 TREE_ADDRESSABLE (istart0) = 1;
3891 TREE_ADDRESSABLE (iend0) = 1;
3893 /* See if we need to bias by LLONG_MIN. */
3894 if (fd->iter_type == long_long_unsigned_type_node
3895 && TREE_CODE (type) == INTEGER_TYPE
3896 && !TYPE_UNSIGNED (type)
3897 && fd->ordered == 0)
3899 tree n1, n2;
3901 if (fd->loop.cond_code == LT_EXPR)
3903 n1 = fd->loop.n1;
3904 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3906 else
3908 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3909 n2 = fd->loop.n1;
3911 if (TREE_CODE (n1) != INTEGER_CST
3912 || TREE_CODE (n2) != INTEGER_CST
3913 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3914 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3917 gimple_stmt_iterator gsif = gsi;
3918 gsi_prev (&gsif);
3920 tree arr = NULL_TREE;
3921 if (in_combined_parallel)
3923 gcc_assert (fd->ordered == 0);
3924 /* In a combined parallel loop, emit a call to
3925 GOMP_loop_foo_next. */
3926 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3927 build_fold_addr_expr (istart0),
3928 build_fold_addr_expr (iend0));
3930 else
3932 tree t0, t1, t2, t3, t4;
3933 /* If this is not a combined parallel loop, emit a call to
3934 GOMP_loop_foo_start in ENTRY_BB. */
3935 t4 = build_fold_addr_expr (iend0);
3936 t3 = build_fold_addr_expr (istart0);
3937 if (fd->ordered)
3939 t0 = build_int_cst (unsigned_type_node,
3940 fd->ordered - fd->collapse + 1);
3941 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3942 fd->ordered
3943 - fd->collapse + 1),
3944 ".omp_counts");
3945 DECL_NAMELESS (arr) = 1;
3946 TREE_ADDRESSABLE (arr) = 1;
3947 TREE_STATIC (arr) = 1;
3948 vec<constructor_elt, va_gc> *v;
3949 vec_alloc (v, fd->ordered - fd->collapse + 1);
3950 int idx;
3952 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3954 tree c;
3955 if (idx == 0 && fd->collapse > 1)
3956 c = fd->loop.n2;
3957 else
3958 c = counts[idx + fd->collapse - 1];
3959 tree purpose = size_int (idx);
3960 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3961 if (TREE_CODE (c) != INTEGER_CST)
3962 TREE_STATIC (arr) = 0;
3965 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3966 if (!TREE_STATIC (arr))
3967 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3968 void_type_node, arr),
3969 true, NULL_TREE, true, GSI_SAME_STMT);
3970 t1 = build_fold_addr_expr (arr);
3971 t2 = NULL_TREE;
3973 else
3975 t2 = fold_convert (fd->iter_type, fd->loop.step);
3976 t1 = fd->loop.n2;
3977 t0 = fd->loop.n1;
3978 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3980 tree innerc
3981 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 t0 = OMP_CLAUSE_DECL (innerc);
3985 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3986 OMP_CLAUSE__LOOPTEMP_);
3987 gcc_assert (innerc);
3988 t1 = OMP_CLAUSE_DECL (innerc);
3990 if (POINTER_TYPE_P (TREE_TYPE (t0))
3991 && TYPE_PRECISION (TREE_TYPE (t0))
3992 != TYPE_PRECISION (fd->iter_type))
3994 /* Avoid casting pointers to integer of a different size. */
3995 tree itype = signed_type_for (type);
3996 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3997 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3999 else
4001 t1 = fold_convert (fd->iter_type, t1);
4002 t0 = fold_convert (fd->iter_type, t0);
4004 if (bias)
4006 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4007 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4010 if (fd->iter_type == long_integer_type_node || fd->ordered)
4012 if (fd->chunk_size)
4014 t = fold_convert (fd->iter_type, fd->chunk_size);
4015 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4016 if (sched_arg)
4018 if (fd->ordered)
4019 t = build_call_expr (builtin_decl_explicit (start_fn),
4020 8, t0, t1, sched_arg, t, t3, t4,
4021 reductions, mem);
4022 else
4023 t = build_call_expr (builtin_decl_explicit (start_fn),
4024 9, t0, t1, t2, sched_arg, t, t3, t4,
4025 reductions, mem);
4027 else if (fd->ordered)
4028 t = build_call_expr (builtin_decl_explicit (start_fn),
4029 5, t0, t1, t, t3, t4);
4030 else
4031 t = build_call_expr (builtin_decl_explicit (start_fn),
4032 6, t0, t1, t2, t, t3, t4);
4034 else if (fd->ordered)
4035 t = build_call_expr (builtin_decl_explicit (start_fn),
4036 4, t0, t1, t3, t4);
4037 else
4038 t = build_call_expr (builtin_decl_explicit (start_fn),
4039 5, t0, t1, t2, t3, t4);
4041 else
4043 tree t5;
4044 tree c_bool_type;
4045 tree bfn_decl;
4047 /* The GOMP_loop_ull_*start functions have additional boolean
4048 argument, true for < loops and false for > loops.
4049 In Fortran, the C bool type can be different from
4050 boolean_type_node. */
4051 bfn_decl = builtin_decl_explicit (start_fn);
4052 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4053 t5 = build_int_cst (c_bool_type,
4054 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4055 if (fd->chunk_size)
4057 tree bfn_decl = builtin_decl_explicit (start_fn);
4058 t = fold_convert (fd->iter_type, fd->chunk_size);
4059 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4060 if (sched_arg)
4061 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4062 t, t3, t4, reductions, mem);
4063 else
4064 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4066 else
4067 t = build_call_expr (builtin_decl_explicit (start_fn),
4068 6, t5, t0, t1, t2, t3, t4);
4071 if (TREE_TYPE (t) != boolean_type_node)
4072 t = fold_build2 (NE_EXPR, boolean_type_node,
4073 t, build_int_cst (TREE_TYPE (t), 0));
4074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4075 true, GSI_SAME_STMT);
4076 if (arr && !TREE_STATIC (arr))
4078 tree clobber = build_clobber (TREE_TYPE (arr));
4079 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4080 GSI_SAME_STMT);
4082 if (fd->have_pointer_condtemp)
4083 expand_omp_build_assign (&gsi, condtemp, memv, false);
4084 if (fd->have_reductemp)
4086 gimple *g = gsi_stmt (gsi);
4087 gsi_remove (&gsi, true);
4088 release_ssa_name (gimple_assign_lhs (g));
4090 entry_bb = region->entry;
4091 gsi = gsi_last_nondebug_bb (entry_bb);
4093 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4095 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4097 /* Remove the GIMPLE_OMP_FOR statement. */
4098 gsi_remove (&gsi, true);
4100 if (gsi_end_p (gsif))
4101 gsif = gsi_after_labels (gsi_bb (gsif));
4102 gsi_next (&gsif);
4104 /* Iteration setup for sequential loop goes in L0_BB. */
4105 tree startvar = fd->loop.v;
4106 tree endvar = NULL_TREE;
4108 if (gimple_omp_for_combined_p (fd->for_stmt))
4110 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4111 && gimple_omp_for_kind (inner_stmt)
4112 == GF_OMP_FOR_KIND_SIMD);
4113 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4114 OMP_CLAUSE__LOOPTEMP_);
4115 gcc_assert (innerc);
4116 startvar = OMP_CLAUSE_DECL (innerc);
4117 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4118 OMP_CLAUSE__LOOPTEMP_);
4119 gcc_assert (innerc);
4120 endvar = OMP_CLAUSE_DECL (innerc);
4123 gsi = gsi_start_bb (l0_bb);
4124 t = istart0;
4125 if (fd->ordered && fd->collapse == 1)
4126 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4127 fold_convert (fd->iter_type, fd->loop.step));
4128 else if (bias)
4129 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4130 if (fd->ordered && fd->collapse == 1)
4132 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4133 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4134 fd->loop.n1, fold_convert (sizetype, t));
4135 else
4137 t = fold_convert (TREE_TYPE (startvar), t);
4138 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4139 fd->loop.n1, t);
4142 else
4144 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4145 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4146 t = fold_convert (TREE_TYPE (startvar), t);
4148 t = force_gimple_operand_gsi (&gsi, t,
4149 DECL_P (startvar)
4150 && TREE_ADDRESSABLE (startvar),
4151 NULL_TREE, false, GSI_CONTINUE_LINKING);
4152 assign_stmt = gimple_build_assign (startvar, t);
4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4154 if (cond_var)
4156 tree itype = TREE_TYPE (cond_var);
4157 /* For lastprivate(conditional:) itervar, we need some iteration
4158 counter that starts at unsigned non-zero and increases.
4159 Prefer as few IVs as possible, so if we can use startvar
4160 itself, use that, or startvar + constant (those would be
4161 incremented with step), and as last resort use the s0 + 1
4162 incremented by 1. */
4163 if ((fd->ordered && fd->collapse == 1)
4164 || bias
4165 || POINTER_TYPE_P (type)
4166 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4167 || fd->loop.cond_code != LT_EXPR)
4168 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4169 build_int_cst (itype, 1));
4170 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4171 t = fold_convert (itype, t);
4172 else
4174 tree c = fold_convert (itype, fd->loop.n1);
4175 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4176 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4178 t = force_gimple_operand_gsi (&gsi, t, false,
4179 NULL_TREE, false, GSI_CONTINUE_LINKING);
4180 assign_stmt = gimple_build_assign (cond_var, t);
4181 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4184 t = iend0;
4185 if (fd->ordered && fd->collapse == 1)
4186 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4187 fold_convert (fd->iter_type, fd->loop.step));
4188 else if (bias)
4189 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4190 if (fd->ordered && fd->collapse == 1)
4192 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4193 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4194 fd->loop.n1, fold_convert (sizetype, t));
4195 else
4197 t = fold_convert (TREE_TYPE (startvar), t);
4198 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4199 fd->loop.n1, t);
4202 else
4204 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4205 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4206 t = fold_convert (TREE_TYPE (startvar), t);
4208 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4209 false, GSI_CONTINUE_LINKING);
4210 if (endvar)
4212 assign_stmt = gimple_build_assign (endvar, iend);
4213 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4214 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4215 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4216 else
4217 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4218 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4220 /* Handle linear clause adjustments. */
4221 tree itercnt = NULL_TREE;
4222 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4223 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4224 c; c = OMP_CLAUSE_CHAIN (c))
4225 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4226 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4228 tree d = OMP_CLAUSE_DECL (c);
4229 bool is_ref = omp_is_reference (d);
4230 tree t = d, a, dest;
4231 if (is_ref)
4232 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4233 tree type = TREE_TYPE (t);
4234 if (POINTER_TYPE_P (type))
4235 type = sizetype;
4236 dest = unshare_expr (t);
4237 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4238 expand_omp_build_assign (&gsif, v, t);
4239 if (itercnt == NULL_TREE)
4241 itercnt = startvar;
4242 tree n1 = fd->loop.n1;
4243 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4245 itercnt
4246 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4247 itercnt);
4248 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4250 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4251 itercnt, n1);
4252 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4253 itercnt, fd->loop.step);
4254 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4255 NULL_TREE, false,
4256 GSI_CONTINUE_LINKING);
4258 a = fold_build2 (MULT_EXPR, type,
4259 fold_convert (type, itercnt),
4260 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4261 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4262 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4263 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4264 false, GSI_CONTINUE_LINKING);
4265 expand_omp_build_assign (&gsi, dest, t, true);
4267 if (fd->collapse > 1)
4268 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4270 if (fd->ordered)
4272 /* Until now, counts array contained number of iterations or
4273 variable containing it for ith loop. From now on, we need
4274 those counts only for collapsed loops, and only for the 2nd
4275 till the last collapsed one. Move those one element earlier,
4276 we'll use counts[fd->collapse - 1] for the first source/sink
4277 iteration counter and so on and counts[fd->ordered]
4278 as the array holding the current counter values for
4279 depend(source). */
4280 if (fd->collapse > 1)
4281 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4282 if (broken_loop)
4284 int i;
4285 for (i = fd->collapse; i < fd->ordered; i++)
4287 tree type = TREE_TYPE (fd->loops[i].v);
4288 tree this_cond
4289 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4290 fold_convert (type, fd->loops[i].n1),
4291 fold_convert (type, fd->loops[i].n2));
4292 if (!integer_onep (this_cond))
4293 break;
4295 if (i < fd->ordered)
4297 cont_bb
4298 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4299 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4300 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4301 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4303 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4304 make_edge (cont_bb, l1_bb, 0);
4305 l2_bb = create_empty_bb (cont_bb);
4306 broken_loop = false;
4309 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4310 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4311 ordered_lastprivate);
4312 if (counts[fd->collapse - 1])
4314 gcc_assert (fd->collapse == 1);
4315 gsi = gsi_last_bb (l0_bb);
4316 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4317 istart0, true);
4318 if (cont_bb)
4320 gsi = gsi_last_bb (cont_bb);
4321 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4322 counts[fd->collapse - 1],
4323 build_int_cst (fd->iter_type, 1));
4324 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4325 tree aref = build4 (ARRAY_REF, fd->iter_type,
4326 counts[fd->ordered], size_zero_node,
4327 NULL_TREE, NULL_TREE);
4328 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4330 t = counts[fd->collapse - 1];
4332 else if (fd->collapse > 1)
4333 t = fd->loop.v;
4334 else
4336 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4337 fd->loops[0].v, fd->loops[0].n1);
4338 t = fold_convert (fd->iter_type, t);
4340 gsi = gsi_last_bb (l0_bb);
4341 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4342 size_zero_node, NULL_TREE, NULL_TREE);
4343 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4344 false, GSI_CONTINUE_LINKING);
4345 expand_omp_build_assign (&gsi, aref, t, true);
4348 if (!broken_loop)
4350 /* Code to control the increment and predicate for the sequential
4351 loop goes in the CONT_BB. */
4352 gsi = gsi_last_nondebug_bb (cont_bb);
4353 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4354 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4355 vmain = gimple_omp_continue_control_use (cont_stmt);
4356 vback = gimple_omp_continue_control_def (cont_stmt);
4358 if (cond_var)
4360 tree itype = TREE_TYPE (cond_var);
4361 tree t2;
4362 if ((fd->ordered && fd->collapse == 1)
4363 || bias
4364 || POINTER_TYPE_P (type)
4365 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4366 || fd->loop.cond_code != LT_EXPR)
4367 t2 = build_int_cst (itype, 1);
4368 else
4369 t2 = fold_convert (itype, fd->loop.step);
4370 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4371 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4372 NULL_TREE, true, GSI_SAME_STMT);
4373 assign_stmt = gimple_build_assign (cond_var, t2);
4374 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4377 if (!gimple_omp_for_combined_p (fd->for_stmt))
4379 if (POINTER_TYPE_P (type))
4380 t = fold_build_pointer_plus (vmain, fd->loop.step);
4381 else
4382 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4383 t = force_gimple_operand_gsi (&gsi, t,
4384 DECL_P (vback)
4385 && TREE_ADDRESSABLE (vback),
4386 NULL_TREE, true, GSI_SAME_STMT);
4387 assign_stmt = gimple_build_assign (vback, t);
4388 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4390 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4392 tree tem;
4393 if (fd->collapse > 1)
4394 tem = fd->loop.v;
4395 else
4397 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4398 fd->loops[0].v, fd->loops[0].n1);
4399 tem = fold_convert (fd->iter_type, tem);
4401 tree aref = build4 (ARRAY_REF, fd->iter_type,
4402 counts[fd->ordered], size_zero_node,
4403 NULL_TREE, NULL_TREE);
4404 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4405 true, GSI_SAME_STMT);
4406 expand_omp_build_assign (&gsi, aref, tem);
4409 t = build2 (fd->loop.cond_code, boolean_type_node,
4410 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4411 iend);
4412 gcond *cond_stmt = gimple_build_cond_empty (t);
4413 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4416 /* Remove GIMPLE_OMP_CONTINUE. */
4417 gsi_remove (&gsi, true);
4419 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4420 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4422 /* Emit code to get the next parallel iteration in L2_BB. */
4423 gsi = gsi_start_bb (l2_bb);
4425 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4426 build_fold_addr_expr (istart0),
4427 build_fold_addr_expr (iend0));
4428 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4430 if (TREE_TYPE (t) != boolean_type_node)
4431 t = fold_build2 (NE_EXPR, boolean_type_node,
4432 t, build_int_cst (TREE_TYPE (t), 0));
4433 gcond *cond_stmt = gimple_build_cond_empty (t);
4434 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4437 /* Add the loop cleanup function. */
4438 gsi = gsi_last_nondebug_bb (exit_bb);
4439 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4440 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4441 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4442 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4443 else
4444 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4445 gcall *call_stmt = gimple_build_call (t, 0);
4446 if (fd->ordered)
4448 tree arr = counts[fd->ordered];
4449 tree clobber = build_clobber (TREE_TYPE (arr));
4450 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4451 GSI_SAME_STMT);
4453 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4455 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4456 if (fd->have_reductemp)
4458 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4459 gimple_call_lhs (call_stmt));
4460 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4463 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4464 gsi_remove (&gsi, true);
4466 /* Connect the new blocks. */
4467 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4468 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4470 if (!broken_loop)
4472 gimple_seq phis;
4474 e = find_edge (cont_bb, l3_bb);
4475 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4477 phis = phi_nodes (l3_bb);
4478 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4480 gimple *phi = gsi_stmt (gsi);
4481 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4482 PHI_ARG_DEF_FROM_EDGE (phi, e));
4484 remove_edge (e);
4486 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4487 e = find_edge (cont_bb, l1_bb);
4488 if (e == NULL)
4490 e = BRANCH_EDGE (cont_bb);
4491 gcc_assert (single_succ (e->dest) == l1_bb);
4493 if (gimple_omp_for_combined_p (fd->for_stmt))
4495 remove_edge (e);
4496 e = NULL;
4498 else if (fd->collapse > 1)
4500 remove_edge (e);
4501 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4503 else
4504 e->flags = EDGE_TRUE_VALUE;
4505 if (e)
4507 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4508 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4510 else
4512 e = find_edge (cont_bb, l2_bb);
4513 e->flags = EDGE_FALLTHRU;
4515 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4517 if (gimple_in_ssa_p (cfun))
4519 /* Add phis to the outer loop that connect to the phis in the inner,
4520 original loop, and move the loop entry value of the inner phi to
4521 the loop entry value of the outer phi. */
4522 gphi_iterator psi;
4523 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4525 location_t locus;
4526 gphi *nphi;
4527 gphi *exit_phi = psi.phi ();
4529 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4530 continue;
4532 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4533 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4535 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4536 edge latch_to_l1 = find_edge (latch, l1_bb);
4537 gphi *inner_phi
4538 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4540 tree t = gimple_phi_result (exit_phi);
4541 tree new_res = copy_ssa_name (t, NULL);
4542 nphi = create_phi_node (new_res, l0_bb);
4544 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4545 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4546 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4547 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4548 add_phi_arg (nphi, t, entry_to_l0, locus);
4550 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4551 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4553 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4557 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4558 recompute_dominator (CDI_DOMINATORS, l2_bb));
4559 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4560 recompute_dominator (CDI_DOMINATORS, l3_bb));
4561 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4562 recompute_dominator (CDI_DOMINATORS, l0_bb));
4563 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4564 recompute_dominator (CDI_DOMINATORS, l1_bb));
4566 /* We enter expand_omp_for_generic with a loop. This original loop may
4567 have its own loop struct, or it may be part of an outer loop struct
4568 (which may be the fake loop). */
4569 class loop *outer_loop = entry_bb->loop_father;
4570 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4572 add_bb_to_loop (l2_bb, outer_loop);
4574 /* We've added a new loop around the original loop. Allocate the
4575 corresponding loop struct. */
4576 class loop *new_loop = alloc_loop ();
4577 new_loop->header = l0_bb;
4578 new_loop->latch = l2_bb;
4579 add_loop (new_loop, outer_loop);
4581 /* Allocate a loop structure for the original loop unless we already
4582 had one. */
4583 if (!orig_loop_has_loop_struct
4584 && !gimple_omp_for_combined_p (fd->for_stmt))
4586 class loop *orig_loop = alloc_loop ();
4587 orig_loop->header = l1_bb;
4588 /* The loop may have multiple latches. */
4589 add_loop (orig_loop, new_loop);
4594 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4595 compute needed allocation size. If !ALLOC of team allocations,
4596 if ALLOC of thread allocation. SZ is the initial needed size for
4597 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4598 CNT number of elements of each array, for !ALLOC this is
4599 omp_get_num_threads (), for ALLOC number of iterations handled by the
4600 current thread. If PTR is non-NULL, it is the start of the allocation
4601 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4602 clauses pointers to the corresponding arrays. */
4604 static tree
4605 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4606 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4607 gimple_stmt_iterator *gsi, bool alloc)
4609 tree eltsz = NULL_TREE;
4610 unsigned HOST_WIDE_INT preval = 0;
4611 if (ptr && sz)
4612 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4613 ptr, size_int (sz));
4614 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4615 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4616 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4617 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4619 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4620 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4621 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4623 unsigned HOST_WIDE_INT szl
4624 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4625 szl = least_bit_hwi (szl);
4626 if (szl)
4627 al = MIN (al, szl);
4629 if (ptr == NULL_TREE)
4631 if (eltsz == NULL_TREE)
4632 eltsz = TYPE_SIZE_UNIT (pointee_type);
4633 else
4634 eltsz = size_binop (PLUS_EXPR, eltsz,
4635 TYPE_SIZE_UNIT (pointee_type));
4637 if (preval == 0 && al <= alloc_align)
4639 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4640 sz += diff;
4641 if (diff && ptr)
4642 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4643 ptr, size_int (diff));
4645 else if (al > preval)
4647 if (ptr)
4649 ptr = fold_convert (pointer_sized_int_node, ptr);
4650 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4651 build_int_cst (pointer_sized_int_node,
4652 al - 1));
4653 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4654 build_int_cst (pointer_sized_int_node,
4655 -(HOST_WIDE_INT) al));
4656 ptr = fold_convert (ptr_type_node, ptr);
4658 else
4659 sz += al - 1;
4661 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4662 preval = al;
4663 else
4664 preval = 1;
4665 if (ptr)
4667 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4668 ptr = OMP_CLAUSE_DECL (c);
4669 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4670 size_binop (MULT_EXPR, cnt,
4671 TYPE_SIZE_UNIT (pointee_type)));
4675 if (ptr == NULL_TREE)
4677 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4678 if (sz)
4679 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4680 return eltsz;
4682 else
4683 return ptr;
4686 /* Return the last _looptemp_ clause if one has been created for
4687 lastprivate on distribute parallel for{, simd} or taskloop.
4688 FD is the loop data and INNERC should be the second _looptemp_
4689 clause (the one holding the end of the range).
4690 This is followed by collapse - 1 _looptemp_ clauses for the
4691 counts[1] and up, and for triangular loops followed by 4
4692 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4693 one factor and one adjn1). After this there is optionally one
4694 _looptemp_ clause that this function returns. */
4696 static tree
4697 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4699 gcc_assert (innerc);
4700 int count = fd->collapse - 1;
4701 if (fd->non_rect
4702 && fd->last_nonrect == fd->first_nonrect + 1
4703 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4704 count += 4;
4705 for (int i = 0; i < count; i++)
4707 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4708 OMP_CLAUSE__LOOPTEMP_);
4709 gcc_assert (innerc);
4711 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4712 OMP_CLAUSE__LOOPTEMP_);
4715 /* A subroutine of expand_omp_for. Generate code for a parallel
4716 loop with static schedule and no specified chunk size. Given
4717 parameters:
4719 for (V = N1; V cond N2; V += STEP) BODY;
4721 where COND is "<" or ">", we generate pseudocode
4723 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4724 if (cond is <)
4725 adj = STEP - 1;
4726 else
4727 adj = STEP + 1;
4728 if ((__typeof (V)) -1 > 0 && cond is >)
4729 n = -(adj + N2 - N1) / -STEP;
4730 else
4731 n = (adj + N2 - N1) / STEP;
4732 q = n / nthreads;
4733 tt = n % nthreads;
4734 if (threadid < tt) goto L3; else goto L4;
4736 tt = 0;
4737 q = q + 1;
4739 s0 = q * threadid + tt;
4740 e0 = s0 + q;
4741 V = s0 * STEP + N1;
4742 if (s0 >= e0) goto L2; else goto L0;
4744 e = e0 * STEP + N1;
4746 BODY;
4747 V += STEP;
4748 if (V cond e) goto L1;
4752 static void
4753 expand_omp_for_static_nochunk (struct omp_region *region,
4754 struct omp_for_data *fd,
4755 gimple *inner_stmt)
4757 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4758 tree type, itype, vmain, vback;
4759 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4760 basic_block body_bb, cont_bb, collapse_bb = NULL;
4761 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4762 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4763 gimple_stmt_iterator gsi, gsip;
4764 edge ep;
4765 bool broken_loop = region->cont == NULL;
4766 tree *counts = NULL;
4767 tree n1, n2, step;
4768 tree reductions = NULL_TREE;
4769 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4771 itype = type = TREE_TYPE (fd->loop.v);
4772 if (POINTER_TYPE_P (type))
4773 itype = signed_type_for (type);
4775 entry_bb = region->entry;
4776 cont_bb = region->cont;
4777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4778 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4779 gcc_assert (broken_loop
4780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4781 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4782 body_bb = single_succ (seq_start_bb);
4783 if (!broken_loop)
4785 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4786 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4787 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4789 exit_bb = region->exit;
4791 /* Iteration space partitioning goes in ENTRY_BB. */
4792 gsi = gsi_last_nondebug_bb (entry_bb);
4793 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4794 gsip = gsi;
4795 gsi_prev (&gsip);
4797 if (fd->collapse > 1)
4799 int first_zero_iter = -1, dummy = -1;
4800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4802 counts = XALLOCAVEC (tree, fd->collapse);
4803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4804 fin_bb, first_zero_iter,
4805 dummy_bb, dummy, l2_dom_bb);
4806 t = NULL_TREE;
4808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4809 t = integer_one_node;
4810 else
4811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4812 fold_convert (type, fd->loop.n1),
4813 fold_convert (type, fd->loop.n2));
4814 if (fd->collapse == 1
4815 && TYPE_UNSIGNED (type)
4816 && (t == NULL_TREE || !integer_onep (t)))
4818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4820 true, GSI_SAME_STMT);
4821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4823 true, GSI_SAME_STMT);
4824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4825 NULL_TREE, NULL_TREE);
4826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4828 expand_omp_regimplify_p, NULL, NULL)
4829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4830 expand_omp_regimplify_p, NULL, NULL))
4832 gsi = gsi_for_stmt (cond_stmt);
4833 gimple_regimplify_operands (cond_stmt, &gsi);
4835 ep = split_block (entry_bb, cond_stmt);
4836 ep->flags = EDGE_TRUE_VALUE;
4837 entry_bb = ep->dest;
4838 ep->probability = profile_probability::very_likely ();
4839 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4840 ep->probability = profile_probability::very_unlikely ();
4841 if (gimple_in_ssa_p (cfun))
4843 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4845 !gsi_end_p (gpi); gsi_next (&gpi))
4847 gphi *phi = gpi.phi ();
4848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4849 ep, UNKNOWN_LOCATION);
4852 gsi = gsi_last_bb (entry_bb);
4855 if (fd->lastprivate_conditional)
4857 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4858 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4859 if (fd->have_pointer_condtemp)
4860 condtemp = OMP_CLAUSE_DECL (c);
4861 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4862 cond_var = OMP_CLAUSE_DECL (c);
4864 if (fd->have_reductemp
4865 /* For scan, we don't want to reinitialize condtemp before the
4866 second loop. */
4867 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4868 || fd->have_nonctrl_scantemp)
4870 tree t1 = build_int_cst (long_integer_type_node, 0);
4871 tree t2 = build_int_cst (long_integer_type_node, 1);
4872 tree t3 = build_int_cstu (long_integer_type_node,
4873 (HOST_WIDE_INT_1U << 31) + 1);
4874 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4875 gimple_stmt_iterator gsi2 = gsi_none ();
4876 gimple *g = NULL;
4877 tree mem = null_pointer_node, memv = NULL_TREE;
4878 unsigned HOST_WIDE_INT condtemp_sz = 0;
4879 unsigned HOST_WIDE_INT alloc_align = 0;
4880 if (fd->have_reductemp)
4882 gcc_assert (!fd->have_nonctrl_scantemp);
4883 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4884 reductions = OMP_CLAUSE_DECL (c);
4885 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4886 g = SSA_NAME_DEF_STMT (reductions);
4887 reductions = gimple_assign_rhs1 (g);
4888 OMP_CLAUSE_DECL (c) = reductions;
4889 gsi2 = gsi_for_stmt (g);
4891 else
4893 if (gsi_end_p (gsip))
4894 gsi2 = gsi_after_labels (region->entry);
4895 else
4896 gsi2 = gsip;
4897 reductions = null_pointer_node;
4899 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4901 tree type;
4902 if (fd->have_pointer_condtemp)
4903 type = TREE_TYPE (condtemp);
4904 else
4905 type = ptr_type_node;
4906 memv = create_tmp_var (type);
4907 TREE_ADDRESSABLE (memv) = 1;
4908 unsigned HOST_WIDE_INT sz = 0;
4909 tree size = NULL_TREE;
4910 if (fd->have_pointer_condtemp)
4912 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4913 sz *= fd->lastprivate_conditional;
4914 condtemp_sz = sz;
4916 if (fd->have_nonctrl_scantemp)
4918 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4919 gimple *g = gimple_build_call (nthreads, 0);
4920 nthreads = create_tmp_var (integer_type_node);
4921 gimple_call_set_lhs (g, nthreads);
4922 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4923 nthreads = fold_convert (sizetype, nthreads);
4924 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4925 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4926 alloc_align, nthreads, NULL,
4927 false);
4928 size = fold_convert (type, size);
4930 else
4931 size = build_int_cst (type, sz);
4932 expand_omp_build_assign (&gsi2, memv, size, false);
4933 mem = build_fold_addr_expr (memv);
4935 tree t
4936 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4937 9, t1, t2, t2, t3, t1, null_pointer_node,
4938 null_pointer_node, reductions, mem);
4939 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4940 true, GSI_SAME_STMT);
4941 if (fd->have_pointer_condtemp)
4942 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4943 if (fd->have_nonctrl_scantemp)
4945 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4946 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4947 alloc_align, nthreads, &gsi2, false);
4949 if (fd->have_reductemp)
4951 gsi_remove (&gsi2, true);
4952 release_ssa_name (gimple_assign_lhs (g));
4955 switch (gimple_omp_for_kind (fd->for_stmt))
4957 case GF_OMP_FOR_KIND_FOR:
4958 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4959 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4960 break;
4961 case GF_OMP_FOR_KIND_DISTRIBUTE:
4962 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4963 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4964 break;
4965 default:
4966 gcc_unreachable ();
4968 nthreads = build_call_expr (nthreads, 0);
4969 nthreads = fold_convert (itype, nthreads);
4970 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4971 true, GSI_SAME_STMT);
4972 threadid = build_call_expr (threadid, 0);
4973 threadid = fold_convert (itype, threadid);
4974 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4975 true, GSI_SAME_STMT);
4977 n1 = fd->loop.n1;
4978 n2 = fd->loop.n2;
4979 step = fd->loop.step;
4980 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4982 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4983 OMP_CLAUSE__LOOPTEMP_);
4984 gcc_assert (innerc);
4985 n1 = OMP_CLAUSE_DECL (innerc);
4986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4987 OMP_CLAUSE__LOOPTEMP_);
4988 gcc_assert (innerc);
4989 n2 = OMP_CLAUSE_DECL (innerc);
4991 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4992 true, NULL_TREE, true, GSI_SAME_STMT);
4993 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4994 true, NULL_TREE, true, GSI_SAME_STMT);
4995 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4996 true, NULL_TREE, true, GSI_SAME_STMT);
4998 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4999 t = fold_build2 (PLUS_EXPR, itype, step, t);
5000 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5001 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5002 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5003 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5004 fold_build1 (NEGATE_EXPR, itype, t),
5005 fold_build1 (NEGATE_EXPR, itype, step));
5006 else
5007 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5008 t = fold_convert (itype, t);
5009 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5011 q = create_tmp_reg (itype, "q");
5012 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5013 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5014 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5016 tt = create_tmp_reg (itype, "tt");
5017 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5018 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5019 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5021 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5022 gcond *cond_stmt = gimple_build_cond_empty (t);
5023 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5025 second_bb = split_block (entry_bb, cond_stmt)->dest;
5026 gsi = gsi_last_nondebug_bb (second_bb);
5027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5029 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5030 GSI_SAME_STMT);
5031 gassign *assign_stmt
5032 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5033 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5035 third_bb = split_block (second_bb, assign_stmt)->dest;
5036 gsi = gsi_last_nondebug_bb (third_bb);
5037 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5039 if (fd->have_nonctrl_scantemp)
5041 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5042 tree controlp = NULL_TREE, controlb = NULL_TREE;
5043 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5044 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5045 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5047 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5048 controlb = OMP_CLAUSE_DECL (c);
5049 else
5050 controlp = OMP_CLAUSE_DECL (c);
5051 if (controlb && controlp)
5052 break;
5054 gcc_assert (controlp && controlb);
5055 tree cnt = create_tmp_var (sizetype);
5056 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5058 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5059 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5060 alloc_align, cnt, NULL, true);
5061 tree size = create_tmp_var (sizetype);
5062 expand_omp_build_assign (&gsi, size, sz, false);
5063 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5064 size, size_int (16384));
5065 expand_omp_build_assign (&gsi, controlb, cmp);
5066 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5067 NULL_TREE, NULL_TREE);
5068 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5069 fourth_bb = split_block (third_bb, g)->dest;
5070 gsi = gsi_last_nondebug_bb (fourth_bb);
5071 /* FIXME: Once we have allocators, this should use allocator. */
5072 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5073 gimple_call_set_lhs (g, controlp);
5074 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5075 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5076 &gsi, true);
5077 gsi_prev (&gsi);
5078 g = gsi_stmt (gsi);
5079 fifth_bb = split_block (fourth_bb, g)->dest;
5080 gsi = gsi_last_nondebug_bb (fifth_bb);
5082 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5083 gimple_call_set_lhs (g, controlp);
5084 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5085 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5086 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5087 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5088 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5090 tree tmp = create_tmp_var (sizetype);
5091 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5092 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5093 TYPE_SIZE_UNIT (pointee_type));
5094 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5095 g = gimple_build_call (alloca_decl, 2, tmp,
5096 size_int (TYPE_ALIGN (pointee_type)));
5097 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5098 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5101 sixth_bb = split_block (fifth_bb, g)->dest;
5102 gsi = gsi_last_nondebug_bb (sixth_bb);
5105 t = build2 (MULT_EXPR, itype, q, threadid);
5106 t = build2 (PLUS_EXPR, itype, t, tt);
5107 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5109 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5110 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5112 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5113 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5115 /* Remove the GIMPLE_OMP_FOR statement. */
5116 gsi_remove (&gsi, true);
5118 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5119 gsi = gsi_start_bb (seq_start_bb);
5121 tree startvar = fd->loop.v;
5122 tree endvar = NULL_TREE;
5124 if (gimple_omp_for_combined_p (fd->for_stmt))
5126 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5127 ? gimple_omp_parallel_clauses (inner_stmt)
5128 : gimple_omp_for_clauses (inner_stmt);
5129 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5130 gcc_assert (innerc);
5131 startvar = OMP_CLAUSE_DECL (innerc);
5132 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5133 OMP_CLAUSE__LOOPTEMP_);
5134 gcc_assert (innerc);
5135 endvar = OMP_CLAUSE_DECL (innerc);
5136 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5137 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5139 innerc = find_lastprivate_looptemp (fd, innerc);
5140 if (innerc)
5142 /* If needed (distribute parallel for with lastprivate),
5143 propagate down the total number of iterations. */
5144 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5145 fd->loop.n2);
5146 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5147 GSI_CONTINUE_LINKING);
5148 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5149 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5153 t = fold_convert (itype, s0);
5154 t = fold_build2 (MULT_EXPR, itype, t, step);
5155 if (POINTER_TYPE_P (type))
5157 t = fold_build_pointer_plus (n1, t);
5158 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5159 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5160 t = fold_convert (signed_type_for (type), t);
5162 else
5163 t = fold_build2 (PLUS_EXPR, type, t, n1);
5164 t = fold_convert (TREE_TYPE (startvar), t);
5165 t = force_gimple_operand_gsi (&gsi, t,
5166 DECL_P (startvar)
5167 && TREE_ADDRESSABLE (startvar),
5168 NULL_TREE, false, GSI_CONTINUE_LINKING);
5169 assign_stmt = gimple_build_assign (startvar, t);
5170 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5171 if (cond_var)
5173 tree itype = TREE_TYPE (cond_var);
5174 /* For lastprivate(conditional:) itervar, we need some iteration
5175 counter that starts at unsigned non-zero and increases.
5176 Prefer as few IVs as possible, so if we can use startvar
5177 itself, use that, or startvar + constant (those would be
5178 incremented with step), and as last resort use the s0 + 1
5179 incremented by 1. */
5180 if (POINTER_TYPE_P (type)
5181 || TREE_CODE (n1) != INTEGER_CST
5182 || fd->loop.cond_code != LT_EXPR)
5183 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5184 build_int_cst (itype, 1));
5185 else if (tree_int_cst_sgn (n1) == 1)
5186 t = fold_convert (itype, t);
5187 else
5189 tree c = fold_convert (itype, n1);
5190 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5191 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5193 t = force_gimple_operand_gsi (&gsi, t, false,
5194 NULL_TREE, false, GSI_CONTINUE_LINKING);
5195 assign_stmt = gimple_build_assign (cond_var, t);
5196 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5199 t = fold_convert (itype, e0);
5200 t = fold_build2 (MULT_EXPR, itype, t, step);
5201 if (POINTER_TYPE_P (type))
5203 t = fold_build_pointer_plus (n1, t);
5204 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5205 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5206 t = fold_convert (signed_type_for (type), t);
5208 else
5209 t = fold_build2 (PLUS_EXPR, type, t, n1);
5210 t = fold_convert (TREE_TYPE (startvar), t);
5211 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5212 false, GSI_CONTINUE_LINKING);
5213 if (endvar)
5215 assign_stmt = gimple_build_assign (endvar, e);
5216 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5217 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5218 assign_stmt = gimple_build_assign (fd->loop.v, e);
5219 else
5220 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5221 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5223 /* Handle linear clause adjustments. */
5224 tree itercnt = NULL_TREE;
5225 tree *nonrect_bounds = NULL;
5226 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5227 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5228 c; c = OMP_CLAUSE_CHAIN (c))
5229 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5230 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5232 tree d = OMP_CLAUSE_DECL (c);
5233 bool is_ref = omp_is_reference (d);
5234 tree t = d, a, dest;
5235 if (is_ref)
5236 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5237 if (itercnt == NULL_TREE)
5239 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5241 itercnt = fold_build2 (MINUS_EXPR, itype,
5242 fold_convert (itype, n1),
5243 fold_convert (itype, fd->loop.n1));
5244 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5245 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5246 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5247 NULL_TREE, false,
5248 GSI_CONTINUE_LINKING);
5250 else
5251 itercnt = s0;
5253 tree type = TREE_TYPE (t);
5254 if (POINTER_TYPE_P (type))
5255 type = sizetype;
5256 a = fold_build2 (MULT_EXPR, type,
5257 fold_convert (type, itercnt),
5258 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5259 dest = unshare_expr (t);
5260 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5261 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5262 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5263 false, GSI_CONTINUE_LINKING);
5264 expand_omp_build_assign (&gsi, dest, t, true);
5266 if (fd->collapse > 1)
5268 if (fd->non_rect)
5270 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5271 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5273 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5274 startvar);
5277 if (!broken_loop)
5279 /* The code controlling the sequential loop replaces the
5280 GIMPLE_OMP_CONTINUE. */
5281 gsi = gsi_last_nondebug_bb (cont_bb);
5282 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5283 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5284 vmain = gimple_omp_continue_control_use (cont_stmt);
5285 vback = gimple_omp_continue_control_def (cont_stmt);
5287 if (cond_var)
5289 tree itype = TREE_TYPE (cond_var);
5290 tree t2;
5291 if (POINTER_TYPE_P (type)
5292 || TREE_CODE (n1) != INTEGER_CST
5293 || fd->loop.cond_code != LT_EXPR)
5294 t2 = build_int_cst (itype, 1);
5295 else
5296 t2 = fold_convert (itype, step);
5297 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5298 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5299 NULL_TREE, true, GSI_SAME_STMT);
5300 assign_stmt = gimple_build_assign (cond_var, t2);
5301 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5304 if (!gimple_omp_for_combined_p (fd->for_stmt))
5306 if (POINTER_TYPE_P (type))
5307 t = fold_build_pointer_plus (vmain, step);
5308 else
5309 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5310 t = force_gimple_operand_gsi (&gsi, t,
5311 DECL_P (vback)
5312 && TREE_ADDRESSABLE (vback),
5313 NULL_TREE, true, GSI_SAME_STMT);
5314 assign_stmt = gimple_build_assign (vback, t);
5315 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5317 t = build2 (fd->loop.cond_code, boolean_type_node,
5318 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5319 ? t : vback, e);
5320 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5323 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5324 gsi_remove (&gsi, true);
5326 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5327 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5328 cont_bb, body_bb);
5331 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5332 gsi = gsi_last_nondebug_bb (exit_bb);
5333 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5335 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5336 if (fd->have_reductemp
5337 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5338 && !fd->have_nonctrl_scantemp))
5340 tree fn;
5341 if (t)
5342 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5343 else
5344 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5345 gcall *g = gimple_build_call (fn, 0);
5346 if (t)
5348 gimple_call_set_lhs (g, t);
5349 if (fd->have_reductemp)
5350 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5351 NOP_EXPR, t),
5352 GSI_SAME_STMT);
5354 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5356 else
5357 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5359 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5360 && !fd->have_nonctrl_scantemp)
5362 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5363 gcall *g = gimple_build_call (fn, 0);
5364 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5366 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5368 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5369 tree controlp = NULL_TREE, controlb = NULL_TREE;
5370 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5371 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5372 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5374 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5375 controlb = OMP_CLAUSE_DECL (c);
5376 else
5377 controlp = OMP_CLAUSE_DECL (c);
5378 if (controlb && controlp)
5379 break;
5381 gcc_assert (controlp && controlb);
5382 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5383 NULL_TREE, NULL_TREE);
5384 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5385 exit1_bb = split_block (exit_bb, g)->dest;
5386 gsi = gsi_after_labels (exit1_bb);
5387 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5388 controlp);
5389 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5390 exit2_bb = split_block (exit1_bb, g)->dest;
5391 gsi = gsi_after_labels (exit2_bb);
5392 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5393 controlp);
5394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5395 exit3_bb = split_block (exit2_bb, g)->dest;
5396 gsi = gsi_after_labels (exit3_bb);
5398 gsi_remove (&gsi, true);
5400 /* Connect all the blocks. */
5401 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5402 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5403 ep = find_edge (entry_bb, second_bb);
5404 ep->flags = EDGE_TRUE_VALUE;
5405 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5406 if (fourth_bb)
5408 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5409 ep->probability
5410 = profile_probability::guessed_always ().apply_scale (1, 2);
5411 ep = find_edge (third_bb, fourth_bb);
5412 ep->flags = EDGE_TRUE_VALUE;
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (fourth_bb, fifth_bb);
5416 redirect_edge_and_branch (ep, sixth_bb);
5418 else
5419 sixth_bb = third_bb;
5420 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5421 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5422 if (exit1_bb)
5424 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5425 ep->probability
5426 = profile_probability::guessed_always ().apply_scale (1, 2);
5427 ep = find_edge (exit_bb, exit1_bb);
5428 ep->flags = EDGE_TRUE_VALUE;
5429 ep->probability
5430 = profile_probability::guessed_always ().apply_scale (1, 2);
5431 ep = find_edge (exit1_bb, exit2_bb);
5432 redirect_edge_and_branch (ep, exit3_bb);
5435 if (!broken_loop)
5437 ep = find_edge (cont_bb, body_bb);
5438 if (ep == NULL)
5440 ep = BRANCH_EDGE (cont_bb);
5441 gcc_assert (single_succ (ep->dest) == body_bb);
5443 if (gimple_omp_for_combined_p (fd->for_stmt))
5445 remove_edge (ep);
5446 ep = NULL;
5448 else if (fd->collapse > 1)
5450 remove_edge (ep);
5451 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5453 else
5454 ep->flags = EDGE_TRUE_VALUE;
5455 find_edge (cont_bb, fin_bb)->flags
5456 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5459 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5460 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5461 if (fourth_bb)
5463 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5466 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5468 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5469 recompute_dominator (CDI_DOMINATORS, body_bb));
5470 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5471 recompute_dominator (CDI_DOMINATORS, fin_bb));
5472 if (exit1_bb)
5474 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5475 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5478 class loop *loop = body_bb->loop_father;
5479 if (loop != entry_bb->loop_father)
5481 gcc_assert (broken_loop || loop->header == body_bb);
5482 gcc_assert (broken_loop
5483 || loop->latch == region->cont
5484 || single_pred (loop->latch) == region->cont);
5485 return;
5488 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5490 loop = alloc_loop ();
5491 loop->header = body_bb;
5492 if (collapse_bb == NULL)
5493 loop->latch = cont_bb;
5494 add_loop (loop, body_bb->loop_father);
5498 /* Return phi in E->DEST with ARG on edge E. */
5500 static gphi *
5501 find_phi_with_arg_on_edge (tree arg, edge e)
5503 basic_block bb = e->dest;
5505 for (gphi_iterator gpi = gsi_start_phis (bb);
5506 !gsi_end_p (gpi);
5507 gsi_next (&gpi))
5509 gphi *phi = gpi.phi ();
5510 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5511 return phi;
5514 return NULL;
5517 /* A subroutine of expand_omp_for. Generate code for a parallel
5518 loop with static schedule and a specified chunk size. Given
5519 parameters:
5521 for (V = N1; V cond N2; V += STEP) BODY;
5523 where COND is "<" or ">", we generate pseudocode
5525 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5526 if (cond is <)
5527 adj = STEP - 1;
5528 else
5529 adj = STEP + 1;
5530 if ((__typeof (V)) -1 > 0 && cond is >)
5531 n = -(adj + N2 - N1) / -STEP;
5532 else
5533 n = (adj + N2 - N1) / STEP;
5534 trip = 0;
5535 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5536 here so that V is defined
5537 if the loop is not entered
5539 s0 = (trip * nthreads + threadid) * CHUNK;
5540 e0 = min (s0 + CHUNK, n);
5541 if (s0 < n) goto L1; else goto L4;
5543 V = s0 * STEP + N1;
5544 e = e0 * STEP + N1;
5546 BODY;
5547 V += STEP;
5548 if (V cond e) goto L2; else goto L3;
5550 trip += 1;
5551 goto L0;
5555 static void
5556 expand_omp_for_static_chunk (struct omp_region *region,
5557 struct omp_for_data *fd, gimple *inner_stmt)
5559 tree n, s0, e0, e, t;
5560 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5561 tree type, itype, vmain, vback, vextra;
5562 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5563 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5564 gimple_stmt_iterator gsi, gsip;
5565 edge se;
5566 bool broken_loop = region->cont == NULL;
5567 tree *counts = NULL;
5568 tree n1, n2, step;
5569 tree reductions = NULL_TREE;
5570 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5572 itype = type = TREE_TYPE (fd->loop.v);
5573 if (POINTER_TYPE_P (type))
5574 itype = signed_type_for (type);
5576 entry_bb = region->entry;
5577 se = split_block (entry_bb, last_stmt (entry_bb));
5578 entry_bb = se->src;
5579 iter_part_bb = se->dest;
5580 cont_bb = region->cont;
5581 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5582 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5583 gcc_assert (broken_loop
5584 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5585 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5586 body_bb = single_succ (seq_start_bb);
5587 if (!broken_loop)
5589 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5590 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5591 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5592 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5594 exit_bb = region->exit;
5596 /* Trip and adjustment setup goes in ENTRY_BB. */
5597 gsi = gsi_last_nondebug_bb (entry_bb);
5598 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5599 gsip = gsi;
5600 gsi_prev (&gsip);
5602 if (fd->collapse > 1)
5604 int first_zero_iter = -1, dummy = -1;
5605 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5607 counts = XALLOCAVEC (tree, fd->collapse);
5608 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5609 fin_bb, first_zero_iter,
5610 dummy_bb, dummy, l2_dom_bb);
5611 t = NULL_TREE;
5613 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5614 t = integer_one_node;
5615 else
5616 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5617 fold_convert (type, fd->loop.n1),
5618 fold_convert (type, fd->loop.n2));
5619 if (fd->collapse == 1
5620 && TYPE_UNSIGNED (type)
5621 && (t == NULL_TREE || !integer_onep (t)))
5623 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5624 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5625 true, GSI_SAME_STMT);
5626 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5627 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5628 true, GSI_SAME_STMT);
5629 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5630 NULL_TREE, NULL_TREE);
5631 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5632 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5633 expand_omp_regimplify_p, NULL, NULL)
5634 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5635 expand_omp_regimplify_p, NULL, NULL))
5637 gsi = gsi_for_stmt (cond_stmt);
5638 gimple_regimplify_operands (cond_stmt, &gsi);
5640 se = split_block (entry_bb, cond_stmt);
5641 se->flags = EDGE_TRUE_VALUE;
5642 entry_bb = se->dest;
5643 se->probability = profile_probability::very_likely ();
5644 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5645 se->probability = profile_probability::very_unlikely ();
5646 if (gimple_in_ssa_p (cfun))
5648 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5649 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5650 !gsi_end_p (gpi); gsi_next (&gpi))
5652 gphi *phi = gpi.phi ();
5653 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5654 se, UNKNOWN_LOCATION);
5657 gsi = gsi_last_bb (entry_bb);
5660 if (fd->lastprivate_conditional)
5662 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5663 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5664 if (fd->have_pointer_condtemp)
5665 condtemp = OMP_CLAUSE_DECL (c);
5666 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5667 cond_var = OMP_CLAUSE_DECL (c);
5669 if (fd->have_reductemp || fd->have_pointer_condtemp)
5671 tree t1 = build_int_cst (long_integer_type_node, 0);
5672 tree t2 = build_int_cst (long_integer_type_node, 1);
5673 tree t3 = build_int_cstu (long_integer_type_node,
5674 (HOST_WIDE_INT_1U << 31) + 1);
5675 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5676 gimple_stmt_iterator gsi2 = gsi_none ();
5677 gimple *g = NULL;
5678 tree mem = null_pointer_node, memv = NULL_TREE;
5679 if (fd->have_reductemp)
5681 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5682 reductions = OMP_CLAUSE_DECL (c);
5683 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5684 g = SSA_NAME_DEF_STMT (reductions);
5685 reductions = gimple_assign_rhs1 (g);
5686 OMP_CLAUSE_DECL (c) = reductions;
5687 gsi2 = gsi_for_stmt (g);
5689 else
5691 if (gsi_end_p (gsip))
5692 gsi2 = gsi_after_labels (region->entry);
5693 else
5694 gsi2 = gsip;
5695 reductions = null_pointer_node;
5697 if (fd->have_pointer_condtemp)
5699 tree type = TREE_TYPE (condtemp);
5700 memv = create_tmp_var (type);
5701 TREE_ADDRESSABLE (memv) = 1;
5702 unsigned HOST_WIDE_INT sz
5703 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5704 sz *= fd->lastprivate_conditional;
5705 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5706 false);
5707 mem = build_fold_addr_expr (memv);
5709 tree t
5710 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5711 9, t1, t2, t2, t3, t1, null_pointer_node,
5712 null_pointer_node, reductions, mem);
5713 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5714 true, GSI_SAME_STMT);
5715 if (fd->have_pointer_condtemp)
5716 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5717 if (fd->have_reductemp)
5719 gsi_remove (&gsi2, true);
5720 release_ssa_name (gimple_assign_lhs (g));
5723 switch (gimple_omp_for_kind (fd->for_stmt))
5725 case GF_OMP_FOR_KIND_FOR:
5726 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5727 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5728 break;
5729 case GF_OMP_FOR_KIND_DISTRIBUTE:
5730 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5731 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5732 break;
5733 default:
5734 gcc_unreachable ();
5736 nthreads = build_call_expr (nthreads, 0);
5737 nthreads = fold_convert (itype, nthreads);
5738 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5739 true, GSI_SAME_STMT);
5740 threadid = build_call_expr (threadid, 0);
5741 threadid = fold_convert (itype, threadid);
5742 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5743 true, GSI_SAME_STMT);
5745 n1 = fd->loop.n1;
5746 n2 = fd->loop.n2;
5747 step = fd->loop.step;
5748 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5750 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5751 OMP_CLAUSE__LOOPTEMP_);
5752 gcc_assert (innerc);
5753 n1 = OMP_CLAUSE_DECL (innerc);
5754 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5755 OMP_CLAUSE__LOOPTEMP_);
5756 gcc_assert (innerc);
5757 n2 = OMP_CLAUSE_DECL (innerc);
5759 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5760 true, NULL_TREE, true, GSI_SAME_STMT);
5761 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5762 true, NULL_TREE, true, GSI_SAME_STMT);
5763 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5764 true, NULL_TREE, true, GSI_SAME_STMT);
5765 tree chunk_size = fold_convert (itype, fd->chunk_size);
5766 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5767 chunk_size
5768 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5769 GSI_SAME_STMT);
5771 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5772 t = fold_build2 (PLUS_EXPR, itype, step, t);
5773 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5774 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5775 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5776 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5777 fold_build1 (NEGATE_EXPR, itype, t),
5778 fold_build1 (NEGATE_EXPR, itype, step));
5779 else
5780 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5781 t = fold_convert (itype, t);
5782 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5783 true, GSI_SAME_STMT);
5785 trip_var = create_tmp_reg (itype, ".trip");
5786 if (gimple_in_ssa_p (cfun))
5788 trip_init = make_ssa_name (trip_var);
5789 trip_main = make_ssa_name (trip_var);
5790 trip_back = make_ssa_name (trip_var);
5792 else
5794 trip_init = trip_var;
5795 trip_main = trip_var;
5796 trip_back = trip_var;
5799 gassign *assign_stmt
5800 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5801 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5803 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5804 t = fold_build2 (MULT_EXPR, itype, t, step);
5805 if (POINTER_TYPE_P (type))
5806 t = fold_build_pointer_plus (n1, t);
5807 else
5808 t = fold_build2 (PLUS_EXPR, type, t, n1);
5809 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5810 true, GSI_SAME_STMT);
5812 /* Remove the GIMPLE_OMP_FOR. */
5813 gsi_remove (&gsi, true);
5815 gimple_stmt_iterator gsif = gsi;
5817 /* Iteration space partitioning goes in ITER_PART_BB. */
5818 gsi = gsi_last_bb (iter_part_bb);
5820 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5821 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5822 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5823 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5824 false, GSI_CONTINUE_LINKING);
5826 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5827 t = fold_build2 (MIN_EXPR, itype, t, n);
5828 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5829 false, GSI_CONTINUE_LINKING);
5831 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5832 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5834 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5835 gsi = gsi_start_bb (seq_start_bb);
5837 tree startvar = fd->loop.v;
5838 tree endvar = NULL_TREE;
5840 if (gimple_omp_for_combined_p (fd->for_stmt))
5842 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5843 ? gimple_omp_parallel_clauses (inner_stmt)
5844 : gimple_omp_for_clauses (inner_stmt);
5845 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5846 gcc_assert (innerc);
5847 startvar = OMP_CLAUSE_DECL (innerc);
5848 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5849 OMP_CLAUSE__LOOPTEMP_);
5850 gcc_assert (innerc);
5851 endvar = OMP_CLAUSE_DECL (innerc);
5852 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5853 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5855 innerc = find_lastprivate_looptemp (fd, innerc);
5856 if (innerc)
5858 /* If needed (distribute parallel for with lastprivate),
5859 propagate down the total number of iterations. */
5860 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5861 fd->loop.n2);
5862 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5863 GSI_CONTINUE_LINKING);
5864 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5865 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5870 t = fold_convert (itype, s0);
5871 t = fold_build2 (MULT_EXPR, itype, t, step);
5872 if (POINTER_TYPE_P (type))
5874 t = fold_build_pointer_plus (n1, t);
5875 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5876 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5877 t = fold_convert (signed_type_for (type), t);
5879 else
5880 t = fold_build2 (PLUS_EXPR, type, t, n1);
5881 t = fold_convert (TREE_TYPE (startvar), t);
5882 t = force_gimple_operand_gsi (&gsi, t,
5883 DECL_P (startvar)
5884 && TREE_ADDRESSABLE (startvar),
5885 NULL_TREE, false, GSI_CONTINUE_LINKING);
5886 assign_stmt = gimple_build_assign (startvar, t);
5887 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5888 if (cond_var)
5890 tree itype = TREE_TYPE (cond_var);
5891 /* For lastprivate(conditional:) itervar, we need some iteration
5892 counter that starts at unsigned non-zero and increases.
5893 Prefer as few IVs as possible, so if we can use startvar
5894 itself, use that, or startvar + constant (those would be
5895 incremented with step), and as last resort use the s0 + 1
5896 incremented by 1. */
5897 if (POINTER_TYPE_P (type)
5898 || TREE_CODE (n1) != INTEGER_CST
5899 || fd->loop.cond_code != LT_EXPR)
5900 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5901 build_int_cst (itype, 1));
5902 else if (tree_int_cst_sgn (n1) == 1)
5903 t = fold_convert (itype, t);
5904 else
5906 tree c = fold_convert (itype, n1);
5907 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5908 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5910 t = force_gimple_operand_gsi (&gsi, t, false,
5911 NULL_TREE, false, GSI_CONTINUE_LINKING);
5912 assign_stmt = gimple_build_assign (cond_var, t);
5913 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5916 t = fold_convert (itype, e0);
5917 t = fold_build2 (MULT_EXPR, itype, t, step);
5918 if (POINTER_TYPE_P (type))
5920 t = fold_build_pointer_plus (n1, t);
5921 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5922 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5923 t = fold_convert (signed_type_for (type), t);
5925 else
5926 t = fold_build2 (PLUS_EXPR, type, t, n1);
5927 t = fold_convert (TREE_TYPE (startvar), t);
5928 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5929 false, GSI_CONTINUE_LINKING);
5930 if (endvar)
5932 assign_stmt = gimple_build_assign (endvar, e);
5933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5934 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5935 assign_stmt = gimple_build_assign (fd->loop.v, e);
5936 else
5937 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5940 /* Handle linear clause adjustments. */
5941 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5942 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5943 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5944 c; c = OMP_CLAUSE_CHAIN (c))
5945 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5946 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5948 tree d = OMP_CLAUSE_DECL (c);
5949 bool is_ref = omp_is_reference (d);
5950 tree t = d, a, dest;
5951 if (is_ref)
5952 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5953 tree type = TREE_TYPE (t);
5954 if (POINTER_TYPE_P (type))
5955 type = sizetype;
5956 dest = unshare_expr (t);
5957 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5958 expand_omp_build_assign (&gsif, v, t);
5959 if (itercnt == NULL_TREE)
5961 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5963 itercntbias
5964 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5965 fold_convert (itype, fd->loop.n1));
5966 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5967 itercntbias, step);
5968 itercntbias
5969 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5970 NULL_TREE, true,
5971 GSI_SAME_STMT);
5972 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5973 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5974 NULL_TREE, false,
5975 GSI_CONTINUE_LINKING);
5977 else
5978 itercnt = s0;
5980 a = fold_build2 (MULT_EXPR, type,
5981 fold_convert (type, itercnt),
5982 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5983 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5984 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5985 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5986 false, GSI_CONTINUE_LINKING);
5987 expand_omp_build_assign (&gsi, dest, t, true);
5989 if (fd->collapse > 1)
5990 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5992 if (!broken_loop)
5994 /* The code controlling the sequential loop goes in CONT_BB,
5995 replacing the GIMPLE_OMP_CONTINUE. */
5996 gsi = gsi_last_nondebug_bb (cont_bb);
5997 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5998 vmain = gimple_omp_continue_control_use (cont_stmt);
5999 vback = gimple_omp_continue_control_def (cont_stmt);
6001 if (cond_var)
6003 tree itype = TREE_TYPE (cond_var);
6004 tree t2;
6005 if (POINTER_TYPE_P (type)
6006 || TREE_CODE (n1) != INTEGER_CST
6007 || fd->loop.cond_code != LT_EXPR)
6008 t2 = build_int_cst (itype, 1);
6009 else
6010 t2 = fold_convert (itype, step);
6011 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6012 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6013 NULL_TREE, true, GSI_SAME_STMT);
6014 assign_stmt = gimple_build_assign (cond_var, t2);
6015 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6018 if (!gimple_omp_for_combined_p (fd->for_stmt))
6020 if (POINTER_TYPE_P (type))
6021 t = fold_build_pointer_plus (vmain, step);
6022 else
6023 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6024 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6025 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6026 true, GSI_SAME_STMT);
6027 assign_stmt = gimple_build_assign (vback, t);
6028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6030 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6031 t = build2 (EQ_EXPR, boolean_type_node,
6032 build_int_cst (itype, 0),
6033 build_int_cst (itype, 1));
6034 else
6035 t = build2 (fd->loop.cond_code, boolean_type_node,
6036 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6037 ? t : vback, e);
6038 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6041 /* Remove GIMPLE_OMP_CONTINUE. */
6042 gsi_remove (&gsi, true);
6044 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6045 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6047 /* Trip update code goes into TRIP_UPDATE_BB. */
6048 gsi = gsi_start_bb (trip_update_bb);
6050 t = build_int_cst (itype, 1);
6051 t = build2 (PLUS_EXPR, itype, trip_main, t);
6052 assign_stmt = gimple_build_assign (trip_back, t);
6053 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6056 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6057 gsi = gsi_last_nondebug_bb (exit_bb);
6058 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6060 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6061 if (fd->have_reductemp || fd->have_pointer_condtemp)
6063 tree fn;
6064 if (t)
6065 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6066 else
6067 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6068 gcall *g = gimple_build_call (fn, 0);
6069 if (t)
6071 gimple_call_set_lhs (g, t);
6072 if (fd->have_reductemp)
6073 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6074 NOP_EXPR, t),
6075 GSI_SAME_STMT);
6077 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6079 else
6080 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6082 else if (fd->have_pointer_condtemp)
6084 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6085 gcall *g = gimple_build_call (fn, 0);
6086 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6088 gsi_remove (&gsi, true);
6090 /* Connect the new blocks. */
6091 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6092 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6094 if (!broken_loop)
6096 se = find_edge (cont_bb, body_bb);
6097 if (se == NULL)
6099 se = BRANCH_EDGE (cont_bb);
6100 gcc_assert (single_succ (se->dest) == body_bb);
6102 if (gimple_omp_for_combined_p (fd->for_stmt))
6104 remove_edge (se);
6105 se = NULL;
6107 else if (fd->collapse > 1)
6109 remove_edge (se);
6110 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6112 else
6113 se->flags = EDGE_TRUE_VALUE;
6114 find_edge (cont_bb, trip_update_bb)->flags
6115 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6117 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6118 iter_part_bb);
6121 if (gimple_in_ssa_p (cfun))
6123 gphi_iterator psi;
6124 gphi *phi;
6125 edge re, ene;
6126 edge_var_map *vm;
6127 size_t i;
6129 gcc_assert (fd->collapse == 1 && !broken_loop);
6131 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6132 remove arguments of the phi nodes in fin_bb. We need to create
6133 appropriate phi nodes in iter_part_bb instead. */
6134 se = find_edge (iter_part_bb, fin_bb);
6135 re = single_succ_edge (trip_update_bb);
6136 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6137 ene = single_succ_edge (entry_bb);
6139 psi = gsi_start_phis (fin_bb);
6140 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6141 gsi_next (&psi), ++i)
6143 gphi *nphi;
6144 location_t locus;
6146 phi = psi.phi ();
6147 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6148 redirect_edge_var_map_def (vm), 0))
6149 continue;
6151 t = gimple_phi_result (phi);
6152 gcc_assert (t == redirect_edge_var_map_result (vm));
6154 if (!single_pred_p (fin_bb))
6155 t = copy_ssa_name (t, phi);
6157 nphi = create_phi_node (t, iter_part_bb);
6159 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6160 locus = gimple_phi_arg_location_from_edge (phi, se);
6162 /* A special case -- fd->loop.v is not yet computed in
6163 iter_part_bb, we need to use vextra instead. */
6164 if (t == fd->loop.v)
6165 t = vextra;
6166 add_phi_arg (nphi, t, ene, locus);
6167 locus = redirect_edge_var_map_location (vm);
6168 tree back_arg = redirect_edge_var_map_def (vm);
6169 add_phi_arg (nphi, back_arg, re, locus);
6170 edge ce = find_edge (cont_bb, body_bb);
6171 if (ce == NULL)
6173 ce = BRANCH_EDGE (cont_bb);
6174 gcc_assert (single_succ (ce->dest) == body_bb);
6175 ce = single_succ_edge (ce->dest);
6177 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6178 gcc_assert (inner_loop_phi != NULL);
6179 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6180 find_edge (seq_start_bb, body_bb), locus);
6182 if (!single_pred_p (fin_bb))
6183 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6185 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6186 redirect_edge_var_map_clear (re);
6187 if (single_pred_p (fin_bb))
6188 while (1)
6190 psi = gsi_start_phis (fin_bb);
6191 if (gsi_end_p (psi))
6192 break;
6193 remove_phi_node (&psi, false);
6196 /* Make phi node for trip. */
6197 phi = create_phi_node (trip_main, iter_part_bb);
6198 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6199 UNKNOWN_LOCATION);
6200 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6201 UNKNOWN_LOCATION);
6204 if (!broken_loop)
6205 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6206 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6207 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6208 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6209 recompute_dominator (CDI_DOMINATORS, fin_bb));
6210 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6211 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6212 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6213 recompute_dominator (CDI_DOMINATORS, body_bb));
6215 if (!broken_loop)
6217 class loop *loop = body_bb->loop_father;
6218 class loop *trip_loop = alloc_loop ();
6219 trip_loop->header = iter_part_bb;
6220 trip_loop->latch = trip_update_bb;
6221 add_loop (trip_loop, iter_part_bb->loop_father);
6223 if (loop != entry_bb->loop_father)
6225 gcc_assert (loop->header == body_bb);
6226 gcc_assert (loop->latch == region->cont
6227 || single_pred (loop->latch) == region->cont);
6228 trip_loop->inner = loop;
6229 return;
6232 if (!gimple_omp_for_combined_p (fd->for_stmt))
6234 loop = alloc_loop ();
6235 loop->header = body_bb;
6236 if (collapse_bb == NULL)
6237 loop->latch = cont_bb;
6238 add_loop (loop, trip_loop);
 6243 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
 6244    loop.  Given parameters:
 6246         for (V = N1; V cond N2; V += STEP) BODY;
 6248    where COND is "<" or ">", we generate pseudocode
 6250         V = N1;
 6251         goto L1;
 6252     L0:
 6253         BODY;
 6254         V += STEP;
 6255     L1:
 6256         if (V cond N2) goto L0; else goto L2;
 6257     L2:
 6259     For collapsed loops, emit the outer loops as scalar
 6260     and only try to vectorize the innermost loop.  */
6262 static void
6263 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6265 tree type, t;
6266 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6267 gimple_stmt_iterator gsi;
6268 gimple *stmt;
6269 gcond *cond_stmt;
6270 bool broken_loop = region->cont == NULL;
6271 edge e, ne;
6272 tree *counts = NULL;
6273 int i;
6274 int safelen_int = INT_MAX;
6275 bool dont_vectorize = false;
6276 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6277 OMP_CLAUSE_SAFELEN);
6278 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6279 OMP_CLAUSE__SIMDUID_);
6280 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6281 OMP_CLAUSE_IF);
6282 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6283 OMP_CLAUSE_SIMDLEN);
6284 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6285 OMP_CLAUSE__CONDTEMP_);
6286 tree n1, n2;
6287 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6289 if (safelen)
6291 poly_uint64 val;
6292 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6293 if (!poly_int_tree_p (safelen, &val))
6294 safelen_int = 0;
6295 else
6296 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6297 if (safelen_int == 1)
6298 safelen_int = 0;
6300 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6301 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6303 safelen_int = 0;
6304 dont_vectorize = true;
6306 type = TREE_TYPE (fd->loop.v);
6307 entry_bb = region->entry;
6308 cont_bb = region->cont;
6309 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6310 gcc_assert (broken_loop
6311 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6312 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6313 if (!broken_loop)
6315 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6316 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6317 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6318 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6320 else
6322 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6323 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6324 l2_bb = single_succ (l1_bb);
6326 exit_bb = region->exit;
6327 l2_dom_bb = NULL;
6329 gsi = gsi_last_nondebug_bb (entry_bb);
6331 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6332 /* Not needed in SSA form right now. */
6333 gcc_assert (!gimple_in_ssa_p (cfun));
6334 if (fd->collapse > 1
6335 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6336 || broken_loop))
6338 int first_zero_iter = -1, dummy = -1;
6339 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6341 counts = XALLOCAVEC (tree, fd->collapse);
6342 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6343 zero_iter_bb, first_zero_iter,
6344 dummy_bb, dummy, l2_dom_bb);
6346 if (l2_dom_bb == NULL)
6347 l2_dom_bb = l1_bb;
6349 n1 = fd->loop.n1;
6350 n2 = fd->loop.n2;
6351 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6353 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 OMP_CLAUSE__LOOPTEMP_);
6355 gcc_assert (innerc);
6356 n1 = OMP_CLAUSE_DECL (innerc);
6357 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6358 OMP_CLAUSE__LOOPTEMP_);
6359 gcc_assert (innerc);
6360 n2 = OMP_CLAUSE_DECL (innerc);
6362 tree step = fd->loop.step;
6363 tree orig_step = step; /* May be different from step if is_simt. */
6365 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6366 OMP_CLAUSE__SIMT_);
6367 if (is_simt)
6369 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6370 is_simt = safelen_int > 1;
6372 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6373 if (is_simt)
6375 simt_lane = create_tmp_var (unsigned_type_node);
6376 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6377 gimple_call_set_lhs (g, simt_lane);
6378 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6379 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6380 fold_convert (TREE_TYPE (step), simt_lane));
6381 n1 = fold_convert (type, n1);
6382 if (POINTER_TYPE_P (type))
6383 n1 = fold_build_pointer_plus (n1, offset);
6384 else
6385 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6387 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6388 if (fd->collapse > 1)
6389 simt_maxlane = build_one_cst (unsigned_type_node);
6390 else if (safelen_int < omp_max_simt_vf ())
6391 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6392 tree vf
6393 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6394 unsigned_type_node, 0);
6395 if (simt_maxlane)
6396 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6397 vf = fold_convert (TREE_TYPE (step), vf);
6398 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6401 tree n2var = NULL_TREE;
6402 tree n2v = NULL_TREE;
6403 tree *nonrect_bounds = NULL;
6404 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6405 if (fd->collapse > 1)
6407 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6409 if (fd->non_rect)
6411 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6412 memset (nonrect_bounds, 0,
6413 sizeof (tree) * (fd->last_nonrect + 1));
6415 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6416 gcc_assert (entry_bb == gsi_bb (gsi));
6417 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6418 gsi_prev (&gsi);
6419 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6420 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6421 NULL, n1);
6422 gsi = gsi_for_stmt (fd->for_stmt);
6424 if (broken_loop)
6426 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6428 /* Compute in n2var the limit for the first innermost loop,
6429 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6430 where cnt is how many iterations would the loop have if
6431 all further iterations were assigned to the current task. */
6432 n2var = create_tmp_var (type);
6433 i = fd->collapse - 1;
6434 tree itype = TREE_TYPE (fd->loops[i].v);
6435 if (POINTER_TYPE_P (itype))
6436 itype = signed_type_for (itype);
6437 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6438 ? -1 : 1));
6439 t = fold_build2 (PLUS_EXPR, itype,
6440 fold_convert (itype, fd->loops[i].step), t);
6441 t = fold_build2 (PLUS_EXPR, itype, t,
6442 fold_convert (itype, fd->loops[i].n2));
6443 if (fd->loops[i].m2)
6445 tree t2 = fold_convert (itype,
6446 fd->loops[i - fd->loops[i].outer].v);
6447 tree t3 = fold_convert (itype, fd->loops[i].m2);
6448 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6449 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6451 t = fold_build2 (MINUS_EXPR, itype, t,
6452 fold_convert (itype, fd->loops[i].v));
6453 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6454 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6455 fold_build1 (NEGATE_EXPR, itype, t),
6456 fold_build1 (NEGATE_EXPR, itype,
6457 fold_convert (itype,
6458 fd->loops[i].step)));
6459 else
6460 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6461 fold_convert (itype, fd->loops[i].step));
6462 t = fold_convert (type, t);
6463 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6464 min_arg1 = create_tmp_var (type);
6465 expand_omp_build_assign (&gsi, min_arg1, t2);
6466 min_arg2 = create_tmp_var (type);
6467 expand_omp_build_assign (&gsi, min_arg2, t);
6469 else
6471 if (TREE_CODE (n2) == INTEGER_CST)
6473 /* Indicate for lastprivate handling that at least one iteration
6474 has been performed, without wasting runtime. */
6475 if (integer_nonzerop (n2))
6476 expand_omp_build_assign (&gsi, fd->loop.v,
6477 fold_convert (type, n2));
6478 else
6479 /* Indicate that no iteration has been performed. */
6480 expand_omp_build_assign (&gsi, fd->loop.v,
6481 build_one_cst (type));
6483 else
6485 expand_omp_build_assign (&gsi, fd->loop.v,
6486 build_zero_cst (type));
6487 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6489 for (i = 0; i < fd->collapse; i++)
6491 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6492 if (fd->loops[i].m1)
6494 tree t2
6495 = fold_convert (TREE_TYPE (t),
6496 fd->loops[i - fd->loops[i].outer].v);
6497 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6498 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6499 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6501 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6502 /* For normal non-combined collapsed loops just initialize
6503 the outermost iterator in the entry_bb. */
6504 if (!broken_loop)
6505 break;
6509 else
6510 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6511 tree altv = NULL_TREE, altn2 = NULL_TREE;
6512 if (fd->collapse == 1
6513 && !broken_loop
6514 && TREE_CODE (orig_step) != INTEGER_CST)
6516 /* The vectorizer currently punts on loops with non-constant steps
6517 for the main IV (can't compute number of iterations and gives up
6518 because of that). As for OpenMP loops it is always possible to
6519 compute the number of iterations upfront, use an alternate IV
6520 as the loop iterator:
6521 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6522 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6523 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6524 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6525 tree itype = TREE_TYPE (fd->loop.v);
6526 if (POINTER_TYPE_P (itype))
6527 itype = signed_type_for (itype);
6528 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6529 t = fold_build2 (PLUS_EXPR, itype,
6530 fold_convert (itype, step), t);
6531 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6532 t = fold_build2 (MINUS_EXPR, itype, t,
6533 fold_convert (itype, fd->loop.v));
6534 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6535 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6536 fold_build1 (NEGATE_EXPR, itype, t),
6537 fold_build1 (NEGATE_EXPR, itype,
6538 fold_convert (itype, step)));
6539 else
6540 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6541 fold_convert (itype, step));
6542 t = fold_convert (TREE_TYPE (altv), t);
6543 altn2 = create_tmp_var (TREE_TYPE (altv));
6544 expand_omp_build_assign (&gsi, altn2, t);
6545 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6546 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6547 true, GSI_SAME_STMT);
6548 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6549 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6550 build_zero_cst (TREE_TYPE (altv)));
6551 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6553 else if (fd->collapse > 1
6554 && !broken_loop
6555 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6556 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6558 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6559 altn2 = create_tmp_var (TREE_TYPE (altv));
6561 if (cond_var)
6563 if (POINTER_TYPE_P (type)
6564 || TREE_CODE (n1) != INTEGER_CST
6565 || fd->loop.cond_code != LT_EXPR
6566 || tree_int_cst_sgn (n1) != 1)
6567 expand_omp_build_assign (&gsi, cond_var,
6568 build_one_cst (TREE_TYPE (cond_var)));
6569 else
6570 expand_omp_build_assign (&gsi, cond_var,
6571 fold_convert (TREE_TYPE (cond_var), n1));
6574 /* Remove the GIMPLE_OMP_FOR statement. */
6575 gsi_remove (&gsi, true);
6577 if (!broken_loop)
6579 /* Code to control the increment goes in the CONT_BB. */
6580 gsi = gsi_last_nondebug_bb (cont_bb);
6581 stmt = gsi_stmt (gsi);
6582 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6584 if (fd->collapse == 1
6585 || gimple_omp_for_combined_into_p (fd->for_stmt))
6587 if (POINTER_TYPE_P (type))
6588 t = fold_build_pointer_plus (fd->loop.v, step);
6589 else
6590 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6591 expand_omp_build_assign (&gsi, fd->loop.v, t);
6593 else if (TREE_CODE (n2) != INTEGER_CST)
6594 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6595 if (altv)
6597 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6598 build_one_cst (TREE_TYPE (altv)));
6599 expand_omp_build_assign (&gsi, altv, t);
6602 if (fd->collapse > 1)
6604 i = fd->collapse - 1;
6605 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6607 t = fold_convert (sizetype, fd->loops[i].step);
6608 t = fold_build_pointer_plus (fd->loops[i].v, t);
6610 else
6612 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6613 fd->loops[i].step);
6614 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6615 fd->loops[i].v, t);
6617 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6619 if (cond_var)
6621 if (POINTER_TYPE_P (type)
6622 || TREE_CODE (n1) != INTEGER_CST
6623 || fd->loop.cond_code != LT_EXPR
6624 || tree_int_cst_sgn (n1) != 1)
6625 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6626 build_one_cst (TREE_TYPE (cond_var)));
6627 else
6628 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6629 fold_convert (TREE_TYPE (cond_var), step));
6630 expand_omp_build_assign (&gsi, cond_var, t);
6633 /* Remove GIMPLE_OMP_CONTINUE. */
6634 gsi_remove (&gsi, true);
6637 /* Emit the condition in L1_BB. */
6638 gsi = gsi_start_bb (l1_bb);
6640 if (altv)
6641 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6642 else if (fd->collapse > 1
6643 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6644 && !broken_loop)
6646 i = fd->collapse - 1;
6647 tree itype = TREE_TYPE (fd->loops[i].v);
6648 if (fd->loops[i].m2)
6649 t = n2v = create_tmp_var (itype);
6650 else
6651 t = fold_convert (itype, fd->loops[i].n2);
6652 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6653 false, GSI_CONTINUE_LINKING);
6654 tree v = fd->loops[i].v;
6655 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6656 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6657 false, GSI_CONTINUE_LINKING);
6658 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6660 else
6662 if (fd->collapse > 1 && !broken_loop)
6663 t = n2var;
6664 else
6665 t = fold_convert (type, n2);
6666 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6667 false, GSI_CONTINUE_LINKING);
6668 tree v = fd->loop.v;
6669 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6670 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6671 false, GSI_CONTINUE_LINKING);
6672 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6674 cond_stmt = gimple_build_cond_empty (t);
6675 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6676 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6677 NULL, NULL)
6678 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6679 NULL, NULL))
6681 gsi = gsi_for_stmt (cond_stmt);
6682 gimple_regimplify_operands (cond_stmt, &gsi);
6685 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6686 if (is_simt)
6688 gsi = gsi_start_bb (l2_bb);
6689 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6690 if (POINTER_TYPE_P (type))
6691 t = fold_build_pointer_plus (fd->loop.v, step);
6692 else
6693 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6694 expand_omp_build_assign (&gsi, fd->loop.v, t);
6697 /* Remove GIMPLE_OMP_RETURN. */
6698 gsi = gsi_last_nondebug_bb (exit_bb);
6699 gsi_remove (&gsi, true);
6701 /* Connect the new blocks. */
6702 remove_edge (FALLTHRU_EDGE (entry_bb));
6704 if (!broken_loop)
6706 remove_edge (BRANCH_EDGE (entry_bb));
6707 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6709 e = BRANCH_EDGE (l1_bb);
6710 ne = FALLTHRU_EDGE (l1_bb);
6711 e->flags = EDGE_TRUE_VALUE;
6713 else
6715 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6717 ne = single_succ_edge (l1_bb);
6718 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6721 ne->flags = EDGE_FALSE_VALUE;
6722 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6723 ne->probability = e->probability.invert ();
6725 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6726 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6728 if (simt_maxlane)
6730 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6731 NULL_TREE, NULL_TREE);
6732 gsi = gsi_last_bb (entry_bb);
6733 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6734 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6735 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6736 FALLTHRU_EDGE (entry_bb)->probability
6737 = profile_probability::guessed_always ().apply_scale (7, 8);
6738 BRANCH_EDGE (entry_bb)->probability
6739 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6740 l2_dom_bb = entry_bb;
6742 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6744 if (!broken_loop && fd->collapse > 1)
6746 basic_block last_bb = l1_bb;
6747 basic_block init_bb = NULL;
6748 for (i = fd->collapse - 2; i >= 0; i--)
6750 tree nextn2v = NULL_TREE;
6751 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6752 e = EDGE_SUCC (last_bb, 0);
6753 else
6754 e = EDGE_SUCC (last_bb, 1);
6755 basic_block bb = split_edge (e);
6756 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6758 t = fold_convert (sizetype, fd->loops[i].step);
6759 t = fold_build_pointer_plus (fd->loops[i].v, t);
6761 else
6763 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6764 fd->loops[i].step);
6765 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6766 fd->loops[i].v, t);
6768 gsi = gsi_after_labels (bb);
6769 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6771 bb = split_block (bb, last_stmt (bb))->dest;
6772 gsi = gsi_start_bb (bb);
6773 tree itype = TREE_TYPE (fd->loops[i].v);
6774 if (fd->loops[i].m2)
6775 t = nextn2v = create_tmp_var (itype);
6776 else
6777 t = fold_convert (itype, fd->loops[i].n2);
6778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6779 false, GSI_CONTINUE_LINKING);
6780 tree v = fd->loops[i].v;
6781 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6782 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6783 false, GSI_CONTINUE_LINKING);
6784 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6785 cond_stmt = gimple_build_cond_empty (t);
6786 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6788 expand_omp_regimplify_p, NULL, NULL)
6789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6790 expand_omp_regimplify_p, NULL, NULL))
6792 gsi = gsi_for_stmt (cond_stmt);
6793 gimple_regimplify_operands (cond_stmt, &gsi);
6795 ne = single_succ_edge (bb);
6796 ne->flags = EDGE_FALSE_VALUE;
6798 init_bb = create_empty_bb (bb);
6799 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6800 add_bb_to_loop (init_bb, bb->loop_father);
6801 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6802 e->probability
6803 = profile_probability::guessed_always ().apply_scale (7, 8);
6804 ne->probability = e->probability.invert ();
6806 gsi = gsi_after_labels (init_bb);
6807 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6808 fd->loops[i + 1].n1);
6809 if (fd->loops[i + 1].m1)
6811 tree t2 = fold_convert (TREE_TYPE (t),
6812 fd->loops[i + 1
6813 - fd->loops[i + 1].outer].v);
6814 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6815 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6816 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6818 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6819 if (fd->loops[i + 1].m2)
6821 if (i + 2 == fd->collapse && (n2var || altv))
6823 gcc_assert (n2v == NULL_TREE);
6824 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6826 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6827 fd->loops[i + 1].n2);
6828 tree t2 = fold_convert (TREE_TYPE (t),
6829 fd->loops[i + 1
6830 - fd->loops[i + 1].outer].v);
6831 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6832 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6833 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6834 expand_omp_build_assign (&gsi, n2v, t);
6836 if (i + 2 == fd->collapse && n2var)
6838 /* For composite simd, n2 is the first iteration the current
6839 task shouldn't already handle, so we effectively want to use
6840 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6841 as the vectorized loop. Except the vectorizer will not
6842 vectorize that, so instead compute N2VAR as
6843 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6844 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6845 as the loop to vectorize. */
6846 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6847 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6849 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6850 == LT_EXPR ? -1 : 1));
6851 t = fold_build2 (PLUS_EXPR, itype,
6852 fold_convert (itype,
6853 fd->loops[i + 1].step), t);
6854 if (fd->loops[i + 1].m2)
6855 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6856 else
6857 t = fold_build2 (PLUS_EXPR, itype, t,
6858 fold_convert (itype,
6859 fd->loops[i + 1].n2));
6860 t = fold_build2 (MINUS_EXPR, itype, t,
6861 fold_convert (itype, fd->loops[i + 1].v));
6862 tree step = fold_convert (itype, fd->loops[i + 1].step);
6863 if (TYPE_UNSIGNED (itype)
6864 && fd->loops[i + 1].cond_code == GT_EXPR)
6865 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6866 fold_build1 (NEGATE_EXPR, itype, t),
6867 fold_build1 (NEGATE_EXPR, itype, step));
6868 else
6869 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6870 t = fold_convert (type, t);
6872 else
6873 t = counts[i + 1];
6874 expand_omp_build_assign (&gsi, min_arg1, t2);
6875 expand_omp_build_assign (&gsi, min_arg2, t);
6876 e = split_block (init_bb, last_stmt (init_bb));
6877 gsi = gsi_after_labels (e->dest);
6878 init_bb = e->dest;
6879 remove_edge (FALLTHRU_EDGE (entry_bb));
6880 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6881 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6882 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6883 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6884 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6885 expand_omp_build_assign (&gsi, n2var, t);
6887 if (i + 2 == fd->collapse && altv)
6889 /* The vectorizer currently punts on loops with non-constant
6890 steps for the main IV (can't compute number of iterations
6891 and gives up because of that). As for OpenMP loops it is
6892 always possible to compute the number of iterations upfront,
6893 use an alternate IV as the loop iterator. */
6894 expand_omp_build_assign (&gsi, altv,
6895 build_zero_cst (TREE_TYPE (altv)));
6896 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6897 if (POINTER_TYPE_P (itype))
6898 itype = signed_type_for (itype);
6899 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6900 ? -1 : 1));
6901 t = fold_build2 (PLUS_EXPR, itype,
6902 fold_convert (itype, fd->loops[i + 1].step), t);
6903 t = fold_build2 (PLUS_EXPR, itype, t,
6904 fold_convert (itype,
6905 fd->loops[i + 1].m2
6906 ? n2v : fd->loops[i + 1].n2));
6907 t = fold_build2 (MINUS_EXPR, itype, t,
6908 fold_convert (itype, fd->loops[i + 1].v));
6909 tree step = fold_convert (itype, fd->loops[i + 1].step);
6910 if (TYPE_UNSIGNED (itype)
6911 && fd->loops[i + 1].cond_code == GT_EXPR)
6912 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6913 fold_build1 (NEGATE_EXPR, itype, t),
6914 fold_build1 (NEGATE_EXPR, itype, step));
6915 else
6916 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6917 t = fold_convert (TREE_TYPE (altv), t);
6918 expand_omp_build_assign (&gsi, altn2, t);
6919 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6920 fd->loops[i + 1].m2
6921 ? n2v : fd->loops[i + 1].n2);
6922 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6923 true, GSI_SAME_STMT);
6924 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6925 fd->loops[i + 1].v, t2);
6926 gassign *g
6927 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6928 build_zero_cst (TREE_TYPE (altv)));
6929 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6931 n2v = nextn2v;
6933 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6934 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6936 e = find_edge (entry_bb, last_bb);
6937 redirect_edge_succ (e, bb);
6938 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6939 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6942 last_bb = bb;
6945 if (!broken_loop)
6947 class loop *loop = alloc_loop ();
6948 loop->header = l1_bb;
6949 loop->latch = cont_bb;
6950 add_loop (loop, l1_bb->loop_father);
6951 loop->safelen = safelen_int;
6952 if (simduid)
6954 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6955 cfun->has_simduid_loops = true;
6957 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6958 the loop. */
6959 if ((flag_tree_loop_vectorize
6960 || !global_options_set.x_flag_tree_loop_vectorize)
6961 && flag_tree_loop_optimize
6962 && loop->safelen > 1)
6964 loop->force_vectorize = true;
6965 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6967 unsigned HOST_WIDE_INT v
6968 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6969 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6970 loop->simdlen = v;
6972 cfun->has_force_vectorize_loops = true;
6974 else if (dont_vectorize)
6975 loop->dont_vectorize = true;
6977 else if (simduid)
6978 cfun->has_simduid_loops = true;
6981 /* Taskloop construct is represented after gimplification with
6982 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6983 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6984 which should just compute all the needed loop temporaries
6985 for GIMPLE_OMP_TASK. */
6987 static void
6988 expand_omp_taskloop_for_outer (struct omp_region *region,
6989 struct omp_for_data *fd,
6990 gimple *inner_stmt)
6992 tree type, bias = NULL_TREE;
6993 basic_block entry_bb, cont_bb, exit_bb;
6994 gimple_stmt_iterator gsi;
6995 gassign *assign_stmt;
6996 tree *counts = NULL;
6997 int i;
/* INNER_STMT is the sandwiched GIMPLE_OMP_TASK; its _looptemp_ clauses
   are the variables this routine fills in (start, end, and optionally
   the total iteration count).  */
6999 gcc_assert (inner_stmt);
7000 gcc_assert (region->cont);
7001 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7002 && gimple_omp_task_taskloop_p (inner_stmt));
7003 type = TREE_TYPE (fd->loop.v);
7005 /* See if we need to bias by LLONG_MIN. */
/* The runtime's iteration space is long long unsigned; if the source
   iterator is a signed INTEGER_TYPE whose bounds are not both provably
   non-negative (or both negative), add TYPE_MIN_VALUE so that the biased
   values compare correctly as unsigned.  */
7006 if (fd->iter_type == long_long_unsigned_type_node
7007 && TREE_CODE (type) == INTEGER_TYPE
7008 && !TYPE_UNSIGNED (type))
7010 tree n1, n2;
/* Normalize so N1 is the logically smaller endpoint: for LT loops that
   is n1 itself; for GT loops it is n2 adjusted by one step.  */
7012 if (fd->loop.cond_code == LT_EXPR)
7014 n1 = fd->loop.n1;
7015 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7017 else
7019 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7020 n2 = fd->loop.n1;
/* Bias unless both endpoints are compile-time constants with the same
   sign, in which case unsigned comparison already works.  */
7022 if (TREE_CODE (n1) != INTEGER_CST
7023 || TREE_CODE (n2) != INTEGER_CST
7024 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7025 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7028 entry_bb = region->entry;
7029 cont_bb = region->cont;
7030 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7031 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7032 exit_bb = region->exit;
7034 gsi = gsi_last_nondebug_bb (entry_bb);
7035 gimple *for_stmt = gsi_stmt (gsi);
7036 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
/* For collapsed loop nests, first materialize the per-loop iteration
   counts in ENTRY_BB.  */
7037 if (fd->collapse > 1)
7039 int first_zero_iter = -1, dummy = -1;
7040 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7042 counts = XALLOCAVEC (tree, fd->collapse);
7043 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7044 zero_iter_bb, first_zero_iter,
7045 dummy_bb, dummy, l2_dom_bb);
7047 if (zero_iter_bb)
7049 /* Some counts[i] vars might be uninitialized if
7050 some loop has zero iterations. But the body shouldn't
7051 be executed in that case, so just avoid uninit warnings. */
7052 for (i = first_zero_iter; i < fd->collapse; i++)
7053 if (SSA_VAR_P (counts[i]))
7054 suppress_warning (counts[i], OPT_Wuninitialized)
7055 gsi_prev (&gsi);
/* Splice the zero-iterations bypass back into the CFG: split ENTRY_BB
   before the OMP_FOR and make ZERO_ITER_BB fall through to the new
   block, fixing up the dominator accordingly.  */
7056 edge e = split_block (entry_bb, gsi_stmt (gsi));
7057 entry_bb = e->dest;
7058 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7059 gsi = gsi_last_bb (entry_bb);
7060 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7061 get_immediate_dominator (CDI_DOMINATORS,
7062 zero_iter_bb));
/* Convert the loop bounds T0 (start) and T1 (end) to the runtime's
   iteration type, applying the LLONG_MIN bias if one was computed.  */
7066 tree t0, t1;
7067 t1 = fd->loop.n2;
7068 t0 = fd->loop.n1;
7069 if (POINTER_TYPE_P (TREE_TYPE (t0))
7070 && TYPE_PRECISION (TREE_TYPE (t0))
7071 != TYPE_PRECISION (fd->iter_type))
7073 /* Avoid casting pointers to integer of a different size. */
7074 tree itype = signed_type_for (type);
7075 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7076 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7078 else
7080 t1 = fold_convert (fd->iter_type, t1);
7081 t0 = fold_convert (fd->iter_type, t0);
7083 if (bias)
7085 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7086 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
/* The first two _looptemp_ clauses on the GIMPLE_OMP_TASK name the
   variables receiving the start and end of the iteration range.  */
7089 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7090 OMP_CLAUSE__LOOPTEMP_);
7091 gcc_assert (innerc);
7092 tree startvar = OMP_CLAUSE_DECL (innerc);
7093 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7094 gcc_assert (innerc);
7095 tree endvar = OMP_CLAUSE_DECL (innerc);
7096 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7098 innerc = find_lastprivate_looptemp (fd, innerc);
7099 if (innerc)
7101 /* If needed (inner taskloop has lastprivate clause), propagate
7102 down the total number of iterations. */
7103 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7104 NULL_TREE, false,
7105 GSI_CONTINUE_LINKING);
7106 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* Emit STARTVAR = T0 and ENDVAR = T1 into ENTRY_BB.  */
7111 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7112 GSI_CONTINUE_LINKING);
7113 assign_stmt = gimple_build_assign (startvar, t0);
7114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7116 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7117 GSI_CONTINUE_LINKING);
7118 assign_stmt = gimple_build_assign (endvar, t1);
7119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7120 if (fd->collapse > 1)
7121 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7123 /* Remove the GIMPLE_OMP_FOR statement. */
7124 gsi = gsi_for_stmt (for_stmt);
7125 gsi_remove (&gsi, true);
/* Also drop the GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN of the outer
   for: the outer construct reduces to straight-line setup code.  */
7127 gsi = gsi_last_nondebug_bb (cont_bb);
7128 gsi_remove (&gsi, true);
7130 gsi = gsi_last_nondebug_bb (exit_bb);
7131 gsi_remove (&gsi, true);
/* Linearize the CFG: keep only the fallthru path through entry and
   cont blocks, then recompute the affected dominators.  */
7133 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7134 remove_edge (BRANCH_EDGE (entry_bb));
7135 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7136 remove_edge (BRANCH_EDGE (cont_bb));
7137 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7138 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7139 recompute_dominator (CDI_DOMINATORS, region->entry));
7142 /* Taskloop construct is represented after gimplification with
7143 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7144 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7145 GOMP_taskloop{,_ull} function arranges for each task to be given just
7146 a single range of iterations. */
7148 static void
7149 expand_omp_taskloop_for_inner (struct omp_region *region,
7150 struct omp_for_data *fd,
7151 gimple *inner_stmt)
7153 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7154 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7155 basic_block fin_bb;
7156 gimple_stmt_iterator gsi;
7157 edge ep;
/* A "broken" loop has no continue block: the body never reaches its
   latch (e.g. it ends in a noreturn call).  */
7158 bool broken_loop = region->cont == NULL;
7159 tree *counts = NULL;
7160 tree n1, n2, step;
7162 itype = type = TREE_TYPE (fd->loop.v);
7163 if (POINTER_TYPE_P (type))
7164 itype = signed_type_for (type);
7166 /* See if we need to bias by LLONG_MIN. */
/* Same biasing scheme as in expand_omp_taskloop_for_outer: if the
   signed iterator's bounds may straddle zero, offset them by
   TYPE_MIN_VALUE so the runtime's unsigned arithmetic is correct.
   The bias must match what the outer expansion applied, since the
   _looptemp_ range received here was computed in biased space.  */
7167 if (fd->iter_type == long_long_unsigned_type_node
7168 && TREE_CODE (type) == INTEGER_TYPE
7169 && !TYPE_UNSIGNED (type))
7171 tree n1, n2;
7173 if (fd->loop.cond_code == LT_EXPR)
7175 n1 = fd->loop.n1;
7176 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7178 else
7180 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7181 n2 = fd->loop.n1;
7183 if (TREE_CODE (n1) != INTEGER_CST
7184 || TREE_CODE (n2) != INTEGER_CST
7185 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7186 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
/* Identify the region's blocks: ENTRY ends in the OMP_FOR, FIN is the
   branch target past the loop, BODY is the fallthru target, CONT (if
   not broken) branches back to BODY.  */
7189 entry_bb = region->entry;
7190 cont_bb = region->cont;
7191 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7192 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7193 gcc_assert (broken_loop
7194 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7195 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7196 if (!broken_loop)
7198 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7199 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7201 exit_bb = region->exit;
7203 /* Iteration space partitioning goes in ENTRY_BB. */
7204 gsi = gsi_last_nondebug_bb (entry_bb);
7205 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7207 if (fd->collapse > 1)
7209 int first_zero_iter = -1, dummy = -1;
7210 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7212 counts = XALLOCAVEC (tree, fd->collapse);
7213 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7214 fin_bb, first_zero_iter,
7215 dummy_bb, dummy, l2_dom_bb);
7216 t = NULL_TREE;
7218 else
7219 t = integer_one_node;
/* The task's single iteration range [N1, N2) arrives via the first two
   _looptemp_ clauses on this GIMPLE_OMP_FOR; undo the bias if any.  */
7221 step = fd->loop.step;
7222 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7223 OMP_CLAUSE__LOOPTEMP_);
7224 gcc_assert (innerc);
7225 n1 = OMP_CLAUSE_DECL (innerc);
7226 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7227 gcc_assert (innerc);
7228 n2 = OMP_CLAUSE_DECL (innerc);
7229 if (bias)
7231 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7232 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7234 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7235 true, NULL_TREE, true, GSI_SAME_STMT);
7236 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7237 true, NULL_TREE, true, GSI_SAME_STMT);
7238 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7239 true, NULL_TREE, true, GSI_SAME_STMT);
/* For a combined construct, the start/end go into _looptemp_ variables
   of the nested INNER_STMT loop rather than directly into fd->loop.v.  */
7241 tree startvar = fd->loop.v;
7242 tree endvar = NULL_TREE;
7244 if (gimple_omp_for_combined_p (fd->for_stmt))
7246 tree clauses = gimple_omp_for_clauses (inner_stmt);
7247 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7248 gcc_assert (innerc);
7249 startvar = OMP_CLAUSE_DECL (innerc);
7250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7251 OMP_CLAUSE__LOOPTEMP_);
7252 gcc_assert (innerc);
7253 endvar = OMP_CLAUSE_DECL (innerc);
/* Emit STARTVAR = N1; if STARTVAR is addressable, force N1 into a
   temporary first so the assignment stays a valid gimple statement.  */
7255 t = fold_convert (TREE_TYPE (startvar), n1);
7256 t = force_gimple_operand_gsi (&gsi, t,
7257 DECL_P (startvar)
7258 && TREE_ADDRESSABLE (startvar),
7259 NULL_TREE, false, GSI_CONTINUE_LINKING);
7260 gimple *assign_stmt = gimple_build_assign (startvar, t);
7261 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* E is the loop's end value, later also used as the RHS of the
   latch comparison.  */
7263 t = fold_convert (TREE_TYPE (startvar), n2);
7264 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7265 false, GSI_CONTINUE_LINKING);
7266 if (endvar)
7268 assign_stmt = gimple_build_assign (endvar, e);
7269 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7270 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7271 assign_stmt = gimple_build_assign (fd->loop.v, e);
7272 else
7273 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7274 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* For collapsed nests, initialize the individual loop iterators from
   the linearized start; non-rectangular nests additionally need the
   per-loop bound temporaries in NONRECT_BOUNDS.  */
7277 tree *nonrect_bounds = NULL;
7278 if (fd->collapse > 1)
7280 if (fd->non_rect)
7282 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7283 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7285 gcc_assert (gsi_bb (gsi) == entry_bb);
7286 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7287 startvar);
7288 entry_bb = gsi_bb (gsi);
7291 if (!broken_loop)
7293 /* The code controlling the sequential loop replaces the
7294 GIMPLE_OMP_CONTINUE. */
7295 gsi = gsi_last_nondebug_bb (cont_bb);
7296 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7297 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7298 vmain = gimple_omp_continue_control_use (cont_stmt);
7299 vback = gimple_omp_continue_control_def (cont_stmt);
7301 if (!gimple_omp_for_combined_p (fd->for_stmt))
/* Emit VBACK = VMAIN + STEP and the back-edge condition
   VBACK cond E before the continue statement.  */
7303 if (POINTER_TYPE_P (type))
7304 t = fold_build_pointer_plus (vmain, step);
7305 else
7306 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7307 t = force_gimple_operand_gsi (&gsi, t,
7308 DECL_P (vback)
7309 && TREE_ADDRESSABLE (vback),
7310 NULL_TREE, true, GSI_SAME_STMT);
7311 assign_stmt = gimple_build_assign (vback, t);
7312 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7314 t = build2 (fd->loop.cond_code, boolean_type_node,
7315 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7316 ? t : vback, e);
7317 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7320 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7321 gsi_remove (&gsi, true);
7323 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7324 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7325 cont_bb, body_bb);
7328 /* Remove the GIMPLE_OMP_FOR statement. */
7329 gsi = gsi_for_stmt (fd->for_stmt);
7330 gsi_remove (&gsi, true);
7332 /* Remove the GIMPLE_OMP_RETURN statement. */
7333 gsi = gsi_last_nondebug_bb (exit_bb);
7334 gsi_remove (&gsi, true);
/* Drop the zero-iteration bypass edge out of ENTRY_BB; for a broken
   loop also delete the blocks it dominated and clear the outer
   region's continue block, which those blocks contained.  */
7336 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7337 if (!broken_loop)
7338 remove_edge (BRANCH_EDGE (entry_bb));
7339 else
7341 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7342 region->outer->cont = NULL;
7345 /* Connect all the blocks. */
7346 if (!broken_loop)
/* Latch wiring depends on the shape: combined constructs have their
   own back edge (drop ours); collapsed nests loop back through
   COLLAPSE_BB; a simple loop branches straight back to BODY_BB.  */
7348 ep = find_edge (cont_bb, body_bb);
7349 if (gimple_omp_for_combined_p (fd->for_stmt))
7351 remove_edge (ep);
7352 ep = NULL;
7354 else if (fd->collapse > 1)
7356 remove_edge (ep);
7357 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7359 else
7360 ep->flags = EDGE_TRUE_VALUE;
7361 find_edge (cont_bb, fin_bb)->flags
7362 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7365 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7366 recompute_dominator (CDI_DOMINATORS, body_bb));
7367 if (!broken_loop)
7368 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7369 recompute_dominator (CDI_DOMINATORS, fin_bb));
/* Register the natural loop in the loop tree; with a collapsed nest
   the latch is not unique here, so leave loop->latch NULL in that
   case.  */
7371 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7373 class loop *loop = alloc_loop ();
7374 loop->header = body_bb;
7375 if (collapse_bb == NULL)
7376 loop->latch = cont_bb;
7377 add_loop (loop, body_bb->loop_father);
7381 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7382 partitioned loop. The lowering here is abstracted, in that the
7383 loop parameters are passed through internal functions, which are
7384 further lowered by oacc_device_lower, once we get to the target
7385 compiler. The loop is of the form:
7387 for (V = B; V LTGT E; V += S) {BODY}
7389 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7390 (constant 0 for no chunking) and we will have a GWV partitioning
7391 mask, specifying dimensions over which the loop is to be
7392 partitioned (see note below). We generate code that looks like
7393 (this ignores tiling):
7395 <entry_bb> [incoming FALL->body, BRANCH->exit]
7396 typedef signedintify (typeof (V)) T; // underlying signed integral type
7397 T range = E - B;
7398 T chunk_no = 0;
7399 T DIR = LTGT == '<' ? +1 : -1;
7400 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7401 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7403 <head_bb> [created by splitting end of entry_bb]
7404 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7405 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7406 if (!(offset LTGT bound)) goto bottom_bb;
7408 <body_bb> [incoming]
7409 V = B + offset;
7410 {BODY}
7412 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7413 offset += step;
7414 if (offset LTGT bound) goto body_bb; [*]
7416 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7417 chunk_no++;
7418 if (chunk < chunk_max) goto head_bb;
7420 <exit_bb> [incoming]
7421 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7423 [*] Needed if V live at end of loop. */
7425 static void
7426 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7428 bool is_oacc_kernels_parallelized
7429 = (lookup_attribute ("oacc kernels parallelized",
7430 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7432 bool is_oacc_kernels
7433 = (lookup_attribute ("oacc kernels",
7434 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7435 if (is_oacc_kernels_parallelized)
7436 gcc_checking_assert (is_oacc_kernels);
7438 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7439 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7440 for SSA specifics, and some are for 'parloops' OpenACC
7441 'kernels'-parallelized specifics. */
7443 tree v = fd->loop.v;
7444 enum tree_code cond_code = fd->loop.cond_code;
7445 enum tree_code plus_code = PLUS_EXPR;
7447 tree chunk_size = integer_minus_one_node;
7448 tree gwv = integer_zero_node;
7449 tree iter_type = TREE_TYPE (v);
7450 tree diff_type = iter_type;
7451 tree plus_type = iter_type;
7452 struct oacc_collapse *counts = NULL;
7454 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7455 == GF_OMP_FOR_KIND_OACC_LOOP);
7456 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7457 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7459 if (POINTER_TYPE_P (iter_type))
7461 plus_code = POINTER_PLUS_EXPR;
7462 plus_type = sizetype;
7464 for (int ix = fd->collapse; ix--;)
7466 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7467 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7468 diff_type = diff_type2;
7470 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7471 diff_type = signed_type_for (diff_type);
7472 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7473 diff_type = integer_type_node;
7475 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7476 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7477 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7478 basic_block bottom_bb = NULL;
7480 /* entry_bb has two successors; the branch edge is to the exit
7481 block, fallthrough edge to body. */
7482 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7483 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7485 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7486 body_bb, or to a block whose only successor is the body_bb. Its
7487 fallthrough successor is the final block (same as the branch
7488 successor of the entry_bb). */
7489 if (cont_bb)
7491 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7492 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7494 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7495 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7497 else
7498 gcc_assert (!gimple_in_ssa_p (cfun));
7500 /* The exit block only has entry_bb and cont_bb as predecessors. */
7501 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7503 tree chunk_no;
7504 tree chunk_max = NULL_TREE;
7505 tree bound, offset;
7506 tree step = create_tmp_var (diff_type, ".step");
7507 bool up = cond_code == LT_EXPR;
7508 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7509 bool chunking = !gimple_in_ssa_p (cfun);
7510 bool negating;
7512 /* Tiling vars. */
7513 tree tile_size = NULL_TREE;
7514 tree element_s = NULL_TREE;
7515 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7516 basic_block elem_body_bb = NULL;
7517 basic_block elem_cont_bb = NULL;
7519 /* SSA instances. */
7520 tree offset_incr = NULL_TREE;
7521 tree offset_init = NULL_TREE;
7523 gimple_stmt_iterator gsi;
7524 gassign *ass;
7525 gcall *call;
7526 gimple *stmt;
7527 tree expr;
7528 location_t loc;
7529 edge split, be, fte;
7531 /* Split the end of entry_bb to create head_bb. */
7532 split = split_block (entry_bb, last_stmt (entry_bb));
7533 basic_block head_bb = split->dest;
7534 entry_bb = split->src;
7536 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7537 gsi = gsi_last_nondebug_bb (entry_bb);
7538 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7539 loc = gimple_location (for_stmt);
7541 if (gimple_in_ssa_p (cfun))
7543 offset_init = gimple_omp_for_index (for_stmt, 0);
7544 gcc_assert (integer_zerop (fd->loop.n1));
7545 /* The SSA parallelizer does gang parallelism. */
7546 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7549 if (fd->collapse > 1 || fd->tiling)
7551 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7552 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7553 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7554 TREE_TYPE (fd->loop.n2), loc);
7556 if (SSA_VAR_P (fd->loop.n2))
7558 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7559 true, GSI_SAME_STMT);
7560 ass = gimple_build_assign (fd->loop.n2, total);
7561 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7565 tree b = fd->loop.n1;
7566 tree e = fd->loop.n2;
7567 tree s = fd->loop.step;
7569 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7570 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7572 /* Convert the step, avoiding possible unsigned->signed overflow. */
7573 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7574 if (negating)
7575 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7576 s = fold_convert (diff_type, s);
7577 if (negating)
7578 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7579 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7581 if (!chunking)
7582 chunk_size = integer_zero_node;
7583 expr = fold_convert (diff_type, chunk_size);
7584 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7585 NULL_TREE, true, GSI_SAME_STMT);
7587 if (fd->tiling)
7589 /* Determine the tile size and element step,
7590 modify the outer loop step size. */
7591 tile_size = create_tmp_var (diff_type, ".tile_size");
7592 expr = build_int_cst (diff_type, 1);
7593 for (int ix = 0; ix < fd->collapse; ix++)
7594 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7595 expr = force_gimple_operand_gsi (&gsi, expr, true,
7596 NULL_TREE, true, GSI_SAME_STMT);
7597 ass = gimple_build_assign (tile_size, expr);
7598 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7600 element_s = create_tmp_var (diff_type, ".element_s");
7601 ass = gimple_build_assign (element_s, s);
7602 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7604 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7605 s = force_gimple_operand_gsi (&gsi, expr, true,
7606 NULL_TREE, true, GSI_SAME_STMT);
7609 /* Determine the range, avoiding possible unsigned->signed overflow. */
7610 negating = !up && TYPE_UNSIGNED (iter_type);
7611 expr = fold_build2 (MINUS_EXPR, plus_type,
7612 fold_convert (plus_type, negating ? b : e),
7613 fold_convert (plus_type, negating ? e : b));
7614 expr = fold_convert (diff_type, expr);
7615 if (negating)
7616 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7617 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7618 NULL_TREE, true, GSI_SAME_STMT);
7620 chunk_no = build_int_cst (diff_type, 0);
7621 if (chunking)
7623 gcc_assert (!gimple_in_ssa_p (cfun));
7625 expr = chunk_no;
7626 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7627 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7629 ass = gimple_build_assign (chunk_no, expr);
7630 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7632 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7633 build_int_cst (integer_type_node,
7634 IFN_GOACC_LOOP_CHUNKS),
7635 dir, range, s, chunk_size, gwv);
7636 gimple_call_set_lhs (call, chunk_max);
7637 gimple_set_location (call, loc);
7638 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7640 else
7641 chunk_size = chunk_no;
7643 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7644 build_int_cst (integer_type_node,
7645 IFN_GOACC_LOOP_STEP),
7646 dir, range, s, chunk_size, gwv);
7647 gimple_call_set_lhs (call, step);
7648 gimple_set_location (call, loc);
7649 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7651 /* Remove the GIMPLE_OMP_FOR. */
7652 gsi_remove (&gsi, true);
7654 /* Fixup edges from head_bb. */
7655 be = BRANCH_EDGE (head_bb);
7656 fte = FALLTHRU_EDGE (head_bb);
7657 be->flags |= EDGE_FALSE_VALUE;
7658 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7660 basic_block body_bb = fte->dest;
7662 if (gimple_in_ssa_p (cfun))
7664 gsi = gsi_last_nondebug_bb (cont_bb);
7665 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7667 offset = gimple_omp_continue_control_use (cont_stmt);
7668 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7670 else
7672 offset = create_tmp_var (diff_type, ".offset");
7673 offset_init = offset_incr = offset;
7675 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7677 /* Loop offset & bound go into head_bb. */
7678 gsi = gsi_start_bb (head_bb);
7680 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7681 build_int_cst (integer_type_node,
7682 IFN_GOACC_LOOP_OFFSET),
7683 dir, range, s,
7684 chunk_size, gwv, chunk_no);
7685 gimple_call_set_lhs (call, offset_init);
7686 gimple_set_location (call, loc);
7687 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7689 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7690 build_int_cst (integer_type_node,
7691 IFN_GOACC_LOOP_BOUND),
7692 dir, range, s,
7693 chunk_size, gwv, offset_init);
7694 gimple_call_set_lhs (call, bound);
7695 gimple_set_location (call, loc);
7696 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7698 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7699 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7700 GSI_CONTINUE_LINKING);
7702 /* V assignment goes into body_bb. */
7703 if (!gimple_in_ssa_p (cfun))
7705 gsi = gsi_start_bb (body_bb);
7707 expr = build2 (plus_code, iter_type, b,
7708 fold_convert (plus_type, offset));
7709 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7710 true, GSI_SAME_STMT);
7711 ass = gimple_build_assign (v, expr);
7712 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7714 if (fd->collapse > 1 || fd->tiling)
7715 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7717 if (fd->tiling)
7719 /* Determine the range of the element loop -- usually simply
7720 the tile_size, but could be smaller if the final
7721 iteration of the outer loop is a partial tile. */
7722 tree e_range = create_tmp_var (diff_type, ".e_range");
7724 expr = build2 (MIN_EXPR, diff_type,
7725 build2 (MINUS_EXPR, diff_type, bound, offset),
7726 build2 (MULT_EXPR, diff_type, tile_size,
7727 element_s));
7728 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7729 true, GSI_SAME_STMT);
7730 ass = gimple_build_assign (e_range, expr);
7731 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7733 /* Determine bound, offset & step of inner loop. */
7734 e_bound = create_tmp_var (diff_type, ".e_bound");
7735 e_offset = create_tmp_var (diff_type, ".e_offset");
7736 e_step = create_tmp_var (diff_type, ".e_step");
7738 /* Mark these as element loops. */
7739 tree t, e_gwv = integer_minus_one_node;
7740 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7742 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7743 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7744 element_s, chunk, e_gwv, chunk);
7745 gimple_call_set_lhs (call, e_offset);
7746 gimple_set_location (call, loc);
7747 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7749 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7750 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7751 element_s, chunk, e_gwv, e_offset);
7752 gimple_call_set_lhs (call, e_bound);
7753 gimple_set_location (call, loc);
7754 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7756 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7757 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7758 element_s, chunk, e_gwv);
7759 gimple_call_set_lhs (call, e_step);
7760 gimple_set_location (call, loc);
7761 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7763 /* Add test and split block. */
7764 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7765 stmt = gimple_build_cond_empty (expr);
7766 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7767 split = split_block (body_bb, stmt);
7768 elem_body_bb = split->dest;
7769 if (cont_bb == body_bb)
7770 cont_bb = elem_body_bb;
7771 body_bb = split->src;
7773 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7775 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7776 if (cont_bb == NULL)
7778 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7779 e->probability = profile_probability::even ();
7780 split->probability = profile_probability::even ();
7783 /* Initialize the user's loop vars. */
7784 gsi = gsi_start_bb (elem_body_bb);
7785 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7786 diff_type);
7790 /* Loop increment goes into cont_bb. If this is not a loop, we
7791 will have spawned threads as if it was, and each one will
7792 execute one iteration. The specification is not explicit about
7793 whether such constructs are ill-formed or not, and they can
7794 occur, especially when noreturn routines are involved. */
7795 if (cont_bb)
7797 gsi = gsi_last_nondebug_bb (cont_bb);
7798 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7799 loc = gimple_location (cont_stmt);
7801 if (fd->tiling)
7803 /* Insert element loop increment and test. */
7804 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7805 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7806 true, GSI_SAME_STMT);
7807 ass = gimple_build_assign (e_offset, expr);
7808 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7809 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7811 stmt = gimple_build_cond_empty (expr);
7812 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7813 split = split_block (cont_bb, stmt);
7814 elem_cont_bb = split->src;
7815 cont_bb = split->dest;
7817 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7818 split->probability = profile_probability::unlikely ().guessed ();
7819 edge latch_edge
7820 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7821 latch_edge->probability = profile_probability::likely ().guessed ();
7823 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7824 skip_edge->probability = profile_probability::unlikely ().guessed ();
7825 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7826 loop_entry_edge->probability
7827 = profile_probability::likely ().guessed ();
7829 gsi = gsi_for_stmt (cont_stmt);
7832 /* Increment offset. */
7833 if (gimple_in_ssa_p (cfun))
7834 expr = build2 (plus_code, iter_type, offset,
7835 fold_convert (plus_type, step));
7836 else
7837 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7838 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7839 true, GSI_SAME_STMT);
7840 ass = gimple_build_assign (offset_incr, expr);
7841 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7842 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7843 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7845 /* Remove the GIMPLE_OMP_CONTINUE. */
7846 gsi_remove (&gsi, true);
7848 /* Fixup edges from cont_bb. */
7849 be = BRANCH_EDGE (cont_bb);
7850 fte = FALLTHRU_EDGE (cont_bb);
7851 be->flags |= EDGE_TRUE_VALUE;
7852 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7854 if (chunking)
7856 /* Split the beginning of exit_bb to make bottom_bb. We
7857 need to insert a nop at the start, because splitting is
7858 after a stmt, not before. */
7859 gsi = gsi_start_bb (exit_bb);
7860 stmt = gimple_build_nop ();
7861 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7862 split = split_block (exit_bb, stmt);
7863 bottom_bb = split->src;
7864 exit_bb = split->dest;
7865 gsi = gsi_last_bb (bottom_bb);
7867 /* Chunk increment and test goes into bottom_bb. */
7868 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7869 build_int_cst (diff_type, 1));
7870 ass = gimple_build_assign (chunk_no, expr);
7871 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7873 /* Chunk test at end of bottom_bb. */
7874 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7875 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7876 GSI_CONTINUE_LINKING);
7878 /* Fixup edges from bottom_bb. */
7879 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7880 split->probability = profile_probability::unlikely ().guessed ();
7881 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7882 latch_edge->probability = profile_probability::likely ().guessed ();
7886 gsi = gsi_last_nondebug_bb (exit_bb);
7887 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7888 loc = gimple_location (gsi_stmt (gsi));
7890 if (!gimple_in_ssa_p (cfun))
7892 /* Insert the final value of V, in case it is live. This is the
7893 value for the only thread that survives past the join. */
7894 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7895 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7896 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7897 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7898 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7899 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7900 true, GSI_SAME_STMT);
7901 ass = gimple_build_assign (v, expr);
7902 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7905 /* Remove the OMP_RETURN. */
7906 gsi_remove (&gsi, true);
7908 if (cont_bb)
7910 /* We now have one, two or three nested loops. Update the loop
7911 structures. */
7912 class loop *parent = entry_bb->loop_father;
7913 class loop *body = body_bb->loop_father;
7915 if (chunking)
7917 class loop *chunk_loop = alloc_loop ();
7918 chunk_loop->header = head_bb;
7919 chunk_loop->latch = bottom_bb;
7920 add_loop (chunk_loop, parent);
7921 parent = chunk_loop;
7923 else if (parent != body)
7925 gcc_assert (body->header == body_bb);
7926 gcc_assert (body->latch == cont_bb
7927 || single_pred (body->latch) == cont_bb);
7928 parent = NULL;
7931 if (parent)
7933 class loop *body_loop = alloc_loop ();
7934 body_loop->header = body_bb;
7935 body_loop->latch = cont_bb;
7936 add_loop (body_loop, parent);
7938 if (fd->tiling)
7940 /* Insert tiling's element loop. */
7941 class loop *inner_loop = alloc_loop ();
7942 inner_loop->header = elem_body_bb;
7943 inner_loop->latch = elem_cont_bb;
7944 add_loop (inner_loop, body_loop);
7950 /* Expand the OMP loop defined by REGION. */
7952 static void
7953 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7955 struct omp_for_data fd;
7956 struct omp_for_data_loop *loops;
/* Gather the normalized loop description (bounds, steps, schedule,
   collapse information) from the GIMPLE_OMP_FOR statement at the
   region entry.  INNER_STMT is the statement of a combined inner
   construct, if any, and is forwarded to the sub-expanders.  */
7958 loops = XALLOCAVEC (struct omp_for_data_loop,
7959 gimple_omp_for_collapse (last_stmt (region->entry)));
7960 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7961 &fd, loops);
7962 region->sched_kind = fd.sched_kind;
7963 region->sched_modifiers = fd.sched_modifiers;
7964 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
/* For non-rectangular loop nests whose multipliers (m1/m2) and steps
   are all compile-time constants, check that the dependent expression
   scaled by the outer step is a multiple of this loop's step, and
   diagnose the loop as invalid OpenMP otherwise.  */
7965 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7967 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7968 if ((loops[i].m1 || loops[i].m2)
7969 && (loops[i].m1 == NULL_TREE
7970 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7971 && (loops[i].m2 == NULL_TREE
7972 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7973 && TREE_CODE (loops[i].step) == INTEGER_CST
7974 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7976 tree t;
7977 tree itype = TREE_TYPE (loops[i].v);
/* T = (m2 - m1) * outer_step; the remainder of T modulo this
   loop's step must be zero.  For unsigned types iterating
   downwards (GT_EXPR) negate both operands so the truncating
   modulo computes the mathematically expected remainder.  */
7978 if (loops[i].m1 && loops[i].m2)
7979 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7980 else if (loops[i].m1)
7981 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7982 else
7983 t = loops[i].m2;
7984 t = fold_build2 (MULT_EXPR, itype, t,
7985 fold_convert (itype,
7986 loops[i - loops[i].outer].step));
7987 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7988 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7989 fold_build1 (NEGATE_EXPR, itype, t),
7990 fold_build1 (NEGATE_EXPR, itype,
7991 fold_convert (itype,
7992 loops[i].step)));
7993 else
7994 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7995 fold_convert (itype, loops[i].step));
7996 if (integer_nonzerop (t))
7997 error_at (gimple_location (fd.for_stmt),
7998 "invalid OpenMP non-rectangular loop step; "
7999 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8000 "step %qE",
8001 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8002 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8003 loops[i - loops[i].outer].step, i + 1,
8004 loops[i].step);
/* Lowering marked the region edges abnormal to keep the region
   intact; clear those flags now that real control flow is being
   rebuilt.  */
8008 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8009 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8010 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8011 if (region->cont)
8013 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8014 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8015 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8017 else
8018 /* If there isn't a continue then this is a degenerate case where
8019 the introduction of abnormal edges during lowering will prevent
8020 original loops from being detected. Fix that up. */
8021 loops_state_set (LOOPS_NEED_FIXUP);
/* Dispatch on the loop kind: simd, OpenACC, taskloop, static
   schedule without ordered, or the generic libgomp-driven path.  */
8023 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8024 expand_omp_simd (region, &fd);
8025 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8027 gcc_assert (!inner_stmt && !fd.non_rect);
8028 expand_oacc_for (region, &fd);
8030 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8032 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8033 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8034 else
8035 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8037 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8038 && !fd.have_ordered)
8040 if (fd.chunk_size == NULL)
8041 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8042 else
8043 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8045 else
/* Generic path: compute FN_INDEX selecting among the
   GOMP_loop_*_start / GOMP_loop_*_next builtin families, and the
   runtime schedule encoding SCHED.  NOTE(review): the
   (HOST_WIDE_INT_1U << 31) bit appears to mark monotonic
   schedules for the combined GOMP_loop_start entry points --
   confirm against libgomp's schedule constants.  */
8047 int fn_index, start_ix, next_ix;
8048 unsigned HOST_WIDE_INT sched = 0;
8049 tree sched_arg = NULL_TREE;
8051 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8052 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8053 if (fd.chunk_size == NULL
8054 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8055 fd.chunk_size = integer_zero_node;
8056 switch (fd.sched_kind)
8058 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8059 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8060 && fd.lastprivate_conditional == 0)
8062 gcc_assert (!fd.have_ordered);
8063 fn_index = 6;
8064 sched = 4;
8066 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8067 && !fd.have_ordered
8068 && fd.lastprivate_conditional == 0)
8069 fn_index = 7;
8070 else
8072 fn_index = 3;
8073 sched = (HOST_WIDE_INT_1U << 31);
8075 break;
8076 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8077 case OMP_CLAUSE_SCHEDULE_GUIDED:
8078 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8079 && !fd.have_ordered
8080 && fd.lastprivate_conditional == 0)
8082 fn_index = 3 + fd.sched_kind;
8083 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8084 break;
8086 fn_index = fd.sched_kind;
8087 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8088 sched += (HOST_WIDE_INT_1U << 31);
8089 break;
8090 case OMP_CLAUSE_SCHEDULE_STATIC:
8091 gcc_assert (fd.have_ordered);
8092 fn_index = 0;
8093 sched = (HOST_WIDE_INT_1U << 31) + 1;
8094 break;
8095 default:
8096 gcc_unreachable ();
8098 if (!fd.ordered)
8099 fn_index += fd.have_ordered * 8;
8100 if (fd.ordered)
8101 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8102 else
8103 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8104 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
/* Reduction or conditional-lastprivate temporaries require the
   combined GOMP_loop*_start entry points, which take the schedule
   as an explicit runtime argument.  */
8105 if (fd.have_reductemp || fd.have_pointer_condtemp)
8107 if (fd.ordered)
8108 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8109 else if (fd.have_ordered)
8110 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8111 else
8112 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8113 sched_arg = build_int_cstu (long_integer_type_node, sched);
8114 if (!fd.chunk_size)
8115 fd.chunk_size = integer_zero_node;
/* Shift to the _ull builtin variants when iterating in unsigned
   long long.  */
8117 if (fd.iter_type == long_long_unsigned_type_node)
8119 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8120 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8121 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8122 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8124 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8125 (enum built_in_function) next_ix, sched_arg,
8126 inner_stmt);
8129 if (gimple_in_ssa_p (cfun))
8130 update_ssa (TODO_update_ssa_only_virtuals);
8133 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8135 v = GOMP_sections_start (n);
8137 switch (v)
8139 case 0:
8140 goto L2;
8141 case 1:
8142 section 1;
8143 goto L1;
8144 case 2:
8146 case n:
8148 default:
8149 abort ();
8152 v = GOMP_sections_next ();
8153 goto L0;
8155 reduction;
8157 If this is a combined parallel sections, replace the call to
8158 GOMP_sections_start with call to GOMP_sections_next. */
8160 static void
8161 expand_omp_sections (struct omp_region *region)
8163 tree t, u, vin = NULL, vmain, vnext, l2;
8164 unsigned len;
8165 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8166 gimple_stmt_iterator si, switch_si;
8167 gomp_sections *sections_stmt;
8168 gimple *stmt;
8169 gomp_continue *cont;
8170 edge_iterator ei;
8171 edge e;
8172 struct omp_region *inner;
8173 unsigned i, casei;
8174 bool exit_reachable = region->cont != NULL;
/* Canonical layout: ENTRY_BB holds GIMPLE_OMP_SECTIONS, its single
   successor L0_BB holds GIMPLE_OMP_SECTIONS_SWITCH; L1_BB is the
   continue block (may be NULL) and L2_BB the region exit.  */
8176 gcc_assert (region->exit != NULL);
8177 entry_bb = region->entry;
8178 l0_bb = single_succ (entry_bb);
8179 l1_bb = region->cont;
8180 l2_bb = region->exit;
/* Find the label L2 that the "no more work" case branches to.  */
8181 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8182 l2 = gimple_block_label (l2_bb);
8183 else
8185 /* This can happen if there are reductions. */
8186 len = EDGE_COUNT (l0_bb->succs);
8187 gcc_assert (len > 0);
8188 e = EDGE_SUCC (l0_bb, len - 1);
8189 si = gsi_last_nondebug_bb (e->dest);
8190 l2 = NULL_TREE;
8191 if (gsi_end_p (si)
8192 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8193 l2 = gimple_block_label (e->dest);
8194 else
/* Scan all successors of L0_BB for one that is not a section
   body; that block carries the L2 label.  */
8195 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8197 si = gsi_last_nondebug_bb (e->dest);
8198 if (gsi_end_p (si)
8199 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8201 l2 = gimple_block_label (e->dest);
8202 break;
8206 if (exit_reachable)
8207 default_bb = create_empty_bb (l1_bb->prev_bb)
8208 else
8209 default_bb = create_empty_bb (l0_bb);
8211 /* We will build a switch() with enough cases for all the
8212 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8213 and a default case to abort if something goes wrong. */
8214 len = EDGE_COUNT (l0_bb->succs);
8216 /* Use vec::quick_push on label_vec throughout, since we know the size
8217 in advance. */
8218 auto_vec<tree> label_vec (len);
8220 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8221 GIMPLE_OMP_SECTIONS statement. */
8222 si = gsi_last_nondebug_bb (entry_bb);
8223 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8224 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8225 vin = gimple_omp_sections_control (sections_stmt);
8226 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8227 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8228 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8229 tree cond_var = NULL_TREE;
/* Reduction and/or conditional-lastprivate temporaries require the
   three-argument GOMP_sections2_start entry point.  */
8230 if (reductmp || condtmp)
8232 tree reductions = null_pointer_node, mem = null_pointer_node;
8233 tree memv = NULL_TREE, condtemp = NULL_TREE;
8234 gimple_stmt_iterator gsi = gsi_none ();
8235 gimple *g = NULL;
8236 if (reductmp)
8238 reductions = OMP_CLAUSE_DECL (reductmp);
8239 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8240 g = SSA_NAME_DEF_STMT (reductions);
8241 reductions = gimple_assign_rhs1 (g);
8242 OMP_CLAUSE_DECL (reductmp) = reductions;
8243 gsi = gsi_for_stmt (g);
8245 else
8246 gsi = si;
8247 if (condtmp)
/* Size the conditional-lastprivate buffer: one element per
   lastprivate(conditional:) clause.  */
8249 condtemp = OMP_CLAUSE_DECL (condtmp);
8250 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8251 OMP_CLAUSE__CONDTEMP_);
8252 cond_var = OMP_CLAUSE_DECL (c);
8253 tree type = TREE_TYPE (condtemp);
8254 memv = create_tmp_var (type);
8255 TREE_ADDRESSABLE (memv) = 1;
8256 unsigned cnt = 0;
8257 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8258 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8259 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8260 ++cnt;
8261 unsigned HOST_WIDE_INT sz
8262 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8263 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8264 false);
8265 mem = build_fold_addr_expr (memv);
8267 t = build_int_cst (unsigned_type_node, len - 1);
8268 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8269 stmt = gimple_build_call (u, 3, t, reductions, mem);
8270 gimple_call_set_lhs (stmt, vin);
8271 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8272 if (condtmp)
8274 expand_omp_build_assign (&gsi, condtemp, memv, false);
8275 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8276 vin, build_one_cst (TREE_TYPE (cond_var)));
8277 expand_omp_build_assign (&gsi, cond_var, t, false);
8279 if (reductmp)
8281 gsi_remove (&gsi, true);
8282 release_ssa_name (gimple_assign_lhs (g));
8285 else if (!is_combined_parallel (region))
8287 /* If we are not inside a combined parallel+sections region,
8288 call GOMP_sections_start. */
8289 t = build_int_cst (unsigned_type_node, len - 1);
8290 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8291 stmt = gimple_build_call (u, 1, t);
8293 else
8295 /* Otherwise, call GOMP_sections_next. */
8296 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8297 stmt = gimple_build_call (u, 0);
8299 if (!reductmp && !condtmp)
8301 gimple_call_set_lhs (stmt, vin);
8302 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8304 gsi_remove (&si, true);
8306 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8307 L0_BB. */
8308 switch_si = gsi_last_nondebug_bb (l0_bb);
8309 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH)
8310 if (exit_reachable)
8312 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8313 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8314 vmain = gimple_omp_continue_control_use (cont);
8315 vnext = gimple_omp_continue_control_def (cont);
8317 else
8319 vmain = vin;
8320 vnext = NULL_TREE;
/* Case 0: no more sections to run -- branch to L2.  */
8323 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8324 label_vec.quick_push (t);
8325 i = 1;
8327 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8328 for (inner = region->inner, casei = 1;
8329 inner;
8330 inner = inner->next, i++, casei++)
8332 basic_block s_entry_bb, s_exit_bb;
8334 /* Skip optional reduction region. */
8335 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8337 --i;
8338 --casei;
8339 continue;
8342 s_entry_bb = inner->entry;
8343 s_exit_bb = inner->exit;
8345 t = gimple_block_label (s_entry_bb);
8346 u = build_int_cst (unsigned_type_node, casei);
8347 u = build_case_label (u, NULL, t);
8348 label_vec.quick_push (u);
/* Strip the GIMPLE_OMP_SECTION marker and, when present, the
   section's GIMPLE_OMP_RETURN, turning their edges into plain
   fallthrough.  */
8350 si = gsi_last_nondebug_bb (s_entry_bb);
8351 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8352 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8353 gsi_remove (&si, true);
8354 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8356 if (s_exit_bb == NULL)
8357 continue;
8359 si = gsi_last_nondebug_bb (s_exit_bb);
8360 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8361 gsi_remove (&si, true);
8363 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8366 /* Error handling code goes in DEFAULT_BB. */
8367 t = gimple_block_label (default_bb);
8368 u = build_case_label (NULL, NULL, t);
8369 make_edge (l0_bb, default_bb, 0);
8370 add_bb_to_loop (default_bb, current_loops->tree_root);
8372 stmt = gimple_build_switch (vmain, u, label_vec);
8373 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8374 gsi_remove (&switch_si, true);
8376 si = gsi_start_bb (default_bb);
8377 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8378 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8380 if (exit_reachable)
8382 tree bfn_decl;
8384 /* Code to get the next section goes in L1_BB. */
8385 si = gsi_last_nondebug_bb (l1_bb);
8386 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8388 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8389 stmt = gimple_build_call (bfn_decl, 0);
8390 gimple_call_set_lhs (stmt, vnext);
8391 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8392 if (cond_var)
8394 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8395 vnext, build_one_cst (TREE_TYPE (cond_var)));
8396 expand_omp_build_assign (&si, cond_var, t, false);
8398 gsi_remove (&si, true);
8400 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8403 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8404 si = gsi_last_nondebug_bb (l2_bb);
8405 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8406 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8407 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8408 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8409 else
8410 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8411 stmt = gimple_build_call (t, 0);
8412 if (gimple_omp_return_lhs (gsi_stmt (si)))
8413 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8414 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8415 gsi_remove (&si, true);
/* DEFAULT_BB is reached only through the switch in L0_BB.  */
8417 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8420 /* Expand code for an OpenMP single or scope directive. We've already expanded
8421 much of the code, here we simply place the GOMP_barrier call. */
8423 static void
8424 expand_omp_single (struct omp_region *region)
8426 basic_block entry_bb, exit_bb;
8427 gimple_stmt_iterator si;
8429 entry_bb = region->entry;
8430 exit_bb = region->exit;
8432 si = gsi_last_nondebug_bb (entry_bb);
8433 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8434 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SCOPE);
8435 gsi_remove (&si, true);
8436 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8438 si = gsi_last_nondebug_bb (exit_bb);
8439 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8441 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8442 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8444 gsi_remove (&si, true);
8445 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8448 /* Generic expansion for OpenMP synchronization directives: master,
8449 ordered and critical. All we need to do here is remove the entry
8450 and exit markers for REGION. */
8452 static void
8453 expand_omp_synch (struct omp_region *region)
8455 basic_block entry_bb, exit_bb;
8456 gimple_stmt_iterator si;
8458 entry_bb = region->entry;
8459 exit_bb = region->exit;
8461 si = gsi_last_nondebug_bb (entry_bb);
8462 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8463 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8464 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8465 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8466 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8467 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8468 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8469 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8470 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8472 expand_omp_taskreg (region);
8473 return;
8475 gsi_remove (&si, true);
8476 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8478 if (exit_bb)
8480 si = gsi_last_nondebug_bb (exit_bb);
8481 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8482 gsi_remove (&si, true);
8483 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8487 /* Translate enum omp_memory_order to enum memmodel. The two enums
8488 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8489 is 0. */
8491 static enum memmodel
8492 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8494 switch (mo)
8496 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8497 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8498 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8499 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8500 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8501 default: gcc_unreachable ();
8505 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8506 operation as a normal volatile load. */
8508 static bool
8509 expand_omp_atomic_load (basic_block load_bb, tree addr,
8510 tree loaded_val, int index)
8512 enum built_in_function tmpbase;
8513 gimple_stmt_iterator gsi;
8514 basic_block store_bb;
8515 location_t loc;
8516 gimple *stmt;
8517 tree decl, call, type, itype;
8519 gsi = gsi_last_nondebug_bb (load_bb);
8520 stmt = gsi_stmt (gsi);
8521 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8522 loc = gimple_location (stmt);
8524 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8525 is smaller than word size, then expand_atomic_load assumes that the load
8526 is atomic. We could avoid the builtin entirely in this case. */
/* Select the size-specific __atomic_load_N builtin; INDEX picks the
   variant (presumably log2 of the access size -- confirm at the
   callers).  Bail out if the target provides none.  */
8528 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8529 decl = builtin_decl_explicit (tmpbase);
8530 if (decl == NULL_TREE)
8531 return false;
8533 type = TREE_TYPE (loaded_val);
8534 itype = TREE_TYPE (TREE_TYPE (decl));
/* Build LOADED_VAL = __atomic_load_N (ADDR, memorder) with the memory
   order requested on the OMP atomic statement, view-converting when
   the builtin's return type differs from LOADED_VAL's type.  */
8536 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8537 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8538 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8539 if (!useless_type_conversion_p (type, itype))
8540 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8541 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8543 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8544 gsi_remove (&gsi, true);
/* The paired GIMPLE_OMP_ATOMIC_STORE in the single successor block is
   now redundant; remove it as well.  */
8546 store_bb = single_succ (load_bb);
8547 gsi = gsi_last_nondebug_bb (store_bb);
8548 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8549 gsi_remove (&gsi, true);
8551 if (gimple_in_ssa_p (cfun))
8552 update_ssa (TODO_update_ssa_no_phi);
8554 return true;
8557 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8558 operation as a normal volatile store. */
8560 static bool
8561 expand_omp_atomic_store (basic_block load_bb, tree addr,
8562 tree loaded_val, tree stored_val, int index)
8564 enum built_in_function tmpbase;
8565 gimple_stmt_iterator gsi;
8566 basic_block store_bb = single_succ (load_bb);
8567 location_t loc;
8568 gimple *stmt;
8569 tree decl, call, type, itype;
8570 machine_mode imode;
8571 bool exchange;
8573 gsi = gsi_last_nondebug_bb (load_bb);
8574 stmt = gsi_stmt (gsi);
8575 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8577 /* If the load value is needed, then this isn't a store but an exchange. */
8578 exchange = gimple_omp_atomic_need_value_p (stmt);
8580 gsi = gsi_last_nondebug_bb (store_bb);
8581 stmt = gsi_stmt (gsi);
8582 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8583 loc = gimple_location (stmt);
8585 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8586 is smaller than word size, then expand_atomic_store assumes that the store
8587 is atomic. We could avoid the builtin entirely in this case. */
8589 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8590 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8591 decl = builtin_decl_explicit (tmpbase);
8592 if (decl == NULL_TREE)
8593 return false;
8595 type = TREE_TYPE (stored_val);
8597 /* Dig out the type of the function's second argument. */
8598 itype = TREE_TYPE (decl);
8599 itype = TYPE_ARG_TYPES (itype);
8600 itype = TREE_CHAIN (itype);
8601 itype = TREE_VALUE (itype);
8602 imode = TYPE_MODE (itype);
8604 if (exchange && !can_atomic_exchange_p (imode, true))
8605 return false;
8607 if (!useless_type_conversion_p (itype, type))
8608 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8609 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8610 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8611 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8612 if (exchange)
8614 if (!useless_type_conversion_p (type, itype))
8615 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8616 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8619 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8620 gsi_remove (&gsi, true);
8622 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8623 gsi = gsi_last_nondebug_bb (load_bb);
8624 gsi_remove (&gsi, true);
8626 if (gimple_in_ssa_p (cfun))
8627 update_ssa (TODO_update_ssa_no_phi);
8629 return true;
8632 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8633 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8634 size of the data type, and thus usable to find the index of the builtin
8635 decl. Returns false if the expression is not of the proper form. */
8637 static bool
8638 expand_omp_atomic_fetch_op (basic_block load_bb,
8639 tree addr, tree loaded_val,
8640 tree stored_val, int index)
8642 enum built_in_function oldbase, newbase, tmpbase;
8643 tree decl, itype, call;
8644 tree lhs, rhs;
8645 basic_block store_bb = single_succ (load_bb);
8646 gimple_stmt_iterator gsi;
8647 gimple *stmt;
8648 location_t loc;
8649 enum tree_code code;
8650 bool need_old, need_new;
8651 machine_mode imode;
8653 /* We expect to find the following sequences:
8655 load_bb:
8656 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8658 store_bb:
8659 val = tmp OP something; (or: something OP tmp)
8660 GIMPLE_OMP_STORE (val)
8662 ???FIXME: Allow a more flexible sequence.
8663 Perhaps use data flow to pick the statements.
8667 gsi = gsi_after_labels (store_bb);
8668 stmt = gsi_stmt (gsi);
8669 if (is_gimple_debug (stmt))
8671 gsi_next_nondebug (&gsi);
8672 if (gsi_end_p (gsi))
8673 return false;
8674 stmt = gsi_stmt (gsi);
8676 loc = gimple_location (stmt);
8677 if (!is_gimple_assign (stmt))
8678 return false;
8679 gsi_next_nondebug (&gsi);
8680 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8681 return false;
8682 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8683 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8684 enum omp_memory_order omo
8685 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8686 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8687 gcc_checking_assert (!need_old || !need_new);
8689 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8690 return false;
8692 /* Check for one of the supported fetch-op operations. */
8693 code = gimple_assign_rhs_code (stmt);
8694 switch (code)
8696 case PLUS_EXPR:
8697 case POINTER_PLUS_EXPR:
8698 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8699 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8700 break;
8701 case MINUS_EXPR:
8702 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8703 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8704 break;
8705 case BIT_AND_EXPR:
8706 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8707 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8708 break;
8709 case BIT_IOR_EXPR:
8710 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8711 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8712 break;
8713 case BIT_XOR_EXPR:
8714 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8715 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8716 break;
8717 default:
8718 return false;
8721 /* Make sure the expression is of the proper form. */
8722 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8723 rhs = gimple_assign_rhs2 (stmt);
8724 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8725 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8726 rhs = gimple_assign_rhs1 (stmt);
8727 else
8728 return false;
8730 tmpbase = ((enum built_in_function)
8731 ((need_new ? newbase : oldbase) + index + 1));
8732 decl = builtin_decl_explicit (tmpbase);
8733 if (decl == NULL_TREE)
8734 return false;
8735 itype = TREE_TYPE (TREE_TYPE (decl));
8736 imode = TYPE_MODE (itype);
8738 /* We could test all of the various optabs involved, but the fact of the
8739 matter is that (with the exception of i486 vs i586 and xadd) all targets
8740 that support any atomic operaton optab also implements compare-and-swap.
8741 Let optabs.c take care of expanding any compare-and-swap loop. */
8742 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8743 return false;
8745 gsi = gsi_last_nondebug_bb (load_bb);
8746 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8748 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8749 It only requires that the operation happen atomically. Thus we can
8750 use the RELAXED memory model. */
8751 call = build_call_expr_loc (loc, decl, 3, addr,
8752 fold_convert_loc (loc, itype, rhs),
8753 build_int_cst (NULL, mo));
8755 if (need_old || need_new)
8757 lhs = need_old ? loaded_val : stored_val;
8758 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8759 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8761 else
8762 call = fold_convert_loc (loc, void_type_node, call);
8763 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8764 gsi_remove (&gsi, true);
8766 gsi = gsi_last_nondebug_bb (store_bb);
8767 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8768 gsi_remove (&gsi, true);
8769 gsi = gsi_last_nondebug_bb (store_bb);
8770 stmt = gsi_stmt (gsi);
8771 gsi_remove (&gsi, true);
8773 if (gimple_in_ssa_p (cfun))
8775 release_defs (stmt);
8776 update_ssa (TODO_update_ssa_no_phi);
8779 return true;
8782 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8784 oldval = *addr;
8785 repeat:
8786 newval = rhs; // with oldval replacing *addr in rhs
8787 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8788 if (oldval != newval)
8789 goto repeat;
8791 INDEX is log2 of the size of the data type, and thus usable to find the
8792 index of the builtin decl. */
8794 static bool
8795 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8796 tree addr, tree loaded_val, tree stored_val,
8797 int index)
8799 tree loadedi, storedi, initial, new_storedi, old_vali;
8800 tree type, itype, cmpxchg, iaddr, atype;
8801 gimple_stmt_iterator si;
8802 basic_block loop_header = single_succ (load_bb);
8803 gimple *phi, *stmt;
8804 edge e;
8805 enum built_in_function fncode;
8807 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8808 order to use the RELAXED memory model effectively. */
8809 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8810 + index + 1);
8811 cmpxchg = builtin_decl_explicit (fncode);
8812 if (cmpxchg == NULL_TREE)
8813 return false;
8814 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8815 atype = type;
8816 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8818 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8819 || !can_atomic_load_p (TYPE_MODE (itype)))
8820 return false;
8822 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8823 si = gsi_last_nondebug_bb (load_bb);
8824 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8826 /* For floating-point values, we'll need to view-convert them to integers
8827 so that we can perform the atomic compare and swap. Simplify the
8828 following code by always setting up the "i"ntegral variables. */
8829 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8831 tree iaddr_val;
8833 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8834 true));
8835 atype = itype;
8836 iaddr_val
8837 = force_gimple_operand_gsi (&si,
8838 fold_convert (TREE_TYPE (iaddr), addr),
8839 false, NULL_TREE, true, GSI_SAME_STMT);
8840 stmt = gimple_build_assign (iaddr, iaddr_val);
8841 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8842 loadedi = create_tmp_var (itype);
8843 if (gimple_in_ssa_p (cfun))
8844 loadedi = make_ssa_name (loadedi);
8846 else
8848 iaddr = addr;
8849 loadedi = loaded_val;
8852 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8853 tree loaddecl = builtin_decl_explicit (fncode);
8854 if (loaddecl)
8855 initial
8856 = fold_convert (atype,
8857 build_call_expr (loaddecl, 2, iaddr,
8858 build_int_cst (NULL_TREE,
8859 MEMMODEL_RELAXED)));
8860 else
8862 tree off
8863 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8864 true), 0);
8865 initial = build2 (MEM_REF, atype, iaddr, off);
8868 initial
8869 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8870 GSI_SAME_STMT);
8872 /* Move the value to the LOADEDI temporary. */
8873 if (gimple_in_ssa_p (cfun))
8875 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8876 phi = create_phi_node (loadedi, loop_header);
8877 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8878 initial);
8880 else
8881 gsi_insert_before (&si,
8882 gimple_build_assign (loadedi, initial),
8883 GSI_SAME_STMT);
8884 if (loadedi != loaded_val)
8886 gimple_stmt_iterator gsi2;
8887 tree x;
8889 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8890 gsi2 = gsi_start_bb (loop_header);
8891 if (gimple_in_ssa_p (cfun))
8893 gassign *stmt;
8894 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8895 true, GSI_SAME_STMT);
8896 stmt = gimple_build_assign (loaded_val, x);
8897 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8899 else
8901 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8902 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8903 true, GSI_SAME_STMT);
8906 gsi_remove (&si, true);
8908 si = gsi_last_nondebug_bb (store_bb);
8909 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8911 if (iaddr == addr)
8912 storedi = stored_val;
8913 else
8914 storedi
8915 = force_gimple_operand_gsi (&si,
8916 build1 (VIEW_CONVERT_EXPR, itype,
8917 stored_val), true, NULL_TREE, true,
8918 GSI_SAME_STMT);
8920 /* Build the compare&swap statement. */
8921 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8922 new_storedi = force_gimple_operand_gsi (&si,
8923 fold_convert (TREE_TYPE (loadedi),
8924 new_storedi),
8925 true, NULL_TREE,
8926 true, GSI_SAME_STMT);
8928 if (gimple_in_ssa_p (cfun))
8929 old_vali = loadedi;
8930 else
8932 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8933 stmt = gimple_build_assign (old_vali, loadedi);
8934 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8936 stmt = gimple_build_assign (loadedi, new_storedi);
8937 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8940 /* Note that we always perform the comparison as an integer, even for
8941 floating point. This allows the atomic operation to properly
8942 succeed even with NaNs and -0.0. */
8943 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8944 stmt = gimple_build_cond_empty (ne);
8945 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8947 /* Update cfg. */
8948 e = single_succ_edge (store_bb);
8949 e->flags &= ~EDGE_FALLTHRU;
8950 e->flags |= EDGE_FALSE_VALUE;
8951 /* Expect no looping. */
8952 e->probability = profile_probability::guessed_always ();
8954 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8955 e->probability = profile_probability::guessed_never ();
8957 /* Copy the new value to loadedi (we already did that before the condition
8958 if we are not in SSA). */
8959 if (gimple_in_ssa_p (cfun))
8961 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8962 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8965 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8966 gsi_remove (&si, true);
8968 class loop *loop = alloc_loop ();
8969 loop->header = loop_header;
8970 loop->latch = store_bb;
8971 add_loop (loop, loop_header->loop_father);
8973 if (gimple_in_ssa_p (cfun))
8974 update_ssa (TODO_update_ssa_no_phi);
8976 return true;
8979 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8981 GOMP_atomic_start ();
8982 *addr = rhs;
8983 GOMP_atomic_end ();
8985 The result is not globally atomic, but works so long as all parallel
8986 references are within #pragma omp atomic directives. According to
8987 responses received from omp@openmp.org, appears to be within spec.
8988 Which makes sense, since that's how several other compilers handle
8989 this situation as well.
8990 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8991 expanding. STORED_VAL is the operand of the matching
8992 GIMPLE_OMP_ATOMIC_STORE.
8994 We replace
8995 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8996 loaded_val = *addr;
8998 and replace
8999 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9000 *addr = stored_val;
9003 static bool
9004 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9005 tree addr, tree loaded_val, tree stored_val)
9007 gimple_stmt_iterator si;
9008 gassign *stmt;
9009 tree t;
9011 si = gsi_last_nondebug_bb (load_bb);
9012 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9014 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9015 t = build_call_expr (t, 0);
9016 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9018 tree mem = build_simple_mem_ref (addr);
9019 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9020 TREE_OPERAND (mem, 1)
9021 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9022 true),
9023 TREE_OPERAND (mem, 1));
9024 stmt = gimple_build_assign (loaded_val, mem);
9025 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9026 gsi_remove (&si, true);
9028 si = gsi_last_nondebug_bb (store_bb);
9029 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9031 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9032 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9034 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9035 t = build_call_expr (t, 0);
9036 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9037 gsi_remove (&si, true);
9039 if (gimple_in_ssa_p (cfun))
9040 update_ssa (TODO_update_ssa_no_phi);
9041 return true;
9044 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
9045 using expand_omp_atomic_fetch_op. If it failed, we try to
9046 call expand_omp_atomic_pipeline, and if it fails too, the
9047 ultimate fallback is wrapping the operation in a mutex
9048 (expand_omp_atomic_mutex). REGION is the atomic region built
9049 by build_omp_regions_1(). */
9051 static void
9052 expand_omp_atomic (struct omp_region *region)
9054 basic_block load_bb = region->entry, store_bb = region->exit;
9055 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9056 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9057 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9058 tree addr = gimple_omp_atomic_load_rhs (load);
9059 tree stored_val = gimple_omp_atomic_store_val (store);
9060 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9061 HOST_WIDE_INT index;
9063 /* Make sure the type is one of the supported sizes. */
9064 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9065 index = exact_log2 (index);
9066 if (index >= 0 && index <= 4)
9068 unsigned int align = TYPE_ALIGN_UNIT (type);
9070 /* __sync builtins require strict data alignment. */
9071 if (exact_log2 (align) >= index)
9073 /* Atomic load. */
9074 scalar_mode smode;
9075 if (loaded_val == stored_val
9076 && (is_int_mode (TYPE_MODE (type), &smode)
9077 || is_float_mode (TYPE_MODE (type), &smode))
9078 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9079 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9080 return;
9082 /* Atomic store. */
9083 if ((is_int_mode (TYPE_MODE (type), &smode)
9084 || is_float_mode (TYPE_MODE (type), &smode))
9085 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9086 && store_bb == single_succ (load_bb)
9087 && first_stmt (store_bb) == store
9088 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9089 stored_val, index))
9090 return;
9092 /* When possible, use specialized atomic update functions. */
9093 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9094 && store_bb == single_succ (load_bb)
9095 && expand_omp_atomic_fetch_op (load_bb, addr,
9096 loaded_val, stored_val, index))
9097 return;
9099 /* If we don't have specialized __sync builtins, try and implement
9100 as a compare and swap loop. */
9101 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9102 loaded_val, stored_val, index))
9103 return;
9107 /* The ultimate fallback is wrapping the operation in a mutex. */
9108 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9111 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9112 at REGION_EXIT. */
9114 static void
9115 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9116 basic_block region_exit)
9118 class loop *outer = region_entry->loop_father;
9119 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9121 /* Don't parallelize the kernels region if it contains more than one outer
9122 loop. */
9123 unsigned int nr_outer_loops = 0;
9124 class loop *single_outer = NULL;
9125 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9127 gcc_assert (loop_outer (loop) == outer);
9129 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9130 continue;
9132 if (region_exit != NULL
9133 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9134 continue;
9136 nr_outer_loops++;
9137 single_outer = loop;
9139 if (nr_outer_loops != 1)
9140 return;
9142 for (class loop *loop = single_outer->inner;
9143 loop != NULL;
9144 loop = loop->inner)
9145 if (loop->next)
9146 return;
9148 /* Mark the loops in the region. */
9149 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9150 loop->in_oacc_kernels_region = true;
9153 /* Build target argument identifier from the DEVICE identifier, value
9154 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9156 static tree
9157 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9159 tree t = build_int_cst (integer_type_node, device);
9160 if (subseqent_param)
9161 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9162 build_int_cst (integer_type_node,
9163 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9164 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9165 build_int_cst (integer_type_node, id));
9166 return t;
9169 /* Like above but return it in type that can be directly stored as an element
9170 of the argument array. */
9172 static tree
9173 get_target_argument_identifier (int device, bool subseqent_param, int id)
9175 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9176 return fold_convert (ptr_type_node, t);
9179 /* Return a target argument consisting of DEVICE identifier, value identifier
9180 ID, and the actual VALUE. */
9182 static tree
9183 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9184 tree value)
9186 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9187 fold_convert (integer_type_node, value),
9188 build_int_cst (unsigned_type_node,
9189 GOMP_TARGET_ARG_VALUE_SHIFT));
9190 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9191 get_target_argument_identifier_1 (device, false, id));
9192 t = fold_convert (ptr_type_node, t);
9193 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9196 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9197 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9198 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9199 arguments. */
9201 static void
9202 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9203 int id, tree value, vec <tree> *args)
9205 if (tree_fits_shwi_p (value)
9206 && tree_to_shwi (value) > -(1 << 15)
9207 && tree_to_shwi (value) < (1 << 15))
9208 args->quick_push (get_target_argument_value (gsi, device, id, value));
9209 else
9211 args->quick_push (get_target_argument_identifier (device, true, id));
9212 value = fold_convert (ptr_type_node, value);
9213 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9214 GSI_SAME_STMT);
9215 args->quick_push (value);
9219 /* Create an array of arguments that is then passed to GOMP_target. */
9221 static tree
9222 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9224 auto_vec <tree, 6> args;
9225 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9226 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9227 if (c)
9228 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9229 else
9230 t = integer_minus_one_node;
9231 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9232 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9234 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9235 if (c)
9236 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9237 else
9238 t = integer_minus_one_node;
9239 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9240 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9241 &args);
9243 /* Produce more, perhaps device specific, arguments here. */
9245 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9246 args.length () + 1),
9247 ".omp_target_args");
9248 for (unsigned i = 0; i < args.length (); i++)
9250 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9251 build_int_cst (integer_type_node, i),
9252 NULL_TREE, NULL_TREE);
9253 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9254 GSI_SAME_STMT);
9256 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9257 build_int_cst (integer_type_node, args.length ()),
9258 NULL_TREE, NULL_TREE);
9259 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9260 GSI_SAME_STMT);
9261 TREE_ADDRESSABLE (argarray) = 1;
9262 return build_fold_addr_expr (argarray);
9265 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9267 static void
9268 expand_omp_target (struct omp_region *region)
9270 basic_block entry_bb, exit_bb, new_bb;
9271 struct function *child_cfun;
9272 tree child_fn, block, t;
9273 gimple_stmt_iterator gsi;
9274 gomp_target *entry_stmt;
9275 gimple *stmt;
9276 edge e;
9277 bool offloaded;
9278 int target_kind;
9280 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9281 target_kind = gimple_omp_target_kind (entry_stmt);
9282 new_bb = region->entry;
9284 offloaded = is_gimple_omp_offloaded (entry_stmt);
9285 switch (target_kind)
9287 case GF_OMP_TARGET_KIND_REGION:
9288 case GF_OMP_TARGET_KIND_UPDATE:
9289 case GF_OMP_TARGET_KIND_ENTER_DATA:
9290 case GF_OMP_TARGET_KIND_EXIT_DATA:
9291 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9292 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9293 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9294 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9295 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9296 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9297 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9298 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9299 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9300 case GF_OMP_TARGET_KIND_DATA:
9301 case GF_OMP_TARGET_KIND_OACC_DATA:
9302 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9303 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9304 break;
9305 default:
9306 gcc_unreachable ();
9309 child_fn = NULL_TREE;
9310 child_cfun = NULL;
9311 if (offloaded)
9313 child_fn = gimple_omp_target_child_fn (entry_stmt);
9314 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9317 /* Supported by expand_omp_taskreg, but not here. */
9318 if (child_cfun != NULL)
9319 gcc_checking_assert (!child_cfun->cfg);
9320 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9322 entry_bb = region->entry;
9323 exit_bb = region->exit;
9325 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9326 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9328 /* Going on, all OpenACC compute constructs are mapped to
9329 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9330 To distinguish between them, we attach attributes. */
9331 switch (target_kind)
9333 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9334 DECL_ATTRIBUTES (child_fn)
9335 = tree_cons (get_identifier ("oacc parallel"),
9336 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9337 break;
9338 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9339 DECL_ATTRIBUTES (child_fn)
9340 = tree_cons (get_identifier ("oacc kernels"),
9341 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9342 break;
9343 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9344 DECL_ATTRIBUTES (child_fn)
9345 = tree_cons (get_identifier ("oacc serial"),
9346 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9347 break;
9348 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9349 DECL_ATTRIBUTES (child_fn)
9350 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9351 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9352 break;
9353 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9354 DECL_ATTRIBUTES (child_fn)
9355 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9356 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9357 break;
9358 default:
9359 /* Make sure we don't miss any. */
9360 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9361 && is_gimple_omp_offloaded (entry_stmt)));
9362 break;
9365 if (offloaded)
9367 unsigned srcidx, dstidx, num;
9369 /* If the offloading region needs data sent from the parent
9370 function, then the very first statement (except possible
9371 tree profile counter updates) of the offloading body
9372 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9373 &.OMP_DATA_O is passed as an argument to the child function,
9374 we need to replace it with the argument as seen by the child
9375 function.
9377 In most cases, this will end up being the identity assignment
9378 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9379 a function call that has been inlined, the original PARM_DECL
9380 .OMP_DATA_I may have been converted into a different local
9381 variable. In which case, we need to keep the assignment. */
9382 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9383 if (data_arg)
9385 basic_block entry_succ_bb = single_succ (entry_bb);
9386 gimple_stmt_iterator gsi;
9387 tree arg;
9388 gimple *tgtcopy_stmt = NULL;
9389 tree sender = TREE_VEC_ELT (data_arg, 0);
9391 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9393 gcc_assert (!gsi_end_p (gsi));
9394 stmt = gsi_stmt (gsi);
9395 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9396 continue;
9398 if (gimple_num_ops (stmt) == 2)
9400 tree arg = gimple_assign_rhs1 (stmt);
9402 /* We're ignoring the subcode because we're
9403 effectively doing a STRIP_NOPS. */
9405 if (TREE_CODE (arg) == ADDR_EXPR
9406 && TREE_OPERAND (arg, 0) == sender)
9408 tgtcopy_stmt = stmt;
9409 break;
9414 gcc_assert (tgtcopy_stmt != NULL);
9415 arg = DECL_ARGUMENTS (child_fn);
9417 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9418 gsi_remove (&gsi, true);
9421 /* Declare local variables needed in CHILD_CFUN. */
9422 block = DECL_INITIAL (child_fn);
9423 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9424 /* The gimplifier could record temporaries in the offloading block
9425 rather than in containing function's local_decls chain,
9426 which would mean cgraph missed finalizing them. Do it now. */
9427 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9428 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9429 varpool_node::finalize_decl (t);
9430 DECL_SAVED_TREE (child_fn) = NULL;
9431 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9432 gimple_set_body (child_fn, NULL);
9433 TREE_USED (block) = 1;
9435 /* Reset DECL_CONTEXT on function arguments. */
9436 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9437 DECL_CONTEXT (t) = child_fn;
9439 /* Split ENTRY_BB at GIMPLE_*,
9440 so that it can be moved to the child function. */
9441 gsi = gsi_last_nondebug_bb (entry_bb);
9442 stmt = gsi_stmt (gsi);
9443 gcc_assert (stmt
9444 && gimple_code (stmt) == gimple_code (entry_stmt));
9445 e = split_block (entry_bb, stmt);
9446 gsi_remove (&gsi, true);
9447 entry_bb = e->dest;
9448 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9450 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9451 if (exit_bb)
9453 gsi = gsi_last_nondebug_bb (exit_bb);
9454 gcc_assert (!gsi_end_p (gsi)
9455 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9456 stmt = gimple_build_return (NULL);
9457 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9458 gsi_remove (&gsi, true);
9461 /* Move the offloading region into CHILD_CFUN. */
9463 block = gimple_block (entry_stmt);
9465 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9466 if (exit_bb)
9467 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9468 /* When the OMP expansion process cannot guarantee an up-to-date
9469 loop tree arrange for the child function to fixup loops. */
9470 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9471 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9473 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9474 num = vec_safe_length (child_cfun->local_decls);
9475 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9477 t = (*child_cfun->local_decls)[srcidx];
9478 if (DECL_CONTEXT (t) == cfun->decl)
9479 continue;
9480 if (srcidx != dstidx)
9481 (*child_cfun->local_decls)[dstidx] = t;
9482 dstidx++;
9484 if (dstidx != num)
9485 vec_safe_truncate (child_cfun->local_decls, dstidx);
9487 /* Inform the callgraph about the new function. */
9488 child_cfun->curr_properties = cfun->curr_properties;
9489 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9490 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9491 cgraph_node *node = cgraph_node::get_create (child_fn);
9492 node->parallelized_function = 1;
9493 cgraph_node::add_new_function (child_fn, true);
9495 /* Add the new function to the offload table. */
9496 if (ENABLE_OFFLOADING)
9498 if (in_lto_p)
9499 DECL_PRESERVE_P (child_fn) = 1;
9500 vec_safe_push (offload_funcs, child_fn);
9503 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9504 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9506 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9507 fixed in a following pass. */
9508 push_cfun (child_cfun);
9509 if (need_asm)
9510 assign_assembler_name_if_needed (child_fn);
9511 cgraph_edge::rebuild_edges ();
9513 /* Some EH regions might become dead, see PR34608. If
9514 pass_cleanup_cfg isn't the first pass to happen with the
9515 new child, these dead EH edges might cause problems.
9516 Clean them up now. */
9517 if (flag_exceptions)
9519 basic_block bb;
9520 bool changed = false;
9522 FOR_EACH_BB_FN (bb, cfun)
9523 changed |= gimple_purge_dead_eh_edges (bb);
9524 if (changed)
9525 cleanup_tree_cfg ();
9527 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9528 verify_loop_structure ();
9529 pop_cfun ();
9531 if (dump_file && !gimple_in_ssa_p (cfun))
9533 omp_any_child_fn_dumped = true;
9534 dump_function_header (dump_file, child_fn, dump_flags);
9535 dump_function_to_file (child_fn, dump_file, dump_flags);
9538 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9541 /* Emit a library call to launch the offloading region, or do data
9542 transfers. */
9543 tree t1, t2, t3, t4, depend, c, clauses;
9544 enum built_in_function start_ix;
9545 unsigned int flags_i = 0;
9547 switch (gimple_omp_target_kind (entry_stmt))
9549 case GF_OMP_TARGET_KIND_REGION:
9550 start_ix = BUILT_IN_GOMP_TARGET;
9551 break;
9552 case GF_OMP_TARGET_KIND_DATA:
9553 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9554 break;
9555 case GF_OMP_TARGET_KIND_UPDATE:
9556 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9557 break;
9558 case GF_OMP_TARGET_KIND_ENTER_DATA:
9559 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9560 break;
9561 case GF_OMP_TARGET_KIND_EXIT_DATA:
9562 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9563 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9564 break;
9565 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9566 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9567 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9568 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9569 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9570 start_ix = BUILT_IN_GOACC_PARALLEL;
9571 break;
9572 case GF_OMP_TARGET_KIND_OACC_DATA:
9573 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9574 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9575 start_ix = BUILT_IN_GOACC_DATA_START;
9576 break;
9577 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9578 start_ix = BUILT_IN_GOACC_UPDATE;
9579 break;
9580 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9581 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9582 break;
9583 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9584 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9585 break;
9586 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9587 start_ix = BUILT_IN_GOACC_DECLARE;
9588 break;
9589 default:
9590 gcc_unreachable ();
9593 clauses = gimple_omp_target_clauses (entry_stmt);
9595 tree device = NULL_TREE;
9596 location_t device_loc = UNKNOWN_LOCATION;
9597 tree goacc_flags = NULL_TREE;
9598 if (is_gimple_omp_oacc (entry_stmt))
9600 /* By default, no GOACC_FLAGs are set. */
9601 goacc_flags = integer_zero_node;
9603 else
9605 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9606 if (c)
9608 device = OMP_CLAUSE_DEVICE_ID (c);
9609 device_loc = OMP_CLAUSE_LOCATION (c);
9611 else
9613 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9614 library choose). */
9615 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9616 device_loc = gimple_location (entry_stmt);
9619 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9620 /* FIXME: in_reduction(...) nowait is unimplemented yet, pretend
9621 nowait doesn't appear. */
9622 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
9623 c = NULL;
9624 if (c)
9625 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9628 /* By default, there is no conditional. */
9629 tree cond = NULL_TREE;
9630 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9631 if (c)
9632 cond = OMP_CLAUSE_IF_EXPR (c);
9633 /* If we found the clause 'if (cond)', build:
9634 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9635 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9636 if (cond)
9638 tree *tp;
9639 if (is_gimple_omp_oacc (entry_stmt))
9640 tp = &goacc_flags;
9641 else
9643 /* Ensure 'device' is of the correct type. */
9644 device = fold_convert_loc (device_loc, integer_type_node, device);
9646 tp = &device;
9649 cond = gimple_boolify (cond);
9651 basic_block cond_bb, then_bb, else_bb;
9652 edge e;
9653 tree tmp_var;
9655 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9656 if (offloaded)
9657 e = split_block_after_labels (new_bb);
9658 else
9660 gsi = gsi_last_nondebug_bb (new_bb);
9661 gsi_prev (&gsi);
9662 e = split_block (new_bb, gsi_stmt (gsi));
9664 cond_bb = e->src;
9665 new_bb = e->dest;
9666 remove_edge (e);
9668 then_bb = create_empty_bb (cond_bb);
9669 else_bb = create_empty_bb (then_bb);
9670 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9671 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9673 stmt = gimple_build_cond_empty (cond);
9674 gsi = gsi_last_bb (cond_bb);
9675 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9677 gsi = gsi_start_bb (then_bb);
9678 stmt = gimple_build_assign (tmp_var, *tp);
9679 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9681 gsi = gsi_start_bb (else_bb);
9682 if (is_gimple_omp_oacc (entry_stmt))
9683 stmt = gimple_build_assign (tmp_var,
9684 BIT_IOR_EXPR,
9685 *tp,
9686 build_int_cst (integer_type_node,
9687 GOACC_FLAG_HOST_FALLBACK));
9688 else
9689 stmt = gimple_build_assign (tmp_var,
9690 build_int_cst (integer_type_node,
9691 GOMP_DEVICE_HOST_FALLBACK));
9692 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9696 add_bb_to_loop (then_bb, cond_bb->loop_father);
9697 add_bb_to_loop (else_bb, cond_bb->loop_father);
9698 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9699 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9701 *tp = tmp_var;
9703 gsi = gsi_last_nondebug_bb (new_bb);
9705 else
9707 gsi = gsi_last_nondebug_bb (new_bb);
9709 if (device != NULL_TREE)
9710 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9711 true, GSI_SAME_STMT);
9714 t = gimple_omp_target_data_arg (entry_stmt);
9715 if (t == NULL)
9717 t1 = size_zero_node;
9718 t2 = build_zero_cst (ptr_type_node);
9719 t3 = t2;
9720 t4 = t2;
9722 else
9724 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9725 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9726 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9727 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9728 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9731 gimple *g;
9732 bool tagging = false;
9733 /* The maximum number used by any start_ix, without varargs. */
9734 auto_vec<tree, 11> args;
9735 if (is_gimple_omp_oacc (entry_stmt))
9737 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9738 TREE_TYPE (goacc_flags), goacc_flags);
9739 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9740 NULL_TREE, true,
9741 GSI_SAME_STMT);
9742 args.quick_push (goacc_flags_m);
9744 else
9745 args.quick_push (device);
9746 if (offloaded)
9747 args.quick_push (build_fold_addr_expr (child_fn));
9748 args.quick_push (t1);
9749 args.quick_push (t2);
9750 args.quick_push (t3);
9751 args.quick_push (t4);
9752 switch (start_ix)
9754 case BUILT_IN_GOACC_DATA_START:
9755 case BUILT_IN_GOACC_DECLARE:
9756 case BUILT_IN_GOMP_TARGET_DATA:
9757 break;
9758 case BUILT_IN_GOMP_TARGET:
9759 case BUILT_IN_GOMP_TARGET_UPDATE:
9760 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9761 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9762 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9763 if (c)
9764 depend = OMP_CLAUSE_DECL (c);
9765 else
9766 depend = build_int_cst (ptr_type_node, 0);
9767 args.quick_push (depend);
9768 if (start_ix == BUILT_IN_GOMP_TARGET)
9769 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9770 break;
9771 case BUILT_IN_GOACC_PARALLEL:
9772 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9774 tree dims = NULL_TREE;
9775 unsigned int ix;
9777 /* For serial constructs we set all dimensions to 1. */
9778 for (ix = GOMP_DIM_MAX; ix--;)
9779 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9780 oacc_replace_fn_attrib (child_fn, dims);
9782 else
9783 oacc_set_fn_attrib (child_fn, clauses, &args);
9784 tagging = true;
9785 /* FALLTHRU */
9786 case BUILT_IN_GOACC_ENTER_DATA:
9787 case BUILT_IN_GOACC_EXIT_DATA:
9788 case BUILT_IN_GOACC_UPDATE:
9790 tree t_async = NULL_TREE;
9792 /* If present, use the value specified by the respective
9793 clause, making sure that is of the correct type. */
9794 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9795 if (c)
9796 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9797 integer_type_node,
9798 OMP_CLAUSE_ASYNC_EXPR (c));
9799 else if (!tagging)
9800 /* Default values for t_async. */
9801 t_async = fold_convert_loc (gimple_location (entry_stmt),
9802 integer_type_node,
9803 build_int_cst (integer_type_node,
9804 GOMP_ASYNC_SYNC));
9805 if (tagging && t_async)
9807 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9809 if (TREE_CODE (t_async) == INTEGER_CST)
9811 /* See if we can pack the async arg in to the tag's
9812 operand. */
9813 i_async = TREE_INT_CST_LOW (t_async);
9814 if (i_async < GOMP_LAUNCH_OP_MAX)
9815 t_async = NULL_TREE;
9816 else
9817 i_async = GOMP_LAUNCH_OP_MAX;
9819 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9820 i_async));
9822 if (t_async)
9823 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9824 NULL_TREE, true,
9825 GSI_SAME_STMT));
9827 /* Save the argument index, and ... */
9828 unsigned t_wait_idx = args.length ();
9829 unsigned num_waits = 0;
9830 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9831 if (!tagging || c)
9832 /* ... push a placeholder. */
9833 args.safe_push (integer_zero_node);
9835 for (; c; c = OMP_CLAUSE_CHAIN (c))
9836 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9838 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9839 integer_type_node,
9840 OMP_CLAUSE_WAIT_EXPR (c));
9841 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9842 GSI_SAME_STMT);
9843 args.safe_push (arg);
9844 num_waits++;
9847 if (!tagging || num_waits)
9849 tree len;
9851 /* Now that we know the number, update the placeholder. */
9852 if (tagging)
9853 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9854 else
9855 len = build_int_cst (integer_type_node, num_waits);
9856 len = fold_convert_loc (gimple_location (entry_stmt),
9857 unsigned_type_node, len);
9858 args[t_wait_idx] = len;
9861 break;
9862 default:
9863 gcc_unreachable ();
9865 if (tagging)
9866 /* Push terminal marker - zero. */
9867 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9869 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9870 gimple_set_location (g, gimple_location (entry_stmt));
9871 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9872 if (!offloaded)
9874 g = gsi_stmt (gsi);
9875 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9876 gsi_remove (&gsi, true);
9880 /* Expand the parallel region tree rooted at REGION. Expansion
9881 proceeds in depth-first order. Innermost regions are expanded
9882 first. This way, parallel regions that require a new function to
9883 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9884 internal dependencies in their body. */
9886 static void
9887 expand_omp (struct omp_region *region)
9889 omp_any_child_fn_dumped = false;
9890 while (region)
9892 location_t saved_location;
9893 gimple *inner_stmt = NULL;
9895 /* First, determine whether this is a combined parallel+workshare
9896 region. */
9897 if (region->type == GIMPLE_OMP_PARALLEL)
9898 determine_parallel_type (region);
9900 if (region->type == GIMPLE_OMP_FOR
9901 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9902 inner_stmt = last_stmt (region->inner->entry);
9904 if (region->inner)
9905 expand_omp (region->inner);
9907 saved_location = input_location;
9908 if (gimple_has_location (last_stmt (region->entry)))
9909 input_location = gimple_location (last_stmt (region->entry));
9911 switch (region->type)
9913 case GIMPLE_OMP_PARALLEL:
9914 case GIMPLE_OMP_TASK:
9915 expand_omp_taskreg (region);
9916 break;
9918 case GIMPLE_OMP_FOR:
9919 expand_omp_for (region, inner_stmt);
9920 break;
9922 case GIMPLE_OMP_SECTIONS:
9923 expand_omp_sections (region);
9924 break;
9926 case GIMPLE_OMP_SECTION:
9927 /* Individual omp sections are handled together with their
9928 parent GIMPLE_OMP_SECTIONS region. */
9929 break;
9931 case GIMPLE_OMP_SINGLE:
9932 case GIMPLE_OMP_SCOPE:
9933 expand_omp_single (region);
9934 break;
9936 case GIMPLE_OMP_ORDERED:
9938 gomp_ordered *ord_stmt
9939 = as_a <gomp_ordered *> (last_stmt (region->entry));
9940 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9941 OMP_CLAUSE_DEPEND))
9943 /* We'll expand these when expanding corresponding
9944 worksharing region with ordered(n) clause. */
9945 gcc_assert (region->outer
9946 && region->outer->type == GIMPLE_OMP_FOR);
9947 region->ord_stmt = ord_stmt;
9948 break;
9951 /* FALLTHRU */
9952 case GIMPLE_OMP_MASTER:
9953 case GIMPLE_OMP_MASKED:
9954 case GIMPLE_OMP_TASKGROUP:
9955 case GIMPLE_OMP_CRITICAL:
9956 case GIMPLE_OMP_TEAMS:
9957 expand_omp_synch (region);
9958 break;
9960 case GIMPLE_OMP_ATOMIC_LOAD:
9961 expand_omp_atomic (region);
9962 break;
9964 case GIMPLE_OMP_TARGET:
9965 expand_omp_target (region);
9966 break;
9968 default:
9969 gcc_unreachable ();
9972 input_location = saved_location;
9973 region = region->next;
9975 if (omp_any_child_fn_dumped)
9977 if (dump_file)
9978 dump_function_header (dump_file, current_function_decl, dump_flags);
9979 omp_any_child_fn_dumped = false;
9983 /* Helper for build_omp_regions. Scan the dominator tree starting at
9984 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9985 true, the function ends once a single tree is built (otherwise, whole
9986 forest of OMP constructs may be built). */
9988 static void
9989 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9990 bool single_tree)
9992 gimple_stmt_iterator gsi;
9993 gimple *stmt;
9994 basic_block son;
9996 gsi = gsi_last_nondebug_bb (bb);
9997 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9999 struct omp_region *region;
10000 enum gimple_code code;
10002 stmt = gsi_stmt (gsi);
10003 code = gimple_code (stmt);
10004 if (code == GIMPLE_OMP_RETURN)
10006 /* STMT is the return point out of region PARENT. Mark it
10007 as the exit point and make PARENT the immediately
10008 enclosing region. */
10009 gcc_assert (parent);
10010 region = parent;
10011 region->exit = bb;
10012 parent = parent->outer;
10014 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10016 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10017 GIMPLE_OMP_RETURN, but matches with
10018 GIMPLE_OMP_ATOMIC_LOAD. */
10019 gcc_assert (parent);
10020 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10021 region = parent;
10022 region->exit = bb;
10023 parent = parent->outer;
10025 else if (code == GIMPLE_OMP_CONTINUE)
10027 gcc_assert (parent);
10028 parent->cont = bb;
10030 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10032 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10033 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10035 else
10037 region = new_omp_region (bb, code, parent);
10038 /* Otherwise... */
10039 if (code == GIMPLE_OMP_TARGET)
10041 switch (gimple_omp_target_kind (stmt))
10043 case GF_OMP_TARGET_KIND_REGION:
10044 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10045 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10046 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10047 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10048 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10049 break;
10050 case GF_OMP_TARGET_KIND_UPDATE:
10051 case GF_OMP_TARGET_KIND_ENTER_DATA:
10052 case GF_OMP_TARGET_KIND_EXIT_DATA:
10053 case GF_OMP_TARGET_KIND_DATA:
10054 case GF_OMP_TARGET_KIND_OACC_DATA:
10055 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10056 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10057 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10058 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10059 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10060 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10061 /* ..., other than for those stand-alone directives... */
10062 region = NULL;
10063 break;
10064 default:
10065 gcc_unreachable ();
10068 else if (code == GIMPLE_OMP_ORDERED
10069 && omp_find_clause (gimple_omp_ordered_clauses
10070 (as_a <gomp_ordered *> (stmt)),
10071 OMP_CLAUSE_DEPEND))
10072 /* #pragma omp ordered depend is also just a stand-alone
10073 directive. */
10074 region = NULL;
10075 else if (code == GIMPLE_OMP_TASK
10076 && gimple_omp_task_taskwait_p (stmt))
10077 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10078 region = NULL;
10079 /* ..., this directive becomes the parent for a new region. */
10080 if (region)
10081 parent = region;
10085 if (single_tree && !parent)
10086 return;
10088 for (son = first_dom_son (CDI_DOMINATORS, bb);
10089 son;
10090 son = next_dom_son (CDI_DOMINATORS, son))
10091 build_omp_regions_1 (son, parent, single_tree);
10094 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10095 root_omp_region. */
10097 static void
10098 build_omp_regions_root (basic_block root)
10100 gcc_assert (root_omp_region == NULL);
10101 build_omp_regions_1 (root, NULL, true);
10102 gcc_assert (root_omp_region != NULL);
10105 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10107 void
10108 omp_expand_local (basic_block head)
10110 build_omp_regions_root (head);
10111 if (dump_file && (dump_flags & TDF_DETAILS))
10113 fprintf (dump_file, "\nOMP region tree\n\n");
10114 dump_omp_region (dump_file, root_omp_region, 0);
10115 fprintf (dump_file, "\n");
10118 remove_exit_barriers (root_omp_region);
10119 expand_omp (root_omp_region);
10121 omp_free_regions ();
10124 /* Scan the CFG and build a tree of OMP regions. Return the root of
10125 the OMP region tree. */
10127 static void
10128 build_omp_regions (void)
10130 gcc_assert (root_omp_region == NULL);
10131 calculate_dominance_info (CDI_DOMINATORS);
10132 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10135 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10137 static unsigned int
10138 execute_expand_omp (void)
10140 build_omp_regions ();
10142 if (!root_omp_region)
10143 return 0;
10145 if (dump_file)
10147 fprintf (dump_file, "\nOMP region tree\n\n");
10148 dump_omp_region (dump_file, root_omp_region, 0);
10149 fprintf (dump_file, "\n");
10152 remove_exit_barriers (root_omp_region);
10154 expand_omp (root_omp_region);
10156 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10157 verify_loop_structure ();
10158 cleanup_tree_cfg ();
10160 omp_free_regions ();
10162 return 0;
10165 /* OMP expansion -- the default pass, run before creation of SSA form. */
10167 namespace {
10169 const pass_data pass_data_expand_omp =
10171 GIMPLE_PASS, /* type */
10172 "ompexp", /* name */
10173 OPTGROUP_OMP, /* optinfo_flags */
10174 TV_NONE, /* tv_id */
10175 PROP_gimple_any, /* properties_required */
10176 PROP_gimple_eomp, /* properties_provided */
10177 0, /* properties_destroyed */
10178 0, /* todo_flags_start */
10179 0, /* todo_flags_finish */
10182 class pass_expand_omp : public gimple_opt_pass
10184 public:
10185 pass_expand_omp (gcc::context *ctxt)
10186 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10189 /* opt_pass methods: */
10190 virtual unsigned int execute (function *)
10192 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10193 || flag_openmp_simd != 0)
10194 && !seen_error ());
10196 /* This pass always runs, to provide PROP_gimple_eomp.
10197 But often, there is nothing to do. */
10198 if (!gate)
10199 return 0;
10201 return execute_expand_omp ();
10204 }; // class pass_expand_omp
10206 } // anon namespace
10208 gimple_opt_pass *
10209 make_pass_expand_omp (gcc::context *ctxt)
10211 return new pass_expand_omp (ctxt);
10214 namespace {
10216 const pass_data pass_data_expand_omp_ssa =
10218 GIMPLE_PASS, /* type */
10219 "ompexpssa", /* name */
10220 OPTGROUP_OMP, /* optinfo_flags */
10221 TV_NONE, /* tv_id */
10222 PROP_cfg | PROP_ssa, /* properties_required */
10223 PROP_gimple_eomp, /* properties_provided */
10224 0, /* properties_destroyed */
10225 0, /* todo_flags_start */
10226 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10229 class pass_expand_omp_ssa : public gimple_opt_pass
10231 public:
10232 pass_expand_omp_ssa (gcc::context *ctxt)
10233 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10236 /* opt_pass methods: */
10237 virtual bool gate (function *fun)
10239 return !(fun->curr_properties & PROP_gimple_eomp);
10241 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10242 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10244 }; // class pass_expand_omp_ssa
10246 } // anon namespace
10248 gimple_opt_pass *
10249 make_pass_expand_omp_ssa (gcc::context *ctxt)
10251 return new pass_expand_omp_ssa (ctxt);
10254 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10255 GIMPLE_* codes. */
10257 bool
10258 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10259 int *region_idx)
10261 gimple *last = last_stmt (bb);
10262 enum gimple_code code = gimple_code (last);
10263 struct omp_region *cur_region = *region;
10264 bool fallthru = false;
10266 switch (code)
10268 case GIMPLE_OMP_PARALLEL:
10269 case GIMPLE_OMP_FOR:
10270 case GIMPLE_OMP_SINGLE:
10271 case GIMPLE_OMP_TEAMS:
10272 case GIMPLE_OMP_MASTER:
10273 case GIMPLE_OMP_MASKED:
10274 case GIMPLE_OMP_SCOPE:
10275 case GIMPLE_OMP_TASKGROUP:
10276 case GIMPLE_OMP_CRITICAL:
10277 case GIMPLE_OMP_SECTION:
10278 cur_region = new_omp_region (bb, code, cur_region);
10279 fallthru = true;
10280 break;
10282 case GIMPLE_OMP_TASK:
10283 cur_region = new_omp_region (bb, code, cur_region);
10284 fallthru = true;
10285 if (gimple_omp_task_taskwait_p (last))
10286 cur_region = cur_region->outer;
10287 break;
10289 case GIMPLE_OMP_ORDERED:
10290 cur_region = new_omp_region (bb, code, cur_region);
10291 fallthru = true;
10292 if (omp_find_clause (gimple_omp_ordered_clauses
10293 (as_a <gomp_ordered *> (last)),
10294 OMP_CLAUSE_DEPEND))
10295 cur_region = cur_region->outer;
10296 break;
10298 case GIMPLE_OMP_TARGET:
10299 cur_region = new_omp_region (bb, code, cur_region);
10300 fallthru = true;
10301 switch (gimple_omp_target_kind (last))
10303 case GF_OMP_TARGET_KIND_REGION:
10304 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10305 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10306 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10307 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10308 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10309 break;
10310 case GF_OMP_TARGET_KIND_UPDATE:
10311 case GF_OMP_TARGET_KIND_ENTER_DATA:
10312 case GF_OMP_TARGET_KIND_EXIT_DATA:
10313 case GF_OMP_TARGET_KIND_DATA:
10314 case GF_OMP_TARGET_KIND_OACC_DATA:
10315 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10316 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10317 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10318 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10319 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10320 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10321 cur_region = cur_region->outer;
10322 break;
10323 default:
10324 gcc_unreachable ();
10326 break;
10328 case GIMPLE_OMP_SECTIONS:
10329 cur_region = new_omp_region (bb, code, cur_region);
10330 fallthru = true;
10331 break;
10333 case GIMPLE_OMP_SECTIONS_SWITCH:
10334 fallthru = false;
10335 break;
10337 case GIMPLE_OMP_ATOMIC_LOAD:
10338 case GIMPLE_OMP_ATOMIC_STORE:
10339 fallthru = true;
10340 break;
10342 case GIMPLE_OMP_RETURN:
10343 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10344 somewhere other than the next block. This will be
10345 created later. */
10346 cur_region->exit = bb;
10347 if (cur_region->type == GIMPLE_OMP_TASK)
10348 /* Add an edge corresponding to not scheduling the task
10349 immediately. */
10350 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10351 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10352 cur_region = cur_region->outer;
10353 break;
10355 case GIMPLE_OMP_CONTINUE:
10356 cur_region->cont = bb;
10357 switch (cur_region->type)
10359 case GIMPLE_OMP_FOR:
10360 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10361 succs edges as abnormal to prevent splitting
10362 them. */
10363 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10364 /* Make the loopback edge. */
10365 make_edge (bb, single_succ (cur_region->entry),
10366 EDGE_ABNORMAL);
10368 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10369 corresponds to the case that the body of the loop
10370 is not executed at all. */
10371 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10372 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10373 fallthru = false;
10374 break;
10376 case GIMPLE_OMP_SECTIONS:
10377 /* Wire up the edges into and out of the nested sections. */
10379 basic_block switch_bb = single_succ (cur_region->entry);
10381 struct omp_region *i;
10382 for (i = cur_region->inner; i ; i = i->next)
10384 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10385 make_edge (switch_bb, i->entry, 0);
10386 make_edge (i->exit, bb, EDGE_FALLTHRU);
10389 /* Make the loopback edge to the block with
10390 GIMPLE_OMP_SECTIONS_SWITCH. */
10391 make_edge (bb, switch_bb, 0);
10393 /* Make the edge from the switch to exit. */
10394 make_edge (switch_bb, bb->next_bb, 0);
10395 fallthru = false;
10397 break;
10399 case GIMPLE_OMP_TASK:
10400 fallthru = true;
10401 break;
10403 default:
10404 gcc_unreachable ();
10406 break;
10408 default:
10409 gcc_unreachable ();
10412 if (*region != cur_region)
10414 *region = cur_region;
10415 if (cur_region)
10416 *region_idx = cur_region->entry->index;
10417 else
10418 *region_idx = 0;
10421 return fallthru;