/* gcc/omp-expand.c */
/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to
   the runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */
static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
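/* For illustration (example values assumed, not from the original
   source): for #pragma omp parallel for schedule (dynamic, 4) over
   i = 0 .. 99 the vector built above is { 0, 100, 1, 4 } -- start,
   end, increment and adjusted chunk size -- which expand_parallel_call
   splices into the GOMP_parallel_loop_dynamic call between the
   num_threads and flags arguments.  */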
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
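/* For a parallel region enclosing a single for region, the dump looks
   roughly like this (block numbers invented for illustration):

     bb 2: GIMPLE_OMP_PARALLEL
	 bb 3: GIMPLE_OMP_FOR
	 bb 5: GIMPLE_OMP_CONTINUE
	 bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */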
/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}
/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
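/* For instance, handed the tree expression a < b, this returns the
   equivalent of gimple_build_cond (LT_EXPR, a, b, NULL_TREE, NULL_TREE);
   the branch targets are left empty because in the CFG they are carried
   by the true/false outgoing edges instead.  */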
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}
/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
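/* A host #pragma omp teams num_teams (4) thread_limit (8) therefore
   expands to a call along the lines of

     GOMP_teams_reg (foo._omp_fn.1, &.omp_data_o.2, 4, 8, 0);

   with the trailing 0 being the so-far-unused flags argument
   (function and record names invented for illustration).  */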
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}
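/* E.g. a vector {a, b, c} yields the chain a -> b -> c: walking the
   vector backwards and pushing each decl onto the front of the chain
   preserves the original order of the elements.  */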
/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}
/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;	/* Base value.  */
  tree iters;	/* Number of steps.  */
  tree step;	/* Step size.  */
  tree tile;	/* Tile increment (if tiled).  */
  tree outer;	/* Tile iterator var.  */
};
/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
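/* Worked example of the iteration count above: for an upward loop with
   b = 0, e = 10, s = 3 the range is 10 and
   iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4, covering the
   iterations 0, 3, 6 and 9; TOTAL is then the product of the per-loop
   counts across the whole collapsed nest.  */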
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
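/* E.g. with collapse == 2, three outer iterations and four inner ones,
   a linear IVAR in [0, 12) is decomposed by the loop above into
   inner = ivar % 4 and outer = ivar / 4, and each collapsed variable
   is then set to base + index * step.  */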
/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula,
   otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	      }
	  }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

     bool zero3, zero2, zero1, zero;

     zero3 = N32 c3 N31;
     count3 = (N32 - N31) /[cl] STEP3;
     zero2 = N22 c2 N21;
     count2 = (N22 - N21) /[cl] STEP2;
     zero1 = N12 c1 N11;
     count1 = (N12 - N11) /[cl] STEP1;
     zero = zero3 || zero2 || zero1;
     count = count1 * count2 * count3;
     if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */
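/* As a concrete instance of the triangular case mentioned above
   (illustrative, not from the original source): for the nest

     for (i = 0; i < n; i++)
       for (j = 0; j < i; j++)

   the inner count for a given i is i, so the total is
   0 + 1 + ... + (n - 1) = n * (n - 1) / 2, which a closed-form
   (Faulhaber-style) evaluation computes without iterating the loops.  */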
static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	{
	  tree c[4];
	  for (i = 0; i < 4; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      c[i] = OMP_CLAUSE_DECL (innerc);
	    }
	  counts[0] = c[0];
	  fd->first_inner_iterations = c[1];
	  fd->factor = c[2];
	  fd->adjn1 = c[3];
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  bool rect_count_seen = false;
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if (fd->non_rect)
	{
	  /* Skip loops that use outer iterators in their expressions
	     during this phase.  */
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    {
	      counts[i] = build_zero_cst (type);
	      continue;
	    }
	}
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
1896 e->flags = EDGE_TRUE_VALUE;
1897 e->probability = ne->probability.invert ();
1898 if (l2_dom_bb == NULL)
1899 l2_dom_bb = entry_bb;
1900 entry_bb = e->dest;
1901 *gsi = gsi_last_nondebug_bb (entry_bb);
1904 if (POINTER_TYPE_P (itype))
1905 itype = signed_type_for (itype);
1906 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1907 ? -1 : 1));
1908 t = fold_build2 (PLUS_EXPR, itype,
1909 fold_convert (itype, fd->loops[i].step), t);
1910 t = fold_build2 (PLUS_EXPR, itype, t,
1911 fold_convert (itype, fd->loops[i].n2));
1912 t = fold_build2 (MINUS_EXPR, itype, t,
1913 fold_convert (itype, fd->loops[i].n1));
1914 /* ??? We could probably use CEIL_DIV_EXPR instead of
1915 TRUNC_DIV_EXPR and adjust by hand, except that we might not
1916 generate the same code in the end because generically we
1917 don't know that the values involved must be negative for
1918 GT. ??? */
1919 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1920 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1921 fold_build1 (NEGATE_EXPR, itype, t),
1922 fold_build1 (NEGATE_EXPR, itype,
1923 fold_convert (itype,
1924 fd->loops[i].step)));
1925 else
1926 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1927 fold_convert (itype, fd->loops[i].step));
1928 t = fold_convert (type, t);
1929 if (TREE_CODE (t) == INTEGER_CST)
1930 counts[i] = t;
1931 else
1933 if (i < fd->collapse || i != first_zero_iter2)
1934 counts[i] = create_tmp_reg (type, ".count");
1935 expand_omp_build_assign (gsi, counts[i], t);
1937 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1939 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1940 continue;
1941 if (!rect_count_seen)
1943 t = counts[i];
1944 rect_count_seen = true;
1946 else
1947 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1948 expand_omp_build_assign (gsi, fd->loop.n2, t);
1951 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1953 gcc_assert (fd->last_nonrect != -1);
1955 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1956 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1957 build_zero_cst (type));
1958 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1959 if (fd->loops[i].m1
1960 || fd->loops[i].m2
1961 || fd->loops[i].non_rect_referenced)
1962 break;
1963 if (i == fd->last_nonrect
1964 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1965 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1967 int o = fd->first_nonrect;
1968 tree itype = TREE_TYPE (fd->loops[o].v);
1969 tree n1o = create_tmp_reg (itype, ".n1o");
1970 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1971 expand_omp_build_assign (gsi, n1o, t);
1972 tree n2o = create_tmp_reg (itype, ".n2o");
1973 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1974 expand_omp_build_assign (gsi, n2o, t);
1975 if (fd->loops[i].m1 && fd->loops[i].m2)
1976 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1977 unshare_expr (fd->loops[i].m1));
1978 else if (fd->loops[i].m1)
1979 t = fold_unary (NEGATE_EXPR, itype,
1980 unshare_expr (fd->loops[i].m1));
1981 else
1982 t = unshare_expr (fd->loops[i].m2);
1983 tree m2minusm1
1984 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1985 true, GSI_SAME_STMT);
1987 gimple_stmt_iterator gsi2 = *gsi;
1988 gsi_prev (&gsi2);
1989 e = split_block (entry_bb, gsi_stmt (gsi2));
1990 e = split_block (e->dest, (gimple *) NULL);
1991 basic_block bb1 = e->src;
1992 entry_bb = e->dest;
1993 *gsi = gsi_after_labels (entry_bb);
1995 gsi2 = gsi_after_labels (bb1);
1996 tree ostep = fold_convert (itype, fd->loops[o].step);
1997 t = build_int_cst (itype, (fd->loops[o].cond_code
1998 == LT_EXPR ? -1 : 1));
1999 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2000 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2001 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2002 if (TYPE_UNSIGNED (itype)
2003 && fd->loops[o].cond_code == GT_EXPR)
2004 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2005 fold_build1 (NEGATE_EXPR, itype, t),
2006 fold_build1 (NEGATE_EXPR, itype, ostep));
2007 else
2008 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2009 tree outer_niters
2010 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2011 true, GSI_SAME_STMT);
2012 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2013 build_one_cst (itype));
2014 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2015 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2016 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 tree n1, n2, n1e, n2e;
2019 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2020 if (fd->loops[i].m1)
2022 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2023 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2024 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2026 else
2027 n1 = t;
2028 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2029 true, GSI_SAME_STMT);
2030 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2031 if (fd->loops[i].m2)
2033 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2034 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2035 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2037 else
2038 n2 = t;
2039 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2040 true, GSI_SAME_STMT);
2041 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2042 if (fd->loops[i].m1)
2044 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2045 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2046 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2048 else
2049 n1e = t;
2050 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2051 true, GSI_SAME_STMT);
2052 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2053 if (fd->loops[i].m2)
2055 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2056 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2057 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2059 else
2060 n2e = t;
2061 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2062 true, GSI_SAME_STMT);
2063 gcond *cond_stmt
2064 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2065 NULL_TREE, NULL_TREE);
2066 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2067 e = split_block (bb1, cond_stmt);
2068 e->flags = EDGE_TRUE_VALUE;
2069 e->probability = profile_probability::likely ().guessed ();
2070 basic_block bb2 = e->dest;
2071 gsi2 = gsi_after_labels (bb2);
2073 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2074 NULL_TREE, NULL_TREE);
2075 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2076 e = split_block (bb2, cond_stmt);
2077 e->flags = EDGE_TRUE_VALUE;
2078 e->probability = profile_probability::likely ().guessed ();
2079 gsi2 = gsi_after_labels (e->dest);
2081 tree step = fold_convert (itype, fd->loops[i].step);
2082 t = build_int_cst (itype, (fd->loops[i].cond_code
2083 == LT_EXPR ? -1 : 1));
2084 t = fold_build2 (PLUS_EXPR, itype, step, t);
2085 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2086 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2087 if (TYPE_UNSIGNED (itype)
2088 && fd->loops[i].cond_code == GT_EXPR)
2089 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2090 fold_build1 (NEGATE_EXPR, itype, t),
2091 fold_build1 (NEGATE_EXPR, itype, step));
2092 else
2093 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2094 tree first_inner_iterations
2095 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2096 true, GSI_SAME_STMT);
2097 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2098 if (TYPE_UNSIGNED (itype)
2099 && fd->loops[i].cond_code == GT_EXPR)
2100 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2101 fold_build1 (NEGATE_EXPR, itype, t),
2102 fold_build1 (NEGATE_EXPR, itype, step));
2103 else
2104 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2105 tree factor
2106 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2107 true, GSI_SAME_STMT);
2108 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2109 build_one_cst (itype));
2110 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2111 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2112 t = fold_build2 (MULT_EXPR, itype, factor, t);
2113 t = fold_build2 (PLUS_EXPR, itype,
2114 fold_build2 (MULT_EXPR, itype, outer_niters,
2115 first_inner_iterations), t);
2116 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2117 fold_convert (type, t));
2119 basic_block bb3 = create_empty_bb (bb1);
2120 add_bb_to_loop (bb3, bb1->loop_father);
2122 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2123 e->probability = profile_probability::unlikely ().guessed ();
2125 gsi2 = gsi_after_labels (bb3);
2126 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2127 NULL_TREE, NULL_TREE);
2128 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2129 e = split_block (bb3, cond_stmt);
2130 e->flags = EDGE_TRUE_VALUE;
2131 e->probability = profile_probability::likely ().guessed ();
2132 basic_block bb4 = e->dest;
2134 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2135 ne->probability = e->probability.invert ();
2137 basic_block bb5 = create_empty_bb (bb2);
2138 add_bb_to_loop (bb5, bb2->loop_father);
2140 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2141 ne->probability = profile_probability::unlikely ().guessed ();
2143 for (int j = 0; j < 2; j++)
2145 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2146 t = fold_build2 (MINUS_EXPR, itype,
2147 unshare_expr (fd->loops[i].n1),
2148 unshare_expr (fd->loops[i].n2));
2149 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2150 tree tem
2151 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2152 true, GSI_SAME_STMT);
2153 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2154 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2155 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2156 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2157 true, GSI_SAME_STMT);
2158 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2159 if (fd->loops[i].m1)
2161 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2162 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2163 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2165 else
2166 n1 = t;
2167 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2168 true, GSI_SAME_STMT);
2169 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2170 if (fd->loops[i].m2)
2172 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2173 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2174 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2176 else
2177 n2 = t;
2178 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2179 true, GSI_SAME_STMT);
2180 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2182 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2183 NULL_TREE, NULL_TREE);
2184 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2185 e = split_block (gsi_bb (gsi2), cond_stmt);
2186 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2187 e->probability = profile_probability::unlikely ().guessed ();
2188 ne = make_edge (e->src, bb1,
2189 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2190 ne->probability = e->probability.invert ();
2191 gsi2 = gsi_after_labels (e->dest);
2193 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2194 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2196 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2199 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2200 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2201 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2203 if (fd->first_nonrect + 1 == fd->last_nonrect)
2205 fd->first_inner_iterations = first_inner_iterations;
2206 fd->factor = factor;
2207 fd->adjn1 = n1o;
2210 else
2212 /* Fallback implementation. Evaluate the loops with m1/m2
2213 non-NULL as well as their outer loops at runtime using temporaries
2214 instead of the original iteration variables, and in the
2215 body just bump the counter. */
2216 gimple_stmt_iterator gsi2 = *gsi;
2217 gsi_prev (&gsi2);
2218 e = split_block (entry_bb, gsi_stmt (gsi2));
2219 e = split_block (e->dest, (gimple *) NULL);
2220 basic_block cur_bb = e->src;
2221 basic_block next_bb = e->dest;
2222 entry_bb = e->dest;
2223 *gsi = gsi_after_labels (entry_bb);
2225 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2226 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2228 for (i = 0; i <= fd->last_nonrect; i++)
2230 if (fd->loops[i].m1 == NULL_TREE
2231 && fd->loops[i].m2 == NULL_TREE
2232 && !fd->loops[i].non_rect_referenced)
2233 continue;
2235 tree itype = TREE_TYPE (fd->loops[i].v);
2237 gsi2 = gsi_after_labels (cur_bb);
2238 tree n1, n2;
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2240 if (fd->loops[i].m1)
2242 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2243 n1 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n1);
2245 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2247 else
2248 n1 = t;
2249 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i < fd->last_nonrect)
2253 vs[i] = create_tmp_reg (itype, ".it");
2254 expand_omp_build_assign (&gsi2, vs[i], n1);
2256 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2257 if (fd->loops[i].m2)
2259 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2260 n2 = fold_build2 (MULT_EXPR, itype,
2261 vs[i - fd->loops[i].outer], n2);
2262 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2264 else
2265 n2 = t;
2266 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2267 true, GSI_SAME_STMT);
2268 if (i == fd->last_nonrect)
2270 gcond *cond_stmt
2271 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2272 NULL_TREE, NULL_TREE);
2273 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2274 e = split_block (cur_bb, cond_stmt);
2275 e->flags = EDGE_TRUE_VALUE;
2276 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2277 e->probability = profile_probability::likely ().guessed ();
2278 ne->probability = e->probability.invert ();
2279 gsi2 = gsi_after_labels (e->dest);
2281 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2282 ? -1 : 1));
2283 t = fold_build2 (PLUS_EXPR, itype,
2284 fold_convert (itype, fd->loops[i].step), t);
2285 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2286 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2287 tree step = fold_convert (itype, fd->loops[i].step);
2288 if (TYPE_UNSIGNED (itype)
2289 && fd->loops[i].cond_code == GT_EXPR)
2290 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2291 fold_build1 (NEGATE_EXPR, itype, t),
2292 fold_build1 (NEGATE_EXPR, itype, step));
2293 else
2294 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2295 t = fold_convert (type, t);
2296 t = fold_build2 (PLUS_EXPR, type,
2297 counts[fd->last_nonrect], t);
2298 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2299 true, GSI_SAME_STMT);
2300 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2301 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2302 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2303 break;
2305 e = split_block (cur_bb, last_stmt (cur_bb));
2307 basic_block new_cur_bb = create_empty_bb (cur_bb);
2308 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2310 gsi2 = gsi_after_labels (e->dest);
2311 tree step = fold_convert (itype,
2312 unshare_expr (fd->loops[i].step));
2313 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2314 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2315 true, GSI_SAME_STMT);
2316 expand_omp_build_assign (&gsi2, vs[i], t);
2318 ne = split_block (e->dest, last_stmt (e->dest));
2319 gsi2 = gsi_after_labels (ne->dest);
2321 gcond *cond_stmt
2322 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2323 NULL_TREE, NULL_TREE);
2324 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2325 edge e3, e4;
2326 if (next_bb == entry_bb)
2328 e3 = find_edge (ne->dest, next_bb);
2329 e3->flags = EDGE_FALSE_VALUE;
2331 else
2332 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2333 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2334 e4->probability = profile_probability::likely ().guessed ();
2335 e3->probability = e4->probability.invert ();
2336 basic_block esrc = e->src;
2337 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2338 cur_bb = new_cur_bb;
2339 basic_block latch_bb = next_bb;
2340 next_bb = e->dest;
2341 remove_edge (e);
2342 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2343 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2344 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2347 t = NULL_TREE;
2348 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2349 if (!fd->loops[i].non_rect_referenced
2350 && fd->loops[i].m1 == NULL_TREE
2351 && fd->loops[i].m2 == NULL_TREE)
2353 if (t == NULL_TREE)
2354 t = counts[i];
2355 else
2356 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2358 if (t)
2360 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2361 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2363 if (!rect_count_seen)
2364 t = counts[fd->last_nonrect];
2365 else
2366 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2367 counts[fd->last_nonrect]);
2368 expand_omp_build_assign (gsi, fd->loop.n2, t);
2370 else if (fd->non_rect)
2372 tree t = fd->loop.n2;
2373 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2374 int non_rect_referenced = 0, non_rect = 0;
2375 for (i = 0; i < fd->collapse; i++)
2377 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2378 && !integer_zerop (counts[i]))
2379 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2380 if (fd->loops[i].non_rect_referenced)
2381 non_rect_referenced++;
2382 if (fd->loops[i].m1 || fd->loops[i].m2)
2383 non_rect++;
2385 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2386 counts[fd->last_nonrect] = t;
2390 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2391 T = V;
2392 V3 = N31 + (T % count3) * STEP3;
2393 T = T / count3;
2394 V2 = N21 + (T % count2) * STEP2;
2395 T = T / count2;
2396 V1 = N11 + T * STEP1;
2397 if this loop doesn't have an inner loop construct combined with it.
2398 If it does have an inner loop construct combined with it and the
2399 iteration count isn't known constant, store values from counts array
2400 into its _looptemp_ temporaries instead.
2401 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2402 inclusive), use the count of all those loops together, and either
2403 find the roots of a quadratic etc. equation, or as a fallback, do:
2404 COUNT = 0;
2405 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2406 for (tmpj = M21 * tmpi + N21;
2407 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2409 int tmpk1 = M31 * tmpj + N31;
2410 int tmpk2 = M32 * tmpj + N32;
2411 if (tmpk1 COND3 tmpk2)
2413 if (COND3 is <)
2414 adj = STEP3 - 1;
2415 else
2416 adj = STEP3 + 1;
2417 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2418 if (COUNT + temp > T)
2420 V1 = tmpi;
2421 V2 = tmpj;
2422 V3 = tmpk1 + (T - COUNT) * STEP3;
2423 goto done;
2425 else
2426 COUNT += temp;
2429 done:;
2430 but for optional innermost or outermost rectangular loops that aren't
2431 referenced by other loop expressions, keep doing the division/modulo. */
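/* As a minimal sketch (not code emitted by this pass, and using a
   hypothetical helper name), the rectangular division/modulo recovery
   above corresponds to plain C along the lines of:

     static void
     recover_indices (long t, long count2, long count3,
                      long n11, long n21, long n31,
                      long step1, long step2, long step3,
                      long *v1, long *v2, long *v3)
     {
       *v3 = n31 + (t % count3) * step3;  /* Innermost index.  */
       t /= count3;
       *v2 = n21 + (t % count2) * step2;
       t /= count2;
       *v1 = n11 + t * step1;             /* Outermost index.  */
     }

   For the two-loop triangular case, T is instead inverted by solving

     factor * C * (C - 1) / 2 + first_inner_iterations * C <= T

   for the largest integer C, i.e.

     C = floor ((sqrt (T3 * T3 + 2 * factor * T) - T3) / factor)

   with T3 = first_inner_iterations - factor / 2, which is what the
   IFN_SQRT based path below computes in double precision, verifying
   the result afterwards and falling back to iteration if the checks
   fail.  */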
2433 static void
2434 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2435 tree *counts, tree *nonrect_bounds,
2436 gimple *inner_stmt, tree startvar)
2438 int i;
2439 if (gimple_omp_for_combined_p (fd->for_stmt))
2441 /* If fd->loop.n2 is constant, then no propagation of the counts
2442 is needed, they are constant. */
2443 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2444 return;
2446 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2447 ? gimple_omp_taskreg_clauses (inner_stmt)
2448 : gimple_omp_for_clauses (inner_stmt);
2449 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
2450 isn't supposed to be handled, as the inner loop doesn't
2451 use it. */
2452 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2453 gcc_assert (innerc);
2454 int count = 0;
2455 if (fd->non_rect
2456 && fd->last_nonrect == fd->first_nonrect + 1
2457 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2458 count = 4;
2459 for (i = 0; i < fd->collapse + count; i++)
2461 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2462 OMP_CLAUSE__LOOPTEMP_);
2463 gcc_assert (innerc);
2464 if (i)
2466 tree tem = OMP_CLAUSE_DECL (innerc);
2467 tree t;
2468 if (i < fd->collapse)
2469 t = counts[i];
2470 else
2471 switch (i - fd->collapse)
2473 case 0: t = counts[0]; break;
2474 case 1: t = fd->first_inner_iterations; break;
2475 case 2: t = fd->factor; break;
2476 case 3: t = fd->adjn1; break;
2477 default: gcc_unreachable ();
2479 t = fold_convert (TREE_TYPE (tem), t);
2480 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2481 false, GSI_CONTINUE_LINKING);
2482 gassign *stmt = gimple_build_assign (tem, t);
2483 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2486 return;
2489 tree type = TREE_TYPE (fd->loop.v);
2490 tree tem = create_tmp_reg (type, ".tem");
2491 gassign *stmt = gimple_build_assign (tem, startvar);
2492 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2494 for (i = fd->collapse - 1; i >= 0; i--)
2496 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2497 itype = vtype;
2498 if (POINTER_TYPE_P (vtype))
2499 itype = signed_type_for (vtype);
2500 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2501 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2502 else
2503 t = tem;
2504 if (i == fd->last_nonrect)
2506 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2507 false, GSI_CONTINUE_LINKING);
2508 tree stopval = t;
2509 tree idx = create_tmp_reg (type, ".count");
2510 expand_omp_build_assign (gsi, idx,
2511 build_zero_cst (type), true);
2512 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2513 if (fd->first_nonrect + 1 == fd->last_nonrect
2514 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2515 || fd->first_inner_iterations)
2516 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2517 != CODE_FOR_nothing)
2518 && !integer_zerop (fd->loop.n2))
2520 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2521 tree itype = TREE_TYPE (fd->loops[i].v);
2522 tree first_inner_iterations = fd->first_inner_iterations;
2523 tree factor = fd->factor;
2524 gcond *cond_stmt
2525 = gimple_build_cond (NE_EXPR, factor,
2526 build_zero_cst (TREE_TYPE (factor)),
2527 NULL_TREE, NULL_TREE);
2528 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2529 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2530 basic_block bb0 = e->src;
2531 e->flags = EDGE_TRUE_VALUE;
2532 e->probability = profile_probability::likely ();
2533 bb_triang_dom = bb0;
2534 *gsi = gsi_after_labels (e->dest);
2535 tree slltype = long_long_integer_type_node;
2536 tree ulltype = long_long_unsigned_type_node;
2537 tree stopvalull = fold_convert (ulltype, stopval);
2538 stopvalull
2539 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2540 false, GSI_CONTINUE_LINKING);
2541 first_inner_iterations
2542 = fold_convert (slltype, first_inner_iterations);
2543 first_inner_iterations
2544 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2545 NULL_TREE, false,
2546 GSI_CONTINUE_LINKING);
2547 factor = fold_convert (slltype, factor);
2548 factor
2549 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2550 false, GSI_CONTINUE_LINKING);
2551 tree first_inner_iterationsd
2552 = fold_build1 (FLOAT_EXPR, double_type_node,
2553 first_inner_iterations);
2554 first_inner_iterationsd
2555 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2556 NULL_TREE, false,
2557 GSI_CONTINUE_LINKING);
2558 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2559 factor);
2560 factord = force_gimple_operand_gsi (gsi, factord, true,
2561 NULL_TREE, false,
2562 GSI_CONTINUE_LINKING);
2563 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2564 stopvalull);
2565 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2566 NULL_TREE, false,
2567 GSI_CONTINUE_LINKING);
2568 /* Temporarily disable flag_rounding_math; the values will be
2569 decimal numbers divided by 2, and worst case imprecision
2570 due to too large values ought to be caught later by the
2571 fallback checks. */
2572 int save_flag_rounding_math = flag_rounding_math;
2573 flag_rounding_math = 0;
2574 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2575 build_real (double_type_node, dconst2));
2576 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2577 first_inner_iterationsd, t);
2578 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2579 GSI_CONTINUE_LINKING);
2580 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2581 build_real (double_type_node, dconst2));
2582 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2583 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2584 fold_build2 (MULT_EXPR, double_type_node,
2585 t3, t3));
2586 flag_rounding_math = save_flag_rounding_math;
2587 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2588 GSI_CONTINUE_LINKING);
2589 if (flag_exceptions
2590 && cfun->can_throw_non_call_exceptions
2591 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2593 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2594 build_zero_cst (double_type_node));
2595 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2596 false, GSI_CONTINUE_LINKING);
2597 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2598 boolean_false_node,
2599 NULL_TREE, NULL_TREE);
2601 else
2602 cond_stmt
2603 = gimple_build_cond (LT_EXPR, t,
2604 build_zero_cst (double_type_node),
2605 NULL_TREE, NULL_TREE);
2606 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2607 e = split_block (gsi_bb (*gsi), cond_stmt);
2608 basic_block bb1 = e->src;
2609 e->flags = EDGE_FALSE_VALUE;
2610 e->probability = profile_probability::very_likely ();
2611 *gsi = gsi_after_labels (e->dest);
2612 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2613 tree sqrtr = create_tmp_var (double_type_node);
2614 gimple_call_set_lhs (call, sqrtr);
2615 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2616 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2617 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2618 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2619 tree c = create_tmp_var (ulltype);
2620 tree d = create_tmp_var (ulltype);
2621 expand_omp_build_assign (gsi, c, t, true);
2622 t = fold_build2 (MINUS_EXPR, ulltype, c,
2623 build_one_cst (ulltype));
2624 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2625 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2626 t = fold_build2 (MULT_EXPR, ulltype,
2627 fold_convert (ulltype, fd->factor), t);
2628 tree t2
2629 = fold_build2 (MULT_EXPR, ulltype, c,
2630 fold_convert (ulltype,
2631 fd->first_inner_iterations));
2632 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2633 expand_omp_build_assign (gsi, d, t, true);
2634 t = fold_build2 (MULT_EXPR, ulltype,
2635 fold_convert (ulltype, fd->factor), c);
2636 t = fold_build2 (PLUS_EXPR, ulltype,
2637 t, fold_convert (ulltype,
2638 fd->first_inner_iterations));
2639 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2640 GSI_CONTINUE_LINKING);
2641 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2642 NULL_TREE, NULL_TREE);
2643 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2644 e = split_block (gsi_bb (*gsi), cond_stmt);
2645 basic_block bb2 = e->src;
2646 e->flags = EDGE_TRUE_VALUE;
2647 e->probability = profile_probability::very_likely ();
2648 *gsi = gsi_after_labels (e->dest);
2649 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2650 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2651 GSI_CONTINUE_LINKING);
2652 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2653 NULL_TREE, NULL_TREE);
2654 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2655 e = split_block (gsi_bb (*gsi), cond_stmt);
2656 basic_block bb3 = e->src;
2657 e->flags = EDGE_FALSE_VALUE;
2658 e->probability = profile_probability::very_likely ();
2659 *gsi = gsi_after_labels (e->dest);
2660 t = fold_convert (itype, c);
2661 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2662 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2663 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2664 GSI_CONTINUE_LINKING);
2665 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2666 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2667 t2 = fold_convert (itype, t2);
2668 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2669 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2670 if (fd->loops[i].m1)
2672 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2673 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2675 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2676 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2677 bb_triang = e->src;
2678 *gsi = gsi_after_labels (e->dest);
2679 remove_edge (e);
2680 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2681 e->probability = profile_probability::very_unlikely ();
2682 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2683 e->probability = profile_probability::very_unlikely ();
2684 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2685 e->probability = profile_probability::very_unlikely ();
2687 basic_block bb4 = create_empty_bb (bb0);
2688 add_bb_to_loop (bb4, bb0->loop_father);
2689 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2690 e->probability = profile_probability::unlikely ();
2691 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2692 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2693 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2694 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2695 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2696 counts[i], counts[i - 1]);
2697 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2698 GSI_CONTINUE_LINKING);
2699 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2700 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2701 t = fold_convert (itype, t);
2702 t2 = fold_convert (itype, t2);
2703 t = fold_build2 (MULT_EXPR, itype, t,
2704 fold_convert (itype, fd->loops[i].step));
2705 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2706 t2 = fold_build2 (MULT_EXPR, itype, t2,
2707 fold_convert (itype, fd->loops[i - 1].step));
2708 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2709 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2710 false, GSI_CONTINUE_LINKING);
2711 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2712 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2713 if (fd->loops[i].m1)
2715 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2716 fd->loops[i - 1].v);
2717 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2719 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2720 false, GSI_CONTINUE_LINKING);
2721 stmt = gimple_build_assign (fd->loops[i].v, t);
2722 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2724 /* Fallback implementation. Evaluate the loops in between
2725 (inclusive) fd->first_nonrect and fd->last_nonrect at
2726 runtime using temporaries instead of the original iteration
2727 variables; in the body just bump the counter and compare
2728 with the desired value. */
2729 gimple_stmt_iterator gsi2 = *gsi;
2730 basic_block entry_bb = gsi_bb (gsi2);
2731 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2732 e = split_block (e->dest, (gimple *) NULL);
2733 basic_block dom_bb = NULL;
2734 basic_block cur_bb = e->src;
2735 basic_block next_bb = e->dest;
2736 entry_bb = e->dest;
2737 *gsi = gsi_after_labels (entry_bb);
2739 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2740 tree n1 = NULL_TREE, n2 = NULL_TREE;
2741 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2743 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2745 tree itype = TREE_TYPE (fd->loops[j].v);
2746 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2747 && fd->loops[j].m2 == NULL_TREE
2748 && !fd->loops[j].non_rect_referenced);
2749 gsi2 = gsi_after_labels (cur_bb);
2750 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2751 if (fd->loops[j].m1)
2753 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2754 n1 = fold_build2 (MULT_EXPR, itype,
2755 vs[j - fd->loops[j].outer], n1);
2756 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2758 else if (rect_p)
2759 n1 = build_zero_cst (type);
2760 else
2761 n1 = t;
2762 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2763 true, GSI_SAME_STMT);
2764 if (j < fd->last_nonrect)
2766 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2767 expand_omp_build_assign (&gsi2, vs[j], n1);
2769 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2770 if (fd->loops[j].m2)
2772 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2773 n2 = fold_build2 (MULT_EXPR, itype,
2774 vs[j - fd->loops[j].outer], n2);
2775 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2777 else if (rect_p)
2778 n2 = counts[j];
2779 else
2780 n2 = t;
2781 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2782 true, GSI_SAME_STMT);
2783 if (j == fd->last_nonrect)
2785 gcond *cond_stmt
2786 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2787 NULL_TREE, NULL_TREE);
2788 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2789 e = split_block (cur_bb, cond_stmt);
2790 e->flags = EDGE_TRUE_VALUE;
2791 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2792 e->probability = profile_probability::likely ().guessed ();
2793 ne->probability = e->probability.invert ();
2794 gsi2 = gsi_after_labels (e->dest);
2796 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2797 ? -1 : 1));
2798 t = fold_build2 (PLUS_EXPR, itype,
2799 fold_convert (itype, fd->loops[j].step), t);
2800 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2801 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2802 tree step = fold_convert (itype, fd->loops[j].step);
2803 if (TYPE_UNSIGNED (itype)
2804 && fd->loops[j].cond_code == GT_EXPR)
2805 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2806 fold_build1 (NEGATE_EXPR, itype, t),
2807 fold_build1 (NEGATE_EXPR, itype, step));
2808 else
2809 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2810 t = fold_convert (type, t);
2811 t = fold_build2 (PLUS_EXPR, type, idx, t);
2812 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2813 true, GSI_SAME_STMT);
2814 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2815 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2816 cond_stmt
2817 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2818 NULL_TREE);
2819 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2820 e = split_block (gsi_bb (gsi2), cond_stmt);
2821 e->flags = EDGE_TRUE_VALUE;
2822 e->probability = profile_probability::likely ().guessed ();
2823 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2824 ne->probability = e->probability.invert ();
2825 gsi2 = gsi_after_labels (e->dest);
2826 expand_omp_build_assign (&gsi2, idx, t);
2827 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2828 break;
2830 e = split_block (cur_bb, last_stmt (cur_bb));
2832 basic_block new_cur_bb = create_empty_bb (cur_bb);
2833 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2835 gsi2 = gsi_after_labels (e->dest);
2836 if (rect_p)
2837 t = fold_build2 (PLUS_EXPR, type, vs[j],
2838 build_one_cst (type));
2839 else
2841 tree step
2842 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2843 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2845 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2846 true, GSI_SAME_STMT);
2847 expand_omp_build_assign (&gsi2, vs[j], t);
2849 edge ne = split_block (e->dest, last_stmt (e->dest));
2850 gsi2 = gsi_after_labels (ne->dest);
2852 gcond *cond_stmt;
2853 if (next_bb == entry_bb)
2854 /* No need to actually check the outermost condition. */
2855 cond_stmt
2856 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2857 boolean_true_node,
2858 NULL_TREE, NULL_TREE);
2859 else
2860 cond_stmt
2861 = gimple_build_cond (rect_p ? LT_EXPR
2862 : fd->loops[j].cond_code,
2863 vs[j], n2, NULL_TREE, NULL_TREE);
2864 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2865 edge e3, e4;
2866 if (next_bb == entry_bb)
2868 e3 = find_edge (ne->dest, next_bb);
2869 e3->flags = EDGE_FALSE_VALUE;
2870 dom_bb = ne->dest;
2872 else
2873 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2874 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2875 e4->probability = profile_probability::likely ().guessed ();
2876 e3->probability = e4->probability.invert ();
2877 basic_block esrc = e->src;
2878 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2879 cur_bb = new_cur_bb;
2880 basic_block latch_bb = next_bb;
2881 next_bb = e->dest;
2882 remove_edge (e);
2883 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2884 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2885 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2887 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2889 tree itype = TREE_TYPE (fd->loops[j].v);
2890 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2891 && fd->loops[j].m2 == NULL_TREE
2892 && !fd->loops[j].non_rect_referenced);
2893 if (j == fd->last_nonrect)
2895 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2896 t = fold_convert (itype, t);
2897 tree t2
2898 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2899 t = fold_build2 (MULT_EXPR, itype, t, t2);
2900 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2902 else if (rect_p)
2904 t = fold_convert (itype, vs[j]);
2905 t = fold_build2 (MULT_EXPR, itype, t,
2906 fold_convert (itype, fd->loops[j].step));
2907 if (POINTER_TYPE_P (vtype))
2908 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2909 else
2910 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2912 else
2913 t = vs[j];
2914 t = force_gimple_operand_gsi (gsi, t, false,
2915 NULL_TREE, true,
2916 GSI_SAME_STMT);
2917 stmt = gimple_build_assign (fd->loops[j].v, t);
2918 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2920 if (gsi_end_p (*gsi))
2921 *gsi = gsi_last_bb (gsi_bb (*gsi));
2922 else
2923 gsi_prev (gsi);
2924 if (bb_triang)
2926 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2927 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2928 *gsi = gsi_after_labels (e->dest);
2929 if (!gsi_end_p (*gsi))
2930 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2931 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2934 else
2936 t = fold_convert (itype, t);
2937 t = fold_build2 (MULT_EXPR, itype, t,
2938 fold_convert (itype, fd->loops[i].step));
2939 if (POINTER_TYPE_P (vtype))
2940 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2941 else
2942 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2943 t = force_gimple_operand_gsi (gsi, t,
2944 DECL_P (fd->loops[i].v)
2945 && TREE_ADDRESSABLE (fd->loops[i].v),
2946 NULL_TREE, false,
2947 GSI_CONTINUE_LINKING);
2948 stmt = gimple_build_assign (fd->loops[i].v, t);
2949 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2951 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2953 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2955 false, GSI_CONTINUE_LINKING);
2956 stmt = gimple_build_assign (tem, t);
2957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2959 if (i == fd->last_nonrect)
2960 i = fd->first_nonrect;
2962 if (fd->non_rect)
2963 for (i = 0; i <= fd->last_nonrect; i++)
2964 if (fd->loops[i].m2)
2966 tree itype = TREE_TYPE (fd->loops[i].v);
2968 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2969 t = fold_build2 (MULT_EXPR, itype,
2970 fd->loops[i - fd->loops[i].outer].v, t);
2971 t = fold_build2 (PLUS_EXPR, itype, t,
2972 fold_convert (itype,
2973 unshare_expr (fd->loops[i].n2)));
2974 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2975 t = force_gimple_operand_gsi (gsi, t, false,
2976 NULL_TREE, false,
2977 GSI_CONTINUE_LINKING);
2978 stmt = gimple_build_assign (nonrect_bounds[i], t);
2979 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2983 /* Helper function for expand_omp_for_*. Generate code like:
2984 L10:
2985 V3 += STEP3;
2986 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2987 L11:
2988 V3 = N31;
2989 V2 += STEP2;
2990 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2991 L12:
2992 V2 = N21;
2993 V1 += STEP1;
2994 goto BODY_BB;
2995 For non-rectangular loops, use temporaries stored in nonrect_bounds
2996 for the upper bounds if M?2 multiplier is present. Given e.g.
2997 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2998 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2999 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3000 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3002 L10:
3003 V4 += STEP4;
3004 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3005 L11:
3006 V4 = N41 + M41 * V2; // This can be left out if the loop
3007 // refers to the immediate parent loop
3008 V3 += STEP3;
3009 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3010 L12:
3011 V3 = N31;
3012 V2 += STEP2;
3013 if (V2 cond2 N22) goto L120; else goto L13;
3014 L120:
3015 V4 = N41 + M41 * V2;
3016 NONRECT_BOUND4 = N42 + M42 * V2;
3017 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3018 L13:
3019 V2 = N21;
3020 V1 += STEP1;
3021 goto L120; */
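/* Note the effect of the L120 block above: because V4's bounds depend
   on V2, N42 + M42 * V2 is re-evaluated and cached in NONRECT_BOUND4
   only when V2 itself changes, so the hot V4 += STEP4 path at L10
   merely compares against the temporary instead of recomputing the
   bound on every iteration.  */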
3023 static basic_block
3024 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3025 basic_block cont_bb, basic_block body_bb)
3027 basic_block last_bb, bb, collapse_bb = NULL;
3028 int i;
3029 gimple_stmt_iterator gsi;
3030 edge e;
3031 tree t;
3032 gimple *stmt;
3034 last_bb = cont_bb;
3035 for (i = fd->collapse - 1; i >= 0; i--)
3037 tree vtype = TREE_TYPE (fd->loops[i].v);
3039 bb = create_empty_bb (last_bb);
3040 add_bb_to_loop (bb, last_bb->loop_father);
3041 gsi = gsi_start_bb (bb);
3043 if (i < fd->collapse - 1)
3045 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3046 e->probability
3047 = profile_probability::guessed_always ().apply_scale (1, 8);
3049 struct omp_for_data_loop *l = &fd->loops[i + 1];
3050 if (l->m1 == NULL_TREE || l->outer != 1)
3052 t = l->n1;
3053 if (l->m1)
3055 tree t2
3056 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3057 fd->loops[i + 1 - l->outer].v, l->m1);
3058 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3060 t = force_gimple_operand_gsi (&gsi, t,
3061 DECL_P (l->v)
3062 && TREE_ADDRESSABLE (l->v),
3063 NULL_TREE, false,
3064 GSI_CONTINUE_LINKING);
3065 stmt = gimple_build_assign (l->v, t);
3066 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3069 else
3070 collapse_bb = bb;
3072 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3074 if (POINTER_TYPE_P (vtype))
3075 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3076 else
3077 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3078 t = force_gimple_operand_gsi (&gsi, t,
3079 DECL_P (fd->loops[i].v)
3080 && TREE_ADDRESSABLE (fd->loops[i].v),
3081 NULL_TREE, false, GSI_CONTINUE_LINKING);
3082 stmt = gimple_build_assign (fd->loops[i].v, t);
3083 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3085 if (fd->loops[i].non_rect_referenced)
3087 basic_block update_bb = NULL, prev_bb = NULL;
3088 for (int j = i + 1; j <= fd->last_nonrect; j++)
3089 if (j - fd->loops[j].outer == i)
3091 tree n1, n2;
3092 struct omp_for_data_loop *l = &fd->loops[j];
3093 basic_block this_bb = create_empty_bb (last_bb);
3094 add_bb_to_loop (this_bb, last_bb->loop_father);
3095 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3096 if (prev_bb)
3098 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3099 e->probability
3100 = profile_probability::guessed_always ().apply_scale (7, 8);
3102 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3104 if (l->m1)
3106 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3107 fd->loops[i].v);
3108 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3109 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3110 false,
3111 GSI_CONTINUE_LINKING);
3112 stmt = gimple_build_assign (l->v, n1);
3113 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3114 n1 = l->v;
3116 else
3117 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3118 NULL_TREE, false,
3119 GSI_CONTINUE_LINKING);
3120 if (l->m2)
3122 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3123 fd->loops[i].v);
3124 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3125 t, unshare_expr (l->n2));
3126 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3127 false,
3128 GSI_CONTINUE_LINKING);
3129 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3130 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3131 n2 = nonrect_bounds[j];
3133 else
3134 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3135 true, NULL_TREE, false,
3136 GSI_CONTINUE_LINKING);
3137 gcond *cond_stmt
3138 = gimple_build_cond (l->cond_code, n1, n2,
3139 NULL_TREE, NULL_TREE);
3140 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3141 if (update_bb == NULL)
3142 update_bb = this_bb;
3143 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3144 e->probability
3145 = profile_probability::guessed_always ().apply_scale (1, 8);
3146 if (prev_bb == NULL)
3147 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3148 prev_bb = this_bb;
3150 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3151 e->probability
3152 = profile_probability::guessed_always ().apply_scale (7, 8);
3153 body_bb = update_bb;
3156 if (i > 0)
3158 if (fd->loops[i].m2)
3159 t = nonrect_bounds[i];
3160 else
3161 t = unshare_expr (fd->loops[i].n2);
3162 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3163 false, GSI_CONTINUE_LINKING);
3164 tree v = fd->loops[i].v;
3165 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3166 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3167 false, GSI_CONTINUE_LINKING);
3168 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3169 stmt = gimple_build_cond_empty (t);
3170 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3171 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3172 expand_omp_regimplify_p, NULL, NULL)
3173 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3174 expand_omp_regimplify_p, NULL, NULL))
3175 gimple_regimplify_operands (stmt, &gsi);
3176 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3177 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3179 else
3180 make_edge (bb, body_bb, EDGE_FALLTHRU);
3181 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3182 last_bb = bb;
3185 return collapse_bb;
3188 /* Expand #pragma omp ordered depend(source). */
3190 static void
3191 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3192 tree *counts, location_t loc)
3194 enum built_in_function source_ix
3195 = fd->iter_type == long_integer_type_node
3196 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3197 gimple *g
3198 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3199 build_fold_addr_expr (counts[fd->ordered]));
3200 gimple_set_location (g, loc);
3201 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3204 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
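/* As a sketch of the user code driving this (a hypothetical example,
   not taken from a testcase), a doacross loop such as

     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
         {
           #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
           a[i][j] = f (a[i-1][j], a[i][j-1]);
           #pragma omp ordered depend(source)
         }

   has each sink clause expanded into a GOMP_doacross_wait call by the
   function below, and its source clause into the GOMP_doacross_post
   call built above.  */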
3206 static void
3207 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3208 tree *counts, tree c, location_t loc)
3210 auto_vec<tree, 10> args;
3211 enum built_in_function sink_ix
3212 = fd->iter_type == long_integer_type_node
3213 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3214 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3215 int i;
3216 gimple_stmt_iterator gsi2 = *gsi;
3217 bool warned_step = false;
3219 for (i = 0; i < fd->ordered; i++)
3221 tree step = NULL_TREE;
3222 off = TREE_PURPOSE (deps);
3223 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3225 step = TREE_OPERAND (off, 1);
3226 off = TREE_OPERAND (off, 0);
3228 if (!integer_zerop (off))
3230 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3231 || fd->loops[i].cond_code == GT_EXPR);
3232 bool forward = fd->loops[i].cond_code == LT_EXPR;
3233 if (step)
3235 /* Non-simple Fortran DO loops. If step is variable,
3236 we don't know even the direction at compile time,
3237 so we can't warn. */
3238 if (TREE_CODE (step) != INTEGER_CST)
3239 break;
3240 forward = tree_int_cst_sgn (step) != -1;
3242 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3243 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3244 "waiting for lexically later iteration");
3245 break;
3247 deps = TREE_CHAIN (deps);
3249 /* If all offsets corresponding to the collapsed loops are zero,
3250 this depend clause can be ignored. FIXME: but there is still a
3251 flush needed. We need to emit one __sync_synchronize () for it
3252 though (perhaps conditionally)? Solve this together with the
3253 conservative dependence folding optimization.
3254 if (i >= fd->collapse)
3255 return; */
3257 deps = OMP_CLAUSE_DECL (c);
3258 gsi_prev (&gsi2);
3259 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3260 edge e2 = split_block_after_labels (e1->dest);
3262 gsi2 = gsi_after_labels (e1->dest);
3263 *gsi = gsi_last_bb (e1->src);
3264 for (i = 0; i < fd->ordered; i++)
3266 tree itype = TREE_TYPE (fd->loops[i].v);
3267 tree step = NULL_TREE;
3268 tree orig_off = NULL_TREE;
3269 if (POINTER_TYPE_P (itype))
3270 itype = sizetype;
3271 if (i)
3272 deps = TREE_CHAIN (deps);
3273 off = TREE_PURPOSE (deps);
3274 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3276 step = TREE_OPERAND (off, 1);
3277 off = TREE_OPERAND (off, 0);
3278 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3279 && integer_onep (fd->loops[i].step)
3280 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3282 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3283 if (step)
3285 off = fold_convert_loc (loc, itype, off);
3286 orig_off = off;
3287 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3290 if (integer_zerop (off))
3291 t = boolean_true_node;
3292 else
3294 tree a;
3295 tree co = fold_convert_loc (loc, itype, off);
3296 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3298 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3299 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3300 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3301 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3302 co);
3304 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3305 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3306 fd->loops[i].v, co);
3307 else
3308 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3309 fd->loops[i].v, co);
3310 if (step)
3312 tree t1, t2;
3313 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3314 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3315 fd->loops[i].n1);
3316 else
3317 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3318 fd->loops[i].n2);
3319 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3320 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3321 fd->loops[i].n2);
3322 else
3323 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3324 fd->loops[i].n1);
3325 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3326 step, build_int_cst (TREE_TYPE (step), 0));
3327 if (TREE_CODE (step) != INTEGER_CST)
3329 t1 = unshare_expr (t1);
3330 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3331 false, GSI_CONTINUE_LINKING);
3332 t2 = unshare_expr (t2);
3333 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3334 false, GSI_CONTINUE_LINKING);
3336 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3337 t, t2, t1);
3339 else if (fd->loops[i].cond_code == LT_EXPR)
3341 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3342 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3343 fd->loops[i].n1);
3344 else
3345 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3346 fd->loops[i].n2);
3348 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3349 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3350 fd->loops[i].n2);
3351 else
3352 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3353 fd->loops[i].n1);
3355 if (cond)
3356 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3357 else
3358 cond = t;
3360 off = fold_convert_loc (loc, itype, off);
3362 if (step
3363 || (fd->loops[i].cond_code == LT_EXPR
3364 ? !integer_onep (fd->loops[i].step)
3365 : !integer_minus_onep (fd->loops[i].step)))
3367 if (step == NULL_TREE
3368 && TYPE_UNSIGNED (itype)
3369 && fd->loops[i].cond_code == GT_EXPR)
3370 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3371 fold_build1_loc (loc, NEGATE_EXPR, itype,
3372 s));
3373 else
3374 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3375 orig_off ? orig_off : off, s);
3376 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3377 build_int_cst (itype, 0));
3378 if (integer_zerop (t) && !warned_step)
3380 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3381 "refers to iteration never in the iteration "
3382 "space");
3383 warned_step = true;
3385 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3386 cond, t);
3389 if (i <= fd->collapse - 1 && fd->collapse > 1)
3390 t = fd->loop.v;
3391 else if (counts[i])
3392 t = counts[i];
3393 else
3395 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3396 fd->loops[i].v, fd->loops[i].n1);
3397 t = fold_convert_loc (loc, fd->iter_type, t);
3399 if (step)
3400 /* We have already divided off by step earlier. */;
3401 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3402 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3403 fold_build1_loc (loc, NEGATE_EXPR, itype,
3404 s));
3405 else
3406 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3407 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3408 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3409 off = fold_convert_loc (loc, fd->iter_type, off);
3410 if (i <= fd->collapse - 1 && fd->collapse > 1)
3412 if (i)
3413 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3414 off);
3415 if (i < fd->collapse - 1)
3417 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3418 counts[i]);
3419 continue;
3422 off = unshare_expr (off);
3423 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3424 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3425 true, GSI_SAME_STMT);
3426 args.safe_push (t);
3428 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3429 gimple_set_location (g, loc);
3430 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3432 cond = unshare_expr (cond);
3433 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3434 GSI_CONTINUE_LINKING);
3435 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3436 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3437 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3438 e1->probability = e3->probability.invert ();
3439 e1->flags = EDGE_TRUE_VALUE;
3440 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3442 *gsi = gsi_after_labels (e2->dest);
3445 /* Expand all #pragma omp ordered depend(source) and
3446 #pragma omp ordered depend(sink:...) constructs in the current
3447 #pragma omp for ordered(n) region. */
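/* For example, a doacross loop such as

     #pragma omp for ordered(1)
     for (i = 1; i < N; i++)
       {
	 #pragma omp ordered depend(sink: i - 1)
	 ... use data produced by iteration i - 1 ...
	 #pragma omp ordered depend(source)
       }

   has its depend(source) construct expanded into a call to
   GOMP_doacross_post (or GOMP_doacross_ull_post) publishing the
   current iteration counters, and each depend(sink: ...) clause into
   a GOMP_doacross_wait (or GOMP_doacross_ull_wait) call on the
   shifted counters, guarded so that sink iterations outside of the
   iteration space are not waited on. */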
3449 static void
3450 expand_omp_ordered_source_sink (struct omp_region *region,
3451 struct omp_for_data *fd, tree *counts,
3452 basic_block cont_bb)
3454 struct omp_region *inner;
3455 int i;
3456 for (i = fd->collapse - 1; i < fd->ordered; i++)
3457 if (i == fd->collapse - 1 && fd->collapse > 1)
3458 counts[i] = NULL_TREE;
3459 else if (i >= fd->collapse && !cont_bb)
3460 counts[i] = build_zero_cst (fd->iter_type);
3461 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3462 && integer_onep (fd->loops[i].step))
3463 counts[i] = NULL_TREE;
3464 else
3465 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3466 tree atype
3467 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3468 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3469 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3471 for (inner = region->inner; inner; inner = inner->next)
3472 if (inner->type == GIMPLE_OMP_ORDERED)
3474 gomp_ordered *ord_stmt = inner->ord_stmt;
3475 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3476 location_t loc = gimple_location (ord_stmt);
3477 tree c;
3478 for (c = gimple_omp_ordered_clauses (ord_stmt);
3479 c; c = OMP_CLAUSE_CHAIN (c))
3480 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3481 break;
3482 if (c)
3483 expand_omp_ordered_source (&gsi, fd, counts, loc);
3484 for (c = gimple_omp_ordered_clauses (ord_stmt);
3485 c; c = OMP_CLAUSE_CHAIN (c))
3486 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3487 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3488 gsi_remove (&gsi, true);
3492 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3493 collapsed. */
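/* E.g. for ordered(2) without collapse, only the outermost loop is
   workshared, so this routine re-creates the inner loop

     for (V2 = N21; V2 cond2 N22; V2 += STEP2) BODY;

   around the body and keeps the corresponding counts[] variable (when
   one was created) and the .orditera array element up to date, so the
   doacross runtime always sees the current iteration vector. */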
3495 static basic_block
3496 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3497 basic_block cont_bb, basic_block body_bb,
3498 bool ordered_lastprivate)
3500 if (fd->ordered == fd->collapse)
3501 return cont_bb;
3503 if (!cont_bb)
3505 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3506 for (int i = fd->collapse; i < fd->ordered; i++)
3508 tree type = TREE_TYPE (fd->loops[i].v);
3509 tree n1 = fold_convert (type, fd->loops[i].n1);
3510 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3511 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3512 size_int (i - fd->collapse + 1),
3513 NULL_TREE, NULL_TREE);
3514 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3516 return NULL;
3519 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3521 tree t, type = TREE_TYPE (fd->loops[i].v);
3522 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3523 expand_omp_build_assign (&gsi, fd->loops[i].v,
3524 fold_convert (type, fd->loops[i].n1));
3525 if (counts[i])
3526 expand_omp_build_assign (&gsi, counts[i],
3527 build_zero_cst (fd->iter_type));
3528 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3529 size_int (i - fd->collapse + 1),
3530 NULL_TREE, NULL_TREE);
3531 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3532 if (!gsi_end_p (gsi))
3533 gsi_prev (&gsi);
3534 else
3535 gsi = gsi_last_bb (body_bb);
3536 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3537 basic_block new_body = e1->dest;
3538 if (body_bb == cont_bb)
3539 cont_bb = new_body;
3540 edge e2 = NULL;
3541 basic_block new_header;
3542 if (EDGE_COUNT (cont_bb->preds) > 0)
3544 gsi = gsi_last_bb (cont_bb);
3545 if (POINTER_TYPE_P (type))
3546 t = fold_build_pointer_plus (fd->loops[i].v,
3547 fold_convert (sizetype,
3548 fd->loops[i].step));
3549 else
3550 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3551 fold_convert (type, fd->loops[i].step));
3552 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3553 if (counts[i])
3555 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3556 build_int_cst (fd->iter_type, 1));
3557 expand_omp_build_assign (&gsi, counts[i], t);
3558 t = counts[i];
3560 else
3562 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3563 fd->loops[i].v, fd->loops[i].n1);
3564 t = fold_convert (fd->iter_type, t);
3565 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3566 true, GSI_SAME_STMT);
3568 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3569 size_int (i - fd->collapse + 1),
3570 NULL_TREE, NULL_TREE);
3571 expand_omp_build_assign (&gsi, aref, t);
3572 gsi_prev (&gsi);
3573 e2 = split_block (cont_bb, gsi_stmt (gsi));
3574 new_header = e2->dest;
3576 else
3577 new_header = cont_bb;
3578 gsi = gsi_after_labels (new_header);
3579 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3580 true, GSI_SAME_STMT);
3581 tree n2
3582 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3583 true, NULL_TREE, true, GSI_SAME_STMT);
3584 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3585 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3586 edge e3 = split_block (new_header, gsi_stmt (gsi));
3587 cont_bb = e3->dest;
3588 remove_edge (e1);
3589 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3590 e3->flags = EDGE_FALSE_VALUE;
3591 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3592 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3593 e1->probability = e3->probability.invert ();
3595 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3596 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3598 if (e2)
3600 class loop *loop = alloc_loop ();
3601 loop->header = new_header;
3602 loop->latch = e2->src;
3603 add_loop (loop, body_bb->loop_father);
3607 /* If there are any lastprivate clauses and it is possible some loops
3608 might have zero iterations, ensure all the decls are initialized,
3609 otherwise we could crash evaluating C++ class iterators with lastprivate
3610 clauses. */
3611 bool need_inits = false;
3612 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3613 if (need_inits)
3615 tree type = TREE_TYPE (fd->loops[i].v);
3616 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3617 expand_omp_build_assign (&gsi, fd->loops[i].v,
3618 fold_convert (type, fd->loops[i].n1));
3620 else
3622 tree type = TREE_TYPE (fd->loops[i].v);
3623 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3624 boolean_type_node,
3625 fold_convert (type, fd->loops[i].n1),
3626 fold_convert (type, fd->loops[i].n2));
3627 if (!integer_onep (this_cond))
3628 need_inits = true;
3631 return cont_bb;
3634 /* A subroutine of expand_omp_for. Generate code for a parallel
3635 loop with any schedule. Given parameters:
3637 for (V = N1; V cond N2; V += STEP) BODY;
3639 where COND is "<" or ">", we generate pseudocode
3641 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3642 if (more) goto L0; else goto L3;
3644 V = istart0;
3645 iend = iend0;
3647 BODY;
3648 V += STEP;
3649 if (V cond iend) goto L1; else goto L2;
3651 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3654 If this is a combined omp parallel loop, instead of the call to
3655 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3656 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3657 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3658 inner GIMPLE_OMP_FOR, and the statements V += STEP; and
3659 if (V cond iend) goto L1; else goto L2; are removed.
3661 For collapsed loops, given parameters:
3662 collapse(3)
3663 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3664 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3665 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3666 BODY;
3668 we generate pseudocode
3670 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3671 if (cond3 is <)
3672 adj = STEP3 - 1;
3673 else
3674 adj = STEP3 + 1;
3675 count3 = (adj + N32 - N31) / STEP3;
3676 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3677 if (cond2 is <)
3678 adj = STEP2 - 1;
3679 else
3680 adj = STEP2 + 1;
3681 count2 = (adj + N22 - N21) / STEP2;
3682 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3683 if (cond1 is <)
3684 adj = STEP1 - 1;
3685 else
3686 adj = STEP1 + 1;
3687 count1 = (adj + N12 - N11) / STEP1;
3688 count = count1 * count2 * count3;
3689 goto Z1;
3691 count = 0;
3693 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3694 if (more) goto L0; else goto L3;
3696 V = istart0;
3697 T = V;
3698 V3 = N31 + (T % count3) * STEP3;
3699 T = T / count3;
3700 V2 = N21 + (T % count2) * STEP2;
3701 T = T / count2;
3702 V1 = N11 + T * STEP1;
3703 iend = iend0;
3705 BODY;
3706 V += 1;
3707 if (V < iend) goto L10; else goto L2;
3708 L10:
3709 V3 += STEP3;
3710 if (V3 cond3 N32) goto L1; else goto L11;
3711 L11:
3712 V3 = N31;
3713 V2 += STEP2;
3714 if (V2 cond2 N22) goto L1; else goto L12;
3715 L12:
3716 V2 = N21;
3717 V1 += STEP1;
3718 goto L1;
3720 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
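/* As a concrete check of the collapsed pseudocode above: for

     collapse(3)
     for (V1 = 0; V1 < 2; V1++)
       for (V2 = 0; V2 < 3; V2++)
	 for (V3 = 0; V3 < 4; V3++)
	   BODY;

   count3 = 4, count2 = 3, count1 = 2 and count = 24. Logical
   iteration V = 13 is then decoded as V3 = 13 % 4 = 1,
   V2 = (13 / 4) % 3 = 0 and V1 = (13 / 4) / 3 = 1. */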
3725 static void
3726 expand_omp_for_generic (struct omp_region *region,
3727 struct omp_for_data *fd,
3728 enum built_in_function start_fn,
3729 enum built_in_function next_fn,
3730 tree sched_arg,
3731 gimple *inner_stmt)
3733 tree type, istart0, iend0, iend;
3734 tree t, vmain, vback, bias = NULL_TREE;
3735 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3736 basic_block l2_bb = NULL, l3_bb = NULL;
3737 gimple_stmt_iterator gsi;
3738 gassign *assign_stmt;
3739 bool in_combined_parallel = is_combined_parallel (region);
3740 bool broken_loop = region->cont == NULL;
3741 edge e, ne;
3742 tree *counts = NULL;
3743 int i;
3744 bool ordered_lastprivate = false;
3746 gcc_assert (!broken_loop || !in_combined_parallel);
3747 gcc_assert (fd->iter_type == long_integer_type_node
3748 || !in_combined_parallel);
3750 entry_bb = region->entry;
3751 cont_bb = region->cont;
3752 collapse_bb = NULL;
3753 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3754 gcc_assert (broken_loop
3755 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3756 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3757 l1_bb = single_succ (l0_bb);
3758 if (!broken_loop)
3760 l2_bb = create_empty_bb (cont_bb);
3761 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3762 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3763 == l1_bb));
3764 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3766 else
3767 l2_bb = NULL;
3768 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3769 exit_bb = region->exit;
3771 gsi = gsi_last_nondebug_bb (entry_bb);
3773 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3774 if (fd->ordered
3775 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3776 OMP_CLAUSE_LASTPRIVATE))
3777 ordered_lastprivate = true;
3778 tree reductions = NULL_TREE;
3779 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3780 tree memv = NULL_TREE;
3781 if (fd->lastprivate_conditional)
3783 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3784 OMP_CLAUSE__CONDTEMP_);
3785 if (fd->have_pointer_condtemp)
3786 condtemp = OMP_CLAUSE_DECL (c);
3787 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3788 cond_var = OMP_CLAUSE_DECL (c);
3790 if (sched_arg)
3792 if (fd->have_reductemp)
3794 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3795 OMP_CLAUSE__REDUCTEMP_);
3796 reductions = OMP_CLAUSE_DECL (c);
3797 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3798 gimple *g = SSA_NAME_DEF_STMT (reductions);
3799 reductions = gimple_assign_rhs1 (g);
3800 OMP_CLAUSE_DECL (c) = reductions;
3801 entry_bb = gimple_bb (g);
3802 edge e = split_block (entry_bb, g);
3803 if (region->entry == entry_bb)
3804 region->entry = e->dest;
3805 gsi = gsi_last_bb (entry_bb);
3807 else
3808 reductions = null_pointer_node;
3809 if (fd->have_pointer_condtemp)
3811 tree type = TREE_TYPE (condtemp);
3812 memv = create_tmp_var (type);
3813 TREE_ADDRESSABLE (memv) = 1;
3814 unsigned HOST_WIDE_INT sz
3815 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3816 sz *= fd->lastprivate_conditional;
3817 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3818 false);
3819 mem = build_fold_addr_expr (memv);
3821 else
3822 mem = null_pointer_node;
3824 if (fd->collapse > 1 || fd->ordered)
3826 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3827 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3829 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3830 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3831 zero_iter1_bb, first_zero_iter1,
3832 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3834 if (zero_iter1_bb)
3836 /* Some counts[i] vars might be uninitialized if
3837 some loop has zero iterations. But the body shouldn't
3838 be executed in that case, so just avoid uninit warnings. */
3839 for (i = first_zero_iter1;
3840 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3841 if (SSA_VAR_P (counts[i]))
3842 TREE_NO_WARNING (counts[i]) = 1;
3843 gsi_prev (&gsi);
3844 e = split_block (entry_bb, gsi_stmt (gsi));
3845 entry_bb = e->dest;
3846 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3847 gsi = gsi_last_nondebug_bb (entry_bb);
3848 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3849 get_immediate_dominator (CDI_DOMINATORS,
3850 zero_iter1_bb));
3852 if (zero_iter2_bb)
3854 /* Some counts[i] vars might be uninitialized if
3855 some loop has zero iterations. But the body shouldn't
3856 be executed in that case, so just avoid uninit warnings. */
3857 for (i = first_zero_iter2; i < fd->ordered; i++)
3858 if (SSA_VAR_P (counts[i]))
3859 TREE_NO_WARNING (counts[i]) = 1;
3860 if (zero_iter1_bb)
3861 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3862 else
3864 gsi_prev (&gsi);
3865 e = split_block (entry_bb, gsi_stmt (gsi));
3866 entry_bb = e->dest;
3867 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3868 gsi = gsi_last_nondebug_bb (entry_bb);
3869 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3870 get_immediate_dominator
3871 (CDI_DOMINATORS, zero_iter2_bb));
3874 if (fd->collapse == 1)
3876 counts[0] = fd->loop.n2;
3877 fd->loop = fd->loops[0];
3881 type = TREE_TYPE (fd->loop.v);
3882 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3883 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3884 TREE_ADDRESSABLE (istart0) = 1;
3885 TREE_ADDRESSABLE (iend0) = 1;
3887 /* See if we need to bias by LLONG_MIN. */
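/* The runtime's unsigned long long API compares iteration values as
   unsigned numbers, so when a signed iterator's range may cross zero,
   adding TYPE_MIN_VALUE (i.e. LLONG_MIN) below shifts all values into
   the unsigned range while preserving their order; the bias is
   subtracted again when V is recomputed from istart0/iend0. */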
3888 if (fd->iter_type == long_long_unsigned_type_node
3889 && TREE_CODE (type) == INTEGER_TYPE
3890 && !TYPE_UNSIGNED (type)
3891 && fd->ordered == 0)
3893 tree n1, n2;
3895 if (fd->loop.cond_code == LT_EXPR)
3897 n1 = fd->loop.n1;
3898 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3900 else
3902 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3903 n2 = fd->loop.n1;
3905 if (TREE_CODE (n1) != INTEGER_CST
3906 || TREE_CODE (n2) != INTEGER_CST
3907 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3908 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3911 gimple_stmt_iterator gsif = gsi;
3912 gsi_prev (&gsif);
3914 tree arr = NULL_TREE;
3915 if (in_combined_parallel)
3917 gcc_assert (fd->ordered == 0);
3918 /* In a combined parallel loop, emit a call to
3919 GOMP_loop_foo_next. */
3920 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3921 build_fold_addr_expr (istart0),
3922 build_fold_addr_expr (iend0));
3924 else
3926 tree t0, t1, t2, t3, t4;
3927 /* If this is not a combined parallel loop, emit a call to
3928 GOMP_loop_foo_start in ENTRY_BB. */
3929 t4 = build_fold_addr_expr (iend0);
3930 t3 = build_fold_addr_expr (istart0);
3931 if (fd->ordered)
3933 t0 = build_int_cst (unsigned_type_node,
3934 fd->ordered - fd->collapse + 1);
3935 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3936 fd->ordered
3937 - fd->collapse + 1),
3938 ".omp_counts");
3939 DECL_NAMELESS (arr) = 1;
3940 TREE_ADDRESSABLE (arr) = 1;
3941 TREE_STATIC (arr) = 1;
3942 vec<constructor_elt, va_gc> *v;
3943 vec_alloc (v, fd->ordered - fd->collapse + 1);
3944 int idx;
3946 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3948 tree c;
3949 if (idx == 0 && fd->collapse > 1)
3950 c = fd->loop.n2;
3951 else
3952 c = counts[idx + fd->collapse - 1];
3953 tree purpose = size_int (idx);
3954 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3955 if (TREE_CODE (c) != INTEGER_CST)
3956 TREE_STATIC (arr) = 0;
3959 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3960 if (!TREE_STATIC (arr))
3961 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3962 void_type_node, arr),
3963 true, NULL_TREE, true, GSI_SAME_STMT);
3964 t1 = build_fold_addr_expr (arr);
3965 t2 = NULL_TREE;
3967 else
3969 t2 = fold_convert (fd->iter_type, fd->loop.step);
3970 t1 = fd->loop.n2;
3971 t0 = fd->loop.n1;
3972 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3974 tree innerc
3975 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3976 OMP_CLAUSE__LOOPTEMP_);
3977 gcc_assert (innerc);
3978 t0 = OMP_CLAUSE_DECL (innerc);
3979 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3980 OMP_CLAUSE__LOOPTEMP_);
3981 gcc_assert (innerc);
3982 t1 = OMP_CLAUSE_DECL (innerc);
3984 if (POINTER_TYPE_P (TREE_TYPE (t0))
3985 && TYPE_PRECISION (TREE_TYPE (t0))
3986 != TYPE_PRECISION (fd->iter_type))
3988 /* Avoid casting pointers to an integer of a different size. */
3989 tree itype = signed_type_for (type);
3990 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3991 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3993 else
3995 t1 = fold_convert (fd->iter_type, t1);
3996 t0 = fold_convert (fd->iter_type, t0);
3998 if (bias)
4000 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4001 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4004 if (fd->iter_type == long_integer_type_node || fd->ordered)
4006 if (fd->chunk_size)
4008 t = fold_convert (fd->iter_type, fd->chunk_size);
4009 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4010 if (sched_arg)
4012 if (fd->ordered)
4013 t = build_call_expr (builtin_decl_explicit (start_fn),
4014 8, t0, t1, sched_arg, t, t3, t4,
4015 reductions, mem);
4016 else
4017 t = build_call_expr (builtin_decl_explicit (start_fn),
4018 9, t0, t1, t2, sched_arg, t, t3, t4,
4019 reductions, mem);
4021 else if (fd->ordered)
4022 t = build_call_expr (builtin_decl_explicit (start_fn),
4023 5, t0, t1, t, t3, t4);
4024 else
4025 t = build_call_expr (builtin_decl_explicit (start_fn),
4026 6, t0, t1, t2, t, t3, t4);
4028 else if (fd->ordered)
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 4, t0, t1, t3, t4);
4031 else
4032 t = build_call_expr (builtin_decl_explicit (start_fn),
4033 5, t0, t1, t2, t3, t4);
4035 else
4037 tree t5;
4038 tree c_bool_type;
4039 tree bfn_decl;
4041 /* The GOMP_loop_ull_*start functions have an additional boolean
4042 argument, true for < loops and false for > loops.
4043 In Fortran, the C bool type can be different from
4044 boolean_type_node. */
4045 bfn_decl = builtin_decl_explicit (start_fn);
4046 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4047 t5 = build_int_cst (c_bool_type,
4048 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4049 if (fd->chunk_size)
4051 tree bfn_decl = builtin_decl_explicit (start_fn);
4052 t = fold_convert (fd->iter_type, fd->chunk_size);
4053 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4054 if (sched_arg)
4055 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4056 t, t3, t4, reductions, mem);
4057 else
4058 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4060 else
4061 t = build_call_expr (builtin_decl_explicit (start_fn),
4062 6, t5, t0, t1, t2, t3, t4);
4065 if (TREE_TYPE (t) != boolean_type_node)
4066 t = fold_build2 (NE_EXPR, boolean_type_node,
4067 t, build_int_cst (TREE_TYPE (t), 0));
4068 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4069 true, GSI_SAME_STMT);
4070 if (arr && !TREE_STATIC (arr))
4072 tree clobber = build_clobber (TREE_TYPE (arr));
4073 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4074 GSI_SAME_STMT);
4076 if (fd->have_pointer_condtemp)
4077 expand_omp_build_assign (&gsi, condtemp, memv, false);
4078 if (fd->have_reductemp)
4080 gimple *g = gsi_stmt (gsi);
4081 gsi_remove (&gsi, true);
4082 release_ssa_name (gimple_assign_lhs (g));
4084 entry_bb = region->entry;
4085 gsi = gsi_last_nondebug_bb (entry_bb);
4087 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4089 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4091 /* Remove the GIMPLE_OMP_FOR statement. */
4092 gsi_remove (&gsi, true);
4094 if (gsi_end_p (gsif))
4095 gsif = gsi_after_labels (gsi_bb (gsif));
4096 gsi_next (&gsif);
4098 /* Iteration setup for sequential loop goes in L0_BB. */
4099 tree startvar = fd->loop.v;
4100 tree endvar = NULL_TREE;
4102 if (gimple_omp_for_combined_p (fd->for_stmt))
4104 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4105 && gimple_omp_for_kind (inner_stmt)
4106 == GF_OMP_FOR_KIND_SIMD);
4107 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4108 OMP_CLAUSE__LOOPTEMP_);
4109 gcc_assert (innerc);
4110 startvar = OMP_CLAUSE_DECL (innerc);
4111 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4112 OMP_CLAUSE__LOOPTEMP_);
4113 gcc_assert (innerc);
4114 endvar = OMP_CLAUSE_DECL (innerc);
4117 gsi = gsi_start_bb (l0_bb);
4118 t = istart0;
4119 if (fd->ordered && fd->collapse == 1)
4120 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4121 fold_convert (fd->iter_type, fd->loop.step));
4122 else if (bias)
4123 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4124 if (fd->ordered && fd->collapse == 1)
4126 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4127 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4128 fd->loop.n1, fold_convert (sizetype, t));
4129 else
4131 t = fold_convert (TREE_TYPE (startvar), t);
4132 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4133 fd->loop.n1, t);
4136 else
4138 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4139 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4140 t = fold_convert (TREE_TYPE (startvar), t);
4142 t = force_gimple_operand_gsi (&gsi, t,
4143 DECL_P (startvar)
4144 && TREE_ADDRESSABLE (startvar),
4145 NULL_TREE, false, GSI_CONTINUE_LINKING);
4146 assign_stmt = gimple_build_assign (startvar, t);
4147 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4148 if (cond_var)
4150 tree itype = TREE_TYPE (cond_var);
4151 /* For the lastprivate(conditional:) itervar, we need an iteration
4152 counter that starts at a non-zero unsigned value and increases.
4153 Prefer as few IVs as possible, so if we can use startvar
4154 itself, use that, or startvar + constant (those would be
4155 incremented with step), and as a last resort use istart0 + 1,
4156 incremented by 1. */
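/* In the simplest case, e.g. for (V = 5; V < 100; V++) the counter
   can be V itself, as it starts at the constant 5 > 0, while for
   (V = -3; V < 100; V++) V + (1 - -3), i.e. V + 4, is used so the
   counter starts at 1. */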
4157 if ((fd->ordered && fd->collapse == 1)
4158 || bias
4159 || POINTER_TYPE_P (type)
4160 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4161 || fd->loop.cond_code != LT_EXPR)
4162 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4163 build_int_cst (itype, 1));
4164 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4165 t = fold_convert (itype, t);
4166 else
4168 tree c = fold_convert (itype, fd->loop.n1);
4169 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4170 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4172 t = force_gimple_operand_gsi (&gsi, t, false,
4173 NULL_TREE, false, GSI_CONTINUE_LINKING);
4174 assign_stmt = gimple_build_assign (cond_var, t);
4175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4178 t = iend0;
4179 if (fd->ordered && fd->collapse == 1)
4180 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4181 fold_convert (fd->iter_type, fd->loop.step));
4182 else if (bias)
4183 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4184 if (fd->ordered && fd->collapse == 1)
4186 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4187 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4188 fd->loop.n1, fold_convert (sizetype, t));
4189 else
4191 t = fold_convert (TREE_TYPE (startvar), t);
4192 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4193 fd->loop.n1, t);
4196 else
4198 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4199 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4200 t = fold_convert (TREE_TYPE (startvar), t);
4202 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4203 false, GSI_CONTINUE_LINKING);
4204 if (endvar)
4206 assign_stmt = gimple_build_assign (endvar, iend);
4207 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4208 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4209 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4210 else
4211 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4212 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4214 /* Handle linear clause adjustments. */
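/* E.g. for linear(x:2) each thread computes how many logical
   iterations precede its start (itercnt = (startvar - N1) / STEP) and
   initializes its private x to the incoming value plus itercnt * 2
   before executing its portion of the iteration space. */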
4215 tree itercnt = NULL_TREE;
4216 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4217 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4218 c; c = OMP_CLAUSE_CHAIN (c))
4219 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4220 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4222 tree d = OMP_CLAUSE_DECL (c);
4223 bool is_ref = omp_is_reference (d);
4224 tree t = d, a, dest;
4225 if (is_ref)
4226 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4227 tree type = TREE_TYPE (t);
4228 if (POINTER_TYPE_P (type))
4229 type = sizetype;
4230 dest = unshare_expr (t);
4231 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4232 expand_omp_build_assign (&gsif, v, t);
4233 if (itercnt == NULL_TREE)
4235 itercnt = startvar;
4236 tree n1 = fd->loop.n1;
4237 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4239 itercnt
4240 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4241 itercnt);
4242 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4244 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4245 itercnt, n1);
4246 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4247 itercnt, fd->loop.step);
4248 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4249 NULL_TREE, false,
4250 GSI_CONTINUE_LINKING);
4252 a = fold_build2 (MULT_EXPR, type,
4253 fold_convert (type, itercnt),
4254 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4255 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4256 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4257 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4258 false, GSI_CONTINUE_LINKING);
4259 expand_omp_build_assign (&gsi, dest, t, true);
4261 if (fd->collapse > 1)
4262 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4264 if (fd->ordered)
4266 /* Until now, the counts array contained the number of iterations
4267 (or the variable containing it) for the ith loop. From now on,
4268 we need those counts only for the collapsed loops, and only for
4269 the 2nd till the last collapsed one. Move those one element
4270 earlier; we'll use counts[fd->collapse - 1] for the first
4271 source/sink iteration counter and so on, and counts[fd->ordered]
4272 as the array holding the current counter values for
4273 depend(source). */
4274 if (fd->collapse > 1)
4275 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4276 if (broken_loop)
4278 int i;
4279 for (i = fd->collapse; i < fd->ordered; i++)
4281 tree type = TREE_TYPE (fd->loops[i].v);
4282 tree this_cond
4283 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4284 fold_convert (type, fd->loops[i].n1),
4285 fold_convert (type, fd->loops[i].n2));
4286 if (!integer_onep (this_cond))
4287 break;
4289 if (i < fd->ordered)
4291 cont_bb
4292 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4293 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4294 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4295 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4296 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4297 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4298 make_edge (cont_bb, l1_bb, 0);
4299 l2_bb = create_empty_bb (cont_bb);
4300 broken_loop = false;
4303 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4304 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4305 ordered_lastprivate);
4306 if (counts[fd->collapse - 1])
4308 gcc_assert (fd->collapse == 1);
4309 gsi = gsi_last_bb (l0_bb);
4310 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4311 istart0, true);
4312 gsi = gsi_last_bb (cont_bb);
4313 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4314 build_int_cst (fd->iter_type, 1));
4315 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4316 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4317 size_zero_node, NULL_TREE, NULL_TREE);
4318 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4319 t = counts[fd->collapse - 1];
4321 else if (fd->collapse > 1)
4322 t = fd->loop.v;
4323 else
4325 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4326 fd->loops[0].v, fd->loops[0].n1);
4327 t = fold_convert (fd->iter_type, t);
4329 gsi = gsi_last_bb (l0_bb);
4330 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4331 size_zero_node, NULL_TREE, NULL_TREE);
4332 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4333 false, GSI_CONTINUE_LINKING);
4334 expand_omp_build_assign (&gsi, aref, t, true);
4337 if (!broken_loop)
4339 /* Code to control the increment and predicate for the sequential
4340 loop goes in the CONT_BB. */
4341 gsi = gsi_last_nondebug_bb (cont_bb);
4342 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4343 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4344 vmain = gimple_omp_continue_control_use (cont_stmt);
4345 vback = gimple_omp_continue_control_def (cont_stmt);
4347 if (cond_var)
4349 tree itype = TREE_TYPE (cond_var);
4350 tree t2;
4351 if ((fd->ordered && fd->collapse == 1)
4352 || bias
4353 || POINTER_TYPE_P (type)
4354 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4355 || fd->loop.cond_code != LT_EXPR)
4356 t2 = build_int_cst (itype, 1);
4357 else
4358 t2 = fold_convert (itype, fd->loop.step);
4359 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4360 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4361 NULL_TREE, true, GSI_SAME_STMT);
4362 assign_stmt = gimple_build_assign (cond_var, t2);
4363 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4366 if (!gimple_omp_for_combined_p (fd->for_stmt))
4368 if (POINTER_TYPE_P (type))
4369 t = fold_build_pointer_plus (vmain, fd->loop.step);
4370 else
4371 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4372 t = force_gimple_operand_gsi (&gsi, t,
4373 DECL_P (vback)
4374 && TREE_ADDRESSABLE (vback),
4375 NULL_TREE, true, GSI_SAME_STMT);
4376 assign_stmt = gimple_build_assign (vback, t);
4377 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4379 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4381 tree tem;
4382 if (fd->collapse > 1)
4383 tem = fd->loop.v;
4384 else
4386 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4387 fd->loops[0].v, fd->loops[0].n1);
4388 tem = fold_convert (fd->iter_type, tem);
4390 tree aref = build4 (ARRAY_REF, fd->iter_type,
4391 counts[fd->ordered], size_zero_node,
4392 NULL_TREE, NULL_TREE);
4393 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4394 true, GSI_SAME_STMT);
4395 expand_omp_build_assign (&gsi, aref, tem);
4398 t = build2 (fd->loop.cond_code, boolean_type_node,
4399 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4400 iend);
4401 gcond *cond_stmt = gimple_build_cond_empty (t);
4402 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4405 /* Remove GIMPLE_OMP_CONTINUE. */
4406 gsi_remove (&gsi, true);
4408 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4409 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4411 /* Emit code to get the next parallel iteration in L2_BB. */
4412 gsi = gsi_start_bb (l2_bb);
4414 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4415 build_fold_addr_expr (istart0),
4416 build_fold_addr_expr (iend0));
4417 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4418 false, GSI_CONTINUE_LINKING);
4419 if (TREE_TYPE (t) != boolean_type_node)
4420 t = fold_build2 (NE_EXPR, boolean_type_node,
4421 t, build_int_cst (TREE_TYPE (t), 0));
4422 gcond *cond_stmt = gimple_build_cond_empty (t);
4423 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4426 /* Add the loop cleanup function. */
4427 gsi = gsi_last_nondebug_bb (exit_bb);
4428 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4429 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4430 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4431 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4432 else
4433 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4434 gcall *call_stmt = gimple_build_call (t, 0);
4435 if (fd->ordered)
4437 tree arr = counts[fd->ordered];
4438 tree clobber = build_clobber (TREE_TYPE (arr));
4439 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4440 GSI_SAME_STMT);
4442 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4444 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4445 if (fd->have_reductemp)
4447 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4448 gimple_call_lhs (call_stmt));
4449 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4452 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4453 gsi_remove (&gsi, true);
4455 /* Connect the new blocks. */
4456 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4457 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4459 if (!broken_loop)
4461 gimple_seq phis;
4463 e = find_edge (cont_bb, l3_bb);
4464 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4466 phis = phi_nodes (l3_bb);
4467 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4469 gimple *phi = gsi_stmt (gsi);
4470 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4471 PHI_ARG_DEF_FROM_EDGE (phi, e));
4473 remove_edge (e);
4475 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4476 e = find_edge (cont_bb, l1_bb);
4477 if (e == NULL)
4479 e = BRANCH_EDGE (cont_bb);
4480 gcc_assert (single_succ (e->dest) == l1_bb);
4482 if (gimple_omp_for_combined_p (fd->for_stmt))
4484 remove_edge (e);
4485 e = NULL;
4487 else if (fd->collapse > 1)
4489 remove_edge (e);
4490 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4492 else
4493 e->flags = EDGE_TRUE_VALUE;
4494 if (e)
4496 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4497 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4499 else
4501 e = find_edge (cont_bb, l2_bb);
4502 e->flags = EDGE_FALLTHRU;
4504 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4506 if (gimple_in_ssa_p (cfun))
4508 /* Add phis to the outer loop that connect to the phis in the inner,
4509 original loop, and move the loop entry value of the inner phi to
4510 the loop entry value of the outer phi. */
4511 gphi_iterator psi;
4512 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4514 location_t locus;
4515 gphi *nphi;
4516 gphi *exit_phi = psi.phi ();
4518 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4519 continue;
4521 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4522 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4524 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4525 edge latch_to_l1 = find_edge (latch, l1_bb);
4526 gphi *inner_phi
4527 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4529 tree t = gimple_phi_result (exit_phi);
4530 tree new_res = copy_ssa_name (t, NULL);
4531 nphi = create_phi_node (new_res, l0_bb);
4533 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4534 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4535 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4536 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4537 add_phi_arg (nphi, t, entry_to_l0, locus);
4539 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4540 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4542 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4546 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4547 recompute_dominator (CDI_DOMINATORS, l2_bb));
4548 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4549 recompute_dominator (CDI_DOMINATORS, l3_bb));
4550 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4551 recompute_dominator (CDI_DOMINATORS, l0_bb));
4552 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4553 recompute_dominator (CDI_DOMINATORS, l1_bb));
4555 /* We enter expand_omp_for_generic with a loop. This original loop may
4556 have its own loop struct, or it may be part of an outer loop struct
4557 (which may be the fake loop). */
4558 class loop *outer_loop = entry_bb->loop_father;
4559 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4561 add_bb_to_loop (l2_bb, outer_loop);
4563 /* We've added a new loop around the original loop. Allocate the
4564 corresponding loop struct. */
4565 class loop *new_loop = alloc_loop ();
4566 new_loop->header = l0_bb;
4567 new_loop->latch = l2_bb;
4568 add_loop (new_loop, outer_loop);
4570 /* Allocate a loop structure for the original loop unless we already
4571 had one. */
4572 if (!orig_loop_has_loop_struct
4573 && !gimple_omp_for_combined_p (fd->for_stmt))
4575 class loop *orig_loop = alloc_loop ();
4576 orig_loop->header = l1_bb;
4577 /* The loop may have multiple latches. */
4578 add_loop (orig_loop, new_loop);
4583 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4584 compute the needed allocation size; if PTR is non-NULL, it is the start
4585 of the allocation and this routine assigns to OMP_CLAUSE_DECL (c) of
4586 the relevant _scantemp_ clauses pointers to the corresponding arrays.
4587 The routine handles the team allocations if !ALLOC and the thread
4588 allocations if ALLOC. SZ is the initial size needed for other purposes
4589 and ALLOC_ALIGN the guaranteed alignment of the allocation in bytes.
4590 CNT is the number of elements of each array: omp_get_num_threads ()
4591 for !ALLOC, for ALLOC the number of iterations handled by the current thread. */
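/* For instance, for a single _scantemp_ array of long long elements
   with SZ = 4 and ALLOC_ALIGN = 8, the sizing pass (PTR == NULL)
   rounds SZ up to 8 and returns CNT * 8 + 8, while the assignment
   pass with the same arguments points the clause decl at PTR + 8 and
   then advances PTR by CNT * 8. */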
4593 static tree
4594 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4595 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4596 gimple_stmt_iterator *gsi, bool alloc)
4598 tree eltsz = NULL_TREE;
4599 unsigned HOST_WIDE_INT preval = 0;
4600 if (ptr && sz)
4601 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4602 ptr, size_int (sz));
4603 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4604 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4605 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4606 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4608 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4609 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4610 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4612 unsigned HOST_WIDE_INT szl
4613 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4614 szl = least_bit_hwi (szl);
4615 if (szl)
4616 al = MIN (al, szl);
4618 if (ptr == NULL_TREE)
4620 if (eltsz == NULL_TREE)
4621 eltsz = TYPE_SIZE_UNIT (pointee_type);
4622 else
4623 eltsz = size_binop (PLUS_EXPR, eltsz,
4624 TYPE_SIZE_UNIT (pointee_type));
4626 if (preval == 0 && al <= alloc_align)
4628 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4629 sz += diff;
4630 if (diff && ptr)
4631 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4632 ptr, size_int (diff));
4634 else if (al > preval)
4636 if (ptr)
4638 ptr = fold_convert (pointer_sized_int_node, ptr);
4639 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4640 build_int_cst (pointer_sized_int_node,
4641 al - 1));
4642 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4643 build_int_cst (pointer_sized_int_node,
4644 -(HOST_WIDE_INT) al));
4645 ptr = fold_convert (ptr_type_node, ptr);
4647 else
4648 sz += al - 1;
4650 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4651 preval = al;
4652 else
4653 preval = 1;
4654 if (ptr)
4656 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4657 ptr = OMP_CLAUSE_DECL (c);
4658 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4659 size_binop (MULT_EXPR, cnt,
4660 TYPE_SIZE_UNIT (pointee_type)));
4664 if (ptr == NULL_TREE)
4666 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4667 if (sz)
4668 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4669 return eltsz;
4671 else
4672 return ptr;
4675 /* Return the last _looptemp_ clause if one has been created for
4676 lastprivate on distribute parallel for{, simd} or taskloop.
4677 FD is the loop data and INNERC should be the second _looptemp_
4678 clause (the one holding the end of the range).
4679 This is followed by collapse - 1 _looptemp_ clauses for
4680 counts[1] and up, and for triangular loops by 4 further
4681 _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4682 one for factor and one for adjn1). After this there is optionally one
4683 _looptemp_ clause that this function returns. */
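/* E.g. for a rectangular collapse(3) loop nest the chain is
   _looptemp_ (start), _looptemp_ (end, the INNERC argument),
   _looptemp_ (counts[1]), _looptemp_ (counts[2]) and then optionally
   the lastprivate _looptemp_ clause this function returns. */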
4685 static tree
4686 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4688 gcc_assert (innerc);
4689 int count = fd->collapse - 1;
4690 if (fd->non_rect
4691 && fd->last_nonrect == fd->first_nonrect + 1
4692 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4693 count += 4;
4694 for (int i = 0; i < count; i++)
4696 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4697 OMP_CLAUSE__LOOPTEMP_);
4698 gcc_assert (innerc);
4700 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4701 OMP_CLAUSE__LOOPTEMP_);
4704 /* A subroutine of expand_omp_for. Generate code for a parallel
4705 loop with static schedule and no specified chunk size. Given
4706 parameters:
4708 for (V = N1; V cond N2; V += STEP) BODY;
4710 where COND is "<" or ">", we generate pseudocode
4712 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4713 if (cond is <)
4714 adj = STEP - 1;
4715 else
4716 adj = STEP + 1;
4717 if ((__typeof (V)) -1 > 0 && cond is >)
4718 n = -(adj + N2 - N1) / -STEP;
4719 else
4720 n = (adj + N2 - N1) / STEP;
4721 q = n / nthreads;
4722 tt = n % nthreads;
4723 if (threadid < tt) goto L3; else goto L4;
4725 tt = 0;
4726 q = q + 1;
4728 s0 = q * threadid + tt;
4729 e0 = s0 + q;
4730 V = s0 * STEP + N1;
4731 if (s0 >= e0) goto L2; else goto L0;
4733 e = e0 * STEP + N1;
4735 BODY;
4736 V += STEP;
4737 if (V cond e) goto L1;
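/* As a worked example of the splitting above, with n = 10 iterations
   and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1
   (threadid < tt) execute q + 1 = 3 iterations each and threads 2
   and 3 execute 2 each: 3 + 3 + 2 + 2 = 10. */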
4741 static void
4742 expand_omp_for_static_nochunk (struct omp_region *region,
4743 struct omp_for_data *fd,
4744 gimple *inner_stmt)
4746 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4747 tree type, itype, vmain, vback;
4748 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4749 basic_block body_bb, cont_bb, collapse_bb = NULL;
4750 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4751 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4752 gimple_stmt_iterator gsi, gsip;
4753 edge ep;
4754 bool broken_loop = region->cont == NULL;
4755 tree *counts = NULL;
4756 tree n1, n2, step;
4757 tree reductions = NULL_TREE;
4758 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4760 itype = type = TREE_TYPE (fd->loop.v);
4761 if (POINTER_TYPE_P (type))
4762 itype = signed_type_for (type);
4764 entry_bb = region->entry;
4765 cont_bb = region->cont;
4766 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4767 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4768 gcc_assert (broken_loop
4769 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4770 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4771 body_bb = single_succ (seq_start_bb);
4772 if (!broken_loop)
4774 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4775 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4778 exit_bb = region->exit;
4780 /* Iteration space partitioning goes in ENTRY_BB. */
4781 gsi = gsi_last_nondebug_bb (entry_bb);
4782 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4783 gsip = gsi;
4784 gsi_prev (&gsip);
4786 if (fd->collapse > 1)
4788 int first_zero_iter = -1, dummy = -1;
4789 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4791 counts = XALLOCAVEC (tree, fd->collapse);
4792 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4793 fin_bb, first_zero_iter,
4794 dummy_bb, dummy, l2_dom_bb);
4795 t = NULL_TREE;
4797 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4798 t = integer_one_node;
4799 else
4800 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4801 fold_convert (type, fd->loop.n1),
4802 fold_convert (type, fd->loop.n2));
4803 if (fd->collapse == 1
4804 && TYPE_UNSIGNED (type)
4805 && (t == NULL_TREE || !integer_onep (t)))
4807 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4808 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4809 true, GSI_SAME_STMT);
4810 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4811 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4812 true, GSI_SAME_STMT);
4813 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4814 NULL_TREE, NULL_TREE);
4815 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4816 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4817 expand_omp_regimplify_p, NULL, NULL)
4818 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4819 expand_omp_regimplify_p, NULL, NULL))
4821 gsi = gsi_for_stmt (cond_stmt);
4822 gimple_regimplify_operands (cond_stmt, &gsi);
4824 ep = split_block (entry_bb, cond_stmt);
4825 ep->flags = EDGE_TRUE_VALUE;
4826 entry_bb = ep->dest;
4827 ep->probability = profile_probability::very_likely ();
4828 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4829 ep->probability = profile_probability::very_unlikely ();
4830 if (gimple_in_ssa_p (cfun))
4832 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4833 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4834 !gsi_end_p (gpi); gsi_next (&gpi))
4836 gphi *phi = gpi.phi ();
4837 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4838 ep, UNKNOWN_LOCATION);
4841 gsi = gsi_last_bb (entry_bb);
4844 if (fd->lastprivate_conditional)
4846 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4847 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4848 if (fd->have_pointer_condtemp)
4849 condtemp = OMP_CLAUSE_DECL (c);
4850 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4851 cond_var = OMP_CLAUSE_DECL (c);
4853 if (fd->have_reductemp
4854 /* For scan, we don't want to reinitialize condtemp before the
4855 second loop. */
4856 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4857 || fd->have_nonctrl_scantemp)
4859 tree t1 = build_int_cst (long_integer_type_node, 0);
4860 tree t2 = build_int_cst (long_integer_type_node, 1);
4861 tree t3 = build_int_cstu (long_integer_type_node,
4862 (HOST_WIDE_INT_1U << 31) + 1);
4863 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4864 gimple_stmt_iterator gsi2 = gsi_none ();
4865 gimple *g = NULL;
4866 tree mem = null_pointer_node, memv = NULL_TREE;
4867 unsigned HOST_WIDE_INT condtemp_sz = 0;
4868 unsigned HOST_WIDE_INT alloc_align = 0;
4869 if (fd->have_reductemp)
4871 gcc_assert (!fd->have_nonctrl_scantemp);
4872 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4873 reductions = OMP_CLAUSE_DECL (c);
4874 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4875 g = SSA_NAME_DEF_STMT (reductions);
4876 reductions = gimple_assign_rhs1 (g);
4877 OMP_CLAUSE_DECL (c) = reductions;
4878 gsi2 = gsi_for_stmt (g);
4880 else
4882 if (gsi_end_p (gsip))
4883 gsi2 = gsi_after_labels (region->entry);
4884 else
4885 gsi2 = gsip;
4886 reductions = null_pointer_node;
4888 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4890 tree type;
4891 if (fd->have_pointer_condtemp)
4892 type = TREE_TYPE (condtemp);
4893 else
4894 type = ptr_type_node;
4895 memv = create_tmp_var (type);
4896 TREE_ADDRESSABLE (memv) = 1;
4897 unsigned HOST_WIDE_INT sz = 0;
4898 tree size = NULL_TREE;
4899 if (fd->have_pointer_condtemp)
4901 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4902 sz *= fd->lastprivate_conditional;
4903 condtemp_sz = sz;
4905 if (fd->have_nonctrl_scantemp)
4907 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4908 gimple *g = gimple_build_call (nthreads, 0);
4909 nthreads = create_tmp_var (integer_type_node);
4910 gimple_call_set_lhs (g, nthreads);
4911 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4912 nthreads = fold_convert (sizetype, nthreads);
4913 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4914 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4915 alloc_align, nthreads, NULL,
4916 false);
4917 size = fold_convert (type, size);
4919 else
4920 size = build_int_cst (type, sz);
4921 expand_omp_build_assign (&gsi2, memv, size, false);
4922 mem = build_fold_addr_expr (memv);
4924 tree t
4925 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4926 9, t1, t2, t2, t3, t1, null_pointer_node,
4927 null_pointer_node, reductions, mem);
4928 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4929 true, GSI_SAME_STMT);
4930 if (fd->have_pointer_condtemp)
4931 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4932 if (fd->have_nonctrl_scantemp)
4934 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4935 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4936 alloc_align, nthreads, &gsi2, false);
4938 if (fd->have_reductemp)
4940 gsi_remove (&gsi2, true);
4941 release_ssa_name (gimple_assign_lhs (g));
4944 switch (gimple_omp_for_kind (fd->for_stmt))
4946 case GF_OMP_FOR_KIND_FOR:
4947 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4948 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4949 break;
4950 case GF_OMP_FOR_KIND_DISTRIBUTE:
4951 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4952 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4953 break;
4954 default:
4955 gcc_unreachable ();
4957 nthreads = build_call_expr (nthreads, 0);
4958 nthreads = fold_convert (itype, nthreads);
4959 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4960 true, GSI_SAME_STMT);
4961 threadid = build_call_expr (threadid, 0);
4962 threadid = fold_convert (itype, threadid);
4963 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4964 true, GSI_SAME_STMT);
4966 n1 = fd->loop.n1;
4967 n2 = fd->loop.n2;
4968 step = fd->loop.step;
4969 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4971 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4972 OMP_CLAUSE__LOOPTEMP_);
4973 gcc_assert (innerc);
4974 n1 = OMP_CLAUSE_DECL (innerc);
4975 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4976 OMP_CLAUSE__LOOPTEMP_);
4977 gcc_assert (innerc);
4978 n2 = OMP_CLAUSE_DECL (innerc);
4980 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4981 true, NULL_TREE, true, GSI_SAME_STMT);
4982 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4983 true, NULL_TREE, true, GSI_SAME_STMT);
4984 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4985 true, NULL_TREE, true, GSI_SAME_STMT);
4987 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4988 t = fold_build2 (PLUS_EXPR, itype, step, t);
4989 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4990 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4991 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4992 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4993 fold_build1 (NEGATE_EXPR, itype, t),
4994 fold_build1 (NEGATE_EXPR, itype, step));
4995 else
4996 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4997 t = fold_convert (itype, t);
4998 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5000 q = create_tmp_reg (itype, "q");
5001 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5003 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5005 tt = create_tmp_reg (itype, "tt");
5006 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5007 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5008 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5010 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5011 gcond *cond_stmt = gimple_build_cond_empty (t);
5012 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5014 second_bb = split_block (entry_bb, cond_stmt)->dest;
5015 gsi = gsi_last_nondebug_bb (second_bb);
5016 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5018 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5019 GSI_SAME_STMT);
5020 gassign *assign_stmt
5021 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5022 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5024 third_bb = split_block (second_bb, assign_stmt)->dest;
5025 gsi = gsi_last_nondebug_bb (third_bb);
5026 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5028 if (fd->have_nonctrl_scantemp)
5030 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5031 tree controlp = NULL_TREE, controlb = NULL_TREE;
5032 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5033 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5034 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5036 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5037 controlb = OMP_CLAUSE_DECL (c);
5038 else
5039 controlp = OMP_CLAUSE_DECL (c);
5040 if (controlb && controlp)
5041 break;
5043 gcc_assert (controlp && controlb);
5044 tree cnt = create_tmp_var (sizetype);
5045 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5046 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5047 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5048 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5049 alloc_align, cnt, NULL, true);
5050 tree size = create_tmp_var (sizetype);
5051 expand_omp_build_assign (&gsi, size, sz, false);
5052 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5053 size, size_int (16384));
5054 expand_omp_build_assign (&gsi, controlb, cmp);
5055 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5056 NULL_TREE, NULL_TREE);
5057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5058 fourth_bb = split_block (third_bb, g)->dest;
5059 gsi = gsi_last_nondebug_bb (fourth_bb);
5060 /* FIXME: Once we have allocators, this should use allocator. */
5061 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5062 gimple_call_set_lhs (g, controlp);
5063 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5064 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5065 &gsi, true);
5066 gsi_prev (&gsi);
5067 g = gsi_stmt (gsi);
5068 fifth_bb = split_block (fourth_bb, g)->dest;
5069 gsi = gsi_last_nondebug_bb (fifth_bb);
5071 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5072 gimple_call_set_lhs (g, controlp);
5073 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5074 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5075 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5076 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5077 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5079 tree tmp = create_tmp_var (sizetype);
5080 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5081 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5082 TYPE_SIZE_UNIT (pointee_type));
5083 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5084 g = gimple_build_call (alloca_decl, 2, tmp,
5085 size_int (TYPE_ALIGN (pointee_type)));
5086 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5090 sixth_bb = split_block (fifth_bb, g)->dest;
5091 gsi = gsi_last_nondebug_bb (sixth_bb);
5094 t = build2 (MULT_EXPR, itype, q, threadid);
5095 t = build2 (PLUS_EXPR, itype, t, tt);
5096 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5098 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5099 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5101 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5102 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5104 /* Remove the GIMPLE_OMP_FOR statement. */
5105 gsi_remove (&gsi, true);
5107 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5108 gsi = gsi_start_bb (seq_start_bb);
5110 tree startvar = fd->loop.v;
5111 tree endvar = NULL_TREE;
5113 if (gimple_omp_for_combined_p (fd->for_stmt))
5115 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5116 ? gimple_omp_parallel_clauses (inner_stmt)
5117 : gimple_omp_for_clauses (inner_stmt);
5118 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5119 gcc_assert (innerc);
5120 startvar = OMP_CLAUSE_DECL (innerc);
5121 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5122 OMP_CLAUSE__LOOPTEMP_);
5123 gcc_assert (innerc);
5124 endvar = OMP_CLAUSE_DECL (innerc);
5125 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5126 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5128 innerc = find_lastprivate_looptemp (fd, innerc);
5129 if (innerc)
5131 /* If needed (distribute parallel for with lastprivate),
5132 propagate down the total number of iterations. */
5133 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5134 fd->loop.n2);
5135 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5136 GSI_CONTINUE_LINKING);
5137 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5138 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5142 t = fold_convert (itype, s0);
5143 t = fold_build2 (MULT_EXPR, itype, t, step);
5144 if (POINTER_TYPE_P (type))
5146 t = fold_build_pointer_plus (n1, t);
5147 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5148 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5149 t = fold_convert (signed_type_for (type), t);
5151 else
5152 t = fold_build2 (PLUS_EXPR, type, t, n1);
5153 t = fold_convert (TREE_TYPE (startvar), t);
5154 t = force_gimple_operand_gsi (&gsi, t,
5155 DECL_P (startvar)
5156 && TREE_ADDRESSABLE (startvar),
5157 NULL_TREE, false, GSI_CONTINUE_LINKING);
5158 assign_stmt = gimple_build_assign (startvar, t);
5159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160 if (cond_var)
5162 tree itype = TREE_TYPE (cond_var);
5163 /* For the lastprivate(conditional:) itervar, we need an iteration
5164 counter that starts at a non-zero unsigned value and increases.
5165 Prefer as few IVs as possible, so if we can use startvar
5166 itself, use that, or startvar + constant (those would be
5167 incremented with step), and as a last resort use s0 + 1,
5168 incremented by 1. */
5169 if (POINTER_TYPE_P (type)
5170 || TREE_CODE (n1) != INTEGER_CST
5171 || fd->loop.cond_code != LT_EXPR)
5172 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5173 build_int_cst (itype, 1));
5174 else if (tree_int_cst_sgn (n1) == 1)
5175 t = fold_convert (itype, t);
5176 else
5178 tree c = fold_convert (itype, n1);
5179 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5180 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5182 t = force_gimple_operand_gsi (&gsi, t, false,
5183 NULL_TREE, false, GSI_CONTINUE_LINKING);
5184 assign_stmt = gimple_build_assign (cond_var, t);
5185 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
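/* Added note: concretely, the three choices above are
     cond_var = s0 + 1;              (pointer IV, non-constant n1, or cond != '<')
     cond_var = startvar;            (n1 >= 1, so startvar already starts positive)
     cond_var = startvar + (1 - n1); (bias the counter so it starts at 1)
   each of which starts non-zero and increases across the thread's
   iterations.  */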
5188 t = fold_convert (itype, e0);
5189 t = fold_build2 (MULT_EXPR, itype, t, step);
5190 if (POINTER_TYPE_P (type))
5192 t = fold_build_pointer_plus (n1, t);
5193 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5194 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5195 t = fold_convert (signed_type_for (type), t);
5197 else
5198 t = fold_build2 (PLUS_EXPR, type, t, n1);
5199 t = fold_convert (TREE_TYPE (startvar), t);
5200 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5201 false, GSI_CONTINUE_LINKING);
5202 if (endvar)
5204 assign_stmt = gimple_build_assign (endvar, e);
5205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5206 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5207 assign_stmt = gimple_build_assign (fd->loop.v, e);
5208 else
5209 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5210 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5212 /* Handle linear clause adjustments. */
5213 tree itercnt = NULL_TREE;
5214 tree *nonrect_bounds = NULL;
5215 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5216 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5217 c; c = OMP_CLAUSE_CHAIN (c))
5218 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5219 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5221 tree d = OMP_CLAUSE_DECL (c);
5222 bool is_ref = omp_is_reference (d);
5223 tree t = d, a, dest;
5224 if (is_ref)
5225 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5226 if (itercnt == NULL_TREE)
5228 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5230 itercnt = fold_build2 (MINUS_EXPR, itype,
5231 fold_convert (itype, n1),
5232 fold_convert (itype, fd->loop.n1));
5233 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5234 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5235 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5236 NULL_TREE, false,
5237 GSI_CONTINUE_LINKING);
5239 else
5240 itercnt = s0;
5242 tree type = TREE_TYPE (t);
5243 if (POINTER_TYPE_P (type))
5244 type = sizetype;
5245 a = fold_build2 (MULT_EXPR, type,
5246 fold_convert (type, itercnt),
5247 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5248 dest = unshare_expr (t);
5249 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5250 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5251 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5252 false, GSI_CONTINUE_LINKING);
5253 expand_omp_build_assign (&gsi, dest, t, true);
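/* Added note: at source level, "linear(d:ls)" requires each thread to
   enter its first iteration with  d == d_orig + iter_index * ls;
   ITERCNT above is that index (s0, rebiased by
   (n1 - fd->loop.n1) / step for combined constructs).  E.g. with
   hypothetical numbers d_orig == 5, ls == 3, and a thread starting at
   logical iteration 4, d must start at 5 + 4*3 == 17.  */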
5255 if (fd->collapse > 1)
5257 if (fd->non_rect)
5259 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5260 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5262 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5263 startvar);
5266 if (!broken_loop)
5268 /* The code controlling the sequential loop replaces the
5269 GIMPLE_OMP_CONTINUE. */
5270 gsi = gsi_last_nondebug_bb (cont_bb);
5271 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5272 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5273 vmain = gimple_omp_continue_control_use (cont_stmt);
5274 vback = gimple_omp_continue_control_def (cont_stmt);
5276 if (cond_var)
5278 tree itype = TREE_TYPE (cond_var);
5279 tree t2;
5280 if (POINTER_TYPE_P (type)
5281 || TREE_CODE (n1) != INTEGER_CST
5282 || fd->loop.cond_code != LT_EXPR)
5283 t2 = build_int_cst (itype, 1);
5284 else
5285 t2 = fold_convert (itype, step);
5286 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5287 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5288 NULL_TREE, true, GSI_SAME_STMT);
5289 assign_stmt = gimple_build_assign (cond_var, t2);
5290 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5293 if (!gimple_omp_for_combined_p (fd->for_stmt))
5295 if (POINTER_TYPE_P (type))
5296 t = fold_build_pointer_plus (vmain, step);
5297 else
5298 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5299 t = force_gimple_operand_gsi (&gsi, t,
5300 DECL_P (vback)
5301 && TREE_ADDRESSABLE (vback),
5302 NULL_TREE, true, GSI_SAME_STMT);
5303 assign_stmt = gimple_build_assign (vback, t);
5304 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5306 t = build2 (fd->loop.cond_code, boolean_type_node,
5307 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5308 ? t : vback, e);
5309 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5312 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5313 gsi_remove (&gsi, true);
5315 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5316 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5317 cont_bb, body_bb);
5320 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5321 gsi = gsi_last_nondebug_bb (exit_bb);
5322 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5324 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5325 if (fd->have_reductemp
5326 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5327 && !fd->have_nonctrl_scantemp))
5329 tree fn;
5330 if (t)
5331 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5332 else
5333 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5334 gcall *g = gimple_build_call (fn, 0);
5335 if (t)
5337 gimple_call_set_lhs (g, t);
5338 if (fd->have_reductemp)
5339 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5340 NOP_EXPR, t),
5341 GSI_SAME_STMT);
5343 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5345 else
5346 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5348 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5349 && !fd->have_nonctrl_scantemp)
5351 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5352 gcall *g = gimple_build_call (fn, 0);
5353 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5355 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5357 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5358 tree controlp = NULL_TREE, controlb = NULL_TREE;
5359 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5360 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5361 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5363 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5364 controlb = OMP_CLAUSE_DECL (c);
5365 else
5366 controlp = OMP_CLAUSE_DECL (c);
5367 if (controlb && controlp)
5368 break;
5370 gcc_assert (controlp && controlb);
5371 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5372 NULL_TREE, NULL_TREE);
5373 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5374 exit1_bb = split_block (exit_bb, g)->dest;
5375 gsi = gsi_after_labels (exit1_bb);
5376 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5377 controlp);
5378 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5379 exit2_bb = split_block (exit1_bb, g)->dest;
5380 gsi = gsi_after_labels (exit2_bb);
5381 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5382 controlp);
5383 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5384 exit3_bb = split_block (exit2_bb, g)->dest;
5385 gsi = gsi_after_labels (exit3_bb);
5387 gsi_remove (&gsi, true);
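/* Added note: the branch above mirrors the allocation split made at
   entry -- when CONTROLB is true the scan temporaries were malloc'ed
   (size > 16384) and are freed here; otherwise they were alloca'ed and
   the stack pointer saved in CONTROLP is restored.  */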
5389 /* Connect all the blocks. */
5390 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5391 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5392 ep = find_edge (entry_bb, second_bb);
5393 ep->flags = EDGE_TRUE_VALUE;
5394 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5395 if (fourth_bb)
5397 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5398 ep->probability
5399 = profile_probability::guessed_always ().apply_scale (1, 2);
5400 ep = find_edge (third_bb, fourth_bb);
5401 ep->flags = EDGE_TRUE_VALUE;
5402 ep->probability
5403 = profile_probability::guessed_always ().apply_scale (1, 2);
5404 ep = find_edge (fourth_bb, fifth_bb);
5405 redirect_edge_and_branch (ep, sixth_bb);
5407 else
5408 sixth_bb = third_bb;
5409 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5410 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5411 if (exit1_bb)
5413 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5414 ep->probability
5415 = profile_probability::guessed_always ().apply_scale (1, 2);
5416 ep = find_edge (exit_bb, exit1_bb);
5417 ep->flags = EDGE_TRUE_VALUE;
5418 ep->probability
5419 = profile_probability::guessed_always ().apply_scale (1, 2);
5420 ep = find_edge (exit1_bb, exit2_bb);
5421 redirect_edge_and_branch (ep, exit3_bb);
5424 if (!broken_loop)
5426 ep = find_edge (cont_bb, body_bb);
5427 if (ep == NULL)
5429 ep = BRANCH_EDGE (cont_bb);
5430 gcc_assert (single_succ (ep->dest) == body_bb);
5432 if (gimple_omp_for_combined_p (fd->for_stmt))
5434 remove_edge (ep);
5435 ep = NULL;
5437 else if (fd->collapse > 1)
5439 remove_edge (ep);
5440 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5442 else
5443 ep->flags = EDGE_TRUE_VALUE;
5444 find_edge (cont_bb, fin_bb)->flags
5445 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5448 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5449 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5450 if (fourth_bb)
5452 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5453 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5455 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5457 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5458 recompute_dominator (CDI_DOMINATORS, body_bb));
5459 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5460 recompute_dominator (CDI_DOMINATORS, fin_bb));
5461 if (exit1_bb)
5463 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5467 class loop *loop = body_bb->loop_father;
5468 if (loop != entry_bb->loop_father)
5470 gcc_assert (broken_loop || loop->header == body_bb);
5471 gcc_assert (broken_loop
5472 || loop->latch == region->cont
5473 || single_pred (loop->latch) == region->cont);
5474 return;
5477 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5479 loop = alloc_loop ();
5480 loop->header = body_bb;
5481 if (collapse_bb == NULL)
5482 loop->latch = cont_bb;
5483 add_loop (loop, body_bb->loop_father);
5487 /* Return phi in E->DEST with ARG on edge E. */
5489 static gphi *
5490 find_phi_with_arg_on_edge (tree arg, edge e)
5492 basic_block bb = e->dest;
5494 for (gphi_iterator gpi = gsi_start_phis (bb);
5495 !gsi_end_p (gpi);
5496 gsi_next (&gpi))
5498 gphi *phi = gpi.phi ();
5499 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5500 return phi;
5503 return NULL;
5506 /* A subroutine of expand_omp_for. Generate code for a parallel
5507 loop with static schedule and a specified chunk size. Given
5508 parameters:
5510 for (V = N1; V cond N2; V += STEP) BODY;
5512 where COND is "<" or ">", we generate pseudocode
5514 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
5515 if (cond is <)
5516 adj = STEP - 1;
5517 else
5518 adj = STEP + 1;
5519 if ((__typeof (V)) -1 > 0 && cond is >)
5520 n = -(adj + N2 - N1) / -STEP;
5521 else
5522 n = (adj + N2 - N1) / STEP;
5523 trip = 0;
5524 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5525 here so that V is defined
5526 if the loop is not entered
5527 L0:
5528 s0 = (trip * nthreads + threadid) * CHUNK;
5529 e0 = min (s0 + CHUNK, n);
5530 if (s0 < n) goto L1; else goto L4;
5531 L1:
5532 V = s0 * STEP + N1;
5533 e = e0 * STEP + N1;
5534 L2:
5535 BODY;
5536 V += STEP;
5537 if (V cond e) goto L2; else goto L3;
5538 L3:
5539 trip += 1;
5540 goto L0;
5541 L4:
5542 */
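/* Added sketch (not from the original file): the schedule above as plain
   C, for the common case cond == "<" and STEP > 0.  The function name
   and the use of long are assumptions made for the example, and the
   block is compiled out with #if 0.  */
#if 0
static void
chunk_schedule_demo (long n1, long n2, long step, long chunk,
		     long nthreads, long threadid)
{
  long n = (step - 1 + n2 - n1) / step;	/* adj = STEP - 1 for "<".  */
  for (long trip = 0; ; trip++)		/* L0 */
    {
      long s0 = (trip * nthreads + threadid) * chunk;
      if (s0 >= n)
	break;				/* goto L4: no chunk left for us.  */
      long e0 = s0 + chunk < n ? s0 + chunk : n;
      long v = s0 * step + n1;		/* L1 */
      for (long i = s0; i < e0; i++, v += step)
	;				/* L2: BODY runs with V == v.  */
    }
}
#endif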
5544 static void
5545 expand_omp_for_static_chunk (struct omp_region *region,
5546 struct omp_for_data *fd, gimple *inner_stmt)
5548 tree n, s0, e0, e, t;
5549 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5550 tree type, itype, vmain, vback, vextra;
5551 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5552 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5553 gimple_stmt_iterator gsi, gsip;
5554 edge se;
5555 bool broken_loop = region->cont == NULL;
5556 tree *counts = NULL;
5557 tree n1, n2, step;
5558 tree reductions = NULL_TREE;
5559 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5561 itype = type = TREE_TYPE (fd->loop.v);
5562 if (POINTER_TYPE_P (type))
5563 itype = signed_type_for (type);
5565 entry_bb = region->entry;
5566 se = split_block (entry_bb, last_stmt (entry_bb));
5567 entry_bb = se->src;
5568 iter_part_bb = se->dest;
5569 cont_bb = region->cont;
5570 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5571 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5572 gcc_assert (broken_loop
5573 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5574 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5575 body_bb = single_succ (seq_start_bb);
5576 if (!broken_loop)
5578 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5579 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5580 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5581 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5583 exit_bb = region->exit;
5585 /* Trip and adjustment setup goes in ENTRY_BB. */
5586 gsi = gsi_last_nondebug_bb (entry_bb);
5587 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5588 gsip = gsi;
5589 gsi_prev (&gsip);
5591 if (fd->collapse > 1)
5593 int first_zero_iter = -1, dummy = -1;
5594 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5596 counts = XALLOCAVEC (tree, fd->collapse);
5597 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5598 fin_bb, first_zero_iter,
5599 dummy_bb, dummy, l2_dom_bb);
5600 t = NULL_TREE;
5602 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5603 t = integer_one_node;
5604 else
5605 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5606 fold_convert (type, fd->loop.n1),
5607 fold_convert (type, fd->loop.n2));
5608 if (fd->collapse == 1
5609 && TYPE_UNSIGNED (type)
5610 && (t == NULL_TREE || !integer_onep (t)))
5612 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5613 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5614 true, GSI_SAME_STMT);
5615 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5616 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5617 true, GSI_SAME_STMT);
5618 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5619 NULL_TREE, NULL_TREE);
5620 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5621 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5622 expand_omp_regimplify_p, NULL, NULL)
5623 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5624 expand_omp_regimplify_p, NULL, NULL))
5626 gsi = gsi_for_stmt (cond_stmt);
5627 gimple_regimplify_operands (cond_stmt, &gsi);
5629 se = split_block (entry_bb, cond_stmt);
5630 se->flags = EDGE_TRUE_VALUE;
5631 entry_bb = se->dest;
5632 se->probability = profile_probability::very_likely ();
5633 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5634 se->probability = profile_probability::very_unlikely ();
5635 if (gimple_in_ssa_p (cfun))
5637 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5638 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5639 !gsi_end_p (gpi); gsi_next (&gpi))
5641 gphi *phi = gpi.phi ();
5642 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5643 se, UNKNOWN_LOCATION);
5646 gsi = gsi_last_bb (entry_bb);
5649 if (fd->lastprivate_conditional)
5651 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5652 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5653 if (fd->have_pointer_condtemp)
5654 condtemp = OMP_CLAUSE_DECL (c);
5655 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5656 cond_var = OMP_CLAUSE_DECL (c);
5658 if (fd->have_reductemp || fd->have_pointer_condtemp)
5660 tree t1 = build_int_cst (long_integer_type_node, 0);
5661 tree t2 = build_int_cst (long_integer_type_node, 1);
5662 tree t3 = build_int_cstu (long_integer_type_node,
5663 (HOST_WIDE_INT_1U << 31) + 1);
5664 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5665 gimple_stmt_iterator gsi2 = gsi_none ();
5666 gimple *g = NULL;
5667 tree mem = null_pointer_node, memv = NULL_TREE;
5668 if (fd->have_reductemp)
5670 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5671 reductions = OMP_CLAUSE_DECL (c);
5672 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5673 g = SSA_NAME_DEF_STMT (reductions);
5674 reductions = gimple_assign_rhs1 (g);
5675 OMP_CLAUSE_DECL (c) = reductions;
5676 gsi2 = gsi_for_stmt (g);
5678 else
5680 if (gsi_end_p (gsip))
5681 gsi2 = gsi_after_labels (region->entry);
5682 else
5683 gsi2 = gsip;
5684 reductions = null_pointer_node;
5686 if (fd->have_pointer_condtemp)
5688 tree type = TREE_TYPE (condtemp);
5689 memv = create_tmp_var (type);
5690 TREE_ADDRESSABLE (memv) = 1;
5691 unsigned HOST_WIDE_INT sz
5692 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5693 sz *= fd->lastprivate_conditional;
5694 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5695 false);
5696 mem = build_fold_addr_expr (memv);
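/* Added note: the GOMP_LOOP_START call built below passes what appears
   to be start/end/incr (0, 1, 1), a schedule code, chunk size 0, NULL
   istart and iend pointers, then REDUCTIONS and MEM -- a degenerate
   loop used only to hand the reduction/conditional-temp buffers to the
   runtime.  The parameter naming here is inferred from the argument
   order, not taken from this file.  */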
5698 tree t
5699 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5700 9, t1, t2, t2, t3, t1, null_pointer_node,
5701 null_pointer_node, reductions, mem);
5702 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5703 true, GSI_SAME_STMT);
5704 if (fd->have_pointer_condtemp)
5705 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5706 if (fd->have_reductemp)
5708 gsi_remove (&gsi2, true);
5709 release_ssa_name (gimple_assign_lhs (g));
5712 switch (gimple_omp_for_kind (fd->for_stmt))
5714 case GF_OMP_FOR_KIND_FOR:
5715 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5716 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5717 break;
5718 case GF_OMP_FOR_KIND_DISTRIBUTE:
5719 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5720 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5721 break;
5722 default:
5723 gcc_unreachable ();
5725 nthreads = build_call_expr (nthreads, 0);
5726 nthreads = fold_convert (itype, nthreads);
5727 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5728 true, GSI_SAME_STMT);
5729 threadid = build_call_expr (threadid, 0);
5730 threadid = fold_convert (itype, threadid);
5731 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5732 true, GSI_SAME_STMT);
5734 n1 = fd->loop.n1;
5735 n2 = fd->loop.n2;
5736 step = fd->loop.step;
5737 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5739 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5740 OMP_CLAUSE__LOOPTEMP_);
5741 gcc_assert (innerc);
5742 n1 = OMP_CLAUSE_DECL (innerc);
5743 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5744 OMP_CLAUSE__LOOPTEMP_);
5745 gcc_assert (innerc);
5746 n2 = OMP_CLAUSE_DECL (innerc);
5748 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5749 true, NULL_TREE, true, GSI_SAME_STMT);
5750 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5751 true, NULL_TREE, true, GSI_SAME_STMT);
5752 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5753 true, NULL_TREE, true, GSI_SAME_STMT);
5754 tree chunk_size = fold_convert (itype, fd->chunk_size);
5755 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5756 chunk_size
5757 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5758 GSI_SAME_STMT);
5760 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5761 t = fold_build2 (PLUS_EXPR, itype, step, t);
5762 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5763 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5764 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5765 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5766 fold_build1 (NEGATE_EXPR, itype, t),
5767 fold_build1 (NEGATE_EXPR, itype, step));
5768 else
5769 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5770 t = fold_convert (itype, t);
5771 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5772 true, GSI_SAME_STMT);
5774 trip_var = create_tmp_reg (itype, ".trip");
5775 if (gimple_in_ssa_p (cfun))
5777 trip_init = make_ssa_name (trip_var);
5778 trip_main = make_ssa_name (trip_var);
5779 trip_back = make_ssa_name (trip_var);
5781 else
5783 trip_init = trip_var;
5784 trip_main = trip_var;
5785 trip_back = trip_var;
5788 gassign *assign_stmt
5789 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5790 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
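/* Added note: in SSA form TRIP becomes a classic IV triple --
   TRIP_INIT (zero-initialized above), TRIP_MAIN (the phi result in
   iter_part_bb), and TRIP_BACK (TRIP_MAIN + 1 on the back edge from
   trip_update_bb); the phi itself is created near the end of this
   function.  */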
5792 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5793 t = fold_build2 (MULT_EXPR, itype, t, step);
5794 if (POINTER_TYPE_P (type))
5795 t = fold_build_pointer_plus (n1, t);
5796 else
5797 t = fold_build2 (PLUS_EXPR, type, t, n1);
5798 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5799 true, GSI_SAME_STMT);
5801 /* Remove the GIMPLE_OMP_FOR. */
5802 gsi_remove (&gsi, true);
5804 gimple_stmt_iterator gsif = gsi;
5806 /* Iteration space partitioning goes in ITER_PART_BB. */
5807 gsi = gsi_last_bb (iter_part_bb);
5809 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5810 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5811 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5812 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5813 false, GSI_CONTINUE_LINKING);
5815 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5816 t = fold_build2 (MIN_EXPR, itype, t, n);
5817 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5818 false, GSI_CONTINUE_LINKING);
5820 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5821 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5823 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5824 gsi = gsi_start_bb (seq_start_bb);
5826 tree startvar = fd->loop.v;
5827 tree endvar = NULL_TREE;
5829 if (gimple_omp_for_combined_p (fd->for_stmt))
5831 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5832 ? gimple_omp_parallel_clauses (inner_stmt)
5833 : gimple_omp_for_clauses (inner_stmt);
5834 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5835 gcc_assert (innerc);
5836 startvar = OMP_CLAUSE_DECL (innerc);
5837 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5838 OMP_CLAUSE__LOOPTEMP_);
5839 gcc_assert (innerc);
5840 endvar = OMP_CLAUSE_DECL (innerc);
5841 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5842 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5844 innerc = find_lastprivate_looptemp (fd, innerc);
5845 if (innerc)
5847 /* If needed (distribute parallel for with lastprivate),
5848 propagate down the total number of iterations. */
5849 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5850 fd->loop.n2);
5851 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5852 GSI_CONTINUE_LINKING);
5853 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5859 t = fold_convert (itype, s0);
5860 t = fold_build2 (MULT_EXPR, itype, t, step);
5861 if (POINTER_TYPE_P (type))
5863 t = fold_build_pointer_plus (n1, t);
5864 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5865 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5866 t = fold_convert (signed_type_for (type), t);
5868 else
5869 t = fold_build2 (PLUS_EXPR, type, t, n1);
5870 t = fold_convert (TREE_TYPE (startvar), t);
5871 t = force_gimple_operand_gsi (&gsi, t,
5872 DECL_P (startvar)
5873 && TREE_ADDRESSABLE (startvar),
5874 NULL_TREE, false, GSI_CONTINUE_LINKING);
5875 assign_stmt = gimple_build_assign (startvar, t);
5876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5877 if (cond_var)
5879 tree itype = TREE_TYPE (cond_var);
5880 /* For the lastprivate(conditional:) itervar, we need an iteration
5881 counter that starts at a non-zero unsigned value and increases.
5882 Prefer as few IVs as possible, so if we can use startvar
5883 itself, use that, or startvar + constant (those would be
5884 incremented with step), and as a last resort use s0 + 1,
5885 incremented by 1. */
5886 if (POINTER_TYPE_P (type)
5887 || TREE_CODE (n1) != INTEGER_CST
5888 || fd->loop.cond_code != LT_EXPR)
5889 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5890 build_int_cst (itype, 1));
5891 else if (tree_int_cst_sgn (n1) == 1)
5892 t = fold_convert (itype, t);
5893 else
5895 tree c = fold_convert (itype, n1);
5896 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5897 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5899 t = force_gimple_operand_gsi (&gsi, t, false,
5900 NULL_TREE, false, GSI_CONTINUE_LINKING);
5901 assign_stmt = gimple_build_assign (cond_var, t);
5902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5905 t = fold_convert (itype, e0);
5906 t = fold_build2 (MULT_EXPR, itype, t, step);
5907 if (POINTER_TYPE_P (type))
5909 t = fold_build_pointer_plus (n1, t);
5910 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5911 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5912 t = fold_convert (signed_type_for (type), t);
5914 else
5915 t = fold_build2 (PLUS_EXPR, type, t, n1);
5916 t = fold_convert (TREE_TYPE (startvar), t);
5917 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5918 false, GSI_CONTINUE_LINKING);
5919 if (endvar)
5921 assign_stmt = gimple_build_assign (endvar, e);
5922 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5923 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5924 assign_stmt = gimple_build_assign (fd->loop.v, e);
5925 else
5926 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5927 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5929 /* Handle linear clause adjustments. */
5930 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5931 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5932 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5933 c; c = OMP_CLAUSE_CHAIN (c))
5934 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5935 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5937 tree d = OMP_CLAUSE_DECL (c);
5938 bool is_ref = omp_is_reference (d);
5939 tree t = d, a, dest;
5940 if (is_ref)
5941 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5942 tree type = TREE_TYPE (t);
5943 if (POINTER_TYPE_P (type))
5944 type = sizetype;
5945 dest = unshare_expr (t);
5946 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5947 expand_omp_build_assign (&gsif, v, t);
5948 if (itercnt == NULL_TREE)
5950 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5952 itercntbias
5953 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5954 fold_convert (itype, fd->loop.n1));
5955 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5956 itercntbias, step);
5957 itercntbias
5958 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5959 NULL_TREE, true,
5960 GSI_SAME_STMT);
5961 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5962 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5963 NULL_TREE, false,
5964 GSI_CONTINUE_LINKING);
5966 else
5967 itercnt = s0;
5969 a = fold_build2 (MULT_EXPR, type,
5970 fold_convert (type, itercnt),
5971 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5972 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5973 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5974 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5975 false, GSI_CONTINUE_LINKING);
5976 expand_omp_build_assign (&gsi, dest, t, true);
5978 if (fd->collapse > 1)
5979 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5981 if (!broken_loop)
5983 /* The code controlling the sequential loop goes in CONT_BB,
5984 replacing the GIMPLE_OMP_CONTINUE. */
5985 gsi = gsi_last_nondebug_bb (cont_bb);
5986 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5987 vmain = gimple_omp_continue_control_use (cont_stmt);
5988 vback = gimple_omp_continue_control_def (cont_stmt);
5990 if (cond_var)
5992 tree itype = TREE_TYPE (cond_var);
5993 tree t2;
5994 if (POINTER_TYPE_P (type)
5995 || TREE_CODE (n1) != INTEGER_CST
5996 || fd->loop.cond_code != LT_EXPR)
5997 t2 = build_int_cst (itype, 1);
5998 else
5999 t2 = fold_convert (itype, step);
6000 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6001 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6002 NULL_TREE, true, GSI_SAME_STMT);
6003 assign_stmt = gimple_build_assign (cond_var, t2);
6004 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6007 if (!gimple_omp_for_combined_p (fd->for_stmt))
6009 if (POINTER_TYPE_P (type))
6010 t = fold_build_pointer_plus (vmain, step);
6011 else
6012 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6013 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6014 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6015 true, GSI_SAME_STMT);
6016 assign_stmt = gimple_build_assign (vback, t);
6017 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6019 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6020 t = build2 (EQ_EXPR, boolean_type_node,
6021 build_int_cst (itype, 0),
6022 build_int_cst (itype, 1));
6023 else
6024 t = build2 (fd->loop.cond_code, boolean_type_node,
6025 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6026 ? t : vback, e);
6027 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6030 /* Remove GIMPLE_OMP_CONTINUE. */
6031 gsi_remove (&gsi, true);
6033 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6034 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6036 /* Trip update code goes into TRIP_UPDATE_BB. */
6037 gsi = gsi_start_bb (trip_update_bb);
6039 t = build_int_cst (itype, 1);
6040 t = build2 (PLUS_EXPR, itype, trip_main, t);
6041 assign_stmt = gimple_build_assign (trip_back, t);
6042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6045 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6046 gsi = gsi_last_nondebug_bb (exit_bb);
6047 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6049 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6050 if (fd->have_reductemp || fd->have_pointer_condtemp)
6052 tree fn;
6053 if (t)
6054 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6055 else
6056 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6057 gcall *g = gimple_build_call (fn, 0);
6058 if (t)
6060 gimple_call_set_lhs (g, t);
6061 if (fd->have_reductemp)
6062 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6063 NOP_EXPR, t),
6064 GSI_SAME_STMT);
6066 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6068 else
6069 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6071 else if (fd->have_pointer_condtemp)
6073 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6074 gcall *g = gimple_build_call (fn, 0);
6075 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6077 gsi_remove (&gsi, true);
6079 /* Connect the new blocks. */
6080 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6081 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6083 if (!broken_loop)
6085 se = find_edge (cont_bb, body_bb);
6086 if (se == NULL)
6088 se = BRANCH_EDGE (cont_bb);
6089 gcc_assert (single_succ (se->dest) == body_bb);
6091 if (gimple_omp_for_combined_p (fd->for_stmt))
6093 remove_edge (se);
6094 se = NULL;
6096 else if (fd->collapse > 1)
6098 remove_edge (se);
6099 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6101 else
6102 se->flags = EDGE_TRUE_VALUE;
6103 find_edge (cont_bb, trip_update_bb)->flags
6104 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6106 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6107 iter_part_bb);
6110 if (gimple_in_ssa_p (cfun))
6112 gphi_iterator psi;
6113 gphi *phi;
6114 edge re, ene;
6115 edge_var_map *vm;
6116 size_t i;
6118 gcc_assert (fd->collapse == 1 && !broken_loop);
6120 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6121 remove arguments of the phi nodes in fin_bb. We need to create
6122 appropriate phi nodes in iter_part_bb instead. */
6123 se = find_edge (iter_part_bb, fin_bb);
6124 re = single_succ_edge (trip_update_bb);
6125 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6126 ene = single_succ_edge (entry_bb);
6128 psi = gsi_start_phis (fin_bb);
6129 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6130 gsi_next (&psi), ++i)
6132 gphi *nphi;
6133 location_t locus;
6135 phi = psi.phi ();
6136 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6137 redirect_edge_var_map_def (vm), 0))
6138 continue;
6140 t = gimple_phi_result (phi);
6141 gcc_assert (t == redirect_edge_var_map_result (vm));
6143 if (!single_pred_p (fin_bb))
6144 t = copy_ssa_name (t, phi);
6146 nphi = create_phi_node (t, iter_part_bb);
6148 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6149 locus = gimple_phi_arg_location_from_edge (phi, se);
6151 /* A special case -- fd->loop.v is not yet computed in
6152 iter_part_bb, so we need to use vextra instead. */
6153 if (t == fd->loop.v)
6154 t = vextra;
6155 add_phi_arg (nphi, t, ene, locus);
6156 locus = redirect_edge_var_map_location (vm);
6157 tree back_arg = redirect_edge_var_map_def (vm);
6158 add_phi_arg (nphi, back_arg, re, locus);
6159 edge ce = find_edge (cont_bb, body_bb);
6160 if (ce == NULL)
6162 ce = BRANCH_EDGE (cont_bb);
6163 gcc_assert (single_succ (ce->dest) == body_bb);
6164 ce = single_succ_edge (ce->dest);
6166 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6167 gcc_assert (inner_loop_phi != NULL);
6168 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6169 find_edge (seq_start_bb, body_bb), locus);
6171 if (!single_pred_p (fin_bb))
6172 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6174 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6175 redirect_edge_var_map_clear (re);
6176 if (single_pred_p (fin_bb))
6177 while (1)
6179 psi = gsi_start_phis (fin_bb);
6180 if (gsi_end_p (psi))
6181 break;
6182 remove_phi_node (&psi, false);
6185 /* Make phi node for trip. */
6186 phi = create_phi_node (trip_main, iter_part_bb);
6187 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6188 UNKNOWN_LOCATION);
6189 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6190 UNKNOWN_LOCATION);
6193 if (!broken_loop)
6194 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6195 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6196 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6197 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6198 recompute_dominator (CDI_DOMINATORS, fin_bb));
6199 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6200 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6201 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6202 recompute_dominator (CDI_DOMINATORS, body_bb));
6204 if (!broken_loop)
6206 class loop *loop = body_bb->loop_father;
6207 class loop *trip_loop = alloc_loop ();
6208 trip_loop->header = iter_part_bb;
6209 trip_loop->latch = trip_update_bb;
6210 add_loop (trip_loop, iter_part_bb->loop_father);
6212 if (loop != entry_bb->loop_father)
6214 gcc_assert (loop->header == body_bb);
6215 gcc_assert (loop->latch == region->cont
6216 || single_pred (loop->latch) == region->cont);
6217 trip_loop->inner = loop;
6218 return;
6221 if (!gimple_omp_for_combined_p (fd->for_stmt))
6223 loop = alloc_loop ();
6224 loop->header = body_bb;
6225 if (collapse_bb == NULL)
6226 loop->latch = cont_bb;
6227 add_loop (loop, trip_loop);
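/* Added note: the loop tree built above nests the sequential per-chunk
   loop (header BODY_BB) inside the trip loop (header ITER_PART_BB,
   latch TRIP_UPDATE_BB), matching the L0/L2 nesting in the pseudocode
   before this function.  */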
6232 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6233 loop. Given parameters:
6235 for (V = N1; V cond N2; V += STEP) BODY;
6237 where COND is "<" or ">", we generate pseudocode
6239 V = N1;
6240 goto L1;
6241 L0:
6242 BODY;
6243 V += STEP;
6244 L1:
6245 if (V cond N2) goto L0; else goto L2;
6246 L2:
6248 For collapsed loops, emit the outer loops as scalar
6249 and only try to vectorize the innermost loop. */
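/* Added sketch (not from the original file): the same rewrite in plain C
   for cond == "<"; the bottom-tested form is what lets the vectorizer
   treat the body as a simple counted loop.  Names are assumptions and
   the block is compiled out with #if 0.  */
#if 0
static void
simd_rewrite_demo (long n1, long n2, long step)
{
  long v = n1;			/* V = N1;  */
  goto L1;
 L0:
  /* BODY would run here with V == v.  */
  v += step;			/* V += STEP;  */
 L1:
  if (v < n2)			/* if (V cond N2) goto L0; else goto L2;  */
    goto L0;
 L2:;
}
#endif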
6251 static void
6252 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6254 tree type, t;
6255 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6256 gimple_stmt_iterator gsi;
6257 gimple *stmt;
6258 gcond *cond_stmt;
6259 bool broken_loop = region->cont == NULL;
6260 edge e, ne;
6261 tree *counts = NULL;
6262 int i;
6263 int safelen_int = INT_MAX;
6264 bool dont_vectorize = false;
6265 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6266 OMP_CLAUSE_SAFELEN);
6267 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6268 OMP_CLAUSE__SIMDUID_);
6269 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6270 OMP_CLAUSE_IF);
6271 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6272 OMP_CLAUSE_SIMDLEN);
6273 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6274 OMP_CLAUSE__CONDTEMP_);
6275 tree n1, n2;
6276 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6278 if (safelen)
6280 poly_uint64 val;
6281 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6282 if (!poly_int_tree_p (safelen, &val))
6283 safelen_int = 0;
6284 else
6285 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6286 if (safelen_int == 1)
6287 safelen_int = 0;
6289 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6290 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6292 safelen_int = 0;
6293 dont_vectorize = true;
6295 type = TREE_TYPE (fd->loop.v);
6296 entry_bb = region->entry;
6297 cont_bb = region->cont;
6298 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6299 gcc_assert (broken_loop
6300 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6301 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6302 if (!broken_loop)
6304 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6305 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6306 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6307 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6309 else
6311 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6312 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6313 l2_bb = single_succ (l1_bb);
6315 exit_bb = region->exit;
6316 l2_dom_bb = NULL;
6318 gsi = gsi_last_nondebug_bb (entry_bb);
6320 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6321 /* Not needed in SSA form right now. */
6322 gcc_assert (!gimple_in_ssa_p (cfun));
6323 if (fd->collapse > 1
6324 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6325 || broken_loop))
6327 int first_zero_iter = -1, dummy = -1;
6328 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6330 counts = XALLOCAVEC (tree, fd->collapse);
6331 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6332 zero_iter_bb, first_zero_iter,
6333 dummy_bb, dummy, l2_dom_bb);
6335 if (l2_dom_bb == NULL)
6336 l2_dom_bb = l1_bb;
6338 n1 = fd->loop.n1;
6339 n2 = fd->loop.n2;
6340 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6342 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6343 OMP_CLAUSE__LOOPTEMP_);
6344 gcc_assert (innerc);
6345 n1 = OMP_CLAUSE_DECL (innerc);
6346 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6347 OMP_CLAUSE__LOOPTEMP_);
6348 gcc_assert (innerc);
6349 n2 = OMP_CLAUSE_DECL (innerc);
6351 tree step = fd->loop.step;
6353 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6354 OMP_CLAUSE__SIMT_);
6355 if (is_simt)
6357 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6358 is_simt = safelen_int > 1;
6360 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6361 if (is_simt)
6363 simt_lane = create_tmp_var (unsigned_type_node);
6364 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6365 gimple_call_set_lhs (g, simt_lane);
6366 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6367 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6368 fold_convert (TREE_TYPE (step), simt_lane));
6369 n1 = fold_convert (type, n1);
6370 if (POINTER_TYPE_P (type))
6371 n1 = fold_build_pointer_plus (n1, offset);
6372 else
6373 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6375 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6376 if (fd->collapse > 1)
6377 simt_maxlane = build_one_cst (unsigned_type_node);
6378 else if (safelen_int < omp_max_simt_vf ())
6379 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6380 tree vf
6381 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6382 unsigned_type_node, 0);
6383 if (simt_maxlane)
6384 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6385 vf = fold_convert (TREE_TYPE (step), vf);
6386 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
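/* Added note: the net effect is lane striding -- each of the VF SIMT
   lanes starts at N1 + LANE * STEP (the offset added above) and
   advances by STEP * VF, so the lanes cover disjoint interleaved
   iterations, e.g. for cond "<":
     for (v = n1 + lane * step; v < n2; v += step * vf)
       BODY;  */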
6389 tree n2var = NULL_TREE;
6390 tree n2v = NULL_TREE;
6391 tree *nonrect_bounds = NULL;
6392 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6393 if (fd->collapse > 1)
6395 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6397 if (fd->non_rect)
6399 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6400 memset (nonrect_bounds, 0,
6401 sizeof (tree) * (fd->last_nonrect + 1));
6403 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6404 gcc_assert (entry_bb == gsi_bb (gsi));
6405 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6406 gsi_prev (&gsi);
6407 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6408 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6409 NULL, n1);
6410 gsi = gsi_for_stmt (fd->for_stmt);
6412 if (broken_loop)
6414 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6416 /* Compute in n2var the limit for the first innermost loop,
6417 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6418 where cnt is how many iterations the loop would have if all
6419 further iterations were assigned to the current task. */
6420 n2var = create_tmp_var (type);
6421 i = fd->collapse - 1;
6422 tree itype = TREE_TYPE (fd->loops[i].v);
6423 if (POINTER_TYPE_P (itype))
6424 itype = signed_type_for (itype);
6425 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6426 ? -1 : 1));
6427 t = fold_build2 (PLUS_EXPR, itype,
6428 fold_convert (itype, fd->loops[i].step), t);
6429 t = fold_build2 (PLUS_EXPR, itype, t,
6430 fold_convert (itype, fd->loops[i].n2));
6431 if (fd->loops[i].m2)
6433 tree t2 = fold_convert (itype,
6434 fd->loops[i - fd->loops[i].outer].v);
6435 tree t3 = fold_convert (itype, fd->loops[i].m2);
6436 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6437 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6439 t = fold_build2 (MINUS_EXPR, itype, t,
6440 fold_convert (itype, fd->loops[i].v));
6441 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6442 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6443 fold_build1 (NEGATE_EXPR, itype, t),
6444 fold_build1 (NEGATE_EXPR, itype,
6445 fold_convert (itype,
6446 fd->loops[i].step)));
6447 else
6448 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6449 fold_convert (itype, fd->loops[i].step));
6450 t = fold_convert (type, t);
6451 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6452 min_arg1 = create_tmp_var (type);
6453 expand_omp_build_assign (&gsi, min_arg1, t2);
6454 min_arg2 = create_tmp_var (type);
6455 expand_omp_build_assign (&gsi, min_arg2, t);
6457 else
6459 if (TREE_CODE (n2) == INTEGER_CST)
6461 /* For lastprivate handling, indicate that at least one iteration
6462 has been performed, without wasting runtime. */
6463 if (integer_nonzerop (n2))
6464 expand_omp_build_assign (&gsi, fd->loop.v,
6465 fold_convert (type, n2));
6466 else
6467 /* Indicate that no iteration has been performed. */
6468 expand_omp_build_assign (&gsi, fd->loop.v,
6469 build_one_cst (type));
6471 else
6473 expand_omp_build_assign (&gsi, fd->loop.v,
6474 build_zero_cst (type));
6475 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6477 for (i = 0; i < fd->collapse; i++)
6479 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6480 if (fd->loops[i].m1)
6482 tree t2
6483 = fold_convert (TREE_TYPE (t),
6484 fd->loops[i - fd->loops[i].outer].v);
6485 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6486 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6487 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6489 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6490 /* For normal non-combined collapsed loops just initialize
6491 the outermost iterator in the entry_bb. */
6492 if (!broken_loop)
6493 break;
6497 else
6498 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6499 tree altv = NULL_TREE, altn2 = NULL_TREE;
6500 if (fd->collapse == 1
6501 && !broken_loop
6502 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6504 /* The vectorizer currently punts on loops with non-constant steps
6505 for the main IV (it can't compute the number of iterations and
6506 gives up because of that). Since for OpenMP loops it is always
6507 possible to compute the number of iterations upfront, use an
6508 alternate IV as the loop iterator:
6509 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6510 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6511 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6512 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6513 tree itype = TREE_TYPE (fd->loop.v);
6514 if (POINTER_TYPE_P (itype))
6515 itype = signed_type_for (itype);
6516 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6517 t = fold_build2 (PLUS_EXPR, itype,
6518 fold_convert (itype, fd->loop.step), t);
6519 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6520 t = fold_build2 (MINUS_EXPR, itype, t,
6521 fold_convert (itype, fd->loop.v));
6522 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6523 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6524 fold_build1 (NEGATE_EXPR, itype, t),
6525 fold_build1 (NEGATE_EXPR, itype,
6526 fold_convert (itype, fd->loop.step)));
6527 else
6528 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6529 fold_convert (itype, fd->loop.step));
6530 t = fold_convert (TREE_TYPE (altv), t);
6531 altn2 = create_tmp_var (TREE_TYPE (altv));
6532 expand_omp_build_assign (&gsi, altn2, t);
6533 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6534 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6535 true, GSI_SAME_STMT);
6536 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6537 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6538 build_zero_cst (TREE_TYPE (altv)));
6539 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6541 else if (fd->collapse > 1
6542 && !broken_loop
6543 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6544 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6546 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6547 altn2 = create_tmp_var (TREE_TYPE (altv));
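/* Added note: in the first branch above, ALTN2 is the rounded-up
   iteration count, zeroed via the COND_EXPR when the loop condition
   already fails on entry; in the collapse > 1 branch only the
   ALTV/ALTN2 temporaries are created here and their values are
   computed per outer-loop iteration further below.  */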
6549 if (cond_var)
6551 if (POINTER_TYPE_P (type)
6552 || TREE_CODE (n1) != INTEGER_CST
6553 || fd->loop.cond_code != LT_EXPR
6554 || tree_int_cst_sgn (n1) != 1)
6555 expand_omp_build_assign (&gsi, cond_var,
6556 build_one_cst (TREE_TYPE (cond_var)));
6557 else
6558 expand_omp_build_assign (&gsi, cond_var,
6559 fold_convert (TREE_TYPE (cond_var), n1));
6562 /* Remove the GIMPLE_OMP_FOR statement. */
6563 gsi_remove (&gsi, true);
6565 if (!broken_loop)
6567 /* Code to control the increment goes in the CONT_BB. */
6568 gsi = gsi_last_nondebug_bb (cont_bb);
6569 stmt = gsi_stmt (gsi);
6570 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6572 if (fd->collapse == 1
6573 || gimple_omp_for_combined_into_p (fd->for_stmt))
6575 if (POINTER_TYPE_P (type))
6576 t = fold_build_pointer_plus (fd->loop.v, step);
6577 else
6578 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6579 expand_omp_build_assign (&gsi, fd->loop.v, t);
6581 else if (TREE_CODE (n2) != INTEGER_CST)
6582 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6583 if (altv)
6585 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6586 build_one_cst (TREE_TYPE (altv)));
6587 expand_omp_build_assign (&gsi, altv, t);
6590 if (fd->collapse > 1)
6592 i = fd->collapse - 1;
6593 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6595 t = fold_convert (sizetype, fd->loops[i].step);
6596 t = fold_build_pointer_plus (fd->loops[i].v, t);
6598 else
6600 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6601 fd->loops[i].step);
6602 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6603 fd->loops[i].v, t);
6605 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6607 if (cond_var)
6609 if (POINTER_TYPE_P (type)
6610 || TREE_CODE (n1) != INTEGER_CST
6611 || fd->loop.cond_code != LT_EXPR
6612 || tree_int_cst_sgn (n1) != 1)
6613 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6614 build_one_cst (TREE_TYPE (cond_var)));
6615 else
6616 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6617 fold_convert (TREE_TYPE (cond_var), step));
6618 expand_omp_build_assign (&gsi, cond_var, t);
6621 /* Remove GIMPLE_OMP_CONTINUE. */
6622 gsi_remove (&gsi, true);
6625 /* Emit the condition in L1_BB. */
6626 gsi = gsi_start_bb (l1_bb);
6628 if (altv)
6629 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6630 else if (fd->collapse > 1
6631 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6632 && !broken_loop)
6634 i = fd->collapse - 1;
6635 tree itype = TREE_TYPE (fd->loops[i].v);
6636 if (fd->loops[i].m2)
6637 t = n2v = create_tmp_var (itype);
6638 else
6639 t = fold_convert (itype, fd->loops[i].n2);
6640 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6641 false, GSI_CONTINUE_LINKING);
6642 tree v = fd->loops[i].v;
6643 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6644 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6645 false, GSI_CONTINUE_LINKING);
6646 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6648 else
6650 if (fd->collapse > 1 && !broken_loop)
6651 t = n2var;
6652 else
6653 t = fold_convert (type, n2);
6654 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6655 false, GSI_CONTINUE_LINKING);
6656 tree v = fd->loop.v;
6657 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6658 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6659 false, GSI_CONTINUE_LINKING);
6660 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6662 cond_stmt = gimple_build_cond_empty (t);
6663 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6664 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6665 NULL, NULL)
6666 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6667 NULL, NULL))
6669 gsi = gsi_for_stmt (cond_stmt);
6670 gimple_regimplify_operands (cond_stmt, &gsi);
6673 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6674 if (is_simt)
6676 gsi = gsi_start_bb (l2_bb);
6677 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6678 if (POINTER_TYPE_P (type))
6679 t = fold_build_pointer_plus (fd->loop.v, step);
6680 else
6681 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6682 expand_omp_build_assign (&gsi, fd->loop.v, t);
6685 /* Remove GIMPLE_OMP_RETURN. */
6686 gsi = gsi_last_nondebug_bb (exit_bb);
6687 gsi_remove (&gsi, true);
6689 /* Connect the new blocks. */
6690 remove_edge (FALLTHRU_EDGE (entry_bb));
6692 if (!broken_loop)
6694 remove_edge (BRANCH_EDGE (entry_bb));
6695 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6697 e = BRANCH_EDGE (l1_bb);
6698 ne = FALLTHRU_EDGE (l1_bb);
6699 e->flags = EDGE_TRUE_VALUE;
6701 else
6703 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6705 ne = single_succ_edge (l1_bb);
6706 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6709 ne->flags = EDGE_FALSE_VALUE;
6710 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6711 ne->probability = e->probability.invert ();
6713 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6714 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6716 if (simt_maxlane)
6718 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6719 NULL_TREE, NULL_TREE);
6720 gsi = gsi_last_bb (entry_bb);
6721 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6722 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6723 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6724 FALLTHRU_EDGE (entry_bb)->probability
6725 = profile_probability::guessed_always ().apply_scale (7, 8);
6726 BRANCH_EDGE (entry_bb)->probability
6727 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6728 l2_dom_bb = entry_bb;
6730 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6732 if (!broken_loop && fd->collapse > 1)
6734 basic_block last_bb = l1_bb;
6735 basic_block init_bb = NULL;
6736 for (i = fd->collapse - 2; i >= 0; i--)
6738 tree nextn2v = NULL_TREE;
6739 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6740 e = EDGE_SUCC (last_bb, 0);
6741 else
6742 e = EDGE_SUCC (last_bb, 1);
6743 basic_block bb = split_edge (e);
6744 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6746 t = fold_convert (sizetype, fd->loops[i].step);
6747 t = fold_build_pointer_plus (fd->loops[i].v, t);
6749 else
6751 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6752 fd->loops[i].step);
6753 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6754 fd->loops[i].v, t);
6756 gsi = gsi_after_labels (bb);
6757 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6759 bb = split_block (bb, last_stmt (bb))->dest;
6760 gsi = gsi_start_bb (bb);
6761 tree itype = TREE_TYPE (fd->loops[i].v);
6762 if (fd->loops[i].m2)
6763 t = nextn2v = create_tmp_var (itype);
6764 else
6765 t = fold_convert (itype, fd->loops[i].n2);
6766 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6767 false, GSI_CONTINUE_LINKING);
6768 tree v = fd->loops[i].v;
6769 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6770 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6771 false, GSI_CONTINUE_LINKING);
6772 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6773 cond_stmt = gimple_build_cond_empty (t);
6774 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6775 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6776 expand_omp_regimplify_p, NULL, NULL)
6777 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6778 expand_omp_regimplify_p, NULL, NULL))
6780 gsi = gsi_for_stmt (cond_stmt);
6781 gimple_regimplify_operands (cond_stmt, &gsi);
6783 ne = single_succ_edge (bb);
6784 ne->flags = EDGE_FALSE_VALUE;
6786 init_bb = create_empty_bb (bb);
6787 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6788 add_bb_to_loop (init_bb, bb->loop_father);
6789 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6790 e->probability
6791 = profile_probability::guessed_always ().apply_scale (7, 8);
6792 ne->probability = e->probability.invert ();
6794 gsi = gsi_after_labels (init_bb);
6795 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6796 fd->loops[i + 1].n1);
6797 if (fd->loops[i + 1].m1)
6799 tree t2 = fold_convert (TREE_TYPE (t),
6800 fd->loops[i + 1
6801 - fd->loops[i + 1].outer].v);
6802 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6803 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6804 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6806 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6807 if (fd->loops[i + 1].m2)
6809 if (i + 2 == fd->collapse && (n2var || altv))
6811 gcc_assert (n2v == NULL_TREE);
6812 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6814 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6815 fd->loops[i + 1].n2);
6816 tree t2 = fold_convert (TREE_TYPE (t),
6817 fd->loops[i + 1
6818 - fd->loops[i + 1].outer].v);
6819 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6820 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6821 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6822 expand_omp_build_assign (&gsi, n2v, t);
6824 if (i + 2 == fd->collapse && n2var)
6826 /* For composite simd, n2 is the first iteration that the current
6827 task should not handle, so we effectively want to use
6828 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6829 as the vectorized loop. Except the vectorizer will not
6830 vectorize that, so instead compute N2VAR as
6831 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6832 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6833 as the loop to vectorize. */
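/* In the code generated below, MIN_ARG1 holds N2 - V and MIN_ARG2 holds
   COUNTS3 (or, for non-rectangular inner loops, the remaining iteration
   count computed from the bounds), so that the later
   N2VAR = V + MIN (MIN_ARG1, MIN_ARG2) assignment implements the
   formula above.  */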
6834 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6835 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6837 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6838 == LT_EXPR ? -1 : 1));
6839 t = fold_build2 (PLUS_EXPR, itype,
6840 fold_convert (itype,
6841 fd->loops[i + 1].step), t);
6842 if (fd->loops[i + 1].m2)
6843 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6844 else
6845 t = fold_build2 (PLUS_EXPR, itype, t,
6846 fold_convert (itype,
6847 fd->loops[i + 1].n2));
6848 t = fold_build2 (MINUS_EXPR, itype, t,
6849 fold_convert (itype, fd->loops[i + 1].v));
6850 tree step = fold_convert (itype, fd->loops[i + 1].step);
6851 if (TYPE_UNSIGNED (itype)
6852 && fd->loops[i + 1].cond_code == GT_EXPR)
6853 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6854 fold_build1 (NEGATE_EXPR, itype, t),
6855 fold_build1 (NEGATE_EXPR, itype, step));
6856 else
6857 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6858 t = fold_convert (type, t);
6860 else
6861 t = counts[i + 1];
6862 expand_omp_build_assign (&gsi, min_arg1, t2);
6863 expand_omp_build_assign (&gsi, min_arg2, t);
6864 e = split_block (init_bb, last_stmt (init_bb));
6865 gsi = gsi_after_labels (e->dest);
6866 init_bb = e->dest;
6867 remove_edge (FALLTHRU_EDGE (entry_bb));
6868 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6869 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6870 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6871 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6872 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6873 expand_omp_build_assign (&gsi, n2var, t);
6875 if (i + 2 == fd->collapse && altv)
6877 /* The vectorizer currently punts on loops with non-constant
6878 steps for the main IV (it can't compute the number of iterations
6879 and gives up because of that). Since for OpenMP loops it is
6880 always possible to compute the number of iterations upfront,
6881 use an alternate IV as the loop iterator. */
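/* Rough sketch of what is built below: ALTV starts at zero and ALTN2 is
   set to the remaining iteration count, e.g. for

     for (V = N1; V < N2; V += STEP)

   ALTN2 = (N2 - V + STEP - 1) / STEP.  If V's own exit condition already
   fails, the COND_EXPR further down forces ALTN2 to zero so the body is
   not entered; ALTV then advances towards ALTN2 with a constant step,
   which is the form the vectorizer can handle.  */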
6882 expand_omp_build_assign (&gsi, altv,
6883 build_zero_cst (TREE_TYPE (altv)));
6884 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6885 if (POINTER_TYPE_P (itype))
6886 itype = signed_type_for (itype);
6887 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6888 ? -1 : 1));
6889 t = fold_build2 (PLUS_EXPR, itype,
6890 fold_convert (itype, fd->loops[i + 1].step), t);
6891 t = fold_build2 (PLUS_EXPR, itype, t,
6892 fold_convert (itype,
6893 fd->loops[i + 1].m2
6894 ? n2v : fd->loops[i + 1].n2));
6895 t = fold_build2 (MINUS_EXPR, itype, t,
6896 fold_convert (itype, fd->loops[i + 1].v));
6897 tree step = fold_convert (itype, fd->loops[i + 1].step);
6898 if (TYPE_UNSIGNED (itype)
6899 && fd->loops[i + 1].cond_code == GT_EXPR)
6900 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6901 fold_build1 (NEGATE_EXPR, itype, t),
6902 fold_build1 (NEGATE_EXPR, itype, step));
6903 else
6904 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6905 t = fold_convert (TREE_TYPE (altv), t);
6906 expand_omp_build_assign (&gsi, altn2, t);
6907 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6908 fd->loops[i + 1].m2
6909 ? n2v : fd->loops[i + 1].n2);
6910 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6911 true, GSI_SAME_STMT);
6912 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6913 fd->loops[i + 1].v, t2);
6914 gassign *g
6915 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6916 build_zero_cst (TREE_TYPE (altv)));
6917 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6919 n2v = nextn2v;
6921 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6922 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6924 e = find_edge (entry_bb, last_bb);
6925 redirect_edge_succ (e, bb);
6926 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6927 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6930 last_bb = bb;
6933 if (!broken_loop)
6935 class loop *loop = alloc_loop ();
6936 loop->header = l1_bb;
6937 loop->latch = cont_bb;
6938 add_loop (loop, l1_bb->loop_father);
6939 loop->safelen = safelen_int;
6940 if (simduid)
6942 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6943 cfun->has_simduid_loops = true;
6945 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6946 the loop. */
6947 if ((flag_tree_loop_vectorize
6948 || !global_options_set.x_flag_tree_loop_vectorize)
6949 && flag_tree_loop_optimize
6950 && loop->safelen > 1)
6952 loop->force_vectorize = true;
6953 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6955 unsigned HOST_WIDE_INT v
6956 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6957 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6958 loop->simdlen = v;
6960 cfun->has_force_vectorize_loops = true;
6962 else if (dont_vectorize)
6963 loop->dont_vectorize = true;
6965 else if (simduid)
6966 cfun->has_simduid_loops = true;
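/* For instance, '#pragma omp simd safelen(8) simdlen(4)' reaches the code
   above with safelen_int == 8 (assuming the usual clause-to-safelen
   mapping computed earlier), so loop->safelen = 8, the loop is marked
   force_vectorize, and loop->simdlen = 4 since 4 <= 8.  With an explicit
   -fno-tree-loop-vectorize, the vectorization hint is not applied.  */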
6969 /* Taskloop construct is represented after gimplification with
6970 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6971 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6972 which should just compute all the needed loop temporaries
6973 for GIMPLE_OMP_TASK. */
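/* Illustrative shape of what is being expanded (not an exact GIMPLE
   dump): for a user loop

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body;

   gimplification produces roughly

     GIMPLE_OMP_FOR      (outer; expanded by this routine, it only
       GIMPLE_OMP_TASK    computes the _looptemp_ start/end values)
         GIMPLE_OMP_FOR  (inner; iterates the single range handed to
           body           each task by GOMP_taskloop)  */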
6975 static void
6976 expand_omp_taskloop_for_outer (struct omp_region *region,
6977 struct omp_for_data *fd,
6978 gimple *inner_stmt)
6980 tree type, bias = NULL_TREE;
6981 basic_block entry_bb, cont_bb, exit_bb;
6982 gimple_stmt_iterator gsi;
6983 gassign *assign_stmt;
6984 tree *counts = NULL;
6985 int i;
6987 gcc_assert (inner_stmt);
6988 gcc_assert (region->cont);
6989 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6990 && gimple_omp_task_taskloop_p (inner_stmt));
6991 type = TREE_TYPE (fd->loop.v);
6993 /* See if we need to bias by LLONG_MIN. */
6994 if (fd->iter_type == long_long_unsigned_type_node
6995 && TREE_CODE (type) == INTEGER_TYPE
6996 && !TYPE_UNSIGNED (type))
6998 tree n1, n2;
7000 if (fd->loop.cond_code == LT_EXPR)
7002 n1 = fd->loop.n1;
7003 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7005 else
7007 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7008 n2 = fd->loop.n1;
7010 if (TREE_CODE (n1) != INTEGER_CST
7011 || TREE_CODE (n2) != INTEGER_CST
7012 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7013 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
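/* E.g. for a signed 64-bit iteration variable driven through the
   unsigned long long runtime interface, BIAS is 0x8000000000000000;
   adding it (with wrapping arithmetic) maps [LLONG_MIN, LLONG_MAX]
   monotonically onto [0, ULLONG_MAX], so the runtime's unsigned
   comparisons still order the iteration space correctly.  */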
7016 entry_bb = region->entry;
7017 cont_bb = region->cont;
7018 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7019 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7020 exit_bb = region->exit;
7022 gsi = gsi_last_nondebug_bb (entry_bb);
7023 gimple *for_stmt = gsi_stmt (gsi);
7024 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7025 if (fd->collapse > 1)
7027 int first_zero_iter = -1, dummy = -1;
7028 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7030 counts = XALLOCAVEC (tree, fd->collapse);
7031 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7032 zero_iter_bb, first_zero_iter,
7033 dummy_bb, dummy, l2_dom_bb);
7035 if (zero_iter_bb)
7037 /* Some counts[i] vars might be uninitialized if
7038 some loop has zero iterations. But the body shouldn't
7039 be executed in that case, so just avoid uninit warnings. */
7040 for (i = first_zero_iter; i < fd->collapse; i++)
7041 if (SSA_VAR_P (counts[i]))
7042 TREE_NO_WARNING (counts[i]) = 1;
7043 gsi_prev (&gsi);
7044 edge e = split_block (entry_bb, gsi_stmt (gsi));
7045 entry_bb = e->dest;
7046 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7047 gsi = gsi_last_bb (entry_bb);
7048 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7049 get_immediate_dominator (CDI_DOMINATORS,
7050 zero_iter_bb));
7054 tree t0, t1;
7055 t1 = fd->loop.n2;
7056 t0 = fd->loop.n1;
7057 if (POINTER_TYPE_P (TREE_TYPE (t0))
7058 && TYPE_PRECISION (TREE_TYPE (t0))
7059 != TYPE_PRECISION (fd->iter_type))
7061 /* Avoid casting pointers to an integer of a different size. */
7062 tree itype = signed_type_for (type);
7063 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7064 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7066 else
7068 t1 = fold_convert (fd->iter_type, t1);
7069 t0 = fold_convert (fd->iter_type, t0);
7071 if (bias)
7073 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7074 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7077 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7078 OMP_CLAUSE__LOOPTEMP_);
7079 gcc_assert (innerc);
7080 tree startvar = OMP_CLAUSE_DECL (innerc);
7081 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7082 gcc_assert (innerc);
7083 tree endvar = OMP_CLAUSE_DECL (innerc);
7084 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7086 innerc = find_lastprivate_looptemp (fd, innerc);
7087 if (innerc)
7089 /* If needed (inner taskloop has lastprivate clause), propagate
7090 down the total number of iterations. */
7091 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7092 NULL_TREE, false,
7093 GSI_CONTINUE_LINKING);
7094 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7099 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7100 GSI_CONTINUE_LINKING);
7101 assign_stmt = gimple_build_assign (startvar, t0);
7102 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7104 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7105 GSI_CONTINUE_LINKING);
7106 assign_stmt = gimple_build_assign (endvar, t1);
7107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7108 if (fd->collapse > 1)
7109 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7111 /* Remove the GIMPLE_OMP_FOR statement. */
7112 gsi = gsi_for_stmt (for_stmt);
7113 gsi_remove (&gsi, true);
7115 gsi = gsi_last_nondebug_bb (cont_bb);
7116 gsi_remove (&gsi, true);
7118 gsi = gsi_last_nondebug_bb (exit_bb);
7119 gsi_remove (&gsi, true);
7121 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7122 remove_edge (BRANCH_EDGE (entry_bb));
7123 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7124 remove_edge (BRANCH_EDGE (cont_bb));
7125 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7126 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7127 recompute_dominator (CDI_DOMINATORS, region->entry));
7130 /* Taskloop construct is represented after gimplification with
7131 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7132 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7133 GOMP_taskloop{,_ull} function arranges for each task to be given just
7134 a single range of iterations. */
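/* Conceptually, each task then runs

     for (V = start; V cond end; V += STEP)
       BODY;

   where start and end arrive through the two _LOOPTEMP_ clauses looked
   up below (an illustrative sketch, not the generated GIMPLE).  */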
7136 static void
7137 expand_omp_taskloop_for_inner (struct omp_region *region,
7138 struct omp_for_data *fd,
7139 gimple *inner_stmt)
7141 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7142 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7143 basic_block fin_bb;
7144 gimple_stmt_iterator gsi;
7145 edge ep;
7146 bool broken_loop = region->cont == NULL;
7147 tree *counts = NULL;
7148 tree n1, n2, step;
7150 itype = type = TREE_TYPE (fd->loop.v);
7151 if (POINTER_TYPE_P (type))
7152 itype = signed_type_for (type);
7154 /* See if we need to bias by LLONG_MIN. */
7155 if (fd->iter_type == long_long_unsigned_type_node
7156 && TREE_CODE (type) == INTEGER_TYPE
7157 && !TYPE_UNSIGNED (type))
7159 tree n1, n2;
7161 if (fd->loop.cond_code == LT_EXPR)
7163 n1 = fd->loop.n1;
7164 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7166 else
7168 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7169 n2 = fd->loop.n1;
7171 if (TREE_CODE (n1) != INTEGER_CST
7172 || TREE_CODE (n2) != INTEGER_CST
7173 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7174 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7177 entry_bb = region->entry;
7178 cont_bb = region->cont;
7179 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7180 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7181 gcc_assert (broken_loop
7182 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7183 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7184 if (!broken_loop)
7186 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7187 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7189 exit_bb = region->exit;
7191 /* Iteration space partitioning goes in ENTRY_BB. */
7192 gsi = gsi_last_nondebug_bb (entry_bb);
7193 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7195 if (fd->collapse > 1)
7197 int first_zero_iter = -1, dummy = -1;
7198 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7200 counts = XALLOCAVEC (tree, fd->collapse);
7201 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7202 fin_bb, first_zero_iter,
7203 dummy_bb, dummy, l2_dom_bb);
7204 t = NULL_TREE;
7206 else
7207 t = integer_one_node;
7209 step = fd->loop.step;
7210 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7211 OMP_CLAUSE__LOOPTEMP_);
7212 gcc_assert (innerc);
7213 n1 = OMP_CLAUSE_DECL (innerc);
7214 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7215 gcc_assert (innerc);
7216 n2 = OMP_CLAUSE_DECL (innerc);
7217 if (bias)
7219 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7220 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7222 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7223 true, NULL_TREE, true, GSI_SAME_STMT);
7224 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7225 true, NULL_TREE, true, GSI_SAME_STMT);
7226 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7227 true, NULL_TREE, true, GSI_SAME_STMT);
7229 tree startvar = fd->loop.v;
7230 tree endvar = NULL_TREE;
7232 if (gimple_omp_for_combined_p (fd->for_stmt))
7234 tree clauses = gimple_omp_for_clauses (inner_stmt);
7235 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7236 gcc_assert (innerc);
7237 startvar = OMP_CLAUSE_DECL (innerc);
7238 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7239 OMP_CLAUSE__LOOPTEMP_);
7240 gcc_assert (innerc);
7241 endvar = OMP_CLAUSE_DECL (innerc);
7243 t = fold_convert (TREE_TYPE (startvar), n1);
7244 t = force_gimple_operand_gsi (&gsi, t,
7245 DECL_P (startvar)
7246 && TREE_ADDRESSABLE (startvar),
7247 NULL_TREE, false, GSI_CONTINUE_LINKING);
7248 gimple *assign_stmt = gimple_build_assign (startvar, t);
7249 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7251 t = fold_convert (TREE_TYPE (startvar), n2);
7252 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7253 false, GSI_CONTINUE_LINKING);
7254 if (endvar)
7256 assign_stmt = gimple_build_assign (endvar, e);
7257 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7258 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7259 assign_stmt = gimple_build_assign (fd->loop.v, e);
7260 else
7261 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7262 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7265 tree *nonrect_bounds = NULL;
7266 if (fd->collapse > 1)
7268 if (fd->non_rect)
7270 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7271 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7273 gcc_assert (gsi_bb (gsi) == entry_bb);
7274 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7275 startvar);
7276 entry_bb = gsi_bb (gsi);
7279 if (!broken_loop)
7281 /* The code controlling the sequential loop replaces the
7282 GIMPLE_OMP_CONTINUE. */
7283 gsi = gsi_last_nondebug_bb (cont_bb);
7284 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7285 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7286 vmain = gimple_omp_continue_control_use (cont_stmt);
7287 vback = gimple_omp_continue_control_def (cont_stmt);
7289 if (!gimple_omp_for_combined_p (fd->for_stmt))
7291 if (POINTER_TYPE_P (type))
7292 t = fold_build_pointer_plus (vmain, step);
7293 else
7294 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7295 t = force_gimple_operand_gsi (&gsi, t,
7296 DECL_P (vback)
7297 && TREE_ADDRESSABLE (vback),
7298 NULL_TREE, true, GSI_SAME_STMT);
7299 assign_stmt = gimple_build_assign (vback, t);
7300 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7302 t = build2 (fd->loop.cond_code, boolean_type_node,
7303 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7304 ? t : vback, e);
7305 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7308 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7309 gsi_remove (&gsi, true);
7311 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7312 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7313 cont_bb, body_bb);
7316 /* Remove the GIMPLE_OMP_FOR statement. */
7317 gsi = gsi_for_stmt (fd->for_stmt);
7318 gsi_remove (&gsi, true);
7320 /* Remove the GIMPLE_OMP_RETURN statement. */
7321 gsi = gsi_last_nondebug_bb (exit_bb);
7322 gsi_remove (&gsi, true);
7324 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7325 if (!broken_loop)
7326 remove_edge (BRANCH_EDGE (entry_bb));
7327 else
7329 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7330 region->outer->cont = NULL;
7333 /* Connect all the blocks. */
7334 if (!broken_loop)
7336 ep = find_edge (cont_bb, body_bb);
7337 if (gimple_omp_for_combined_p (fd->for_stmt))
7339 remove_edge (ep);
7340 ep = NULL;
7342 else if (fd->collapse > 1)
7344 remove_edge (ep);
7345 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7347 else
7348 ep->flags = EDGE_TRUE_VALUE;
7349 find_edge (cont_bb, fin_bb)->flags
7350 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7353 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7354 recompute_dominator (CDI_DOMINATORS, body_bb));
7355 if (!broken_loop)
7356 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7357 recompute_dominator (CDI_DOMINATORS, fin_bb));
7359 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7361 class loop *loop = alloc_loop ();
7362 loop->header = body_bb;
7363 if (collapse_bb == NULL)
7364 loop->latch = cont_bb;
7365 add_loop (loop, body_bb->loop_father);
7369 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7370 partitioned loop. The lowering here is abstracted, in that the
7371 loop parameters are passed through internal functions, which are
7372 further lowered by oacc_device_lower, once we get to the target
7373 compiler. The loop is of the form:
7375 for (V = B; V LTGT E; V += S) {BODY}
7377 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7378 (constant 0 for no chunking) and we will have a GWV partitioning
7379 mask, specifying dimensions over which the loop is to be
7380 partitioned (see note below). We generate code that looks like
7381 (this ignores tiling):
7383 <entry_bb> [incoming FALL->body, BRANCH->exit]
7384 typedef signedintify (typeof (V)) T; // underlying signed integral type
7385 T range = E - B;
7386 T chunk_no = 0;
7387 T DIR = LTGT == '<' ? +1 : -1;
7388 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7389 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7391 <head_bb> [created by splitting end of entry_bb]
7392 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7393 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7394 if (!(offset LTGT bound)) goto bottom_bb;
7396 <body_bb> [incoming]
7397 V = B + offset;
7398 {BODY}
7400 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7401 offset += step;
7402 if (offset LTGT bound) goto body_bb; [*]
7404 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7405 chunk_no++;
7406 if (chunk_no < chunk_max) goto head_bb;
7408 <exit_bb> [incoming]
7409 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7411 [*] Needed if V live at end of loop. */
7413 static void
7414 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7416 tree v = fd->loop.v;
7417 enum tree_code cond_code = fd->loop.cond_code;
7418 enum tree_code plus_code = PLUS_EXPR;
7420 tree chunk_size = integer_minus_one_node;
7421 tree gwv = integer_zero_node;
7422 tree iter_type = TREE_TYPE (v);
7423 tree diff_type = iter_type;
7424 tree plus_type = iter_type;
7425 struct oacc_collapse *counts = NULL;
7427 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7428 == GF_OMP_FOR_KIND_OACC_LOOP);
7429 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7430 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7432 if (POINTER_TYPE_P (iter_type))
7434 plus_code = POINTER_PLUS_EXPR;
7435 plus_type = sizetype;
7437 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7438 diff_type = signed_type_for (diff_type);
7439 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7440 diff_type = integer_type_node;
7442 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7443 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7444 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7445 basic_block bottom_bb = NULL;
7447 /* entry_bb has two successors; the branch edge is to the exit
7448 block, fallthrough edge to body. */
7449 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7450 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7452 /* If cont_bb is non-NULL, it has 2 successors. The branch successor
7453 goes to body_bb, or to a block whose only successor is body_bb. Its
7454 fallthrough successor is the final block (same as the branch
7455 successor of the entry_bb). */
7456 if (cont_bb)
7458 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7459 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7461 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7462 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7464 else
7465 gcc_assert (!gimple_in_ssa_p (cfun));
7467 /* The exit block only has entry_bb and cont_bb as predecessors. */
7468 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7470 tree chunk_no;
7471 tree chunk_max = NULL_TREE;
7472 tree bound, offset;
7473 tree step = create_tmp_var (diff_type, ".step");
7474 bool up = cond_code == LT_EXPR;
7475 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7476 bool chunking = !gimple_in_ssa_p (cfun);
7477 bool negating;
7479 /* Tiling vars. */
7480 tree tile_size = NULL_TREE;
7481 tree element_s = NULL_TREE;
7482 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7483 basic_block elem_body_bb = NULL;
7484 basic_block elem_cont_bb = NULL;
7486 /* SSA instances. */
7487 tree offset_incr = NULL_TREE;
7488 tree offset_init = NULL_TREE;
7490 gimple_stmt_iterator gsi;
7491 gassign *ass;
7492 gcall *call;
7493 gimple *stmt;
7494 tree expr;
7495 location_t loc;
7496 edge split, be, fte;
7498 /* Split the end of entry_bb to create head_bb. */
7499 split = split_block (entry_bb, last_stmt (entry_bb));
7500 basic_block head_bb = split->dest;
7501 entry_bb = split->src;
7503 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7504 gsi = gsi_last_nondebug_bb (entry_bb);
7505 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7506 loc = gimple_location (for_stmt);
7508 if (gimple_in_ssa_p (cfun))
7510 offset_init = gimple_omp_for_index (for_stmt, 0);
7511 gcc_assert (integer_zerop (fd->loop.n1));
7512 /* The SSA parallelizer does gang parallelism. */
7513 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7516 if (fd->collapse > 1 || fd->tiling)
7518 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7519 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7520 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7521 TREE_TYPE (fd->loop.n2), loc);
7523 if (SSA_VAR_P (fd->loop.n2))
7525 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7526 true, GSI_SAME_STMT);
7527 ass = gimple_build_assign (fd->loop.n2, total);
7528 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7532 tree b = fd->loop.n1;
7533 tree e = fd->loop.n2;
7534 tree s = fd->loop.step;
7536 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7537 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7539 /* Convert the step, avoiding possible unsigned->signed overflow. */
7540 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7541 if (negating)
7542 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7543 s = fold_convert (diff_type, s);
7544 if (negating)
7545 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7546 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7548 if (!chunking)
7549 chunk_size = integer_zero_node;
7550 expr = fold_convert (diff_type, chunk_size);
7551 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7552 NULL_TREE, true, GSI_SAME_STMT);
7554 if (fd->tiling)
7556 /* Determine the tile size and element step,
7557 modify the outer loop step size. */
7558 tile_size = create_tmp_var (diff_type, ".tile_size");
7559 expr = build_int_cst (diff_type, 1);
7560 for (int ix = 0; ix < fd->collapse; ix++)
7561 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7562 expr = force_gimple_operand_gsi (&gsi, expr, true,
7563 NULL_TREE, true, GSI_SAME_STMT);
7564 ass = gimple_build_assign (tile_size, expr);
7565 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7567 element_s = create_tmp_var (diff_type, ".element_s");
7568 ass = gimple_build_assign (element_s, s);
7569 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7571 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7572 s = force_gimple_operand_gsi (&gsi, expr, true,
7573 NULL_TREE, true, GSI_SAME_STMT);
7576 /* Determine the range, avoiding possible unsigned->signed overflow. */
7577 negating = !up && TYPE_UNSIGNED (iter_type);
7578 expr = fold_build2 (MINUS_EXPR, plus_type,
7579 fold_convert (plus_type, negating ? b : e),
7580 fold_convert (plus_type, negating ? e : b));
7581 expr = fold_convert (diff_type, expr);
7582 if (negating)
7583 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7584 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7585 NULL_TREE, true, GSI_SAME_STMT);
7587 chunk_no = build_int_cst (diff_type, 0);
7588 if (chunking)
7590 gcc_assert (!gimple_in_ssa_p (cfun));
7592 expr = chunk_no;
7593 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7594 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7596 ass = gimple_build_assign (chunk_no, expr);
7597 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7599 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7600 build_int_cst (integer_type_node,
7601 IFN_GOACC_LOOP_CHUNKS),
7602 dir, range, s, chunk_size, gwv);
7603 gimple_call_set_lhs (call, chunk_max);
7604 gimple_set_location (call, loc);
7605 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7607 else
7608 chunk_size = chunk_no;
7610 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7611 build_int_cst (integer_type_node,
7612 IFN_GOACC_LOOP_STEP),
7613 dir, range, s, chunk_size, gwv);
7614 gimple_call_set_lhs (call, step);
7615 gimple_set_location (call, loc);
7616 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7618 /* Remove the GIMPLE_OMP_FOR. */
7619 gsi_remove (&gsi, true);
7621 /* Fixup edges from head_bb. */
7622 be = BRANCH_EDGE (head_bb);
7623 fte = FALLTHRU_EDGE (head_bb);
7624 be->flags |= EDGE_FALSE_VALUE;
7625 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7627 basic_block body_bb = fte->dest;
7629 if (gimple_in_ssa_p (cfun))
7631 gsi = gsi_last_nondebug_bb (cont_bb);
7632 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7634 offset = gimple_omp_continue_control_use (cont_stmt);
7635 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7637 else
7639 offset = create_tmp_var (diff_type, ".offset");
7640 offset_init = offset_incr = offset;
7642 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7644 /* Loop offset & bound go into head_bb. */
7645 gsi = gsi_start_bb (head_bb);
7647 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7648 build_int_cst (integer_type_node,
7649 IFN_GOACC_LOOP_OFFSET),
7650 dir, range, s,
7651 chunk_size, gwv, chunk_no);
7652 gimple_call_set_lhs (call, offset_init);
7653 gimple_set_location (call, loc);
7654 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7656 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7657 build_int_cst (integer_type_node,
7658 IFN_GOACC_LOOP_BOUND),
7659 dir, range, s,
7660 chunk_size, gwv, offset_init);
7661 gimple_call_set_lhs (call, bound);
7662 gimple_set_location (call, loc);
7663 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7665 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7666 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7667 GSI_CONTINUE_LINKING);
7669 /* V assignment goes into body_bb. */
7670 if (!gimple_in_ssa_p (cfun))
7672 gsi = gsi_start_bb (body_bb);
7674 expr = build2 (plus_code, iter_type, b,
7675 fold_convert (plus_type, offset));
7676 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7677 true, GSI_SAME_STMT);
7678 ass = gimple_build_assign (v, expr);
7679 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7681 if (fd->collapse > 1 || fd->tiling)
7682 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7684 if (fd->tiling)
7686 /* Determine the range of the element loop -- usually simply
7687 the tile_size, but could be smaller if the final
7688 iteration of the outer loop is a partial tile. */
7689 tree e_range = create_tmp_var (diff_type, ".e_range");
7691 expr = build2 (MIN_EXPR, diff_type,
7692 build2 (MINUS_EXPR, diff_type, bound, offset),
7693 build2 (MULT_EXPR, diff_type, tile_size,
7694 element_s));
7695 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7696 true, GSI_SAME_STMT);
7697 ass = gimple_build_assign (e_range, expr);
7698 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7700 /* Determine bound, offset & step of inner loop. */
7701 e_bound = create_tmp_var (diff_type, ".e_bound");
7702 e_offset = create_tmp_var (diff_type, ".e_offset");
7703 e_step = create_tmp_var (diff_type, ".e_step");
7705 /* Mark these as element loops. */
7706 tree t, e_gwv = integer_minus_one_node;
7707 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7709 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7710 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7711 element_s, chunk, e_gwv, chunk);
7712 gimple_call_set_lhs (call, e_offset);
7713 gimple_set_location (call, loc);
7714 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7716 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7717 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7718 element_s, chunk, e_gwv, e_offset);
7719 gimple_call_set_lhs (call, e_bound);
7720 gimple_set_location (call, loc);
7721 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7723 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7724 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7725 element_s, chunk, e_gwv);
7726 gimple_call_set_lhs (call, e_step);
7727 gimple_set_location (call, loc);
7728 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7730 /* Add test and split block. */
7731 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7732 stmt = gimple_build_cond_empty (expr);
7733 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7734 split = split_block (body_bb, stmt);
7735 elem_body_bb = split->dest;
7736 if (cont_bb == body_bb)
7737 cont_bb = elem_body_bb;
7738 body_bb = split->src;
7740 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7742 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7743 if (cont_bb == NULL)
7745 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7746 e->probability = profile_probability::even ();
7747 split->probability = profile_probability::even ();
7750 /* Initialize the user's loop vars. */
7751 gsi = gsi_start_bb (elem_body_bb);
7752 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7756 /* Loop increment goes into cont_bb. If this is not a loop, we
7757 will have spawned threads as if it was, and each one will
7758 execute one iteration. The specification is not explicit about
7759 whether such constructs are ill-formed or not, and they can
7760 occur, especially when noreturn routines are involved. */
7761 if (cont_bb)
7763 gsi = gsi_last_nondebug_bb (cont_bb);
7764 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7765 loc = gimple_location (cont_stmt);
7767 if (fd->tiling)
7769 /* Insert element loop increment and test. */
7770 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7771 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7772 true, GSI_SAME_STMT);
7773 ass = gimple_build_assign (e_offset, expr);
7774 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7775 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7777 stmt = gimple_build_cond_empty (expr);
7778 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7779 split = split_block (cont_bb, stmt);
7780 elem_cont_bb = split->src;
7781 cont_bb = split->dest;
7783 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7784 split->probability = profile_probability::unlikely ().guessed ();
7785 edge latch_edge
7786 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7787 latch_edge->probability = profile_probability::likely ().guessed ();
7789 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7790 skip_edge->probability = profile_probability::unlikely ().guessed ();
7791 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7792 loop_entry_edge->probability
7793 = profile_probability::likely ().guessed ();
7795 gsi = gsi_for_stmt (cont_stmt);
7798 /* Increment offset. */
7799 if (gimple_in_ssa_p (cfun))
7800 expr = build2 (plus_code, iter_type, offset,
7801 fold_convert (plus_type, step));
7802 else
7803 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7804 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7805 true, GSI_SAME_STMT);
7806 ass = gimple_build_assign (offset_incr, expr);
7807 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7808 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7809 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7811 /* Remove the GIMPLE_OMP_CONTINUE. */
7812 gsi_remove (&gsi, true);
7814 /* Fixup edges from cont_bb. */
7815 be = BRANCH_EDGE (cont_bb);
7816 fte = FALLTHRU_EDGE (cont_bb);
7817 be->flags |= EDGE_TRUE_VALUE;
7818 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7820 if (chunking)
7822 /* Split the beginning of exit_bb to make bottom_bb. We
7823 need to insert a nop at the start, because splitting is
7824 after a stmt, not before. */
7825 gsi = gsi_start_bb (exit_bb);
7826 stmt = gimple_build_nop ();
7827 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7828 split = split_block (exit_bb, stmt);
7829 bottom_bb = split->src;
7830 exit_bb = split->dest;
7831 gsi = gsi_last_bb (bottom_bb);
7833 /* Chunk increment and test goes into bottom_bb. */
7834 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7835 build_int_cst (diff_type, 1));
7836 ass = gimple_build_assign (chunk_no, expr);
7837 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7839 /* Chunk test at end of bottom_bb. */
7840 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7841 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7842 GSI_CONTINUE_LINKING);
7844 /* Fixup edges from bottom_bb. */
7845 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7846 split->probability = profile_probability::unlikely ().guessed ();
7847 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7848 latch_edge->probability = profile_probability::likely ().guessed ();
7852 gsi = gsi_last_nondebug_bb (exit_bb);
7853 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7854 loc = gimple_location (gsi_stmt (gsi));
7856 if (!gimple_in_ssa_p (cfun))
7858 /* Insert the final value of V, in case it is live. This is the
7859 value for the only thread that survives past the join. */
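/* Worked example of the expression built here: with B = 0, E = 10,
   S = 3 counting up, range is 10 and

     V = 0 + ((10 - 1 + 3) / 3) * 3 = 12,

   matching the value 'for (V = 0; V < 10; V += 3)' leaves in V.  */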
7860 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7861 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7862 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7863 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7864 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7865 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7866 true, GSI_SAME_STMT);
7867 ass = gimple_build_assign (v, expr);
7868 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7871 /* Remove the OMP_RETURN. */
7872 gsi_remove (&gsi, true);
7874 if (cont_bb)
7876 /* We now have one, two or three nested loops. Update the loop
7877 structures. */
7878 class loop *parent = entry_bb->loop_father;
7879 class loop *body = body_bb->loop_father;
7881 if (chunking)
7883 class loop *chunk_loop = alloc_loop ();
7884 chunk_loop->header = head_bb;
7885 chunk_loop->latch = bottom_bb;
7886 add_loop (chunk_loop, parent);
7887 parent = chunk_loop;
7889 else if (parent != body)
7891 gcc_assert (body->header == body_bb);
7892 gcc_assert (body->latch == cont_bb
7893 || single_pred (body->latch) == cont_bb);
7894 parent = NULL;
7897 if (parent)
7899 class loop *body_loop = alloc_loop ();
7900 body_loop->header = body_bb;
7901 body_loop->latch = cont_bb;
7902 add_loop (body_loop, parent);
7904 if (fd->tiling)
7906 /* Insert tiling's element loop. */
7907 class loop *inner_loop = alloc_loop ();
7908 inner_loop->header = elem_body_bb;
7909 inner_loop->latch = elem_cont_bb;
7910 add_loop (inner_loop, body_loop);
7916 /* Expand the OMP loop defined by REGION. */
7918 static void
7919 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7921 struct omp_for_data fd;
7922 struct omp_for_data_loop *loops;
7924 loops = XALLOCAVEC (struct omp_for_data_loop,
7925 gimple_omp_for_collapse (last_stmt (region->entry)));
7926 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7927 &fd, loops);
7928 region->sched_kind = fd.sched_kind;
7929 region->sched_modifiers = fd.sched_modifiers;
7930 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7931 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7933 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7934 if ((loops[i].m1 || loops[i].m2)
7935 && (loops[i].m1 == NULL_TREE
7936 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7937 && (loops[i].m2 == NULL_TREE
7938 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7939 && TREE_CODE (loops[i].step) == INTEGER_CST
7940 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7942 tree t;
7943 tree itype = TREE_TYPE (loops[i].v);
7944 if (loops[i].m1 && loops[i].m2)
7945 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7946 else if (loops[i].m1)
7947 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7948 else
7949 t = loops[i].m2;
7950 t = fold_build2 (MULT_EXPR, itype, t,
7951 fold_convert (itype,
7952 loops[i - loops[i].outer].step));
7953 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7954 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7955 fold_build1 (NEGATE_EXPR, itype, t),
7956 fold_build1 (NEGATE_EXPR, itype,
7957 fold_convert (itype,
7958 loops[i].step)));
7959 else
7960 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7961 fold_convert (itype, loops[i].step));
7962 if (integer_nonzerop (t))
7963 error_at (gimple_location (fd.for_stmt),
7964 "invalid OpenMP non-rectangular loop step; "
7965 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7966 "step %qE",
7967 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7968 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7969 loops[i - loops[i].outer].step, i + 1,
7970 loops[i].step);
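/* For instance, the non-rectangular nest

     for (i = 0; i < 64; i++)
       for (j = 2 * i; j < 3 * i + 8; j += 4)

   is diagnosed above: (3 - 2) * 1 == 1 is not a multiple of the inner
   step 4, i.e. the distance between the inner bounds does not change by
   a whole number of inner steps per outer iteration (an illustrative
   example).  */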
7974 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7975 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7976 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7977 if (region->cont)
7979 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7980 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7981 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7983 else
7984 /* If there isn't a continue then this is a degenerate case where
7985 the introduction of abnormal edges during lowering will prevent
7986 original loops from being detected. Fix that up. */
7987 loops_state_set (LOOPS_NEED_FIXUP);
7989 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7990 expand_omp_simd (region, &fd);
7991 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7993 gcc_assert (!inner_stmt && !fd.non_rect);
7994 expand_oacc_for (region, &fd);
7996 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7998 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7999 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8000 else
8001 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8003 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8004 && !fd.have_ordered)
8006 if (fd.chunk_size == NULL)
8007 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8008 else
8009 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8011 else
8013 int fn_index, start_ix, next_ix;
8014 unsigned HOST_WIDE_INT sched = 0;
8015 tree sched_arg = NULL_TREE;
8017 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8018 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8019 if (fd.chunk_size == NULL
8020 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8021 fd.chunk_size = integer_zero_node;
8022 switch (fd.sched_kind)
8024 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8025 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8026 && fd.lastprivate_conditional == 0)
8028 gcc_assert (!fd.have_ordered);
8029 fn_index = 6;
8030 sched = 4;
8032 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8033 && !fd.have_ordered
8034 && fd.lastprivate_conditional == 0)
8035 fn_index = 7;
8036 else
8038 fn_index = 3;
8039 sched = (HOST_WIDE_INT_1U << 31);
8041 break;
8042 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8043 case OMP_CLAUSE_SCHEDULE_GUIDED:
8044 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8045 && !fd.have_ordered
8046 && fd.lastprivate_conditional == 0)
8048 fn_index = 3 + fd.sched_kind;
8049 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8050 break;
8052 fn_index = fd.sched_kind;
8053 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8054 sched += (HOST_WIDE_INT_1U << 31);
8055 break;
8056 case OMP_CLAUSE_SCHEDULE_STATIC:
8057 gcc_assert (fd.have_ordered);
8058 fn_index = 0;
8059 sched = (HOST_WIDE_INT_1U << 31) + 1;
8060 break;
8061 default:
8062 gcc_unreachable ();
8064 if (!fd.ordered)
8065 fn_index += fd.have_ordered * 8;
8066 if (fd.ordered)
8067 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8068 else
8069 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8070 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8071 if (fd.have_reductemp || fd.have_pointer_condtemp)
8073 if (fd.ordered)
8074 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8075 else if (fd.have_ordered)
8076 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8077 else
8078 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8079 sched_arg = build_int_cstu (long_integer_type_node, sched);
8080 if (!fd.chunk_size)
8081 fd.chunk_size = integer_zero_node;
8083 if (fd.iter_type == long_long_unsigned_type_node)
8085 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8086 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8087 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8088 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8090 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8091 (enum built_in_function) next_ix, sched_arg,
8092 inner_stmt);
8095 if (gimple_in_ssa_p (cfun))
8096 update_ssa (TODO_update_ssa_only_virtuals);
8099 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8101 v = GOMP_sections_start (n);
8102 L0:
8103 switch (v)
8104 {
8105 case 0:
8106 goto L2;
8107 case 1:
8108 section 1;
8109 goto L1;
8110 case 2:
8111 ...
8112 case n:
8113 ...
8114 default:
8115 abort ();
8116 }
8117 L1:
8118 v = GOMP_sections_next ();
8119 goto L0;
8120 L2:
8121 reduction;
8123 If this is a combined parallel sections, replace the call to
8124 GOMP_sections_start with a call to GOMP_sections_next. */
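/* For example, with two sections

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   the switch above is emitted with n == 2, case 1 dispatching to
   foo () and case 2 to bar () (a source-level illustration).  */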
8126 static void
8127 expand_omp_sections (struct omp_region *region)
8129 tree t, u, vin = NULL, vmain, vnext, l2;
8130 unsigned len;
8131 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8132 gimple_stmt_iterator si, switch_si;
8133 gomp_sections *sections_stmt;
8134 gimple *stmt;
8135 gomp_continue *cont;
8136 edge_iterator ei;
8137 edge e;
8138 struct omp_region *inner;
8139 unsigned i, casei;
8140 bool exit_reachable = region->cont != NULL;
8142 gcc_assert (region->exit != NULL);
8143 entry_bb = region->entry;
8144 l0_bb = single_succ (entry_bb);
8145 l1_bb = region->cont;
8146 l2_bb = region->exit;
8147 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8148 l2 = gimple_block_label (l2_bb);
8149 else
8151 /* This can happen if there are reductions. */
8152 len = EDGE_COUNT (l0_bb->succs);
8153 gcc_assert (len > 0);
8154 e = EDGE_SUCC (l0_bb, len - 1);
8155 si = gsi_last_nondebug_bb (e->dest);
8156 l2 = NULL_TREE;
8157 if (gsi_end_p (si)
8158 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8159 l2 = gimple_block_label (e->dest);
8160 else
8161 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8163 si = gsi_last_nondebug_bb (e->dest);
8164 if (gsi_end_p (si)
8165 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8167 l2 = gimple_block_label (e->dest);
8168 break;
8172 if (exit_reachable)
8173 default_bb = create_empty_bb (l1_bb->prev_bb);
8174 else
8175 default_bb = create_empty_bb (l0_bb);
8177 /* We will build a switch() with enough cases for all the
8178 GIMPLE_OMP_SECTION regions, a '0' case taken when there is no more work,
8179 and a default case to abort if something goes wrong. */
8180 len = EDGE_COUNT (l0_bb->succs);
8182 /* Use vec::quick_push on label_vec throughout, since we know the size
8183 in advance. */
8184 auto_vec<tree> label_vec (len);
8186 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8187 GIMPLE_OMP_SECTIONS statement. */
8188 si = gsi_last_nondebug_bb (entry_bb);
8189 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8190 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8191 vin = gimple_omp_sections_control (sections_stmt);
8192 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8193 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8194 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8195 tree cond_var = NULL_TREE;
8196 if (reductmp || condtmp)
8198 tree reductions = null_pointer_node, mem = null_pointer_node;
8199 tree memv = NULL_TREE, condtemp = NULL_TREE;
8200 gimple_stmt_iterator gsi = gsi_none ();
8201 gimple *g = NULL;
8202 if (reductmp)
8204 reductions = OMP_CLAUSE_DECL (reductmp);
8205 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8206 g = SSA_NAME_DEF_STMT (reductions);
8207 reductions = gimple_assign_rhs1 (g);
8208 OMP_CLAUSE_DECL (reductmp) = reductions;
8209 gsi = gsi_for_stmt (g);
8211 else
8212 gsi = si;
8213 if (condtmp)
8215 condtemp = OMP_CLAUSE_DECL (condtmp);
8216 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8217 OMP_CLAUSE__CONDTEMP_);
8218 cond_var = OMP_CLAUSE_DECL (c);
8219 tree type = TREE_TYPE (condtemp);
8220 memv = create_tmp_var (type);
8221 TREE_ADDRESSABLE (memv) = 1;
8222 unsigned cnt = 0;
8223 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8224 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8225 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8226 ++cnt;
8227 unsigned HOST_WIDE_INT sz
8228 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8229 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8230 false);
8231 mem = build_fold_addr_expr (memv);
8233 t = build_int_cst (unsigned_type_node, len - 1);
8234 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8235 stmt = gimple_build_call (u, 3, t, reductions, mem);
8236 gimple_call_set_lhs (stmt, vin);
8237 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8238 if (condtmp)
8240 expand_omp_build_assign (&gsi, condtemp, memv, false);
8241 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8242 vin, build_one_cst (TREE_TYPE (cond_var)));
8243 expand_omp_build_assign (&gsi, cond_var, t, false);
8245 if (reductmp)
8247 gsi_remove (&gsi, true);
8248 release_ssa_name (gimple_assign_lhs (g));
8251 else if (!is_combined_parallel (region))
8253 /* If we are not inside a combined parallel+sections region,
8254 call GOMP_sections_start. */
8255 t = build_int_cst (unsigned_type_node, len - 1);
8256 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8257 stmt = gimple_build_call (u, 1, t);
8259 else
8261 /* Otherwise, call GOMP_sections_next. */
8262 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8263 stmt = gimple_build_call (u, 0);
8265 if (!reductmp && !condtmp)
8267 gimple_call_set_lhs (stmt, vin);
8268 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8270 gsi_remove (&si, true);
8272 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8273 L0_BB. */
8274 switch_si = gsi_last_nondebug_bb (l0_bb);
8275 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8276 if (exit_reachable)
8278 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8279 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8280 vmain = gimple_omp_continue_control_use (cont);
8281 vnext = gimple_omp_continue_control_def (cont);
8283 else
8285 vmain = vin;
8286 vnext = NULL_TREE;
8289 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8290 label_vec.quick_push (t);
8291 i = 1;
8293 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8294 for (inner = region->inner, casei = 1;
8295 inner;
8296 inner = inner->next, i++, casei++)
8298 basic_block s_entry_bb, s_exit_bb;
8300 /* Skip optional reduction region. */
8301 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8303 --i;
8304 --casei;
8305 continue;
8308 s_entry_bb = inner->entry;
8309 s_exit_bb = inner->exit;
8311 t = gimple_block_label (s_entry_bb);
8312 u = build_int_cst (unsigned_type_node, casei);
8313 u = build_case_label (u, NULL, t);
8314 label_vec.quick_push (u);
8316 si = gsi_last_nondebug_bb (s_entry_bb);
8317 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8318 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8319 gsi_remove (&si, true);
8320 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8322 if (s_exit_bb == NULL)
8323 continue;
8325 si = gsi_last_nondebug_bb (s_exit_bb);
8326 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8327 gsi_remove (&si, true);
8329 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8332 /* Error handling code goes in DEFAULT_BB. */
8333 t = gimple_block_label (default_bb);
8334 u = build_case_label (NULL, NULL, t);
8335 make_edge (l0_bb, default_bb, 0);
8336 add_bb_to_loop (default_bb, current_loops->tree_root);
8338 stmt = gimple_build_switch (vmain, u, label_vec);
8339 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8340 gsi_remove (&switch_si, true);
8342 si = gsi_start_bb (default_bb);
8343 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8344 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8346 if (exit_reachable)
8348 tree bfn_decl;
8350 /* Code to get the next section goes in L1_BB. */
8351 si = gsi_last_nondebug_bb (l1_bb);
8352 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8354 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8355 stmt = gimple_build_call (bfn_decl, 0);
8356 gimple_call_set_lhs (stmt, vnext);
8357 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8358 if (cond_var)
8360 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8361 vnext, build_one_cst (TREE_TYPE (cond_var)));
8362 expand_omp_build_assign (&si, cond_var, t, false);
8364 gsi_remove (&si, true);
8366 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8369 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8370 si = gsi_last_nondebug_bb (l2_bb);
8371 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8372 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8373 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8374 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8375 else
8376 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8377 stmt = gimple_build_call (t, 0);
8378 if (gimple_omp_return_lhs (gsi_stmt (si)))
8379 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8380 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8381 gsi_remove (&si, true);
8383 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8386 /* Expand code for an OpenMP single directive. We've already expanded
8387 much of the code; here we simply place the GOMP_barrier call. */
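/* E.g. '#pragma omp single' gets a GOMP_barrier () call after the
   region, while '#pragma omp single nowait' (a GIMPLE_OMP_RETURN with
   the nowait flag set) gets none; omp_build_barrier below builds the
   call.  */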
8389 static void
8390 expand_omp_single (struct omp_region *region)
8392 basic_block entry_bb, exit_bb;
8393 gimple_stmt_iterator si;
8395 entry_bb = region->entry;
8396 exit_bb = region->exit;
8398 si = gsi_last_nondebug_bb (entry_bb);
8399 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8400 gsi_remove (&si, true);
8401 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8403 si = gsi_last_nondebug_bb (exit_bb);
8404 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8406 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8407 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8409 gsi_remove (&si, true);
8410 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8413 /* Generic expansion for OpenMP synchronization directives: master,
8414 ordered and critical. All we need to do here is remove the entry
8415 and exit markers for REGION. */
8417 static void
8418 expand_omp_synch (struct omp_region *region)
8420 basic_block entry_bb, exit_bb;
8421 gimple_stmt_iterator si;
8423 entry_bb = region->entry;
8424 exit_bb = region->exit;
8426 si = gsi_last_nondebug_bb (entry_bb);
8427 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8428 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8429 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8430 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8431 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8432 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8433 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8434 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8436 expand_omp_taskreg (region);
8437 return;
8439 gsi_remove (&si, true);
8440 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8442 if (exit_bb)
8444 si = gsi_last_nondebug_bb (exit_bb);
8445 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8446 gsi_remove (&si, true);
8447 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8451 /* Translate enum omp_memory_order to enum memmodel. The two enums
8452    use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
8453 is 0. */
8455 static enum memmodel
8456 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8458 switch (mo)
8460 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8461 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8462 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8463 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8464 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8465 default: gcc_unreachable ();
8469 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8470 operation as a normal volatile load. */
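/* Editor's illustration (not part of GCC): for a 4-byte int, the
   function below turns

       #pragma omp atomic read
       v = *addr;

   into roughly

       v = __atomic_load_4 (addr, memmodel);

   where memmodel is taken from the directive's memory-order clause
   (relaxed in the usual default case), and a VIEW_CONVERT_EXPR is
   wrapped around the call when the declared type merely reinterprets
   the builtin's integral return type.  */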
8472 static bool
8473 expand_omp_atomic_load (basic_block load_bb, tree addr,
8474 tree loaded_val, int index)
8476 enum built_in_function tmpbase;
8477 gimple_stmt_iterator gsi;
8478 basic_block store_bb;
8479 location_t loc;
8480 gimple *stmt;
8481 tree decl, call, type, itype;
8483 gsi = gsi_last_nondebug_bb (load_bb);
8484 stmt = gsi_stmt (gsi);
8485 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8486 loc = gimple_location (stmt);
8488 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8489 is smaller than word size, then expand_atomic_load assumes that the load
8490 is atomic. We could avoid the builtin entirely in this case. */
8492 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8493 decl = builtin_decl_explicit (tmpbase);
8494 if (decl == NULL_TREE)
8495 return false;
8497 type = TREE_TYPE (loaded_val);
8498 itype = TREE_TYPE (TREE_TYPE (decl));
8500 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8501 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8502 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8503 if (!useless_type_conversion_p (type, itype))
8504 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8505 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8507 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8508 gsi_remove (&gsi, true);
8510 store_bb = single_succ (load_bb);
8511 gsi = gsi_last_nondebug_bb (store_bb);
8512 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8513 gsi_remove (&gsi, true);
8515 if (gimple_in_ssa_p (cfun))
8516 update_ssa (TODO_update_ssa_no_phi);
8518 return true;
8521 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8522 operation as a normal volatile store. */
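/* Editor's illustration (not part of GCC): an atomic write such as

       #pragma omp atomic write
       *addr = x;

   becomes roughly __atomic_store_4 (addr, x, memmodel); when the old
   value is also needed, the operation is an exchange instead and is
   lowered through __atomic_exchange_4, provided the target supports
   atomic exchange in that mode.  */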
8524 static bool
8525 expand_omp_atomic_store (basic_block load_bb, tree addr,
8526 tree loaded_val, tree stored_val, int index)
8528 enum built_in_function tmpbase;
8529 gimple_stmt_iterator gsi;
8530 basic_block store_bb = single_succ (load_bb);
8531 location_t loc;
8532 gimple *stmt;
8533 tree decl, call, type, itype;
8534 machine_mode imode;
8535 bool exchange;
8537 gsi = gsi_last_nondebug_bb (load_bb);
8538 stmt = gsi_stmt (gsi);
8539 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8541 /* If the load value is needed, then this isn't a store but an exchange. */
8542 exchange = gimple_omp_atomic_need_value_p (stmt);
8544 gsi = gsi_last_nondebug_bb (store_bb);
8545 stmt = gsi_stmt (gsi);
8546 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8547 loc = gimple_location (stmt);
8549 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8550 is smaller than word size, then expand_atomic_store assumes that the store
8551 is atomic. We could avoid the builtin entirely in this case. */
8553 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8554 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8555 decl = builtin_decl_explicit (tmpbase);
8556 if (decl == NULL_TREE)
8557 return false;
8559 type = TREE_TYPE (stored_val);
8561 /* Dig out the type of the function's second argument. */
8562 itype = TREE_TYPE (decl);
8563 itype = TYPE_ARG_TYPES (itype);
8564 itype = TREE_CHAIN (itype);
8565 itype = TREE_VALUE (itype);
8566 imode = TYPE_MODE (itype);
8568 if (exchange && !can_atomic_exchange_p (imode, true))
8569 return false;
8571 if (!useless_type_conversion_p (itype, type))
8572 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8573 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8574 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8575 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8576 if (exchange)
8578 if (!useless_type_conversion_p (type, itype))
8579 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8580 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8583 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8584 gsi_remove (&gsi, true);
8586 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8587 gsi = gsi_last_nondebug_bb (load_bb);
8588 gsi_remove (&gsi, true);
8590 if (gimple_in_ssa_p (cfun))
8591 update_ssa (TODO_update_ssa_no_phi);
8593 return true;
8596 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8597 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8598 size of the data type, and thus usable to find the index of the builtin
8599 decl. Returns false if the expression is not of the proper form. */
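/* Editor's illustration (not part of GCC): given

       #pragma omp atomic
       x = x + n;

   for a 4-byte x, the code below matches the PLUS_EXPR and emits
   roughly __atomic_fetch_add_4 (&x, n, memmodel); when the new value
   is captured instead, the __atomic_add_fetch_4 form is chosen.  */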
8601 static bool
8602 expand_omp_atomic_fetch_op (basic_block load_bb,
8603 tree addr, tree loaded_val,
8604 tree stored_val, int index)
8606 enum built_in_function oldbase, newbase, tmpbase;
8607 tree decl, itype, call;
8608 tree lhs, rhs;
8609 basic_block store_bb = single_succ (load_bb);
8610 gimple_stmt_iterator gsi;
8611 gimple *stmt;
8612 location_t loc;
8613 enum tree_code code;
8614 bool need_old, need_new;
8615 machine_mode imode;
8617 /* We expect to find the following sequences:
8619 load_bb:
8620 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8622 store_bb:
8623 val = tmp OP something; (or: something OP tmp)
8624 GIMPLE_OMP_STORE (val)
8626 ???FIXME: Allow a more flexible sequence.
8627 Perhaps use data flow to pick the statements.
8631 gsi = gsi_after_labels (store_bb);
8632 stmt = gsi_stmt (gsi);
8633 if (is_gimple_debug (stmt))
8635 gsi_next_nondebug (&gsi);
8636 if (gsi_end_p (gsi))
8637 return false;
8638 stmt = gsi_stmt (gsi);
8640 loc = gimple_location (stmt);
8641 if (!is_gimple_assign (stmt))
8642 return false;
8643 gsi_next_nondebug (&gsi);
8644 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8645 return false;
8646 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8647 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8648 enum omp_memory_order omo
8649 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8650 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8651 gcc_checking_assert (!need_old || !need_new);
8653 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8654 return false;
8656 /* Check for one of the supported fetch-op operations. */
8657 code = gimple_assign_rhs_code (stmt);
8658 switch (code)
8660 case PLUS_EXPR:
8661 case POINTER_PLUS_EXPR:
8662 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8663 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8664 break;
8665 case MINUS_EXPR:
8666 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8667 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8668 break;
8669 case BIT_AND_EXPR:
8670 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8671 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8672 break;
8673 case BIT_IOR_EXPR:
8674 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8675 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8676 break;
8677 case BIT_XOR_EXPR:
8678 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8679 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8680 break;
8681 default:
8682 return false;
8685 /* Make sure the expression is of the proper form. */
8686 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8687 rhs = gimple_assign_rhs2 (stmt);
8688 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8689 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8690 rhs = gimple_assign_rhs1 (stmt);
8691 else
8692 return false;
8694 tmpbase = ((enum built_in_function)
8695 ((need_new ? newbase : oldbase) + index + 1));
8696 decl = builtin_decl_explicit (tmpbase);
8697 if (decl == NULL_TREE)
8698 return false;
8699 itype = TREE_TYPE (TREE_TYPE (decl));
8700 imode = TYPE_MODE (itype);
8702 /* We could test all of the various optabs involved, but the fact of the
8703 matter is that (with the exception of i486 vs i586 and xadd) all targets
8704    that support any atomic operation optab also implement compare-and-swap.
8705 Let optabs.c take care of expanding any compare-and-swap loop. */
8706 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8707 return false;
8709 gsi = gsi_last_nondebug_bb (load_bb);
8710 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8712 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8713    It only requires that the operation happen atomically, so use the
8714    memory model requested on the directive (RELAXED by default).  */
8715 call = build_call_expr_loc (loc, decl, 3, addr,
8716 fold_convert_loc (loc, itype, rhs),
8717 build_int_cst (NULL, mo));
8719 if (need_old || need_new)
8721 lhs = need_old ? loaded_val : stored_val;
8722 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8723 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8725 else
8726 call = fold_convert_loc (loc, void_type_node, call);
8727 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8728 gsi_remove (&gsi, true);
8730 gsi = gsi_last_nondebug_bb (store_bb);
8731 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8732 gsi_remove (&gsi, true);
8733 gsi = gsi_last_nondebug_bb (store_bb);
8734 stmt = gsi_stmt (gsi);
8735 gsi_remove (&gsi, true);
8737 if (gimple_in_ssa_p (cfun))
8739 release_defs (stmt);
8740 update_ssa (TODO_update_ssa_no_phi);
8743 return true;
8746 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8748 oldval = *addr;
8749 repeat:
8750 newval = rhs; // with oldval replacing *addr in rhs
8751        prev = __sync_val_compare_and_swap (addr, oldval, newval);
8752        if (prev != oldval)
8753          { oldval = prev; goto repeat; }
8755 INDEX is log2 of the size of the data type, and thus usable to find the
8756 index of the builtin decl. */
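/* Editor's sketch (hypothetical helper names, not GCC API): for a
   4-byte float the loop above runs on the integer image of the value,
   roughly:

       unsigned int old = *(unsigned int *) addr, prev;
       for (;;)
         {
           unsigned int newv = bits_of_float (update (float_of_bits (old)));
           prev = __sync_val_compare_and_swap ((unsigned int *) addr,
                                               old, newv);
           if (prev == old)
             break;
           old = prev;
         }

   bits_of_float/float_of_bits stand in for the VIEW_CONVERT_EXPRs the
   code below inserts; comparing integer images keeps the loop correct
   even for NaNs and -0.0.  */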
8758 static bool
8759 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8760 tree addr, tree loaded_val, tree stored_val,
8761 int index)
8763 tree loadedi, storedi, initial, new_storedi, old_vali;
8764 tree type, itype, cmpxchg, iaddr, atype;
8765 gimple_stmt_iterator si;
8766 basic_block loop_header = single_succ (load_bb);
8767 gimple *phi, *stmt;
8768 edge e;
8769 enum built_in_function fncode;
8771 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8772 order to use the RELAXED memory model effectively. */
8773 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8774 + index + 1);
8775 cmpxchg = builtin_decl_explicit (fncode);
8776 if (cmpxchg == NULL_TREE)
8777 return false;
8778 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8779 atype = type;
8780 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8782 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8783 || !can_atomic_load_p (TYPE_MODE (itype)))
8784 return false;
8786 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8787 si = gsi_last_nondebug_bb (load_bb);
8788 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8790 /* For floating-point values, we'll need to view-convert them to integers
8791 so that we can perform the atomic compare and swap. Simplify the
8792 following code by always setting up the "i"ntegral variables. */
8793 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8795 tree iaddr_val;
8797 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8798 true));
8799 atype = itype;
8800 iaddr_val
8801 = force_gimple_operand_gsi (&si,
8802 fold_convert (TREE_TYPE (iaddr), addr),
8803 false, NULL_TREE, true, GSI_SAME_STMT);
8804 stmt = gimple_build_assign (iaddr, iaddr_val);
8805 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8806 loadedi = create_tmp_var (itype);
8807 if (gimple_in_ssa_p (cfun))
8808 loadedi = make_ssa_name (loadedi);
8810 else
8812 iaddr = addr;
8813 loadedi = loaded_val;
8816 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8817 tree loaddecl = builtin_decl_explicit (fncode);
8818 if (loaddecl)
8819 initial
8820 = fold_convert (atype,
8821 build_call_expr (loaddecl, 2, iaddr,
8822 build_int_cst (NULL_TREE,
8823 MEMMODEL_RELAXED)));
8824 else
8826 tree off
8827 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8828 true), 0);
8829 initial = build2 (MEM_REF, atype, iaddr, off);
8832 initial
8833 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8834 GSI_SAME_STMT);
8836 /* Move the value to the LOADEDI temporary. */
8837 if (gimple_in_ssa_p (cfun))
8839 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8840 phi = create_phi_node (loadedi, loop_header);
8841 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8842 initial);
8844 else
8845 gsi_insert_before (&si,
8846 gimple_build_assign (loadedi, initial),
8847 GSI_SAME_STMT);
8848 if (loadedi != loaded_val)
8850 gimple_stmt_iterator gsi2;
8851 tree x;
8853 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8854 gsi2 = gsi_start_bb (loop_header);
8855 if (gimple_in_ssa_p (cfun))
8857 gassign *stmt;
8858 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8859 true, GSI_SAME_STMT);
8860 stmt = gimple_build_assign (loaded_val, x);
8861 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8863 else
8865 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8866 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8867 true, GSI_SAME_STMT);
8870 gsi_remove (&si, true);
8872 si = gsi_last_nondebug_bb (store_bb);
8873 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8875 if (iaddr == addr)
8876 storedi = stored_val;
8877 else
8878 storedi
8879 = force_gimple_operand_gsi (&si,
8880 build1 (VIEW_CONVERT_EXPR, itype,
8881 stored_val), true, NULL_TREE, true,
8882 GSI_SAME_STMT);
8884 /* Build the compare&swap statement. */
8885 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8886 new_storedi = force_gimple_operand_gsi (&si,
8887 fold_convert (TREE_TYPE (loadedi),
8888 new_storedi),
8889 true, NULL_TREE,
8890 true, GSI_SAME_STMT);
8892 if (gimple_in_ssa_p (cfun))
8893 old_vali = loadedi;
8894 else
8896 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8897 stmt = gimple_build_assign (old_vali, loadedi);
8898 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8900 stmt = gimple_build_assign (loadedi, new_storedi);
8901 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8904 /* Note that we always perform the comparison as an integer, even for
8905 floating point. This allows the atomic operation to properly
8906 succeed even with NaNs and -0.0. */
8907 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8908 stmt = gimple_build_cond_empty (ne);
8909 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8911 /* Update cfg. */
8912 e = single_succ_edge (store_bb);
8913 e->flags &= ~EDGE_FALLTHRU;
8914 e->flags |= EDGE_FALSE_VALUE;
8915 /* Expect no looping. */
8916 e->probability = profile_probability::guessed_always ();
8918 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8919 e->probability = profile_probability::guessed_never ();
8921 /* Copy the new value to loadedi (we already did that before the condition
8922 if we are not in SSA). */
8923 if (gimple_in_ssa_p (cfun))
8925 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8926 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8929 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8930 gsi_remove (&si, true);
8932 class loop *loop = alloc_loop ();
8933 loop->header = loop_header;
8934 loop->latch = store_bb;
8935 add_loop (loop, loop_header->loop_father);
8937 if (gimple_in_ssa_p (cfun))
8938 update_ssa (TODO_update_ssa_no_phi);
8940 return true;
8943 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8945 GOMP_atomic_start ();
8946 *addr = rhs;
8947 GOMP_atomic_end ();
8949 The result is not globally atomic, but works so long as all parallel
8950 references are within #pragma omp atomic directives. According to
8951    responses received from omp@openmp.org, this appears to be within spec,
8952    which makes sense, since that's how several other compilers handle
8953 this situation as well.
8954 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8955 expanding. STORED_VAL is the operand of the matching
8956 GIMPLE_OMP_ATOMIC_STORE.
8958 We replace
8959 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8960 loaded_val = *addr;
8962 and replace
8963 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8964 *addr = stored_val;
8967 static bool
8968 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8969 tree addr, tree loaded_val, tree stored_val)
8971 gimple_stmt_iterator si;
8972 gassign *stmt;
8973 tree t;
8975 si = gsi_last_nondebug_bb (load_bb);
8976 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8978 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8979 t = build_call_expr (t, 0);
8980 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8982 tree mem = build_simple_mem_ref (addr);
8983 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8984 TREE_OPERAND (mem, 1)
8985 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8986 true),
8987 TREE_OPERAND (mem, 1));
8988 stmt = gimple_build_assign (loaded_val, mem);
8989 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8990 gsi_remove (&si, true);
8992 si = gsi_last_nondebug_bb (store_bb);
8993 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8995 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8996 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8998 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8999 t = build_call_expr (t, 0);
9000 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9001 gsi_remove (&si, true);
9003 if (gimple_in_ssa_p (cfun))
9004 update_ssa (TODO_update_ssa_no_phi);
9005 return true;
9008 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
9009    using expand_omp_atomic_fetch_op.  If that fails, we try to
9010    call expand_omp_atomic_pipeline, and if that fails too, the
9011 ultimate fallback is wrapping the operation in a mutex
9012 (expand_omp_atomic_mutex). REGION is the atomic region built
9013 by build_omp_regions_1(). */
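/* Editor's illustration (not part of GCC): for

       #pragma omp atomic
       x += 1;   (with int x)

   the expansion below is attempted in this order:
     1. plain atomic load/store forms (not applicable to an update);
     2. __atomic_fetch_add_4 (&x, 1, memmodel)   -- fetch-op;
     3. a 4-byte compare-and-swap loop           -- pipeline;
     4. GOMP_atomic_start (); x += 1; GOMP_atomic_end ();  -- mutex.  */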
9015 static void
9016 expand_omp_atomic (struct omp_region *region)
9018 basic_block load_bb = region->entry, store_bb = region->exit;
9019 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9020 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9021 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9022 tree addr = gimple_omp_atomic_load_rhs (load);
9023 tree stored_val = gimple_omp_atomic_store_val (store);
9024 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9025 HOST_WIDE_INT index;
9027 /* Make sure the type is one of the supported sizes. */
9028 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9029 index = exact_log2 (index);
9030 if (index >= 0 && index <= 4)
9032 unsigned int align = TYPE_ALIGN_UNIT (type);
9034 /* __sync builtins require strict data alignment. */
9035 if (exact_log2 (align) >= index)
9037 /* Atomic load. */
9038 scalar_mode smode;
9039 if (loaded_val == stored_val
9040 && (is_int_mode (TYPE_MODE (type), &smode)
9041 || is_float_mode (TYPE_MODE (type), &smode))
9042 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9043 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9044 return;
9046 /* Atomic store. */
9047 if ((is_int_mode (TYPE_MODE (type), &smode)
9048 || is_float_mode (TYPE_MODE (type), &smode))
9049 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9050 && store_bb == single_succ (load_bb)
9051 && first_stmt (store_bb) == store
9052 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9053 stored_val, index))
9054 return;
9056 /* When possible, use specialized atomic update functions. */
9057 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9058 && store_bb == single_succ (load_bb)
9059 && expand_omp_atomic_fetch_op (load_bb, addr,
9060 loaded_val, stored_val, index))
9061 return;
9063 /* If we don't have specialized __sync builtins, try and implement
9064 as a compare and swap loop. */
9065 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9066 loaded_val, stored_val, index))
9067 return;
9071 /* The ultimate fallback is wrapping the operation in a mutex. */
9072 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9075 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9076 at REGION_EXIT. */
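/* Editor's illustration (not part of GCC): a kernels region whose only
   top-level loop is the nest

       for (i = 0; i < n; i++)
         for (j = 0; j < m; j++)
           ...

   gets every loop in the nest marked; a second loop beside the 'i'
   loop, or sibling loops at any inner depth, make this function return
   without marking anything.  */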
9078 static void
9079 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9080 basic_block region_exit)
9082 class loop *outer = region_entry->loop_father;
9083 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9085 /* Don't parallelize the kernels region if it contains more than one outer
9086 loop. */
9087 unsigned int nr_outer_loops = 0;
9088 class loop *single_outer = NULL;
9089 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9091 gcc_assert (loop_outer (loop) == outer);
9093 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9094 continue;
9096 if (region_exit != NULL
9097 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9098 continue;
9100 nr_outer_loops++;
9101 single_outer = loop;
9103 if (nr_outer_loops != 1)
9104 return;
9106 for (class loop *loop = single_outer->inner;
9107 loop != NULL;
9108 loop = loop->inner)
9109 if (loop->next)
9110 return;
9112 /* Mark the loops in the region. */
9113 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9114 loop->in_oacc_kernels_region = true;
9117 /* Build target argument identifier from the DEVICE identifier, value
9118 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9120 static tree
9121 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9123 tree t = build_int_cst (integer_type_node, device);
9124   if (subsequent_param)
9125 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9126 build_int_cst (integer_type_node,
9127 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9128 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9129 build_int_cst (integer_type_node, id));
9130 return t;
9133 /* Like above but return it in type that can be directly stored as an element
9134 of the argument array. */
9136 static tree
9137 get_target_argument_identifier (int device, bool subsequent_param, int id)
9139   tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9140 return fold_convert (ptr_type_node, t);
9143 /* Return a target argument consisting of DEVICE identifier, value identifier
9144 ID, and the actual VALUE. */
9146 static tree
9147 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9148 tree value)
9150 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9151 fold_convert (integer_type_node, value),
9152 build_int_cst (unsigned_type_node,
9153 GOMP_TARGET_ARG_VALUE_SHIFT));
9154 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9155 get_target_argument_identifier_1 (device, false, id));
9156 t = fold_convert (ptr_type_node, t);
9157 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9160 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9161    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9162 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9163 arguments. */
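/* Editor's note (bit layout summarized from gomp-constants.h; verify
   against that header): an identifier packs the device in the low
   bits, GOMP_TARGET_ARG_SUBSEQUENT_PARAM as a flag, and the value id
   above them.  A small value is folded into the same word:

       arg = (value << GOMP_TARGET_ARG_VALUE_SHIFT)
             | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS;

   values outside the signed 15-bit range instead travel in a second,
   pointer-sized slot following the identifier.  */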
9165 static void
9166 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9167 int id, tree value, vec <tree> *args)
9169 if (tree_fits_shwi_p (value)
9170 && tree_to_shwi (value) > -(1 << 15)
9171 && tree_to_shwi (value) < (1 << 15))
9172 args->quick_push (get_target_argument_value (gsi, device, id, value));
9173 else
9175 args->quick_push (get_target_argument_identifier (device, true, id));
9176 value = fold_convert (ptr_type_node, value);
9177 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9178 GSI_SAME_STMT);
9179 args->quick_push (value);
9183 /* Create an array of arguments that is then passed to GOMP_target. */
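/* Editor's illustration (not part of GCC): for
   '#pragma omp target teams num_teams (4) thread_limit (8)' the array
   built here holds the encoded num_teams argument (value 4), the
   encoded thread_limit argument (value 8), and a terminating NULL
   pointer, in that order.  */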
9185 static tree
9186 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9188 auto_vec <tree, 6> args;
9189 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9190 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9191 if (c)
9192 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9193 else
9194 t = integer_minus_one_node;
9195 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9196 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9198 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9199 if (c)
9200 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9201 else
9202 t = integer_minus_one_node;
9203 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9204 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9205 &args);
9207 /* Produce more, perhaps device specific, arguments here. */
9209 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9210 args.length () + 1),
9211 ".omp_target_args");
9212 for (unsigned i = 0; i < args.length (); i++)
9214 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9215 build_int_cst (integer_type_node, i),
9216 NULL_TREE, NULL_TREE);
9217 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9218 GSI_SAME_STMT);
9220 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9221 build_int_cst (integer_type_node, args.length ()),
9222 NULL_TREE, NULL_TREE);
9223 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9224 GSI_SAME_STMT);
9225 TREE_ADDRESSABLE (argarray) = 1;
9226 return build_fold_addr_expr (argarray);
9229 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9231 static void
9232 expand_omp_target (struct omp_region *region)
9234 basic_block entry_bb, exit_bb, new_bb;
9235 struct function *child_cfun;
9236 tree child_fn, block, t;
9237 gimple_stmt_iterator gsi;
9238 gomp_target *entry_stmt;
9239 gimple *stmt;
9240 edge e;
9241 bool offloaded, data_region;
9242 int target_kind;
9244 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9245 target_kind = gimple_omp_target_kind (entry_stmt);
9246 new_bb = region->entry;
9248 offloaded = is_gimple_omp_offloaded (entry_stmt);
9249 switch (target_kind)
9251 case GF_OMP_TARGET_KIND_REGION:
9252 case GF_OMP_TARGET_KIND_UPDATE:
9253 case GF_OMP_TARGET_KIND_ENTER_DATA:
9254 case GF_OMP_TARGET_KIND_EXIT_DATA:
9255 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9256 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9257 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9258 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9259 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9260 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9261 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9262 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9263 data_region = false;
9264 break;
9265 case GF_OMP_TARGET_KIND_DATA:
9266 case GF_OMP_TARGET_KIND_OACC_DATA:
9267 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9268 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9269 data_region = true;
9270 break;
9271 default:
9272 gcc_unreachable ();
9275 child_fn = NULL_TREE;
9276 child_cfun = NULL;
9277 if (offloaded)
9279 child_fn = gimple_omp_target_child_fn (entry_stmt);
9280 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9283 /* Supported by expand_omp_taskreg, but not here. */
9284 if (child_cfun != NULL)
9285 gcc_checking_assert (!child_cfun->cfg);
9286 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9288 entry_bb = region->entry;
9289 exit_bb = region->exit;
9291 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9292 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9294   /* From here on, all OpenACC compute constructs are mapped to
9295 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9296 To distinguish between them, we attach attributes. */
9297 switch (target_kind)
9299 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9300 DECL_ATTRIBUTES (child_fn)
9301 = tree_cons (get_identifier ("oacc parallel"),
9302 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9303 break;
9304 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9305 DECL_ATTRIBUTES (child_fn)
9306 = tree_cons (get_identifier ("oacc kernels"),
9307 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9308 break;
9309 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9310 DECL_ATTRIBUTES (child_fn)
9311 = tree_cons (get_identifier ("oacc serial"),
9312 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9313 break;
9314 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9315 DECL_ATTRIBUTES (child_fn)
9316 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9317 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9318 break;
9319 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9320 DECL_ATTRIBUTES (child_fn)
9321 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9322 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9323 break;
9324 default:
9325 /* Make sure we don't miss any. */
9326 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9327 && is_gimple_omp_offloaded (entry_stmt)));
9328 break;
9331 if (offloaded)
9333 unsigned srcidx, dstidx, num;
9335 /* If the offloading region needs data sent from the parent
9336 function, then the very first statement (except possible
9337 tree profile counter updates) of the offloading body
9338 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9339 &.OMP_DATA_O is passed as an argument to the child function,
9340 we need to replace it with the argument as seen by the child
9341 function.
9343 In most cases, this will end up being the identity assignment
9344 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9345 a function call that has been inlined, the original PARM_DECL
9346 .OMP_DATA_I may have been converted into a different local
9347         variable; in that case, we need to keep the assignment.  */
9348 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9349 if (data_arg)
9351 basic_block entry_succ_bb = single_succ (entry_bb);
9352 gimple_stmt_iterator gsi;
9353 tree arg;
9354 gimple *tgtcopy_stmt = NULL;
9355 tree sender = TREE_VEC_ELT (data_arg, 0);
9357 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9359 gcc_assert (!gsi_end_p (gsi));
9360 stmt = gsi_stmt (gsi);
9361 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9362 continue;
9364 if (gimple_num_ops (stmt) == 2)
9366 tree arg = gimple_assign_rhs1 (stmt);
9368 /* We're ignoring the subcode because we're
9369 effectively doing a STRIP_NOPS. */
9371 if (TREE_CODE (arg) == ADDR_EXPR
9372 && TREE_OPERAND (arg, 0) == sender)
9374 tgtcopy_stmt = stmt;
9375 break;
9380 gcc_assert (tgtcopy_stmt != NULL);
9381 arg = DECL_ARGUMENTS (child_fn);
9383 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9384 gsi_remove (&gsi, true);
9387 /* Declare local variables needed in CHILD_CFUN. */
9388 block = DECL_INITIAL (child_fn);
9389 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9390 /* The gimplifier could record temporaries in the offloading block
9391 rather than in containing function's local_decls chain,
9392 which would mean cgraph missed finalizing them. Do it now. */
9393 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9394 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9395 varpool_node::finalize_decl (t);
9396 DECL_SAVED_TREE (child_fn) = NULL;
9397 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9398 gimple_set_body (child_fn, NULL);
9399 TREE_USED (block) = 1;
9401 /* Reset DECL_CONTEXT on function arguments. */
9402 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9403 DECL_CONTEXT (t) = child_fn;
9405 /* Split ENTRY_BB at GIMPLE_*,
9406 so that it can be moved to the child function. */
9407 gsi = gsi_last_nondebug_bb (entry_bb);
9408 stmt = gsi_stmt (gsi);
9409 gcc_assert (stmt
9410 && gimple_code (stmt) == gimple_code (entry_stmt));
9411 e = split_block (entry_bb, stmt);
9412 gsi_remove (&gsi, true);
9413 entry_bb = e->dest;
9414 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9416 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9417 if (exit_bb)
9419 gsi = gsi_last_nondebug_bb (exit_bb);
9420 gcc_assert (!gsi_end_p (gsi)
9421 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9422 stmt = gimple_build_return (NULL);
9423 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9424 gsi_remove (&gsi, true);
9427 /* Move the offloading region into CHILD_CFUN. */
9429 block = gimple_block (entry_stmt);
9431 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9432 if (exit_bb)
9433 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9434 /* When the OMP expansion process cannot guarantee an up-to-date
9435         loop tree, arrange for the child function to fix up loops.  */
9436 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9437 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9439 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9440 num = vec_safe_length (child_cfun->local_decls);
9441 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9443 t = (*child_cfun->local_decls)[srcidx];
9444 if (DECL_CONTEXT (t) == cfun->decl)
9445 continue;
9446 if (srcidx != dstidx)
9447 (*child_cfun->local_decls)[dstidx] = t;
9448 dstidx++;
9450 if (dstidx != num)
9451 vec_safe_truncate (child_cfun->local_decls, dstidx);
9453 /* Inform the callgraph about the new function. */
9454 child_cfun->curr_properties = cfun->curr_properties;
9455 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9456 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9457 cgraph_node *node = cgraph_node::get_create (child_fn);
9458 node->parallelized_function = 1;
9459 cgraph_node::add_new_function (child_fn, true);
9461 /* Add the new function to the offload table. */
9462 if (ENABLE_OFFLOADING)
9464 if (in_lto_p)
9465 DECL_PRESERVE_P (child_fn) = 1;
9466 vec_safe_push (offload_funcs, child_fn);
9469 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9470 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9472 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9473 fixed in a following pass. */
9474 push_cfun (child_cfun);
9475 if (need_asm)
9476 assign_assembler_name_if_needed (child_fn);
9477 cgraph_edge::rebuild_edges ();
9479 /* Some EH regions might become dead, see PR34608. If
9480 pass_cleanup_cfg isn't the first pass to happen with the
9481 new child, these dead EH edges might cause problems.
9482 Clean them up now. */
9483 if (flag_exceptions)
9485 basic_block bb;
9486 bool changed = false;
9488 FOR_EACH_BB_FN (bb, cfun)
9489 changed |= gimple_purge_dead_eh_edges (bb);
9490 if (changed)
9491 cleanup_tree_cfg ();
9493 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9494 verify_loop_structure ();
9495 pop_cfun ();
9497 if (dump_file && !gimple_in_ssa_p (cfun))
9499 omp_any_child_fn_dumped = true;
9500 dump_function_header (dump_file, child_fn, dump_flags);
9501 dump_function_to_file (child_fn, dump_file, dump_flags);
9504 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9507 /* Emit a library call to launch the offloading region, or do data
9508 transfers. */
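  /* Editor's note: for a plain 'omp target' region the call emitted
     below is shaped roughly like

         GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes,
                          kinds, flags, depend, args);

     (parameter names paraphrased; see libgomp for the authoritative
     prototype).  OpenACC compute constructs funnel into
     BUILT_IN_GOACC_PARALLEL with launch tags appended further down.  */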
9509 tree t1, t2, t3, t4, depend, c, clauses;
9510 enum built_in_function start_ix;
9511 unsigned int flags_i = 0;
9513 switch (gimple_omp_target_kind (entry_stmt))
9515 case GF_OMP_TARGET_KIND_REGION:
9516 start_ix = BUILT_IN_GOMP_TARGET;
9517 break;
9518 case GF_OMP_TARGET_KIND_DATA:
9519 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9520 break;
9521 case GF_OMP_TARGET_KIND_UPDATE:
9522 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9523 break;
9524 case GF_OMP_TARGET_KIND_ENTER_DATA:
9525 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9526 break;
9527 case GF_OMP_TARGET_KIND_EXIT_DATA:
9528 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9529 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9530 break;
9531 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9532 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9533 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9534 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9535 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9536 start_ix = BUILT_IN_GOACC_PARALLEL;
9537 break;
9538 case GF_OMP_TARGET_KIND_OACC_DATA:
9539 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9540 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9541 start_ix = BUILT_IN_GOACC_DATA_START;
9542 break;
9543 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9544 start_ix = BUILT_IN_GOACC_UPDATE;
9545 break;
9546 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9547 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9548 break;
9549 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9550 start_ix = BUILT_IN_GOACC_DECLARE;
9551 break;
9552 default:
9553 gcc_unreachable ();
9556 clauses = gimple_omp_target_clauses (entry_stmt);
9558 tree device = NULL_TREE;
9559 location_t device_loc = UNKNOWN_LOCATION;
9560 tree goacc_flags = NULL_TREE;
9561 if (is_gimple_omp_oacc (entry_stmt))
9563 /* By default, no GOACC_FLAGs are set. */
9564 goacc_flags = integer_zero_node;
9566 else
9568 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9569 if (c)
9571 device = OMP_CLAUSE_DEVICE_ID (c);
9572 device_loc = OMP_CLAUSE_LOCATION (c);
9574 else
9576 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9577 library choose). */
9578 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9579 device_loc = gimple_location (entry_stmt);
9582 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9583 if (c)
9584 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9587 /* By default, there is no conditional. */
9588 tree cond = NULL_TREE;
9589 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9590 if (c)
9591 cond = OMP_CLAUSE_IF_EXPR (c);
9592 /* If we found the clause 'if (cond)', build:
9593 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9594 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9595 if (cond)
9597 tree *tp;
9598 if (is_gimple_omp_oacc (entry_stmt))
9599 tp = &goacc_flags;
9600 else
9602 /* Ensure 'device' is of the correct type. */
9603 device = fold_convert_loc (device_loc, integer_type_node, device);
9605 tp = &device;
9608 cond = gimple_boolify (cond);
9610 basic_block cond_bb, then_bb, else_bb;
9611 edge e;
9612 tree tmp_var;
9614 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9615 if (offloaded)
9616 e = split_block_after_labels (new_bb);
9617 else
9619 gsi = gsi_last_nondebug_bb (new_bb);
9620 gsi_prev (&gsi);
9621 e = split_block (new_bb, gsi_stmt (gsi));
9623 cond_bb = e->src;
9624 new_bb = e->dest;
9625 remove_edge (e);
9627 then_bb = create_empty_bb (cond_bb);
9628 else_bb = create_empty_bb (then_bb);
9629 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9630 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9632 stmt = gimple_build_cond_empty (cond);
9633 gsi = gsi_last_bb (cond_bb);
9634 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9636 gsi = gsi_start_bb (then_bb);
9637 stmt = gimple_build_assign (tmp_var, *tp);
9638 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9640 gsi = gsi_start_bb (else_bb);
9641 if (is_gimple_omp_oacc (entry_stmt))
9642 stmt = gimple_build_assign (tmp_var,
9643 BIT_IOR_EXPR,
9644 *tp,
9645 build_int_cst (integer_type_node,
9646 GOACC_FLAG_HOST_FALLBACK));
9647 else
9648 stmt = gimple_build_assign (tmp_var,
9649 build_int_cst (integer_type_node,
9650 GOMP_DEVICE_HOST_FALLBACK));
9651 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9653 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9654 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9655 add_bb_to_loop (then_bb, cond_bb->loop_father);
9656 add_bb_to_loop (else_bb, cond_bb->loop_father);
9657 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9658 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9660 *tp = tmp_var;
9662 gsi = gsi_last_nondebug_bb (new_bb);
9664 else
9666 gsi = gsi_last_nondebug_bb (new_bb);
9668 if (device != NULL_TREE)
9669 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9670 true, GSI_SAME_STMT);
9673 t = gimple_omp_target_data_arg (entry_stmt);
9674 if (t == NULL)
9676 t1 = size_zero_node;
9677 t2 = build_zero_cst (ptr_type_node);
9678 t3 = t2;
9679 t4 = t2;
9681 else
9683 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9684 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9685 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9686 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9687 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9690 gimple *g;
9691 bool tagging = false;
9692   /* Eleven is the maximum number of arguments used by any start_ix,
       not counting varargs.  */
9693 auto_vec<tree, 11> args;
9694 if (is_gimple_omp_oacc (entry_stmt))
9696 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9697 TREE_TYPE (goacc_flags), goacc_flags);
9698 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9699 NULL_TREE, true,
9700 GSI_SAME_STMT);
9701 args.quick_push (goacc_flags_m);
9703 else
9704 args.quick_push (device);
9705 if (offloaded)
9706 args.quick_push (build_fold_addr_expr (child_fn));
9707 args.quick_push (t1);
9708 args.quick_push (t2);
9709 args.quick_push (t3);
9710 args.quick_push (t4);
9711 switch (start_ix)
9713 case BUILT_IN_GOACC_DATA_START:
9714 case BUILT_IN_GOACC_DECLARE:
9715 case BUILT_IN_GOMP_TARGET_DATA:
9716 break;
9717 case BUILT_IN_GOMP_TARGET:
9718 case BUILT_IN_GOMP_TARGET_UPDATE:
9719 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9720 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9721 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9722 if (c)
9723 depend = OMP_CLAUSE_DECL (c);
9724 else
9725 depend = build_int_cst (ptr_type_node, 0);
9726 args.quick_push (depend);
9727 if (start_ix == BUILT_IN_GOMP_TARGET)
9728 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9729 break;
9730 case BUILT_IN_GOACC_PARALLEL:
9731 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9733 tree dims = NULL_TREE;
9734 unsigned int ix;
9736 /* For serial constructs we set all dimensions to 1. */
9737 for (ix = GOMP_DIM_MAX; ix--;)
9738 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9739 oacc_replace_fn_attrib (child_fn, dims);
9741 else
9742 oacc_set_fn_attrib (child_fn, clauses, &args);
9743 tagging = true;
9744 /* FALLTHRU */
9745 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9746 case BUILT_IN_GOACC_UPDATE:
9748 tree t_async = NULL_TREE;
9750 /* If present, use the value specified by the respective
9751 clause, making sure that is of the correct type. */
9752 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9753 if (c)
9754 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9755 integer_type_node,
9756 OMP_CLAUSE_ASYNC_EXPR (c));
9757 else if (!tagging)
9758 /* Default values for t_async. */
9759 t_async = fold_convert_loc (gimple_location (entry_stmt),
9760 integer_type_node,
9761 build_int_cst (integer_type_node,
9762 GOMP_ASYNC_SYNC));
9763 if (tagging && t_async)
9765 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9767 if (TREE_CODE (t_async) == INTEGER_CST)
9769              /* See if we can pack the async arg into the tag's
9770 operand. */
9771 i_async = TREE_INT_CST_LOW (t_async);
9772 if (i_async < GOMP_LAUNCH_OP_MAX)
9773 t_async = NULL_TREE;
9774 else
9775 i_async = GOMP_LAUNCH_OP_MAX;
9777 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9778 i_async));
9780 if (t_async)
9781 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9782 NULL_TREE, true,
9783 GSI_SAME_STMT));
9785 /* Save the argument index, and ... */
9786 unsigned t_wait_idx = args.length ();
9787 unsigned num_waits = 0;
9788 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9789 if (!tagging || c)
9790 /* ... push a placeholder. */
9791 args.safe_push (integer_zero_node);
9793 for (; c; c = OMP_CLAUSE_CHAIN (c))
9794 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9796 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9797 integer_type_node,
9798 OMP_CLAUSE_WAIT_EXPR (c));
9799 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9800 GSI_SAME_STMT);
9801 args.safe_push (arg);
9802 num_waits++;
9805 if (!tagging || num_waits)
9807 tree len;
9809 /* Now that we know the number, update the placeholder. */
9810 if (tagging)
9811 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9812 else
9813 len = build_int_cst (integer_type_node, num_waits);
9814 len = fold_convert_loc (gimple_location (entry_stmt),
9815 unsigned_type_node, len);
9816 args[t_wait_idx] = len;
9819 break;
9820 default:
9821 gcc_unreachable ();
9823 if (tagging)
9824 /* Push terminal marker - zero. */
9825 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9827 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9828 gimple_set_location (g, gimple_location (entry_stmt));
9829 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9830 if (!offloaded)
9832 g = gsi_stmt (gsi);
9833 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9834 gsi_remove (&gsi, true);
9836 if (data_region && region->exit)
9838 gsi = gsi_last_nondebug_bb (region->exit);
9839 g = gsi_stmt (gsi);
9840 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9841 gsi_remove (&gsi, true);
9845 /* Expand the parallel region tree rooted at REGION. Expansion
9846 proceeds in depth-first order. Innermost regions are expanded
9847 first. This way, parallel regions that require a new function to
9848 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9849 internal dependencies in their body. */
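/* Editor's illustration (not part of GCC): given

       #pragma omp parallel
         #pragma omp for
           ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL, so when the parallel body is outlined it
   contains only ordinary control flow and runtime calls, never
   unexpanded OMP statements.  */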
9851 static void
9852 expand_omp (struct omp_region *region)
9854 omp_any_child_fn_dumped = false;
9855 while (region)
9857 location_t saved_location;
9858 gimple *inner_stmt = NULL;
9860 /* First, determine whether this is a combined parallel+workshare
9861 region. */
9862 if (region->type == GIMPLE_OMP_PARALLEL)
9863 determine_parallel_type (region);
9865 if (region->type == GIMPLE_OMP_FOR
9866 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9867 inner_stmt = last_stmt (region->inner->entry);
9869 if (region->inner)
9870 expand_omp (region->inner);
9872 saved_location = input_location;
9873 if (gimple_has_location (last_stmt (region->entry)))
9874 input_location = gimple_location (last_stmt (region->entry));
9876 switch (region->type)
9878 case GIMPLE_OMP_PARALLEL:
9879 case GIMPLE_OMP_TASK:
9880 expand_omp_taskreg (region);
9881 break;
9883 case GIMPLE_OMP_FOR:
9884 expand_omp_for (region, inner_stmt);
9885 break;
9887 case GIMPLE_OMP_SECTIONS:
9888 expand_omp_sections (region);
9889 break;
9891 case GIMPLE_OMP_SECTION:
9892 /* Individual omp sections are handled together with their
9893 parent GIMPLE_OMP_SECTIONS region. */
9894 break;
9896 case GIMPLE_OMP_SINGLE:
9897 expand_omp_single (region);
9898 break;
9900 case GIMPLE_OMP_ORDERED:
9902 gomp_ordered *ord_stmt
9903 = as_a <gomp_ordered *> (last_stmt (region->entry));
9904 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9905 OMP_CLAUSE_DEPEND))
9907 /* We'll expand these when expanding corresponding
9908 worksharing region with ordered(n) clause. */
9909 gcc_assert (region->outer
9910 && region->outer->type == GIMPLE_OMP_FOR);
9911 region->ord_stmt = ord_stmt;
9912 break;
9915 /* FALLTHRU */
9916 case GIMPLE_OMP_MASTER:
9917 case GIMPLE_OMP_TASKGROUP:
9918 case GIMPLE_OMP_CRITICAL:
9919 case GIMPLE_OMP_TEAMS:
9920 expand_omp_synch (region);
9921 break;
9923 case GIMPLE_OMP_ATOMIC_LOAD:
9924 expand_omp_atomic (region);
9925 break;
9927 case GIMPLE_OMP_TARGET:
9928 expand_omp_target (region);
9929 break;
9931 default:
9932 gcc_unreachable ();
9935 input_location = saved_location;
9936 region = region->next;
9938 if (omp_any_child_fn_dumped)
9940 if (dump_file)
9941 dump_function_header (dump_file, current_function_decl, dump_flags);
9942 omp_any_child_fn_dumped = false;
9946 /* Helper for build_omp_regions. Scan the dominator tree starting at
9947 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9948 true, the function ends once a single tree is built (otherwise, whole
9949 forest of OMP constructs may be built). */
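/* Editor's illustration (not part of GCC): for

       #pragma omp parallel
       {
         #pragma omp for
         ...
       }

   this builds a GIMPLE_OMP_PARALLEL region whose 'inner' child is the
   GIMPLE_OMP_FOR region; stand-alone directives such as 'omp target
   update' deliberately produce no region of their own (see below).  */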
9951 static void
9952 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9953 bool single_tree)
9955 gimple_stmt_iterator gsi;
9956 gimple *stmt;
9957 basic_block son;
9959 gsi = gsi_last_nondebug_bb (bb);
9960 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9962 struct omp_region *region;
9963 enum gimple_code code;
9965 stmt = gsi_stmt (gsi);
9966 code = gimple_code (stmt);
9967 if (code == GIMPLE_OMP_RETURN)
9969 /* STMT is the return point out of region PARENT. Mark it
9970 as the exit point and make PARENT the immediately
9971 enclosing region. */
9972 gcc_assert (parent);
9973 region = parent;
9974 region->exit = bb;
9975 parent = parent->outer;
9977 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9979 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9980 GIMPLE_OMP_RETURN, but matches with
9981 GIMPLE_OMP_ATOMIC_LOAD. */
9982 gcc_assert (parent);
9983 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9984 region = parent;
9985 region->exit = bb;
9986 parent = parent->outer;
9988 else if (code == GIMPLE_OMP_CONTINUE)
9990 gcc_assert (parent);
9991 parent->cont = bb;
9993 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9995 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9996 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9998 else
10000 region = new_omp_region (bb, code, parent);
10001 /* Otherwise... */
10002 if (code == GIMPLE_OMP_TARGET)
10004 switch (gimple_omp_target_kind (stmt))
10006 case GF_OMP_TARGET_KIND_REGION:
10007 case GF_OMP_TARGET_KIND_DATA:
10008 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10009 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10010 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10011 case GF_OMP_TARGET_KIND_OACC_DATA:
10012 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10013 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10014 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10015 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10016 break;
10017 case GF_OMP_TARGET_KIND_UPDATE:
10018 case GF_OMP_TARGET_KIND_ENTER_DATA:
10019 case GF_OMP_TARGET_KIND_EXIT_DATA:
10020 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10021 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10022 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10023 /* ..., other than for those stand-alone directives... */
10024 region = NULL;
10025 break;
10026 default:
10027 gcc_unreachable ();
10030 else if (code == GIMPLE_OMP_ORDERED
10031 && omp_find_clause (gimple_omp_ordered_clauses
10032 (as_a <gomp_ordered *> (stmt)),
10033 OMP_CLAUSE_DEPEND))
10034 /* #pragma omp ordered depend is also just a stand-alone
10035 directive. */
10036 region = NULL;
10037 else if (code == GIMPLE_OMP_TASK
10038 && gimple_omp_task_taskwait_p (stmt))
10039 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10040 region = NULL;
10041 /* ..., this directive becomes the parent for a new region. */
10042 if (region)
10043 parent = region;
10047 if (single_tree && !parent)
10048 return;
10050 for (son = first_dom_son (CDI_DOMINATORS, bb);
10051 son;
10052 son = next_dom_son (CDI_DOMINATORS, son))
10053 build_omp_regions_1 (son, parent, single_tree);
10056 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10057 root_omp_region. */
10059 static void
10060 build_omp_regions_root (basic_block root)
10062 gcc_assert (root_omp_region == NULL);
10063 build_omp_regions_1 (root, NULL, true);
10064 gcc_assert (root_omp_region != NULL);
10067 /* Expands the OMP construct (and its subconstructs) starting in HEAD.  */
10069 void
10070 omp_expand_local (basic_block head)
10072 build_omp_regions_root (head);
10073 if (dump_file && (dump_flags & TDF_DETAILS))
10075 fprintf (dump_file, "\nOMP region tree\n\n");
10076 dump_omp_region (dump_file, root_omp_region, 0);
10077 fprintf (dump_file, "\n");
10080 remove_exit_barriers (root_omp_region);
10081 expand_omp (root_omp_region);
10083 omp_free_regions ();
10086 /* Scan the CFG and build a tree of OMP regions, storing the root of
10087    the tree in root_omp_region.  */
10089 static void
10090 build_omp_regions (void)
10092 gcc_assert (root_omp_region == NULL);
10093 calculate_dominance_info (CDI_DOMINATORS);
10094 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10097 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10099 static unsigned int
10100 execute_expand_omp (void)
10102 build_omp_regions ();
10104 if (!root_omp_region)
10105 return 0;
10107 if (dump_file)
10109 fprintf (dump_file, "\nOMP region tree\n\n");
10110 dump_omp_region (dump_file, root_omp_region, 0);
10111 fprintf (dump_file, "\n");
10114 remove_exit_barriers (root_omp_region);
10116 expand_omp (root_omp_region);
10118 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10119 verify_loop_structure ();
10120 cleanup_tree_cfg ();
10122 omp_free_regions ();
10124 return 0;
10127 /* OMP expansion -- the default pass, run before creation of SSA form. */
10129 namespace {
10131 const pass_data pass_data_expand_omp =
10133 GIMPLE_PASS, /* type */
10134 "ompexp", /* name */
10135 OPTGROUP_OMP, /* optinfo_flags */
10136 TV_NONE, /* tv_id */
10137 PROP_gimple_any, /* properties_required */
10138 PROP_gimple_eomp, /* properties_provided */
10139 0, /* properties_destroyed */
10140 0, /* todo_flags_start */
10141 0, /* todo_flags_finish */
10144 class pass_expand_omp : public gimple_opt_pass
10146 public:
10147 pass_expand_omp (gcc::context *ctxt)
10148 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10151 /* opt_pass methods: */
10152 virtual unsigned int execute (function *)
10154 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10155 || flag_openmp_simd != 0)
10156 && !seen_error ());
10158 /* This pass always runs, to provide PROP_gimple_eomp.
10159 But often, there is nothing to do. */
10160 if (!gate)
10161 return 0;
10163 return execute_expand_omp ();
10166 }; // class pass_expand_omp
10168 } // anon namespace
10170 gimple_opt_pass *
10171 make_pass_expand_omp (gcc::context *ctxt)
10173 return new pass_expand_omp (ctxt);
10176 namespace {
10178 const pass_data pass_data_expand_omp_ssa =
10180 GIMPLE_PASS, /* type */
10181 "ompexpssa", /* name */
10182 OPTGROUP_OMP, /* optinfo_flags */
10183 TV_NONE, /* tv_id */
10184 PROP_cfg | PROP_ssa, /* properties_required */
10185 PROP_gimple_eomp, /* properties_provided */
10186 0, /* properties_destroyed */
10187 0, /* todo_flags_start */
10188 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10191 class pass_expand_omp_ssa : public gimple_opt_pass
10193 public:
10194 pass_expand_omp_ssa (gcc::context *ctxt)
10195 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10198 /* opt_pass methods: */
10199 virtual bool gate (function *fun)
10201 return !(fun->curr_properties & PROP_gimple_eomp);
10203 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10204 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10206 }; // class pass_expand_omp_ssa
10208 } // anon namespace
10210 gimple_opt_pass *
10211 make_pass_expand_omp_ssa (gcc::context *ctxt)
10213 return new pass_expand_omp_ssa (ctxt);
10216 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10217 GIMPLE_* codes. */
10219 bool
10220 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10221 int *region_idx)
10223 gimple *last = last_stmt (bb);
10224 enum gimple_code code = gimple_code (last);
10225 struct omp_region *cur_region = *region;
10226 bool fallthru = false;
10228 switch (code)
10230 case GIMPLE_OMP_PARALLEL:
10231 case GIMPLE_OMP_FOR:
10232 case GIMPLE_OMP_SINGLE:
10233 case GIMPLE_OMP_TEAMS:
10234 case GIMPLE_OMP_MASTER:
10235 case GIMPLE_OMP_TASKGROUP:
10236 case GIMPLE_OMP_CRITICAL:
10237 case GIMPLE_OMP_SECTION:
10238 cur_region = new_omp_region (bb, code, cur_region);
10239 fallthru = true;
10240 break;
10242 case GIMPLE_OMP_TASK:
10243 cur_region = new_omp_region (bb, code, cur_region);
10244 fallthru = true;
10245 if (gimple_omp_task_taskwait_p (last))
10246 cur_region = cur_region->outer;
10247 break;
10249 case GIMPLE_OMP_ORDERED:
10250 cur_region = new_omp_region (bb, code, cur_region);
10251 fallthru = true;
10252 if (omp_find_clause (gimple_omp_ordered_clauses
10253 (as_a <gomp_ordered *> (last)),
10254 OMP_CLAUSE_DEPEND))
10255 cur_region = cur_region->outer;
10256 break;
10258 case GIMPLE_OMP_TARGET:
10259 cur_region = new_omp_region (bb, code, cur_region);
10260 fallthru = true;
10261 switch (gimple_omp_target_kind (last))
10263 case GF_OMP_TARGET_KIND_REGION:
10264 case GF_OMP_TARGET_KIND_DATA:
10265 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10266 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10267 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10268 case GF_OMP_TARGET_KIND_OACC_DATA:
10269 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10270 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10271 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10272 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10273 break;
10274 case GF_OMP_TARGET_KIND_UPDATE:
10275 case GF_OMP_TARGET_KIND_ENTER_DATA:
10276 case GF_OMP_TARGET_KIND_EXIT_DATA:
10277 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10278 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10279 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10280 cur_region = cur_region->outer;
10281 break;
10282 default:
10283 gcc_unreachable ();
10285 break;
10287 case GIMPLE_OMP_SECTIONS:
10288 cur_region = new_omp_region (bb, code, cur_region);
10289 fallthru = true;
10290 break;
10292 case GIMPLE_OMP_SECTIONS_SWITCH:
10293 fallthru = false;
10294 break;
10296 case GIMPLE_OMP_ATOMIC_LOAD:
10297 case GIMPLE_OMP_ATOMIC_STORE:
10298 fallthru = true;
10299 break;
10301 case GIMPLE_OMP_RETURN:
10302 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10303 somewhere other than the next block. This will be
10304 created later. */
10305 cur_region->exit = bb;
10306 if (cur_region->type == GIMPLE_OMP_TASK)
10307 /* Add an edge corresponding to not scheduling the task
10308 immediately. */
10309 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10310 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10311 cur_region = cur_region->outer;
10312 break;
10314 case GIMPLE_OMP_CONTINUE:
10315 cur_region->cont = bb;
10316 switch (cur_region->type)
10318 case GIMPLE_OMP_FOR:
10319 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10320            successor edges as abnormal to prevent splitting
10321 them. */
10322 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10323 /* Make the loopback edge. */
10324 make_edge (bb, single_succ (cur_region->entry),
10325 EDGE_ABNORMAL);
10327 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10328 corresponds to the case that the body of the loop
10329 is not executed at all. */
10330 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10331 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10332 fallthru = false;
10333 break;
10335 case GIMPLE_OMP_SECTIONS:
10336 /* Wire up the edges into and out of the nested sections. */
10338 basic_block switch_bb = single_succ (cur_region->entry);
10340 struct omp_region *i;
10341 for (i = cur_region->inner; i ; i = i->next)
10343 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10344 make_edge (switch_bb, i->entry, 0);
10345 make_edge (i->exit, bb, EDGE_FALLTHRU);
10348 /* Make the loopback edge to the block with
10349 GIMPLE_OMP_SECTIONS_SWITCH. */
10350 make_edge (bb, switch_bb, 0);
10352 /* Make the edge from the switch to exit. */
10353 make_edge (switch_bb, bb->next_bb, 0);
10354 fallthru = false;
10356 break;
10358 case GIMPLE_OMP_TASK:
10359 fallthru = true;
10360 break;
10362 default:
10363 gcc_unreachable ();
10365 break;
10367 default:
10368 gcc_unreachable ();
10371 if (*region != cur_region)
10373 *region = cur_region;
10374 if (cur_region)
10375 *region_idx = cur_region->entry->index;
10376 else
10377 *region_idx = 0;
10380 return fallthru;