gcc/omp-expand.cc
/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

   Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
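/* E.g. for schedule (simd: dynamic, 10) with a maximum vectorization
   factor of 4, the folding above computes (10 + 3) & -4, rounding the
   chunk size up to 12, the next multiple of the VF.  */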
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
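/* E.g. for

     #pragma omp parallel for schedule (dynamic, 16)
     for (i = 0; i < n; i++)

   the vector built above is roughly { (long) 0, (long) n, (long) 1, 16 },
   i.e. the extra lb, ub, incr and chunk_size arguments that the
   GOMP_parallel_loop_* entry points take in addition to fn, data and
   num_threads.  */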
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses
    = gimple_omp_parallel_clauses (last_nondebug_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_nondebug_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_nondebug_stmt (par_entry_bb);
      gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
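/* E.g. given

     #pragma omp parallel for schedule (dynamic)
     for (i = 0; i < n; i++)
       body;

   marking both regions combined lets expand_parallel_call emit a single
   GOMP_parallel_loop_* call instead of GOMP_parallel plus a separate
   GOMP_loop_*_start/next sequence inside the child function.  */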
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_nondebug_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_nondebug_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
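  /* E.g. a plain schedule (dynamic) clause, which is nonmonotonic by
     default, picks BUILT_IN_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC above,
     while schedule (monotonic: dynamic) picks
     BUILT_IN_GOMP_PARALLEL_LOOP_DYNAMIC.  */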
  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_nondebug_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  enum built_in_function f = (nowait
			      ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
			      : BUILT_IN_GOMP_TASKWAIT_DEPEND);
  tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
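/* E.g.

     #pragma omp taskwait depend (in: x)

   becomes a single GOMP_taskwait_depend call taking the address of the
   lowered dependence array (GOMP_taskwait_depend_nowait when a nowait
   clause is also present).  */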
/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
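/* E.g. a host

     #pragma omp teams num_teams (4) thread_limit (8)

   region is expanded here into roughly

     GOMP_teams_reg (child_fn, &.omp_data_o, 4, 8, 0);  */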
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_nondebug_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
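/* I.e. within an outlined parallel body a call such as

     n = omp_get_num_threads ();

   is redirected to the const __builtin_omp_get_num_threads (), so later
   passes can CSE repeated queries.  */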
/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
		       tree lhs, tree rhs, bool after = false)
{
  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
  if (after)
    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }
  return cond_stmt;
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_nondebug_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
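/* E.g. for a collapsed member

     for (i = 0; i < 10; i += 3)

   the code above computes range = 10, dir = 1, step = 3 and
   iters = (10 - 1 + 3) / 3 = 4.  */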
1656 /* Emit initializers for collapsed loop members. INNER is true if
1657 this is for the element loop of a TILE. IVAR is the outer
1658 loop iteration variable, from which collapsed loop iteration values
1659 are calculated. COUNTS array has been initialized by
1660 expand_oacc_collapse_inits. */
1662 static void
1663 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1664 gimple_stmt_iterator *gsi,
1665 const oacc_collapse *counts, tree ivar,
1666 tree diff_type)
1668 tree ivar_type = TREE_TYPE (ivar);
1670 /* The most rapidly changing iteration variable is the innermost
1671 one. */
1672 for (int ix = fd->collapse; ix--;)
1674 const omp_for_data_loop *loop = &fd->loops[ix];
1675 const oacc_collapse *collapse = &counts[ix];
1676 tree v = inner ? loop->v : collapse->outer;
1677 tree iter_type = TREE_TYPE (v);
1678 tree plus_type = iter_type;
1679 enum tree_code plus_code = PLUS_EXPR;
1680 tree expr;
1682 if (POINTER_TYPE_P (iter_type))
1684 plus_code = POINTER_PLUS_EXPR;
1685 plus_type = sizetype;
1688 expr = ivar;
1689 if (ix)
1691 tree mod = fold_convert (ivar_type, collapse->iters);
1692 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1693 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1694 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1695 true, GSI_SAME_STMT);
1698 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1699 fold_convert (diff_type, collapse->step));
1700 expr = fold_build2 (plus_code, iter_type,
1701 inner ? collapse->outer : collapse->base,
1702 fold_convert (plus_type, expr));
1703 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 gassign *ass = gimple_build_assign (v, expr);
1706 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1710 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1711 of the combined collapse > 1 loop constructs, generate code like:
1712 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1713 if (cond3 is <)
1714 adj = STEP3 - 1;
1715 else
1716 adj = STEP3 + 1;
1717 count3 = (adj + N32 - N31) / STEP3;
1718 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1719 if (cond2 is <)
1720 adj = STEP2 - 1;
1721 else
1722 adj = STEP2 + 1;
1723 count2 = (adj + N22 - N21) / STEP2;
1724 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1725 if (cond1 is <)
1726 adj = STEP1 - 1;
1727 else
1728 adj = STEP1 + 1;
1729 count1 = (adj + N12 - N11) / STEP1;
1730 count = count1 * count2 * count3;
1731 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1732 count = 0;
1733 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1734 of the combined loop constructs, just initialize COUNTS array
1735 from the _looptemp_ clauses. For loop nests with non-rectangular
1736 loops, do this only for the rectangular loops. Then pick
1737 the loops which reference outer vars in their bound expressions
1738 and the loops which they refer to and for this sub-nest compute
1739 number of iterations. For triangular loops use Faulhaber's formula,
1740 otherwise as a fallback, compute by iterating the loops.
1741 If e.g. the sub-nest is
1742 for (I = N11; I COND1 N12; I += STEP1)
1743 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1744 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1746 COUNT = 0;
1747 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1748 for (tmpj = M21 * tmpi + N21;
1749 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1751 int tmpk1 = M31 * tmpj + N31;
1752 int tmpk2 = M32 * tmpj + N32;
1753 if (tmpk1 COND3 tmpk2)
1755 if (COND3 is <)
1756 adj = STEP3 - 1;
1757 else
1758 adj = STEP3 + 1;
1759 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1762 and finally multiply the counts of the rectangular loops not
1763 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1764 store number of iterations of the loops from fd->first_nonrect
1765 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1766 by the counts of rectangular loops not referenced in any non-rectangular
1767 loops sandwitched in between those. */
1769 /* NOTE: It *could* be better to moosh all of the BBs together,
1770 creating one larger BB with all the computation and the unexpected
1771 jump at the end. I.e.
1773 bool zero3, zero2, zero1, zero;
1775 zero3 = N32 c3 N31;
1776 count3 = (N32 - N31) /[cl] STEP3;
1777 zero2 = N22 c2 N21;
1778 count2 = (N22 - N21) /[cl] STEP2;
1779 zero1 = N12 c1 N11;
1780 count1 = (N12 - N11) /[cl] STEP1;
1781 zero = zero3 || zero2 || zero1;
1782 count = count1 * count2 * count3;
1783 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1785 After all, we expect the zero=false, and thus we expect to have to
1786 evaluate all of the comparison expressions, so short-circuiting
1787 oughtn't be a win. Since the condition isn't protecting a
1788 denominator, we're not concerned about divide-by-zero, so we can
1789 fully evaluate count even if a numerator turned out to be wrong.
1791 It seems like putting this all together would create much better
1792 scheduling opportunities, and less pressure on the chip's branch
1793 predictor. */
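For comparison, a hypothetical C rendering of that straight-line form, reusing the loop_count sketch from above (GCC does not currently emit this shape; this is only a model of the idea in the note):

/* Evaluate every zeroN and countN unconditionally, then take one
   unlikely branch at the end.  */
int zero3 = !(n31 < n32), zero2 = !(n21 < n22), zero1 = !(n11 < n12);
long count3 = loop_count (n31, n32, step3);
long count2 = loop_count (n21, n22, step2);
long count1 = loop_count (n11, n12, step1);
int zero = zero3 | zero2 | zero1;
long count = count1 * count2 * count3;
if (__builtin_expect (zero, 0))
  count = 0;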
1795 static void
1796 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1797 basic_block &entry_bb, tree *counts,
1798 basic_block &zero_iter1_bb, int &first_zero_iter1,
1799 basic_block &zero_iter2_bb, int &first_zero_iter2,
1800 basic_block &l2_dom_bb)
1802 tree t, type = TREE_TYPE (fd->loop.v);
1803 edge e, ne;
1804 int i;
1806 /* Collapsed loops need work for expansion into SSA form. */
1807 gcc_assert (!gimple_in_ssa_p (cfun));
1809 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1810 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1812 gcc_assert (fd->ordered == 0);
1813 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1814 isn't supposed to be handled, as the inner loop doesn't
1815 use it. */
1816 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1817 OMP_CLAUSE__LOOPTEMP_);
1818 gcc_assert (innerc);
1819 for (i = 0; i < fd->collapse; i++)
1821 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1822 OMP_CLAUSE__LOOPTEMP_);
1823 gcc_assert (innerc);
1824 if (i)
1825 counts[i] = OMP_CLAUSE_DECL (innerc);
1826 else
1827 counts[0] = NULL_TREE;
1829 if (fd->non_rect
1830 && fd->last_nonrect == fd->first_nonrect + 1
1831 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1833 tree c[4];
1834 for (i = 0; i < 4; i++)
1836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1837 OMP_CLAUSE__LOOPTEMP_);
1838 gcc_assert (innerc);
1839 c[i] = OMP_CLAUSE_DECL (innerc);
1841 counts[0] = c[0];
1842 fd->first_inner_iterations = c[1];
1843 fd->factor = c[2];
1844 fd->adjn1 = c[3];
1846 return;
1849 for (i = fd->collapse; i < fd->ordered; i++)
1851 tree itype = TREE_TYPE (fd->loops[i].v);
1852 counts[i] = NULL_TREE;
1853 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1854 fold_convert (itype, fd->loops[i].n1),
1855 fold_convert (itype, fd->loops[i].n2));
1856 if (t && integer_zerop (t))
1858 for (i = fd->collapse; i < fd->ordered; i++)
1859 counts[i] = build_int_cst (type, 0);
1860 break;
1863 bool rect_count_seen = false;
1864 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1866 tree itype = TREE_TYPE (fd->loops[i].v);
1868 if (i >= fd->collapse && counts[i])
1869 continue;
1870 if (fd->non_rect)
1872 /* Skip loops that use outer iterators in their expressions
1873 during this phase. */
1874 if (fd->loops[i].m1 || fd->loops[i].m2)
1876 counts[i] = build_zero_cst (type);
1877 continue;
1880 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1881 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1882 fold_convert (itype, fd->loops[i].n1),
1883 fold_convert (itype, fd->loops[i].n2)))
1884 == NULL_TREE || !integer_onep (t)))
1886 gcond *cond_stmt;
1887 tree n1, n2;
1888 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1889 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1890 true, GSI_SAME_STMT);
1891 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1892 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1893 true, GSI_SAME_STMT);
1894 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1895 n1, n2);
1896 e = split_block (entry_bb, cond_stmt);
1897 basic_block &zero_iter_bb
1898 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1899 int &first_zero_iter
1900 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1901 if (zero_iter_bb == NULL)
1903 gassign *assign_stmt;
1904 first_zero_iter = i;
1905 zero_iter_bb = create_empty_bb (entry_bb);
1906 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1907 *gsi = gsi_after_labels (zero_iter_bb);
1908 if (i < fd->collapse)
1909 assign_stmt = gimple_build_assign (fd->loop.n2,
1910 build_zero_cst (type));
1911 else
1913 counts[i] = create_tmp_reg (type, ".count");
1914 assign_stmt
1915 = gimple_build_assign (counts[i], build_zero_cst (type));
1917 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1918 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1919 entry_bb);
1921 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1922 ne->probability = profile_probability::very_unlikely ();
1923 e->flags = EDGE_TRUE_VALUE;
1924 e->probability = ne->probability.invert ();
1925 if (l2_dom_bb == NULL)
1926 l2_dom_bb = entry_bb;
1927 entry_bb = e->dest;
1928 *gsi = gsi_last_nondebug_bb (entry_bb);
1931 if (POINTER_TYPE_P (itype))
1932 itype = signed_type_for (itype);
1933 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1934 ? -1 : 1));
1935 t = fold_build2 (PLUS_EXPR, itype,
1936 fold_convert (itype, fd->loops[i].step), t);
1937 t = fold_build2 (PLUS_EXPR, itype, t,
1938 fold_convert (itype, fd->loops[i].n2));
1939 t = fold_build2 (MINUS_EXPR, itype, t,
1940 fold_convert (itype, fd->loops[i].n1));
1941 /* ?? We could probably use CEIL_DIV_EXPR instead of
1942 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1943 generate the same code in the end because generically we
1944 don't know that the values involved must be negative for
1945 GT?? */
1946 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1947 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1948 fold_build1 (NEGATE_EXPR, itype, t),
1949 fold_build1 (NEGATE_EXPR, itype,
1950 fold_convert (itype,
1951 fd->loops[i].step)));
1952 else
1953 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1954 fold_convert (itype, fd->loops[i].step));
1955 t = fold_convert (type, t);
1956 if (TREE_CODE (t) == INTEGER_CST)
1957 counts[i] = t;
1958 else
1960 if (i < fd->collapse || i != first_zero_iter2)
1961 counts[i] = create_tmp_reg (type, ".count");
1962 expand_omp_build_assign (gsi, counts[i], t);
1964 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1966 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1967 continue;
1968 if (!rect_count_seen)
1970 t = counts[i];
1971 rect_count_seen = true;
1973 else
1974 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1975 expand_omp_build_assign (gsi, fd->loop.n2, t);
1978 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1980 gcc_assert (fd->last_nonrect != -1);
1982 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1983 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1984 build_zero_cst (type));
1985 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1986 if (fd->loops[i].m1
1987 || fd->loops[i].m2
1988 || fd->loops[i].non_rect_referenced)
1989 break;
1990 if (i == fd->last_nonrect
1991 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1992 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1993 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1995 int o = fd->first_nonrect;
1996 tree itype = TREE_TYPE (fd->loops[o].v);
1997 tree n1o = create_tmp_reg (itype, ".n1o");
1998 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1999 expand_omp_build_assign (gsi, n1o, t);
2000 tree n2o = create_tmp_reg (itype, ".n2o");
2001 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2002 expand_omp_build_assign (gsi, n2o, t);
2003 if (fd->loops[i].m1 && fd->loops[i].m2)
2004 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2005 unshare_expr (fd->loops[i].m1));
2006 else if (fd->loops[i].m1)
2007 t = fold_build1 (NEGATE_EXPR, itype,
2008 unshare_expr (fd->loops[i].m1));
2009 else
2010 t = unshare_expr (fd->loops[i].m2);
2011 tree m2minusm1
2012 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2013 true, GSI_SAME_STMT);
2015 gimple_stmt_iterator gsi2 = *gsi;
2016 gsi_prev (&gsi2);
2017 e = split_block (entry_bb, gsi_stmt (gsi2));
2018 e = split_block (e->dest, (gimple *) NULL);
2019 basic_block bb1 = e->src;
2020 entry_bb = e->dest;
2021 *gsi = gsi_after_labels (entry_bb);
2023 gsi2 = gsi_after_labels (bb1);
2024 tree ostep = fold_convert (itype, fd->loops[o].step);
2025 t = build_int_cst (itype, (fd->loops[o].cond_code
2026 == LT_EXPR ? -1 : 1));
2027 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2028 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2029 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2030 if (TYPE_UNSIGNED (itype)
2031 && fd->loops[o].cond_code == GT_EXPR)
2032 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2033 fold_build1 (NEGATE_EXPR, itype, t),
2034 fold_build1 (NEGATE_EXPR, itype, ostep));
2035 else
2036 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2037 tree outer_niters
2038 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2039 true, GSI_SAME_STMT);
2040 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2041 build_one_cst (itype));
2042 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2043 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2044 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2045 true, GSI_SAME_STMT);
2046 tree n1, n2, n1e, n2e;
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2048 if (fd->loops[i].m1)
2050 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2051 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2052 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2054 else
2055 n1 = t;
2056 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2059 if (fd->loops[i].m2)
2061 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2062 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2063 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2065 else
2066 n2 = t;
2067 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2068 true, GSI_SAME_STMT);
2069 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2070 if (fd->loops[i].m1)
2072 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2073 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2074 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2076 else
2077 n1e = t;
2078 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2081 if (fd->loops[i].m2)
2083 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2084 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2085 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2087 else
2088 n2e = t;
2089 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2090 true, GSI_SAME_STMT);
2091 gcond *cond_stmt
2092 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2093 n1, n2);
2094 e = split_block (bb1, cond_stmt);
2095 e->flags = EDGE_TRUE_VALUE;
2096 e->probability = profile_probability::likely ().guessed ();
2097 basic_block bb2 = e->dest;
2098 gsi2 = gsi_after_labels (bb2);
2100 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2101 n1e, n2e);
2102 e = split_block (bb2, cond_stmt);
2103 e->flags = EDGE_TRUE_VALUE;
2104 e->probability = profile_probability::likely ().guessed ();
2105 gsi2 = gsi_after_labels (e->dest);
2107 tree step = fold_convert (itype, fd->loops[i].step);
2108 t = build_int_cst (itype, (fd->loops[i].cond_code
2109 == LT_EXPR ? -1 : 1));
2110 t = fold_build2 (PLUS_EXPR, itype, step, t);
2111 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2112 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2113 if (TYPE_UNSIGNED (itype)
2114 && fd->loops[i].cond_code == GT_EXPR)
2115 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2116 fold_build1 (NEGATE_EXPR, itype, t),
2117 fold_build1 (NEGATE_EXPR, itype, step));
2118 else
2119 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2120 tree first_inner_iterations
2121 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2122 true, GSI_SAME_STMT);
2123 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2124 if (TYPE_UNSIGNED (itype)
2125 && fd->loops[i].cond_code == GT_EXPR)
2126 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2127 fold_build1 (NEGATE_EXPR, itype, t),
2128 fold_build1 (NEGATE_EXPR, itype, step));
2129 else
2130 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2131 tree factor
2132 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2133 true, GSI_SAME_STMT);
2134 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2135 build_one_cst (itype));
2136 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2137 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2138 t = fold_build2 (MULT_EXPR, itype, factor, t);
2139 t = fold_build2 (PLUS_EXPR, itype,
2140 fold_build2 (MULT_EXPR, itype, outer_niters,
2141 first_inner_iterations), t);
2142 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2143 fold_convert (type, t));
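As a sanity check on the value just stored: writing $n$ for outer_niters, $f$ for first_inner_iterations and $m$ for factor (the per-outer-iteration change in the inner trip count), the assignment above computes the arithmetic-series total

  $\sum_{k=0}^{n-1} (f + k m) = n f + m \frac{n (n - 1)}{2}$,

i.e. outer_niters * first_inner_iterations plus factor times the right-shifted product built in the code.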
2145 basic_block bb3 = create_empty_bb (bb1);
2146 add_bb_to_loop (bb3, bb1->loop_father);
2148 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2149 e->probability = profile_probability::unlikely ().guessed ();
2151 gsi2 = gsi_after_labels (bb3);
2152 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2153 n1e, n2e);
2154 e = split_block (bb3, cond_stmt);
2155 e->flags = EDGE_TRUE_VALUE;
2156 e->probability = profile_probability::likely ().guessed ();
2157 basic_block bb4 = e->dest;
2159 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2160 ne->probability = e->probability.invert ();
2162 basic_block bb5 = create_empty_bb (bb2);
2163 add_bb_to_loop (bb5, bb2->loop_father);
2165 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2166 ne->probability = profile_probability::unlikely ().guessed ();
2168 for (int j = 0; j < 2; j++)
2170 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2171 t = fold_build2 (MINUS_EXPR, itype,
2172 unshare_expr (fd->loops[i].n1),
2173 unshare_expr (fd->loops[i].n2));
2174 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2175 tree tem
2176 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2177 true, GSI_SAME_STMT);
2178 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2179 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2180 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2181 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2182 true, GSI_SAME_STMT);
2183 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2184 if (fd->loops[i].m1)
2186 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2187 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2188 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2190 else
2191 n1 = t;
2192 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2193 true, GSI_SAME_STMT);
2194 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2195 if (fd->loops[i].m2)
2197 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2198 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2199 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2201 else
2202 n2 = t;
2203 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2204 true, GSI_SAME_STMT);
2205 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2207 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2208 n1, n2);
2209 e = split_block (gsi_bb (gsi2), cond_stmt);
2210 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2211 e->probability = profile_probability::unlikely ().guessed ();
2212 ne = make_edge (e->src, bb1,
2213 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2214 ne->probability = e->probability.invert ();
2215 gsi2 = gsi_after_labels (e->dest);
2217 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2218 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2220 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2223 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2224 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2225 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2227 if (fd->first_nonrect + 1 == fd->last_nonrect)
2229 fd->first_inner_iterations = first_inner_iterations;
2230 fd->factor = factor;
2231 fd->adjn1 = n1o;
2234 else
2236 /* Fallback implementation. Evaluate the loops with m1/m2
2237 non-NULL as well as their outer loops at runtime using temporaries
2238 instead of the original iteration variables, and in the
2239 body just bump the counter. */
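A compact C model of this fallback for a two-loop sub-nest, reusing the illustrative loop_count helper from earlier (the other names are placeholders, not GCC identifiers):

/* Iterate the outer loop with a temporary and accumulate the inner
   loop's trip count in closed form instead of iterating it.  */
long count = 0;
for (long tmpi = n11; tmpi < n12; tmpi += step1)
  {
    long k1 = m21 * tmpi + n21;  /* inner lower bound */
    long k2 = m22 * tmpi + n22;  /* inner upper bound */
    if (k1 < k2)
      count += loop_count (k1, k2, step2);
  }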
2240 gimple_stmt_iterator gsi2 = *gsi;
2241 gsi_prev (&gsi2);
2242 e = split_block (entry_bb, gsi_stmt (gsi2));
2243 e = split_block (e->dest, (gimple *) NULL);
2244 basic_block cur_bb = e->src;
2245 basic_block next_bb = e->dest;
2246 entry_bb = e->dest;
2247 *gsi = gsi_after_labels (entry_bb);
2249 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2250 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2252 for (i = 0; i <= fd->last_nonrect; i++)
2254 if (fd->loops[i].m1 == NULL_TREE
2255 && fd->loops[i].m2 == NULL_TREE
2256 && !fd->loops[i].non_rect_referenced)
2257 continue;
2259 tree itype = TREE_TYPE (fd->loops[i].v);
2261 gsi2 = gsi_after_labels (cur_bb);
2262 tree n1, n2;
2263 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2264 if (fd->loops[i].m1 == NULL_TREE)
2265 n1 = t;
2266 else if (POINTER_TYPE_P (itype))
2268 gcc_assert (integer_onep (fd->loops[i].m1));
2269 t = unshare_expr (fd->loops[i].n1);
2270 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2272 else
2274 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275 n1 = fold_build2 (MULT_EXPR, itype,
2276 vs[i - fd->loops[i].outer], n1);
2277 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2279 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280 true, GSI_SAME_STMT);
2281 if (i < fd->last_nonrect)
2283 vs[i] = create_tmp_reg (itype, ".it");
2284 expand_omp_build_assign (&gsi2, vs[i], n1);
2286 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287 if (fd->loops[i].m2 == NULL_TREE)
2288 n2 = t;
2289 else if (POINTER_TYPE_P (itype))
2291 gcc_assert (integer_onep (fd->loops[i].m2));
2292 t = unshare_expr (fd->loops[i].n2);
2293 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2295 else
2297 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2298 n2 = fold_build2 (MULT_EXPR, itype,
2299 vs[i - fd->loops[i].outer], n2);
2300 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2302 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2303 true, GSI_SAME_STMT);
2304 if (POINTER_TYPE_P (itype))
2305 itype = signed_type_for (itype);
2306 if (i == fd->last_nonrect)
2308 gcond *cond_stmt
2309 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2310 n1, n2);
2311 e = split_block (cur_bb, cond_stmt);
2312 e->flags = EDGE_TRUE_VALUE;
2313 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2314 e->probability = profile_probability::likely ().guessed ();
2315 ne->probability = e->probability.invert ();
2316 gsi2 = gsi_after_labels (e->dest);
2318 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2319 ? -1 : 1));
2320 t = fold_build2 (PLUS_EXPR, itype,
2321 fold_convert (itype, fd->loops[i].step), t);
2322 t = fold_build2 (PLUS_EXPR, itype, t,
2323 fold_convert (itype, n2));
2324 t = fold_build2 (MINUS_EXPR, itype, t,
2325 fold_convert (itype, n1));
2326 tree step = fold_convert (itype, fd->loops[i].step);
2327 if (TYPE_UNSIGNED (itype)
2328 && fd->loops[i].cond_code == GT_EXPR)
2329 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2330 fold_build1 (NEGATE_EXPR, itype, t),
2331 fold_build1 (NEGATE_EXPR, itype, step));
2332 else
2333 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2334 t = fold_convert (type, t);
2335 t = fold_build2 (PLUS_EXPR, type,
2336 counts[fd->last_nonrect], t);
2337 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2338 true, GSI_SAME_STMT);
2339 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2340 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2341 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2342 break;
2344 e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2346 basic_block new_cur_bb = create_empty_bb (cur_bb);
2347 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2349 gsi2 = gsi_after_labels (e->dest);
2350 tree step = fold_convert (itype,
2351 unshare_expr (fd->loops[i].step));
2352 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2353 t = fold_build_pointer_plus (vs[i], step);
2354 else
2355 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2356 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2357 true, GSI_SAME_STMT);
2358 expand_omp_build_assign (&gsi2, vs[i], t);
2360 ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2361 gsi2 = gsi_after_labels (ne->dest);
2363 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2364 edge e3, e4;
2365 if (next_bb == entry_bb)
2367 e3 = find_edge (ne->dest, next_bb);
2368 e3->flags = EDGE_FALSE_VALUE;
2370 else
2371 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2372 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2373 e4->probability = profile_probability::likely ().guessed ();
2374 e3->probability = e4->probability.invert ();
2375 basic_block esrc = e->src;
2376 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2377 cur_bb = new_cur_bb;
2378 basic_block latch_bb = next_bb;
2379 next_bb = e->dest;
2380 remove_edge (e);
2381 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2382 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2383 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2386 t = NULL_TREE;
2387 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2388 if (!fd->loops[i].non_rect_referenced
2389 && fd->loops[i].m1 == NULL_TREE
2390 && fd->loops[i].m2 == NULL_TREE)
2392 if (t == NULL_TREE)
2393 t = counts[i];
2394 else
2395 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2397 if (t)
2399 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2400 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2402 if (!rect_count_seen)
2403 t = counts[fd->last_nonrect];
2404 else
2405 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2406 counts[fd->last_nonrect]);
2407 expand_omp_build_assign (gsi, fd->loop.n2, t);
2409 else if (fd->non_rect)
2411 tree t = fd->loop.n2;
2412 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2413 int non_rect_referenced = 0, non_rect = 0;
2414 for (i = 0; i < fd->collapse; i++)
2416 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2417 && !integer_zerop (counts[i]))
2418 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2419 if (fd->loops[i].non_rect_referenced)
2420 non_rect_referenced++;
2421 if (fd->loops[i].m1 || fd->loops[i].m2)
2422 non_rect++;
2424 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2425 counts[fd->last_nonrect] = t;
2429 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2430 T = V;
2431 V3 = N31 + (T % count3) * STEP3;
2432 T = T / count3;
2433 V2 = N21 + (T % count2) * STEP2;
2434 T = T / count2;
2435 V1 = N11 + T * STEP1;
2436 if this loop doesn't have an inner loop construct combined with it.
2437 If it does have an inner loop construct combined with it and the
2438 iteration count isn't known constant, store values from counts array
2439 into its _looptemp_ temporaries instead.
2440 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2441 inclusive), use the count of all those loops together, and either
2442 find the roots of the quadratic etc. equation, or, as a fallback, do:
2443 COUNT = 0;
2444 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2445 for (tmpj = M21 * tmpi + N21;
2446 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2448 int tmpk1 = M31 * tmpj + N31;
2449 int tmpk2 = M32 * tmpj + N32;
2450 if (tmpk1 COND3 tmpk2)
2452 if (COND3 is <)
2453 adj = STEP3 - 1;
2454 else
2455 adj = STEP3 + 1;
2456 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2457 if (COUNT + temp > T)
2459 V1 = tmpi;
2460 V2 = tmpj;
2461 V3 = tmpk1 + (T - COUNT) * STEP3;
2462 goto done;
2464 else
2465 COUNT += temp;
2468 done:;
2469 but for optional innermost or outermost rectangular loops that aren't
2470 referenced by other loop expressions, keep doing the division/modulo. */
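A short C sketch of the division/modulo recovery described above for a rectangular collapse(3) nest (illustrative variable names; the non-rectangular paths handled below replace the middle steps):

/* Decode the logical iteration number T into per-loop variables.  */
long t = v;                          /* logical iteration to decode */
long v3 = n31 + (t % count3) * step3;
t /= count3;
long v2 = n21 + (t % count2) * step2;
t /= count2;
long v1 = n11 + t * step1;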
2472 static void
2473 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2474 tree *counts, tree *nonrect_bounds,
2475 gimple *inner_stmt, tree startvar)
2477 int i;
2478 if (gimple_omp_for_combined_p (fd->for_stmt))
2480 /* If fd->loop.n2 is constant, then no propagation of the counts
2481 is needed, they are constant. */
2482 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2483 return;
2485 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2486 ? gimple_omp_taskreg_clauses (inner_stmt)
2487 : gimple_omp_for_clauses (inner_stmt);
2488 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2489 isn't supposed to be handled, as the inner loop doesn't
2490 use it. */
2491 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2492 gcc_assert (innerc);
2493 int count = 0;
2494 if (fd->non_rect
2495 && fd->last_nonrect == fd->first_nonrect + 1
2496 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2497 count = 4;
2498 for (i = 0; i < fd->collapse + count; i++)
2500 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2501 OMP_CLAUSE__LOOPTEMP_);
2502 gcc_assert (innerc);
2503 if (i)
2505 tree tem = OMP_CLAUSE_DECL (innerc);
2506 tree t;
2507 if (i < fd->collapse)
2508 t = counts[i];
2509 else
2510 switch (i - fd->collapse)
2512 case 0: t = counts[0]; break;
2513 case 1: t = fd->first_inner_iterations; break;
2514 case 2: t = fd->factor; break;
2515 case 3: t = fd->adjn1; break;
2516 default: gcc_unreachable ();
2518 t = fold_convert (TREE_TYPE (tem), t);
2519 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2520 false, GSI_CONTINUE_LINKING);
2521 gassign *stmt = gimple_build_assign (tem, t);
2522 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2525 return;
2528 tree type = TREE_TYPE (fd->loop.v);
2529 tree tem = create_tmp_reg (type, ".tem");
2530 gassign *stmt = gimple_build_assign (tem, startvar);
2531 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2533 for (i = fd->collapse - 1; i >= 0; i--)
2535 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2536 itype = vtype;
2537 if (POINTER_TYPE_P (vtype))
2538 itype = signed_type_for (vtype);
2539 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2540 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2541 else
2542 t = tem;
2543 if (i == fd->last_nonrect)
2545 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2546 false, GSI_CONTINUE_LINKING);
2547 tree stopval = t;
2548 tree idx = create_tmp_reg (type, ".count");
2549 expand_omp_build_assign (gsi, idx,
2550 build_zero_cst (type), true);
2551 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2552 if (fd->first_nonrect + 1 == fd->last_nonrect
2553 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2554 || fd->first_inner_iterations)
2555 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2556 != CODE_FOR_nothing)
2557 && !integer_zerop (fd->loop.n2))
2559 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2560 tree itype = TREE_TYPE (fd->loops[i].v);
2561 tree first_inner_iterations = fd->first_inner_iterations;
2562 tree factor = fd->factor;
2563 gcond *cond_stmt
2564 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2565 build_zero_cst (TREE_TYPE (factor)));
2566 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2567 basic_block bb0 = e->src;
2568 e->flags = EDGE_TRUE_VALUE;
2569 e->probability = profile_probability::likely ();
2570 bb_triang_dom = bb0;
2571 *gsi = gsi_after_labels (e->dest);
2572 tree slltype = long_long_integer_type_node;
2573 tree ulltype = long_long_unsigned_type_node;
2574 tree stopvalull = fold_convert (ulltype, stopval);
2575 stopvalull
2576 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2577 false, GSI_CONTINUE_LINKING);
2578 first_inner_iterations
2579 = fold_convert (slltype, first_inner_iterations);
2580 first_inner_iterations
2581 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2582 NULL_TREE, false,
2583 GSI_CONTINUE_LINKING);
2584 factor = fold_convert (slltype, factor);
2585 factor
2586 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2587 false, GSI_CONTINUE_LINKING);
2588 tree first_inner_iterationsd
2589 = fold_build1 (FLOAT_EXPR, double_type_node,
2590 first_inner_iterations);
2591 first_inner_iterationsd
2592 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2593 NULL_TREE, false,
2594 GSI_CONTINUE_LINKING);
2595 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2596 factor);
2597 factord = force_gimple_operand_gsi (gsi, factord, true,
2598 NULL_TREE, false,
2599 GSI_CONTINUE_LINKING);
2600 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2601 stopvalull);
2602 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2603 NULL_TREE, false,
2604 GSI_CONTINUE_LINKING);
2605 /* Temporarily disable flag_rounding_math; the values will be
2606 decimal numbers divided by 2, and worst-case imprecisions
2607 due to too-large values ought to be caught later by the
2608 fallback checks. */
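The floating-point block that follows inverts the triangular-count formula. As a sketch of the math, with $f$ = first_inner_iterations, $m$ = factor and $T$ = stopval, the first $c$ outer iterations cover $S(c) = c f + m\,c(c-1)/2$ logical iterations, and solving $S(c) = T$ for $c$ gives

  $c = \frac{\sqrt{(f - m/2)^2 + 2 m T} - (f - m/2)}{m}$,

which is what the code computes: t3 = f - m/2, the sqrt argument 2mT + t3^2, then (sqrt - t3) / m truncated to an integer. The comparisons against d = S(c) and d + t2 = S(c + 1) afterwards re-check S(c) <= T < S(c + 1) in integer arithmetic, taking the fallback path if rounding pushed c off by one.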
2609 int save_flag_rounding_math = flag_rounding_math;
2610 flag_rounding_math = 0;
2611 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2612 build_real (double_type_node, dconst2));
2613 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2614 first_inner_iterationsd, t);
2615 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2616 GSI_CONTINUE_LINKING);
2617 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2618 build_real (double_type_node, dconst2));
2619 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2620 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2621 fold_build2 (MULT_EXPR, double_type_node,
2622 t3, t3));
2623 flag_rounding_math = save_flag_rounding_math;
2624 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2625 GSI_CONTINUE_LINKING);
2626 if (flag_exceptions
2627 && cfun->can_throw_non_call_exceptions
2628 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2630 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2631 build_zero_cst (double_type_node));
2632 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2633 false, GSI_CONTINUE_LINKING);
2634 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2635 boolean_false_node,
2636 NULL_TREE, NULL_TREE);
2638 else
2639 cond_stmt
2640 = gimple_build_cond (LT_EXPR, t,
2641 build_zero_cst (double_type_node),
2642 NULL_TREE, NULL_TREE);
2643 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2644 e = split_block (gsi_bb (*gsi), cond_stmt);
2645 basic_block bb1 = e->src;
2646 e->flags = EDGE_FALSE_VALUE;
2647 e->probability = profile_probability::very_likely ();
2648 *gsi = gsi_after_labels (e->dest);
2649 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2650 tree sqrtr = create_tmp_var (double_type_node);
2651 gimple_call_set_lhs (call, sqrtr);
2652 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2653 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2654 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2655 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2656 tree c = create_tmp_var (ulltype);
2657 tree d = create_tmp_var (ulltype);
2658 expand_omp_build_assign (gsi, c, t, true);
2659 t = fold_build2 (MINUS_EXPR, ulltype, c,
2660 build_one_cst (ulltype));
2661 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2662 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2663 t = fold_build2 (MULT_EXPR, ulltype,
2664 fold_convert (ulltype, fd->factor), t);
2665 tree t2
2666 = fold_build2 (MULT_EXPR, ulltype, c,
2667 fold_convert (ulltype,
2668 fd->first_inner_iterations));
2669 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2670 expand_omp_build_assign (gsi, d, t, true);
2671 t = fold_build2 (MULT_EXPR, ulltype,
2672 fold_convert (ulltype, fd->factor), c);
2673 t = fold_build2 (PLUS_EXPR, ulltype,
2674 t, fold_convert (ulltype,
2675 fd->first_inner_iterations));
2676 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2677 GSI_CONTINUE_LINKING);
2678 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2679 NULL_TREE, NULL_TREE);
2680 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2681 e = split_block (gsi_bb (*gsi), cond_stmt);
2682 basic_block bb2 = e->src;
2683 e->flags = EDGE_TRUE_VALUE;
2684 e->probability = profile_probability::very_likely ();
2685 *gsi = gsi_after_labels (e->dest);
2686 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2687 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2688 GSI_CONTINUE_LINKING);
2689 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2690 NULL_TREE, NULL_TREE);
2691 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2692 e = split_block (gsi_bb (*gsi), cond_stmt);
2693 basic_block bb3 = e->src;
2694 e->flags = EDGE_FALSE_VALUE;
2695 e->probability = profile_probability::very_likely ();
2696 *gsi = gsi_after_labels (e->dest);
2697 t = fold_convert (itype, c);
2698 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2699 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2700 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2701 GSI_CONTINUE_LINKING);
2702 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2703 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2704 t2 = fold_convert (itype, t2);
2705 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2706 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2707 if (fd->loops[i].m1)
2709 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2710 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2712 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2713 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2714 bb_triang = e->src;
2715 *gsi = gsi_after_labels (e->dest);
2716 remove_edge (e);
2717 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2718 e->probability = profile_probability::very_unlikely ();
2719 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2720 e->probability = profile_probability::very_unlikely ();
2721 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2722 e->probability = profile_probability::very_unlikely ();
2724 basic_block bb4 = create_empty_bb (bb0);
2725 add_bb_to_loop (bb4, bb0->loop_father);
2726 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2727 e->probability = profile_probability::unlikely ();
2728 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2729 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2730 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2731 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2732 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2733 counts[i], counts[i - 1]);
2734 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2735 GSI_CONTINUE_LINKING);
2736 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2737 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2738 t = fold_convert (itype, t);
2739 t2 = fold_convert (itype, t2);
2740 t = fold_build2 (MULT_EXPR, itype, t,
2741 fold_convert (itype, fd->loops[i].step));
2742 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2743 t2 = fold_build2 (MULT_EXPR, itype, t2,
2744 fold_convert (itype, fd->loops[i - 1].step));
2745 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2746 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2747 false, GSI_CONTINUE_LINKING);
2748 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2749 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2750 if (fd->loops[i].m1)
2752 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2753 fd->loops[i - 1].v);
2754 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2756 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2757 false, GSI_CONTINUE_LINKING);
2758 stmt = gimple_build_assign (fd->loops[i].v, t);
2759 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2761 /* Fallback implementation. Evaluate the loops in between
2762 (inclusive) fd->first_nonrect and fd->last_nonrect at
2763 runtime using temporaries instead of the original iteration
2764 variables; in the body just bump the counter and compare
2765 it with the desired value. */
2766 gimple_stmt_iterator gsi2 = *gsi;
2767 basic_block entry_bb = gsi_bb (gsi2);
2768 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2769 e = split_block (e->dest, (gimple *) NULL);
2770 basic_block dom_bb = NULL;
2771 basic_block cur_bb = e->src;
2772 basic_block next_bb = e->dest;
2773 entry_bb = e->dest;
2774 *gsi = gsi_after_labels (entry_bb);
2776 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2777 tree n1 = NULL_TREE, n2 = NULL_TREE;
2778 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2780 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2782 tree itype = TREE_TYPE (fd->loops[j].v);
2783 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2784 && fd->loops[j].m2 == NULL_TREE
2785 && !fd->loops[j].non_rect_referenced);
2786 gsi2 = gsi_after_labels (cur_bb);
2787 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2788 if (fd->loops[j].m1 == NULL_TREE)
2789 n1 = rect_p ? build_zero_cst (type) : t;
2790 else if (POINTER_TYPE_P (itype))
2792 gcc_assert (integer_onep (fd->loops[j].m1));
2793 t = unshare_expr (fd->loops[j].n1);
2794 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2796 else
2798 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2799 n1 = fold_build2 (MULT_EXPR, itype,
2800 vs[j - fd->loops[j].outer], n1);
2801 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2803 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2804 true, GSI_SAME_STMT);
2805 if (j < fd->last_nonrect)
2807 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2808 expand_omp_build_assign (&gsi2, vs[j], n1);
2810 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2811 if (fd->loops[j].m2 == NULL_TREE)
2812 n2 = rect_p ? counts[j] : t;
2813 else if (POINTER_TYPE_P (itype))
2815 gcc_assert (integer_onep (fd->loops[j].m2));
2816 t = unshare_expr (fd->loops[j].n2);
2817 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2819 else
2821 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2822 n2 = fold_build2 (MULT_EXPR, itype,
2823 vs[j - fd->loops[j].outer], n2);
2824 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2826 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2827 true, GSI_SAME_STMT);
2828 if (POINTER_TYPE_P (itype))
2829 itype = signed_type_for (itype);
2830 if (j == fd->last_nonrect)
2832 gcond *cond_stmt
2833 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2834 n1, n2);
2835 e = split_block (cur_bb, cond_stmt);
2836 e->flags = EDGE_TRUE_VALUE;
2837 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2838 e->probability = profile_probability::likely ().guessed ();
2839 ne->probability = e->probability.invert ();
2840 gsi2 = gsi_after_labels (e->dest);
2842 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2843 ? -1 : 1));
2844 t = fold_build2 (PLUS_EXPR, itype,
2845 fold_convert (itype, fd->loops[j].step), t);
2846 t = fold_build2 (PLUS_EXPR, itype, t,
2847 fold_convert (itype, n2));
2848 t = fold_build2 (MINUS_EXPR, itype, t,
2849 fold_convert (itype, n1));
2850 tree step = fold_convert (itype, fd->loops[j].step);
2851 if (TYPE_UNSIGNED (itype)
2852 && fd->loops[j].cond_code == GT_EXPR)
2853 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2854 fold_build1 (NEGATE_EXPR, itype, t),
2855 fold_build1 (NEGATE_EXPR, itype, step));
2856 else
2857 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2858 t = fold_convert (type, t);
2859 t = fold_build2 (PLUS_EXPR, type, idx, t);
2860 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2861 true, GSI_SAME_STMT);
2862 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2863 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2864 cond_stmt
2865 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2866 NULL_TREE);
2867 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2868 e = split_block (gsi_bb (gsi2), cond_stmt);
2869 e->flags = EDGE_TRUE_VALUE;
2870 e->probability = profile_probability::likely ().guessed ();
2871 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2872 ne->probability = e->probability.invert ();
2873 gsi2 = gsi_after_labels (e->dest);
2874 expand_omp_build_assign (&gsi2, idx, t);
2875 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2876 break;
2878 e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2880 basic_block new_cur_bb = create_empty_bb (cur_bb);
2881 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2883 gsi2 = gsi_after_labels (e->dest);
2884 if (rect_p)
2885 t = fold_build2 (PLUS_EXPR, type, vs[j],
2886 build_one_cst (type));
2887 else
2889 tree step
2890 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2891 if (POINTER_TYPE_P (vtype))
2892 t = fold_build_pointer_plus (vs[j], step);
2893 else
2894 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2896 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2897 true, GSI_SAME_STMT);
2898 expand_omp_build_assign (&gsi2, vs[j], t);
2900 edge ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2901 gsi2 = gsi_after_labels (ne->dest);
2903 gcond *cond_stmt;
2904 if (next_bb == entry_bb)
2905 /* No need to actually check the outermost condition. */
2906 cond_stmt
2907 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2908 boolean_true_node,
2909 NULL_TREE, NULL_TREE);
2910 else
2911 cond_stmt
2912 = gimple_build_cond (rect_p ? LT_EXPR
2913 : fd->loops[j].cond_code,
2914 vs[j], n2, NULL_TREE, NULL_TREE);
2915 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2916 edge e3, e4;
2917 if (next_bb == entry_bb)
2919 e3 = find_edge (ne->dest, next_bb);
2920 e3->flags = EDGE_FALSE_VALUE;
2921 dom_bb = ne->dest;
2923 else
2924 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2925 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2926 e4->probability = profile_probability::likely ().guessed ();
2927 e3->probability = e4->probability.invert ();
2928 basic_block esrc = e->src;
2929 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2930 cur_bb = new_cur_bb;
2931 basic_block latch_bb = next_bb;
2932 next_bb = e->dest;
2933 remove_edge (e);
2934 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2935 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2936 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2938 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2940 tree vtype = TREE_TYPE (fd->loops[j].v);
2941 tree itype = vtype;
2942 if (POINTER_TYPE_P (itype))
2943 itype = signed_type_for (itype);
2944 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2945 && fd->loops[j].m2 == NULL_TREE
2946 && !fd->loops[j].non_rect_referenced);
2947 if (j == fd->last_nonrect)
2949 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2950 t = fold_convert (itype, t);
2951 tree t2
2952 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2953 t = fold_build2 (MULT_EXPR, itype, t, t2);
2954 if (POINTER_TYPE_P (vtype))
2955 t = fold_build_pointer_plus (n1, t);
2956 else
2957 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2959 else if (rect_p)
2961 t = fold_convert (itype, vs[j]);
2962 t = fold_build2 (MULT_EXPR, itype, t,
2963 fold_convert (itype, fd->loops[j].step));
2964 if (POINTER_TYPE_P (vtype))
2965 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2966 else
2967 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2969 else
2970 t = vs[j];
2971 t = force_gimple_operand_gsi (gsi, t, false,
2972 NULL_TREE, true,
2973 GSI_SAME_STMT);
2974 stmt = gimple_build_assign (fd->loops[j].v, t);
2975 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2977 if (gsi_end_p (*gsi))
2978 *gsi = gsi_last_bb (gsi_bb (*gsi));
2979 else
2980 gsi_prev (gsi);
2981 if (bb_triang)
2983 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2984 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2985 *gsi = gsi_after_labels (e->dest);
2986 if (!gsi_end_p (*gsi))
2987 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2988 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2991 else
2993 t = fold_convert (itype, t);
2994 t = fold_build2 (MULT_EXPR, itype, t,
2995 fold_convert (itype, fd->loops[i].step));
2996 if (POINTER_TYPE_P (vtype))
2997 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2998 else
2999 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3000 t = force_gimple_operand_gsi (gsi, t,
3001 DECL_P (fd->loops[i].v)
3002 && TREE_ADDRESSABLE (fd->loops[i].v),
3003 NULL_TREE, false,
3004 GSI_CONTINUE_LINKING);
3005 stmt = gimple_build_assign (fd->loops[i].v, t);
3006 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3008 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3010 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3011 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3012 false, GSI_CONTINUE_LINKING);
3013 stmt = gimple_build_assign (tem, t);
3014 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3016 if (i == fd->last_nonrect)
3017 i = fd->first_nonrect;
3019 if (fd->non_rect)
3020 for (i = 0; i <= fd->last_nonrect; i++)
3021 if (fd->loops[i].m2)
3023 tree itype = TREE_TYPE (fd->loops[i].v);
3025 tree t;
3026 if (POINTER_TYPE_P (itype))
3028 gcc_assert (integer_onep (fd->loops[i].m2));
3029 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3030 unshare_expr (fd->loops[i].n2));
3032 else
3034 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3035 t = fold_build2 (MULT_EXPR, itype,
3036 fd->loops[i - fd->loops[i].outer].v, t);
3037 t = fold_build2 (PLUS_EXPR, itype, t,
3038 fold_convert (itype,
3039 unshare_expr (fd->loops[i].n2)));
3041 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3042 t = force_gimple_operand_gsi (gsi, t, false,
3043 NULL_TREE, false,
3044 GSI_CONTINUE_LINKING);
3045 stmt = gimple_build_assign (nonrect_bounds[i], t);
3046 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3050 /* Helper function for expand_omp_for_*. Generate code like:
3051 L10:
3052 V3 += STEP3;
3053 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3054 L11:
3055 V3 = N31;
3056 V2 += STEP2;
3057 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3058 L12:
3059 V2 = N21;
3060 V1 += STEP1;
3061 goto BODY_BB;
3062 For non-rectangular loops, use temporaries stored in nonrect_bounds
3063 for the upper bounds if M?2 multiplier is present. Given e.g.
3064 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3065 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3066 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3067 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3069 L10:
3070 V4 += STEP4;
3071 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3072 L11:
3073 V4 = N41 + M41 * V2; // This can be left out if the loop
3074 // refers to the immediate parent loop
3075 V3 += STEP3;
3076 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3077 L12:
3078 V3 = N31;
3079 V2 += STEP2;
3080 if (V2 cond2 N22) goto L120; else goto L13;
3081 L120:
3082 V4 = N41 + M41 * V2;
3083 NONRECT_BOUND4 = N42 + M42 * V2;
3084 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3085 L13:
3086 V2 = N21;
3087 V1 += STEP1;
3088 goto L120; */
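An illustrative C odometer model of the rectangular update sequence above (collapse(3), all names placeholders); the generated code additionally jumps straight to BODY_BB as soon as a level's condition still holds:

/* Bump the innermost variable; on wrap-around, reset it and carry
   into the next outer loop.  */
v3 += step3;
if (!(v3 < n32))
  {
    v3 = n31;
    v2 += step2;
    if (!(v2 < n22))
      {
        v2 = n21;
        v1 += step1;
      }
  }
/* execution then continues at the loop body (BODY_BB).  */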
3090 static basic_block
3091 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3092 basic_block cont_bb, basic_block body_bb)
3094 basic_block last_bb, bb, collapse_bb = NULL;
3095 int i;
3096 gimple_stmt_iterator gsi;
3097 edge e;
3098 tree t;
3099 gimple *stmt;
3101 last_bb = cont_bb;
3102 for (i = fd->collapse - 1; i >= 0; i--)
3104 tree vtype = TREE_TYPE (fd->loops[i].v);
3106 bb = create_empty_bb (last_bb);
3107 add_bb_to_loop (bb, last_bb->loop_father);
3108 gsi = gsi_start_bb (bb);
3110 if (i < fd->collapse - 1)
3112 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3113 e->probability = profile_probability::guessed_always () / 8;
3115 struct omp_for_data_loop *l = &fd->loops[i + 1];
3116 if (l->m1 == NULL_TREE || l->outer != 1)
3118 t = l->n1;
3119 if (l->m1)
3121 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3122 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3124 else
3126 tree t2
3127 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3128 fd->loops[i + 1 - l->outer].v, l->m1);
3129 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3132 t = force_gimple_operand_gsi (&gsi, t,
3133 DECL_P (l->v)
3134 && TREE_ADDRESSABLE (l->v),
3135 NULL_TREE, false,
3136 GSI_CONTINUE_LINKING);
3137 stmt = gimple_build_assign (l->v, t);
3138 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3141 else
3142 collapse_bb = bb;
3144 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3146 if (POINTER_TYPE_P (vtype))
3147 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3148 else
3149 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3150 t = force_gimple_operand_gsi (&gsi, t,
3151 DECL_P (fd->loops[i].v)
3152 && TREE_ADDRESSABLE (fd->loops[i].v),
3153 NULL_TREE, false, GSI_CONTINUE_LINKING);
3154 stmt = gimple_build_assign (fd->loops[i].v, t);
3155 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3157 if (fd->loops[i].non_rect_referenced)
3159 basic_block update_bb = NULL, prev_bb = NULL;
3160 for (int j = i + 1; j <= fd->last_nonrect; j++)
3161 if (j - fd->loops[j].outer == i)
3163 tree n1, n2;
3164 struct omp_for_data_loop *l = &fd->loops[j];
3165 basic_block this_bb = create_empty_bb (last_bb);
3166 add_bb_to_loop (this_bb, last_bb->loop_father);
3167 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3168 if (prev_bb)
3170 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3171 e->probability
3172 = profile_probability::guessed_always ().apply_scale (7,
3174 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3176 if (l->m1)
3178 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3179 t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3180 else
3182 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3183 fd->loops[i].v);
3184 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3185 t, l->n1);
3187 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3188 false,
3189 GSI_CONTINUE_LINKING);
3190 stmt = gimple_build_assign (l->v, n1);
3191 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3192 n1 = l->v;
3194 else
3195 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3196 NULL_TREE, false,
3197 GSI_CONTINUE_LINKING);
3198 if (l->m2)
3200 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3201 t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3202 else
3204 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3205 fd->loops[i].v);
3206 t = fold_build2 (PLUS_EXPR,
3207 TREE_TYPE (nonrect_bounds[j]),
3208 t, unshare_expr (l->n2));
3210 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3211 false,
3212 GSI_CONTINUE_LINKING);
3213 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3214 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3215 n2 = nonrect_bounds[j];
3217 else
3218 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3219 true, NULL_TREE, false,
3220 GSI_CONTINUE_LINKING);
3221 gcond *cond_stmt
3222 = gimple_build_cond (l->cond_code, n1, n2,
3223 NULL_TREE, NULL_TREE);
3224 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3225 if (update_bb == NULL)
3226 update_bb = this_bb;
3227 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3228 e->probability = profile_probability::guessed_always () / 8;
3229 if (prev_bb == NULL)
3230 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3231 prev_bb = this_bb;
3233 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3234 e->probability
3235 = profile_probability::guessed_always ().apply_scale (7, 8);
3236 body_bb = update_bb;
3239 if (i > 0)
3241 if (fd->loops[i].m2)
3242 t = nonrect_bounds[i];
3243 else
3244 t = unshare_expr (fd->loops[i].n2);
3245 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3246 false, GSI_CONTINUE_LINKING);
3247 tree v = fd->loops[i].v;
3248 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3249 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3250 false, GSI_CONTINUE_LINKING);
3251 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3252 stmt = gimple_build_cond_empty (t);
3253 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3254 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3255 expand_omp_regimplify_p, NULL, NULL)
3256 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3257 expand_omp_regimplify_p, NULL, NULL))
3258 gimple_regimplify_operands (stmt, &gsi);
3259 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3260 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3262 else
3263 make_edge (bb, body_bb, EDGE_FALLTHRU);
3264 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3265 last_bb = bb;
3268 return collapse_bb;
3271 /* Expand #pragma omp ordered depend(source). */
3273 static void
3274 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3275 tree *counts, location_t loc)
3277 enum built_in_function source_ix
3278 = fd->iter_type == long_integer_type_node
3279 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3280 gimple *g
3281 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3282 build_fold_addr_expr (counts[fd->ordered]));
3283 gimple_set_location (g, loc);
3284 gsi_insert_before (gsi, g, GSI_SAME_STMT);
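For context, a minimal OpenMP C example of the doacross construct that this and the following helper expand, assuming it sits inside a parallel region (illustrative user code, not part of GCC):

/* Each iteration waits for iteration i - 1 before reading a[i - 1].  */
#pragma omp for ordered(1)
for (int i = 1; i < n; i++)
  {
    #pragma omp ordered depend (sink: i - 1)
    a[i] += a[i - 1];
    #pragma omp ordered depend (source)
  }

The depend(source) side is lowered to the GOMP_DOACROSS_POST call built above; each depend(sink: ...) becomes a GOMP_DOACROSS_WAIT call emitted by expand_omp_ordered_sink below.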
3287 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3289 static void
3290 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3291 tree *counts, tree c, location_t loc,
3292 basic_block cont_bb)
3294 auto_vec<tree, 10> args;
3295 enum built_in_function sink_ix
3296 = fd->iter_type == long_integer_type_node
3297 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3298 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3299 int i;
3300 gimple_stmt_iterator gsi2 = *gsi;
3301 bool warned_step = false;
3303 if (deps == NULL)
3305 /* Handle doacross(sink: omp_cur_iteration - 1). */
3306 gsi_prev (&gsi2);
3307 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3308 edge e2 = split_block_after_labels (e1->dest);
3309 gsi2 = gsi_after_labels (e1->dest);
3310 *gsi = gsi_last_bb (e1->src);
3311 gimple_stmt_iterator gsi3 = *gsi;
3313 if (counts[fd->collapse - 1])
3315 gcc_assert (fd->collapse == 1);
3316 t = counts[fd->collapse - 1];
3318 else if (fd->collapse > 1)
3319 t = fd->loop.v;
3320 else
3322 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3323 fd->loops[0].v, fd->loops[0].n1);
3324 t = fold_convert (fd->iter_type, t);
3327 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
3328 false, GSI_CONTINUE_LINKING);
3329 gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
3330 build_zero_cst (TREE_TYPE (t)),
3331 NULL_TREE, NULL_TREE),
3332 GSI_NEW_STMT);
3334 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3335 build_minus_one_cst (TREE_TYPE (t)));
3336 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3337 true, GSI_SAME_STMT);
3338 args.safe_push (t);
3339 for (i = fd->collapse; i < fd->ordered; i++)
3341 t = counts[fd->ordered + 2 + (i - fd->collapse)];
3342 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3343 build_minus_one_cst (TREE_TYPE (t)));
3344 t = fold_convert (fd->iter_type, t);
3345 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3346 true, GSI_SAME_STMT);
3347 args.safe_push (t);
3350 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
3351 args);
3352 gimple_set_location (g, loc);
3353 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3355 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3356 e3->probability = profile_probability::guessed_always () / 8;
3357 e1->probability = e3->probability.invert ();
3358 e1->flags = EDGE_TRUE_VALUE;
3359 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3361 if (fd->ordered > fd->collapse && cont_bb)
3363 if (counts[fd->ordered + 1] == NULL_TREE)
3364 counts[fd->ordered + 1]
3365 = create_tmp_var (boolean_type_node, ".first");
3367 edge e4;
3368 if (gsi_end_p (gsi3))
3369 e4 = split_block_after_labels (e1->src);
3370 else
3372 gsi_prev (&gsi3);
3373 e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
3375 gsi3 = gsi_last_bb (e4->src);
3377 gsi_insert_after (&gsi3,
3378 gimple_build_cond (NE_EXPR,
3379 counts[fd->ordered + 1],
3380 boolean_false_node,
3381 NULL_TREE, NULL_TREE),
3382 GSI_NEW_STMT);
3384 edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
3385 e4->probability = profile_probability::guessed_always () / 8;
3386 e5->probability = e4->probability.invert ();
3387 e4->flags = EDGE_TRUE_VALUE;
3388 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
3391 *gsi = gsi_after_labels (e2->dest);
3392 return;
3394 for (i = 0; i < fd->ordered; i++)
3396 tree step = NULL_TREE;
3397 off = TREE_PURPOSE (deps);
3398 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3400 step = TREE_OPERAND (off, 1);
3401 off = TREE_OPERAND (off, 0);
3403 if (!integer_zerop (off))
3405 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3406 || fd->loops[i].cond_code == GT_EXPR);
3407 bool forward = fd->loops[i].cond_code == LT_EXPR;
3408 if (step)
3410 /* Non-simple Fortran DO loops. If step is variable,
3411 we don't know even the direction at compile time, so
3412 we can't warn. */
3413 if (TREE_CODE (step) != INTEGER_CST)
3414 break;
3415 forward = tree_int_cst_sgn (step) != -1;
3417 if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3418 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3419 "waiting for lexically later iteration",
3420 OMP_CLAUSE_DOACROSS_DEPEND (c)
3421 ? "depend" : "doacross");
3422 break;
3424 deps = TREE_CHAIN (deps);
3426 /* If all offsets corresponding to the collapsed loops are zero,
3427 this depend clause can be ignored. FIXME: but there is still a
3428 flush needed. We need to emit one __sync_synchronize () for it
3429 though (perhaps conditionally)? Solve this together with the
3430 conservative dependence folding optimization.
3431 if (i >= fd->collapse)
3432 return; */
3434 deps = OMP_CLAUSE_DECL (c);
3435 gsi_prev (&gsi2);
3436 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3437 edge e2 = split_block_after_labels (e1->dest);
3439 gsi2 = gsi_after_labels (e1->dest);
3440 *gsi = gsi_last_bb (e1->src);
3441 for (i = 0; i < fd->ordered; i++)
3443 tree itype = TREE_TYPE (fd->loops[i].v);
3444 tree step = NULL_TREE;
3445 tree orig_off = NULL_TREE;
3446 if (POINTER_TYPE_P (itype))
3447 itype = sizetype;
3448 if (i)
3449 deps = TREE_CHAIN (deps);
3450 off = TREE_PURPOSE (deps);
3451 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3453 step = TREE_OPERAND (off, 1);
3454 off = TREE_OPERAND (off, 0);
3455 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3456 && integer_onep (fd->loops[i].step)
3457 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3459 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3460 if (step)
3462 off = fold_convert_loc (loc, itype, off);
3463 orig_off = off;
3464 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3467 if (integer_zerop (off))
3468 t = boolean_true_node;
3469 else
3471 tree a;
3472 tree co = fold_convert_loc (loc, itype, off);
3473 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3475 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3476 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3477 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3478 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3479 co);
3481 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3482 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3483 fd->loops[i].v, co);
3484 else
3485 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3486 fd->loops[i].v, co);
3487 if (step)
3489 tree t1, t2;
3490 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3491 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3492 fd->loops[i].n1);
3493 else
3494 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3495 fd->loops[i].n2);
3496 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3497 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3498 fd->loops[i].n2);
3499 else
3500 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3501 fd->loops[i].n1);
3502 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3503 step, build_int_cst (TREE_TYPE (step), 0));
3504 if (TREE_CODE (step) != INTEGER_CST)
3506 t1 = unshare_expr (t1);
3507 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3508 false, GSI_CONTINUE_LINKING);
3509 t2 = unshare_expr (t2);
3510 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3511 false, GSI_CONTINUE_LINKING);
3513 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3514 t, t2, t1);
3516 else if (fd->loops[i].cond_code == LT_EXPR)
3518 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3519 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3520 fd->loops[i].n1);
3521 else
3522 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3523 fd->loops[i].n2);
3525 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3526 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3527 fd->loops[i].n2);
3528 else
3529 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3530 fd->loops[i].n1);
3532 if (cond)
3533 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3534 else
3535 cond = t;
3537 off = fold_convert_loc (loc, itype, off);
3539 if (step
3540 || (fd->loops[i].cond_code == LT_EXPR
3541 ? !integer_onep (fd->loops[i].step)
3542 : !integer_minus_onep (fd->loops[i].step)))
3544 if (step == NULL_TREE
3545 && TYPE_UNSIGNED (itype)
3546 && fd->loops[i].cond_code == GT_EXPR)
3547 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3548 fold_build1_loc (loc, NEGATE_EXPR, itype,
3549 s));
3550 else
3551 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3552 orig_off ? orig_off : off, s);
3553 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3554 build_int_cst (itype, 0));
3555 if (integer_zerop (t) && !warned_step)
3557 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3558 "refers to iteration never in the iteration "
3559 "space",
3560 OMP_CLAUSE_DOACROSS_DEPEND (c)
3561 ? "depend" : "doacross");
3562 warned_step = true;
3564 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3565 cond, t);
3568 if (i <= fd->collapse - 1 && fd->collapse > 1)
3569 t = fd->loop.v;
3570 else if (counts[i])
3571 t = counts[i];
3572 else
3574 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3575 fd->loops[i].v, fd->loops[i].n1);
3576 t = fold_convert_loc (loc, fd->iter_type, t);
3578 if (step)
3579 /* We have divided off by step already earlier. */;
3580 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3581 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3582 fold_build1_loc (loc, NEGATE_EXPR, itype,
3583 s));
3584 else
3585 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3586 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3587 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3588 off = fold_convert_loc (loc, fd->iter_type, off);
3589 if (i <= fd->collapse - 1 && fd->collapse > 1)
3591 if (i)
3592 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3593 off);
3594 if (i < fd->collapse - 1)
3596 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3597 counts[i]);
3598 continue;
3601 off = unshare_expr (off);
3602 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3603 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3604 true, GSI_SAME_STMT);
3605 args.safe_push (t);
3607 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3608 gimple_set_location (g, loc);
3609 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3611 cond = unshare_expr (cond);
3612 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3613 GSI_CONTINUE_LINKING);
3614 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3615 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3616 e3->probability = profile_probability::guessed_always () / 8;
3617 e1->probability = e3->probability.invert ();
3618 e1->flags = EDGE_TRUE_VALUE;
3619 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3621 *gsi = gsi_after_labels (e2->dest);
3624 /* Expand all #pragma omp ordered depend(source) and
3625 #pragma omp ordered depend(sink:...) constructs in the current
3626 #pragma omp for ordered(n) region. */
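/* As an illustrative example (not taken from the testsuite), the kind of
   construct handled here is

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++)
       {
	 #pragma omp ordered depend (sink: i - 1)
	 a[i] += a[i - 1];
	 #pragma omp ordered depend (source)
       }

   where the depend(sink:) wait is expanded by expand_omp_ordered_sink
   and the depend(source) post by expand_omp_ordered_source; a, i and n
   are hypothetical user variables. */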
3628 static void
3629 expand_omp_ordered_source_sink (struct omp_region *region,
3630 struct omp_for_data *fd, tree *counts,
3631 basic_block cont_bb)
3633 struct omp_region *inner;
3634 int i;
3635 for (i = fd->collapse - 1; i < fd->ordered; i++)
3636 if (i == fd->collapse - 1 && fd->collapse > 1)
3637 counts[i] = NULL_TREE;
3638 else if (i >= fd->collapse && !cont_bb)
3639 counts[i] = build_zero_cst (fd->iter_type);
3640 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3641 && integer_onep (fd->loops[i].step))
3642 counts[i] = NULL_TREE;
3643 else
3644 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3645 tree atype
3646 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3647 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3648 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3649 counts[fd->ordered + 1] = NULL_TREE;
3651 for (inner = region->inner; inner; inner = inner->next)
3652 if (inner->type == GIMPLE_OMP_ORDERED)
3654 gomp_ordered *ord_stmt = inner->ord_stmt;
3655 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3656 location_t loc = gimple_location (ord_stmt);
3657 tree c;
3658 for (c = gimple_omp_ordered_clauses (ord_stmt);
3659 c; c = OMP_CLAUSE_CHAIN (c))
3660 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3661 break;
3662 if (c)
3663 expand_omp_ordered_source (&gsi, fd, counts, loc);
3664 for (c = gimple_omp_ordered_clauses (ord_stmt);
3665 c; c = OMP_CLAUSE_CHAIN (c))
3666 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3667 expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3668 gsi_remove (&gsi, true);
3672 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3673 collapsed. */
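/* A sketch of when this applies (hypothetical example): with ordered(2)
   and no collapse clause, fd->collapse is 1 and fd->ordered is 2 in

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend (sink: i - 1, j)
	   b[i][j] = b[i - 1][j] + 1;
	   #pragma omp ordered depend (source)
	 }

   so only the outermost loop is workshared and the j loop is rebuilt
   here around the body. */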
3675 static basic_block
3676 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3677 basic_block cont_bb, basic_block body_bb,
3678 basic_block l0_bb, bool ordered_lastprivate)
3680 if (fd->ordered == fd->collapse)
3681 return cont_bb;
3683 if (!cont_bb)
3685 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3686 for (int i = fd->collapse; i < fd->ordered; i++)
3688 tree type = TREE_TYPE (fd->loops[i].v);
3689 tree n1 = fold_convert (type, fd->loops[i].n1);
3690 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3691 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3692 size_int (i - fd->collapse + 1),
3693 NULL_TREE, NULL_TREE);
3694 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3696 return NULL;
3699 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3701 tree t, type = TREE_TYPE (fd->loops[i].v);
3702 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3703 if (counts[fd->ordered + 1] && i == fd->collapse)
3704 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3705 boolean_true_node);
3706 expand_omp_build_assign (&gsi, fd->loops[i].v,
3707 fold_convert (type, fd->loops[i].n1));
3708 if (counts[i])
3709 expand_omp_build_assign (&gsi, counts[i],
3710 build_zero_cst (fd->iter_type));
3711 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3712 size_int (i - fd->collapse + 1),
3713 NULL_TREE, NULL_TREE);
3714 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3715 if (!gsi_end_p (gsi))
3716 gsi_prev (&gsi);
3717 else
3718 gsi = gsi_last_bb (body_bb);
3719 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3720 basic_block new_body = e1->dest;
3721 if (body_bb == cont_bb)
3722 cont_bb = new_body;
3723 edge e2 = NULL;
3724 basic_block new_header;
3725 if (EDGE_COUNT (cont_bb->preds) > 0)
3727 gsi = gsi_last_bb (cont_bb);
3728 if (POINTER_TYPE_P (type))
3729 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3730 else
3731 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3732 fold_convert (type, fd->loops[i].step));
3733 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3734 if (counts[i])
3736 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3737 build_int_cst (fd->iter_type, 1));
3738 expand_omp_build_assign (&gsi, counts[i], t);
3739 t = counts[i];
3741 else
3743 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3744 fd->loops[i].v, fd->loops[i].n1);
3745 t = fold_convert (fd->iter_type, t);
3746 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3747 true, GSI_SAME_STMT);
3749 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3750 size_int (i - fd->collapse + 1),
3751 NULL_TREE, NULL_TREE);
3752 expand_omp_build_assign (&gsi, aref, t);
3753 if (counts[fd->ordered + 1] && i == fd->ordered - 1)
3754 expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3755 boolean_false_node);
3756 gsi_prev (&gsi);
3757 e2 = split_block (cont_bb, gsi_stmt (gsi));
3758 new_header = e2->dest;
3760 else
3761 new_header = cont_bb;
3762 gsi = gsi_after_labels (new_header);
3763 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3764 true, GSI_SAME_STMT);
3765 tree n2
3766 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3767 true, NULL_TREE, true, GSI_SAME_STMT);
3768 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3769 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3770 edge e3 = split_block (new_header, gsi_stmt (gsi));
3771 cont_bb = e3->dest;
3772 remove_edge (e1);
3773 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3774 e3->flags = EDGE_FALSE_VALUE;
3775 e3->probability = profile_probability::guessed_always () / 8;
3776 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3777 e1->probability = e3->probability.invert ();
3779 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3780 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3782 if (e2)
3784 class loop *loop = alloc_loop ();
3785 loop->header = new_header;
3786 loop->latch = e2->src;
3787 add_loop (loop, l0_bb->loop_father);
3791 /* If there are any lastprivate clauses and it is possible some loops
3792 might have zero iterations, ensure all the decls are initialized;
3793 otherwise we could crash evaluating C++ class iterators with lastprivate
3794 clauses. */
3795 bool need_inits = false;
3796 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3797 if (need_inits)
3799 tree type = TREE_TYPE (fd->loops[i].v);
3800 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3801 expand_omp_build_assign (&gsi, fd->loops[i].v,
3802 fold_convert (type, fd->loops[i].n1));
3804 else
3806 tree type = TREE_TYPE (fd->loops[i].v);
3807 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3808 boolean_type_node,
3809 fold_convert (type, fd->loops[i].n1),
3810 fold_convert (type, fd->loops[i].n2));
3811 if (!integer_onep (this_cond))
3812 need_inits = true;
3815 return cont_bb;
3818 /* A subroutine of expand_omp_for. Generate code for a parallel
3819 loop with any schedule. Given parameters:
3821 for (V = N1; V cond N2; V += STEP) BODY;
3823 where COND is "<" or ">", we generate pseudocode
3825 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3826 if (more) goto L0; else goto L3;
3828 V = istart0;
3829 iend = iend0;
3831 BODY;
3832 V += STEP;
3833 if (V cond iend) goto L1; else goto L2;
3835 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3838 If this is a combined omp parallel loop, instead of the call to
3839 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3840 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3841 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3842 inner GIMPLE_OMP_FOR and V += STEP; and
3843 if (V cond iend) goto L1; else goto L2; are removed.
3845 For collapsed loops, given parameters:
3846 collapse(3)
3847 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3848 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3849 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3850 BODY;
3852 we generate pseudocode
3854 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3855 if (cond3 is <)
3856 adj = STEP3 - 1;
3857 else
3858 adj = STEP3 + 1;
3859 count3 = (adj + N32 - N31) / STEP3;
3860 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3861 if (cond2 is <)
3862 adj = STEP2 - 1;
3863 else
3864 adj = STEP2 + 1;
3865 count2 = (adj + N22 - N21) / STEP2;
3866 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3867 if (cond1 is <)
3868 adj = STEP1 - 1;
3869 else
3870 adj = STEP1 + 1;
3871 count1 = (adj + N12 - N11) / STEP1;
3872 count = count1 * count2 * count3;
3873 goto Z1;
3875 count = 0;
3877 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3878 if (more) goto L0; else goto L3;
3880 V = istart0;
3881 T = V;
3882 V3 = N31 + (T % count3) * STEP3;
3883 T = T / count3;
3884 V2 = N21 + (T % count2) * STEP2;
3885 T = T / count2;
3886 V1 = N11 + T * STEP1;
3887 iend = iend0;
3889 BODY;
3890 V += 1;
3891 if (V < iend) goto L10; else goto L2;
3892 L10:
3893 V3 += STEP3;
3894 if (V3 cond3 N32) goto L1; else goto L11;
3895 L11:
3896 V3 = N31;
3897 V2 += STEP2;
3898 if (V2 cond2 N22) goto L1; else goto L12;
3899 L12:
3900 V2 = N21;
3901 V1 += STEP1;
3902 goto L1;
3904 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
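/* For illustration only (a hypothetical example, not from the sources
   above), a loop reaching this expansion with fd->collapse == 2 and a
   runtime schedule would be

     #pragma omp parallel for schedule(runtime) collapse(2)
     for (int i = 0; i < n; i++)
       for (int j = 0; j < m; j++)
	 c[i][j] = 0;

   with START_FN/NEXT_FN the BUILT_IN_GOMP_LOOP_RUNTIME_START and
   BUILT_IN_GOMP_LOOP_RUNTIME_NEXT builtins; V1 and V2 are then
   recomputed from the logical iteration number T as in the pseudocode
   above. */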
3909 static void
3910 expand_omp_for_generic (struct omp_region *region,
3911 struct omp_for_data *fd,
3912 enum built_in_function start_fn,
3913 enum built_in_function next_fn,
3914 tree sched_arg,
3915 gimple *inner_stmt)
3917 tree type, istart0, iend0, iend;
3918 tree t, vmain, vback, bias = NULL_TREE;
3919 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3920 basic_block l2_bb = NULL, l3_bb = NULL;
3921 gimple_stmt_iterator gsi;
3922 gassign *assign_stmt;
3923 bool in_combined_parallel = is_combined_parallel (region);
3924 bool broken_loop = region->cont == NULL;
3925 edge e, ne;
3926 tree *counts = NULL;
3927 int i;
3928 bool ordered_lastprivate = false;
3930 gcc_assert (!broken_loop || !in_combined_parallel);
3931 gcc_assert (fd->iter_type == long_integer_type_node
3932 || !in_combined_parallel);
3934 entry_bb = region->entry;
3935 cont_bb = region->cont;
3936 collapse_bb = NULL;
3937 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3938 gcc_assert (broken_loop
3939 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3940 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3941 l1_bb = single_succ (l0_bb);
3942 if (!broken_loop)
3944 l2_bb = create_empty_bb (cont_bb);
3945 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3946 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3947 == l1_bb));
3948 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3950 else
3951 l2_bb = NULL;
3952 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3953 exit_bb = region->exit;
3955 gsi = gsi_last_nondebug_bb (entry_bb);
3957 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3958 if (fd->ordered
3959 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3960 OMP_CLAUSE_LASTPRIVATE))
3961 ordered_lastprivate = true;
3962 tree reductions = NULL_TREE;
3963 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3964 tree memv = NULL_TREE;
3965 if (fd->lastprivate_conditional)
3967 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3968 OMP_CLAUSE__CONDTEMP_);
3969 if (fd->have_pointer_condtemp)
3970 condtemp = OMP_CLAUSE_DECL (c);
3971 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3972 cond_var = OMP_CLAUSE_DECL (c);
3974 if (sched_arg)
3976 if (fd->have_reductemp)
3978 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3979 OMP_CLAUSE__REDUCTEMP_);
3980 reductions = OMP_CLAUSE_DECL (c);
3981 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3982 gimple *g = SSA_NAME_DEF_STMT (reductions);
3983 reductions = gimple_assign_rhs1 (g);
3984 OMP_CLAUSE_DECL (c) = reductions;
3985 entry_bb = gimple_bb (g);
3986 edge e = split_block (entry_bb, g);
3987 if (region->entry == entry_bb)
3988 region->entry = e->dest;
3989 gsi = gsi_last_bb (entry_bb);
3991 else
3992 reductions = null_pointer_node;
3993 if (fd->have_pointer_condtemp)
3995 tree type = TREE_TYPE (condtemp);
3996 memv = create_tmp_var (type);
3997 TREE_ADDRESSABLE (memv) = 1;
3998 unsigned HOST_WIDE_INT sz
3999 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4000 sz *= fd->lastprivate_conditional;
4001 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4002 false);
4003 mem = build_fold_addr_expr (memv);
4005 else
4006 mem = null_pointer_node;
4008 if (fd->collapse > 1 || fd->ordered)
4010 int first_zero_iter1 = -1, first_zero_iter2 = -1;
4011 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4013 counts = XALLOCAVEC (tree, fd->ordered
4014 ? fd->ordered + 2
4015 + (fd->ordered - fd->collapse)
4016 : fd->collapse);
4017 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4018 zero_iter1_bb, first_zero_iter1,
4019 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4021 if (zero_iter1_bb)
4023 /* Some counts[i] vars might be uninitialized if
4024 some loop has zero iterations. But the body shouldn't
4025 be executed in that case, so just avoid uninit warnings. */
4026 for (i = first_zero_iter1;
4027 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4028 if (SSA_VAR_P (counts[i]))
4029 suppress_warning (counts[i], OPT_Wuninitialized);
4030 gsi_prev (&gsi);
4031 e = split_block (entry_bb, gsi_stmt (gsi));
4032 entry_bb = e->dest;
4033 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4034 gsi = gsi_last_nondebug_bb (entry_bb);
4035 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4036 get_immediate_dominator (CDI_DOMINATORS,
4037 zero_iter1_bb));
4039 if (zero_iter2_bb)
4041 /* Some counts[i] vars might be uninitialized if
4042 some loop has zero iterations. But the body shouldn't
4043 be executed in that case, so just avoid uninit warnings. */
4044 for (i = first_zero_iter2; i < fd->ordered; i++)
4045 if (SSA_VAR_P (counts[i]))
4046 suppress_warning (counts[i], OPT_Wuninitialized);
4047 if (zero_iter1_bb)
4048 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4049 else
4051 gsi_prev (&gsi);
4052 e = split_block (entry_bb, gsi_stmt (gsi));
4053 entry_bb = e->dest;
4054 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4055 gsi = gsi_last_nondebug_bb (entry_bb);
4056 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4057 get_immediate_dominator
4058 (CDI_DOMINATORS, zero_iter2_bb));
4061 if (fd->collapse == 1)
4063 counts[0] = fd->loop.n2;
4064 fd->loop = fd->loops[0];
4068 type = TREE_TYPE (fd->loop.v);
4069 istart0 = create_tmp_var (fd->iter_type, ".istart0");
4070 iend0 = create_tmp_var (fd->iter_type, ".iend0");
4071 TREE_ADDRESSABLE (istart0) = 1;
4072 TREE_ADDRESSABLE (iend0) = 1;
4074 /* See if we need to bias by LLONG_MIN. */
4075 if (fd->iter_type == long_long_unsigned_type_node
4076 && TREE_CODE (type) == INTEGER_TYPE
4077 && !TYPE_UNSIGNED (type)
4078 && fd->ordered == 0)
4080 tree n1, n2;
4082 if (fd->loop.cond_code == LT_EXPR)
4084 n1 = fd->loop.n1;
4085 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4087 else
4089 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4090 n2 = fd->loop.n1;
4092 if (TREE_CODE (n1) != INTEGER_CST
4093 || TREE_CODE (n2) != INTEGER_CST
4094 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4095 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
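/* E.g. (an illustrative case) a signed long long V running between
   bounds -10 and 10 has bounds of different signs, so the iteration
   space cannot be handed directly to the unsigned runtime entry points;
   adding TYPE_MIN_VALUE shifts the whole signed range monotonically
   into the unsigned long long space. */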
4098 gimple_stmt_iterator gsif = gsi;
4099 gsi_prev (&gsif);
4101 tree arr = NULL_TREE;
4102 if (in_combined_parallel)
4104 gcc_assert (fd->ordered == 0);
4105 /* In a combined parallel loop, emit a call to
4106 GOMP_loop_foo_next. */
4107 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4108 build_fold_addr_expr (istart0),
4109 build_fold_addr_expr (iend0));
4111 else
4113 tree t0, t1, t2, t3, t4;
4114 /* If this is not a combined parallel loop, emit a call to
4115 GOMP_loop_foo_start in ENTRY_BB. */
4116 t4 = build_fold_addr_expr (iend0);
4117 t3 = build_fold_addr_expr (istart0);
4118 if (fd->ordered)
4120 t0 = build_int_cst (unsigned_type_node,
4121 fd->ordered - fd->collapse + 1);
4122 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4123 fd->ordered
4124 - fd->collapse + 1),
4125 ".omp_counts");
4126 DECL_NAMELESS (arr) = 1;
4127 TREE_ADDRESSABLE (arr) = 1;
4128 TREE_STATIC (arr) = 1;
4129 vec<constructor_elt, va_gc> *v;
4130 vec_alloc (v, fd->ordered - fd->collapse + 1);
4131 int idx;
4133 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4135 tree c;
4136 if (idx == 0 && fd->collapse > 1)
4137 c = fd->loop.n2;
4138 else
4139 c = counts[idx + fd->collapse - 1];
4140 tree purpose = size_int (idx);
4141 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4142 if (TREE_CODE (c) != INTEGER_CST)
4143 TREE_STATIC (arr) = 0;
4146 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4147 if (!TREE_STATIC (arr))
4148 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4149 void_type_node, arr),
4150 true, NULL_TREE, true, GSI_SAME_STMT);
4151 t1 = build_fold_addr_expr (arr);
4152 t2 = NULL_TREE;
4154 else
4156 t2 = fold_convert (fd->iter_type, fd->loop.step);
4157 t1 = fd->loop.n2;
4158 t0 = fd->loop.n1;
4159 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4161 tree innerc
4162 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4163 OMP_CLAUSE__LOOPTEMP_);
4164 gcc_assert (innerc);
4165 t0 = OMP_CLAUSE_DECL (innerc);
4166 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4167 OMP_CLAUSE__LOOPTEMP_);
4168 gcc_assert (innerc);
4169 t1 = OMP_CLAUSE_DECL (innerc);
4171 if (POINTER_TYPE_P (TREE_TYPE (t0))
4172 && TYPE_PRECISION (TREE_TYPE (t0))
4173 != TYPE_PRECISION (fd->iter_type))
4175 /* Avoid casting pointers to integer of a different size. */
4176 tree itype = signed_type_for (type);
4177 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4178 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4180 else
4182 t1 = fold_convert (fd->iter_type, t1);
4183 t0 = fold_convert (fd->iter_type, t0);
4185 if (bias)
4187 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4188 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4191 if (fd->iter_type == long_integer_type_node || fd->ordered)
4193 if (fd->chunk_size)
4195 t = fold_convert (fd->iter_type, fd->chunk_size);
4196 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4197 if (sched_arg)
4199 if (fd->ordered)
4200 t = build_call_expr (builtin_decl_explicit (start_fn),
4201 8, t0, t1, sched_arg, t, t3, t4,
4202 reductions, mem);
4203 else
4204 t = build_call_expr (builtin_decl_explicit (start_fn),
4205 9, t0, t1, t2, sched_arg, t, t3, t4,
4206 reductions, mem);
4208 else if (fd->ordered)
4209 t = build_call_expr (builtin_decl_explicit (start_fn),
4210 5, t0, t1, t, t3, t4);
4211 else
4212 t = build_call_expr (builtin_decl_explicit (start_fn),
4213 6, t0, t1, t2, t, t3, t4);
4215 else if (fd->ordered)
4216 t = build_call_expr (builtin_decl_explicit (start_fn),
4217 4, t0, t1, t3, t4);
4218 else
4219 t = build_call_expr (builtin_decl_explicit (start_fn),
4220 5, t0, t1, t2, t3, t4);
4222 else
4224 tree t5;
4225 tree c_bool_type;
4226 tree bfn_decl;
4228 /* The GOMP_loop_ull_*start functions have an additional boolean
4229 argument, true for < loops and false for > loops.
4230 In Fortran, the C bool type can be different from
4231 boolean_type_node. */
4232 bfn_decl = builtin_decl_explicit (start_fn);
4233 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4234 t5 = build_int_cst (c_bool_type,
4235 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4236 if (fd->chunk_size)
4238 tree bfn_decl = builtin_decl_explicit (start_fn);
4239 t = fold_convert (fd->iter_type, fd->chunk_size);
4240 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4241 if (sched_arg)
4242 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4243 t, t3, t4, reductions, mem);
4244 else
4245 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4247 else
4248 t = build_call_expr (builtin_decl_explicit (start_fn),
4249 6, t5, t0, t1, t2, t3, t4);
4252 if (TREE_TYPE (t) != boolean_type_node)
4253 t = fold_build2 (NE_EXPR, boolean_type_node,
4254 t, build_int_cst (TREE_TYPE (t), 0));
4255 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4256 true, GSI_SAME_STMT);
4257 if (arr && !TREE_STATIC (arr))
4259 tree clobber = build_clobber (TREE_TYPE (arr));
4260 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4261 GSI_SAME_STMT);
4263 if (fd->have_pointer_condtemp)
4264 expand_omp_build_assign (&gsi, condtemp, memv, false);
4265 if (fd->have_reductemp)
4267 gimple *g = gsi_stmt (gsi);
4268 gsi_remove (&gsi, true);
4269 release_ssa_name (gimple_assign_lhs (g));
4271 entry_bb = region->entry;
4272 gsi = gsi_last_nondebug_bb (entry_bb);
4274 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4276 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4278 /* Remove the GIMPLE_OMP_FOR statement. */
4279 gsi_remove (&gsi, true);
4281 if (gsi_end_p (gsif))
4282 gsif = gsi_after_labels (gsi_bb (gsif));
4283 gsi_next (&gsif);
4285 /* Iteration setup for sequential loop goes in L0_BB. */
4286 tree startvar = fd->loop.v;
4287 tree endvar = NULL_TREE;
4289 if (gimple_omp_for_combined_p (fd->for_stmt))
4291 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4292 && gimple_omp_for_kind (inner_stmt)
4293 == GF_OMP_FOR_KIND_SIMD);
4294 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4295 OMP_CLAUSE__LOOPTEMP_);
4296 gcc_assert (innerc);
4297 startvar = OMP_CLAUSE_DECL (innerc);
4298 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4299 OMP_CLAUSE__LOOPTEMP_);
4300 gcc_assert (innerc);
4301 endvar = OMP_CLAUSE_DECL (innerc);
4304 gsi = gsi_start_bb (l0_bb);
4305 t = istart0;
4306 if (fd->ordered && fd->collapse == 1)
4307 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4308 fold_convert (fd->iter_type, fd->loop.step));
4309 else if (bias)
4310 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4311 if (fd->ordered && fd->collapse == 1)
4313 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4314 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4315 fd->loop.n1, fold_convert (sizetype, t));
4316 else
4318 t = fold_convert (TREE_TYPE (startvar), t);
4319 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4320 fd->loop.n1, t);
4323 else
4325 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4326 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4327 t = fold_convert (TREE_TYPE (startvar), t);
4329 t = force_gimple_operand_gsi (&gsi, t,
4330 DECL_P (startvar)
4331 && TREE_ADDRESSABLE (startvar),
4332 NULL_TREE, false, GSI_CONTINUE_LINKING);
4333 assign_stmt = gimple_build_assign (startvar, t);
4334 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4335 if (cond_var)
4337 tree itype = TREE_TYPE (cond_var);
4338 /* For lastprivate(conditional:) itervar, we need some iteration
4339 counter that starts at unsigned non-zero and increases.
4340 Prefer as few IVs as possible, so if we can use startvar
4341 itself, use that, or startvar + constant (those would be
4342 incremented with step), and as a last resort use a counter
4343 starting at s0 + 1 and incremented by 1. */
4344 if ((fd->ordered && fd->collapse == 1)
4345 || bias
4346 || POINTER_TYPE_P (type)
4347 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4348 || fd->loop.cond_code != LT_EXPR)
4349 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4350 build_int_cst (itype, 1));
4351 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4352 t = fold_convert (itype, t);
4353 else
4355 tree c = fold_convert (itype, fd->loop.n1);
4356 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4357 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4359 t = force_gimple_operand_gsi (&gsi, t, false,
4360 NULL_TREE, false, GSI_CONTINUE_LINKING);
4361 assign_stmt = gimple_build_assign (cond_var, t);
4362 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4365 t = iend0;
4366 if (fd->ordered && fd->collapse == 1)
4367 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4368 fold_convert (fd->iter_type, fd->loop.step));
4369 else if (bias)
4370 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4371 if (fd->ordered && fd->collapse == 1)
4373 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4374 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4375 fd->loop.n1, fold_convert (sizetype, t));
4376 else
4378 t = fold_convert (TREE_TYPE (startvar), t);
4379 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4380 fd->loop.n1, t);
4383 else
4385 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4386 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4387 t = fold_convert (TREE_TYPE (startvar), t);
4389 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4390 false, GSI_CONTINUE_LINKING);
4391 if (endvar)
4393 assign_stmt = gimple_build_assign (endvar, iend);
4394 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4395 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4396 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4397 else
4398 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4399 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4401 /* Handle linear clause adjustments. */
4402 tree itercnt = NULL_TREE;
4403 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4404 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4405 c; c = OMP_CLAUSE_CHAIN (c))
4406 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4407 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4409 tree d = OMP_CLAUSE_DECL (c);
4410 tree t = d, a, dest;
4411 if (omp_privatize_by_reference (t))
4412 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4413 tree type = TREE_TYPE (t);
4414 if (POINTER_TYPE_P (type))
4415 type = sizetype;
4416 dest = unshare_expr (t);
4417 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4418 expand_omp_build_assign (&gsif, v, t);
4419 if (itercnt == NULL_TREE)
4421 itercnt = startvar;
4422 tree n1 = fd->loop.n1;
4423 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4425 itercnt
4426 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4427 itercnt);
4428 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4430 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4431 itercnt, n1);
4432 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4433 itercnt, fd->loop.step);
4434 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4435 NULL_TREE, false,
4436 GSI_CONTINUE_LINKING);
4438 a = fold_build2 (MULT_EXPR, type,
4439 fold_convert (type, itercnt),
4440 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4441 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4442 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4443 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4444 false, GSI_CONTINUE_LINKING);
4445 expand_omp_build_assign (&gsi, dest, t, true);
4447 if (fd->collapse > 1)
4448 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4450 if (fd->ordered)
4452 /* Until now, the counts array contained, for the i-th loop, its
4453 number of iterations or a variable holding it. From now on, we
4454 usually need those counts only for collapsed loops, and only for
4455 the 2nd till the last collapsed one. Move those one element
4456 earlier; we'll use counts[fd->collapse - 1] for the first
4457 source/sink iteration counter and so on, and counts[fd->ordered]
4458 as the array holding the current counter values for
4459 depend(source). For doacross(sink:omp_cur_iteration - 1) we need
4460 the counts from fd->collapse to fd->ordered - 1; make a copy of
4461 those to counts[fd->ordered + 2] and onwards.
4462 counts[fd->ordered + 1] can be a flag whether it is the first
4463 iteration with a new collapsed counter (used only if
4464 fd->ordered > fd->collapse). */
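/* An illustrative sketch: for collapse(2) ordered(4) this rearranges

     counts[0..3]  iteration counts of loops 0..3

   into

     counts[0]     iteration count of loop 1
     counts[1..3]  source/sink iteration counters
     counts[4]     the .orditera array, counts[5] the first-iter flag
     counts[6..7]  saved iteration counts of loops 2..3

   where counts[1..5] are (re)filled by expand_omp_ordered_source_sink
   below. */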
4465 if (fd->ordered > fd->collapse)
4466 memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4467 (fd->ordered - fd->collapse) * sizeof (counts[0]));
4468 if (fd->collapse > 1)
4469 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4470 if (broken_loop)
4472 int i;
4473 for (i = fd->collapse; i < fd->ordered; i++)
4475 tree type = TREE_TYPE (fd->loops[i].v);
4476 tree this_cond
4477 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4478 fold_convert (type, fd->loops[i].n1),
4479 fold_convert (type, fd->loops[i].n2));
4480 if (!integer_onep (this_cond))
4481 break;
4483 if (i < fd->ordered)
4485 if (entry_bb->loop_father != l0_bb->loop_father)
4487 remove_bb_from_loops (l0_bb);
4488 add_bb_to_loop (l0_bb, entry_bb->loop_father);
4489 gcc_assert (single_succ (l0_bb) == l1_bb);
4491 cont_bb
4492 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4493 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4494 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4495 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4496 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4497 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4498 make_edge (cont_bb, l1_bb, 0);
4499 l2_bb = create_empty_bb (cont_bb);
4500 broken_loop = false;
4503 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4504 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4505 l0_bb, ordered_lastprivate);
4506 if (counts[fd->collapse - 1])
4508 gcc_assert (fd->collapse == 1);
4509 gsi = gsi_last_bb (l0_bb);
4510 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4511 istart0, true);
4512 if (cont_bb)
4514 gsi = gsi_last_bb (cont_bb);
4515 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4516 counts[fd->collapse - 1],
4517 build_int_cst (fd->iter_type, 1));
4518 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4519 tree aref = build4 (ARRAY_REF, fd->iter_type,
4520 counts[fd->ordered], size_zero_node,
4521 NULL_TREE, NULL_TREE);
4522 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4524 t = counts[fd->collapse - 1];
4526 else if (fd->collapse > 1)
4527 t = fd->loop.v;
4528 else
4530 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4531 fd->loops[0].v, fd->loops[0].n1);
4532 t = fold_convert (fd->iter_type, t);
4534 gsi = gsi_last_bb (l0_bb);
4535 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4536 size_zero_node, NULL_TREE, NULL_TREE);
4537 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4538 false, GSI_CONTINUE_LINKING);
4539 expand_omp_build_assign (&gsi, aref, t, true);
4542 if (!broken_loop)
4544 /* Code to control the increment and predicate for the sequential
4545 loop goes in the CONT_BB. */
4546 gsi = gsi_last_nondebug_bb (cont_bb);
4547 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4548 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4549 vmain = gimple_omp_continue_control_use (cont_stmt);
4550 vback = gimple_omp_continue_control_def (cont_stmt);
4552 if (cond_var)
4554 tree itype = TREE_TYPE (cond_var);
4555 tree t2;
4556 if ((fd->ordered && fd->collapse == 1)
4557 || bias
4558 || POINTER_TYPE_P (type)
4559 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4560 || fd->loop.cond_code != LT_EXPR)
4561 t2 = build_int_cst (itype, 1);
4562 else
4563 t2 = fold_convert (itype, fd->loop.step);
4564 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4565 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4566 NULL_TREE, true, GSI_SAME_STMT);
4567 assign_stmt = gimple_build_assign (cond_var, t2);
4568 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4571 if (!gimple_omp_for_combined_p (fd->for_stmt))
4573 if (POINTER_TYPE_P (type))
4574 t = fold_build_pointer_plus (vmain, fd->loop.step);
4575 else
4576 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4577 t = force_gimple_operand_gsi (&gsi, t,
4578 DECL_P (vback)
4579 && TREE_ADDRESSABLE (vback),
4580 NULL_TREE, true, GSI_SAME_STMT);
4581 assign_stmt = gimple_build_assign (vback, t);
4582 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4584 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4586 tree tem;
4587 if (fd->collapse > 1)
4588 tem = fd->loop.v;
4589 else
4591 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4592 fd->loops[0].v, fd->loops[0].n1);
4593 tem = fold_convert (fd->iter_type, tem);
4595 tree aref = build4 (ARRAY_REF, fd->iter_type,
4596 counts[fd->ordered], size_zero_node,
4597 NULL_TREE, NULL_TREE);
4598 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4599 true, GSI_SAME_STMT);
4600 expand_omp_build_assign (&gsi, aref, tem);
4603 t = build2 (fd->loop.cond_code, boolean_type_node,
4604 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4605 iend);
4606 gcond *cond_stmt = gimple_build_cond_empty (t);
4607 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4610 /* Remove GIMPLE_OMP_CONTINUE. */
4611 gsi_remove (&gsi, true);
4613 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4614 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4616 /* Emit code to get the next parallel iteration in L2_BB. */
4617 gsi = gsi_start_bb (l2_bb);
4619 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4620 build_fold_addr_expr (istart0),
4621 build_fold_addr_expr (iend0));
4622 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4623 false, GSI_CONTINUE_LINKING);
4624 if (TREE_TYPE (t) != boolean_type_node)
4625 t = fold_build2 (NE_EXPR, boolean_type_node,
4626 t, build_int_cst (TREE_TYPE (t), 0));
4627 gcond *cond_stmt = gimple_build_cond_empty (t);
4628 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4631 /* Add the loop cleanup function. */
4632 gsi = gsi_last_nondebug_bb (exit_bb);
4633 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4634 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4635 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4636 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4637 else
4638 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4639 gcall *call_stmt = gimple_build_call (t, 0);
4640 if (fd->ordered)
4642 tree arr = counts[fd->ordered];
4643 tree clobber = build_clobber (TREE_TYPE (arr));
4644 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4645 GSI_SAME_STMT);
4647 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4649 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4650 if (fd->have_reductemp)
4652 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4653 gimple_call_lhs (call_stmt));
4654 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4657 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4658 gsi_remove (&gsi, true);
4660 /* Connect the new blocks. */
4661 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4662 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4664 if (!broken_loop)
4666 gimple_seq phis;
4668 e = find_edge (cont_bb, l3_bb);
4669 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4671 phis = phi_nodes (l3_bb);
4672 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4674 gimple *phi = gsi_stmt (gsi);
4675 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4676 PHI_ARG_DEF_FROM_EDGE (phi, e));
4678 remove_edge (e);
4680 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4681 e = find_edge (cont_bb, l1_bb);
4682 if (e == NULL)
4684 e = BRANCH_EDGE (cont_bb);
4685 gcc_assert (single_succ (e->dest) == l1_bb);
4687 if (gimple_omp_for_combined_p (fd->for_stmt))
4689 remove_edge (e);
4690 e = NULL;
4692 else if (fd->collapse > 1)
4694 remove_edge (e);
4695 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4697 else
4698 e->flags = EDGE_TRUE_VALUE;
4699 if (e)
4701 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4702 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4704 else
4706 e = find_edge (cont_bb, l2_bb);
4707 e->flags = EDGE_FALLTHRU;
4709 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4711 if (gimple_in_ssa_p (cfun))
4713 /* Add phis to the outer loop that connect to the phis in the inner,
4714 original loop, and move the loop entry value of the inner phi to
4715 the loop entry value of the outer phi. */
4716 gphi_iterator psi;
4717 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4719 location_t locus;
4720 gphi *nphi;
4721 gphi *exit_phi = psi.phi ();
4723 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4724 continue;
4726 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4727 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4729 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4730 edge latch_to_l1 = find_edge (latch, l1_bb);
4731 gphi *inner_phi
4732 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4734 tree t = gimple_phi_result (exit_phi);
4735 tree new_res = copy_ssa_name (t, NULL);
4736 nphi = create_phi_node (new_res, l0_bb);
4738 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4739 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4740 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4741 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4742 add_phi_arg (nphi, t, entry_to_l0, locus);
4744 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4745 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4747 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4751 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4752 recompute_dominator (CDI_DOMINATORS, l2_bb));
4753 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4754 recompute_dominator (CDI_DOMINATORS, l3_bb));
4755 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4756 recompute_dominator (CDI_DOMINATORS, l0_bb));
4757 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4758 recompute_dominator (CDI_DOMINATORS, l1_bb));
4760 /* We enter expand_omp_for_generic with a loop. This original loop may
4761 have its own loop struct, or it may be part of an outer loop struct
4762 (which may be the fake loop). */
4763 class loop *outer_loop = entry_bb->loop_father;
4764 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4766 add_bb_to_loop (l2_bb, outer_loop);
4768 /* We've added a new loop around the original loop. Allocate the
4769 corresponding loop struct. */
4770 class loop *new_loop = alloc_loop ();
4771 new_loop->header = l0_bb;
4772 new_loop->latch = l2_bb;
4773 add_loop (new_loop, outer_loop);
4775 /* Allocate a loop structure for the original loop unless we already
4776 had one. */
4777 if (!orig_loop_has_loop_struct
4778 && !gimple_omp_for_combined_p (fd->for_stmt))
4780 class loop *orig_loop = alloc_loop ();
4781 orig_loop->header = l1_bb;
4782 /* The loop may have multiple latches. */
4783 add_loop (orig_loop, new_loop);
4788 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4789 compute the needed allocation size; if !ALLOC, this is for team
4790 allocations, if ALLOC, for thread allocations. SZ is the initial needed
4791 size for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4792 allocation in bytes, CNT the number of elements of each array; for !ALLOC
4793 this is omp_get_num_threads (), for ALLOC the number of iterations
4794 handled by the current thread. If PTR is non-NULL, it is the start of the
4795 allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of those
4796 _scantemp_ clauses pointers to the corresponding arrays. */
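/* As a hypothetical illustration: for two _scantemp_ clauses whose
   pointees are long long and int, with SZ == 0 and CNT the thread
   count, the sizing pass (PTR == NULL) would return (8 + 4) * CNT
   bytes plus any alignment padding, and the second pass would carve
   that block into a long long array followed by an int array, storing
   the two pointers into the clause decls. */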
4798 static tree
4799 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4800 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4801 gimple_stmt_iterator *gsi, bool alloc)
4803 tree eltsz = NULL_TREE;
4804 unsigned HOST_WIDE_INT preval = 0;
4805 if (ptr && sz)
4806 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4807 ptr, size_int (sz));
4808 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4809 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4810 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4811 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4813 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4814 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4815 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4817 unsigned HOST_WIDE_INT szl
4818 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4819 szl = least_bit_hwi (szl);
4820 if (szl)
4821 al = MIN (al, szl);
4823 if (ptr == NULL_TREE)
4825 if (eltsz == NULL_TREE)
4826 eltsz = TYPE_SIZE_UNIT (pointee_type);
4827 else
4828 eltsz = size_binop (PLUS_EXPR, eltsz,
4829 TYPE_SIZE_UNIT (pointee_type));
4831 if (preval == 0 && al <= alloc_align)
4833 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4834 sz += diff;
4835 if (diff && ptr)
4836 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4837 ptr, size_int (diff));
4839 else if (al > preval)
4841 if (ptr)
4843 ptr = fold_convert (pointer_sized_int_node, ptr);
4844 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4845 build_int_cst (pointer_sized_int_node,
4846 al - 1));
4847 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4848 build_int_cst (pointer_sized_int_node,
4849 -(HOST_WIDE_INT) al));
4850 ptr = fold_convert (ptr_type_node, ptr);
4852 else
4853 sz += al - 1;
4855 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4856 preval = al;
4857 else
4858 preval = 1;
4859 if (ptr)
4861 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4862 ptr = OMP_CLAUSE_DECL (c);
4863 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4864 size_binop (MULT_EXPR, cnt,
4865 TYPE_SIZE_UNIT (pointee_type)));
4869 if (ptr == NULL_TREE)
4871 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4872 if (sz)
4873 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4874 return eltsz;
4876 else
4877 return ptr;
4880 /* Return the last _looptemp_ clause if one has been created for
4881 lastprivate on distribute parallel for{, simd} or taskloop.
4882 FD is the loop data and INNERC should be the second _looptemp_
4883 clause (the one holding the end of the range).
4884 This is followed by collapse - 1 _looptemp_ clauses for the
4885 counts[1] and up, and for triangular loops followed by 4
4886 further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4887 one factor and one adjn1). After this there is optionally one
4888 _looptemp_ clause that this function returns. */
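/* Conceptually, for collapse(3) in the non-triangular case the chain is

     _looptemp_ (start)  _looptemp_ (end; passed in as INNERC)
     _looptemp_ (counts[1])  _looptemp_ (counts[2])
     [_looptemp_ (lastprivate)]

   and the optional last clause is what this function returns. */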
4890 static tree
4891 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4893 gcc_assert (innerc);
4894 int count = fd->collapse - 1;
4895 if (fd->non_rect
4896 && fd->last_nonrect == fd->first_nonrect + 1
4897 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4898 count += 4;
4899 for (int i = 0; i < count; i++)
4901 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4902 OMP_CLAUSE__LOOPTEMP_);
4903 gcc_assert (innerc);
4905 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4906 OMP_CLAUSE__LOOPTEMP_);
4909 /* A subroutine of expand_omp_for. Generate code for a parallel
4910 loop with static schedule and no specified chunk size. Given
4911 parameters:
4913 for (V = N1; V cond N2; V += STEP) BODY;
4915 where COND is "<" or ">", we generate pseudocode
4917 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4918 if (cond is <)
4919 adj = STEP - 1;
4920 else
4921 adj = STEP + 1;
4922 if ((__typeof (V)) -1 > 0 && cond is >)
4923 n = -(adj + N2 - N1) / -STEP;
4924 else
4925 n = (adj + N2 - N1) / STEP;
4926 q = n / nthreads;
4927 tt = n % nthreads;
4928 if (threadid < tt) goto L3; else goto L4;
4930 tt = 0;
4931 q = q + 1;
4933 s0 = q * threadid + tt;
4934 e0 = s0 + q;
4935 V = s0 * STEP + N1;
4936 if (s0 >= e0) goto L2; else goto L0;
4938 e = e0 * STEP + N1;
4940 BODY;
4941 V += STEP;
4942 if (V cond e) goto L1;
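/* A worked example with assumed values: for n = 103 and nthreads = 4,
   q = 25 and tt = 3, so after the threadid < tt adjustment threads
   0-2 execute 26 iterations each and thread 3 the remaining 25:

     thread 0: s0 = 0,  e0 = 26
     thread 1: s0 = 26, e0 = 52
     thread 2: s0 = 52, e0 = 78
     thread 3: s0 = 78, e0 = 103  */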
4946 static void
4947 expand_omp_for_static_nochunk (struct omp_region *region,
4948 struct omp_for_data *fd,
4949 gimple *inner_stmt)
4951 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4952 tree type, itype, vmain, vback;
4953 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4954 basic_block body_bb, cont_bb, collapse_bb = NULL;
4955 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4956 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4957 gimple_stmt_iterator gsi, gsip;
4958 edge ep;
4959 bool broken_loop = region->cont == NULL;
4960 tree *counts = NULL;
4961 tree n1, n2, step;
4962 tree reductions = NULL_TREE;
4963 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4965 itype = type = TREE_TYPE (fd->loop.v);
4966 if (POINTER_TYPE_P (type))
4967 itype = signed_type_for (type);
4969 entry_bb = region->entry;
4970 cont_bb = region->cont;
4971 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4972 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4973 gcc_assert (broken_loop
4974 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4975 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4976 body_bb = single_succ (seq_start_bb);
4977 if (!broken_loop)
4979 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4980 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4981 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4983 exit_bb = region->exit;
4985 /* Iteration space partitioning goes in ENTRY_BB. */
4986 gsi = gsi_last_nondebug_bb (entry_bb);
4987 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4988 gsip = gsi;
4989 gsi_prev (&gsip);
4991 if (fd->collapse > 1)
4993 int first_zero_iter = -1, dummy = -1;
4994 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4996 counts = XALLOCAVEC (tree, fd->collapse);
4997 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4998 fin_bb, first_zero_iter,
4999 dummy_bb, dummy, l2_dom_bb);
5000 t = NULL_TREE;
5002 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5003 t = integer_one_node;
5004 else
5005 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5006 fold_convert (type, fd->loop.n1),
5007 fold_convert (type, fd->loop.n2));
5008 if (fd->collapse == 1
5009 && TYPE_UNSIGNED (type)
5010 && (t == NULL_TREE || !integer_onep (t)))
5012 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5013 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5014 true, GSI_SAME_STMT);
5015 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5016 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5017 true, GSI_SAME_STMT);
5018 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5019 n1, n2);
5020 ep = split_block (entry_bb, cond_stmt);
5021 ep->flags = EDGE_TRUE_VALUE;
5022 entry_bb = ep->dest;
5023 ep->probability = profile_probability::very_likely ();
5024 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
5025 ep->probability = profile_probability::very_unlikely ();
5026 if (gimple_in_ssa_p (cfun))
5028 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
5029 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5030 !gsi_end_p (gpi); gsi_next (&gpi))
5032 gphi *phi = gpi.phi ();
5033 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5034 ep, UNKNOWN_LOCATION);
5037 gsi = gsi_last_bb (entry_bb);
5040 if (fd->lastprivate_conditional)
5042 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5043 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5044 if (fd->have_pointer_condtemp)
5045 condtemp = OMP_CLAUSE_DECL (c);
5046 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5047 cond_var = OMP_CLAUSE_DECL (c);
5049 if (fd->have_reductemp
5050 /* For scan, we don't want to reinitialize condtemp before the
5051 second loop. */
5052 || (fd->have_pointer_condtemp && !fd->have_scantemp)
5053 || fd->have_nonctrl_scantemp)
5055 tree t1 = build_int_cst (long_integer_type_node, 0);
5056 tree t2 = build_int_cst (long_integer_type_node, 1);
5057 tree t3 = build_int_cstu (long_integer_type_node,
5058 (HOST_WIDE_INT_1U << 31) + 1);
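/* The (HOST_WIDE_INT_1U << 31) + 1 schedule value corresponds to
   GFS_MONOTONIC | GFS_STATIC from gomp-constants.h, i.e. a monotonic
   static schedule for the GOMP_loop_start call built below. */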
5059 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5060 gimple_stmt_iterator gsi2 = gsi_none ();
5061 gimple *g = NULL;
5062 tree mem = null_pointer_node, memv = NULL_TREE;
5063 unsigned HOST_WIDE_INT condtemp_sz = 0;
5064 unsigned HOST_WIDE_INT alloc_align = 0;
5065 if (fd->have_reductemp)
5067 gcc_assert (!fd->have_nonctrl_scantemp);
5068 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5069 reductions = OMP_CLAUSE_DECL (c);
5070 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5071 g = SSA_NAME_DEF_STMT (reductions);
5072 reductions = gimple_assign_rhs1 (g);
5073 OMP_CLAUSE_DECL (c) = reductions;
5074 gsi2 = gsi_for_stmt (g);
5076 else
5078 if (gsi_end_p (gsip))
5079 gsi2 = gsi_after_labels (region->entry);
5080 else
5081 gsi2 = gsip;
5082 reductions = null_pointer_node;
5084 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
5086 tree type;
5087 if (fd->have_pointer_condtemp)
5088 type = TREE_TYPE (condtemp);
5089 else
5090 type = ptr_type_node;
5091 memv = create_tmp_var (type);
5092 TREE_ADDRESSABLE (memv) = 1;
5093 unsigned HOST_WIDE_INT sz = 0;
5094 tree size = NULL_TREE;
5095 if (fd->have_pointer_condtemp)
5097 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5098 sz *= fd->lastprivate_conditional;
5099 condtemp_sz = sz;
5101 if (fd->have_nonctrl_scantemp)
5103 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5104 gimple *g = gimple_build_call (nthreads, 0);
5105 nthreads = create_tmp_var (integer_type_node);
5106 gimple_call_set_lhs (g, nthreads);
5107 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5108 nthreads = fold_convert (sizetype, nthreads);
5109 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5110 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5111 alloc_align, nthreads, NULL,
5112 false);
5113 size = fold_convert (type, size);
5115 else
5116 size = build_int_cst (type, sz);
5117 expand_omp_build_assign (&gsi2, memv, size, false);
5118 mem = build_fold_addr_expr (memv);
5120 tree t
5121 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5122 9, t1, t2, t2, t3, t1, null_pointer_node,
5123 null_pointer_node, reductions, mem);
5124 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5125 true, GSI_SAME_STMT);
5126 if (fd->have_pointer_condtemp)
5127 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5128 if (fd->have_nonctrl_scantemp)
5130 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5131 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5132 alloc_align, nthreads, &gsi2, false);
5134 if (fd->have_reductemp)
5136 gsi_remove (&gsi2, true);
5137 release_ssa_name (gimple_assign_lhs (g));
5140 switch (gimple_omp_for_kind (fd->for_stmt))
5142 case GF_OMP_FOR_KIND_FOR:
5143 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5144 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5145 break;
5146 case GF_OMP_FOR_KIND_DISTRIBUTE:
5147 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5148 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5149 break;
5150 default:
5151 gcc_unreachable ();
5153 nthreads = build_call_expr (nthreads, 0);
5154 nthreads = fold_convert (itype, nthreads);
5155 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5156 true, GSI_SAME_STMT);
5157 threadid = build_call_expr (threadid, 0);
5158 threadid = fold_convert (itype, threadid);
5159 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5160 true, GSI_SAME_STMT);
5162 n1 = fd->loop.n1;
5163 n2 = fd->loop.n2;
5164 step = fd->loop.step;
5165 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5167 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5168 OMP_CLAUSE__LOOPTEMP_);
5169 gcc_assert (innerc);
5170 n1 = OMP_CLAUSE_DECL (innerc);
5171 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5172 OMP_CLAUSE__LOOPTEMP_);
5173 gcc_assert (innerc);
5174 n2 = OMP_CLAUSE_DECL (innerc);
5176 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5177 true, NULL_TREE, true, GSI_SAME_STMT);
5178 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5179 true, NULL_TREE, true, GSI_SAME_STMT);
5180 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5181 true, NULL_TREE, true, GSI_SAME_STMT);
5183 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5184 t = fold_build2 (PLUS_EXPR, itype, step, t);
5185 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5186 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5187 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5188 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5189 fold_build1 (NEGATE_EXPR, itype, t),
5190 fold_build1 (NEGATE_EXPR, itype, step));
5191 else
5192 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5193 t = fold_convert (itype, t);
5194 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
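/* A worked example of the trip count computed above (illustrative
   numbers, not from the original sources): for a loop
   for (V = 0; V < 10; V += 3) with cond_code LT_EXPR we get
   n = (3 - 1 + 10 - 0) / 3 = 4, i.e. the iterations V = 0, 3, 6, 9.  */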
5196 q = create_tmp_reg (itype, "q");
5197 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5198 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5199 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5201 tt = create_tmp_reg (itype, "tt");
5202 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5203 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5204 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5206 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5207 gcond *cond_stmt = gimple_build_cond_empty (t);
5208 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5210 second_bb = split_block (entry_bb, cond_stmt)->dest;
5211 gsi = gsi_last_nondebug_bb (second_bb);
5212 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5214 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5215 GSI_SAME_STMT);
5216 gassign *assign_stmt
5217 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5218 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5220 third_bb = split_block (second_bb, assign_stmt)->dest;
5221 gsi = gsi_last_nondebug_bb (third_bb);
5222 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5224 if (fd->have_nonctrl_scantemp)
5226 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5227 tree controlp = NULL_TREE, controlb = NULL_TREE;
5228 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5229 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5230 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5232 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5233 controlb = OMP_CLAUSE_DECL (c);
5234 else
5235 controlp = OMP_CLAUSE_DECL (c);
5236 if (controlb && controlp)
5237 break;
5239 gcc_assert (controlp && controlb);
5240 tree cnt = create_tmp_var (sizetype);
5241 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5242 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5243 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5244 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5245 alloc_align, cnt, NULL, true);
5246 tree size = create_tmp_var (sizetype);
5247 expand_omp_build_assign (&gsi, size, sz, false);
5248 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5249 size, size_int (16384));
5250 expand_omp_build_assign (&gsi, controlb, cmp);
5251 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5252 NULL_TREE, NULL_TREE);
5253 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5254 fourth_bb = split_block (third_bb, g)->dest;
5255 gsi = gsi_last_nondebug_bb (fourth_bb);
5256 /* FIXME: Once we have allocators, this should use the allocator. */
5257 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5258 gimple_call_set_lhs (g, controlp);
5259 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5260 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5261 &gsi, true);
5262 gsi_prev (&gsi);
5263 g = gsi_stmt (gsi);
5264 fifth_bb = split_block (fourth_bb, g)->dest;
5265 gsi = gsi_last_nondebug_bb (fifth_bb);
5267 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5268 gimple_call_set_lhs (g, controlp);
5269 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5270 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5271 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5272 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5273 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5275 tree tmp = create_tmp_var (sizetype);
5276 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5277 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5278 TYPE_SIZE_UNIT (pointee_type));
5279 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5280 g = gimple_build_call (alloca_decl, 2, tmp,
5281 size_int (TYPE_ALIGN (pointee_type)));
5282 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5283 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5286 sixth_bb = split_block (fifth_bb, g)->dest;
5287 gsi = gsi_last_nondebug_bb (sixth_bb);
5290 t = build2 (MULT_EXPR, itype, q, threadid);
5291 t = build2 (PLUS_EXPR, itype, t, tt);
5292 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5294 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5295 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5297 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5298 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
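/* Illustration of the static (no chunk) partitioning above, with
   hypothetical numbers: for n = 10 and nthreads = 4, q = 2 and
   tt = 2.  Threads 0 and 1 take the branch that clears tt and bumps
   q to 3, giving [s0, e0) ranges [0, 3) and [3, 6); threads 2 and 3
   keep q = 2 and get [6, 8) and [8, 10).  */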
5300 /* Remove the GIMPLE_OMP_FOR statement. */
5301 gsi_remove (&gsi, true);
5303 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5304 gsi = gsi_start_bb (seq_start_bb);
5306 tree startvar = fd->loop.v;
5307 tree endvar = NULL_TREE;
5309 if (gimple_omp_for_combined_p (fd->for_stmt))
5311 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5312 ? gimple_omp_parallel_clauses (inner_stmt)
5313 : gimple_omp_for_clauses (inner_stmt);
5314 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5315 gcc_assert (innerc);
5316 startvar = OMP_CLAUSE_DECL (innerc);
5317 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5318 OMP_CLAUSE__LOOPTEMP_);
5319 gcc_assert (innerc);
5320 endvar = OMP_CLAUSE_DECL (innerc);
5321 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5322 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5324 innerc = find_lastprivate_looptemp (fd, innerc);
5325 if (innerc)
5327 /* If needed (distribute parallel for with lastprivate),
5328 propagate down the total number of iterations. */
5329 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5330 fd->loop.n2);
5331 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5332 GSI_CONTINUE_LINKING);
5333 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5334 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5338 t = fold_convert (itype, s0);
5339 t = fold_build2 (MULT_EXPR, itype, t, step);
5340 if (POINTER_TYPE_P (type))
5342 t = fold_build_pointer_plus (n1, t);
5343 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5344 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5345 t = fold_convert (signed_type_for (type), t);
5347 else
5348 t = fold_build2 (PLUS_EXPR, type, t, n1);
5349 t = fold_convert (TREE_TYPE (startvar), t);
5350 t = force_gimple_operand_gsi (&gsi, t,
5351 DECL_P (startvar)
5352 && TREE_ADDRESSABLE (startvar),
5353 NULL_TREE, false, GSI_CONTINUE_LINKING);
5354 assign_stmt = gimple_build_assign (startvar, t);
5355 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5356 if (cond_var)
5358 tree itype = TREE_TYPE (cond_var);
5359 /* For the lastprivate(conditional:) itervar, we need some iteration
5360 counter that starts at a non-zero unsigned value and increases.
5361 Prefer as few IVs as possible, so if we can use startvar
5362 itself, use that, or startvar + constant (those would be
5363 incremented with step), and as a last resort use s0 + 1,
5364 incremented by 1 each iteration. */
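/* Sketch of the three cases handled below (illustrative, not from
   the original sources): for "for (V = 4; V < N2; V++)" startvar
   itself is already a non-zero increasing counter; for
   "for (V = -2; V < N2; V++)" the counter is startvar + 3 so that
   it starts at 1; for pointer IVs, non-constant N1 or a ">"
   condition, the fallback counter s0 + 1 is used.  */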
5365 if (POINTER_TYPE_P (type)
5366 || TREE_CODE (n1) != INTEGER_CST
5367 || fd->loop.cond_code != LT_EXPR)
5368 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5369 build_int_cst (itype, 1));
5370 else if (tree_int_cst_sgn (n1) == 1)
5371 t = fold_convert (itype, t);
5372 else
5374 tree c = fold_convert (itype, n1);
5375 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5376 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5378 t = force_gimple_operand_gsi (&gsi, t, false,
5379 NULL_TREE, false, GSI_CONTINUE_LINKING);
5380 assign_stmt = gimple_build_assign (cond_var, t);
5381 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5384 t = fold_convert (itype, e0);
5385 t = fold_build2 (MULT_EXPR, itype, t, step);
5386 if (POINTER_TYPE_P (type))
5388 t = fold_build_pointer_plus (n1, t);
5389 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5390 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5391 t = fold_convert (signed_type_for (type), t);
5393 else
5394 t = fold_build2 (PLUS_EXPR, type, t, n1);
5395 t = fold_convert (TREE_TYPE (startvar), t);
5396 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5397 false, GSI_CONTINUE_LINKING);
5398 if (endvar)
5400 assign_stmt = gimple_build_assign (endvar, e);
5401 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5402 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5403 assign_stmt = gimple_build_assign (fd->loop.v, e);
5404 else
5405 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5406 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5408 /* Handle linear clause adjustments. */
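/* E.g. (hypothetical source) for
     #pragma omp for linear(x:2)
   each thread, before executing its first iteration, adjusts its
   copy of x by itercnt * 2, where itercnt is the number of logical
   iterations preceding its chunk, as computed below.  */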
5409 tree itercnt = NULL_TREE;
5410 tree *nonrect_bounds = NULL;
5411 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5412 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5413 c; c = OMP_CLAUSE_CHAIN (c))
5414 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5415 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5417 tree d = OMP_CLAUSE_DECL (c);
5418 tree t = d, a, dest;
5419 if (omp_privatize_by_reference (t))
5420 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5421 if (itercnt == NULL_TREE)
5423 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5425 itercnt = fold_build2 (MINUS_EXPR, itype,
5426 fold_convert (itype, n1),
5427 fold_convert (itype, fd->loop.n1));
5428 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5429 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5430 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5431 NULL_TREE, false,
5432 GSI_CONTINUE_LINKING);
5434 else
5435 itercnt = s0;
5437 tree type = TREE_TYPE (t);
5438 if (POINTER_TYPE_P (type))
5439 type = sizetype;
5440 a = fold_build2 (MULT_EXPR, type,
5441 fold_convert (type, itercnt),
5442 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5443 dest = unshare_expr (t);
5444 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5445 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5446 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5447 false, GSI_CONTINUE_LINKING);
5448 expand_omp_build_assign (&gsi, dest, t, true);
5450 if (fd->collapse > 1)
5452 if (fd->non_rect)
5454 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5455 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5457 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5458 startvar);
5461 if (!broken_loop)
5463 /* The code controlling the sequential loop replaces the
5464 GIMPLE_OMP_CONTINUE. */
5465 gsi = gsi_last_nondebug_bb (cont_bb);
5466 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5467 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5468 vmain = gimple_omp_continue_control_use (cont_stmt);
5469 vback = gimple_omp_continue_control_def (cont_stmt);
5471 if (cond_var)
5473 tree itype = TREE_TYPE (cond_var);
5474 tree t2;
5475 if (POINTER_TYPE_P (type)
5476 || TREE_CODE (n1) != INTEGER_CST
5477 || fd->loop.cond_code != LT_EXPR)
5478 t2 = build_int_cst (itype, 1);
5479 else
5480 t2 = fold_convert (itype, step);
5481 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5482 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5483 NULL_TREE, true, GSI_SAME_STMT);
5484 assign_stmt = gimple_build_assign (cond_var, t2);
5485 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5488 if (!gimple_omp_for_combined_p (fd->for_stmt))
5490 if (POINTER_TYPE_P (type))
5491 t = fold_build_pointer_plus (vmain, step);
5492 else
5493 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5494 t = force_gimple_operand_gsi (&gsi, t,
5495 DECL_P (vback)
5496 && TREE_ADDRESSABLE (vback),
5497 NULL_TREE, true, GSI_SAME_STMT);
5498 assign_stmt = gimple_build_assign (vback, t);
5499 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5501 t = build2 (fd->loop.cond_code, boolean_type_node,
5502 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5503 ? t : vback, e);
5504 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5507 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5508 gsi_remove (&gsi, true);
5510 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5511 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5512 cont_bb, body_bb);
5515 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5516 gsi = gsi_last_nondebug_bb (exit_bb);
5517 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5519 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5520 if (fd->have_reductemp
5521 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5522 && !fd->have_nonctrl_scantemp))
5524 tree fn;
5525 if (t)
5526 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5527 else
5528 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5529 gcall *g = gimple_build_call (fn, 0);
5530 if (t)
5532 gimple_call_set_lhs (g, t);
5533 if (fd->have_reductemp)
5534 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5535 NOP_EXPR, t),
5536 GSI_SAME_STMT);
5538 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5540 else
5541 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5543 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5544 && !fd->have_nonctrl_scantemp)
5546 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5547 gcall *g = gimple_build_call (fn, 0);
5548 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5550 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5552 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5553 tree controlp = NULL_TREE, controlb = NULL_TREE;
5554 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5555 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5556 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5558 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5559 controlb = OMP_CLAUSE_DECL (c);
5560 else
5561 controlp = OMP_CLAUSE_DECL (c);
5562 if (controlb && controlp)
5563 break;
5565 gcc_assert (controlp && controlb);
5566 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5567 NULL_TREE, NULL_TREE);
5568 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5569 exit1_bb = split_block (exit_bb, g)->dest;
5570 gsi = gsi_after_labels (exit1_bb);
5571 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5572 controlp);
5573 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5574 exit2_bb = split_block (exit1_bb, g)->dest;
5575 gsi = gsi_after_labels (exit2_bb);
5576 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5577 controlp);
5578 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5579 exit3_bb = split_block (exit2_bb, g)->dest;
5580 gsi = gsi_after_labels (exit3_bb);
5582 gsi_remove (&gsi, true);
5584 /* Connect all the blocks. */
5585 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5586 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5587 ep = find_edge (entry_bb, second_bb);
5588 ep->flags = EDGE_TRUE_VALUE;
5589 ep->probability = profile_probability::guessed_always () / 4;
5590 if (fourth_bb)
5592 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5593 ep->probability = profile_probability::guessed_always () / 2;
5594 ep = find_edge (third_bb, fourth_bb);
5595 ep->flags = EDGE_TRUE_VALUE;
5596 ep->probability = profile_probability::guessed_always () / 2;
5597 ep = find_edge (fourth_bb, fifth_bb);
5598 redirect_edge_and_branch (ep, sixth_bb);
5600 else
5601 sixth_bb = third_bb;
5602 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5603 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5604 if (exit1_bb)
5606 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5607 ep->probability = profile_probability::guessed_always () / 2;
5608 ep = find_edge (exit_bb, exit1_bb);
5609 ep->flags = EDGE_TRUE_VALUE;
5610 ep->probability = profile_probability::guessed_always () / 2;
5611 ep = find_edge (exit1_bb, exit2_bb);
5612 redirect_edge_and_branch (ep, exit3_bb);
5615 if (!broken_loop)
5617 ep = find_edge (cont_bb, body_bb);
5618 if (ep == NULL)
5620 ep = BRANCH_EDGE (cont_bb);
5621 gcc_assert (single_succ (ep->dest) == body_bb);
5623 if (gimple_omp_for_combined_p (fd->for_stmt))
5625 remove_edge (ep);
5626 ep = NULL;
5628 else if (fd->collapse > 1)
5630 remove_edge (ep);
5631 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5633 else
5634 ep->flags = EDGE_TRUE_VALUE;
5635 find_edge (cont_bb, fin_bb)->flags
5636 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5639 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5640 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5641 if (fourth_bb)
5643 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5644 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5646 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5648 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5649 recompute_dominator (CDI_DOMINATORS, body_bb));
5650 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5651 recompute_dominator (CDI_DOMINATORS, fin_bb));
5652 if (exit1_bb)
5654 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5655 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5658 class loop *loop = body_bb->loop_father;
5659 if (loop != entry_bb->loop_father)
5661 gcc_assert (broken_loop || loop->header == body_bb);
5662 gcc_assert (broken_loop
5663 || loop->latch == region->cont
5664 || single_pred (loop->latch) == region->cont);
5665 return;
5668 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5670 loop = alloc_loop ();
5671 loop->header = body_bb;
5672 if (collapse_bb == NULL)
5673 loop->latch = cont_bb;
5674 add_loop (loop, body_bb->loop_father);
5678 /* Return the phi in E->DEST with ARG on edge E, or NULL if there is none. */
5680 static gphi *
5681 find_phi_with_arg_on_edge (tree arg, edge e)
5683 basic_block bb = e->dest;
5685 for (gphi_iterator gpi = gsi_start_phis (bb);
5686 !gsi_end_p (gpi);
5687 gsi_next (&gpi))
5689 gphi *phi = gpi.phi ();
5690 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5691 return phi;
5694 return NULL;
5697 /* A subroutine of expand_omp_for. Generate code for a parallel
5698 loop with static schedule and a specified chunk size. Given
5699 parameters:
5701 for (V = N1; V cond N2; V += STEP) BODY;
5703 where COND is "<" or ">", we generate pseudocode
5705 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5706 if (cond is <)
5707 adj = STEP - 1;
5708 else
5709 adj = STEP + 1;
5710 if ((__typeof (V)) -1 > 0 && cond is >)
5711 n = -(adj + N2 - N1) / -STEP;
5712 else
5713 n = (adj + N2 - N1) / STEP;
5714 trip = 0;
5715 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5716 here so that V is defined
5717 if the loop is not entered
5718 L0:
5719 s0 = (trip * nthreads + threadid) * CHUNK;
5720 e0 = min (s0 + CHUNK, n);
5721 if (s0 < n) goto L1; else goto L4;
5722 L1:
5723 V = s0 * STEP + N1;
5724 e = e0 * STEP + N1;
5725 L2:
5726 BODY;
5727 V += STEP;
5728 if (V cond e) goto L2; else goto L3;
5729 L3:
5730 trip += 1;
5731 goto L0;
5732 L4:
5733 */
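/* A source-level example that would be expanded this way
   (illustrative, not from the original sources):

     #pragma omp for schedule(static, 4)
     for (int i = 0; i < 100; i++)
       body (i);

   With 3 threads, thread 1 first executes i = 4..7 (trip 0), then
   i = 16..19 (trip 1), and so on, the start of its chunk advancing
   by nthreads * CHUNK = 12 iterations per trip.  */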
5735 static void
5736 expand_omp_for_static_chunk (struct omp_region *region,
5737 struct omp_for_data *fd, gimple *inner_stmt)
5739 tree n, s0, e0, e, t;
5740 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5741 tree type, itype, vmain, vback, vextra;
5742 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5743 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5744 gimple_stmt_iterator gsi, gsip;
5745 edge se;
5746 bool broken_loop = region->cont == NULL;
5747 tree *counts = NULL;
5748 tree n1, n2, step;
5749 tree reductions = NULL_TREE;
5750 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5752 itype = type = TREE_TYPE (fd->loop.v);
5753 if (POINTER_TYPE_P (type))
5754 itype = signed_type_for (type);
5756 entry_bb = region->entry;
5757 se = split_block (entry_bb, last_nondebug_stmt (entry_bb));
5758 entry_bb = se->src;
5759 iter_part_bb = se->dest;
5760 cont_bb = region->cont;
5761 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5762 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5763 gcc_assert (broken_loop
5764 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5765 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5766 body_bb = single_succ (seq_start_bb);
5767 if (!broken_loop)
5769 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5770 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5771 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5772 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5774 exit_bb = region->exit;
5776 /* Trip and adjustment setup goes in ENTRY_BB. */
5777 gsi = gsi_last_nondebug_bb (entry_bb);
5778 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5779 gsip = gsi;
5780 gsi_prev (&gsip);
5782 if (fd->collapse > 1)
5784 int first_zero_iter = -1, dummy = -1;
5785 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5787 counts = XALLOCAVEC (tree, fd->collapse);
5788 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5789 fin_bb, first_zero_iter,
5790 dummy_bb, dummy, l2_dom_bb);
5791 t = NULL_TREE;
5793 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5794 t = integer_one_node;
5795 else
5796 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5797 fold_convert (type, fd->loop.n1),
5798 fold_convert (type, fd->loop.n2));
5799 if (fd->collapse == 1
5800 && TYPE_UNSIGNED (type)
5801 && (t == NULL_TREE || !integer_onep (t)))
5803 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5804 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5805 true, GSI_SAME_STMT);
5806 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5807 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5808 true, GSI_SAME_STMT);
5809 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5810 n1, n2);
5811 se = split_block (entry_bb, cond_stmt);
5812 se->flags = EDGE_TRUE_VALUE;
5813 entry_bb = se->dest;
5814 se->probability = profile_probability::very_likely ();
5815 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5816 se->probability = profile_probability::very_unlikely ();
5817 if (gimple_in_ssa_p (cfun))
5819 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5820 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5821 !gsi_end_p (gpi); gsi_next (&gpi))
5823 gphi *phi = gpi.phi ();
5824 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5825 se, UNKNOWN_LOCATION);
5828 gsi = gsi_last_bb (entry_bb);
5831 if (fd->lastprivate_conditional)
5833 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5834 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5835 if (fd->have_pointer_condtemp)
5836 condtemp = OMP_CLAUSE_DECL (c);
5837 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5838 cond_var = OMP_CLAUSE_DECL (c);
5840 if (fd->have_reductemp || fd->have_pointer_condtemp)
5842 tree t1 = build_int_cst (long_integer_type_node, 0);
5843 tree t2 = build_int_cst (long_integer_type_node, 1);
5844 tree t3 = build_int_cstu (long_integer_type_node,
5845 (HOST_WIDE_INT_1U << 31) + 1);
5846 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5847 gimple_stmt_iterator gsi2 = gsi_none ();
5848 gimple *g = NULL;
5849 tree mem = null_pointer_node, memv = NULL_TREE;
5850 if (fd->have_reductemp)
5852 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5853 reductions = OMP_CLAUSE_DECL (c);
5854 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5855 g = SSA_NAME_DEF_STMT (reductions);
5856 reductions = gimple_assign_rhs1 (g);
5857 OMP_CLAUSE_DECL (c) = reductions;
5858 gsi2 = gsi_for_stmt (g);
5860 else
5862 if (gsi_end_p (gsip))
5863 gsi2 = gsi_after_labels (region->entry);
5864 else
5865 gsi2 = gsip;
5866 reductions = null_pointer_node;
5868 if (fd->have_pointer_condtemp)
5870 tree type = TREE_TYPE (condtemp);
5871 memv = create_tmp_var (type);
5872 TREE_ADDRESSABLE (memv) = 1;
5873 unsigned HOST_WIDE_INT sz
5874 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5875 sz *= fd->lastprivate_conditional;
5876 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5877 false);
5878 mem = build_fold_addr_expr (memv);
5880 tree t
5881 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5882 9, t1, t2, t2, t3, t1, null_pointer_node,
5883 null_pointer_node, reductions, mem);
5884 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5885 true, GSI_SAME_STMT);
5886 if (fd->have_pointer_condtemp)
5887 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5888 if (fd->have_reductemp)
5890 gsi_remove (&gsi2, true);
5891 release_ssa_name (gimple_assign_lhs (g));
5894 switch (gimple_omp_for_kind (fd->for_stmt))
5896 case GF_OMP_FOR_KIND_FOR:
5897 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5898 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5899 break;
5900 case GF_OMP_FOR_KIND_DISTRIBUTE:
5901 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5902 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5903 break;
5904 default:
5905 gcc_unreachable ();
5907 nthreads = build_call_expr (nthreads, 0);
5908 nthreads = fold_convert (itype, nthreads);
5909 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5910 true, GSI_SAME_STMT);
5911 threadid = build_call_expr (threadid, 0);
5912 threadid = fold_convert (itype, threadid);
5913 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5914 true, GSI_SAME_STMT);
5916 n1 = fd->loop.n1;
5917 n2 = fd->loop.n2;
5918 step = fd->loop.step;
5919 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5921 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5922 OMP_CLAUSE__LOOPTEMP_);
5923 gcc_assert (innerc);
5924 n1 = OMP_CLAUSE_DECL (innerc);
5925 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5926 OMP_CLAUSE__LOOPTEMP_);
5927 gcc_assert (innerc);
5928 n2 = OMP_CLAUSE_DECL (innerc);
5930 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5931 true, NULL_TREE, true, GSI_SAME_STMT);
5932 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5933 true, NULL_TREE, true, GSI_SAME_STMT);
5934 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5935 true, NULL_TREE, true, GSI_SAME_STMT);
5936 tree chunk_size = fold_convert (itype, fd->chunk_size);
5937 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5938 chunk_size
5939 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5940 GSI_SAME_STMT);
5942 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5943 t = fold_build2 (PLUS_EXPR, itype, step, t);
5944 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5945 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5946 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5947 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5948 fold_build1 (NEGATE_EXPR, itype, t),
5949 fold_build1 (NEGATE_EXPR, itype, step));
5950 else
5951 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5952 t = fold_convert (itype, t);
5953 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5954 true, GSI_SAME_STMT);
5956 trip_var = create_tmp_reg (itype, ".trip");
5957 if (gimple_in_ssa_p (cfun))
5959 trip_init = make_ssa_name (trip_var);
5960 trip_main = make_ssa_name (trip_var);
5961 trip_back = make_ssa_name (trip_var);
5963 else
5965 trip_init = trip_var;
5966 trip_main = trip_var;
5967 trip_back = trip_var;
5970 gassign *assign_stmt
5971 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5972 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5974 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5975 t = fold_build2 (MULT_EXPR, itype, t, step);
5976 if (POINTER_TYPE_P (type))
5977 t = fold_build_pointer_plus (n1, t);
5978 else
5979 t = fold_build2 (PLUS_EXPR, type, t, n1);
5980 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5981 true, GSI_SAME_STMT);
5983 /* Remove the GIMPLE_OMP_FOR. */
5984 gsi_remove (&gsi, true);
5986 gimple_stmt_iterator gsif = gsi;
5988 /* Iteration space partitioning goes in ITER_PART_BB. */
5989 gsi = gsi_last_bb (iter_part_bb);
5991 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5992 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5993 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5994 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5995 false, GSI_CONTINUE_LINKING);
5997 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5998 t = fold_build2 (MIN_EXPR, itype, t, n);
5999 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6000 false, GSI_CONTINUE_LINKING);
6002 t = build2 (LT_EXPR, boolean_type_node, s0, n);
6003 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
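/* Illustration with hypothetical numbers: for nthreads = 4,
   threadid = 1, chunk_size = 2 and trip_main = 3 this computes
   s0 = (3 * 4 + 1) * 2 = 26 and e0 = min (26 + 2, n); once
   s0 >= n, the thread falls through to FIN_BB.  */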
6005 /* Setup code for sequential iteration goes in SEQ_START_BB. */
6006 gsi = gsi_start_bb (seq_start_bb);
6008 tree startvar = fd->loop.v;
6009 tree endvar = NULL_TREE;
6011 if (gimple_omp_for_combined_p (fd->for_stmt))
6013 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
6014 ? gimple_omp_parallel_clauses (inner_stmt)
6015 : gimple_omp_for_clauses (inner_stmt);
6016 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6017 gcc_assert (innerc);
6018 startvar = OMP_CLAUSE_DECL (innerc);
6019 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6020 OMP_CLAUSE__LOOPTEMP_);
6021 gcc_assert (innerc);
6022 endvar = OMP_CLAUSE_DECL (innerc);
6023 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
6024 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
6026 innerc = find_lastprivate_looptemp (fd, innerc);
6027 if (innerc)
6029 /* If needed (distribute parallel for with lastprivate),
6030 propagate down the total number of iterations. */
6031 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
6032 fd->loop.n2);
6033 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
6034 GSI_CONTINUE_LINKING);
6035 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6036 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6041 t = fold_convert (itype, s0);
6042 t = fold_build2 (MULT_EXPR, itype, t, step);
6043 if (POINTER_TYPE_P (type))
6045 t = fold_build_pointer_plus (n1, t);
6046 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6047 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6048 t = fold_convert (signed_type_for (type), t);
6050 else
6051 t = fold_build2 (PLUS_EXPR, type, t, n1);
6052 t = fold_convert (TREE_TYPE (startvar), t);
6053 t = force_gimple_operand_gsi (&gsi, t,
6054 DECL_P (startvar)
6055 && TREE_ADDRESSABLE (startvar),
6056 NULL_TREE, false, GSI_CONTINUE_LINKING);
6057 assign_stmt = gimple_build_assign (startvar, t);
6058 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6059 if (cond_var)
6061 tree itype = TREE_TYPE (cond_var);
6062 /* For the lastprivate(conditional:) itervar, we need some iteration
6063 counter that starts at a non-zero unsigned value and increases.
6064 Prefer as few IVs as possible, so if we can use startvar
6065 itself, use that, or startvar + constant (those would be
6066 incremented with step), and as a last resort use s0 + 1,
6067 incremented by 1 each iteration. */
6068 if (POINTER_TYPE_P (type)
6069 || TREE_CODE (n1) != INTEGER_CST
6070 || fd->loop.cond_code != LT_EXPR)
6071 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
6072 build_int_cst (itype, 1));
6073 else if (tree_int_cst_sgn (n1) == 1)
6074 t = fold_convert (itype, t);
6075 else
6077 tree c = fold_convert (itype, n1);
6078 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
6079 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
6081 t = force_gimple_operand_gsi (&gsi, t, false,
6082 NULL_TREE, false, GSI_CONTINUE_LINKING);
6083 assign_stmt = gimple_build_assign (cond_var, t);
6084 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6087 t = fold_convert (itype, e0);
6088 t = fold_build2 (MULT_EXPR, itype, t, step);
6089 if (POINTER_TYPE_P (type))
6091 t = fold_build_pointer_plus (n1, t);
6092 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
6093 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6094 t = fold_convert (signed_type_for (type), t);
6096 else
6097 t = fold_build2 (PLUS_EXPR, type, t, n1);
6098 t = fold_convert (TREE_TYPE (startvar), t);
6099 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6100 false, GSI_CONTINUE_LINKING);
6101 if (endvar)
6103 assign_stmt = gimple_build_assign (endvar, e);
6104 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6105 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6106 assign_stmt = gimple_build_assign (fd->loop.v, e);
6107 else
6108 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6111 /* Handle linear clause adjustments. */
6112 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6113 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6114 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6115 c; c = OMP_CLAUSE_CHAIN (c))
6116 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6117 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6119 tree d = OMP_CLAUSE_DECL (c);
6120 tree t = d, a, dest;
6121 if (omp_privatize_by_reference (t))
6122 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6123 tree type = TREE_TYPE (t);
6124 if (POINTER_TYPE_P (type))
6125 type = sizetype;
6126 dest = unshare_expr (t);
6127 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6128 expand_omp_build_assign (&gsif, v, t);
6129 if (itercnt == NULL_TREE)
6131 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6133 itercntbias
6134 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6135 fold_convert (itype, fd->loop.n1));
6136 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6137 itercntbias, step);
6138 itercntbias
6139 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6140 NULL_TREE, true,
6141 GSI_SAME_STMT);
6142 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6143 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6144 NULL_TREE, false,
6145 GSI_CONTINUE_LINKING);
6147 else
6148 itercnt = s0;
6150 a = fold_build2 (MULT_EXPR, type,
6151 fold_convert (type, itercnt),
6152 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6153 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6154 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6155 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6156 false, GSI_CONTINUE_LINKING);
6157 expand_omp_build_assign (&gsi, dest, t, true);
6159 if (fd->collapse > 1)
6160 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6162 if (!broken_loop)
6164 /* The code controlling the sequential loop goes in CONT_BB,
6165 replacing the GIMPLE_OMP_CONTINUE. */
6166 gsi = gsi_last_nondebug_bb (cont_bb);
6167 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6168 vmain = gimple_omp_continue_control_use (cont_stmt);
6169 vback = gimple_omp_continue_control_def (cont_stmt);
6171 if (cond_var)
6173 tree itype = TREE_TYPE (cond_var);
6174 tree t2;
6175 if (POINTER_TYPE_P (type)
6176 || TREE_CODE (n1) != INTEGER_CST
6177 || fd->loop.cond_code != LT_EXPR)
6178 t2 = build_int_cst (itype, 1);
6179 else
6180 t2 = fold_convert (itype, step);
6181 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6182 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6183 NULL_TREE, true, GSI_SAME_STMT);
6184 assign_stmt = gimple_build_assign (cond_var, t2);
6185 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6188 if (!gimple_omp_for_combined_p (fd->for_stmt))
6190 if (POINTER_TYPE_P (type))
6191 t = fold_build_pointer_plus (vmain, step);
6192 else
6193 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6194 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6195 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6196 true, GSI_SAME_STMT);
6197 assign_stmt = gimple_build_assign (vback, t);
6198 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6200 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6201 t = build2 (EQ_EXPR, boolean_type_node,
6202 build_int_cst (itype, 0),
6203 build_int_cst (itype, 1));
6204 else
6205 t = build2 (fd->loop.cond_code, boolean_type_node,
6206 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6207 ? t : vback, e);
6208 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6211 /* Remove GIMPLE_OMP_CONTINUE. */
6212 gsi_remove (&gsi, true);
6214 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6215 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6217 /* Trip update code goes into TRIP_UPDATE_BB. */
6218 gsi = gsi_start_bb (trip_update_bb);
6220 t = build_int_cst (itype, 1);
6221 t = build2 (PLUS_EXPR, itype, trip_main, t);
6222 assign_stmt = gimple_build_assign (trip_back, t);
6223 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6226 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6227 gsi = gsi_last_nondebug_bb (exit_bb);
6228 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6230 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6231 if (fd->have_reductemp || fd->have_pointer_condtemp)
6233 tree fn;
6234 if (t)
6235 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6236 else
6237 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6238 gcall *g = gimple_build_call (fn, 0);
6239 if (t)
6241 gimple_call_set_lhs (g, t);
6242 if (fd->have_reductemp)
6243 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6244 NOP_EXPR, t),
6245 GSI_SAME_STMT);
6247 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6249 else
6250 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6252 else if (fd->have_pointer_condtemp)
6254 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6255 gcall *g = gimple_build_call (fn, 0);
6256 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6258 gsi_remove (&gsi, true);
6260 /* Connect the new blocks. */
6261 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6262 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6264 if (!broken_loop)
6266 se = find_edge (cont_bb, body_bb);
6267 if (se == NULL)
6269 se = BRANCH_EDGE (cont_bb);
6270 gcc_assert (single_succ (se->dest) == body_bb);
6272 if (gimple_omp_for_combined_p (fd->for_stmt))
6274 remove_edge (se);
6275 se = NULL;
6277 else if (fd->collapse > 1)
6279 remove_edge (se);
6280 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6282 else
6283 se->flags = EDGE_TRUE_VALUE;
6284 find_edge (cont_bb, trip_update_bb)->flags
6285 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6287 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6288 iter_part_bb);
6291 if (gimple_in_ssa_p (cfun))
6293 gphi_iterator psi;
6294 gphi *phi;
6295 edge re, ene;
6296 edge_var_map *vm;
6297 size_t i;
6299 gcc_assert (fd->collapse == 1 && !broken_loop);
6301 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6302 remove arguments of the phi nodes in fin_bb. We need to create
6303 appropriate phi nodes in iter_part_bb instead. */
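/* Sketch of the rewiring (illustrative): a value that reached fin_bb
   both from entry_bb and from trip_update_bb now gets a new phi in
   iter_part_bb merging the entry value (vextra in the case of
   fd->loop.v) with the back edge value; the new phi result then
   feeds the corresponding phi of the inner loop and, if fin_bb kept
   other predecessors, the original phi in fin_bb.  */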
6304 se = find_edge (iter_part_bb, fin_bb);
6305 re = single_succ_edge (trip_update_bb);
6306 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6307 ene = single_succ_edge (entry_bb);
6309 psi = gsi_start_phis (fin_bb);
6310 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6311 gsi_next (&psi), ++i)
6313 gphi *nphi;
6314 location_t locus;
6316 phi = psi.phi ();
6317 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6318 redirect_edge_var_map_def (vm), 0))
6319 continue;
6321 t = gimple_phi_result (phi);
6322 gcc_assert (t == redirect_edge_var_map_result (vm));
6324 if (!single_pred_p (fin_bb))
6325 t = copy_ssa_name (t, phi);
6327 nphi = create_phi_node (t, iter_part_bb);
6329 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6330 locus = gimple_phi_arg_location_from_edge (phi, se);
6332 /* A special case -- fd->loop.v is not yet computed in
6333 iter_part_bb, so we need to use vextra instead. */
6334 if (t == fd->loop.v)
6335 t = vextra;
6336 add_phi_arg (nphi, t, ene, locus);
6337 locus = redirect_edge_var_map_location (vm);
6338 tree back_arg = redirect_edge_var_map_def (vm);
6339 add_phi_arg (nphi, back_arg, re, locus);
6340 edge ce = find_edge (cont_bb, body_bb);
6341 if (ce == NULL)
6343 ce = BRANCH_EDGE (cont_bb);
6344 gcc_assert (single_succ (ce->dest) == body_bb);
6345 ce = single_succ_edge (ce->dest);
6347 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6348 gcc_assert (inner_loop_phi != NULL);
6349 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6350 find_edge (seq_start_bb, body_bb), locus);
6352 if (!single_pred_p (fin_bb))
6353 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6355 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6356 redirect_edge_var_map_clear (re);
6357 if (single_pred_p (fin_bb))
6358 while (1)
6360 psi = gsi_start_phis (fin_bb);
6361 if (gsi_end_p (psi))
6362 break;
6363 remove_phi_node (&psi, false);
6366 /* Make phi node for trip. */
6367 phi = create_phi_node (trip_main, iter_part_bb);
6368 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6369 UNKNOWN_LOCATION);
6370 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6371 UNKNOWN_LOCATION);
6374 if (!broken_loop)
6375 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6376 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6377 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6378 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6379 recompute_dominator (CDI_DOMINATORS, fin_bb));
6380 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6381 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6382 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6383 recompute_dominator (CDI_DOMINATORS, body_bb));
6385 if (!broken_loop)
6387 class loop *loop = body_bb->loop_father;
6388 class loop *trip_loop = alloc_loop ();
6389 trip_loop->header = iter_part_bb;
6390 trip_loop->latch = trip_update_bb;
6391 add_loop (trip_loop, iter_part_bb->loop_father);
6393 if (loop != entry_bb->loop_father)
6395 gcc_assert (loop->header == body_bb);
6396 gcc_assert (loop->latch == region->cont
6397 || single_pred (loop->latch) == region->cont);
6398 trip_loop->inner = loop;
6399 return;
6402 if (!gimple_omp_for_combined_p (fd->for_stmt))
6404 loop = alloc_loop ();
6405 loop->header = body_bb;
6406 if (collapse_bb == NULL)
6407 loop->latch = cont_bb;
6408 add_loop (loop, trip_loop);
6413 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6414 loop. Given parameters:
6416 for (V = N1; V cond N2; V += STEP) BODY;
6418 where COND is "<" or ">", we generate pseudocode
6420 V = N1;
6421 goto L1;
6422 L0:
6423 BODY;
6424 V += STEP;
6425 L1:
6426 if (V cond N2) goto L0; else goto L2;
6427 L2:
6429 For collapsed loops, emit the outer loops as scalar
6430 and only try to vectorize the innermost loop. */
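/* A source-level example that would be expanded this way
   (illustrative, not from the original sources):

     #pragma omp simd safelen(8)
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   The loop is emitted as the scalar pseudocode above; the loop
   structure is annotated (safelen, simduid) so that the
   autovectorizer can vectorize it later with at most 8 concurrent
   iterations.  */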
6432 static void
6433 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6435 tree type, t;
6436 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6437 gimple_stmt_iterator gsi;
6438 gimple *stmt;
6439 gcond *cond_stmt;
6440 bool broken_loop = region->cont == NULL;
6441 edge e, ne;
6442 tree *counts = NULL;
6443 int i;
6444 int safelen_int = INT_MAX;
6445 bool dont_vectorize = false;
6446 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6447 OMP_CLAUSE_SAFELEN);
6448 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6449 OMP_CLAUSE__SIMDUID_);
6450 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6451 OMP_CLAUSE_IF);
6452 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6453 OMP_CLAUSE_SIMDLEN);
6454 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6455 OMP_CLAUSE__CONDTEMP_);
6456 tree n1, n2;
6457 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6459 if (safelen)
6461 poly_uint64 val;
6462 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6463 if (!poly_int_tree_p (safelen, &val))
6464 safelen_int = 0;
6465 else
6466 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6467 if (safelen_int == 1)
6468 safelen_int = 0;
6470 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6471 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6473 safelen_int = 0;
6474 dont_vectorize = true;
6476 type = TREE_TYPE (fd->loop.v);
6477 entry_bb = region->entry;
6478 cont_bb = region->cont;
6479 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6480 gcc_assert (broken_loop
6481 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6482 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6483 if (!broken_loop)
6485 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6486 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6487 l1_bb = split_block (cont_bb, last_nondebug_stmt (cont_bb))->dest;
6488 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6490 else
6492 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6493 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6494 l2_bb = single_succ (l1_bb);
6496 exit_bb = region->exit;
6497 l2_dom_bb = NULL;
6499 gsi = gsi_last_nondebug_bb (entry_bb);
6501 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6502 /* Not needed in SSA form right now. */
6503 gcc_assert (!gimple_in_ssa_p (cfun));
6504 if (fd->collapse > 1
6505 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6506 || broken_loop))
6508 int first_zero_iter = -1, dummy = -1;
6509 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6511 counts = XALLOCAVEC (tree, fd->collapse);
6512 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6513 zero_iter_bb, first_zero_iter,
6514 dummy_bb, dummy, l2_dom_bb);
6516 if (l2_dom_bb == NULL)
6517 l2_dom_bb = l1_bb;
6519 n1 = fd->loop.n1;
6520 n2 = fd->loop.n2;
6521 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6523 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6524 OMP_CLAUSE__LOOPTEMP_);
6525 gcc_assert (innerc);
6526 n1 = OMP_CLAUSE_DECL (innerc);
6527 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6528 OMP_CLAUSE__LOOPTEMP_);
6529 gcc_assert (innerc);
6530 n2 = OMP_CLAUSE_DECL (innerc);
6532 tree step = fd->loop.step;
6533 tree orig_step = step; /* May be different from step if is_simt. */
6535 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6536 OMP_CLAUSE__SIMT_);
6537 if (is_simt)
6539 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6540 is_simt = safelen_int > 1;
6542 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6543 if (is_simt)
6545 simt_lane = create_tmp_var (unsigned_type_node);
6546 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6547 gimple_call_set_lhs (g, simt_lane);
6548 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6549 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6550 fold_convert (TREE_TYPE (step), simt_lane));
6551 n1 = fold_convert (type, n1);
6552 if (POINTER_TYPE_P (type))
6553 n1 = fold_build_pointer_plus (n1, offset);
6554 else
6555 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6557 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6558 if (fd->collapse > 1)
6559 simt_maxlane = build_one_cst (unsigned_type_node);
6560 else if (safelen_int < omp_max_simt_vf ())
6561 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6562 tree vf
6563 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6564 unsigned_type_node, 0);
6565 if (simt_maxlane)
6566 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6567 vf = fold_convert (TREE_TYPE (step), vf);
6568 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
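/* Sketch with hypothetical numbers: with a SIMT VF of 4 and an
   original step of 1, lane 2 starts at N1 + 2 and the per-lane step
   becomes 4, so the four lanes interleave over the iteration
   space.  */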
6571 tree n2var = NULL_TREE;
6572 tree n2v = NULL_TREE;
6573 tree *nonrect_bounds = NULL;
6574 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6575 if (fd->collapse > 1)
6577 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6579 if (fd->non_rect)
6581 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6582 memset (nonrect_bounds, 0,
6583 sizeof (tree) * (fd->last_nonrect + 1));
6585 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6586 gcc_assert (entry_bb == gsi_bb (gsi));
6587 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6588 gsi_prev (&gsi);
6589 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6590 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6591 NULL, n1);
6592 gsi = gsi_for_stmt (fd->for_stmt);
6594 if (broken_loop)
6596 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6598 /* Compute in n2var the limit for the first innermost loop,
6599 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6600 where cnt is how many iterations the loop would have if
6601 all further iterations were assigned to the current task. */
6602 n2var = create_tmp_var (type);
6603 i = fd->collapse - 1;
6604 tree itype = TREE_TYPE (fd->loops[i].v);
6605 if (POINTER_TYPE_P (itype))
6606 itype = signed_type_for (itype);
6607 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6608 ? -1 : 1));
6609 t = fold_build2 (PLUS_EXPR, itype,
6610 fold_convert (itype, fd->loops[i].step), t);
6611 t = fold_build2 (PLUS_EXPR, itype, t,
6612 fold_convert (itype, fd->loops[i].n2));
6613 if (fd->loops[i].m2)
6615 tree t2 = fold_convert (itype,
6616 fd->loops[i - fd->loops[i].outer].v);
6617 tree t3 = fold_convert (itype, fd->loops[i].m2);
6618 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6619 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6621 t = fold_build2 (MINUS_EXPR, itype, t,
6622 fold_convert (itype, fd->loops[i].v));
6623 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6624 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6625 fold_build1 (NEGATE_EXPR, itype, t),
6626 fold_build1 (NEGATE_EXPR, itype,
6627 fold_convert (itype,
6628 fd->loops[i].step)));
6629 else
6630 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6631 fold_convert (itype, fd->loops[i].step));
6632 t = fold_convert (type, t);
6633 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6634 min_arg1 = create_tmp_var (type);
6635 expand_omp_build_assign (&gsi, min_arg1, t2);
6636 min_arg2 = create_tmp_var (type);
6637 expand_omp_build_assign (&gsi, min_arg2, t);
6639 else
6641 if (TREE_CODE (n2) == INTEGER_CST)
6643 /* Indicate for lastprivate handling that at least one iteration
6644 has been performed, without wasting runtime. */
6645 if (integer_nonzerop (n2))
6646 expand_omp_build_assign (&gsi, fd->loop.v,
6647 fold_convert (type, n2));
6648 else
6649 /* Indicate that no iteration has been performed. */
6650 expand_omp_build_assign (&gsi, fd->loop.v,
6651 build_one_cst (type));
6653 else
6655 expand_omp_build_assign (&gsi, fd->loop.v,
6656 build_zero_cst (type));
6657 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6659 for (i = 0; i < fd->collapse; i++)
6661 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6662 if (fd->loops[i].m1)
6664 tree t2
6665 = fold_convert (TREE_TYPE (t),
6666 fd->loops[i - fd->loops[i].outer].v);
6667 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6668 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6669 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6671 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6672 /* For normal non-combined collapsed loops just initialize
6673 the outermost iterator in the entry_bb. */
6674 if (!broken_loop)
6675 break;
6679 else
6680 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6681 tree altv = NULL_TREE, altn2 = NULL_TREE;
6682 if (fd->collapse == 1
6683 && !broken_loop
6684 && TREE_CODE (orig_step) != INTEGER_CST)
6686 /* The vectorizer currently punts on loops with non-constant steps
6687 for the main IV (it cannot compute the number of iterations and
6688 gives up because of that). Since for OpenMP loops it is always
6689 possible to compute the number of iterations upfront, use an
6690 alternate IV as the loop iterator:
6691 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6692 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
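/* Worked example with hypothetical numbers: n1 = 0, n2 = 10 and a
   runtime step of 3 give altn2 = (10 - 0 + 3 - 1) / 3 = 4, so altv
   iterates over 0, 1, 2, 3 while i takes the values 0, 3, 6, 9.  */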
6693 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6694 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6695 tree itype = TREE_TYPE (fd->loop.v);
6696 if (POINTER_TYPE_P (itype))
6697 itype = signed_type_for (itype);
6698 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6699 t = fold_build2 (PLUS_EXPR, itype,
6700 fold_convert (itype, step), t);
6701 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6702 t = fold_build2 (MINUS_EXPR, itype, t,
6703 fold_convert (itype, fd->loop.v));
6704 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6705 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6706 fold_build1 (NEGATE_EXPR, itype, t),
6707 fold_build1 (NEGATE_EXPR, itype,
6708 fold_convert (itype, step)));
6709 else
6710 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6711 fold_convert (itype, step));
6712 t = fold_convert (TREE_TYPE (altv), t);
6713 altn2 = create_tmp_var (TREE_TYPE (altv));
6714 expand_omp_build_assign (&gsi, altn2, t);
6715 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6716 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6717 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6718 true, GSI_SAME_STMT);
6719 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6720 build_zero_cst (TREE_TYPE (altv)));
6721 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6723 else if (fd->collapse > 1
6724 && !broken_loop
6725 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6726 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6728 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6729 altn2 = create_tmp_var (TREE_TYPE (altv));
6731 if (cond_var)
6733 if (POINTER_TYPE_P (type)
6734 || TREE_CODE (n1) != INTEGER_CST
6735 || fd->loop.cond_code != LT_EXPR
6736 || tree_int_cst_sgn (n1) != 1)
6737 expand_omp_build_assign (&gsi, cond_var,
6738 build_one_cst (TREE_TYPE (cond_var)));
6739 else
6740 expand_omp_build_assign (&gsi, cond_var,
6741 fold_convert (TREE_TYPE (cond_var), n1));
6744 /* Remove the GIMPLE_OMP_FOR statement. */
6745 gsi_remove (&gsi, true);
6747 if (!broken_loop)
6749 /* Code to control the increment goes in the CONT_BB. */
6750 gsi = gsi_last_nondebug_bb (cont_bb);
6751 stmt = gsi_stmt (gsi);
6752 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6754 if (fd->collapse == 1
6755 || gimple_omp_for_combined_into_p (fd->for_stmt))
6757 if (POINTER_TYPE_P (type))
6758 t = fold_build_pointer_plus (fd->loop.v, step);
6759 else
6760 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6761 expand_omp_build_assign (&gsi, fd->loop.v, t);
6763 else if (TREE_CODE (n2) != INTEGER_CST)
6764 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6765 if (altv)
6767 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6768 build_one_cst (TREE_TYPE (altv)));
6769 expand_omp_build_assign (&gsi, altv, t);
6772 if (fd->collapse > 1)
6774 i = fd->collapse - 1;
6775 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6776 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6777 else
6779 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6780 fd->loops[i].step);
6781 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6782 fd->loops[i].v, t);
6784 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6786 if (cond_var)
6788 if (POINTER_TYPE_P (type)
6789 || TREE_CODE (n1) != INTEGER_CST
6790 || fd->loop.cond_code != LT_EXPR
6791 || tree_int_cst_sgn (n1) != 1)
6792 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6793 build_one_cst (TREE_TYPE (cond_var)));
6794 else
6795 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6796 fold_convert (TREE_TYPE (cond_var), step));
6797 expand_omp_build_assign (&gsi, cond_var, t);
6800 /* Remove GIMPLE_OMP_CONTINUE. */
6801 gsi_remove (&gsi, true);
6804 /* Emit the condition in L1_BB. */
6805 gsi = gsi_start_bb (l1_bb);
6807 if (altv)
6808 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6809 else if (fd->collapse > 1
6810 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6811 && !broken_loop)
6813 i = fd->collapse - 1;
6814 tree itype = TREE_TYPE (fd->loops[i].v);
6815 if (fd->loops[i].m2)
6816 t = n2v = create_tmp_var (itype);
6817 else
6818 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6819 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6820 false, GSI_CONTINUE_LINKING);
6821 tree v = fd->loops[i].v;
6822 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6823 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6824 false, GSI_CONTINUE_LINKING);
6825 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6827 else
6829 if (fd->collapse > 1 && !broken_loop)
6830 t = n2var;
6831 else
6832 t = fold_convert (type, unshare_expr (n2));
6833 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6834 false, GSI_CONTINUE_LINKING);
6835 tree v = fd->loop.v;
6836 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6837 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6838 false, GSI_CONTINUE_LINKING);
6839 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6841 cond_stmt = gimple_build_cond_empty (t);
6842 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6843 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6844 NULL, NULL)
6845 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6846 NULL, NULL))
6848 gsi = gsi_for_stmt (cond_stmt);
6849 gimple_regimplify_operands (cond_stmt, &gsi);
6852 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6853 if (is_simt)
6855 gsi = gsi_start_bb (l2_bb);
6856 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6857 if (POINTER_TYPE_P (type))
6858 t = fold_build_pointer_plus (fd->loop.v, step);
6859 else
6860 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6861 expand_omp_build_assign (&gsi, fd->loop.v, t);
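/* E.g. (a sketch, assuming the SIMT lowering above stepped V by
   SIMT_VF * ORIG_STEP per iteration): with SIMT_VF = 32 and ORIG_STEP = 1,
   each lane advanced V in strides of 32, so the assignment above subtracts
   31 * 1 to bring V back to the value a scalar execution of the loop would
   have left in it.  */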
6864 /* Remove GIMPLE_OMP_RETURN. */
6865 gsi = gsi_last_nondebug_bb (exit_bb);
6866 gsi_remove (&gsi, true);
6868 /* Connect the new blocks. */
6869 remove_edge (FALLTHRU_EDGE (entry_bb));
6871 if (!broken_loop)
6873 remove_edge (BRANCH_EDGE (entry_bb));
6874 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6876 e = BRANCH_EDGE (l1_bb);
6877 ne = FALLTHRU_EDGE (l1_bb);
6878 e->flags = EDGE_TRUE_VALUE;
6880 else
6882 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6884 ne = single_succ_edge (l1_bb);
6885 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6888 ne->flags = EDGE_FALSE_VALUE;
6889 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6890 ne->probability = e->probability.invert ();
6892 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6893 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6895 if (simt_maxlane)
6897 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6898 NULL_TREE, NULL_TREE);
6899 gsi = gsi_last_bb (entry_bb);
6900 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6901 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6902 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6903 FALLTHRU_EDGE (entry_bb)->probability
6904 = profile_probability::guessed_always ().apply_scale (7, 8);
6905 BRANCH_EDGE (entry_bb)->probability
6906 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6907 l2_dom_bb = entry_bb;
6909 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6911 if (!broken_loop && fd->collapse > 1)
6913 basic_block last_bb = l1_bb;
6914 basic_block init_bb = NULL;
6915 for (i = fd->collapse - 2; i >= 0; i--)
6917 tree nextn2v = NULL_TREE;
6918 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6919 e = EDGE_SUCC (last_bb, 0);
6920 else
6921 e = EDGE_SUCC (last_bb, 1);
6922 basic_block bb = split_edge (e);
6923 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6924 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6925 else
6927 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6928 fd->loops[i].step);
6929 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6930 fd->loops[i].v, t);
6932 gsi = gsi_after_labels (bb);
6933 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6935 bb = split_block (bb, last_nondebug_stmt (bb))->dest;
6936 gsi = gsi_start_bb (bb);
6937 tree itype = TREE_TYPE (fd->loops[i].v);
6938 if (fd->loops[i].m2)
6939 t = nextn2v = create_tmp_var (itype);
6940 else
6941 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6942 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6943 false, GSI_CONTINUE_LINKING);
6944 tree v = fd->loops[i].v;
6945 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6946 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6947 false, GSI_CONTINUE_LINKING);
6948 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6949 cond_stmt = gimple_build_cond_empty (t);
6950 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6951 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6952 expand_omp_regimplify_p, NULL, NULL)
6953 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6954 expand_omp_regimplify_p, NULL, NULL))
6956 gsi = gsi_for_stmt (cond_stmt);
6957 gimple_regimplify_operands (cond_stmt, &gsi);
6959 ne = single_succ_edge (bb);
6960 ne->flags = EDGE_FALSE_VALUE;
6962 init_bb = create_empty_bb (bb);
6963 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6964 add_bb_to_loop (init_bb, bb->loop_father);
6965 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6966 e->probability
6967 = profile_probability::guessed_always ().apply_scale (7, 8);
6968 ne->probability = e->probability.invert ();
6970 gsi = gsi_after_labels (init_bb);
6971 if (fd->loops[i + 1].m1)
6973 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6974 fd->loops[i + 1
6975 - fd->loops[i + 1].outer].v);
6976 if (POINTER_TYPE_P (TREE_TYPE (t2)))
6977 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6978 else
6980 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6981 fd->loops[i + 1].n1);
6982 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6983 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6984 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6987 else
6988 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6989 fd->loops[i + 1].n1);
6990 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6991 if (fd->loops[i + 1].m2)
6993 if (i + 2 == fd->collapse && (n2var || altv))
6995 gcc_assert (n2v == NULL_TREE);
6996 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6998 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6999 fd->loops[i + 1
7000 - fd->loops[i + 1].outer].v);
7001 if (POINTER_TYPE_P (TREE_TYPE (t2)))
7002 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
7003 else
7005 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7006 fd->loops[i + 1].n2);
7007 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
7008 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
7009 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
7011 expand_omp_build_assign (&gsi, n2v, t);
7013 if (i + 2 == fd->collapse && n2var)
7015 /* For composite simd, n2 is the first iteration the current
7016 task shouldn't already handle, so we effectively want to use
7017 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
7018 as the vectorized loop. Except the vectorizer will not
7019 vectorize that, so instead compute N2VAR as
7020 N2VAR = V + MIN (N2 - V, COUNTS3) and use
7021 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
7022 as the loop to vectorize. */
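/* Worked sketch (editorial): if this task's range ends at N2 = 100, the
   outer IV is at V = 96 and the innermost loop has COUNTS3 = 8 iterations
   per outer iteration, then N2VAR = 96 + MIN (100 - 96, 8) = 100, so the
   vectorized inner loop stops at the task boundary instead of running all
   8 inner iterations.  */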
7023 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
7024 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
7026 tree itype = TREE_TYPE (fd->loops[i].v);
7027 if (POINTER_TYPE_P (itype))
7028 itype = signed_type_for (itype);
7029 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
7030 == LT_EXPR ? -1 : 1));
7031 t = fold_build2 (PLUS_EXPR, itype,
7032 fold_convert (itype,
7033 fd->loops[i + 1].step), t);
7034 if (fd->loops[i + 1].m2 == NULL_TREE)
7035 t = fold_build2 (PLUS_EXPR, itype, t,
7036 fold_convert (itype,
7037 fd->loops[i + 1].n2));
7038 else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
7040 t = fold_build_pointer_plus (n2v, t);
7041 t = fold_convert (itype, t);
7043 else
7044 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
7045 t = fold_build2 (MINUS_EXPR, itype, t,
7046 fold_convert (itype, fd->loops[i + 1].v));
7047 tree step = fold_convert (itype, fd->loops[i + 1].step);
7048 if (TYPE_UNSIGNED (itype)
7049 && fd->loops[i + 1].cond_code == GT_EXPR)
7050 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7051 fold_build1 (NEGATE_EXPR, itype, t),
7052 fold_build1 (NEGATE_EXPR, itype, step));
7053 else
7054 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7055 t = fold_convert (type, t);
7057 else
7058 t = counts[i + 1];
7059 expand_omp_build_assign (&gsi, min_arg1, t2);
7060 expand_omp_build_assign (&gsi, min_arg2, t);
7061 e = split_block (init_bb, last_nondebug_stmt (init_bb));
7062 gsi = gsi_after_labels (e->dest);
7063 init_bb = e->dest;
7064 remove_edge (FALLTHRU_EDGE (entry_bb));
7065 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
7066 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
7067 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
7068 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
7069 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
7070 expand_omp_build_assign (&gsi, n2var, t);
7072 if (i + 2 == fd->collapse && altv)
7074 /* The vectorizer currently punts on loops with non-constant
7075 steps for the main IV (it can't compute the number of iterations
7076 and gives up because of that). Since for OpenMP loops it is
7077 always possible to compute the number of iterations upfront,
7078 use an alternate IV as the loop iterator. */
7079 expand_omp_build_assign (&gsi, altv,
7080 build_zero_cst (TREE_TYPE (altv)));
7081 tree itype = TREE_TYPE (fd->loops[i + 1].v);
7082 if (POINTER_TYPE_P (itype))
7083 itype = signed_type_for (itype);
7084 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
7085 ? -1 : 1));
7086 t = fold_build2 (PLUS_EXPR, itype,
7087 fold_convert (itype, fd->loops[i + 1].step), t);
7088 t = fold_build2 (PLUS_EXPR, itype, t,
7089 fold_convert (itype,
7090 fd->loops[i + 1].m2
7091 ? n2v : fd->loops[i + 1].n2));
7092 t = fold_build2 (MINUS_EXPR, itype, t,
7093 fold_convert (itype, fd->loops[i + 1].v));
7094 tree step = fold_convert (itype, fd->loops[i + 1].step);
7095 if (TYPE_UNSIGNED (itype)
7096 && fd->loops[i + 1].cond_code == GT_EXPR)
7097 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7098 fold_build1 (NEGATE_EXPR, itype, t),
7099 fold_build1 (NEGATE_EXPR, itype, step));
7100 else
7101 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7102 t = fold_convert (TREE_TYPE (altv), t);
7103 expand_omp_build_assign (&gsi, altn2, t);
7104 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7105 fd->loops[i + 1].m2
7106 ? n2v : fd->loops[i + 1].n2);
7107 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7108 fd->loops[i + 1].v, t2);
7109 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7110 true, GSI_SAME_STMT);
7111 gassign *g
7112 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7113 build_zero_cst (TREE_TYPE (altv)));
7114 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7116 n2v = nextn2v;
7118 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7119 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7121 e = find_edge (entry_bb, last_bb);
7122 redirect_edge_succ (e, bb);
7123 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7124 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7127 last_bb = bb;
7130 if (!broken_loop)
7132 class loop *loop = alloc_loop ();
7133 loop->header = l1_bb;
7134 loop->latch = cont_bb;
7135 add_loop (loop, l1_bb->loop_father);
7136 loop->safelen = safelen_int;
7137 if (simduid)
7139 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7140 cfun->has_simduid_loops = true;
7142 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7143 the loop. */
7144 if ((flag_tree_loop_vectorize
7145 || !OPTION_SET_P (flag_tree_loop_vectorize))
7146 && flag_tree_loop_optimize
7147 && loop->safelen > 1)
7149 loop->force_vectorize = true;
7150 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7152 unsigned HOST_WIDE_INT v
7153 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7154 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7155 loop->simdlen = v;
7157 cfun->has_force_vectorize_loops = true;
7159 else if (dont_vectorize)
7160 loop->dont_vectorize = true;
7162 else if (simduid)
7163 cfun->has_simduid_loops = true;
7166 /* A taskloop construct is represented after gimplification as
7167 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7168 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7169 which should just compute all the needed loop temporaries
7170 for the GIMPLE_OMP_TASK. */
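/* Illustrative shape (a sketch, not literal GIMPLE):

     GIMPLE_OMP_FOR          <-- outer; expanded here into plain assignments
       GIMPLE_OMP_TASK           of the _looptemp_ bounds
         GIMPLE_OMP_FOR      <-- inner; expanded by
           BODY                  expand_omp_taskloop_for_inner below  */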
7172 static void
7173 expand_omp_taskloop_for_outer (struct omp_region *region,
7174 struct omp_for_data *fd,
7175 gimple *inner_stmt)
7177 tree type, bias = NULL_TREE;
7178 basic_block entry_bb, cont_bb, exit_bb;
7179 gimple_stmt_iterator gsi;
7180 gassign *assign_stmt;
7181 tree *counts = NULL;
7182 int i;
7184 gcc_assert (inner_stmt);
7185 gcc_assert (region->cont);
7186 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7187 && gimple_omp_task_taskloop_p (inner_stmt));
7188 type = TREE_TYPE (fd->loop.v);
7190 /* See if we need to bias by LLONG_MIN. */
7191 if (fd->iter_type == long_long_unsigned_type_node
7192 && TREE_CODE (type) == INTEGER_TYPE
7193 && !TYPE_UNSIGNED (type))
7195 tree n1, n2;
7197 if (fd->loop.cond_code == LT_EXPR)
7199 n1 = fd->loop.n1;
7200 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7202 else
7204 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7205 n2 = fd->loop.n1;
7207 if (TREE_CODE (n1) != INTEGER_CST
7208 || TREE_CODE (n2) != INTEGER_CST
7209 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7210 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
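/* E.g. (a sketch): for a signed 64-bit IV, BIAS is LLONG_MIN reinterpreted
   as 0x8000000000000000ULL. Adding it flips the sign bit, mapping the
   signed range monotonically onto the unsigned range (-1 -> 0x7fff...f,
   0 -> 0x8000...0), so the unsigned GOMP_taskloop_ull runtime interface
   can still compare and split the iteration bounds correctly.  */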
7213 entry_bb = region->entry;
7214 cont_bb = region->cont;
7215 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7216 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7217 exit_bb = region->exit;
7219 gsi = gsi_last_nondebug_bb (entry_bb);
7220 gimple *for_stmt = gsi_stmt (gsi);
7221 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7222 if (fd->collapse > 1)
7224 int first_zero_iter = -1, dummy = -1;
7225 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7227 counts = XALLOCAVEC (tree, fd->collapse);
7228 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7229 zero_iter_bb, first_zero_iter,
7230 dummy_bb, dummy, l2_dom_bb);
7232 if (zero_iter_bb)
7234 /* Some counts[i] vars might be uninitialized if
7235 some loop has zero iterations. But the body shouldn't
7236 be executed in that case, so just avoid uninit warnings. */
7237 for (i = first_zero_iter; i < fd->collapse; i++)
7238 if (SSA_VAR_P (counts[i]))
7239 suppress_warning (counts[i], OPT_Wuninitialized);
7240 gsi_prev (&gsi);
7241 edge e = split_block (entry_bb, gsi_stmt (gsi));
7242 entry_bb = e->dest;
7243 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7244 gsi = gsi_last_bb (entry_bb);
7245 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7246 get_immediate_dominator (CDI_DOMINATORS,
7247 zero_iter_bb));
7251 tree t0, t1;
7252 t1 = fd->loop.n2;
7253 t0 = fd->loop.n1;
7254 if (POINTER_TYPE_P (TREE_TYPE (t0))
7255 && TYPE_PRECISION (TREE_TYPE (t0))
7256 != TYPE_PRECISION (fd->iter_type))
7258 /* Avoid casting pointers to an integer of a different size. */
7259 tree itype = signed_type_for (type);
7260 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7261 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7263 else
7265 t1 = fold_convert (fd->iter_type, t1);
7266 t0 = fold_convert (fd->iter_type, t0);
7268 if (bias)
7270 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7271 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7274 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7275 OMP_CLAUSE__LOOPTEMP_);
7276 gcc_assert (innerc);
7277 tree startvar = OMP_CLAUSE_DECL (innerc);
7278 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7279 gcc_assert (innerc);
7280 tree endvar = OMP_CLAUSE_DECL (innerc);
7281 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7283 innerc = find_lastprivate_looptemp (fd, innerc);
7284 if (innerc)
7286 /* If needed (inner taskloop has lastprivate clause), propagate
7287 down the total number of iterations. */
7288 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7289 NULL_TREE, false,
7290 GSI_CONTINUE_LINKING);
7291 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7292 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7296 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7297 GSI_CONTINUE_LINKING);
7298 assign_stmt = gimple_build_assign (startvar, t0);
7299 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7301 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7302 GSI_CONTINUE_LINKING);
7303 assign_stmt = gimple_build_assign (endvar, t1);
7304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7305 if (fd->collapse > 1)
7306 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7308 /* Remove the GIMPLE_OMP_FOR statement. */
7309 gsi = gsi_for_stmt (for_stmt);
7310 gsi_remove (&gsi, true);
7312 gsi = gsi_last_nondebug_bb (cont_bb);
7313 gsi_remove (&gsi, true);
7315 gsi = gsi_last_nondebug_bb (exit_bb);
7316 gsi_remove (&gsi, true);
7318 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7319 remove_edge (BRANCH_EDGE (entry_bb));
7320 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7321 remove_edge (BRANCH_EDGE (cont_bb));
7322 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7323 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7324 recompute_dominator (CDI_DOMINATORS, region->entry));
7327 /* A taskloop construct is represented after gimplification as
7328 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7329 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7330 The GOMP_taskloop{,_ull} function arranges for each task to be given just
7331 a single range of iterations. */
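/* Illustrative shape (a sketch): after expansion each task executes

     for (V = N1; V cond N2; V += STEP)
       BODY;

   where N1 and N2 are the _looptemp_ variables holding the single
   iteration range GOMP_taskloop{,_ull} handed to this task.  */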
7333 static void
7334 expand_omp_taskloop_for_inner (struct omp_region *region,
7335 struct omp_for_data *fd,
7336 gimple *inner_stmt)
7338 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7339 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7340 basic_block fin_bb;
7341 gimple_stmt_iterator gsi;
7342 edge ep;
7343 bool broken_loop = region->cont == NULL;
7344 tree *counts = NULL;
7345 tree n1, n2, step;
7347 itype = type = TREE_TYPE (fd->loop.v);
7348 if (POINTER_TYPE_P (type))
7349 itype = signed_type_for (type);
7351 /* See if we need to bias by LLONG_MIN. */
7352 if (fd->iter_type == long_long_unsigned_type_node
7353 && TREE_CODE (type) == INTEGER_TYPE
7354 && !TYPE_UNSIGNED (type))
7356 tree n1, n2;
7358 if (fd->loop.cond_code == LT_EXPR)
7360 n1 = fd->loop.n1;
7361 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7363 else
7365 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7366 n2 = fd->loop.n1;
7368 if (TREE_CODE (n1) != INTEGER_CST
7369 || TREE_CODE (n2) != INTEGER_CST
7370 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7371 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7374 entry_bb = region->entry;
7375 cont_bb = region->cont;
7376 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7377 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7378 gcc_assert (broken_loop
7379 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7380 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7381 if (!broken_loop)
7383 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7384 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7386 exit_bb = region->exit;
7388 /* Iteration space partitioning goes in ENTRY_BB. */
7389 gsi = gsi_last_nondebug_bb (entry_bb);
7390 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7392 if (fd->collapse > 1)
7394 int first_zero_iter = -1, dummy = -1;
7395 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7397 counts = XALLOCAVEC (tree, fd->collapse);
7398 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7399 fin_bb, first_zero_iter,
7400 dummy_bb, dummy, l2_dom_bb);
7401 t = NULL_TREE;
7403 else
7404 t = integer_one_node;
7406 step = fd->loop.step;
7407 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7408 OMP_CLAUSE__LOOPTEMP_);
7409 gcc_assert (innerc);
7410 n1 = OMP_CLAUSE_DECL (innerc);
7411 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7412 gcc_assert (innerc);
7413 n2 = OMP_CLAUSE_DECL (innerc);
7414 if (bias)
7416 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7417 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7419 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7420 true, NULL_TREE, true, GSI_SAME_STMT);
7421 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7422 true, NULL_TREE, true, GSI_SAME_STMT);
7423 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7424 true, NULL_TREE, true, GSI_SAME_STMT);
7426 tree startvar = fd->loop.v;
7427 tree endvar = NULL_TREE;
7429 if (gimple_omp_for_combined_p (fd->for_stmt))
7431 tree clauses = gimple_omp_for_clauses (inner_stmt);
7432 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7433 gcc_assert (innerc);
7434 startvar = OMP_CLAUSE_DECL (innerc);
7435 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7436 OMP_CLAUSE__LOOPTEMP_);
7437 gcc_assert (innerc);
7438 endvar = OMP_CLAUSE_DECL (innerc);
7440 t = fold_convert (TREE_TYPE (startvar), n1);
7441 t = force_gimple_operand_gsi (&gsi, t,
7442 DECL_P (startvar)
7443 && TREE_ADDRESSABLE (startvar),
7444 NULL_TREE, false, GSI_CONTINUE_LINKING);
7445 gimple *assign_stmt = gimple_build_assign (startvar, t);
7446 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7448 t = fold_convert (TREE_TYPE (startvar), n2);
7449 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7450 false, GSI_CONTINUE_LINKING);
7451 if (endvar)
7453 assign_stmt = gimple_build_assign (endvar, e);
7454 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7455 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7456 assign_stmt = gimple_build_assign (fd->loop.v, e);
7457 else
7458 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7459 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7462 tree *nonrect_bounds = NULL;
7463 if (fd->collapse > 1)
7465 if (fd->non_rect)
7467 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7468 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7470 gcc_assert (gsi_bb (gsi) == entry_bb);
7471 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7472 startvar);
7473 entry_bb = gsi_bb (gsi);
7476 if (!broken_loop)
7478 /* The code controlling the sequential loop replaces the
7479 GIMPLE_OMP_CONTINUE. */
7480 gsi = gsi_last_nondebug_bb (cont_bb);
7481 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7482 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7483 vmain = gimple_omp_continue_control_use (cont_stmt);
7484 vback = gimple_omp_continue_control_def (cont_stmt);
7486 if (!gimple_omp_for_combined_p (fd->for_stmt))
7488 if (POINTER_TYPE_P (type))
7489 t = fold_build_pointer_plus (vmain, step);
7490 else
7491 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7492 t = force_gimple_operand_gsi (&gsi, t,
7493 DECL_P (vback)
7494 && TREE_ADDRESSABLE (vback),
7495 NULL_TREE, true, GSI_SAME_STMT);
7496 assign_stmt = gimple_build_assign (vback, t);
7497 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7499 t = build2 (fd->loop.cond_code, boolean_type_node,
7500 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7501 ? t : vback, e);
7502 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7505 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7506 gsi_remove (&gsi, true);
7508 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7509 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7510 cont_bb, body_bb);
7513 /* Remove the GIMPLE_OMP_FOR statement. */
7514 gsi = gsi_for_stmt (fd->for_stmt);
7515 gsi_remove (&gsi, true);
7517 /* Remove the GIMPLE_OMP_RETURN statement. */
7518 gsi = gsi_last_nondebug_bb (exit_bb);
7519 gsi_remove (&gsi, true);
7521 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7522 if (!broken_loop)
7523 remove_edge (BRANCH_EDGE (entry_bb));
7524 else
7526 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7527 region->outer->cont = NULL;
7530 /* Connect all the blocks. */
7531 if (!broken_loop)
7533 ep = find_edge (cont_bb, body_bb);
7534 if (gimple_omp_for_combined_p (fd->for_stmt))
7536 remove_edge (ep);
7537 ep = NULL;
7539 else if (fd->collapse > 1)
7541 remove_edge (ep);
7542 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7544 else
7545 ep->flags = EDGE_TRUE_VALUE;
7546 find_edge (cont_bb, fin_bb)->flags
7547 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7550 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7551 recompute_dominator (CDI_DOMINATORS, body_bb));
7552 if (!broken_loop)
7553 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7554 recompute_dominator (CDI_DOMINATORS, fin_bb));
7556 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7558 class loop *loop = alloc_loop ();
7559 loop->header = body_bb;
7560 if (collapse_bb == NULL)
7561 loop->latch = cont_bb;
7562 add_loop (loop, body_bb->loop_father);
7566 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7567 partitioned loop. The lowering here is abstracted, in that the
7568 loop parameters are passed through internal functions, which are
7569 further lowered by oacc_device_lower, once we get to the target
7570 compiler. The loop is of the form:
7572 for (V = B; V LTGT E; V += S) {BODY}
7574 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7575 (constant 0 for no chunking) and we will have a GWV partitioning
7576 mask, specifying dimensions over which the loop is to be
7577 partitioned (see note below). We generate code that looks like
7578 (this ignores tiling):
7580 <entry_bb> [incoming FALL->body, BRANCH->exit]
7581 typedef signedintify (typeof (V)) T; // underlying signed integral type
7582 T range = E - B;
7583 T chunk_no = 0;
7584 T DIR = LTGT == '<' ? +1 : -1;
7585 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7586 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7588 <head_bb> [created by splitting end of entry_bb]
7589 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7590 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7591 if (!(offset LTGT bound)) goto bottom_bb;
7593 <body_bb> [incoming]
7594 V = B + offset;
7595 {BODY}
7597 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7598 offset += step;
7599 if (offset LTGT bound) goto body_bb; [*]
7601 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7602 chunk_no++;
7603 if (chunk_no < chunk_max) goto head_bb;
7605 <exit_bb> [incoming]
7606 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7608 [*] Needed if V live at end of loop. */
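/* In the emitted GIMPLE each GOACC_LOOP_* helper above is a single
   IFN_GOACC_LOOP internal-function call whose first argument selects the
   operation, roughly (a sketch of the dump form):
     step = .GOACC_LOOP (IFN_GOACC_LOOP_STEP, dir, range, S, CHUNK_SIZE, GWV);
   oacc_device_lower later open-codes these per offload target.  */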
7610 static void
7611 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7613 bool is_oacc_kernels_parallelized
7614 = (lookup_attribute ("oacc kernels parallelized",
7615 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7617 bool is_oacc_kernels
7618 = (lookup_attribute ("oacc kernels",
7619 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7620 if (is_oacc_kernels_parallelized)
7621 gcc_checking_assert (is_oacc_kernels);
7623 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7624 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7625 for SSA specifics, and some are for 'parloops' OpenACC
7626 'kernels'-parallelized specifics. */
7628 tree v = fd->loop.v;
7629 enum tree_code cond_code = fd->loop.cond_code;
7630 enum tree_code plus_code = PLUS_EXPR;
7632 tree chunk_size = integer_minus_one_node;
7633 tree gwv = integer_zero_node;
7634 tree iter_type = TREE_TYPE (v);
7635 tree diff_type = iter_type;
7636 tree plus_type = iter_type;
7637 struct oacc_collapse *counts = NULL;
7639 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7640 == GF_OMP_FOR_KIND_OACC_LOOP);
7641 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7642 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7644 if (POINTER_TYPE_P (iter_type))
7646 plus_code = POINTER_PLUS_EXPR;
7647 plus_type = sizetype;
7649 for (int ix = fd->collapse; ix--;)
7651 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7652 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7653 diff_type = diff_type2;
7655 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7656 diff_type = signed_type_for (diff_type);
7657 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7658 diff_type = integer_type_node;
7660 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7661 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7662 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7663 basic_block bottom_bb = NULL;
7665 /* entry_bb has two successors; the branch edge is to the exit
7666 block, fallthrough edge to body. */
7667 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7668 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7670 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
7671 body_bb, or a block whose only successor is body_bb. Its
7672 fallthrough successor is the final block (same as the branch
7673 successor of the entry_bb). */
7674 if (cont_bb)
7676 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7677 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7679 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7680 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7682 else
7683 gcc_assert (!gimple_in_ssa_p (cfun));
7685 /* The exit block only has entry_bb and cont_bb as predecessors. */
7686 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7688 tree chunk_no;
7689 tree chunk_max = NULL_TREE;
7690 tree bound, offset;
7691 tree step = create_tmp_var (diff_type, ".step");
7692 bool up = cond_code == LT_EXPR;
7693 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7694 bool chunking = !gimple_in_ssa_p (cfun);
7695 bool negating;
7697 /* Tiling vars. */
7698 tree tile_size = NULL_TREE;
7699 tree element_s = NULL_TREE;
7700 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7701 basic_block elem_body_bb = NULL;
7702 basic_block elem_cont_bb = NULL;
7704 /* SSA instances. */
7705 tree offset_incr = NULL_TREE;
7706 tree offset_init = NULL_TREE;
7708 gimple_stmt_iterator gsi;
7709 gassign *ass;
7710 gcall *call;
7711 gimple *stmt;
7712 tree expr;
7713 location_t loc;
7714 edge split, be, fte;
7716 /* Split the end of entry_bb to create head_bb. */
7717 split = split_block (entry_bb, last_nondebug_stmt (entry_bb));
7718 basic_block head_bb = split->dest;
7719 entry_bb = split->src;
7721 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7722 gsi = gsi_last_nondebug_bb (entry_bb);
7723 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7724 loc = gimple_location (for_stmt);
7726 if (gimple_in_ssa_p (cfun))
7728 offset_init = gimple_omp_for_index (for_stmt, 0);
7729 gcc_assert (integer_zerop (fd->loop.n1));
7730 /* The SSA parallelizer does gang parallelism. */
7731 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7734 if (fd->collapse > 1 || fd->tiling)
7736 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7737 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7738 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7739 TREE_TYPE (fd->loop.n2), loc);
7741 if (SSA_VAR_P (fd->loop.n2))
7743 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7744 true, GSI_SAME_STMT);
7745 ass = gimple_build_assign (fd->loop.n2, total);
7746 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7750 tree b = fd->loop.n1;
7751 tree e = fd->loop.n2;
7752 tree s = fd->loop.step;
7754 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7755 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7757 /* Convert the step, avoiding possible unsigned->signed overflow. */
7758 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7759 if (negating)
7760 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7761 s = fold_convert (diff_type, s);
7762 if (negating)
7763 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7764 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
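/* Worked sketch (editorial): a 32-bit unsigned step holding 0xfffffffb
   (-5) would become 4294967291 if converted directly to a wider signed
   DIFF_TYPE; negating first yields 5, which converts safely, and negating
   again restores -5.  */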
7766 if (!chunking)
7767 chunk_size = integer_zero_node;
7768 expr = fold_convert (diff_type, chunk_size);
7769 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7770 NULL_TREE, true, GSI_SAME_STMT);
7772 if (fd->tiling)
7774 /* Determine the tile size and element step,
7775 modify the outer loop step size. */
7776 tile_size = create_tmp_var (diff_type, ".tile_size");
7777 expr = build_int_cst (diff_type, 1);
7778 for (int ix = 0; ix < fd->collapse; ix++)
7779 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7780 expr = force_gimple_operand_gsi (&gsi, expr, true,
7781 NULL_TREE, true, GSI_SAME_STMT);
7782 ass = gimple_build_assign (tile_size, expr);
7783 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7785 element_s = create_tmp_var (diff_type, ".element_s");
7786 ass = gimple_build_assign (element_s, s);
7787 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7789 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7790 s = force_gimple_operand_gsi (&gsi, expr, true,
7791 NULL_TREE, true, GSI_SAME_STMT);
7794 /* Determine the range, avoiding possible unsigned->signed overflow. */
7795 negating = !up && TYPE_UNSIGNED (iter_type);
7796 expr = fold_build2 (MINUS_EXPR, plus_type,
7797 fold_convert (plus_type, negating ? b : e),
7798 fold_convert (plus_type, negating ? e : b));
7799 expr = fold_convert (diff_type, expr);
7800 if (negating)
7801 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7802 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7803 NULL_TREE, true, GSI_SAME_STMT);
7805 chunk_no = build_int_cst (diff_type, 0);
7806 if (chunking)
7808 gcc_assert (!gimple_in_ssa_p (cfun));
7810 expr = chunk_no;
7811 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7812 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7814 ass = gimple_build_assign (chunk_no, expr);
7815 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7817 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7818 build_int_cst (integer_type_node,
7819 IFN_GOACC_LOOP_CHUNKS),
7820 dir, range, s, chunk_size, gwv);
7821 gimple_call_set_lhs (call, chunk_max);
7822 gimple_set_location (call, loc);
7823 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7825 else
7826 chunk_size = chunk_no;
7828 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7829 build_int_cst (integer_type_node,
7830 IFN_GOACC_LOOP_STEP),
7831 dir, range, s, chunk_size, gwv);
7832 gimple_call_set_lhs (call, step);
7833 gimple_set_location (call, loc);
7834 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7836 /* Remove the GIMPLE_OMP_FOR. */
7837 gsi_remove (&gsi, true);
7839 /* Fixup edges from head_bb. */
7840 be = BRANCH_EDGE (head_bb);
7841 fte = FALLTHRU_EDGE (head_bb);
7842 be->flags |= EDGE_FALSE_VALUE;
7843 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7845 basic_block body_bb = fte->dest;
7847 if (gimple_in_ssa_p (cfun))
7849 gsi = gsi_last_nondebug_bb (cont_bb);
7850 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7852 offset = gimple_omp_continue_control_use (cont_stmt);
7853 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7855 else
7857 offset = create_tmp_var (diff_type, ".offset");
7858 offset_init = offset_incr = offset;
7860 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7862 /* Loop offset & bound go into head_bb. */
7863 gsi = gsi_start_bb (head_bb);
7865 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7866 build_int_cst (integer_type_node,
7867 IFN_GOACC_LOOP_OFFSET),
7868 dir, range, s,
7869 chunk_size, gwv, chunk_no);
7870 gimple_call_set_lhs (call, offset_init);
7871 gimple_set_location (call, loc);
7872 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7874 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7875 build_int_cst (integer_type_node,
7876 IFN_GOACC_LOOP_BOUND),
7877 dir, range, s,
7878 chunk_size, gwv, offset_init);
7879 gimple_call_set_lhs (call, bound);
7880 gimple_set_location (call, loc);
7881 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7883 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7884 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7885 GSI_CONTINUE_LINKING);
7887 /* V assignment goes into body_bb. */
7888 if (!gimple_in_ssa_p (cfun))
7890 gsi = gsi_start_bb (body_bb);
7892 expr = build2 (plus_code, iter_type, b,
7893 fold_convert (plus_type, offset));
7894 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7895 true, GSI_SAME_STMT);
7896 ass = gimple_build_assign (v, expr);
7897 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7899 if (fd->collapse > 1 || fd->tiling)
7900 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7902 if (fd->tiling)
7904 /* Determine the range of the element loop -- usually simply
7905 the tile_size, but could be smaller if the final
7906 iteration of the outer loop is a partial tile. */
7907 tree e_range = create_tmp_var (diff_type, ".e_range");
7909 expr = build2 (MIN_EXPR, diff_type,
7910 build2 (MINUS_EXPR, diff_type, bound, offset),
7911 build2 (MULT_EXPR, diff_type, tile_size,
7912 element_s));
7913 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7914 true, GSI_SAME_STMT);
7915 ass = gimple_build_assign (e_range, expr);
7916 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
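/* E.g. (a sketch): an outer range of 10 elements tiled by 4 yields element
   ranges of 4, 4 and 2 -- the MIN with (bound - offset) trims the final
   partial tile.  */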
7918 /* Determine bound, offset & step of inner loop. */
7919 e_bound = create_tmp_var (diff_type, ".e_bound");
7920 e_offset = create_tmp_var (diff_type, ".e_offset");
7921 e_step = create_tmp_var (diff_type, ".e_step");
7923 /* Mark these as element loops. */
7924 tree t, e_gwv = integer_minus_one_node;
7925 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7927 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7928 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7929 element_s, chunk, e_gwv, chunk);
7930 gimple_call_set_lhs (call, e_offset);
7931 gimple_set_location (call, loc);
7932 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7934 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7935 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7936 element_s, chunk, e_gwv, e_offset);
7937 gimple_call_set_lhs (call, e_bound);
7938 gimple_set_location (call, loc);
7939 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7941 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7942 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7943 element_s, chunk, e_gwv);
7944 gimple_call_set_lhs (call, e_step);
7945 gimple_set_location (call, loc);
7946 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7948 /* Add test and split block. */
7949 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7950 stmt = gimple_build_cond_empty (expr);
7951 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7952 split = split_block (body_bb, stmt);
7953 elem_body_bb = split->dest;
7954 if (cont_bb == body_bb)
7955 cont_bb = elem_body_bb;
7956 body_bb = split->src;
7958 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7960 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7961 if (cont_bb == NULL)
7963 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7964 e->probability = profile_probability::even ();
7965 split->probability = profile_probability::even ();
7968 /* Initialize the user's loop vars. */
7969 gsi = gsi_start_bb (elem_body_bb);
7970 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7971 diff_type);
7975 /* Loop increment goes into cont_bb. If this is not a loop, we
7976 will have spawned threads as if it was, and each one will
7977 execute one iteration. The specification is not explicit about
7978 whether such constructs are ill-formed or not, and they can
7979 occur, especially when noreturn routines are involved. */
7980 if (cont_bb)
7982 gsi = gsi_last_nondebug_bb (cont_bb);
7983 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7984 loc = gimple_location (cont_stmt);
7986 if (fd->tiling)
7988 /* Insert element loop increment and test. */
7989 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7990 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7991 true, GSI_SAME_STMT);
7992 ass = gimple_build_assign (e_offset, expr);
7993 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7994 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7996 stmt = gimple_build_cond_empty (expr);
7997 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7998 split = split_block (cont_bb, stmt);
7999 elem_cont_bb = split->src;
8000 cont_bb = split->dest;
8002 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8003 split->probability = profile_probability::unlikely ().guessed ();
8004 edge latch_edge
8005 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
8006 latch_edge->probability = profile_probability::likely ().guessed ();
8008 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
8009 skip_edge->probability = profile_probability::unlikely ().guessed ();
8010 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
8011 loop_entry_edge->probability
8012 = profile_probability::likely ().guessed ();
8014 gsi = gsi_for_stmt (cont_stmt);
8017 /* Increment offset. */
8018 if (gimple_in_ssa_p (cfun))
8019 expr = build2 (plus_code, iter_type, offset,
8020 fold_convert (plus_type, step));
8021 else
8022 expr = build2 (PLUS_EXPR, diff_type, offset, step);
8023 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8024 true, GSI_SAME_STMT);
8025 ass = gimple_build_assign (offset_incr, expr);
8026 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8027 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
8028 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
8030 /* Remove the GIMPLE_OMP_CONTINUE. */
8031 gsi_remove (&gsi, true);
8033 /* Fixup edges from cont_bb. */
8034 be = BRANCH_EDGE (cont_bb);
8035 fte = FALLTHRU_EDGE (cont_bb);
8036 be->flags |= EDGE_TRUE_VALUE;
8037 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8039 if (chunking)
8041 /* Split the beginning of exit_bb to make bottom_bb. We
8042 need to insert a nop at the start, because splitting is
8043 after a stmt, not before. */
8044 gsi = gsi_start_bb (exit_bb);
8045 stmt = gimple_build_nop ();
8046 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8047 split = split_block (exit_bb, stmt);
8048 bottom_bb = split->src;
8049 exit_bb = split->dest;
8050 gsi = gsi_last_bb (bottom_bb);
8052 /* Chunk increment and test goes into bottom_bb. */
8053 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
8054 build_int_cst (diff_type, 1));
8055 ass = gimple_build_assign (chunk_no, expr);
8056 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
8058 /* Chunk test at end of bottom_bb. */
8059 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
8060 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
8061 GSI_CONTINUE_LINKING);
8063 /* Fixup edges from bottom_bb. */
8064 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8065 split->probability = profile_probability::unlikely ().guessed ();
8066 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
8067 latch_edge->probability = profile_probability::likely ().guessed ();
8071 gsi = gsi_last_nondebug_bb (exit_bb);
8072 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8073 loc = gimple_location (gsi_stmt (gsi));
8075 if (!gimple_in_ssa_p (cfun))
8077 /* Insert the final value of V, in case it is live. This is the
8078 value for the only thread that survives past the join. */
8079 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
8080 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
8081 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
8082 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
8083 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
8084 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8085 true, GSI_SAME_STMT);
8086 ass = gimple_build_assign (v, expr);
8087 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
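/* Worked sketch (editorial): B = 0, E = 10, S = 3 gives range = 10, and
   ((10 - 1) + 3) / 3 * 3 = 12, i.e. V = 12 -- the same exit value the
   sequential loop for (V = 0; V < 10; V += 3) would leave in V after
   iterating over 0, 3, 6, 9.  */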
8090 /* Remove the OMP_RETURN. */
8091 gsi_remove (&gsi, true);
8093 if (cont_bb)
8095 /* We now have one, two or three nested loops. Update the loop
8096 structures. */
8097 class loop *parent = entry_bb->loop_father;
8098 class loop *body = body_bb->loop_father;
8100 if (chunking)
8102 class loop *chunk_loop = alloc_loop ();
8103 chunk_loop->header = head_bb;
8104 chunk_loop->latch = bottom_bb;
8105 add_loop (chunk_loop, parent);
8106 parent = chunk_loop;
8108 else if (parent != body)
8110 gcc_assert (body->header == body_bb);
8111 gcc_assert (body->latch == cont_bb
8112 || single_pred (body->latch) == cont_bb);
8113 parent = NULL;
8116 if (parent)
8118 class loop *body_loop = alloc_loop ();
8119 body_loop->header = body_bb;
8120 body_loop->latch = cont_bb;
8121 add_loop (body_loop, parent);
8123 if (fd->tiling)
8125 /* Insert tiling's element loop. */
8126 class loop *inner_loop = alloc_loop ();
8127 inner_loop->header = elem_body_bb;
8128 inner_loop->latch = elem_cont_bb;
8129 add_loop (inner_loop, body_loop);
8135 /* Expand the OMP loop defined by REGION. */
8137 static void
8138 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8140 struct omp_for_data fd;
8141 struct omp_for_data_loop *loops;
8143 loops = XALLOCAVEC (struct omp_for_data_loop,
8144 gimple_omp_for_collapse
8145 (last_nondebug_stmt (region->entry)));
8146 omp_extract_for_data (as_a <gomp_for *> (last_nondebug_stmt (region->entry)),
8147 &fd, loops);
8148 region->sched_kind = fd.sched_kind;
8149 region->sched_modifiers = fd.sched_modifiers;
8150 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8151 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8153 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8154 if ((loops[i].m1 || loops[i].m2)
8155 && (loops[i].m1 == NULL_TREE
8156 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8157 && (loops[i].m2 == NULL_TREE
8158 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8159 && TREE_CODE (loops[i].step) == INTEGER_CST
8160 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8162 tree t;
8163 tree itype = TREE_TYPE (loops[i].v);
8164 if (loops[i].m1 && loops[i].m2)
8165 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8166 else if (loops[i].m1)
8167 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8168 else
8169 t = loops[i].m2;
8170 t = fold_build2 (MULT_EXPR, itype, t,
8171 fold_convert (itype,
8172 loops[i - loops[i].outer].step));
8173 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8174 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8175 fold_build1 (NEGATE_EXPR, itype, t),
8176 fold_build1 (NEGATE_EXPR, itype,
8177 fold_convert (itype,
8178 loops[i].step)));
8179 else
8180 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8181 fold_convert (itype, loops[i].step));
8182 if (integer_nonzerop (t))
8183 error_at (gimple_location (fd.for_stmt),
8184 "invalid OpenMP non-rectangular loop step; "
8185 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8186 "step %qE",
8187 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8188 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8189 loops[i - loops[i].outer].step, i + 1,
8190 loops[i].step);
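/* E.g. (a sketch): for

     for (i = 0; i < 64; i += 4)
       for (j = 2 * i; j < 3 * i; j += 3)

   we get (m2 - m1) * outer-step = (3 - 2) * 4 = 4, which is not a multiple
   of the inner step 3, so the diagnostic above rejects the nest -- without
   that divisibility the logical iteration count of the non-rectangular
   loop would not be computable in the required closed form.  */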
8194 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8195 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8196 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8197 if (region->cont)
8199 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8200 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8201 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8203 else
8204 /* If there isn't a continue then this is a degenerate case where
8205 the introduction of abnormal edges during lowering will prevent
8206 original loops from being detected. Fix that up. */
8207 loops_state_set (LOOPS_NEED_FIXUP);
8209 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8210 expand_omp_simd (region, &fd);
8211 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8213 gcc_assert (!inner_stmt && !fd.non_rect);
8214 expand_oacc_for (region, &fd);
8216 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8218 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8219 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8220 else
8221 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8223 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8224 && !fd.have_ordered)
8226 if (fd.chunk_size == NULL)
8227 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8228 else
8229 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8231 else
8233 int fn_index, start_ix, next_ix;
8234 unsigned HOST_WIDE_INT sched = 0;
8235 tree sched_arg = NULL_TREE;
8237 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8238 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8239 if (fd.chunk_size == NULL
8240 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8241 fd.chunk_size = integer_zero_node;
8242 switch (fd.sched_kind)
8244 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8245 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8246 && fd.lastprivate_conditional == 0)
8247 {
8248 gcc_assert (!fd.have_ordered);
8249 fn_index = 6;
8250 sched = 4;
8251 }
8252 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8253 && !fd.have_ordered
8254 && fd.lastprivate_conditional == 0)
8255 fn_index = 7;
8256 else
8257 {
8258 fn_index = 3;
8259 sched = (HOST_WIDE_INT_1U << 31);
8260 }
8261 break;
8262 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8263 case OMP_CLAUSE_SCHEDULE_GUIDED:
8264 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8265 && !fd.have_ordered
8266 && fd.lastprivate_conditional == 0)
8267 {
8268 fn_index = 3 + fd.sched_kind;
8269 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8270 break;
8271 }
8272 fn_index = fd.sched_kind;
8273 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8274 sched += (HOST_WIDE_INT_1U << 31);
8275 break;
8276 case OMP_CLAUSE_SCHEDULE_STATIC:
8277 gcc_assert (fd.have_ordered);
8278 fn_index = 0;
8279 sched = (HOST_WIDE_INT_1U << 31) + 1;
8280 break;
8281 default:
8282 gcc_unreachable ();
8284 if (!fd.ordered)
8285 fn_index += fd.have_ordered * 8;
8286 if (fd.ordered)
8287 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8288 else
8289 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8290 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
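/* FN_INDEX is deliberately the same offset into both builtin families:
   GOMP_loop_*_start hands the calling thread its first iteration chunk and
   the matching GOMP_loop_*_next fetches subsequent chunks, so START_IX and
   NEXT_IX stay paired (the _ull variants below are offset the same way).  */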
8291 if (fd.have_reductemp || fd.have_pointer_condtemp)
8293 if (fd.ordered)
8294 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8295 else if (fd.have_ordered)
8296 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8297 else
8298 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8299 sched_arg = build_int_cstu (long_integer_type_node, sched);
8300 if (!fd.chunk_size)
8301 fd.chunk_size = integer_zero_node;
8303 if (fd.iter_type == long_long_unsigned_type_node)
8305 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8306 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8307 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8308 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8310 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8311 (enum built_in_function) next_ix, sched_arg,
8312 inner_stmt);
8316 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8318 v = GOMP_sections_start (n);
8319 L0:
8320 switch (v)
8321 {
8322 case 0:
8323 goto L2;
8324 case 1:
8325 section 1;
8326 goto L1;
8327 case 2:
8328 ...
8329 case n:
8330 ...
8331 default:
8332 abort ();
8333 }
8334 L1:
8335 v = GOMP_sections_next ();
8336 goto L0;
8337 L2:
8338 reduction;
8340 If this is a combined parallel sections, replace the call to
8341 GOMP_sections_start with a call to GOMP_sections_next. */
8343 static void
8344 expand_omp_sections (struct omp_region *region)
8346 tree t, u, vin = NULL, vmain, vnext, l2;
8347 unsigned len;
8348 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8349 gimple_stmt_iterator si, switch_si;
8350 gomp_sections *sections_stmt;
8351 gimple *stmt;
8352 gomp_continue *cont;
8353 edge_iterator ei;
8354 edge e;
8355 struct omp_region *inner;
8356 unsigned i, casei;
8357 bool exit_reachable = region->cont != NULL;
8359 gcc_assert (region->exit != NULL);
8360 entry_bb = region->entry;
8361 l0_bb = single_succ (entry_bb);
8362 l1_bb = region->cont;
8363 l2_bb = region->exit;
8364 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8365 l2 = gimple_block_label (l2_bb);
8366 else
8368 /* This can happen if there are reductions. */
8369 len = EDGE_COUNT (l0_bb->succs);
8370 gcc_assert (len > 0);
8371 e = EDGE_SUCC (l0_bb, len - 1);
8372 si = gsi_last_nondebug_bb (e->dest);
8373 l2 = NULL_TREE;
8374 if (gsi_end_p (si)
8375 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8376 l2 = gimple_block_label (e->dest);
8377 else
8378 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8380 si = gsi_last_nondebug_bb (e->dest);
8381 if (gsi_end_p (si)
8382 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8384 l2 = gimple_block_label (e->dest);
8385 break;
8389 if (exit_reachable)
8390 default_bb = create_empty_bb (l1_bb->prev_bb);
8391 else
8392 default_bb = create_empty_bb (l0_bb);
8394 /* We will build a switch() with enough cases for all the
8395 GIMPLE_OMP_SECTION regions, a '0' case to signal that there is no
8396 more work, and a default case to abort if something goes wrong. */
8397 len = EDGE_COUNT (l0_bb->succs);
8399 /* Use vec::quick_push on label_vec throughout, since we know the size
8400 in advance. */
8401 auto_vec<tree> label_vec (len);
8403 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8404 GIMPLE_OMP_SECTIONS statement. */
8405 si = gsi_last_nondebug_bb (entry_bb);
8406 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8407 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8408 vin = gimple_omp_sections_control (sections_stmt);
8409 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8410 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8411 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8412 tree cond_var = NULL_TREE;
8413 if (reductmp || condtmp)
8415 tree reductions = null_pointer_node, mem = null_pointer_node;
8416 tree memv = NULL_TREE, condtemp = NULL_TREE;
8417 gimple_stmt_iterator gsi = gsi_none ();
8418 gimple *g = NULL;
8419 if (reductmp)
8421 reductions = OMP_CLAUSE_DECL (reductmp);
8422 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8423 g = SSA_NAME_DEF_STMT (reductions);
8424 reductions = gimple_assign_rhs1 (g);
8425 OMP_CLAUSE_DECL (reductmp) = reductions;
8426 gsi = gsi_for_stmt (g);
8428 else
8429 gsi = si;
8430 if (condtmp)
8432 condtemp = OMP_CLAUSE_DECL (condtmp);
8433 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8434 OMP_CLAUSE__CONDTEMP_);
8435 cond_var = OMP_CLAUSE_DECL (c);
8436 tree type = TREE_TYPE (condtemp);
8437 memv = create_tmp_var (type);
8438 TREE_ADDRESSABLE (memv) = 1;
8439 unsigned cnt = 0;
8440 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8441 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8442 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8443 ++cnt;
8444 unsigned HOST_WIDE_INT sz
8445 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8446 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8447 false);
8448 mem = build_fold_addr_expr (memv);
8450 t = build_int_cst (unsigned_type_node, len - 1);
8451 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8452 stmt = gimple_build_call (u, 3, t, reductions, mem);
8453 gimple_call_set_lhs (stmt, vin);
8454 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8455 if (condtmp)
8457 expand_omp_build_assign (&gsi, condtemp, memv, false);
8458 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8459 vin, build_one_cst (TREE_TYPE (cond_var)));
8460 expand_omp_build_assign (&gsi, cond_var, t, false);
8462 if (reductmp)
8464 gsi_remove (&gsi, true);
8465 release_ssa_name (gimple_assign_lhs (g));
8468 else if (!is_combined_parallel (region))
8470 /* If we are not inside a combined parallel+sections region,
8471 call GOMP_sections_start. */
8472 t = build_int_cst (unsigned_type_node, len - 1);
8473 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8474 stmt = gimple_build_call (u, 1, t);
8476 else
8478 /* Otherwise, call GOMP_sections_next. */
8479 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8480 stmt = gimple_build_call (u, 0);
8482 if (!reductmp && !condtmp)
8484 gimple_call_set_lhs (stmt, vin);
8485 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8487 gsi_remove (&si, true);
8489 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8490 L0_BB. */
8491 switch_si = gsi_last_nondebug_bb (l0_bb);
8492 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8493 if (exit_reachable)
8495 cont = as_a <gomp_continue *> (last_nondebug_stmt (l1_bb));
8496 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8497 vmain = gimple_omp_continue_control_use (cont);
8498 vnext = gimple_omp_continue_control_def (cont);
8500 else
8502 vmain = vin;
8503 vnext = NULL_TREE;
8506 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8507 label_vec.quick_push (t);
8508 i = 1;
8510 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8511 for (inner = region->inner, casei = 1;
8512 inner;
8513 inner = inner->next, i++, casei++)
8515 basic_block s_entry_bb, s_exit_bb;
8517 /* Skip optional reduction region. */
8518 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8520 --i;
8521 --casei;
8522 continue;
8525 s_entry_bb = inner->entry;
8526 s_exit_bb = inner->exit;
8528 t = gimple_block_label (s_entry_bb);
8529 u = build_int_cst (unsigned_type_node, casei);
8530 u = build_case_label (u, NULL, t);
8531 label_vec.quick_push (u);
8533 si = gsi_last_nondebug_bb (s_entry_bb);
8534 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8535 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8536 gsi_remove (&si, true);
8537 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8539 if (s_exit_bb == NULL)
8540 continue;
8542 si = gsi_last_nondebug_bb (s_exit_bb);
8543 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8544 gsi_remove (&si, true);
8546 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8549 /* Error handling code goes in DEFAULT_BB. */
8550 t = gimple_block_label (default_bb);
8551 u = build_case_label (NULL, NULL, t);
8552 make_edge (l0_bb, default_bb, 0);
8553 add_bb_to_loop (default_bb, current_loops->tree_root);
8555 stmt = gimple_build_switch (vmain, u, label_vec);
8556 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8557 gsi_remove (&switch_si, true);
8559 si = gsi_start_bb (default_bb);
8560 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8561 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8563 if (exit_reachable)
8565 tree bfn_decl;
8567 /* Code to get the next section goes in L1_BB. */
8568 si = gsi_last_nondebug_bb (l1_bb);
8569 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8571 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8572 stmt = gimple_build_call (bfn_decl, 0);
8573 gimple_call_set_lhs (stmt, vnext);
8574 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8575 if (cond_var)
8577 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8578 vnext, build_one_cst (TREE_TYPE (cond_var)));
8579 expand_omp_build_assign (&si, cond_var, t, false);
8581 gsi_remove (&si, true);
8583 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8586 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8587 si = gsi_last_nondebug_bb (l2_bb);
8588 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8589 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8590 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8591 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8592 else
8593 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8594 stmt = gimple_build_call (t, 0);
8595 if (gimple_omp_return_lhs (gsi_stmt (si)))
8596 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8597 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8598 gsi_remove (&si, true);
8600 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8603 /* Expand code for an OpenMP single or scope directive. We've already expanded
8604 much of the code; here we simply place the GOMP_barrier call. */
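/* E.g. (illustrative) for '#pragma omp single' without 'nowait', the body
   and the GOMP_single_start test were already emitted during lowering; all
   this pass adds at the region exit is roughly:

       GOMP_barrier ();  */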
8606 static void
8607 expand_omp_single (struct omp_region *region)
8609 basic_block entry_bb, exit_bb;
8610 gimple_stmt_iterator si;
8612 entry_bb = region->entry;
8613 exit_bb = region->exit;
8615 si = gsi_last_nondebug_bb (entry_bb);
8616 enum gimple_code code = gimple_code (gsi_stmt (si));
8617 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8618 gsi_remove (&si, true);
8619 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8621 if (exit_bb == NULL)
8623 gcc_assert (code == GIMPLE_OMP_SCOPE);
8624 return;
8627 si = gsi_last_nondebug_bb (exit_bb);
8628 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8630 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8631 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8633 gsi_remove (&si, true);
8634 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8637 /* Generic expansion for OpenMP synchronization directives: master,
8638 masked, taskgroup, ordered and critical. All we need to do here is remove the entry
8639 and exit markers for REGION. */
8641 static void
8642 expand_omp_synch (struct omp_region *region)
8644 basic_block entry_bb, exit_bb;
8645 gimple_stmt_iterator si;
8647 entry_bb = region->entry;
8648 exit_bb = region->exit;
8650 si = gsi_last_nondebug_bb (entry_bb);
8651 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8652 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8653 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8654 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8655 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8656 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8657 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8658 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8659 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8661 expand_omp_taskreg (region);
8662 return;
8664 gsi_remove (&si, true);
8665 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8667 if (exit_bb)
8669 si = gsi_last_nondebug_bb (exit_bb);
8670 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8671 gsi_remove (&si, true);
8672 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8676 /* Translate enum omp_memory_order to enum memmodel for the fail
8677 clause embedded in it. */
8679 static enum memmodel
8680 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8682 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8684 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8685 switch (mo & OMP_MEMORY_ORDER_MASK)
8687 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8688 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8689 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8690 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8691 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8692 default: break;
8694 gcc_unreachable ();
8695 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8696 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8697 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8698 default: gcc_unreachable ();
8702 /* Translate enum omp_memory_order to enum memmodel. The two enums
8703 use different values so that OMP_MEMORY_ORDER_UNSPECIFIED
8704 is 0 and omp_memory_order also has the fail ordering encoded in it. */
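/* E.g. (illustrative) a clause pair 'acq_rel' + 'fail(acquire)' arrives as

       mo = OMP_MEMORY_ORDER_ACQ_REL | OMP_FAIL_MEMORY_ORDER_ACQUIRE

   and yields MEMMODEL_ACQ_REL, with MEMMODEL_ACQUIRE for the failure path
   (see omp_memory_order_to_fail_memmodel above).  */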
8706 static enum memmodel
8707 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8709 enum memmodel ret, fail_ret;
8710 switch (mo & OMP_MEMORY_ORDER_MASK)
8712 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8713 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8714 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8715 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8716 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8717 default: gcc_unreachable ();
8719 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8720 we can just return ret here unconditionally. Otherwise, work around
8721 it here and make sure the fail memmodel is not stronger. */
8722 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8723 return ret;
8724 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8725 if (fail_ret > ret)
8726 return fail_ret;
8727 return ret;
8730 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8731 operation as a normal volatile load. */
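/* E.g. (illustrative sketch) for 'double x', '#pragma omp atomic read'
   with 'v = x' becomes roughly:

       tmp = __atomic_load_8 (&x, mo);
       v = VIEW_CONVERT_EXPR<double>(tmp);

   where MO is derived from the directive's memory order clause.  */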
8733 static bool
8734 expand_omp_atomic_load (basic_block load_bb, tree addr,
8735 tree loaded_val, int index)
8737 enum built_in_function tmpbase;
8738 gimple_stmt_iterator gsi;
8739 basic_block store_bb;
8740 location_t loc;
8741 gimple *stmt;
8742 tree decl, type, itype;
8744 gsi = gsi_last_nondebug_bb (load_bb);
8745 stmt = gsi_stmt (gsi);
8746 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8747 loc = gimple_location (stmt);
8749 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8750 is smaller than word size, then expand_atomic_load assumes that the load
8751 is atomic. We could avoid the builtin entirely in this case. */
8753 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8754 decl = builtin_decl_explicit (tmpbase);
8755 if (decl == NULL_TREE)
8756 return false;
8758 type = TREE_TYPE (loaded_val);
8759 itype = TREE_TYPE (TREE_TYPE (decl));
8761 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8762 tree mo = build_int_cst (integer_type_node,
8763 omp_memory_order_to_memmodel (omo));
8764 gcall *call = gimple_build_call (decl, 2, addr, mo);
8765 gimple_set_location (call, loc);
8766 gimple_set_vuse (call, gimple_vuse (stmt));
8767 gimple *repl;
8768 if (!useless_type_conversion_p (type, itype))
8770 tree lhs = make_ssa_name (itype);
8771 gimple_call_set_lhs (call, lhs);
8772 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8773 repl = gimple_build_assign (loaded_val,
8774 build1 (VIEW_CONVERT_EXPR, type, lhs));
8775 gimple_set_location (repl, loc);
8777 else
8779 gimple_call_set_lhs (call, loaded_val);
8780 repl = call;
8782 gsi_replace (&gsi, repl, true);
8784 store_bb = single_succ (load_bb);
8785 gsi = gsi_last_nondebug_bb (store_bb);
8786 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8787 gsi_remove (&gsi, true);
8789 return true;
8792 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8793 operation as a normal volatile store. */
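/* E.g. (illustrative sketch) '#pragma omp atomic write' with 'x = expr'
   becomes roughly:

       __atomic_store_8 (&x, expr, mo);

   and a capture of the old value ('v = x; x = expr;') becomes an exchange:

       v = __atomic_exchange_8 (&x, expr, mo);  */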
8795 static bool
8796 expand_omp_atomic_store (basic_block load_bb, tree addr,
8797 tree loaded_val, tree stored_val, int index)
8799 enum built_in_function tmpbase;
8800 gimple_stmt_iterator gsi;
8801 basic_block store_bb = single_succ (load_bb);
8802 location_t loc;
8803 gimple *stmt;
8804 tree decl, type, itype;
8805 machine_mode imode;
8806 bool exchange;
8808 gsi = gsi_last_nondebug_bb (load_bb);
8809 stmt = gsi_stmt (gsi);
8810 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8812 /* If the load value is needed, then this isn't a store but an exchange. */
8813 exchange = gimple_omp_atomic_need_value_p (stmt);
8815 gsi = gsi_last_nondebug_bb (store_bb);
8816 stmt = gsi_stmt (gsi);
8817 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8818 loc = gimple_location (stmt);
8820 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8821 is smaller than word size, then expand_atomic_store assumes that the store
8822 is atomic. We could avoid the builtin entirely in this case. */
8824 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8825 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8826 decl = builtin_decl_explicit (tmpbase);
8827 if (decl == NULL_TREE)
8828 return false;
8830 type = TREE_TYPE (stored_val);
8832 /* Dig out the type of the function's second argument. */
8833 itype = TREE_TYPE (decl);
8834 itype = TYPE_ARG_TYPES (itype);
8835 itype = TREE_CHAIN (itype);
8836 itype = TREE_VALUE (itype);
8837 imode = TYPE_MODE (itype);
8839 if (exchange && !can_atomic_exchange_p (imode, true))
8840 return false;
8842 if (!useless_type_conversion_p (itype, type))
8843 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8844 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8845 tree mo = build_int_cst (integer_type_node,
8846 omp_memory_order_to_memmodel (omo));
8847 stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
8848 true, GSI_SAME_STMT);
8849 gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
8850 gimple_set_location (call, loc);
8851 gimple_set_vuse (call, gimple_vuse (stmt));
8852 gimple_set_vdef (call, gimple_vdef (stmt));
8854 gimple *repl = call;
8855 if (exchange)
8857 if (!useless_type_conversion_p (type, itype))
8859 tree lhs = make_ssa_name (itype);
8860 gimple_call_set_lhs (call, lhs);
8861 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8862 repl = gimple_build_assign (loaded_val,
8863 build1 (VIEW_CONVERT_EXPR, type, lhs));
8864 gimple_set_location (repl, loc);
8866 else
8867 gimple_call_set_lhs (call, loaded_val);
8869 gsi_replace (&gsi, repl, true);
8871 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8872 gsi = gsi_last_nondebug_bb (load_bb);
8873 gsi_remove (&gsi, true);
8875 return true;
8878 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8879 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8880 size of the data type, and thus usable to find the index of the builtin
8881 decl. Returns false if the expression is not of the proper form. */
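/* E.g. (illustrative) for 'int x':

       #pragma omp atomic
       x += n;

   matches the PLUS_EXPR case below and is expanded to

       __atomic_fetch_add_4 (&x, n, mo);  */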
8883 static bool
8884 expand_omp_atomic_fetch_op (basic_block load_bb,
8885 tree addr, tree loaded_val,
8886 tree stored_val, int index)
8888 enum built_in_function oldbase, newbase, tmpbase;
8889 tree decl, itype, call;
8890 tree lhs, rhs;
8891 basic_block store_bb = single_succ (load_bb);
8892 gimple_stmt_iterator gsi;
8893 gimple *stmt;
8894 location_t loc;
8895 enum tree_code code;
8896 bool need_old, need_new;
8897 machine_mode imode;
8899 /* We expect to find the following sequences:
8901 load_bb:
8902 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8904 store_bb:
8905 val = tmp OP something; (or: something OP tmp)
8906 GIMPLE_OMP_STORE (val)
8908 ???FIXME: Allow a more flexible sequence.
8909 Perhaps use data flow to pick the statements. */
8913 gsi = gsi_after_labels (store_bb);
8914 stmt = gsi_stmt (gsi);
8915 if (is_gimple_debug (stmt))
8917 gsi_next_nondebug (&gsi);
8918 if (gsi_end_p (gsi))
8919 return false;
8920 stmt = gsi_stmt (gsi);
8922 loc = gimple_location (stmt);
8923 if (!is_gimple_assign (stmt))
8924 return false;
8925 gsi_next_nondebug (&gsi);
8926 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8927 return false;
8928 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8929 need_old = gimple_omp_atomic_need_value_p (last_nondebug_stmt (load_bb));
8930 enum omp_memory_order omo
8931 = gimple_omp_atomic_memory_order (last_nondebug_stmt (load_bb));
8932 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8933 gcc_checking_assert (!need_old || !need_new);
8935 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8936 return false;
8938 /* Check for one of the supported fetch-op operations. */
8939 code = gimple_assign_rhs_code (stmt);
8940 switch (code)
8942 case PLUS_EXPR:
8943 case POINTER_PLUS_EXPR:
8944 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8945 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8946 break;
8947 case MINUS_EXPR:
8948 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8949 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8950 break;
8951 case BIT_AND_EXPR:
8952 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8953 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8954 break;
8955 case BIT_IOR_EXPR:
8956 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8957 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8958 break;
8959 case BIT_XOR_EXPR:
8960 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8961 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8962 break;
8963 default:
8964 return false;
8967 /* Make sure the expression is of the proper form. */
8968 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8969 rhs = gimple_assign_rhs2 (stmt);
8970 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8971 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8972 rhs = gimple_assign_rhs1 (stmt);
8973 else
8974 return false;
8976 tmpbase = ((enum built_in_function)
8977 ((need_new ? newbase : oldbase) + index + 1));
8978 decl = builtin_decl_explicit (tmpbase);
8979 if (decl == NULL_TREE)
8980 return false;
8981 itype = TREE_TYPE (TREE_TYPE (decl));
8982 imode = TYPE_MODE (itype);
8984 /* We could test all of the various optabs involved, but the fact of the
8985 matter is that (with the exception of i486 vs i586 and xadd) all targets
8986 that support any atomic operation optab also implement compare-and-swap.
8987 Let optabs.cc take care of expanding any compare-and-swap loop. */
8988 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8989 return false;
8991 gsi = gsi_last_nondebug_bb (load_bb);
8992 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8994 /* OpenMP does not imply any barrier-like semantics on its atomic ops
8995 beyond the requested memory order; it only requires that the operation
8996 happen atomically, so MO (relaxed by default in OpenMP) is used. */
8997 call = build_call_expr_loc (loc, decl, 3, addr,
8998 fold_convert_loc (loc, itype, rhs),
8999 build_int_cst (NULL, mo));
9001 if (need_old || need_new)
9003 lhs = need_old ? loaded_val : stored_val;
9004 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
9005 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
9007 else
9008 call = fold_convert_loc (loc, void_type_node, call);
9009 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
9010 gsi_remove (&gsi, true);
9012 gsi = gsi_last_nondebug_bb (store_bb);
9013 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
9014 gsi_remove (&gsi, true);
9015 gsi = gsi_last_nondebug_bb (store_bb);
9016 stmt = gsi_stmt (gsi);
9017 gsi_remove (&gsi, true);
9019 if (gimple_in_ssa_p (cfun))
9020 release_defs (stmt);
9022 return true;
9025 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
9026 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
9027 Returns false if the expression is not of the proper form. */
9029 static bool
9030 expand_omp_atomic_cas (basic_block load_bb, tree addr,
9031 tree loaded_val, tree stored_val, int index)
9033 /* We expect to find the following sequences:
9035 load_bb:
9036 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
9038 store_bb:
9039 val = tmp == e ? d : tmp;
9040 GIMPLE_OMP_ATOMIC_STORE (val)
9042 or in store_bb instead:
9043 tmp2 = tmp == e;
9044 val = tmp2 ? d : tmp;
9045 GIMPLE_OMP_ATOMIC_STORE (val)
9048 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
9049 val = e == tmp3 ? d : tmp;
9050 GIMPLE_OMP_ATOMIC_STORE (val)
9052 etc. */
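/* E.g. (illustrative) the first shape typically results from

       #pragma omp atomic compare
       x = x == e ? d : x;

   with the other shapes arising from how the front end gimplified the
   comparison and any needed view-conversion of X.  */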
9055 basic_block store_bb = single_succ (load_bb);
9056 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
9057 gimple *store_stmt = gsi_stmt (gsi);
9058 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
9059 return false;
9060 gsi_prev_nondebug (&gsi);
9061 if (gsi_end_p (gsi))
9062 return false;
9063 gimple *condexpr_stmt = gsi_stmt (gsi);
9064 if (!is_gimple_assign (condexpr_stmt)
9065 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
9066 return false;
9067 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
9068 return false;
9069 gimple *cond_stmt = NULL;
9070 gimple *vce_stmt = NULL;
9071 gsi_prev_nondebug (&gsi);
9072 if (!gsi_end_p (gsi))
9074 cond_stmt = gsi_stmt (gsi);
9075 if (!is_gimple_assign (cond_stmt))
9076 return false;
9077 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
9079 gsi_prev_nondebug (&gsi);
9080 if (!gsi_end_p (gsi))
9082 vce_stmt = gsi_stmt (gsi);
9083 if (!is_gimple_assign (vce_stmt)
9084 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
9085 return false;
9088 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
9089 std::swap (vce_stmt, cond_stmt);
9090 else
9091 return false;
9092 if (vce_stmt)
9094 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
9095 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
9096 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
9097 return false;
9098 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
9099 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
9100 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
9101 TYPE_SIZE (TREE_TYPE (loaded_val))))
9102 return false;
9103 gsi_prev_nondebug (&gsi);
9104 if (!gsi_end_p (gsi))
9105 return false;
9108 tree cond = gimple_assign_rhs1 (condexpr_stmt);
9109 tree cond_op1, cond_op2;
9110 if (cond_stmt)
9112 /* We should now always get a separate cond_stmt. */
9113 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9114 return false;
9115 cond_op1 = gimple_assign_rhs1 (cond_stmt);
9116 cond_op2 = gimple_assign_rhs2 (cond_stmt);
9118 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9119 return false;
9120 else
9122 cond_op1 = TREE_OPERAND (cond, 0);
9123 cond_op2 = TREE_OPERAND (cond, 1);
9125 tree d;
9126 if (TREE_CODE (cond) == NE_EXPR)
9128 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9129 return false;
9130 d = gimple_assign_rhs3 (condexpr_stmt);
9132 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9133 return false;
9134 else
9135 d = gimple_assign_rhs2 (condexpr_stmt);
9136 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9137 if (operand_equal_p (e, cond_op1))
9138 e = cond_op2;
9139 else if (operand_equal_p (e, cond_op2))
9140 e = cond_op1;
9141 else
9142 return false;
9144 location_t loc = gimple_location (store_stmt);
9145 gimple *load_stmt = last_nondebug_stmt (load_bb);
9146 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9147 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9148 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9149 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9150 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9151 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9152 gcc_checking_assert (!need_old || !need_new);
9154 enum built_in_function fncode
9155 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9156 + index + 1);
9157 tree cmpxchg = builtin_decl_explicit (fncode);
9158 if (cmpxchg == NULL_TREE)
9159 return false;
9160 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9162 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9163 || !can_atomic_load_p (TYPE_MODE (itype)))
9164 return false;
9166 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9167 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9168 return false;
9170 gsi = gsi_for_stmt (store_stmt);
9171 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9173 tree ne = create_tmp_reg (itype);
9174 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9175 gimple_set_location (g, loc);
9176 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9177 e = ne;
9179 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9181 tree nd = create_tmp_reg (itype);
9182 enum tree_code code;
9183 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9185 code = VIEW_CONVERT_EXPR;
9186 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9188 else
9189 code = NOP_EXPR;
9190 gimple *g = gimple_build_assign (nd, code, d);
9191 gimple_set_location (g, loc);
9192 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9193 d = nd;
9196 tree ctype = build_complex_type (itype);
9197 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9198 gimple *g
9199 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9200 build_int_cst (integer_type_node, flag),
9201 mo, fmo);
9202 tree cres = create_tmp_reg (ctype);
9203 gimple_call_set_lhs (g, cres);
9204 gimple_set_location (g, loc);
9205 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9207 if (cond_stmt || need_old || need_new)
9209 tree im = create_tmp_reg (itype);
9210 g = gimple_build_assign (im, IMAGPART_EXPR,
9211 build1 (IMAGPART_EXPR, itype, cres));
9212 gimple_set_location (g, loc);
9213 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9215 tree re = NULL_TREE;
9216 if (need_old || need_new)
9218 re = create_tmp_reg (itype);
9219 g = gimple_build_assign (re, REALPART_EXPR,
9220 build1 (REALPART_EXPR, itype, cres));
9221 gimple_set_location (g, loc);
9222 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9225 if (cond_stmt)
9227 g = gimple_build_assign (cond, NOP_EXPR, im);
9228 gimple_set_location (g, loc);
9229 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9232 if (need_new)
9234 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9235 cond_stmt
9236 ? cond : build2 (NE_EXPR, boolean_type_node,
9237 im, build_zero_cst (itype)),
9238 d, re);
9239 gimple_set_location (g, loc);
9240 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9241 re = gimple_assign_lhs (g);
9244 if (need_old || need_new)
9246 tree v = need_old ? loaded_val : stored_val;
9247 enum tree_code code;
9248 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9250 code = VIEW_CONVERT_EXPR;
9251 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9253 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9254 code = NOP_EXPR;
9255 else
9256 code = TREE_CODE (re);
9257 g = gimple_build_assign (v, code, re);
9258 gimple_set_location (g, loc);
9259 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9263 gsi_remove (&gsi, true);
9264 gsi = gsi_for_stmt (load_stmt);
9265 gsi_remove (&gsi, true);
9266 gsi = gsi_for_stmt (condexpr_stmt);
9267 gsi_remove (&gsi, true);
9268 if (cond_stmt)
9270 gsi = gsi_for_stmt (cond_stmt);
9271 gsi_remove (&gsi, true);
9273 if (vce_stmt)
9275 gsi = gsi_for_stmt (vce_stmt);
9276 gsi_remove (&gsi, true);
9279 return true;
9282 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9284 oldval = *addr;
9285 repeat:
9286 newval = rhs; // with oldval replacing *addr in rhs
9287 curval = __sync_val_compare_and_swap (addr, oldval, newval);
9288 if (curval != oldval)
9289 { oldval = curval; goto repeat; }
9291 INDEX is log2 of the size of the data type, and thus usable to find the
9292 index of the builtin decl. */
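/* E.g. (illustrative sketch) 'x = x * 2.0' on a double that could not use
   a fetch-op builtin is retried on the integer view of X:

       long xi = __atomic_load_8 ((long *) &x, MEMMODEL_RELAXED);
       for (;;)
         {
           long newi
             = VIEW_CONVERT_EXPR<long>(VIEW_CONVERT_EXPR<double>(xi) * 2.0);
           long cur = __sync_val_compare_and_swap ((long *) &x, xi, newi);
           if (cur == xi)
             break;
           xi = cur;
         }  */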
9294 static bool
9295 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9296 tree addr, tree loaded_val, tree stored_val,
9297 int index)
9299 tree loadedi, storedi, initial, new_storedi, old_vali;
9300 tree type, itype, cmpxchg, iaddr, atype;
9301 gimple_stmt_iterator si;
9302 basic_block loop_header = single_succ (load_bb);
9303 gimple *phi, *stmt;
9304 edge e;
9305 enum built_in_function fncode;
9307 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9308 + index + 1);
9309 cmpxchg = builtin_decl_explicit (fncode);
9310 if (cmpxchg == NULL_TREE)
9311 return false;
9312 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9313 atype = type;
9314 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9316 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9317 || !can_atomic_load_p (TYPE_MODE (itype)))
9318 return false;
9320 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9321 si = gsi_last_nondebug_bb (load_bb);
9322 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9323 location_t loc = gimple_location (gsi_stmt (si));
9324 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9325 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9326 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9328 /* For floating-point values, we'll need to view-convert them to integers
9329 so that we can perform the atomic compare and swap. Simplify the
9330 following code by always setting up the "i"ntegral variables. */
9331 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9333 tree iaddr_val;
9335 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9336 true));
9337 atype = itype;
9338 iaddr_val
9339 = force_gimple_operand_gsi (&si,
9340 fold_convert (TREE_TYPE (iaddr), addr),
9341 false, NULL_TREE, true, GSI_SAME_STMT);
9342 stmt = gimple_build_assign (iaddr, iaddr_val);
9343 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9344 loadedi = create_tmp_var (itype);
9345 if (gimple_in_ssa_p (cfun))
9346 loadedi = make_ssa_name (loadedi);
9348 else
9350 iaddr = addr;
9351 loadedi = loaded_val;
9354 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9355 tree loaddecl = builtin_decl_explicit (fncode);
9356 if (loaddecl)
9357 initial
9358 = fold_convert (atype,
9359 build_call_expr (loaddecl, 2, iaddr,
9360 build_int_cst (NULL_TREE,
9361 MEMMODEL_RELAXED)));
9362 else
9364 tree off
9365 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9366 true), 0);
9367 initial = build2 (MEM_REF, atype, iaddr, off);
9370 initial
9371 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9372 GSI_SAME_STMT);
9374 /* Move the value to the LOADEDI temporary. */
9375 if (gimple_in_ssa_p (cfun))
9377 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9378 phi = create_phi_node (loadedi, loop_header);
9379 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9380 initial);
9382 else
9383 gsi_insert_before (&si,
9384 gimple_build_assign (loadedi, initial),
9385 GSI_SAME_STMT);
9386 if (loadedi != loaded_val)
9388 gimple_stmt_iterator gsi2;
9389 tree x;
9391 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9392 gsi2 = gsi_start_bb (loop_header);
9393 if (gimple_in_ssa_p (cfun))
9395 gassign *stmt;
9396 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9397 true, GSI_SAME_STMT);
9398 stmt = gimple_build_assign (loaded_val, x);
9399 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9401 else
9403 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9404 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9405 true, GSI_SAME_STMT);
9408 gsi_remove (&si, true);
9410 si = gsi_last_nondebug_bb (store_bb);
9411 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9413 if (iaddr == addr)
9414 storedi = stored_val;
9415 else
9416 storedi
9417 = force_gimple_operand_gsi (&si,
9418 build1 (VIEW_CONVERT_EXPR, itype,
9419 stored_val), true, NULL_TREE, true,
9420 GSI_SAME_STMT);
9422 /* Build the compare&swap statement. */
9423 tree ctype = build_complex_type (itype);
9424 int flag = int_size_in_bytes (itype);
9425 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9426 ctype, 6, iaddr, loadedi,
9427 storedi,
9428 build_int_cst (integer_type_node,
9429 flag),
9430 mo, fmo);
9431 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9432 new_storedi = force_gimple_operand_gsi (&si,
9433 fold_convert (TREE_TYPE (loadedi),
9434 new_storedi),
9435 true, NULL_TREE,
9436 true, GSI_SAME_STMT);
9438 if (gimple_in_ssa_p (cfun))
9439 old_vali = loadedi;
9440 else
9442 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9443 stmt = gimple_build_assign (old_vali, loadedi);
9444 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9446 stmt = gimple_build_assign (loadedi, new_storedi);
9447 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9450 /* Note that we always perform the comparison as an integer, even for
9451 floating point. This allows the atomic operation to properly
9452 succeed even with NaNs and -0.0. */
9453 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9454 stmt = gimple_build_cond_empty (ne);
9455 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9457 /* Update cfg. */
9458 e = single_succ_edge (store_bb);
9459 e->flags &= ~EDGE_FALLTHRU;
9460 e->flags |= EDGE_FALSE_VALUE;
9461 /* Expect no looping. */
9462 e->probability = profile_probability::guessed_always ();
9464 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9465 e->probability = profile_probability::guessed_never ();
9467 /* Copy the new value to loadedi (we already did that before the condition
9468 if we are not in SSA). */
9469 if (gimple_in_ssa_p (cfun))
9471 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9472 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9475 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9476 stmt = gsi_stmt (si);
9477 gsi_remove (&si, true);
9478 if (gimple_in_ssa_p (cfun))
9479 release_defs (stmt);
9481 class loop *loop = alloc_loop ();
9482 loop->header = loop_header;
9483 loop->latch = store_bb;
9484 add_loop (loop, loop_header->loop_father);
9486 return true;
9489 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9491 GOMP_atomic_start ();
9492 *addr = rhs;
9493 GOMP_atomic_end ();
9495 The result is not globally atomic, but works so long as all parallel
9496 references are within #pragma omp atomic directives. According to
9497 responses received from omp@openmp.org, this appears to be within spec.
9498 That makes sense, since that's how several other compilers handle
9499 this situation as well.
9500 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9501 expanding. STORED_VAL is the operand of the matching
9502 GIMPLE_OMP_ATOMIC_STORE.
9504 We replace
9505 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9506 loaded_val = *addr;
9508 and replace
9509 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9510 *addr = stored_val; */
9513 static bool
9514 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9515 tree addr, tree loaded_val, tree stored_val)
9517 gimple_stmt_iterator si;
9518 gassign *stmt;
9519 tree t;
9521 si = gsi_last_nondebug_bb (load_bb);
9522 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9524 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9525 t = build_call_expr (t, 0);
9526 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9528 tree mem = build_simple_mem_ref (addr);
9529 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9530 TREE_OPERAND (mem, 1)
9531 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9532 true),
9533 TREE_OPERAND (mem, 1));
9534 stmt = gimple_build_assign (loaded_val, mem);
9535 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9536 gsi_remove (&si, true);
9538 si = gsi_last_nondebug_bb (store_bb);
9539 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9541 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9542 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (si)));
9543 gimple_set_vdef (stmt, gimple_vdef (gsi_stmt (si)));
9544 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9546 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9547 t = build_call_expr (t, 0);
9548 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9549 gsi_remove (&si, true);
9550 return true;
9553 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try the specialized
9554 expansions (a plain atomic load or store, a __atomic_fetch_op builtin,
9555 an ATOMIC_COMPARE_EXCHANGE); if those fail, we try a compare-and-swap
9556 loop via expand_omp_atomic_pipeline, and the ultimate fallback is
9557 wrapping the operation in a mutex (expand_omp_atomic_mutex). REGION
9558 is the atomic region built by build_omp_regions_1(). */
9560 static void
9561 expand_omp_atomic (struct omp_region *region)
9563 basic_block load_bb = region->entry, store_bb = region->exit;
9564 gomp_atomic_load *load
9565 = as_a <gomp_atomic_load *> (last_nondebug_stmt (load_bb));
9566 gomp_atomic_store *store
9567 = as_a <gomp_atomic_store *> (last_nondebug_stmt (store_bb));
9568 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9569 tree addr = gimple_omp_atomic_load_rhs (load);
9570 tree stored_val = gimple_omp_atomic_store_val (store);
9571 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9572 HOST_WIDE_INT index;
9574 /* Make sure the type is one of the supported sizes. */
9575 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9576 index = exact_log2 (index);
9577 if (index >= 0 && index <= 4)
9579 unsigned int align = TYPE_ALIGN_UNIT (type);
9581 /* __sync builtins require strict data alignment. */
9582 if (exact_log2 (align) >= index)
9584 /* Atomic load. */
9585 scalar_mode smode;
9586 if (loaded_val == stored_val
9587 && (is_int_mode (TYPE_MODE (type), &smode)
9588 || is_float_mode (TYPE_MODE (type), &smode))
9589 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9590 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9591 return;
9593 /* Atomic store. */
9594 if ((is_int_mode (TYPE_MODE (type), &smode)
9595 || is_float_mode (TYPE_MODE (type), &smode))
9596 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9597 && store_bb == single_succ (load_bb)
9598 && first_stmt (store_bb) == store
9599 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9600 stored_val, index))
9601 return;
9603 /* When possible, use specialized atomic update functions. */
9604 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9605 && store_bb == single_succ (load_bb)
9606 && expand_omp_atomic_fetch_op (load_bb, addr,
9607 loaded_val, stored_val, index))
9608 return;
9610 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9611 if (store_bb == single_succ (load_bb)
9612 && !gimple_in_ssa_p (cfun)
9613 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9614 index))
9615 return;
9617 /* If we don't have specialized __sync builtins, try to implement
9618 it as a compare-and-swap loop. */
9619 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9620 loaded_val, stored_val, index))
9621 return;
9625 /* The ultimate fallback is wrapping the operation in a mutex. */
9626 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9629 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9630 at REGION_EXIT. */
9632 static void
9633 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9634 basic_block region_exit)
9636 class loop *outer = region_entry->loop_father;
9637 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9639 /* Don't parallelize the kernels region if it contains more than one outer
9640 loop. */
9641 unsigned int nr_outer_loops = 0;
9642 class loop *single_outer = NULL;
9643 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9645 gcc_assert (loop_outer (loop) == outer);
9647 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9648 continue;
9650 if (region_exit != NULL
9651 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9652 continue;
9654 nr_outer_loops++;
9655 single_outer = loop;
9657 if (nr_outer_loops != 1)
9658 return;
9660 for (class loop *loop = single_outer->inner;
9661 loop != NULL;
9662 loop = loop->inner)
9663 if (loop->next)
9664 return;
9666 /* Mark the loops in the region. */
9667 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9668 loop->in_oacc_kernels_region = true;
9671 /* Build target argument identifier from the DEVICE identifier, value
9672 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9674 static tree
9675 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9677 tree t = build_int_cst (integer_type_node, device);
9678 if (subseqent_param)
9679 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9680 build_int_cst (integer_type_node,
9681 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9682 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9683 build_int_cst (integer_type_node, id));
9684 return t;
9687 /* Like the above, but return it in a type that can be stored directly
9688 as an element of the argument array. */
9690 static tree
9691 get_target_argument_identifier (int device, bool subseqent_param, int id)
9693 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9694 return fold_convert (ptr_type_node, t);
9697 /* Return a target argument consisting of DEVICE identifier, value identifier
9698 ID, and the actual VALUE. */
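/* E.g. (illustrative) for DEVICE = GOMP_TARGET_ARG_DEVICE_ALL,
   ID = GOMP_TARGET_ARG_NUM_TEAMS and VALUE = 4 this produces

       (void *) (4 << GOMP_TARGET_ARG_VALUE_SHIFT
                 | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS);  */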
9700 static tree
9701 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9702 tree value)
9704 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9705 fold_convert (integer_type_node, value),
9706 build_int_cst (unsigned_type_node,
9707 GOMP_TARGET_ARG_VALUE_SHIFT));
9708 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9709 get_target_argument_identifier_1 (device, false, id));
9710 t = fold_convert (ptr_type_node, t);
9711 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9714 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9715 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in
9716 it; otherwise push an identifier (with DEVICE and ID) and the VALUE as
9717 two separate arguments. */
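/* E.g. (illustrative) 'num_teams (4)' fits in 16 bits and is pushed as the
   single encoded pointer built by get_target_argument_value, whereas
   'thread_limit (n)' with a non-constant N is pushed as two arguments: an
   identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, then N itself
   converted to 'void *'.  */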
9719 static void
9720 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9721 int id, tree value, vec <tree> *args)
9723 if (tree_fits_shwi_p (value)
9724 && tree_to_shwi (value) > -(1 << 15)
9725 && tree_to_shwi (value) < (1 << 15))
9726 args->quick_push (get_target_argument_value (gsi, device, id, value));
9727 else
9729 args->quick_push (get_target_argument_identifier (device, true, id));
9730 value = fold_convert (ptr_type_node, value);
9731 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9732 GSI_SAME_STMT);
9733 args->quick_push (value);
9737 /* Create an array of arguments that is then passed to GOMP_target. */
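/* E.g. (illustrative) for 'num_teams (4) thread_limit (n)' the resulting
   NULL-terminated array is laid out as

       .omp_target_args = { <encoded num_teams 4>,
                            <thread_limit identifier>, (void *) n, NULL };  */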
9739 static tree
9740 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9742 auto_vec <tree, 6> args;
9743 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9744 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9745 if (c)
9746 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9747 else
9748 t = integer_minus_one_node;
9749 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9750 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9752 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9753 if (c)
9754 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9755 else
9756 t = integer_minus_one_node;
9757 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9758 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9759 &args);
9761 /* Produce more, perhaps device-specific, arguments here. */
9763 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9764 args.length () + 1),
9765 ".omp_target_args");
9766 for (unsigned i = 0; i < args.length (); i++)
9768 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9769 build_int_cst (integer_type_node, i),
9770 NULL_TREE, NULL_TREE);
9771 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9772 GSI_SAME_STMT);
9774 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9775 build_int_cst (integer_type_node, args.length ()),
9776 NULL_TREE, NULL_TREE);
9777 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9778 GSI_SAME_STMT);
9779 TREE_ADDRESSABLE (argarray) = 1;
9780 return build_fold_addr_expr (argarray);
9783 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9785 static void
9786 expand_omp_target (struct omp_region *region)
9788 basic_block entry_bb, exit_bb, new_bb;
9789 struct function *child_cfun;
9790 tree child_fn, child_fn2, block, t, c;
9791 gimple_stmt_iterator gsi;
9792 gomp_target *entry_stmt;
9793 gimple *stmt;
9794 edge e;
9795 bool offloaded;
9796 int target_kind;
9798 entry_stmt = as_a <gomp_target *> (last_nondebug_stmt (region->entry));
9799 target_kind = gimple_omp_target_kind (entry_stmt);
9800 new_bb = region->entry;
9802 offloaded = is_gimple_omp_offloaded (entry_stmt);
9803 switch (target_kind)
9805 case GF_OMP_TARGET_KIND_REGION:
9806 case GF_OMP_TARGET_KIND_UPDATE:
9807 case GF_OMP_TARGET_KIND_ENTER_DATA:
9808 case GF_OMP_TARGET_KIND_EXIT_DATA:
9809 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9810 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9811 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9812 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9813 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9814 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9815 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9816 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9817 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9818 case GF_OMP_TARGET_KIND_DATA:
9819 case GF_OMP_TARGET_KIND_OACC_DATA:
9820 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9821 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9822 break;
9823 default:
9824 gcc_unreachable ();
9827 tree clauses = gimple_omp_target_clauses (entry_stmt);
9829 bool is_ancestor = false;
9830 child_fn = child_fn2 = NULL_TREE;
9831 child_cfun = NULL;
9832 if (offloaded)
9834 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9835 if (ENABLE_OFFLOADING && c)
9836 is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
9837 child_fn = gimple_omp_target_child_fn (entry_stmt);
9838 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9841 /* Supported by expand_omp_taskreg, but not here. */
9842 if (child_cfun != NULL)
9843 gcc_checking_assert (!child_cfun->cfg);
9844 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9846 entry_bb = region->entry;
9847 exit_bb = region->exit;
9849 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9850 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9852 /* From here on, all OpenACC compute constructs are mapped to
9853 'BUILT_IN_GOACC_PARALLEL' and get their compute regions outlined.
9854 To distinguish between them, we attach attributes. */
9855 switch (target_kind)
9857 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9858 DECL_ATTRIBUTES (child_fn)
9859 = tree_cons (get_identifier ("oacc parallel"),
9860 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9861 break;
9862 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9863 DECL_ATTRIBUTES (child_fn)
9864 = tree_cons (get_identifier ("oacc kernels"),
9865 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9866 break;
9867 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9868 DECL_ATTRIBUTES (child_fn)
9869 = tree_cons (get_identifier ("oacc serial"),
9870 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9871 break;
9872 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9873 DECL_ATTRIBUTES (child_fn)
9874 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9875 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9876 break;
9877 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9878 DECL_ATTRIBUTES (child_fn)
9879 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9880 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9881 break;
9882 default:
9883 /* Make sure we don't miss any. */
9884 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9885 && is_gimple_omp_offloaded (entry_stmt)));
9886 break;
9889 if (offloaded)
9891 unsigned srcidx, dstidx, num;
9893 /* If the offloading region needs data sent from the parent
9894 function, then the very first statement (except possible
9895 tree profile counter updates) of the offloading body
9896 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9897 &.OMP_DATA_O is passed as an argument to the child function,
9898 we need to replace it with the argument as seen by the child
9899 function.
9901 In most cases, this will end up being the identity assignment
9902 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9903 a function call that has been inlined, the original PARM_DECL
9904 .OMP_DATA_I may have been converted into a different local
9905 variable, in which case we need to keep the assignment. */
9906 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9907 if (data_arg)
9909 basic_block entry_succ_bb = single_succ (entry_bb);
9910 gimple_stmt_iterator gsi;
9911 tree arg;
9912 gimple *tgtcopy_stmt = NULL;
9913 tree sender = TREE_VEC_ELT (data_arg, 0);
9915 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9917 gcc_assert (!gsi_end_p (gsi));
9918 stmt = gsi_stmt (gsi);
9919 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9920 continue;
9922 if (gimple_num_ops (stmt) == 2)
9924 tree arg = gimple_assign_rhs1 (stmt);
9926 /* We're ignoring the subcode because we're
9927 effectively doing a STRIP_NOPS. */
9929 if (TREE_CODE (arg) == ADDR_EXPR
9930 && TREE_OPERAND (arg, 0) == sender)
9932 tgtcopy_stmt = stmt;
9933 break;
9938 gcc_assert (tgtcopy_stmt != NULL);
9939 arg = DECL_ARGUMENTS (child_fn);
9941 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9942 gsi_remove (&gsi, true);
9945 /* Declare local variables needed in CHILD_CFUN. */
9946 block = DECL_INITIAL (child_fn);
9947 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9948 /* The gimplifier could record temporaries in the offloading block
9949 rather than in the containing function's local_decls chain,
9950 which would mean cgraph missed finalizing them. Do it now. */
9951 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9952 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9953 varpool_node::finalize_decl (t);
9954 DECL_SAVED_TREE (child_fn) = NULL;
9955 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9956 gimple_set_body (child_fn, NULL);
9957 TREE_USED (block) = 1;
9959 /* Reset DECL_CONTEXT on function arguments. */
9960 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9961 DECL_CONTEXT (t) = child_fn;
9963 /* Split ENTRY_BB at GIMPLE_*,
9964 so that it can be moved to the child function. */
9965 gsi = gsi_last_nondebug_bb (entry_bb);
9966 stmt = gsi_stmt (gsi);
9967 gcc_assert (stmt
9968 && gimple_code (stmt) == gimple_code (entry_stmt));
9969 e = split_block (entry_bb, stmt);
9970 gsi_remove (&gsi, true);
9971 entry_bb = e->dest;
9972 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9974 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9975 if (exit_bb)
9977 gsi = gsi_last_nondebug_bb (exit_bb);
9978 gcc_assert (!gsi_end_p (gsi)
9979 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9980 stmt = gimple_build_return (NULL);
9981 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9982 gsi_remove (&gsi, true);
9985 /* Move the offloading region into CHILD_CFUN. */
9987 block = gimple_block (entry_stmt);
9989 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9990 if (exit_bb)
9991 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9992 /* When the OMP expansion process cannot guarantee an up-to-date
9993 loop tree, arrange for the child function to fix up its loops. */
9994 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9995 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9997 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9998 num = vec_safe_length (child_cfun->local_decls);
9999 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
10001 t = (*child_cfun->local_decls)[srcidx];
10002 if (DECL_CONTEXT (t) == cfun->decl)
10003 continue;
10004 if (srcidx != dstidx)
10005 (*child_cfun->local_decls)[dstidx] = t;
10006 dstidx++;
10008 if (dstidx != num)
10009 vec_safe_truncate (child_cfun->local_decls, dstidx);
10011 /* Inform the callgraph about the new function. */
10012 child_cfun->curr_properties = cfun->curr_properties;
10013 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
10014 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
10015 cgraph_node *node = cgraph_node::get_create (child_fn);
10016 node->parallelized_function = 1;
10017 cgraph_node::add_new_function (child_fn, true);
10019 /* Add the new function to the offload table. */
10020 if (ENABLE_OFFLOADING)
10022 if (in_lto_p)
10023 DECL_PRESERVE_P (child_fn) = 1;
10024 if (!is_ancestor)
10025 vec_safe_push (offload_funcs, child_fn);
10028 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
10029 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
10031 /* Fix the callgraph edges for child_cfun. Those for cfun will be
10032 fixed in a following pass. */
10033 push_cfun (child_cfun);
10034 if (need_asm)
10035 assign_assembler_name_if_needed (child_fn);
10036 cgraph_edge::rebuild_edges ();
10038 /* Some EH regions might become dead, see PR34608. If
10039 pass_cleanup_cfg isn't the first pass to run on the
10040 new child, these dead EH edges might cause problems.
10041 Clean them up now. */
10042 if (flag_exceptions)
10044 basic_block bb;
10045 bool changed = false;
10047 FOR_EACH_BB_FN (bb, cfun)
10048 changed |= gimple_purge_dead_eh_edges (bb);
10049 if (changed)
10050 cleanup_tree_cfg ();
10052 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10053 verify_loop_structure ();
10054 pop_cfun ();
10056 if (dump_file && !gimple_in_ssa_p (cfun))
10058 omp_any_child_fn_dumped = true;
10059 dump_function_header (dump_file, child_fn, dump_flags);
10060 dump_function_to_file (child_fn, dump_file, dump_flags);
10063 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
10065 /* Handle the case where an inner ancestor:1 target region is called
10066 from an outer target region. */
10067 if (is_ancestor)
10069 cgraph_node *fn2_node;
10070 child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
10071 FUNCTION_DECL,
10072 clone_function_name (child_fn, "nohost"),
10073 TREE_TYPE (child_fn));
10074 if (in_lto_p)
10075 DECL_PRESERVE_P (child_fn2) = 1;
10076 TREE_STATIC (child_fn2) = 1;
10077 DECL_ARTIFICIAL (child_fn2) = 1;
10078 DECL_IGNORED_P (child_fn2) = 0;
10079 TREE_PUBLIC (child_fn2) = 0;
10080 DECL_UNINLINABLE (child_fn2) = 1;
10081 DECL_EXTERNAL (child_fn2) = 0;
10082 DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
10083 DECL_INITIAL (child_fn2) = make_node (BLOCK);
10084 BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
10085 DECL_ATTRIBUTES (child_fn)
10086 = remove_attribute ("omp target entrypoint",
10087 DECL_ATTRIBUTES (child_fn));
10088 DECL_ATTRIBUTES (child_fn2)
10089 = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
10090 NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
10091 DECL_ATTRIBUTES (child_fn)
10092 = tree_cons (get_identifier ("omp target device_ancestor_host"),
10093 NULL_TREE, DECL_ATTRIBUTES (child_fn));
10094 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
10095 = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
10096 DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
10097 = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
10098 DECL_FUNCTION_VERSIONED (child_fn2)
10099 = DECL_FUNCTION_VERSIONED (current_function_decl);
10101 fn2_node = cgraph_node::get_create (child_fn2);
10102 fn2_node->offloadable = 1;
10103 fn2_node->force_output = 1;
10104 node->offloadable = 0;
10106 /* Enable the pass_omp_device_lower pass. */
10107 fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
10108 fn2_node->calls_declare_variant_alt = 1;
10110 t = build_decl (DECL_SOURCE_LOCATION (child_fn),
10111 RESULT_DECL, NULL_TREE, void_type_node);
10112 DECL_ARTIFICIAL (t) = 1;
10113 DECL_IGNORED_P (t) = 1;
10114 DECL_CONTEXT (t) = child_fn2;
10115 DECL_RESULT (child_fn2) = t;
10116 DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
10117 void_type_node, NULL);
10118 tree tmp = DECL_ARGUMENTS (child_fn);
10119 t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
10120 DECL_NAME (tmp), TREE_TYPE (tmp));
10121 DECL_ARTIFICIAL (t) = 1;
10122 DECL_NAMELESS (t) = 1;
10123 DECL_ARG_TYPE (t) = ptr_type_node;
10124 DECL_CONTEXT (t) = current_function_decl;
10125 TREE_USED (t) = 1;
10126 TREE_READONLY (t) = 1;
10127 DECL_ARGUMENTS (child_fn2) = t;
10128 gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
10130 gimplify_function_tree (child_fn2);
10131 cgraph_node::add_new_function (child_fn2, true);
10133 vec_safe_push (offload_funcs, child_fn2);
10134 if (dump_file && !gimple_in_ssa_p (cfun))
10136 dump_function_header (dump_file, child_fn2, dump_flags);
10137 dump_function_to_file (child_fn2, dump_file, dump_flags);
10142 /* Emit a library call to launch the offloading region, or do data
10143 transfers. */
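/* E.g. (illustrative; the exact argument list follows libgomp's entry
   point) a plain '#pragma omp target' region becomes roughly

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, args);

   with the actual callee selected via START_IX below.  */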
10144 tree t1, t2, t3, t4, depend;
10145 enum built_in_function start_ix;
10146 unsigned int flags_i = 0;
10148 switch (gimple_omp_target_kind (entry_stmt))
10150 case GF_OMP_TARGET_KIND_REGION:
10151 start_ix = BUILT_IN_GOMP_TARGET;
10152 break;
10153 case GF_OMP_TARGET_KIND_DATA:
10154 start_ix = BUILT_IN_GOMP_TARGET_DATA;
10155 break;
10156 case GF_OMP_TARGET_KIND_UPDATE:
10157 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
10158 break;
10159 case GF_OMP_TARGET_KIND_ENTER_DATA:
10160 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10161 break;
10162 case GF_OMP_TARGET_KIND_EXIT_DATA:
10163 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10164 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
10165 break;
10166 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10167 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10168 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10169 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10170 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10171 start_ix = BUILT_IN_GOACC_PARALLEL;
10172 break;
10173 case GF_OMP_TARGET_KIND_OACC_DATA:
10174 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10175 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10176 start_ix = BUILT_IN_GOACC_DATA_START;
10177 break;
10178 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10179 start_ix = BUILT_IN_GOACC_UPDATE;
10180 break;
10181 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10182 start_ix = BUILT_IN_GOACC_ENTER_DATA;
10183 break;
10184 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10185 start_ix = BUILT_IN_GOACC_EXIT_DATA;
10186 break;
10187 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10188 start_ix = BUILT_IN_GOACC_DECLARE;
10189 break;
10190 default:
10191 gcc_unreachable ();
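/* For illustration, the builtins chosen above map to libgomp entry
   points roughly as follows (names as in omp-builtins.def; a sketch,
   not an authoritative list):
     BUILT_IN_GOMP_TARGET                 -> GOMP_target_ext
     BUILT_IN_GOMP_TARGET_DATA            -> GOMP_target_data_ext
     BUILT_IN_GOMP_TARGET_UPDATE          -> GOMP_target_update_ext
     BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA -> GOMP_target_enter_exit_data
     BUILT_IN_GOACC_PARALLEL              -> GOACC_parallel_keyed
     BUILT_IN_GOACC_DATA_START            -> GOACC_data_start */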
10194 tree device = NULL_TREE;
10195 location_t device_loc = UNKNOWN_LOCATION;
10196 tree goacc_flags = NULL_TREE;
10197 bool need_device_adjustment = false;
10198 gimple_stmt_iterator adj_gsi;
10199 if (is_gimple_omp_oacc (entry_stmt))
10201 /* By default, no GOACC_FLAGs are set. */
10202 goacc_flags = integer_zero_node;
10204 else
10206 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10207 if (c)
10209 device = OMP_CLAUSE_DEVICE_ID (c);
10210 /* Ensure 'device' is of the correct type. */
10211 device = fold_convert_loc (device_loc, integer_type_node, device);
10212 if (TREE_CODE (device) == INTEGER_CST)
10214 if (wi::to_wide (device) == GOMP_DEVICE_ICV)
10215 device = build_int_cst (integer_type_node,
10216 GOMP_DEVICE_HOST_FALLBACK);
10217 else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10218 device = build_int_cst (integer_type_node,
10219 GOMP_DEVICE_HOST_FALLBACK - 1);
10221 else
10222 need_device_adjustment = true;
10223 device_loc = OMP_CLAUSE_LOCATION (c);
10224 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10225 device = build_int_cst (integer_type_node,
10226 GOMP_DEVICE_HOST_FALLBACK);
10228 else
10230 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
10231 runtime library choose). */
10232 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10233 device_loc = gimple_location (entry_stmt);
10236 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10237 /* FIXME: in_reduction (...) with nowait is not implemented yet;
10238 pretend nowait doesn't appear. */
10239 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10240 c = NULL;
10241 if (c)
10242 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10245 /* By default, there is no conditional. */
10246 tree cond = NULL_TREE;
10247 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10248 if (c)
10249 cond = OMP_CLAUSE_IF_EXPR (c);
10250 /* If we found the clause 'if (cond)', build:
10251 OpenACC: goacc_flags = (cond ? goacc_flags
10252 : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10253 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10254 if (cond)
10256 tree *tp;
10257 if (is_gimple_omp_oacc (entry_stmt))
10258 tp = &goacc_flags;
10259 else
10260 tp = &device;
10262 cond = gimple_boolify (cond);
10264 basic_block cond_bb, then_bb, else_bb;
10265 edge e;
10266 tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10267 if (offloaded)
10268 e = split_block_after_labels (new_bb);
10269 else
10271 gsi = gsi_last_nondebug_bb (new_bb);
10272 gsi_prev (&gsi);
10273 e = split_block (new_bb, gsi_stmt (gsi));
10275 cond_bb = e->src;
10276 new_bb = e->dest;
10277 remove_edge (e);
10279 then_bb = create_empty_bb (cond_bb);
10280 else_bb = create_empty_bb (then_bb);
10281 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10282 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10284 stmt = gimple_build_cond_empty (cond);
10285 gsi = gsi_last_bb (cond_bb);
10286 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10288 gsi = gsi_start_bb (then_bb);
10289 stmt = gimple_build_assign (tmp_var, *tp);
10290 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10291 adj_gsi = gsi;
10293 gsi = gsi_start_bb (else_bb);
10294 if (is_gimple_omp_oacc (entry_stmt))
10295 stmt = gimple_build_assign (tmp_var,
10296 BIT_IOR_EXPR,
10297 *tp,
10298 build_int_cst (integer_type_node,
10299 GOACC_FLAG_HOST_FALLBACK));
10300 else
10301 stmt = gimple_build_assign (tmp_var,
10302 build_int_cst (integer_type_node,
10303 GOMP_DEVICE_HOST_FALLBACK));
10304 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10306 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10307 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10308 add_bb_to_loop (then_bb, cond_bb->loop_father);
10309 add_bb_to_loop (else_bb, cond_bb->loop_father);
10310 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10311 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10313 *tp = tmp_var;
10315 gsi = gsi_last_nondebug_bb (new_bb);
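/* The diamond just built amounts to (a sketch, OpenMP case):
     cond_bb:  if (cond) goto then_bb; else goto else_bb;
     then_bb:  tmp_var = device;
     else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
     new_bb:   tmp_var is used in place of device from here on.
   For OpenACC, else_bb instead ORs GOACC_FLAG_HOST_FALLBACK into the
   flags word. */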
10317 else
10319 gsi = gsi_last_nondebug_bb (new_bb);
10321 if (device != NULL_TREE)
10322 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10323 true, GSI_SAME_STMT);
10324 if (need_device_adjustment)
10326 tree tmp_var = create_tmp_var (TREE_TYPE (device));
10327 stmt = gimple_build_assign (tmp_var, device);
10328 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10329 adj_gsi = gsi_for_stmt (stmt);
10330 device = tmp_var;
10334 if (need_device_adjustment)
10336 tree uns = fold_convert (unsigned_type_node, device);
10337 uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10338 false, GSI_CONTINUE_LINKING);
10339 edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10340 basic_block cond_bb = e->src;
10341 basic_block else_bb = e->dest;
10342 if (gsi_bb (adj_gsi) == new_bb)
10344 new_bb = else_bb;
10345 gsi = gsi_last_nondebug_bb (new_bb);
10348 basic_block then_bb = create_empty_bb (cond_bb);
10349 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10351 cond = build2 (GT_EXPR, boolean_type_node, uns,
10352 build_int_cst (unsigned_type_node,
10353 GOMP_DEVICE_HOST_FALLBACK - 1));
10354 stmt = gimple_build_cond_empty (cond);
10355 adj_gsi = gsi_last_bb (cond_bb);
10356 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10358 adj_gsi = gsi_start_bb (then_bb);
10359 tree add = build2 (PLUS_EXPR, integer_type_node, device,
10360 build_int_cst (integer_type_node, -1));
10361 stmt = gimple_build_assign (device, add);
10362 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10364 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10365 e->flags = EDGE_FALSE_VALUE;
10366 add_bb_to_loop (then_bb, cond_bb->loop_father);
10367 make_edge (then_bb, else_bb, EDGE_FALLTHRU);
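/* In source terms, the runtime adjustment just wired up is (a sketch
   using the constants above):
     unsigned uns = (unsigned) device;
     if (uns > GOMP_DEVICE_HOST_FALLBACK - 1)
       device = device - 1;
   i.e., non-constant device numbers beyond the host-fallback value are
   shifted down by one, matching the compile-time remapping of the
   constant cases earlier. */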
10370 t = gimple_omp_target_data_arg (entry_stmt);
10371 if (t == NULL)
10373 t1 = size_zero_node;
10374 t2 = build_zero_cst (ptr_type_node);
10375 t3 = t2;
10376 t4 = t2;
10378 else
10380 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10381 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10382 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10383 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10384 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
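/* A summary of the accesses above (a sketch): TREE_VEC_ELT (t, 0) is
   the array of host addresses, element 1 the sizes array and element 2
   the map-kind array, so t1 is the map-entry count taken from the
   sizes array's domain and t2/t3/t4 are the addresses of the three
   arrays. */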
10387 gimple *g;
10388 bool tagging = false;
10389 /* The maximum number of arguments used by any start_ix, without varargs. */
10390 auto_vec<tree, 11> args;
10391 if (is_gimple_omp_oacc (entry_stmt))
10393 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10394 TREE_TYPE (goacc_flags), goacc_flags);
10395 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10396 NULL_TREE, true,
10397 GSI_SAME_STMT);
10398 args.quick_push (goacc_flags_m);
10400 else
10401 args.quick_push (device);
10402 if (offloaded)
10403 args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
10404 args.quick_push (t1);
10405 args.quick_push (t2);
10406 args.quick_push (t3);
10407 args.quick_push (t4);
10408 switch (start_ix)
10410 case BUILT_IN_GOACC_DATA_START:
10411 case BUILT_IN_GOACC_DECLARE:
10412 case BUILT_IN_GOMP_TARGET_DATA:
10413 break;
10414 case BUILT_IN_GOMP_TARGET:
10415 case BUILT_IN_GOMP_TARGET_UPDATE:
10416 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10417 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10418 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10419 if (c)
10420 depend = OMP_CLAUSE_DECL (c);
10421 else
10422 depend = build_int_cst (ptr_type_node, 0);
10423 args.quick_push (depend);
10424 if (start_ix == BUILT_IN_GOMP_TARGET)
10425 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10426 break;
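/* For BUILT_IN_GOMP_TARGET the vector built so far lines up with the
   libgomp entry point roughly as (a sketch; see libgomp for the
   authoritative prototype):
     GOMP_target_ext (device, fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);
   with t1..t4 supplying mapnum through kinds. */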
10427 case BUILT_IN_GOACC_PARALLEL:
10428 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10430 tree dims = NULL_TREE;
10431 unsigned int ix;
10433 /* For serial constructs we set all dimensions to 1. */
10434 for (ix = GOMP_DIM_MAX; ix--;)
10435 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10436 oacc_replace_fn_attrib (child_fn, dims);
10438 else
10439 oacc_set_fn_attrib (child_fn, clauses, &args);
10440 tagging = true;
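/* Here 'tagging' selects the GOACC_parallel_keyed varargs convention:
   the trailing arguments are (key, value) operands built by
   oacc_launch_pack (async, wait list, ...) and closed by the zero tag
   pushed at the very end below. */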
10441 /* FALLTHRU */
10442 case BUILT_IN_GOACC_ENTER_DATA:
10443 case BUILT_IN_GOACC_EXIT_DATA:
10444 case BUILT_IN_GOACC_UPDATE:
10446 tree t_async = NULL_TREE;
10448 /* If present, use the value specified by the respective
10449 clause, making sure that it is of the correct type. */
10450 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10451 if (c)
10452 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10453 integer_type_node,
10454 OMP_CLAUSE_ASYNC_EXPR (c));
10455 else if (!tagging)
10456 /* Default value for t_async. */
10457 t_async = fold_convert_loc (gimple_location (entry_stmt),
10458 integer_type_node,
10459 build_int_cst (integer_type_node,
10460 GOMP_ASYNC_SYNC));
10461 if (tagging && t_async)
10463 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10465 if (TREE_CODE (t_async) == INTEGER_CST)
10467 /* See if we can pack the async arg into the tag's
10468 operand. */
10469 i_async = TREE_INT_CST_LOW (t_async);
10470 if (i_async < GOMP_LAUNCH_OP_MAX)
10471 t_async = NULL_TREE;
10472 else
10473 i_async = GOMP_LAUNCH_OP_MAX;
10475 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10476 i_async));
10478 if (t_async)
10479 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10480 NULL_TREE, true,
10481 GSI_SAME_STMT));
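/* Summary of the async encoding above (a sketch): a small constant
   async value is packed directly into the GOMP_LAUNCH_ASYNC tag
   operand and t_async is dropped; otherwise the tag carries
   GOMP_LAUNCH_OP_MAX and the actual value follows as a separate
   argument. */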
10483 /* Save the argument index, and ... */
10484 unsigned t_wait_idx = args.length ();
10485 unsigned num_waits = 0;
10486 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10487 if (!tagging || c)
10488 /* ... push a placeholder. */
10489 args.safe_push (integer_zero_node);
10491 for (; c; c = OMP_CLAUSE_CHAIN (c))
10492 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10494 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10495 integer_type_node,
10496 OMP_CLAUSE_WAIT_EXPR (c));
10497 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10498 GSI_SAME_STMT);
10499 args.safe_push (arg);
10500 num_waits++;
10503 if (!tagging || num_waits)
10505 tree len;
10507 /* Now that we know the number, update the placeholder. */
10508 if (tagging)
10509 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10510 else
10511 len = build_int_cst (integer_type_node, num_waits);
10512 len = fold_convert_loc (gimple_location (entry_stmt),
10513 unsigned_type_node, len);
10514 args[t_wait_idx] = len;
10517 break;
10518 default:
10519 gcc_unreachable ();
10521 if (tagging)
10522 /* Push terminal marker - zero. */
10523 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10525 if (child_fn2)
10527 g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
10528 build_fold_addr_expr (child_fn));
10529 gimple_set_location (g, gimple_location (entry_stmt));
10530 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
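/* A minimal note on IFN_GOMP_TARGET_REV, based only on the code here:
   the internal call records the address of the host variant (child_fn)
   next to the launch call that references child_fn2, presumably so the
   later device-lowering machinery can associate the two for reverse
   offload. */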
10533 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10534 gimple_set_location (g, gimple_location (entry_stmt));
10535 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10536 if (!offloaded)
10538 g = gsi_stmt (gsi);
10539 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10540 gsi_remove (&gsi, true);
10544 /* Expand the parallel region tree rooted at REGION. Expansion
10545 proceeds in depth-first order. Innermost regions are expanded
10546 first. This way, parallel regions that require a new function to
10547 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10548 internal dependencies in their body. */
10550 static void
10551 expand_omp (struct omp_region *region)
10553 omp_any_child_fn_dumped = false;
10554 while (region)
10556 location_t saved_location;
10557 gimple *inner_stmt = NULL;
10559 /* First, determine whether this is a combined parallel+workshare
10560 region. */
10561 if (region->type == GIMPLE_OMP_PARALLEL)
10562 determine_parallel_type (region);
10564 if (region->type == GIMPLE_OMP_FOR
10565 && gimple_omp_for_combined_p (last_nondebug_stmt (region->entry)))
10566 inner_stmt = last_nondebug_stmt (region->inner->entry);
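/* Example of the combined case (a sketch): for
     #pragma omp parallel for
   the GIMPLE_OMP_PARALLEL region contains a GIMPLE_OMP_FOR inner
   region, and that nested statement is passed down so expand_omp_for
   can emit the combined parallel+workshare form. */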
10568 if (region->inner)
10569 expand_omp (region->inner);
10571 saved_location = input_location;
10572 if (gimple_has_location (last_nondebug_stmt (region->entry)))
10573 input_location = gimple_location (last_nondebug_stmt (region->entry));
10575 switch (region->type)
10577 case GIMPLE_OMP_PARALLEL:
10578 case GIMPLE_OMP_TASK:
10579 expand_omp_taskreg (region);
10580 break;
10582 case GIMPLE_OMP_FOR:
10583 expand_omp_for (region, inner_stmt);
10584 break;
10586 case GIMPLE_OMP_SECTIONS:
10587 expand_omp_sections (region);
10588 break;
10590 case GIMPLE_OMP_SECTION:
10591 /* Individual omp sections are handled together with their
10592 parent GIMPLE_OMP_SECTIONS region. */
10593 break;
10595 case GIMPLE_OMP_SINGLE:
10596 case GIMPLE_OMP_SCOPE:
10597 expand_omp_single (region);
10598 break;
10600 case GIMPLE_OMP_ORDERED:
10602 gomp_ordered *ord_stmt
10603 = as_a <gomp_ordered *> (last_nondebug_stmt (region->entry));
10604 if (gimple_omp_ordered_standalone_p (ord_stmt))
10606 /* We'll expand these when expanding the corresponding
10607 worksharing region with an ordered(n) clause. */
10608 gcc_assert (region->outer
10609 && region->outer->type == GIMPLE_OMP_FOR);
10610 region->ord_stmt = ord_stmt;
10611 break;
10614 /* FALLTHRU */
10615 case GIMPLE_OMP_MASTER:
10616 case GIMPLE_OMP_MASKED:
10617 case GIMPLE_OMP_TASKGROUP:
10618 case GIMPLE_OMP_CRITICAL:
10619 case GIMPLE_OMP_TEAMS:
10620 expand_omp_synch (region);
10621 break;
10623 case GIMPLE_OMP_ATOMIC_LOAD:
10624 expand_omp_atomic (region);
10625 break;
10627 case GIMPLE_OMP_TARGET:
10628 expand_omp_target (region);
10629 break;
10631 default:
10632 gcc_unreachable ();
10635 input_location = saved_location;
10636 region = region->next;
10638 if (omp_any_child_fn_dumped)
10640 if (dump_file)
10641 dump_function_header (dump_file, current_function_decl, dump_flags);
10642 omp_any_child_fn_dumped = false;
10646 /* Helper for build_omp_regions. Scan the dominator tree starting at
10647 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10648 true, the function ends once a single tree is built (otherwise, a
10649 whole forest of OMP constructs may be built). */
10651 static void
10652 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10653 bool single_tree)
10655 gimple_stmt_iterator gsi;
10656 gimple *stmt;
10657 basic_block son;
10659 gsi = gsi_last_nondebug_bb (bb);
10660 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10662 struct omp_region *region;
10663 enum gimple_code code;
10665 stmt = gsi_stmt (gsi);
10666 code = gimple_code (stmt);
10667 if (code == GIMPLE_OMP_RETURN)
10669 /* STMT is the return point out of region PARENT. Mark it
10670 as the exit point and make PARENT the immediately
10671 enclosing region. */
10672 gcc_assert (parent);
10673 region = parent;
10674 region->exit = bb;
10675 parent = parent->outer;
10677 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10679 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10680 GIMPLE_OMP_RETURN, but matches with
10681 GIMPLE_OMP_ATOMIC_LOAD. */
10682 gcc_assert (parent);
10683 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10684 region = parent;
10685 region->exit = bb;
10686 parent = parent->outer;
10688 else if (code == GIMPLE_OMP_CONTINUE)
10690 gcc_assert (parent);
10691 parent->cont = bb;
10693 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10695 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10696 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10698 else
10700 region = new_omp_region (bb, code, parent);
10701 /* Otherwise... */
10702 if (code == GIMPLE_OMP_TARGET)
10704 switch (gimple_omp_target_kind (stmt))
10706 case GF_OMP_TARGET_KIND_REGION:
10707 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10708 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10709 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10710 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10711 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10712 break;
10713 case GF_OMP_TARGET_KIND_UPDATE:
10714 case GF_OMP_TARGET_KIND_ENTER_DATA:
10715 case GF_OMP_TARGET_KIND_EXIT_DATA:
10716 case GF_OMP_TARGET_KIND_DATA:
10717 case GF_OMP_TARGET_KIND_OACC_DATA:
10718 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10719 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10720 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10721 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10722 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10723 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10724 /* ..., other than for those stand-alone directives...
10725 To be precise, target data isn't stand-alone, but the
10726 gimplifier puts the end API call into a try/finally block
10727 for it, so OMP expansion can treat it as such. */
10728 region = NULL;
10729 break;
10730 default:
10731 gcc_unreachable ();
10734 else if (code == GIMPLE_OMP_ORDERED
10735 && gimple_omp_ordered_standalone_p (stmt))
10736 /* #pragma omp ordered depend is also just a stand-alone
10737 directive. */
10738 region = NULL;
10739 else if (code == GIMPLE_OMP_TASK
10740 && gimple_omp_task_taskwait_p (stmt))
10741 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10742 region = NULL;
10743 else if (code == GIMPLE_OMP_TASKGROUP)
10744 /* #pragma omp taskgroup isn't a stand-alone directive, but the
10745 gimplifier puts the end API call into a try/finally block
10746 for it, so OMP expansion can treat it as such. */
10747 region = NULL;
10748 /* ..., this directive becomes the parent for a new region. */
10749 if (region)
10750 parent = region;
10754 if (single_tree && !parent)
10755 return;
10757 for (son = first_dom_son (CDI_DOMINATORS, bb);
10758 son;
10759 son = next_dom_son (CDI_DOMINATORS, son))
10760 build_omp_regions_1 (son, parent, single_tree);
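/* For instance (a sketch):
     #pragma omp parallel
     {
       #pragma omp for
       ...
     }
   produces a parallel region whose inner child is the for region;
   constructs at the same nesting level become next peers. */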
10763 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10764 root_omp_region. */
10766 static void
10767 build_omp_regions_root (basic_block root)
10769 gcc_assert (root_omp_region == NULL);
10770 build_omp_regions_1 (root, NULL, true);
10771 gcc_assert (root_omp_region != NULL);
10774 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10776 void
10777 omp_expand_local (basic_block head)
10779 build_omp_regions_root (head);
10780 if (dump_file && (dump_flags & TDF_DETAILS))
10782 fprintf (dump_file, "\nOMP region tree\n\n");
10783 dump_omp_region (dump_file, root_omp_region, 0);
10784 fprintf (dump_file, "\n");
10787 remove_exit_barriers (root_omp_region);
10788 expand_omp (root_omp_region);
10790 omp_free_regions ();
10793 /* Scan the CFG and build a tree of OMP regions, storing the root
10794 of the tree in root_omp_region. */
10796 static void
10797 build_omp_regions (void)
10799 gcc_assert (root_omp_region == NULL);
10800 calculate_dominance_info (CDI_DOMINATORS);
10801 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10804 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10806 static unsigned int
10807 execute_expand_omp (void)
10809 build_omp_regions ();
10811 if (!root_omp_region)
10812 return 0;
10814 if (dump_file)
10816 fprintf (dump_file, "\nOMP region tree\n\n");
10817 dump_omp_region (dump_file, root_omp_region, 0);
10818 fprintf (dump_file, "\n");
10821 remove_exit_barriers (root_omp_region);
10823 expand_omp (root_omp_region);
10825 omp_free_regions ();
10827 return (TODO_cleanup_cfg
10828 | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
10831 /* OMP expansion -- the default pass, run before creation of SSA form. */
10833 namespace {
10835 const pass_data pass_data_expand_omp =
10837 GIMPLE_PASS, /* type */
10838 "ompexp", /* name */
10839 OPTGROUP_OMP, /* optinfo_flags */
10840 TV_NONE, /* tv_id */
10841 PROP_gimple_any, /* properties_required */
10842 PROP_gimple_eomp, /* properties_provided */
10843 0, /* properties_destroyed */
10844 0, /* todo_flags_start */
10845 0, /* todo_flags_finish */
10848 class pass_expand_omp : public gimple_opt_pass
10850 public:
10851 pass_expand_omp (gcc::context *ctxt)
10852 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10855 /* opt_pass methods: */
10856 unsigned int execute (function *) final override
10858 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10859 || flag_openmp_simd != 0)
10860 && !seen_error ());
10862 /* This pass always runs, to provide PROP_gimple_eomp.
10863 But often, there is nothing to do. */
10864 if (!gate)
10865 return 0;
10867 return execute_expand_omp ();
10870 }; // class pass_expand_omp
10872 } // anon namespace
10874 gimple_opt_pass *
10875 make_pass_expand_omp (gcc::context *ctxt)
10877 return new pass_expand_omp (ctxt);
10880 namespace {
10882 const pass_data pass_data_expand_omp_ssa =
10884 GIMPLE_PASS, /* type */
10885 "ompexpssa", /* name */
10886 OPTGROUP_OMP, /* optinfo_flags */
10887 TV_NONE, /* tv_id */
10888 PROP_cfg | PROP_ssa, /* properties_required */
10889 PROP_gimple_eomp, /* properties_provided */
10890 0, /* properties_destroyed */
10891 0, /* todo_flags_start */
10892 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10895 class pass_expand_omp_ssa : public gimple_opt_pass
10897 public:
10898 pass_expand_omp_ssa (gcc::context *ctxt)
10899 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10902 /* opt_pass methods: */
10903 bool gate (function *fun) final override
10905 return !(fun->curr_properties & PROP_gimple_eomp);
10907 unsigned int execute (function *) final override
10909 return execute_expand_omp ();
10911 opt_pass * clone () final override
10913 return new pass_expand_omp_ssa (m_ctxt);
10916 }; // class pass_expand_omp_ssa
10918 } // anon namespace
10920 gimple_opt_pass *
10921 make_pass_expand_omp_ssa (gcc::context *ctxt)
10923 return new pass_expand_omp_ssa (ctxt);
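/* Presumably the reason for having both passes: "ompexp" provides
   PROP_gimple_eomp for the normal pre-SSA pipeline, while the cloneable
   "ompexpssa" variant catches functions that reach SSA form without
   that property; its gate makes it a no-op whenever the first pass
   already expanded everything. */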
10926 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10927 GIMPLE_* codes. */
10929 bool
10930 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10931 int *region_idx)
10933 gimple *last = last_nondebug_stmt (bb);
10934 enum gimple_code code = gimple_code (last);
10935 struct omp_region *cur_region = *region;
10936 bool fallthru = false;
10938 switch (code)
10940 case GIMPLE_OMP_PARALLEL:
10941 case GIMPLE_OMP_FOR:
10942 case GIMPLE_OMP_SINGLE:
10943 case GIMPLE_OMP_TEAMS:
10944 case GIMPLE_OMP_MASTER:
10945 case GIMPLE_OMP_MASKED:
10946 case GIMPLE_OMP_SCOPE:
10947 case GIMPLE_OMP_CRITICAL:
10948 case GIMPLE_OMP_SECTION:
10949 cur_region = new_omp_region (bb, code, cur_region);
10950 fallthru = true;
10951 break;
10953 case GIMPLE_OMP_TASKGROUP:
10954 cur_region = new_omp_region (bb, code, cur_region);
10955 fallthru = true;
10956 cur_region = cur_region->outer;
10957 break;
10959 case GIMPLE_OMP_TASK:
10960 cur_region = new_omp_region (bb, code, cur_region);
10961 fallthru = true;
10962 if (gimple_omp_task_taskwait_p (last))
10963 cur_region = cur_region->outer;
10964 break;
10966 case GIMPLE_OMP_ORDERED:
10967 cur_region = new_omp_region (bb, code, cur_region);
10968 fallthru = true;
10969 if (gimple_omp_ordered_standalone_p (last))
10970 cur_region = cur_region->outer;
10971 break;
10973 case GIMPLE_OMP_TARGET:
10974 cur_region = new_omp_region (bb, code, cur_region);
10975 fallthru = true;
10976 switch (gimple_omp_target_kind (last))
10978 case GF_OMP_TARGET_KIND_REGION:
10979 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10980 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10981 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10982 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10983 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10984 break;
10985 case GF_OMP_TARGET_KIND_UPDATE:
10986 case GF_OMP_TARGET_KIND_ENTER_DATA:
10987 case GF_OMP_TARGET_KIND_EXIT_DATA:
10988 case GF_OMP_TARGET_KIND_DATA:
10989 case GF_OMP_TARGET_KIND_OACC_DATA:
10990 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10991 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10992 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10993 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10994 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10995 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10996 cur_region = cur_region->outer;
10997 break;
10998 default:
10999 gcc_unreachable ();
11001 break;
11003 case GIMPLE_OMP_SECTIONS:
11004 cur_region = new_omp_region (bb, code, cur_region);
11005 fallthru = true;
11006 break;
11008 case GIMPLE_OMP_SECTIONS_SWITCH:
11009 fallthru = false;
11010 break;
11012 case GIMPLE_OMP_ATOMIC_LOAD:
11013 case GIMPLE_OMP_ATOMIC_STORE:
11014 fallthru = true;
11015 break;
11017 case GIMPLE_OMP_RETURN:
11018 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
11019 somewhere other than the next block. This will be
11020 created later. */
11021 cur_region->exit = bb;
11022 if (cur_region->type == GIMPLE_OMP_TASK)
11023 /* Add an edge corresponding to not scheduling the task
11024 immediately. */
11025 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
11026 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
11027 cur_region = cur_region->outer;
11028 break;
11030 case GIMPLE_OMP_CONTINUE:
11031 cur_region->cont = bb;
11032 switch (cur_region->type)
11034 case GIMPLE_OMP_FOR:
11035 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
11036 successor edges as abnormal to prevent splitting
11037 them. */
11038 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
11039 /* Make the loopback edge. */
11040 make_edge (bb, single_succ (cur_region->entry),
11041 EDGE_ABNORMAL);
11043 /* Create an edge from GIMPLE_OMP_FOR to exit, which
11044 corresponds to the case that the body of the loop
11045 is not executed at all. */
11046 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
11047 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
11048 fallthru = false;
11049 break;
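/* Edges just created for a workshare loop (a sketch, where 'exit' is
   bb->next_bb):
     entry (OMP_FOR) -> body   existing successor, now abnormal
     cont            -> body   abnormal loopback edge
     entry           -> exit   abnormal, zero-iteration case
     cont            -> exit   fallthru | abnormal */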
11051 case GIMPLE_OMP_SECTIONS:
11052 /* Wire up the edges into and out of the nested sections. */
11054 basic_block switch_bb = single_succ (cur_region->entry);
11056 struct omp_region *i;
11057 for (i = cur_region->inner; i ; i = i->next)
11059 gcc_assert (i->type == GIMPLE_OMP_SECTION);
11060 make_edge (switch_bb, i->entry, 0);
11061 make_edge (i->exit, bb, EDGE_FALLTHRU);
11064 /* Make the loopback edge to the block with
11065 GIMPLE_OMP_SECTIONS_SWITCH. */
11066 make_edge (bb, switch_bb, 0);
11068 /* Make the edge from the switch to exit. */
11069 make_edge (switch_bb, bb->next_bb, 0);
11070 fallthru = false;
11072 break;
11074 case GIMPLE_OMP_TASK:
11075 fallthru = true;
11076 break;
11078 default:
11079 gcc_unreachable ();
11081 break;
11083 default:
11084 gcc_unreachable ();
11087 if (*region != cur_region)
11089 *region = cur_region;
11090 if (cur_region)
11091 *region_idx = cur_region->entry->index;
11092 else
11093 *region_idx = 0;
11096 return fallthru;