1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
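/* Illustrative note (not from the original source): with a simd schedule
   and omp_max_vf () == 8, a chunk size of 10 becomes (10 + 7) & -8 == 16,
   i.e. the chunk is rounded up to a whole number of vector lanes.  */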
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
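/* Added note: the vector built above is later spliced into the argument
   list of the combined GOMP_parallel_loop_* or GOMP_parallel_sections
   call emitted by expand_parallel_call below.  */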
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 /* Give up for task reductions on the parallel; while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
331 if (region->inner->type == GIMPLE_OMP_FOR)
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
349 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
350 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
351 return;
353 else if (region->inner->type == GIMPLE_OMP_SECTIONS
354 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
355 OMP_CLAUSE__REDUCTEMP_)
356 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
357 OMP_CLAUSE__CONDTEMP_)))
358 return;
360 region->is_combined_parallel = true;
361 region->inner->is_combined_parallel = true;
362 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
366 /* Debugging dumps for parallel regions. */
367 void dump_omp_region (FILE *, struct omp_region *, int);
368 void debug_omp_region (struct omp_region *);
369 void debug_all_omp_regions (void);
371 /* Dump the parallel region tree rooted at REGION. */
373 void
374 dump_omp_region (FILE *file, struct omp_region *region, int indent)
376 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
377 gimple_code_name[region->type]);
379 if (region->inner)
380 dump_omp_region (file, region->inner, indent + 4);
382 if (region->cont)
384 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
385 region->cont->index);
388 if (region->exit)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
390 region->exit->index);
391 else
392 fprintf (file, "%*s[no exit marker]\n", indent, "");
394 if (region->next)
395 dump_omp_region (file, region->next, indent);
398 DEBUG_FUNCTION void
399 debug_omp_region (struct omp_region *region)
401 dump_omp_region (stderr, region, 0);
404 DEBUG_FUNCTION void
405 debug_all_omp_regions (void)
407 dump_omp_region (stderr, root_omp_region, 0);
410 /* Create a new parallel region with entry block BB and type TYPE inside region PARENT. */
412 static struct omp_region *
413 new_omp_region (basic_block bb, enum gimple_code type,
414 struct omp_region *parent)
416 struct omp_region *region = XCNEW (struct omp_region);
418 region->outer = parent;
419 region->entry = bb;
420 region->type = type;
422 if (parent)
424 /* This is a nested region. Add it to the list of inner
425 regions in PARENT. */
426 region->next = parent->inner;
427 parent->inner = region;
429 else
431 /* This is a toplevel region. Add it to the list of toplevel
432 regions in ROOT_OMP_REGION. */
433 region->next = root_omp_region;
434 root_omp_region = region;
437 return region;
440 /* Release the memory associated with the region tree rooted at REGION. */
442 static void
443 free_omp_region_1 (struct omp_region *region)
445 struct omp_region *i, *n;
447 for (i = region->inner; i ; i = n)
449 n = i->next;
450 free_omp_region_1 (i);
453 free (region);
456 /* Release the memory for the entire omp region tree. */
458 void
459 omp_free_regions (void)
461 struct omp_region *r, *n;
462 for (r = root_omp_region; r ; r = n)
464 n = r->next;
465 free_omp_region_1 (r);
467 root_omp_region = NULL;
470 /* A convenience function to build an empty GIMPLE_COND with just the
471 condition. */
473 static gcond *
474 gimple_build_cond_empty (tree cond)
476 enum tree_code pred_code;
477 tree lhs, rhs;
479 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
480 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
483 /* Return true if a parallel REGION is within a declare target function or
484 within a target region and is not a part of a gridified target. */
486 static bool
487 parallel_needs_hsa_kernel_p (struct omp_region *region)
489 bool indirect = false;
490 for (region = region->outer; region; region = region->outer)
492 if (region->type == GIMPLE_OMP_PARALLEL)
493 indirect = true;
494 else if (region->type == GIMPLE_OMP_TARGET)
496 gomp_target *tgt_stmt
497 = as_a <gomp_target *> (last_stmt (region->entry));
499 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
500 OMP_CLAUSE__GRIDDIM_))
501 return indirect;
502 else
503 return true;
507 if (lookup_attribute ("omp declare target",
508 DECL_ATTRIBUTES (current_function_decl)))
509 return true;
511 return false;
514 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
515 Add CHILD_FNDECL to decl chain of the supercontext of the block
516 ENTRY_BLOCK - this is the block which originally contained the
517 code from which CHILD_FNDECL was created.
519 Together, these actions ensure that the debug info for the outlined
520 function will be emitted with the correct lexical scope. */
522 static void
523 adjust_context_and_scope (struct omp_region *region, tree entry_block,
524 tree child_fndecl)
526 tree parent_fndecl = NULL_TREE;
527 gimple *entry_stmt;
528 /* OMP expansion expands inner regions before outer ones, so if
529 we e.g. have an explicit task region nested in a parallel region, when
530 expanding the task region current_function_decl will be the original
531 source function, but we actually want to use as context the child
532 function of the parallel. */
533 for (region = region->outer;
534 region && parent_fndecl == NULL_TREE; region = region->outer)
535 switch (region->type)
537 case GIMPLE_OMP_PARALLEL:
538 case GIMPLE_OMP_TASK:
539 case GIMPLE_OMP_TEAMS:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
542 break;
543 case GIMPLE_OMP_TARGET:
544 entry_stmt = last_stmt (region->entry);
545 parent_fndecl
546 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
547 break;
548 default:
549 break;
552 if (parent_fndecl == NULL_TREE)
553 parent_fndecl = current_function_decl;
554 DECL_CONTEXT (child_fndecl) = parent_fndecl;
556 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
558 tree b = BLOCK_SUPERCONTEXT (entry_block);
559 if (TREE_CODE (b) == BLOCK)
561 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
562 BLOCK_VARS (b) = child_fndecl;
567 /* Build the function calls to GOMP_parallel etc to actually
568 generate the parallel operation. REGION is the parallel region
569 being expanded. BB is the block where the code should be inserted. WS_ARGS
570 will be set if this is a call to a combined parallel+workshare
571 construct; it contains the list of additional arguments needed by
572 the workshare construct. */
574 static void
575 expand_parallel_call (struct omp_region *region, basic_block bb,
576 gomp_parallel *entry_stmt,
577 vec<tree, va_gc> *ws_args)
579 tree t, t1, t2, val, cond, c, clauses, flags;
580 gimple_stmt_iterator gsi;
581 gimple *stmt;
582 enum built_in_function start_ix;
583 int start_ix2;
584 location_t clause_loc;
585 vec<tree, va_gc> *args;
587 clauses = gimple_omp_parallel_clauses (entry_stmt);
589 /* Determine what flavor of GOMP_parallel we will be
590 emitting. */
591 start_ix = BUILT_IN_GOMP_PARALLEL;
592 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
593 if (rtmp)
594 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
595 else if (is_combined_parallel (region))
597 switch (region->inner->type)
599 case GIMPLE_OMP_FOR:
600 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
601 switch (region->inner->sched_kind)
603 case OMP_CLAUSE_SCHEDULE_RUNTIME:
604 if ((region->inner->sched_modifiers
605 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
606 start_ix2 = 6;
607 else if ((region->inner->sched_modifiers
608 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
609 start_ix2 = 7;
610 else
611 start_ix2 = 3;
612 break;
613 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
614 case OMP_CLAUSE_SCHEDULE_GUIDED:
615 if ((region->inner->sched_modifiers
616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
618 start_ix2 = 3 + region->inner->sched_kind;
619 break;
621 /* FALLTHRU */
622 default:
623 start_ix2 = region->inner->sched_kind;
624 break;
626 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
627 start_ix = (enum built_in_function) start_ix2;
628 break;
629 case GIMPLE_OMP_SECTIONS:
630 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
631 break;
632 default:
633 gcc_unreachable ();
637 /* By default, the value of NUM_THREADS is zero (selected at run time)
638 and there is no conditional. */
639 cond = NULL_TREE;
640 val = build_int_cst (unsigned_type_node, 0);
641 flags = build_int_cst (unsigned_type_node, 0);
643 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
644 if (c)
645 cond = OMP_CLAUSE_IF_EXPR (c);
647 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
648 if (c)
650 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
651 clause_loc = OMP_CLAUSE_LOCATION (c);
653 else
654 clause_loc = gimple_location (entry_stmt);
656 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
657 if (c)
658 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
660 /* Ensure 'val' is of the correct type. */
661 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
663 /* If we found the clause 'if (cond)', build either
664 (cond == 0) or (cond ? val : 1u). */
665 if (cond)
667 cond = gimple_boolify (cond);
669 if (integer_zerop (val))
670 val = fold_build2_loc (clause_loc,
671 EQ_EXPR, unsigned_type_node, cond,
672 build_int_cst (TREE_TYPE (cond), 0));
673 else
675 basic_block cond_bb, then_bb, else_bb;
676 edge e, e_then, e_else;
677 tree tmp_then, tmp_else, tmp_join, tmp_var;
679 tmp_var = create_tmp_var (TREE_TYPE (val));
680 if (gimple_in_ssa_p (cfun))
682 tmp_then = make_ssa_name (tmp_var);
683 tmp_else = make_ssa_name (tmp_var);
684 tmp_join = make_ssa_name (tmp_var);
686 else
688 tmp_then = tmp_var;
689 tmp_else = tmp_var;
690 tmp_join = tmp_var;
693 e = split_block_after_labels (bb);
694 cond_bb = e->src;
695 bb = e->dest;
696 remove_edge (e);
698 then_bb = create_empty_bb (cond_bb);
699 else_bb = create_empty_bb (then_bb);
700 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
701 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
703 stmt = gimple_build_cond_empty (cond);
704 gsi = gsi_start_bb (cond_bb);
705 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
707 gsi = gsi_start_bb (then_bb);
708 expand_omp_build_assign (&gsi, tmp_then, val, true);
710 gsi = gsi_start_bb (else_bb);
711 expand_omp_build_assign (&gsi, tmp_else,
712 build_int_cst (unsigned_type_node, 1),
713 true);
715 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
716 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
717 add_bb_to_loop (then_bb, cond_bb->loop_father);
718 add_bb_to_loop (else_bb, cond_bb->loop_father);
719 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
720 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
722 if (gimple_in_ssa_p (cfun))
724 gphi *phi = create_phi_node (tmp_join, bb);
725 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
726 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
729 val = tmp_join;
732 gsi = gsi_start_bb (bb);
733 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
734 false, GSI_CONTINUE_LINKING);
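/* Added note: at this point VAL holds 0 (runtime default) when the if
   clause is true and no num_threads clause was given, 1 when the if
   clause is false, and the gimplified (cond ? num_threads : 1) when both
   clauses are present.  */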
737 gsi = gsi_last_nondebug_bb (bb);
738 t = gimple_omp_parallel_data_arg (entry_stmt);
739 if (t == NULL)
740 t1 = null_pointer_node;
741 else
742 t1 = build_fold_addr_expr (t);
743 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
744 t2 = build_fold_addr_expr (child_fndecl);
746 vec_alloc (args, 4 + vec_safe_length (ws_args));
747 args->quick_push (t2);
748 args->quick_push (t1);
749 args->quick_push (val);
750 if (ws_args)
751 args->splice (*ws_args);
752 args->quick_push (flags);
754 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
755 builtin_decl_explicit (start_ix), args);
757 if (rtmp)
759 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
760 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
761 fold_convert (type,
762 fold_convert (pointer_sized_int_node, t)));
764 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
765 false, GSI_CONTINUE_LINKING);
767 if (hsa_gen_requested_p ()
768 && parallel_needs_hsa_kernel_p (region))
770 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
771 hsa_register_kernel (child_cnode);
775 /* Build the function call to GOMP_task to actually
776 generate the task operation. BB is the block where the code should be inserted. */
778 static void
779 expand_task_call (struct omp_region *region, basic_block bb,
780 gomp_task *entry_stmt)
782 tree t1, t2, t3;
783 gimple_stmt_iterator gsi;
784 location_t loc = gimple_location (entry_stmt);
786 tree clauses = gimple_omp_task_clauses (entry_stmt);
788 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
789 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
790 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
791 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
792 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
793 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
795 unsigned int iflags
796 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
797 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
798 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
800 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
801 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
802 tree num_tasks = NULL_TREE;
803 bool ull = false;
804 if (taskloop_p)
806 gimple *g = last_stmt (region->outer->entry);
807 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
808 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
809 struct omp_for_data fd;
810 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
811 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
812 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
813 OMP_CLAUSE__LOOPTEMP_);
814 startvar = OMP_CLAUSE_DECL (startvar);
815 endvar = OMP_CLAUSE_DECL (endvar);
816 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
817 if (fd.loop.cond_code == LT_EXPR)
818 iflags |= GOMP_TASK_FLAG_UP;
819 tree tclauses = gimple_omp_for_clauses (g);
820 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
821 if (num_tasks)
822 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
823 else
825 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
826 if (num_tasks)
828 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
829 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
831 else
832 num_tasks = integer_zero_node;
834 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
835 if (ifc == NULL_TREE)
836 iflags |= GOMP_TASK_FLAG_IF;
837 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
838 iflags |= GOMP_TASK_FLAG_NOGROUP;
839 ull = fd.iter_type == long_long_unsigned_type_node;
840 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
841 iflags |= GOMP_TASK_FLAG_REDUCTION;
843 else if (priority)
844 iflags |= GOMP_TASK_FLAG_PRIORITY;
846 tree flags = build_int_cst (unsigned_type_node, iflags);
848 tree cond = boolean_true_node;
849 if (ifc)
851 if (taskloop_p)
853 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
854 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
855 build_int_cst (unsigned_type_node,
856 GOMP_TASK_FLAG_IF),
857 build_int_cst (unsigned_type_node, 0));
858 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
859 flags, t);
861 else
862 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
865 if (finalc)
867 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
868 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
869 build_int_cst (unsigned_type_node,
870 GOMP_TASK_FLAG_FINAL),
871 build_int_cst (unsigned_type_node, 0));
872 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
874 if (depend)
875 depend = OMP_CLAUSE_DECL (depend);
876 else
877 depend = build_int_cst (ptr_type_node, 0);
878 if (priority)
879 priority = fold_convert (integer_type_node,
880 OMP_CLAUSE_PRIORITY_EXPR (priority));
881 else
882 priority = integer_zero_node;
884 gsi = gsi_last_nondebug_bb (bb);
885 tree t = gimple_omp_task_data_arg (entry_stmt);
886 if (t == NULL)
887 t2 = null_pointer_node;
888 else
889 t2 = build_fold_addr_expr_loc (loc, t);
890 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
891 t = gimple_omp_task_copy_fn (entry_stmt);
892 if (t == NULL)
893 t3 = null_pointer_node;
894 else
895 t3 = build_fold_addr_expr_loc (loc, t);
897 if (taskloop_p)
898 t = build_call_expr (ull
899 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
900 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
901 11, t1, t2, t3,
902 gimple_omp_task_arg_size (entry_stmt),
903 gimple_omp_task_arg_align (entry_stmt), flags,
904 num_tasks, priority, startvar, endvar, step);
905 else
906 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
907 9, t1, t2, t3,
908 gimple_omp_task_arg_size (entry_stmt),
909 gimple_omp_task_arg_align (entry_stmt), cond, flags,
910 depend, priority);
912 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
913 false, GSI_CONTINUE_LINKING);
916 /* Build the function call to GOMP_taskwait_depend to actually
917 generate the taskwait operation. BB is the block where the code
918 should be inserted. */
920 static void
921 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
923 tree clauses = gimple_omp_task_clauses (entry_stmt);
924 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
925 if (depend == NULL_TREE)
926 return;
928 depend = OMP_CLAUSE_DECL (depend);
930 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
931 tree t
932 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
933 1, depend);
935 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
936 false, GSI_CONTINUE_LINKING);
939 /* Build the function call to GOMP_teams_reg to actually
940 generate the host teams operation. REGION is the teams region
941 being expanded. BB is the block where the code should be inserted. */
943 static void
944 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
946 tree clauses = gimple_omp_teams_clauses (entry_stmt);
947 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
948 if (num_teams == NULL_TREE)
949 num_teams = build_int_cst (unsigned_type_node, 0);
950 else
952 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
953 num_teams = fold_convert (unsigned_type_node, num_teams);
955 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
956 if (thread_limit == NULL_TREE)
957 thread_limit = build_int_cst (unsigned_type_node, 0);
958 else
960 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
961 thread_limit = fold_convert (unsigned_type_node, thread_limit);
964 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
965 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
966 if (t == NULL)
967 t1 = null_pointer_node;
968 else
969 t1 = build_fold_addr_expr (t);
970 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
971 tree t2 = build_fold_addr_expr (child_fndecl);
973 vec<tree, va_gc> *args;
974 vec_alloc (args, 5);
975 args->quick_push (t2);
976 args->quick_push (t1);
977 args->quick_push (num_teams);
978 args->quick_push (thread_limit);
979 /* For future extensibility. */
980 args->quick_push (build_zero_cst (unsigned_type_node));
982 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
983 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
984 args);
986 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
987 false, GSI_CONTINUE_LINKING);
990 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
992 static tree
993 vec2chain (vec<tree, va_gc> *v)
995 tree chain = NULL_TREE, t;
996 unsigned ix;
998 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1000 DECL_CHAIN (t) = chain;
1001 chain = t;
1004 return chain;
1007 /* Remove barriers in REGION->EXIT's block. Note that this is only
1008 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1009 is an implicit barrier, any barrier left at the end of the
1010 GIMPLE_OMP_PARALLEL region by a workshare inside it can now be
1011 removed. */
1013 static void
1014 remove_exit_barrier (struct omp_region *region)
1016 gimple_stmt_iterator gsi;
1017 basic_block exit_bb;
1018 edge_iterator ei;
1019 edge e;
1020 gimple *stmt;
1021 int any_addressable_vars = -1;
1023 exit_bb = region->exit;
1025 /* If the parallel region doesn't return, we don't have REGION->EXIT
1026 block at all. */
1027 if (! exit_bb)
1028 return;
1030 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1031 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1032 statements that can appear in between are extremely limited -- no
1033 memory operations at all. Here, we allow nothing at all, so the
1034 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1035 gsi = gsi_last_nondebug_bb (exit_bb);
1036 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1037 gsi_prev_nondebug (&gsi);
1038 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1039 return;
1041 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1043 gsi = gsi_last_nondebug_bb (e->src);
1044 if (gsi_end_p (gsi))
1045 continue;
1046 stmt = gsi_stmt (gsi);
1047 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1048 && !gimple_omp_return_nowait_p (stmt))
1050 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1051 in many cases. If there could be tasks queued, the barrier
1052 might be needed to let the tasks run before some local
1053 variable of the parallel that the task uses as shared
1054 runs out of scope. The task can be spawned either
1055 from within the current function (this would be easy to check)
1056 or from some function it calls and gets passed an address
1057 of such a variable. */
1058 if (any_addressable_vars < 0)
1060 gomp_parallel *parallel_stmt
1061 = as_a <gomp_parallel *> (last_stmt (region->entry));
1062 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1063 tree local_decls, block, decl;
1064 unsigned ix;
1066 any_addressable_vars = 0;
1067 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1068 if (TREE_ADDRESSABLE (decl))
1070 any_addressable_vars = 1;
1071 break;
1073 for (block = gimple_block (stmt);
1074 !any_addressable_vars
1075 && block
1076 && TREE_CODE (block) == BLOCK;
1077 block = BLOCK_SUPERCONTEXT (block))
1079 for (local_decls = BLOCK_VARS (block);
1080 local_decls;
1081 local_decls = DECL_CHAIN (local_decls))
1082 if (TREE_ADDRESSABLE (local_decls))
1084 any_addressable_vars = 1;
1085 break;
1087 if (block == gimple_block (parallel_stmt))
1088 break;
1091 if (!any_addressable_vars)
1092 gimple_omp_return_set_nowait (stmt);
1097 static void
1098 remove_exit_barriers (struct omp_region *region)
1100 if (region->type == GIMPLE_OMP_PARALLEL)
1101 remove_exit_barrier (region);
1103 if (region->inner)
1105 region = region->inner;
1106 remove_exit_barriers (region);
1107 while (region->next)
1109 region = region->next;
1110 remove_exit_barriers (region);
1115 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1116 calls. These can't be declared as const functions, but
1117 within one parallel body they are constant, so they can be
1118 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1119 which are declared const. Similarly for a task body, except
1120 that in an untied task omp_get_thread_num () can change at any task
1121 scheduling point. */
1123 static void
1124 optimize_omp_library_calls (gimple *entry_stmt)
1126 basic_block bb;
1127 gimple_stmt_iterator gsi;
1128 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1129 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1130 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1132 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1133 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1134 OMP_CLAUSE_UNTIED) != NULL);
1136 FOR_EACH_BB_FN (bb, cfun)
1137 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1139 gimple *call = gsi_stmt (gsi);
1140 tree decl;
1142 if (is_gimple_call (call)
1143 && (decl = gimple_call_fndecl (call))
1144 && DECL_EXTERNAL (decl)
1145 && TREE_PUBLIC (decl)
1146 && DECL_INITIAL (decl) == NULL)
1148 tree built_in;
1150 if (DECL_NAME (decl) == thr_num_id)
1152 /* In #pragma omp task untied omp_get_thread_num () can change
1153 during the execution of the task region. */
1154 if (untied_task)
1155 continue;
1156 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1158 else if (DECL_NAME (decl) == num_thr_id)
1159 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1160 else
1161 continue;
1163 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1164 || gimple_call_num_args (call) != 0)
1165 continue;
1167 if (flag_exceptions && !TREE_NOTHROW (decl))
1168 continue;
1170 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1171 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1172 TREE_TYPE (TREE_TYPE (built_in))))
1173 continue;
1175 gimple_call_set_fndecl (call, built_in);
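/* Added note: because the __builtin_omp_get_* variants are declared const,
   later passes can CSE repeated calls within the outlined parallel or task
   body, which is the point of this substitution.  */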
1180 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1181 regimplified. */
1183 static tree
1184 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1186 tree t = *tp;
1188 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1189 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1190 return t;
1192 if (TREE_CODE (t) == ADDR_EXPR)
1193 recompute_tree_invariant_for_addr_expr (t);
1195 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1196 return NULL_TREE;
1199 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1201 static void
1202 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1203 bool after)
1205 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1206 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1207 !after, after ? GSI_CONTINUE_LINKING
1208 : GSI_SAME_STMT);
1209 gimple *stmt = gimple_build_assign (to, from);
1210 if (after)
1211 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1212 else
1213 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1214 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1215 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1217 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1218 gimple_regimplify_operands (stmt, &gsi);
1222 /* Expand the OpenMP parallel or task directive starting at REGION. */
1224 static void
1225 expand_omp_taskreg (struct omp_region *region)
1227 basic_block entry_bb, exit_bb, new_bb;
1228 struct function *child_cfun;
1229 tree child_fn, block, t;
1230 gimple_stmt_iterator gsi;
1231 gimple *entry_stmt, *stmt;
1232 edge e;
1233 vec<tree, va_gc> *ws_args;
1235 entry_stmt = last_stmt (region->entry);
1236 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1237 && gimple_omp_task_taskwait_p (entry_stmt))
1239 new_bb = region->entry;
1240 gsi = gsi_last_nondebug_bb (region->entry);
1241 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1242 gsi_remove (&gsi, true);
1243 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1244 return;
1247 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1248 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1250 entry_bb = region->entry;
1251 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1252 exit_bb = region->cont;
1253 else
1254 exit_bb = region->exit;
1256 if (is_combined_parallel (region))
1257 ws_args = region->ws_args;
1258 else
1259 ws_args = NULL;
1261 if (child_cfun->cfg)
1263 /* Due to inlining, it may happen that we have already outlined
1264 the region, in which case all we need to do is make the
1265 sub-graph unreachable and emit the parallel call. */
1266 edge entry_succ_e, exit_succ_e;
1268 entry_succ_e = single_succ_edge (entry_bb);
1270 gsi = gsi_last_nondebug_bb (entry_bb);
1271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1272 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1273 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1274 gsi_remove (&gsi, true);
1276 new_bb = entry_bb;
1277 if (exit_bb)
1279 exit_succ_e = single_succ_edge (exit_bb);
1280 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1282 remove_edge_and_dominated_blocks (entry_succ_e);
1284 else
1286 unsigned srcidx, dstidx, num;
1288 /* If the parallel region needs data sent from the parent
1289 function, then the very first statement (except possible
1290 tree profile counter updates) of the parallel body
1291 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1292 &.OMP_DATA_O is passed as an argument to the child function,
1293 we need to replace it with the argument as seen by the child
1294 function.
1296 In most cases, this will end up being the identity assignment
1297 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1298 a function call that has been inlined, the original PARM_DECL
1299 .OMP_DATA_I may have been converted into a different local
1300 variable, in which case we need to keep the assignment. */
1301 if (gimple_omp_taskreg_data_arg (entry_stmt))
1303 basic_block entry_succ_bb
1304 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1305 : FALLTHRU_EDGE (entry_bb)->dest;
1306 tree arg;
1307 gimple *parcopy_stmt = NULL;
1309 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1311 gimple *stmt;
1313 gcc_assert (!gsi_end_p (gsi));
1314 stmt = gsi_stmt (gsi);
1315 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1316 continue;
1318 if (gimple_num_ops (stmt) == 2)
1320 tree arg = gimple_assign_rhs1 (stmt);
1322 /* We ignore the subcode because we're
1323 effectively doing a STRIP_NOPS. */
1325 if (TREE_CODE (arg) == ADDR_EXPR
1326 && (TREE_OPERAND (arg, 0)
1327 == gimple_omp_taskreg_data_arg (entry_stmt)))
1329 parcopy_stmt = stmt;
1330 break;
1335 gcc_assert (parcopy_stmt != NULL);
1336 arg = DECL_ARGUMENTS (child_fn);
1338 if (!gimple_in_ssa_p (cfun))
1340 if (gimple_assign_lhs (parcopy_stmt) == arg)
1341 gsi_remove (&gsi, true);
1342 else
1344 /* ?? Is setting the subcode really necessary ?? */
1345 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1346 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1349 else
1351 tree lhs = gimple_assign_lhs (parcopy_stmt);
1352 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1353 /* We'd like to set the rhs to the default def in the child_fn,
1354 but it's too early to create ssa names in the child_fn.
1355 Instead, we set the rhs to the parm. In
1356 move_sese_region_to_fn, we introduce a default def for the
1357 parm, map the parm to its default def, and once we encounter
1358 this stmt, replace the parm with the default def. */
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1360 update_stmt (parcopy_stmt);
1364 /* Declare local variables needed in CHILD_CFUN. */
1365 block = DECL_INITIAL (child_fn);
1366 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1367 /* The gimplifier could record temporaries in parallel/task block
1368 rather than in containing function's local_decls chain,
1369 which would mean cgraph missed finalizing them. Do it now. */
1370 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1371 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1372 varpool_node::finalize_decl (t);
1373 DECL_SAVED_TREE (child_fn) = NULL;
1374 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1375 gimple_set_body (child_fn, NULL);
1376 TREE_USED (block) = 1;
1378 /* Reset DECL_CONTEXT on function arguments. */
1379 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1380 DECL_CONTEXT (t) = child_fn;
1382 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1383 so that it can be moved to the child function. */
1384 gsi = gsi_last_nondebug_bb (entry_bb);
1385 stmt = gsi_stmt (gsi);
1386 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1387 || gimple_code (stmt) == GIMPLE_OMP_TASK
1388 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1389 e = split_block (entry_bb, stmt);
1390 gsi_remove (&gsi, true);
1391 entry_bb = e->dest;
1392 edge e2 = NULL;
1393 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1394 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1395 else
1397 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1398 gcc_assert (e2->dest == region->exit);
1399 remove_edge (BRANCH_EDGE (entry_bb));
1400 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1401 gsi = gsi_last_nondebug_bb (region->exit);
1402 gcc_assert (!gsi_end_p (gsi)
1403 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1404 gsi_remove (&gsi, true);
1407 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1408 if (exit_bb)
1410 gsi = gsi_last_nondebug_bb (exit_bb);
1411 gcc_assert (!gsi_end_p (gsi)
1412 && (gimple_code (gsi_stmt (gsi))
1413 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1414 stmt = gimple_build_return (NULL);
1415 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1416 gsi_remove (&gsi, true);
1419 /* Move the parallel region into CHILD_CFUN. */
1421 if (gimple_in_ssa_p (cfun))
1423 init_tree_ssa (child_cfun);
1424 init_ssa_operands (child_cfun);
1425 child_cfun->gimple_df->in_ssa_p = true;
1426 block = NULL_TREE;
1428 else
1429 block = gimple_block (entry_stmt);
1431 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1432 if (exit_bb)
1433 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1434 if (e2)
1436 basic_block dest_bb = e2->dest;
1437 if (!exit_bb)
1438 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1439 remove_edge (e2);
1440 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1442 /* When the OMP expansion process cannot guarantee an up-to-date
1443 loop tree, arrange for the child function to fix up loops. */
1444 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1445 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1447 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1448 num = vec_safe_length (child_cfun->local_decls);
1449 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1451 t = (*child_cfun->local_decls)[srcidx];
1452 if (DECL_CONTEXT (t) == cfun->decl)
1453 continue;
1454 if (srcidx != dstidx)
1455 (*child_cfun->local_decls)[dstidx] = t;
1456 dstidx++;
1458 if (dstidx != num)
1459 vec_safe_truncate (child_cfun->local_decls, dstidx);
1461 /* Inform the callgraph about the new function. */
1462 child_cfun->curr_properties = cfun->curr_properties;
1463 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1464 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1465 cgraph_node *node = cgraph_node::get_create (child_fn);
1466 node->parallelized_function = 1;
1467 cgraph_node::add_new_function (child_fn, true);
1469 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1470 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1472 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1473 fixed in a following pass. */
1474 push_cfun (child_cfun);
1475 if (need_asm)
1476 assign_assembler_name_if_needed (child_fn);
1478 if (optimize)
1479 optimize_omp_library_calls (entry_stmt);
1480 update_max_bb_count ();
1481 cgraph_edge::rebuild_edges ();
1483 /* Some EH regions might become dead; see PR34608. If
1484 pass_cleanup_cfg isn't the first pass to happen with the
1485 new child, these dead EH edges might cause problems.
1486 Clean them up now. */
1487 if (flag_exceptions)
1489 basic_block bb;
1490 bool changed = false;
1492 FOR_EACH_BB_FN (bb, cfun)
1493 changed |= gimple_purge_dead_eh_edges (bb);
1494 if (changed)
1495 cleanup_tree_cfg ();
1497 if (gimple_in_ssa_p (cfun))
1498 update_ssa (TODO_update_ssa);
1499 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1500 verify_loop_structure ();
1501 pop_cfun ();
1503 if (dump_file && !gimple_in_ssa_p (cfun))
1505 omp_any_child_fn_dumped = true;
1506 dump_function_header (dump_file, child_fn, dump_flags);
1507 dump_function_to_file (child_fn, dump_file, dump_flags);
1511 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1513 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1514 expand_parallel_call (region, new_bb,
1515 as_a <gomp_parallel *> (entry_stmt), ws_args);
1516 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1517 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1518 else
1519 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1520 if (gimple_in_ssa_p (cfun))
1521 update_ssa (TODO_update_ssa_only_virtuals);
1524 /* Information about members of an OpenACC collapsed loop nest. */
1526 struct oacc_collapse
1528 tree base; /* Base value. */
1529 tree iters; /* Number of steps. */
1530 tree step; /* Step size. */
1531 tree tile; /* Tile increment (if tiled). */
1532 tree outer; /* Tile iterator var. */
1535 /* Helper for expand_oacc_for. Determine collapsed loop information.
1536 Fill in COUNTS array. Emit any initialization code before GSI.
1537 Return the calculated outer loop bound of BOUND_TYPE. */
1539 static tree
1540 expand_oacc_collapse_init (const struct omp_for_data *fd,
1541 gimple_stmt_iterator *gsi,
1542 oacc_collapse *counts, tree bound_type,
1543 location_t loc)
1545 tree tiling = fd->tiling;
1546 tree total = build_int_cst (bound_type, 1);
1547 int ix;
1549 gcc_assert (integer_onep (fd->loop.step));
1550 gcc_assert (integer_zerop (fd->loop.n1));
1552 /* When tiling, the first operand of the tile clause applies to the
1553 innermost loop, and we work outwards from there. Seems
1554 backwards, but whatever. */
1555 for (ix = fd->collapse; ix--;)
1557 const omp_for_data_loop *loop = &fd->loops[ix];
1559 tree iter_type = TREE_TYPE (loop->v);
1560 tree diff_type = iter_type;
1561 tree plus_type = iter_type;
1563 gcc_assert (loop->cond_code == fd->loop.cond_code);
1565 if (POINTER_TYPE_P (iter_type))
1566 plus_type = sizetype;
1567 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1568 diff_type = signed_type_for (diff_type);
1569 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1570 diff_type = integer_type_node;
1572 if (tiling)
1574 tree num = build_int_cst (integer_type_node, fd->collapse);
1575 tree loop_no = build_int_cst (integer_type_node, ix);
1576 tree tile = TREE_VALUE (tiling);
1577 gcall *call
1578 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1579 /* gwv-outer=*/integer_zero_node,
1580 /* gwv-inner=*/integer_zero_node);
1582 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1583 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1584 gimple_call_set_lhs (call, counts[ix].tile);
1585 gimple_set_location (call, loc);
1586 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1588 tiling = TREE_CHAIN (tiling);
1590 else
1592 counts[ix].tile = NULL;
1593 counts[ix].outer = loop->v;
1596 tree b = loop->n1;
1597 tree e = loop->n2;
1598 tree s = loop->step;
1599 bool up = loop->cond_code == LT_EXPR;
1600 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1601 bool negating;
1602 tree expr;
1604 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1605 true, GSI_SAME_STMT);
1606 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1609 /* Convert the step, avoiding possible unsigned->signed overflow. */
1610 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1611 if (negating)
1612 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1613 s = fold_convert (diff_type, s);
1614 if (negating)
1615 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1616 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1617 true, GSI_SAME_STMT);
1619 /* Determine the range, avoiding possible unsigned->signed overflow. */
1620 negating = !up && TYPE_UNSIGNED (iter_type);
1621 expr = fold_build2 (MINUS_EXPR, plus_type,
1622 fold_convert (plus_type, negating ? b : e),
1623 fold_convert (plus_type, negating ? e : b));
1624 expr = fold_convert (diff_type, expr);
1625 if (negating)
1626 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1627 tree range = force_gimple_operand_gsi
1628 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1630 /* Determine number of iterations. */
1631 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1632 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1633 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1635 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1636 true, GSI_SAME_STMT);
1638 counts[ix].base = b;
1639 counts[ix].iters = iters;
1640 counts[ix].step = s;
1642 total = fold_build2 (MULT_EXPR, bound_type, total,
1643 fold_convert (bound_type, iters));
1646 return total;
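/* Worked example (added for illustration): for a collapse(2) nest whose
   loops run 4 and 5 iterations respectively, ITERS is 4 for the outer and
   5 for the inner member, and the returned bound is 4 * 5 == 20, the trip
   count of the single collapsed loop.  */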
1649 /* Emit initializers for collapsed loop members. INNER is true if
1650 this is for the element loop of a TILE. IVAR is the outer
1651 loop iteration variable, from which collapsed loop iteration values
1652 are calculated. The COUNTS array has been initialized by
1653 expand_oacc_collapse_init. */
1655 static void
1656 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1657 gimple_stmt_iterator *gsi,
1658 const oacc_collapse *counts, tree ivar)
1660 tree ivar_type = TREE_TYPE (ivar);
1662 /* The most rapidly changing iteration variable is the innermost
1663 one. */
1664 for (int ix = fd->collapse; ix--;)
1666 const omp_for_data_loop *loop = &fd->loops[ix];
1667 const oacc_collapse *collapse = &counts[ix];
1668 tree v = inner ? loop->v : collapse->outer;
1669 tree iter_type = TREE_TYPE (v);
1670 tree diff_type = TREE_TYPE (collapse->step);
1671 tree plus_type = iter_type;
1672 enum tree_code plus_code = PLUS_EXPR;
1673 tree expr;
1675 if (POINTER_TYPE_P (iter_type))
1677 plus_code = POINTER_PLUS_EXPR;
1678 plus_type = sizetype;
1681 expr = ivar;
1682 if (ix)
1684 tree mod = fold_convert (ivar_type, collapse->iters);
1685 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1686 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1687 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1688 true, GSI_SAME_STMT);
1691 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1692 collapse->step);
1693 expr = fold_build2 (plus_code, iter_type,
1694 inner ? collapse->outer : collapse->base,
1695 fold_convert (plus_type, expr));
1696 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 gassign *ass = gimple_build_assign (v, expr);
1699 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
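/* Worked example (added for illustration): continuing the collapse(2) nest
   above with inner ITERS == 5, an outer iteration value IVAR == 13 is
   decomposed as 13 / 5 == 2 for the outer member and 13 % 5 == 3 for the
   inner one, each then scaled by STEP and added to BASE (or to the tile
   iterator when INNER).  */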
1703 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1704 of the combined collapse > 1 loop constructs, generate code like:
1705 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1706 if (cond3 is <)
1707 adj = STEP3 - 1;
1708 else
1709 adj = STEP3 + 1;
1710 count3 = (adj + N32 - N31) / STEP3;
1711 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1712 if (cond2 is <)
1713 adj = STEP2 - 1;
1714 else
1715 adj = STEP2 + 1;
1716 count2 = (adj + N22 - N21) / STEP2;
1717 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1718 if (cond1 is <)
1719 adj = STEP1 - 1;
1720 else
1721 adj = STEP1 + 1;
1722 count1 = (adj + N12 - N11) / STEP1;
1723 count = count1 * count2 * count3;
1724 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1725 count = 0;
1726 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1727 of the combined loop constructs, just initialize COUNTS array
1728 from the _looptemp_ clauses. */
1730 /* NOTE: It *could* be better to moosh all of the BBs together,
1731 creating one larger BB with all the computation and the unexpected
1732 jump at the end. I.e.
1734 bool zero3, zero2, zero1, zero;
1736 zero3 = N32 c3 N31;
1737 count3 = (N32 - N31) /[cl] STEP3;
1738 zero2 = N22 c2 N21;
1739 count2 = (N22 - N21) /[cl] STEP2;
1740 zero1 = N12 c1 N11;
1741 count1 = (N12 - N11) /[cl] STEP1;
1742 zero = zero3 || zero2 || zero1;
1743 count = count1 * count2 * count3;
1744 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1746 After all, we expect the zero=false, and thus we expect to have to
1747 evaluate all of the comparison expressions, so short-circuiting
1748 oughtn't be a win. Since the condition isn't protecting a
1749 denominator, we're not concerned about divide-by-zero, so we can
1750 fully evaluate count even if a numerator turned out to be wrong.
1752 It seems like putting this all together would create much better
1753 scheduling opportunities, and less pressure on the chip's branch
1754 predictor. */
1756 static void
1757 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1758 basic_block &entry_bb, tree *counts,
1759 basic_block &zero_iter1_bb, int &first_zero_iter1,
1760 basic_block &zero_iter2_bb, int &first_zero_iter2,
1761 basic_block &l2_dom_bb)
1763 tree t, type = TREE_TYPE (fd->loop.v);
1764 edge e, ne;
1765 int i;
1767 /* Collapsed loops need work for expansion into SSA form. */
1768 gcc_assert (!gimple_in_ssa_p (cfun));
1770 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1771 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1773 gcc_assert (fd->ordered == 0);
1774 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1775 isn't supposed to be handled, as the inner loop doesn't
1776 use it. */
1777 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1778 OMP_CLAUSE__LOOPTEMP_);
1779 gcc_assert (innerc);
1780 for (i = 0; i < fd->collapse; i++)
1782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1783 OMP_CLAUSE__LOOPTEMP_);
1784 gcc_assert (innerc);
1785 if (i)
1786 counts[i] = OMP_CLAUSE_DECL (innerc);
1787 else
1788 counts[0] = NULL_TREE;
1790 return;
1793 for (i = fd->collapse; i < fd->ordered; i++)
1795 tree itype = TREE_TYPE (fd->loops[i].v);
1796 counts[i] = NULL_TREE;
1797 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1798 fold_convert (itype, fd->loops[i].n1),
1799 fold_convert (itype, fd->loops[i].n2));
1800 if (t && integer_zerop (t))
1802 for (i = fd->collapse; i < fd->ordered; i++)
1803 counts[i] = build_int_cst (type, 0);
1804 break;
1807 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1809 tree itype = TREE_TYPE (fd->loops[i].v);
1811 if (i >= fd->collapse && counts[i])
1812 continue;
1813 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1814 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1815 fold_convert (itype, fd->loops[i].n1),
1816 fold_convert (itype, fd->loops[i].n2)))
1817 == NULL_TREE || !integer_onep (t)))
1819 gcond *cond_stmt;
1820 tree n1, n2;
1821 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1822 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1823 true, GSI_SAME_STMT);
1824 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1825 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1826 true, GSI_SAME_STMT);
1827 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1828 NULL_TREE, NULL_TREE);
1829 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1830 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1831 expand_omp_regimplify_p, NULL, NULL)
1832 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1833 expand_omp_regimplify_p, NULL, NULL))
1835 *gsi = gsi_for_stmt (cond_stmt);
1836 gimple_regimplify_operands (cond_stmt, gsi);
1838 e = split_block (entry_bb, cond_stmt);
1839 basic_block &zero_iter_bb
1840 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1841 int &first_zero_iter
1842 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1843 if (zero_iter_bb == NULL)
1845 gassign *assign_stmt;
1846 first_zero_iter = i;
1847 zero_iter_bb = create_empty_bb (entry_bb);
1848 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1849 *gsi = gsi_after_labels (zero_iter_bb);
1850 if (i < fd->collapse)
1851 assign_stmt = gimple_build_assign (fd->loop.n2,
1852 build_zero_cst (type));
1853 else
1855 counts[i] = create_tmp_reg (type, ".count");
1856 assign_stmt
1857 = gimple_build_assign (counts[i], build_zero_cst (type));
1859 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1860 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1861 entry_bb);
1863 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1864 ne->probability = profile_probability::very_unlikely ();
1865 e->flags = EDGE_TRUE_VALUE;
1866 e->probability = ne->probability.invert ();
1867 if (l2_dom_bb == NULL)
1868 l2_dom_bb = entry_bb;
1869 entry_bb = e->dest;
1870 *gsi = gsi_last_nondebug_bb (entry_bb);
1873 if (POINTER_TYPE_P (itype))
1874 itype = signed_type_for (itype);
1875 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1876 ? -1 : 1));
1877 t = fold_build2 (PLUS_EXPR, itype,
1878 fold_convert (itype, fd->loops[i].step), t);
1879 t = fold_build2 (PLUS_EXPR, itype, t,
1880 fold_convert (itype, fd->loops[i].n2));
1881 t = fold_build2 (MINUS_EXPR, itype, t,
1882 fold_convert (itype, fd->loops[i].n1));
1883 /* ?? We could probably use CEIL_DIV_EXPR instead of
1884 TRUNC_DIV_EXPR with the adjustment done by hand. Unless we can't
1885 generate the same code in the end because generically we
1886 don't know that the values involved must be negative for
1887 GT?? */
1888 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1889 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1890 fold_build1 (NEGATE_EXPR, itype, t),
1891 fold_build1 (NEGATE_EXPR, itype,
1892 fold_convert (itype,
1893 fd->loops[i].step)));
1894 else
1895 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].step));
1897 t = fold_convert (type, t);
1898 if (TREE_CODE (t) == INTEGER_CST)
1899 counts[i] = t;
1900 else
1902 if (i < fd->collapse || i != first_zero_iter2)
1903 counts[i] = create_tmp_reg (type, ".count");
1904 expand_omp_build_assign (gsi, counts[i], t);
1906 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1908 if (i == 0)
1909 t = counts[0];
1910 else
1911 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1912 expand_omp_build_assign (gsi, fd->loop.n2, t);
1917 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1918 T = V;
1919 V3 = N31 + (T % count3) * STEP3;
1920 T = T / count3;
1921 V2 = N21 + (T % count2) * STEP2;
1922 T = T / count2;
1923 V1 = N11 + T * STEP1;
1924 if this loop doesn't have an inner loop construct combined with it.
1925 If it does have an inner loop construct combined with it and the
1926 iteration count isn't known constant, store values from counts array
1927 into its _looptemp_ temporaries instead. */
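/* Illustrative example added for exposition, with hypothetical values:
   for collapse(2) with count2 = 10 and flattened iteration number T = 23,
   the scheme above computes
     V2 = N21 + (23 % 10) * STEP2 = N21 + 3 * STEP2;
     T  = 23 / 10 = 2;
     V1 = N11 + 2 * STEP1;
   i.e. iteration 2 of the outer loop and iteration 3 of the inner one.  */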
1929 static void
1930 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1931 tree *counts, gimple *inner_stmt, tree startvar)
1933 int i;
1934 if (gimple_omp_for_combined_p (fd->for_stmt))
1936 /* If fd->loop.n2 is constant, then no propagation of the counts
1937 is needed, they are constant. */
1938 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1939 return;
1941 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1942 ? gimple_omp_taskreg_clauses (inner_stmt)
1943 : gimple_omp_for_clauses (inner_stmt);
1944 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1945 isn't supposed to be handled, as the inner loop doesn't
1946 use it. */
1947 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1948 gcc_assert (innerc);
1949 for (i = 0; i < fd->collapse; i++)
1951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1952 OMP_CLAUSE__LOOPTEMP_);
1953 gcc_assert (innerc);
1954 if (i)
1956 tree tem = OMP_CLAUSE_DECL (innerc);
1957 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1958 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1959 false, GSI_CONTINUE_LINKING);
1960 gassign *stmt = gimple_build_assign (tem, t);
1961 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1964 return;
1967 tree type = TREE_TYPE (fd->loop.v);
1968 tree tem = create_tmp_reg (type, ".tem");
1969 gassign *stmt = gimple_build_assign (tem, startvar);
1970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1972 for (i = fd->collapse - 1; i >= 0; i--)
1974 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1975 itype = vtype;
1976 if (POINTER_TYPE_P (vtype))
1977 itype = signed_type_for (vtype);
1978 if (i != 0)
1979 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1980 else
1981 t = tem;
1982 t = fold_convert (itype, t);
1983 t = fold_build2 (MULT_EXPR, itype, t,
1984 fold_convert (itype, fd->loops[i].step));
1985 if (POINTER_TYPE_P (vtype))
1986 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1987 else
1988 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1989 t = force_gimple_operand_gsi (gsi, t,
1990 DECL_P (fd->loops[i].v)
1991 && TREE_ADDRESSABLE (fd->loops[i].v),
1992 NULL_TREE, false,
1993 GSI_CONTINUE_LINKING);
1994 stmt = gimple_build_assign (fd->loops[i].v, t);
1995 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1996 if (i != 0)
1998 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1999 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2000 false, GSI_CONTINUE_LINKING);
2001 stmt = gimple_build_assign (tem, t);
2002 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2007 /* Helper function for expand_omp_for_*. Generate code like:
2008 L10:
2009 V3 += STEP3;
2010 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2011 L11:
2012 V3 = N31;
2013 V2 += STEP2;
2014 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2015 L12:
2016 V2 = N21;
2017 V1 += STEP1;
2018 goto BODY_BB; */
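/* Illustrative note added for exposition: for collapse(2) the cascade
   degenerates to
     L10:
     V2 += STEP2;
     if (V2 cond2 N22) goto BODY_BB; else goto L11;
     L11:
     V2 = N21;
     V1 += STEP1;
     goto BODY_BB;
   i.e. the innermost index advances on every iteration and is reset, with
   the next outer index bumped, whenever it runs off its bound.  */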
2020 static basic_block
2021 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2022 basic_block body_bb)
2024 basic_block last_bb, bb, collapse_bb = NULL;
2025 int i;
2026 gimple_stmt_iterator gsi;
2027 edge e;
2028 tree t;
2029 gimple *stmt;
2031 last_bb = cont_bb;
2032 for (i = fd->collapse - 1; i >= 0; i--)
2034 tree vtype = TREE_TYPE (fd->loops[i].v);
2036 bb = create_empty_bb (last_bb);
2037 add_bb_to_loop (bb, last_bb->loop_father);
2038 gsi = gsi_start_bb (bb);
2040 if (i < fd->collapse - 1)
2042 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2043 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2045 t = fd->loops[i + 1].n1;
2046 t = force_gimple_operand_gsi (&gsi, t,
2047 DECL_P (fd->loops[i + 1].v)
2048 && TREE_ADDRESSABLE (fd->loops[i
2049 + 1].v),
2050 NULL_TREE, false,
2051 GSI_CONTINUE_LINKING);
2052 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2053 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2055 else
2056 collapse_bb = bb;
2058 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2060 if (POINTER_TYPE_P (vtype))
2061 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2062 else
2063 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2064 t = force_gimple_operand_gsi (&gsi, t,
2065 DECL_P (fd->loops[i].v)
2066 && TREE_ADDRESSABLE (fd->loops[i].v),
2067 NULL_TREE, false, GSI_CONTINUE_LINKING);
2068 stmt = gimple_build_assign (fd->loops[i].v, t);
2069 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2071 if (i > 0)
2073 t = fd->loops[i].n2;
2074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 tree v = fd->loops[i].v;
2077 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2078 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2079 false, GSI_CONTINUE_LINKING);
2080 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2081 stmt = gimple_build_cond_empty (t);
2082 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2083 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2084 expand_omp_regimplify_p, NULL, NULL)
2085 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2086 expand_omp_regimplify_p, NULL, NULL))
2087 gimple_regimplify_operands (stmt, &gsi);
2088 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2089 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2091 else
2092 make_edge (bb, body_bb, EDGE_FALLTHRU);
2093 last_bb = bb;
2096 return collapse_bb;
2099 /* Expand #pragma omp ordered depend(source). */
2101 static void
2102 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2103 tree *counts, location_t loc)
2105 enum built_in_function source_ix
2106 = fd->iter_type == long_integer_type_node
2107 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2108 gimple *g
2109 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2110 build_fold_addr_expr (counts[fd->ordered]));
2111 gimple_set_location (g, loc);
2112 gsi_insert_before (gsi, g, GSI_SAME_STMT);
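/* Note added for exposition: for the long iteration type this amounts to a
   single runtime call of the (assumed) form
     GOMP_doacross_post (&.orditera);
   where .orditera is the per-ordered-dimension counter array created in
   expand_omp_ordered_source_sink below.  */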
2115 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2117 static void
2118 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2119 tree *counts, tree c, location_t loc)
2121 auto_vec<tree, 10> args;
2122 enum built_in_function sink_ix
2123 = fd->iter_type == long_integer_type_node
2124 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2125 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2126 int i;
2127 gimple_stmt_iterator gsi2 = *gsi;
2128 bool warned_step = false;
2130 for (i = 0; i < fd->ordered; i++)
2132 tree step = NULL_TREE;
2133 off = TREE_PURPOSE (deps);
2134 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2136 step = TREE_OPERAND (off, 1);
2137 off = TREE_OPERAND (off, 0);
2139 if (!integer_zerop (off))
2141 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2142 || fd->loops[i].cond_code == GT_EXPR);
2143 bool forward = fd->loops[i].cond_code == LT_EXPR;
2144 if (step)
2146 /* Non-simple Fortran DO loops. If step is variable,
2147 we don't know even the direction at compile time, so
2148 we can't warn. */
2149 if (TREE_CODE (step) != INTEGER_CST)
2150 break;
2151 forward = tree_int_cst_sgn (step) != -1;
2153 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2154 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2155 "waiting for lexically later iteration");
2156 break;
2158 deps = TREE_CHAIN (deps);
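/* Illustrative example added for exposition: in an ascending loop such as
     for (i = 0; i < n; i++)
       {
         #pragma omp ordered depend (sink: i + 1)
         ...
       }
   the offset is positive while the loop runs forward, so the dependence can
   only be satisfied by a later iteration and the warning above is emitted.  */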
2160 /* If all offsets corresponding to the collapsed loops are zero,
2161 this depend clause can be ignored. FIXME: but there is still a
2162 flush needed. We need to emit one __sync_synchronize () for it
2163 though (perhaps conditionally)? Solve this together with the
2164 conservative dependence folding optimization.
2165 if (i >= fd->collapse)
2166 return; */
2168 deps = OMP_CLAUSE_DECL (c);
2169 gsi_prev (&gsi2);
2170 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2171 edge e2 = split_block_after_labels (e1->dest);
2173 gsi2 = gsi_after_labels (e1->dest);
2174 *gsi = gsi_last_bb (e1->src);
2175 for (i = 0; i < fd->ordered; i++)
2177 tree itype = TREE_TYPE (fd->loops[i].v);
2178 tree step = NULL_TREE;
2179 tree orig_off = NULL_TREE;
2180 if (POINTER_TYPE_P (itype))
2181 itype = sizetype;
2182 if (i)
2183 deps = TREE_CHAIN (deps);
2184 off = TREE_PURPOSE (deps);
2185 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2187 step = TREE_OPERAND (off, 1);
2188 off = TREE_OPERAND (off, 0);
2189 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2190 && integer_onep (fd->loops[i].step)
2191 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2193 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2194 if (step)
2196 off = fold_convert_loc (loc, itype, off);
2197 orig_off = off;
2198 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2201 if (integer_zerop (off))
2202 t = boolean_true_node;
2203 else
2205 tree a;
2206 tree co = fold_convert_loc (loc, itype, off);
2207 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2209 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2210 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2211 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2212 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2213 co);
2215 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2216 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2217 fd->loops[i].v, co);
2218 else
2219 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2220 fd->loops[i].v, co);
2221 if (step)
2223 tree t1, t2;
2224 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2225 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2226 fd->loops[i].n1);
2227 else
2228 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2229 fd->loops[i].n2);
2230 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2231 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2232 fd->loops[i].n2);
2233 else
2234 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2235 fd->loops[i].n1);
2236 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2237 step, build_int_cst (TREE_TYPE (step), 0));
2238 if (TREE_CODE (step) != INTEGER_CST)
2240 t1 = unshare_expr (t1);
2241 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2242 false, GSI_CONTINUE_LINKING);
2243 t2 = unshare_expr (t2);
2244 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2245 false, GSI_CONTINUE_LINKING);
2247 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2248 t, t2, t1);
2250 else if (fd->loops[i].cond_code == LT_EXPR)
2252 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2253 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2254 fd->loops[i].n1);
2255 else
2256 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2259 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2260 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2261 fd->loops[i].n2);
2262 else
2263 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2264 fd->loops[i].n1);
2266 if (cond)
2267 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2268 else
2269 cond = t;
2271 off = fold_convert_loc (loc, itype, off);
2273 if (step
2274 || (fd->loops[i].cond_code == LT_EXPR
2275 ? !integer_onep (fd->loops[i].step)
2276 : !integer_minus_onep (fd->loops[i].step)))
2278 if (step == NULL_TREE
2279 && TYPE_UNSIGNED (itype)
2280 && fd->loops[i].cond_code == GT_EXPR)
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2282 fold_build1_loc (loc, NEGATE_EXPR, itype,
2283 s));
2284 else
2285 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2286 orig_off ? orig_off : off, s);
2287 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2288 build_int_cst (itype, 0));
2289 if (integer_zerop (t) && !warned_step)
2291 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2292 "refers to iteration never in the iteration "
2293 "space");
2294 warned_step = true;
2296 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2297 cond, t);
2300 if (i <= fd->collapse - 1 && fd->collapse > 1)
2301 t = fd->loop.v;
2302 else if (counts[i])
2303 t = counts[i];
2304 else
2306 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2307 fd->loops[i].v, fd->loops[i].n1);
2308 t = fold_convert_loc (loc, fd->iter_type, t);
2310 if (step)
2311 /* We have divided off by step already earlier. */;
2312 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2313 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2314 fold_build1_loc (loc, NEGATE_EXPR, itype,
2315 s));
2316 else
2317 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2318 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2319 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2320 off = fold_convert_loc (loc, fd->iter_type, off);
2321 if (i <= fd->collapse - 1 && fd->collapse > 1)
2323 if (i)
2324 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2325 off);
2326 if (i < fd->collapse - 1)
2328 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2329 counts[i]);
2330 continue;
2333 off = unshare_expr (off);
2334 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2335 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2336 true, GSI_SAME_STMT);
2337 args.safe_push (t);
2339 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2340 gimple_set_location (g, loc);
2341 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2343 cond = unshare_expr (cond);
2344 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2345 GSI_CONTINUE_LINKING);
2346 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2347 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2348 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2349 e1->probability = e3->probability.invert ();
2350 e1->flags = EDGE_TRUE_VALUE;
2351 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2353 *gsi = gsi_after_labels (e2->dest);
2356 /* Expand all #pragma omp ordered depend(source) and
2357 #pragma omp ordered depend(sink:...) constructs in the current
2358 #pragma omp for ordered(n) region. */
2360 static void
2361 expand_omp_ordered_source_sink (struct omp_region *region,
2362 struct omp_for_data *fd, tree *counts,
2363 basic_block cont_bb)
2365 struct omp_region *inner;
2366 int i;
2367 for (i = fd->collapse - 1; i < fd->ordered; i++)
2368 if (i == fd->collapse - 1 && fd->collapse > 1)
2369 counts[i] = NULL_TREE;
2370 else if (i >= fd->collapse && !cont_bb)
2371 counts[i] = build_zero_cst (fd->iter_type);
2372 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2373 && integer_onep (fd->loops[i].step))
2374 counts[i] = NULL_TREE;
2375 else
2376 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2377 tree atype
2378 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2379 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2380 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
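/* Note added for exposition: .orditera has one element per doacross
   dimension (the whole collapsed nest counts as a single dimension, hence
   fd->ordered - fd->collapse + 1 elements).  Its address is what
   expand_omp_ordered_source above posts via GOMP_doacross_post, while
   expand_omp_ordered_sink builds the wait vectors passed to
   GOMP_doacross_wait.  */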
2382 for (inner = region->inner; inner; inner = inner->next)
2383 if (inner->type == GIMPLE_OMP_ORDERED)
2385 gomp_ordered *ord_stmt = inner->ord_stmt;
2386 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2387 location_t loc = gimple_location (ord_stmt);
2388 tree c;
2389 for (c = gimple_omp_ordered_clauses (ord_stmt);
2390 c; c = OMP_CLAUSE_CHAIN (c))
2391 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2392 break;
2393 if (c)
2394 expand_omp_ordered_source (&gsi, fd, counts, loc);
2395 for (c = gimple_omp_ordered_clauses (ord_stmt);
2396 c; c = OMP_CLAUSE_CHAIN (c))
2397 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2398 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2399 gsi_remove (&gsi, true);
2403 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2404 collapsed. */
2406 static basic_block
2407 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2408 basic_block cont_bb, basic_block body_bb,
2409 bool ordered_lastprivate)
2411 if (fd->ordered == fd->collapse)
2412 return cont_bb;
2414 if (!cont_bb)
2416 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2417 for (int i = fd->collapse; i < fd->ordered; i++)
2419 tree type = TREE_TYPE (fd->loops[i].v);
2420 tree n1 = fold_convert (type, fd->loops[i].n1);
2421 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2422 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2423 size_int (i - fd->collapse + 1),
2424 NULL_TREE, NULL_TREE);
2425 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2427 return NULL;
2430 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2432 tree t, type = TREE_TYPE (fd->loops[i].v);
2433 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2434 expand_omp_build_assign (&gsi, fd->loops[i].v,
2435 fold_convert (type, fd->loops[i].n1));
2436 if (counts[i])
2437 expand_omp_build_assign (&gsi, counts[i],
2438 build_zero_cst (fd->iter_type));
2439 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2440 size_int (i - fd->collapse + 1),
2441 NULL_TREE, NULL_TREE);
2442 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2443 if (!gsi_end_p (gsi))
2444 gsi_prev (&gsi);
2445 else
2446 gsi = gsi_last_bb (body_bb);
2447 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2448 basic_block new_body = e1->dest;
2449 if (body_bb == cont_bb)
2450 cont_bb = new_body;
2451 edge e2 = NULL;
2452 basic_block new_header;
2453 if (EDGE_COUNT (cont_bb->preds) > 0)
2455 gsi = gsi_last_bb (cont_bb);
2456 if (POINTER_TYPE_P (type))
2457 t = fold_build_pointer_plus (fd->loops[i].v,
2458 fold_convert (sizetype,
2459 fd->loops[i].step));
2460 else
2461 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2462 fold_convert (type, fd->loops[i].step));
2463 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2464 if (counts[i])
2466 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2467 build_int_cst (fd->iter_type, 1));
2468 expand_omp_build_assign (&gsi, counts[i], t);
2469 t = counts[i];
2471 else
2473 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2474 fd->loops[i].v, fd->loops[i].n1);
2475 t = fold_convert (fd->iter_type, t);
2476 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2477 true, GSI_SAME_STMT);
2479 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2480 size_int (i - fd->collapse + 1),
2481 NULL_TREE, NULL_TREE);
2482 expand_omp_build_assign (&gsi, aref, t);
2483 gsi_prev (&gsi);
2484 e2 = split_block (cont_bb, gsi_stmt (gsi));
2485 new_header = e2->dest;
2487 else
2488 new_header = cont_bb;
2489 gsi = gsi_after_labels (new_header);
2490 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2491 true, GSI_SAME_STMT);
2492 tree n2
2493 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2494 true, NULL_TREE, true, GSI_SAME_STMT);
2495 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2496 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2497 edge e3 = split_block (new_header, gsi_stmt (gsi));
2498 cont_bb = e3->dest;
2499 remove_edge (e1);
2500 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2501 e3->flags = EDGE_FALSE_VALUE;
2502 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2503 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2504 e1->probability = e3->probability.invert ();
2506 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2507 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2509 if (e2)
2511 struct loop *loop = alloc_loop ();
2512 loop->header = new_header;
2513 loop->latch = e2->src;
2514 add_loop (loop, body_bb->loop_father);
2518 /* If there are any lastprivate clauses and it is possible some loops
2519 might have zero iterations, ensure all the decls are initialized,
2520 otherwise we could crash evaluating C++ class iterators with lastprivate
2521 clauses. */
2522 bool need_inits = false;
2523 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2524 if (need_inits)
2526 tree type = TREE_TYPE (fd->loops[i].v);
2527 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2528 expand_omp_build_assign (&gsi, fd->loops[i].v,
2529 fold_convert (type, fd->loops[i].n1));
2531 else
2533 tree type = TREE_TYPE (fd->loops[i].v);
2534 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2535 boolean_type_node,
2536 fold_convert (type, fd->loops[i].n1),
2537 fold_convert (type, fd->loops[i].n2));
2538 if (!integer_onep (this_cond))
2539 need_inits = true;
2542 return cont_bb;
2545 /* A subroutine of expand_omp_for. Generate code for a parallel
2546 loop with any schedule. Given parameters:
2548 for (V = N1; V cond N2; V += STEP) BODY;
2550 where COND is "<" or ">", we generate pseudocode
2552 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2553 if (more) goto L0; else goto L3;
2555 V = istart0;
2556 iend = iend0;
2558 BODY;
2559 V += STEP;
2560 if (V cond iend) goto L1; else goto L2;
2562 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2565 If this is a combined omp parallel loop, instead of the call to
2566 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2567 If this is gimple_omp_for_combined_p loop, then instead of assigning
2568 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2569 inner GIMPLE_OMP_FOR and V += STEP; and
2570 if (V cond iend) goto L1; else goto L2; are removed.
2572 For collapsed loops, given parameters:
2573 collapse(3)
2574 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2575 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2576 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2577 BODY;
2579 we generate pseudocode
2581 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2582 if (cond3 is <)
2583 adj = STEP3 - 1;
2584 else
2585 adj = STEP3 + 1;
2586 count3 = (adj + N32 - N31) / STEP3;
2587 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2588 if (cond2 is <)
2589 adj = STEP2 - 1;
2590 else
2591 adj = STEP2 + 1;
2592 count2 = (adj + N22 - N21) / STEP2;
2593 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2594 if (cond1 is <)
2595 adj = STEP1 - 1;
2596 else
2597 adj = STEP1 + 1;
2598 count1 = (adj + N12 - N11) / STEP1;
2599 count = count1 * count2 * count3;
2600 goto Z1;
2602 count = 0;
2604 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2605 if (more) goto L0; else goto L3;
2607 V = istart0;
2608 T = V;
2609 V3 = N31 + (T % count3) * STEP3;
2610 T = T / count3;
2611 V2 = N21 + (T % count2) * STEP2;
2612 T = T / count2;
2613 V1 = N11 + T * STEP1;
2614 iend = iend0;
2616 BODY;
2617 V += 1;
2618 if (V < iend) goto L10; else goto L2;
2619 L10:
2620 V3 += STEP3;
2621 if (V3 cond3 N32) goto L1; else goto L11;
2622 L11:
2623 V3 = N31;
2624 V2 += STEP2;
2625 if (V2 cond2 N22) goto L1; else goto L12;
2626 L12:
2627 V2 = N21;
2628 V1 += STEP1;
2629 goto L1;
2631 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
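/* Note added for exposition: in the expansion below the labels above map
   roughly onto the basic blocks carved out of the region: L0 -> l0_bb
   (iteration setup), L1 -> l1_bb (loop body), L2 -> l2_bb (the
   GOMP_loop_foo_next call for the next chunk) and L3 -> l3_bb (the block
   reached once no more work remains).  */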
2636 static void
2637 expand_omp_for_generic (struct omp_region *region,
2638 struct omp_for_data *fd,
2639 enum built_in_function start_fn,
2640 enum built_in_function next_fn,
2641 tree sched_arg,
2642 gimple *inner_stmt)
2644 tree type, istart0, iend0, iend;
2645 tree t, vmain, vback, bias = NULL_TREE;
2646 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2647 basic_block l2_bb = NULL, l3_bb = NULL;
2648 gimple_stmt_iterator gsi;
2649 gassign *assign_stmt;
2650 bool in_combined_parallel = is_combined_parallel (region);
2651 bool broken_loop = region->cont == NULL;
2652 edge e, ne;
2653 tree *counts = NULL;
2654 int i;
2655 bool ordered_lastprivate = false;
2657 gcc_assert (!broken_loop || !in_combined_parallel);
2658 gcc_assert (fd->iter_type == long_integer_type_node
2659 || !in_combined_parallel);
2661 entry_bb = region->entry;
2662 cont_bb = region->cont;
2663 collapse_bb = NULL;
2664 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2665 gcc_assert (broken_loop
2666 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2667 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2668 l1_bb = single_succ (l0_bb);
2669 if (!broken_loop)
2671 l2_bb = create_empty_bb (cont_bb);
2672 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2673 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2674 == l1_bb));
2675 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2677 else
2678 l2_bb = NULL;
2679 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2680 exit_bb = region->exit;
2682 gsi = gsi_last_nondebug_bb (entry_bb);
2684 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2685 if (fd->ordered
2686 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2687 OMP_CLAUSE_LASTPRIVATE))
2688 ordered_lastprivate = true;
2689 tree reductions = NULL_TREE;
2690 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2691 tree memv = NULL_TREE;
2692 if (fd->lastprivate_conditional)
2694 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE__CONDTEMP_);
2696 if (fd->have_pointer_condtemp)
2697 condtemp = OMP_CLAUSE_DECL (c);
2698 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2699 cond_var = OMP_CLAUSE_DECL (c);
2701 if (sched_arg)
2703 if (fd->have_reductemp)
2705 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2706 OMP_CLAUSE__REDUCTEMP_);
2707 reductions = OMP_CLAUSE_DECL (c);
2708 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2709 gimple *g = SSA_NAME_DEF_STMT (reductions);
2710 reductions = gimple_assign_rhs1 (g);
2711 OMP_CLAUSE_DECL (c) = reductions;
2712 entry_bb = gimple_bb (g);
2713 edge e = split_block (entry_bb, g);
2714 if (region->entry == entry_bb)
2715 region->entry = e->dest;
2716 gsi = gsi_last_bb (entry_bb);
2718 else
2719 reductions = null_pointer_node;
2720 if (fd->have_pointer_condtemp)
2722 tree type = TREE_TYPE (condtemp);
2723 memv = create_tmp_var (type);
2724 TREE_ADDRESSABLE (memv) = 1;
2725 unsigned HOST_WIDE_INT sz
2726 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2727 sz *= fd->lastprivate_conditional;
2728 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2729 false);
2730 mem = build_fold_addr_expr (memv);
2732 else
2733 mem = null_pointer_node;
2735 if (fd->collapse > 1 || fd->ordered)
2737 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2738 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2740 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2741 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2742 zero_iter1_bb, first_zero_iter1,
2743 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2745 if (zero_iter1_bb)
2747 /* Some counts[i] vars might be uninitialized if
2748 some loop has zero iterations. But the body shouldn't
2749 be executed in that case, so just avoid uninit warnings. */
2750 for (i = first_zero_iter1;
2751 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2752 if (SSA_VAR_P (counts[i]))
2753 TREE_NO_WARNING (counts[i]) = 1;
2754 gsi_prev (&gsi);
2755 e = split_block (entry_bb, gsi_stmt (gsi));
2756 entry_bb = e->dest;
2757 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2758 gsi = gsi_last_nondebug_bb (entry_bb);
2759 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2760 get_immediate_dominator (CDI_DOMINATORS,
2761 zero_iter1_bb));
2763 if (zero_iter2_bb)
2765 /* Some counts[i] vars might be uninitialized if
2766 some loop has zero iterations. But the body shouldn't
2767 be executed in that case, so just avoid uninit warnings. */
2768 for (i = first_zero_iter2; i < fd->ordered; i++)
2769 if (SSA_VAR_P (counts[i]))
2770 TREE_NO_WARNING (counts[i]) = 1;
2771 if (zero_iter1_bb)
2772 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2773 else
2775 gsi_prev (&gsi);
2776 e = split_block (entry_bb, gsi_stmt (gsi));
2777 entry_bb = e->dest;
2778 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2779 gsi = gsi_last_nondebug_bb (entry_bb);
2780 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2781 get_immediate_dominator
2782 (CDI_DOMINATORS, zero_iter2_bb));
2785 if (fd->collapse == 1)
2787 counts[0] = fd->loop.n2;
2788 fd->loop = fd->loops[0];
2792 type = TREE_TYPE (fd->loop.v);
2793 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2794 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2795 TREE_ADDRESSABLE (istart0) = 1;
2796 TREE_ADDRESSABLE (iend0) = 1;
2798 /* See if we need to bias by LLONG_MIN. */
2799 if (fd->iter_type == long_long_unsigned_type_node
2800 && TREE_CODE (type) == INTEGER_TYPE
2801 && !TYPE_UNSIGNED (type)
2802 && fd->ordered == 0)
2804 tree n1, n2;
2806 if (fd->loop.cond_code == LT_EXPR)
2808 n1 = fd->loop.n1;
2809 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2811 else
2813 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814 n2 = fd->loop.n1;
2816 if (TREE_CODE (n1) != INTEGER_CST
2817 || TREE_CODE (n2) != INTEGER_CST
2818 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2819 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
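/* Note added for exposition: this is the usual trick for driving a signed
   long long iteration space through the unsigned runtime API.  Adding
   TYPE_MIN_VALUE (0x8000000000000000 here) to both bounds maps the signed
   range [LLONG_MIN, LLONG_MAX] monotonically onto [0, ULLONG_MAX], so the
   unsigned comparisons done by the GOMP_loop_ull_* routines still order the
   iterations correctly; the same bias is subtracted again when istart0 and
   iend0 are converted back below.  */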
2822 gimple_stmt_iterator gsif = gsi;
2823 gsi_prev (&gsif);
2825 tree arr = NULL_TREE;
2826 if (in_combined_parallel)
2828 gcc_assert (fd->ordered == 0);
2829 /* In a combined parallel loop, emit a call to
2830 GOMP_loop_foo_next. */
2831 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2832 build_fold_addr_expr (istart0),
2833 build_fold_addr_expr (iend0));
2835 else
2837 tree t0, t1, t2, t3, t4;
2838 /* If this is not a combined parallel loop, emit a call to
2839 GOMP_loop_foo_start in ENTRY_BB. */
2840 t4 = build_fold_addr_expr (iend0);
2841 t3 = build_fold_addr_expr (istart0);
2842 if (fd->ordered)
2844 t0 = build_int_cst (unsigned_type_node,
2845 fd->ordered - fd->collapse + 1);
2846 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2847 fd->ordered
2848 - fd->collapse + 1),
2849 ".omp_counts");
2850 DECL_NAMELESS (arr) = 1;
2851 TREE_ADDRESSABLE (arr) = 1;
2852 TREE_STATIC (arr) = 1;
2853 vec<constructor_elt, va_gc> *v;
2854 vec_alloc (v, fd->ordered - fd->collapse + 1);
2855 int idx;
2857 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2859 tree c;
2860 if (idx == 0 && fd->collapse > 1)
2861 c = fd->loop.n2;
2862 else
2863 c = counts[idx + fd->collapse - 1];
2864 tree purpose = size_int (idx);
2865 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2866 if (TREE_CODE (c) != INTEGER_CST)
2867 TREE_STATIC (arr) = 0;
2870 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2871 if (!TREE_STATIC (arr))
2872 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2873 void_type_node, arr),
2874 true, NULL_TREE, true, GSI_SAME_STMT);
2875 t1 = build_fold_addr_expr (arr);
2876 t2 = NULL_TREE;
2878 else
2880 t2 = fold_convert (fd->iter_type, fd->loop.step);
2881 t1 = fd->loop.n2;
2882 t0 = fd->loop.n1;
2883 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2885 tree innerc
2886 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2887 OMP_CLAUSE__LOOPTEMP_);
2888 gcc_assert (innerc);
2889 t0 = OMP_CLAUSE_DECL (innerc);
2890 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2891 OMP_CLAUSE__LOOPTEMP_);
2892 gcc_assert (innerc);
2893 t1 = OMP_CLAUSE_DECL (innerc);
2895 if (POINTER_TYPE_P (TREE_TYPE (t0))
2896 && TYPE_PRECISION (TREE_TYPE (t0))
2897 != TYPE_PRECISION (fd->iter_type))
2899 /* Avoid casting pointers to integer of a different size. */
2900 tree itype = signed_type_for (type);
2901 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2902 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2904 else
2906 t1 = fold_convert (fd->iter_type, t1);
2907 t0 = fold_convert (fd->iter_type, t0);
2909 if (bias)
2911 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2912 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2915 if (fd->iter_type == long_integer_type_node || fd->ordered)
2917 if (fd->chunk_size)
2919 t = fold_convert (fd->iter_type, fd->chunk_size);
2920 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2921 if (sched_arg)
2923 if (fd->ordered)
2924 t = build_call_expr (builtin_decl_explicit (start_fn),
2925 8, t0, t1, sched_arg, t, t3, t4,
2926 reductions, mem);
2927 else
2928 t = build_call_expr (builtin_decl_explicit (start_fn),
2929 9, t0, t1, t2, sched_arg, t, t3, t4,
2930 reductions, mem);
2932 else if (fd->ordered)
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 5, t0, t1, t, t3, t4);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 6, t0, t1, t2, t, t3, t4);
2939 else if (fd->ordered)
2940 t = build_call_expr (builtin_decl_explicit (start_fn),
2941 4, t0, t1, t3, t4);
2942 else
2943 t = build_call_expr (builtin_decl_explicit (start_fn),
2944 5, t0, t1, t2, t3, t4);
2946 else
2948 tree t5;
2949 tree c_bool_type;
2950 tree bfn_decl;
2952 /* The GOMP_loop_ull_*start functions have an additional boolean
2953 argument, true for < loops and false for > loops.
2954 In Fortran, the C bool type can be different from
2955 boolean_type_node. */
2956 bfn_decl = builtin_decl_explicit (start_fn);
2957 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2958 t5 = build_int_cst (c_bool_type,
2959 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2960 if (fd->chunk_size)
2962 tree bfn_decl = builtin_decl_explicit (start_fn);
2963 t = fold_convert (fd->iter_type, fd->chunk_size);
2964 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2965 if (sched_arg)
2966 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2967 t, t3, t4, reductions, mem);
2968 else
2969 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2971 else
2972 t = build_call_expr (builtin_decl_explicit (start_fn),
2973 6, t5, t0, t1, t2, t3, t4);
2976 if (TREE_TYPE (t) != boolean_type_node)
2977 t = fold_build2 (NE_EXPR, boolean_type_node,
2978 t, build_int_cst (TREE_TYPE (t), 0));
2979 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2980 true, GSI_SAME_STMT);
2981 if (arr && !TREE_STATIC (arr))
2983 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2984 TREE_THIS_VOLATILE (clobber) = 1;
2985 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2986 GSI_SAME_STMT);
2988 if (fd->have_pointer_condtemp)
2989 expand_omp_build_assign (&gsi, condtemp, memv, false);
2990 if (fd->have_reductemp)
2992 gimple *g = gsi_stmt (gsi);
2993 gsi_remove (&gsi, true);
2994 release_ssa_name (gimple_assign_lhs (g));
2996 entry_bb = region->entry;
2997 gsi = gsi_last_nondebug_bb (entry_bb);
2999 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3001 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3003 /* Remove the GIMPLE_OMP_FOR statement. */
3004 gsi_remove (&gsi, true);
3006 if (gsi_end_p (gsif))
3007 gsif = gsi_after_labels (gsi_bb (gsif));
3008 gsi_next (&gsif);
3010 /* Iteration setup for sequential loop goes in L0_BB. */
3011 tree startvar = fd->loop.v;
3012 tree endvar = NULL_TREE;
3014 if (gimple_omp_for_combined_p (fd->for_stmt))
3016 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3017 && gimple_omp_for_kind (inner_stmt)
3018 == GF_OMP_FOR_KIND_SIMD);
3019 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3020 OMP_CLAUSE__LOOPTEMP_);
3021 gcc_assert (innerc);
3022 startvar = OMP_CLAUSE_DECL (innerc);
3023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3024 OMP_CLAUSE__LOOPTEMP_);
3025 gcc_assert (innerc);
3026 endvar = OMP_CLAUSE_DECL (innerc);
3029 gsi = gsi_start_bb (l0_bb);
3030 t = istart0;
3031 if (fd->ordered && fd->collapse == 1)
3032 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3033 fold_convert (fd->iter_type, fd->loop.step));
3034 else if (bias)
3035 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3036 if (fd->ordered && fd->collapse == 1)
3038 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3039 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3040 fd->loop.n1, fold_convert (sizetype, t));
3041 else
3043 t = fold_convert (TREE_TYPE (startvar), t);
3044 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3045 fd->loop.n1, t);
3048 else
3050 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3051 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3052 t = fold_convert (TREE_TYPE (startvar), t);
3054 t = force_gimple_operand_gsi (&gsi, t,
3055 DECL_P (startvar)
3056 && TREE_ADDRESSABLE (startvar),
3057 NULL_TREE, false, GSI_CONTINUE_LINKING);
3058 assign_stmt = gimple_build_assign (startvar, t);
3059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3060 if (cond_var)
3062 tree itype = TREE_TYPE (cond_var);
3063 /* For lastprivate(conditional:) itervar, we need some iteration
3064 counter that starts at unsigned non-zero and increases.
3065 Prefer as few IVs as possible, so if we can use startvar
3066 itself, use that, or startvar + constant (those would be
3067 incremented with step), and as a last resort use s0 + 1,
3068 incremented by 1. */
3069 if ((fd->ordered && fd->collapse == 1)
3070 || bias
3071 || POINTER_TYPE_P (type)
3072 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3073 || fd->loop.cond_code != LT_EXPR)
3074 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3075 build_int_cst (itype, 1));
3076 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3077 t = fold_convert (itype, t);
3078 else
3080 tree c = fold_convert (itype, fd->loop.n1);
3081 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3084 t = force_gimple_operand_gsi (&gsi, t, false,
3085 NULL_TREE, false, GSI_CONTINUE_LINKING);
3086 assign_stmt = gimple_build_assign (cond_var, t);
3087 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3090 t = iend0;
3091 if (fd->ordered && fd->collapse == 1)
3092 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3093 fold_convert (fd->iter_type, fd->loop.step));
3094 else if (bias)
3095 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3096 if (fd->ordered && fd->collapse == 1)
3098 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3099 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3100 fd->loop.n1, fold_convert (sizetype, t));
3101 else
3103 t = fold_convert (TREE_TYPE (startvar), t);
3104 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3105 fd->loop.n1, t);
3108 else
3110 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3111 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3112 t = fold_convert (TREE_TYPE (startvar), t);
3114 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3115 false, GSI_CONTINUE_LINKING);
3116 if (endvar)
3118 assign_stmt = gimple_build_assign (endvar, iend);
3119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3120 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3121 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3122 else
3123 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3124 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3126 /* Handle linear clause adjustments. */
3127 tree itercnt = NULL_TREE;
3128 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3129 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3130 c; c = OMP_CLAUSE_CHAIN (c))
3131 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3132 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3134 tree d = OMP_CLAUSE_DECL (c);
3135 bool is_ref = omp_is_reference (d);
3136 tree t = d, a, dest;
3137 if (is_ref)
3138 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3139 tree type = TREE_TYPE (t);
3140 if (POINTER_TYPE_P (type))
3141 type = sizetype;
3142 dest = unshare_expr (t);
3143 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3144 expand_omp_build_assign (&gsif, v, t);
3145 if (itercnt == NULL_TREE)
3147 itercnt = startvar;
3148 tree n1 = fd->loop.n1;
3149 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3151 itercnt
3152 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3153 itercnt);
3154 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3156 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3157 itercnt, n1);
3158 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3159 itercnt, fd->loop.step);
3160 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3161 NULL_TREE, false,
3162 GSI_CONTINUE_LINKING);
3164 a = fold_build2 (MULT_EXPR, type,
3165 fold_convert (type, itercnt),
3166 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3167 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3168 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3169 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3170 false, GSI_CONTINUE_LINKING);
3171 assign_stmt = gimple_build_assign (dest, t);
3172 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3174 if (fd->collapse > 1)
3175 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3177 if (fd->ordered)
3179 /* Until now, the counts array contained the number of iterations (or
3180 a variable containing it) for the ith loop. From now on, we need
3181 those counts only for collapsed loops, and only for the 2nd
3182 till the last collapsed one. Move those one element earlier,
3183 we'll use counts[fd->collapse - 1] for the first source/sink
3184 iteration counter and so on and counts[fd->ordered]
3185 as the array holding the current counter values for
3186 depend(source). */
3187 if (fd->collapse > 1)
3188 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
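/* Illustration added for exposition: with collapse == 3 and ordered == 5,
   counts[1] and counts[2] (the iteration counts of the 2nd and 3rd
   collapsed loops) move down to counts[0] and counts[1]; counts[2] then
   serves as the source/sink counter of the collapsed nest, counts[3] and
   counts[4] as the counters of the two non-collapsed ordered loops, and
   counts[5] is the array holding the current counter values for
   depend(source).  */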
3189 if (broken_loop)
3191 int i;
3192 for (i = fd->collapse; i < fd->ordered; i++)
3194 tree type = TREE_TYPE (fd->loops[i].v);
3195 tree this_cond
3196 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3197 fold_convert (type, fd->loops[i].n1),
3198 fold_convert (type, fd->loops[i].n2));
3199 if (!integer_onep (this_cond))
3200 break;
3202 if (i < fd->ordered)
3204 cont_bb
3205 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3206 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3207 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3208 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3209 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3210 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3211 make_edge (cont_bb, l1_bb, 0);
3212 l2_bb = create_empty_bb (cont_bb);
3213 broken_loop = false;
3216 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3217 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3218 ordered_lastprivate);
3219 if (counts[fd->collapse - 1])
3221 gcc_assert (fd->collapse == 1);
3222 gsi = gsi_last_bb (l0_bb);
3223 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3224 istart0, true);
3225 gsi = gsi_last_bb (cont_bb);
3226 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3227 build_int_cst (fd->iter_type, 1));
3228 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3229 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3230 size_zero_node, NULL_TREE, NULL_TREE);
3231 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3232 t = counts[fd->collapse - 1];
3234 else if (fd->collapse > 1)
3235 t = fd->loop.v;
3236 else
3238 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3239 fd->loops[0].v, fd->loops[0].n1);
3240 t = fold_convert (fd->iter_type, t);
3242 gsi = gsi_last_bb (l0_bb);
3243 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3244 size_zero_node, NULL_TREE, NULL_TREE);
3245 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3246 false, GSI_CONTINUE_LINKING);
3247 expand_omp_build_assign (&gsi, aref, t, true);
3250 if (!broken_loop)
3252 /* Code to control the increment and predicate for the sequential
3253 loop goes in the CONT_BB. */
3254 gsi = gsi_last_nondebug_bb (cont_bb);
3255 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3256 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3257 vmain = gimple_omp_continue_control_use (cont_stmt);
3258 vback = gimple_omp_continue_control_def (cont_stmt);
3260 if (!gimple_omp_for_combined_p (fd->for_stmt))
3262 if (POINTER_TYPE_P (type))
3263 t = fold_build_pointer_plus (vmain, fd->loop.step);
3264 else
3265 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3266 t = force_gimple_operand_gsi (&gsi, t,
3267 DECL_P (vback)
3268 && TREE_ADDRESSABLE (vback),
3269 NULL_TREE, true, GSI_SAME_STMT);
3270 assign_stmt = gimple_build_assign (vback, t);
3271 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3273 if (cond_var)
3275 tree itype = TREE_TYPE (cond_var);
3276 tree t2;
3277 if ((fd->ordered && fd->collapse == 1)
3278 || bias
3279 || POINTER_TYPE_P (type)
3280 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3281 || fd->loop.cond_code != LT_EXPR)
3282 t2 = build_int_cst (itype, 1);
3283 else
3284 t2 = fold_convert (itype, fd->loop.step);
3285 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3286 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3287 NULL_TREE, true, GSI_SAME_STMT);
3288 assign_stmt = gimple_build_assign (cond_var, t2);
3289 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3292 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3294 tree tem;
3295 if (fd->collapse > 1)
3296 tem = fd->loop.v;
3297 else
3299 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3300 fd->loops[0].v, fd->loops[0].n1);
3301 tem = fold_convert (fd->iter_type, tem);
3303 tree aref = build4 (ARRAY_REF, fd->iter_type,
3304 counts[fd->ordered], size_zero_node,
3305 NULL_TREE, NULL_TREE);
3306 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3307 true, GSI_SAME_STMT);
3308 expand_omp_build_assign (&gsi, aref, tem);
3311 t = build2 (fd->loop.cond_code, boolean_type_node,
3312 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3313 iend);
3314 gcond *cond_stmt = gimple_build_cond_empty (t);
3315 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3318 /* Remove GIMPLE_OMP_CONTINUE. */
3319 gsi_remove (&gsi, true);
3321 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3322 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3324 /* Emit code to get the next parallel iteration in L2_BB. */
3325 gsi = gsi_start_bb (l2_bb);
3327 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3328 build_fold_addr_expr (istart0),
3329 build_fold_addr_expr (iend0));
3330 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3331 false, GSI_CONTINUE_LINKING);
3332 if (TREE_TYPE (t) != boolean_type_node)
3333 t = fold_build2 (NE_EXPR, boolean_type_node,
3334 t, build_int_cst (TREE_TYPE (t), 0));
3335 gcond *cond_stmt = gimple_build_cond_empty (t);
3336 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3339 /* Add the loop cleanup function. */
3340 gsi = gsi_last_nondebug_bb (exit_bb);
3341 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3342 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3343 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3344 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3345 else
3346 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3347 gcall *call_stmt = gimple_build_call (t, 0);
3348 if (fd->ordered)
3350 tree arr = counts[fd->ordered];
3351 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3352 TREE_THIS_VOLATILE (clobber) = 1;
3353 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3354 GSI_SAME_STMT);
3356 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3358 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3359 if (fd->have_reductemp)
3361 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3362 gimple_call_lhs (call_stmt));
3363 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3366 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3367 gsi_remove (&gsi, true);
3369 /* Connect the new blocks. */
3370 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3371 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3373 if (!broken_loop)
3375 gimple_seq phis;
3377 e = find_edge (cont_bb, l3_bb);
3378 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3380 phis = phi_nodes (l3_bb);
3381 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3383 gimple *phi = gsi_stmt (gsi);
3384 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3385 PHI_ARG_DEF_FROM_EDGE (phi, e));
3387 remove_edge (e);
3389 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3390 e = find_edge (cont_bb, l1_bb);
3391 if (e == NULL)
3393 e = BRANCH_EDGE (cont_bb);
3394 gcc_assert (single_succ (e->dest) == l1_bb);
3396 if (gimple_omp_for_combined_p (fd->for_stmt))
3398 remove_edge (e);
3399 e = NULL;
3401 else if (fd->collapse > 1)
3403 remove_edge (e);
3404 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3406 else
3407 e->flags = EDGE_TRUE_VALUE;
3408 if (e)
3410 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3411 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3413 else
3415 e = find_edge (cont_bb, l2_bb);
3416 e->flags = EDGE_FALLTHRU;
3418 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3420 if (gimple_in_ssa_p (cfun))
3422 /* Add phis to the outer loop that connect to the phis in the inner,
3423 original loop, and move the loop entry value of the inner phi to
3424 the loop entry value of the outer phi. */
3425 gphi_iterator psi;
3426 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3428 location_t locus;
3429 gphi *nphi;
3430 gphi *exit_phi = psi.phi ();
3432 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3433 continue;
3435 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3436 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3438 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3439 edge latch_to_l1 = find_edge (latch, l1_bb);
3440 gphi *inner_phi
3441 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3443 tree t = gimple_phi_result (exit_phi);
3444 tree new_res = copy_ssa_name (t, NULL);
3445 nphi = create_phi_node (new_res, l0_bb);
3447 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3448 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3449 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3450 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3451 add_phi_arg (nphi, t, entry_to_l0, locus);
3453 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3454 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3456 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3460 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3461 recompute_dominator (CDI_DOMINATORS, l2_bb));
3462 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3463 recompute_dominator (CDI_DOMINATORS, l3_bb));
3464 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3465 recompute_dominator (CDI_DOMINATORS, l0_bb));
3466 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3467 recompute_dominator (CDI_DOMINATORS, l1_bb));
3469 /* We enter expand_omp_for_generic with a loop. This original loop may
3470 have its own loop struct, or it may be part of an outer loop struct
3471 (which may be the fake loop). */
3472 struct loop *outer_loop = entry_bb->loop_father;
3473 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3475 add_bb_to_loop (l2_bb, outer_loop);
3477 /* We've added a new loop around the original loop. Allocate the
3478 corresponding loop struct. */
3479 struct loop *new_loop = alloc_loop ();
3480 new_loop->header = l0_bb;
3481 new_loop->latch = l2_bb;
3482 add_loop (new_loop, outer_loop);
3484 /* Allocate a loop structure for the original loop unless we already
3485 had one. */
3486 if (!orig_loop_has_loop_struct
3487 && !gimple_omp_for_combined_p (fd->for_stmt))
3489 struct loop *orig_loop = alloc_loop ();
3490 orig_loop->header = l1_bb;
3491 /* The loop may have multiple latches. */
3492 add_loop (orig_loop, new_loop);
3497 /* A subroutine of expand_omp_for. Generate code for a parallel
3498 loop with static schedule and no specified chunk size. Given
3499 parameters:
3501 for (V = N1; V cond N2; V += STEP) BODY;
3503 where COND is "<" or ">", we generate pseudocode
3505 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3506 if (cond is <)
3507 adj = STEP - 1;
3508 else
3509 adj = STEP + 1;
3510 if ((__typeof (V)) -1 > 0 && cond is >)
3511 n = -(adj + N2 - N1) / -STEP;
3512 else
3513 n = (adj + N2 - N1) / STEP;
3514 q = n / nthreads;
3515 tt = n % nthreads;
3516 if (threadid < tt) goto L3; else goto L4;
3518 tt = 0;
3519 q = q + 1;
3521 s0 = q * threadid + tt;
3522 e0 = s0 + q;
3523 V = s0 * STEP + N1;
3524 if (s0 >= e0) goto L2; else goto L0;
3526 e = e0 * STEP + N1;
3528 BODY;
3529 V += STEP;
3530 if (V cond e) goto L1;
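/* Worked example added for exposition: with n = 10 iterations and
   nthreads = 4 the partitioning above gives q = 2 and tt = 2, so threads 0
   and 1 each take q + 1 = 3 iterations and threads 2 and 3 take 2:
     thread 0: s0 = 0, e0 = 3
     thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8
     thread 3: s0 = 8, e0 = 10  */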
3534 static void
3535 expand_omp_for_static_nochunk (struct omp_region *region,
3536 struct omp_for_data *fd,
3537 gimple *inner_stmt)
3539 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3540 tree type, itype, vmain, vback;
3541 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3542 basic_block body_bb, cont_bb, collapse_bb = NULL;
3543 basic_block fin_bb;
3544 gimple_stmt_iterator gsi, gsip;
3545 edge ep;
3546 bool broken_loop = region->cont == NULL;
3547 tree *counts = NULL;
3548 tree n1, n2, step;
3549 tree reductions = NULL_TREE;
3550 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3552 itype = type = TREE_TYPE (fd->loop.v);
3553 if (POINTER_TYPE_P (type))
3554 itype = signed_type_for (type);
3556 entry_bb = region->entry;
3557 cont_bb = region->cont;
3558 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3559 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3560 gcc_assert (broken_loop
3561 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3562 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3563 body_bb = single_succ (seq_start_bb);
3564 if (!broken_loop)
3566 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3567 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3568 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3570 exit_bb = region->exit;
3572 /* Iteration space partitioning goes in ENTRY_BB. */
3573 gsi = gsi_last_nondebug_bb (entry_bb);
3574 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3575 gsip = gsi;
3576 gsi_prev (&gsip);
3578 if (fd->collapse > 1)
3580 int first_zero_iter = -1, dummy = -1;
3581 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3583 counts = XALLOCAVEC (tree, fd->collapse);
3584 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3585 fin_bb, first_zero_iter,
3586 dummy_bb, dummy, l2_dom_bb);
3587 t = NULL_TREE;
3589 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3590 t = integer_one_node;
3591 else
3592 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3593 fold_convert (type, fd->loop.n1),
3594 fold_convert (type, fd->loop.n2));
3595 if (fd->collapse == 1
3596 && TYPE_UNSIGNED (type)
3597 && (t == NULL_TREE || !integer_onep (t)))
3599 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3600 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3601 true, GSI_SAME_STMT);
3602 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3603 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3604 true, GSI_SAME_STMT);
3605 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3606 NULL_TREE, NULL_TREE);
3607 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3608 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3609 expand_omp_regimplify_p, NULL, NULL)
3610 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3611 expand_omp_regimplify_p, NULL, NULL))
3613 gsi = gsi_for_stmt (cond_stmt);
3614 gimple_regimplify_operands (cond_stmt, &gsi);
3616 ep = split_block (entry_bb, cond_stmt);
3617 ep->flags = EDGE_TRUE_VALUE;
3618 entry_bb = ep->dest;
3619 ep->probability = profile_probability::very_likely ();
3620 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3621 ep->probability = profile_probability::very_unlikely ();
3622 if (gimple_in_ssa_p (cfun))
3624 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3625 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3626 !gsi_end_p (gpi); gsi_next (&gpi))
3628 gphi *phi = gpi.phi ();
3629 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3630 ep, UNKNOWN_LOCATION);
3633 gsi = gsi_last_bb (entry_bb);
3636 if (fd->lastprivate_conditional)
3638 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3639 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3640 if (fd->have_pointer_condtemp)
3641 condtemp = OMP_CLAUSE_DECL (c);
3642 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3643 cond_var = OMP_CLAUSE_DECL (c);
3645 if (fd->have_reductemp || fd->have_pointer_condtemp)
3647 tree t1 = build_int_cst (long_integer_type_node, 0);
3648 tree t2 = build_int_cst (long_integer_type_node, 1);
3649 tree t3 = build_int_cstu (long_integer_type_node,
3650 (HOST_WIDE_INT_1U << 31) + 1);
3651 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3652 gimple_stmt_iterator gsi2 = gsi_none ();
3653 gimple *g = NULL;
3654 tree mem = null_pointer_node, memv = NULL_TREE;
3655 if (fd->have_reductemp)
3657 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3658 reductions = OMP_CLAUSE_DECL (c);
3659 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3660 g = SSA_NAME_DEF_STMT (reductions);
3661 reductions = gimple_assign_rhs1 (g);
3662 OMP_CLAUSE_DECL (c) = reductions;
3663 gsi2 = gsi_for_stmt (g);
3665 else
3667 if (gsi_end_p (gsip))
3668 gsi2 = gsi_after_labels (region->entry);
3669 else
3670 gsi2 = gsip;
3671 reductions = null_pointer_node;
3673 if (fd->have_pointer_condtemp)
3675 tree type = TREE_TYPE (condtemp);
3676 memv = create_tmp_var (type);
3677 TREE_ADDRESSABLE (memv) = 1;
3678 unsigned HOST_WIDE_INT sz
3679 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3680 sz *= fd->lastprivate_conditional;
3681 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
3682 false);
3683 mem = build_fold_addr_expr (memv);
3685 tree t
3686 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3687 9, t1, t2, t2, t3, t1, null_pointer_node,
3688 null_pointer_node, reductions, mem);
3689 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3690 true, GSI_SAME_STMT);
3691 if (fd->have_pointer_condtemp)
3692 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3693 if (fd->have_reductemp)
3695 gsi_remove (&gsi2, true);
3696 release_ssa_name (gimple_assign_lhs (g));
3699 switch (gimple_omp_for_kind (fd->for_stmt))
3701 case GF_OMP_FOR_KIND_FOR:
3702 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3703 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3704 break;
3705 case GF_OMP_FOR_KIND_DISTRIBUTE:
3706 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3707 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3708 break;
3709 default:
3710 gcc_unreachable ();
3712 nthreads = build_call_expr (nthreads, 0);
3713 nthreads = fold_convert (itype, nthreads);
3714 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3715 true, GSI_SAME_STMT);
3716 threadid = build_call_expr (threadid, 0);
3717 threadid = fold_convert (itype, threadid);
3718 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3719 true, GSI_SAME_STMT);
3721 n1 = fd->loop.n1;
3722 n2 = fd->loop.n2;
3723 step = fd->loop.step;
3724 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3726 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3727 OMP_CLAUSE__LOOPTEMP_);
3728 gcc_assert (innerc);
3729 n1 = OMP_CLAUSE_DECL (innerc);
3730 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3731 OMP_CLAUSE__LOOPTEMP_);
3732 gcc_assert (innerc);
3733 n2 = OMP_CLAUSE_DECL (innerc);
3735 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3736 true, NULL_TREE, true, GSI_SAME_STMT);
3737 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3738 true, NULL_TREE, true, GSI_SAME_STMT);
3739 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3740 true, NULL_TREE, true, GSI_SAME_STMT);
3742 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3743 t = fold_build2 (PLUS_EXPR, itype, step, t);
3744 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3745 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3746 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3747 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3748 fold_build1 (NEGATE_EXPR, itype, t),
3749 fold_build1 (NEGATE_EXPR, itype, step));
3750 else
3751 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3752 t = fold_convert (itype, t);
3753 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3755 q = create_tmp_reg (itype, "q");
3756 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3757 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3758 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3760 tt = create_tmp_reg (itype, "tt");
3761 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3762 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3763 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3765 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3766 gcond *cond_stmt = gimple_build_cond_empty (t);
3767 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3769 second_bb = split_block (entry_bb, cond_stmt)->dest;
3770 gsi = gsi_last_nondebug_bb (second_bb);
3771 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3773 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3774 GSI_SAME_STMT);
3775 gassign *assign_stmt
3776 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3777 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3779 third_bb = split_block (second_bb, assign_stmt)->dest;
3780 gsi = gsi_last_nondebug_bb (third_bb);
3781 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3783 t = build2 (MULT_EXPR, itype, q, threadid);
3784 t = build2 (PLUS_EXPR, itype, t, tt);
3785 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3787 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3788 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3790 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3791 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3793 /* Remove the GIMPLE_OMP_FOR statement. */
3794 gsi_remove (&gsi, true);
3796 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3797 gsi = gsi_start_bb (seq_start_bb);
3799 tree startvar = fd->loop.v;
3800 tree endvar = NULL_TREE;
3802 if (gimple_omp_for_combined_p (fd->for_stmt))
3804 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3805 ? gimple_omp_parallel_clauses (inner_stmt)
3806 : gimple_omp_for_clauses (inner_stmt);
3807 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3808 gcc_assert (innerc);
3809 startvar = OMP_CLAUSE_DECL (innerc);
3810 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3811 OMP_CLAUSE__LOOPTEMP_);
3812 gcc_assert (innerc);
3813 endvar = OMP_CLAUSE_DECL (innerc);
3814 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3815 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3817 int i;
3818 for (i = 1; i < fd->collapse; i++)
3820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3821 OMP_CLAUSE__LOOPTEMP_);
3822 gcc_assert (innerc);
3824 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3825 OMP_CLAUSE__LOOPTEMP_);
3826 if (innerc)
3828 /* If needed (distribute parallel for with lastprivate),
3829 propagate down the total number of iterations. */
3830 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3831 fd->loop.n2);
3832 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3833 GSI_CONTINUE_LINKING);
3834 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3835 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3839 t = fold_convert (itype, s0);
3840 t = fold_build2 (MULT_EXPR, itype, t, step);
3841 if (POINTER_TYPE_P (type))
3843 t = fold_build_pointer_plus (n1, t);
3844 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3845 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3846 t = fold_convert (signed_type_for (type), t);
3848 else
3849 t = fold_build2 (PLUS_EXPR, type, t, n1);
3850 t = fold_convert (TREE_TYPE (startvar), t);
3851 t = force_gimple_operand_gsi (&gsi, t,
3852 DECL_P (startvar)
3853 && TREE_ADDRESSABLE (startvar),
3854 NULL_TREE, false, GSI_CONTINUE_LINKING);
3855 assign_stmt = gimple_build_assign (startvar, t);
3856 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3857 if (cond_var)
3859 tree itype = TREE_TYPE (cond_var);
3860 /* For lastprivate(conditional:) itervar, we need some iteration
3861 counter that starts at unsigned non-zero and increases.
3862 Prefer as few IVs as possible, so if we can use startvar
3863 itself, use that, or startvar + constant (those would be
3864 incremented with step), and as a last resort use s0 + 1,
3865 incremented by 1. */
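/* An illustrative sketch of the three cases below: for
   "for (i = 4; i < n; i++)" the startvar value itself serves as
   cond_var (it starts at 4 > 0); for "for (i = -3; i < n; i += 2)"
   cond_var is the startvar value + 4, so it starts at 1 and is then
   bumped by the step; for pointer iterators or non-constant lower
   bounds cond_var falls back to s0 + 1, bumped by 1 per iteration.  */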
3866 if (POINTER_TYPE_P (type)
3867 || TREE_CODE (n1) != INTEGER_CST
3868 || fd->loop.cond_code != LT_EXPR)
3869 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
3870 build_int_cst (itype, 1));
3871 else if (tree_int_cst_sgn (n1) == 1)
3872 t = fold_convert (itype, t);
3873 else
3875 tree c = fold_convert (itype, n1);
3876 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3877 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3879 t = force_gimple_operand_gsi (&gsi, t, false,
3880 NULL_TREE, false, GSI_CONTINUE_LINKING);
3881 assign_stmt = gimple_build_assign (cond_var, t);
3882 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3885 t = fold_convert (itype, e0);
3886 t = fold_build2 (MULT_EXPR, itype, t, step);
3887 if (POINTER_TYPE_P (type))
3889 t = fold_build_pointer_plus (n1, t);
3890 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3891 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3892 t = fold_convert (signed_type_for (type), t);
3894 else
3895 t = fold_build2 (PLUS_EXPR, type, t, n1);
3896 t = fold_convert (TREE_TYPE (startvar), t);
3897 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3898 false, GSI_CONTINUE_LINKING);
3899 if (endvar)
3901 assign_stmt = gimple_build_assign (endvar, e);
3902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3903 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3904 assign_stmt = gimple_build_assign (fd->loop.v, e);
3905 else
3906 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3907 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3909 /* Handle linear clause adjustments. */
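/* Illustrative example: for "#pragma omp for linear(x:2)" each thread
   enters its chunk with x set to the incoming value of x plus
   2 * itercnt, where itercnt is the logical iteration number at which
   the chunk starts (s0, rebased below for combined constructs).  */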
3910 tree itercnt = NULL_TREE;
3911 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3912 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3913 c; c = OMP_CLAUSE_CHAIN (c))
3914 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3915 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3917 tree d = OMP_CLAUSE_DECL (c);
3918 bool is_ref = omp_is_reference (d);
3919 tree t = d, a, dest;
3920 if (is_ref)
3921 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3922 if (itercnt == NULL_TREE)
3924 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3926 itercnt = fold_build2 (MINUS_EXPR, itype,
3927 fold_convert (itype, n1),
3928 fold_convert (itype, fd->loop.n1));
3929 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3930 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3932 NULL_TREE, false,
3933 GSI_CONTINUE_LINKING);
3935 else
3936 itercnt = s0;
3938 tree type = TREE_TYPE (t);
3939 if (POINTER_TYPE_P (type))
3940 type = sizetype;
3941 a = fold_build2 (MULT_EXPR, type,
3942 fold_convert (type, itercnt),
3943 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3944 dest = unshare_expr (t);
3945 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3946 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3947 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 false, GSI_CONTINUE_LINKING);
3949 assign_stmt = gimple_build_assign (dest, t);
3950 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3952 if (fd->collapse > 1)
3953 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3955 if (!broken_loop)
3957 /* The code controlling the sequential loop replaces the
3958 GIMPLE_OMP_CONTINUE. */
3959 gsi = gsi_last_nondebug_bb (cont_bb);
3960 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3961 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3962 vmain = gimple_omp_continue_control_use (cont_stmt);
3963 vback = gimple_omp_continue_control_def (cont_stmt);
3965 if (!gimple_omp_for_combined_p (fd->for_stmt))
3967 if (POINTER_TYPE_P (type))
3968 t = fold_build_pointer_plus (vmain, step);
3969 else
3970 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3971 t = force_gimple_operand_gsi (&gsi, t,
3972 DECL_P (vback)
3973 && TREE_ADDRESSABLE (vback),
3974 NULL_TREE, true, GSI_SAME_STMT);
3975 assign_stmt = gimple_build_assign (vback, t);
3976 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3978 if (cond_var)
3980 tree itype = TREE_TYPE (cond_var);
3981 tree t2;
3982 if (POINTER_TYPE_P (type)
3983 || TREE_CODE (n1) != INTEGER_CST
3984 || fd->loop.cond_code != LT_EXPR)
3985 t2 = build_int_cst (itype, 1);
3986 else
3987 t2 = fold_convert (itype, step);
3988 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3989 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3990 NULL_TREE, true, GSI_SAME_STMT);
3991 assign_stmt = gimple_build_assign (cond_var, t2);
3992 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3995 t = build2 (fd->loop.cond_code, boolean_type_node,
3996 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3997 ? t : vback, e);
3998 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4001 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4002 gsi_remove (&gsi, true);
4004 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4005 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4008 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4009 gsi = gsi_last_nondebug_bb (exit_bb);
4010 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4012 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4013 if (fd->have_reductemp || fd->have_pointer_condtemp)
4015 tree fn;
4016 if (t)
4017 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4018 else
4019 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4020 gcall *g = gimple_build_call (fn, 0);
4021 if (t)
4023 gimple_call_set_lhs (g, t);
4024 if (fd->have_reductemp)
4025 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4026 NOP_EXPR, t),
4027 GSI_SAME_STMT);
4029 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4031 else
4032 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4034 gsi_remove (&gsi, true);
4036 /* Connect all the blocks. */
4037 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4038 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4039 ep = find_edge (entry_bb, second_bb);
4040 ep->flags = EDGE_TRUE_VALUE;
4041 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4042 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4043 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4045 if (!broken_loop)
4047 ep = find_edge (cont_bb, body_bb);
4048 if (ep == NULL)
4050 ep = BRANCH_EDGE (cont_bb);
4051 gcc_assert (single_succ (ep->dest) == body_bb);
4053 if (gimple_omp_for_combined_p (fd->for_stmt))
4055 remove_edge (ep);
4056 ep = NULL;
4058 else if (fd->collapse > 1)
4060 remove_edge (ep);
4061 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4063 else
4064 ep->flags = EDGE_TRUE_VALUE;
4065 find_edge (cont_bb, fin_bb)->flags
4066 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4069 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4070 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4071 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
4073 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4074 recompute_dominator (CDI_DOMINATORS, body_bb));
4075 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4076 recompute_dominator (CDI_DOMINATORS, fin_bb));
4078 struct loop *loop = body_bb->loop_father;
4079 if (loop != entry_bb->loop_father)
4081 gcc_assert (broken_loop || loop->header == body_bb);
4082 gcc_assert (broken_loop
4083 || loop->latch == region->cont
4084 || single_pred (loop->latch) == region->cont);
4085 return;
4088 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4090 loop = alloc_loop ();
4091 loop->header = body_bb;
4092 if (collapse_bb == NULL)
4093 loop->latch = cont_bb;
4094 add_loop (loop, body_bb->loop_father);
4098 /* Return phi in E->DEST with ARG on edge E. */
4100 static gphi *
4101 find_phi_with_arg_on_edge (tree arg, edge e)
4103 basic_block bb = e->dest;
4105 for (gphi_iterator gpi = gsi_start_phis (bb);
4106 !gsi_end_p (gpi);
4107 gsi_next (&gpi))
4109 gphi *phi = gpi.phi ();
4110 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4111 return phi;
4114 return NULL;
4117 /* A subroutine of expand_omp_for. Generate code for a parallel
4118 loop with static schedule and a specified chunk size. Given
4119 parameters:
4121 for (V = N1; V cond N2; V += STEP) BODY;
4123 where COND is "<" or ">", we generate pseudocode
4125 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4126 if (cond is <)
4127 adj = STEP - 1;
4128 else
4129 adj = STEP + 1;
4130 if ((__typeof (V)) -1 > 0 && cond is >)
4131 n = -(adj + N2 - N1) / -STEP;
4132 else
4133 n = (adj + N2 - N1) / STEP;
4134 trip = 0;
4135 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4136 here so that V is defined
4137 if the loop is not entered
4138 L0:
4139 s0 = (trip * nthreads + threadid) * CHUNK;
4140 e0 = min (s0 + CHUNK, n);
4141 if (s0 < n) goto L1; else goto L4;
4142 L1:
4143 V = s0 * STEP + N1;
4144 e = e0 * STEP + N1;
4145 L2:
4146 BODY;
4147 V += STEP;
4148 if (V cond e) goto L2; else goto L3;
4149 L3:
4150 trip += 1;
4151 goto L0;
4152 L4:
4153 */
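/* For illustration only (an example schedule, not part of the
   expansion): with nthreads = 2, CHUNK = 3 and n = 10 iterations,

	trip 0: thread 0 -> [0, 3),  thread 1 -> [3, 6)
	trip 1: thread 0 -> [6, 9),  thread 1 -> [9, 10)
	trip 2: s0 >= n for both threads, so the loop terminates.

   Chunks of CHUNK iterations are dealt out round-robin and the last
   chunk is clipped to n by the min above.  */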
4155 static void
4156 expand_omp_for_static_chunk (struct omp_region *region,
4157 struct omp_for_data *fd, gimple *inner_stmt)
4159 tree n, s0, e0, e, t;
4160 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4161 tree type, itype, vmain, vback, vextra;
4162 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4163 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4164 gimple_stmt_iterator gsi, gsip;
4165 edge se;
4166 bool broken_loop = region->cont == NULL;
4167 tree *counts = NULL;
4168 tree n1, n2, step;
4169 tree reductions = NULL_TREE;
4170 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4172 itype = type = TREE_TYPE (fd->loop.v);
4173 if (POINTER_TYPE_P (type))
4174 itype = signed_type_for (type);
4176 entry_bb = region->entry;
4177 se = split_block (entry_bb, last_stmt (entry_bb));
4178 entry_bb = se->src;
4179 iter_part_bb = se->dest;
4180 cont_bb = region->cont;
4181 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4182 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4183 gcc_assert (broken_loop
4184 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4185 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4186 body_bb = single_succ (seq_start_bb);
4187 if (!broken_loop)
4189 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4190 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4191 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4192 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4194 exit_bb = region->exit;
4196 /* Trip and adjustment setup goes in ENTRY_BB. */
4197 gsi = gsi_last_nondebug_bb (entry_bb);
4198 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4199 gsip = gsi;
4200 gsi_prev (&gsip);
4202 if (fd->collapse > 1)
4204 int first_zero_iter = -1, dummy = -1;
4205 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4207 counts = XALLOCAVEC (tree, fd->collapse);
4208 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4209 fin_bb, first_zero_iter,
4210 dummy_bb, dummy, l2_dom_bb);
4211 t = NULL_TREE;
4213 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4214 t = integer_one_node;
4215 else
4216 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4217 fold_convert (type, fd->loop.n1),
4218 fold_convert (type, fd->loop.n2));
4219 if (fd->collapse == 1
4220 && TYPE_UNSIGNED (type)
4221 && (t == NULL_TREE || !integer_onep (t)))
4223 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4224 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4225 true, GSI_SAME_STMT);
4226 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4227 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4228 true, GSI_SAME_STMT);
4229 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4230 NULL_TREE, NULL_TREE);
4231 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4232 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4233 expand_omp_regimplify_p, NULL, NULL)
4234 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4235 expand_omp_regimplify_p, NULL, NULL))
4237 gsi = gsi_for_stmt (cond_stmt);
4238 gimple_regimplify_operands (cond_stmt, &gsi);
4240 se = split_block (entry_bb, cond_stmt);
4241 se->flags = EDGE_TRUE_VALUE;
4242 entry_bb = se->dest;
4243 se->probability = profile_probability::very_likely ();
4244 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4245 se->probability = profile_probability::very_unlikely ();
4246 if (gimple_in_ssa_p (cfun))
4248 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4249 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4250 !gsi_end_p (gpi); gsi_next (&gpi))
4252 gphi *phi = gpi.phi ();
4253 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4254 se, UNKNOWN_LOCATION);
4257 gsi = gsi_last_bb (entry_bb);
4260 if (fd->lastprivate_conditional)
4262 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4263 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4264 if (fd->have_pointer_condtemp)
4265 condtemp = OMP_CLAUSE_DECL (c);
4266 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4267 cond_var = OMP_CLAUSE_DECL (c);
4269 if (fd->have_reductemp || fd->have_pointer_condtemp)
4271 tree t1 = build_int_cst (long_integer_type_node, 0);
4272 tree t2 = build_int_cst (long_integer_type_node, 1);
4273 tree t3 = build_int_cstu (long_integer_type_node,
4274 (HOST_WIDE_INT_1U << 31) + 1);
4275 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4276 gimple_stmt_iterator gsi2 = gsi_none ();
4277 gimple *g = NULL;
4278 tree mem = null_pointer_node, memv = NULL_TREE;
4279 if (fd->have_reductemp)
4281 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4282 reductions = OMP_CLAUSE_DECL (c);
4283 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4284 g = SSA_NAME_DEF_STMT (reductions);
4285 reductions = gimple_assign_rhs1 (g);
4286 OMP_CLAUSE_DECL (c) = reductions;
4287 gsi2 = gsi_for_stmt (g);
4289 else
4291 if (gsi_end_p (gsip))
4292 gsi2 = gsi_after_labels (region->entry);
4293 else
4294 gsi2 = gsip;
4295 reductions = null_pointer_node;
4297 if (fd->have_pointer_condtemp)
4299 tree type = TREE_TYPE (condtemp);
4300 memv = create_tmp_var (type);
4301 TREE_ADDRESSABLE (memv) = 1;
4302 unsigned HOST_WIDE_INT sz
4303 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4304 sz *= fd->lastprivate_conditional;
4305 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4306 false);
4307 mem = build_fold_addr_expr (memv);
4309 tree t
4310 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4311 9, t1, t2, t2, t3, t1, null_pointer_node,
4312 null_pointer_node, reductions, mem);
4313 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4314 true, GSI_SAME_STMT);
4315 if (fd->have_pointer_condtemp)
4316 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4317 if (fd->have_reductemp)
4319 gsi_remove (&gsi2, true);
4320 release_ssa_name (gimple_assign_lhs (g));
4323 switch (gimple_omp_for_kind (fd->for_stmt))
4325 case GF_OMP_FOR_KIND_FOR:
4326 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4327 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4328 break;
4329 case GF_OMP_FOR_KIND_DISTRIBUTE:
4330 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4331 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4332 break;
4333 default:
4334 gcc_unreachable ();
4336 nthreads = build_call_expr (nthreads, 0);
4337 nthreads = fold_convert (itype, nthreads);
4338 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4339 true, GSI_SAME_STMT);
4340 threadid = build_call_expr (threadid, 0);
4341 threadid = fold_convert (itype, threadid);
4342 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4343 true, GSI_SAME_STMT);
4345 n1 = fd->loop.n1;
4346 n2 = fd->loop.n2;
4347 step = fd->loop.step;
4348 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4350 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4351 OMP_CLAUSE__LOOPTEMP_);
4352 gcc_assert (innerc);
4353 n1 = OMP_CLAUSE_DECL (innerc);
4354 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4355 OMP_CLAUSE__LOOPTEMP_);
4356 gcc_assert (innerc);
4357 n2 = OMP_CLAUSE_DECL (innerc);
4359 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4360 true, NULL_TREE, true, GSI_SAME_STMT);
4361 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4362 true, NULL_TREE, true, GSI_SAME_STMT);
4363 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4364 true, NULL_TREE, true, GSI_SAME_STMT);
4365 tree chunk_size = fold_convert (itype, fd->chunk_size);
4366 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4367 chunk_size
4368 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4369 GSI_SAME_STMT);
4371 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4372 t = fold_build2 (PLUS_EXPR, itype, step, t);
4373 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4374 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4375 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4376 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4377 fold_build1 (NEGATE_EXPR, itype, t),
4378 fold_build1 (NEGATE_EXPR, itype, step));
4379 else
4380 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4381 t = fold_convert (itype, t);
4382 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4383 true, GSI_SAME_STMT);
4385 trip_var = create_tmp_reg (itype, ".trip");
4386 if (gimple_in_ssa_p (cfun))
4388 trip_init = make_ssa_name (trip_var);
4389 trip_main = make_ssa_name (trip_var);
4390 trip_back = make_ssa_name (trip_var);
4392 else
4394 trip_init = trip_var;
4395 trip_main = trip_var;
4396 trip_back = trip_var;
4399 gassign *assign_stmt
4400 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4401 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4403 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4404 t = fold_build2 (MULT_EXPR, itype, t, step);
4405 if (POINTER_TYPE_P (type))
4406 t = fold_build_pointer_plus (n1, t);
4407 else
4408 t = fold_build2 (PLUS_EXPR, type, t, n1);
4409 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4410 true, GSI_SAME_STMT);
4412 /* Remove the GIMPLE_OMP_FOR. */
4413 gsi_remove (&gsi, true);
4415 gimple_stmt_iterator gsif = gsi;
4417 /* Iteration space partitioning goes in ITER_PART_BB. */
4418 gsi = gsi_last_bb (iter_part_bb);
4420 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4421 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4422 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4423 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4424 false, GSI_CONTINUE_LINKING);
4426 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4427 t = fold_build2 (MIN_EXPR, itype, t, n);
4428 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4431 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4432 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4434 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4435 gsi = gsi_start_bb (seq_start_bb);
4437 tree startvar = fd->loop.v;
4438 tree endvar = NULL_TREE;
4440 if (gimple_omp_for_combined_p (fd->for_stmt))
4442 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4443 ? gimple_omp_parallel_clauses (inner_stmt)
4444 : gimple_omp_for_clauses (inner_stmt);
4445 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4446 gcc_assert (innerc);
4447 startvar = OMP_CLAUSE_DECL (innerc);
4448 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4449 OMP_CLAUSE__LOOPTEMP_);
4450 gcc_assert (innerc);
4451 endvar = OMP_CLAUSE_DECL (innerc);
4452 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4453 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4455 int i;
4456 for (i = 1; i < fd->collapse; i++)
4458 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4459 OMP_CLAUSE__LOOPTEMP_);
4460 gcc_assert (innerc);
4462 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4463 OMP_CLAUSE__LOOPTEMP_);
4464 if (innerc)
4466 /* If needed (distribute parallel for with lastprivate),
4467 propagate down the total number of iterations. */
4468 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4469 fd->loop.n2);
4470 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4471 GSI_CONTINUE_LINKING);
4472 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4473 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4478 t = fold_convert (itype, s0);
4479 t = fold_build2 (MULT_EXPR, itype, t, step);
4480 if (POINTER_TYPE_P (type))
4482 t = fold_build_pointer_plus (n1, t);
4483 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4484 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4485 t = fold_convert (signed_type_for (type), t);
4487 else
4488 t = fold_build2 (PLUS_EXPR, type, t, n1);
4489 t = fold_convert (TREE_TYPE (startvar), t);
4490 t = force_gimple_operand_gsi (&gsi, t,
4491 DECL_P (startvar)
4492 && TREE_ADDRESSABLE (startvar),
4493 NULL_TREE, false, GSI_CONTINUE_LINKING);
4494 assign_stmt = gimple_build_assign (startvar, t);
4495 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4496 if (cond_var)
4498 tree itype = TREE_TYPE (cond_var);
4499 /* For lastprivate(conditional:) itervar, we need some iteration
4500 counter that starts at unsigned non-zero and increases.
4501 Prefer as few IVs as possible, so if we can use startvar
4502 itself, use that, or startvar + constant (those would be
4503 incremented with step), and as a last resort use s0 + 1,
4504 incremented by 1. */
4505 if (POINTER_TYPE_P (type)
4506 || TREE_CODE (n1) != INTEGER_CST
4507 || fd->loop.cond_code != LT_EXPR)
4508 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4509 build_int_cst (itype, 1));
4510 else if (tree_int_cst_sgn (n1) == 1)
4511 t = fold_convert (itype, t);
4512 else
4514 tree c = fold_convert (itype, n1);
4515 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4516 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4518 t = force_gimple_operand_gsi (&gsi, t, false,
4519 NULL_TREE, false, GSI_CONTINUE_LINKING);
4520 assign_stmt = gimple_build_assign (cond_var, t);
4521 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4524 t = fold_convert (itype, e0);
4525 t = fold_build2 (MULT_EXPR, itype, t, step);
4526 if (POINTER_TYPE_P (type))
4528 t = fold_build_pointer_plus (n1, t);
4529 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4530 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4531 t = fold_convert (signed_type_for (type), t);
4533 else
4534 t = fold_build2 (PLUS_EXPR, type, t, n1);
4535 t = fold_convert (TREE_TYPE (startvar), t);
4536 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4537 false, GSI_CONTINUE_LINKING);
4538 if (endvar)
4540 assign_stmt = gimple_build_assign (endvar, e);
4541 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4542 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4543 assign_stmt = gimple_build_assign (fd->loop.v, e);
4544 else
4545 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4546 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4548 /* Handle linear clause adjustments. */
4549 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4550 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4551 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4552 c; c = OMP_CLAUSE_CHAIN (c))
4553 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4554 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4556 tree d = OMP_CLAUSE_DECL (c);
4557 bool is_ref = omp_is_reference (d);
4558 tree t = d, a, dest;
4559 if (is_ref)
4560 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4561 tree type = TREE_TYPE (t);
4562 if (POINTER_TYPE_P (type))
4563 type = sizetype;
4564 dest = unshare_expr (t);
4565 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4566 expand_omp_build_assign (&gsif, v, t);
4567 if (itercnt == NULL_TREE)
4569 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4571 itercntbias
4572 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4573 fold_convert (itype, fd->loop.n1));
4574 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4575 itercntbias, step);
4576 itercntbias
4577 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4578 NULL_TREE, true,
4579 GSI_SAME_STMT);
4580 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4581 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4582 NULL_TREE, false,
4583 GSI_CONTINUE_LINKING);
4585 else
4586 itercnt = s0;
4588 a = fold_build2 (MULT_EXPR, type,
4589 fold_convert (type, itercnt),
4590 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4591 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4592 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4593 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4594 false, GSI_CONTINUE_LINKING);
4595 assign_stmt = gimple_build_assign (dest, t);
4596 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4598 if (fd->collapse > 1)
4599 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4601 if (!broken_loop)
4603 /* The code controlling the sequential loop goes in CONT_BB,
4604 replacing the GIMPLE_OMP_CONTINUE. */
4605 gsi = gsi_last_nondebug_bb (cont_bb);
4606 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4607 vmain = gimple_omp_continue_control_use (cont_stmt);
4608 vback = gimple_omp_continue_control_def (cont_stmt);
4610 if (!gimple_omp_for_combined_p (fd->for_stmt))
4612 if (POINTER_TYPE_P (type))
4613 t = fold_build_pointer_plus (vmain, step);
4614 else
4615 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4616 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4617 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4618 true, GSI_SAME_STMT);
4619 assign_stmt = gimple_build_assign (vback, t);
4620 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
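/* With a chunk size of 1 every chunk is a single iteration, so the
   back edge of the sequential loop can never be taken; emit a
   constant-false condition instead of comparing V with e.  */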
4622 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4623 t = build2 (EQ_EXPR, boolean_type_node,
4624 build_int_cst (itype, 0),
4625 build_int_cst (itype, 1));
4626 else
4627 t = build2 (fd->loop.cond_code, boolean_type_node,
4628 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4629 ? t : vback, e);
4630 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4633 /* Remove GIMPLE_OMP_CONTINUE. */
4634 gsi_remove (&gsi, true);
4636 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4637 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4639 /* Trip update code goes into TRIP_UPDATE_BB. */
4640 gsi = gsi_start_bb (trip_update_bb);
4642 t = build_int_cst (itype, 1);
4643 t = build2 (PLUS_EXPR, itype, trip_main, t);
4644 assign_stmt = gimple_build_assign (trip_back, t);
4645 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4648 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4649 gsi = gsi_last_nondebug_bb (exit_bb);
4650 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4652 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4653 if (fd->have_reductemp || fd->have_pointer_condtemp)
4655 tree fn;
4656 if (t)
4657 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4658 else
4659 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4660 gcall *g = gimple_build_call (fn, 0);
4661 if (t)
4663 gimple_call_set_lhs (g, t);
4664 if (fd->have_reductemp)
4665 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4666 NOP_EXPR, t),
4667 GSI_SAME_STMT);
4669 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4671 else
4672 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4674 gsi_remove (&gsi, true);
4676 /* Connect the new blocks. */
4677 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4678 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4680 if (!broken_loop)
4682 se = find_edge (cont_bb, body_bb);
4683 if (se == NULL)
4685 se = BRANCH_EDGE (cont_bb);
4686 gcc_assert (single_succ (se->dest) == body_bb);
4688 if (gimple_omp_for_combined_p (fd->for_stmt))
4690 remove_edge (se);
4691 se = NULL;
4693 else if (fd->collapse > 1)
4695 remove_edge (se);
4696 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4698 else
4699 se->flags = EDGE_TRUE_VALUE;
4700 find_edge (cont_bb, trip_update_bb)->flags
4701 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4703 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4704 iter_part_bb);
4707 if (gimple_in_ssa_p (cfun))
4709 gphi_iterator psi;
4710 gphi *phi;
4711 edge re, ene;
4712 edge_var_map *vm;
4713 size_t i;
4715 gcc_assert (fd->collapse == 1 && !broken_loop);
4717 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4718 remove arguments of the phi nodes in fin_bb. We need to create
4719 appropriate phi nodes in iter_part_bb instead. */
4720 se = find_edge (iter_part_bb, fin_bb);
4721 re = single_succ_edge (trip_update_bb);
4722 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4723 ene = single_succ_edge (entry_bb);
4725 psi = gsi_start_phis (fin_bb);
4726 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4727 gsi_next (&psi), ++i)
4729 gphi *nphi;
4730 location_t locus;
4732 phi = psi.phi ();
4733 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4734 redirect_edge_var_map_def (vm), 0))
4735 continue;
4737 t = gimple_phi_result (phi);
4738 gcc_assert (t == redirect_edge_var_map_result (vm));
4740 if (!single_pred_p (fin_bb))
4741 t = copy_ssa_name (t, phi);
4743 nphi = create_phi_node (t, iter_part_bb);
4745 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4746 locus = gimple_phi_arg_location_from_edge (phi, se);
4748 /* A special case -- fd->loop.v is not yet computed in
4749 iter_part_bb; we need to use vextra instead. */
4750 if (t == fd->loop.v)
4751 t = vextra;
4752 add_phi_arg (nphi, t, ene, locus);
4753 locus = redirect_edge_var_map_location (vm);
4754 tree back_arg = redirect_edge_var_map_def (vm);
4755 add_phi_arg (nphi, back_arg, re, locus);
4756 edge ce = find_edge (cont_bb, body_bb);
4757 if (ce == NULL)
4759 ce = BRANCH_EDGE (cont_bb);
4760 gcc_assert (single_succ (ce->dest) == body_bb);
4761 ce = single_succ_edge (ce->dest);
4763 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4764 gcc_assert (inner_loop_phi != NULL);
4765 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4766 find_edge (seq_start_bb, body_bb), locus);
4768 if (!single_pred_p (fin_bb))
4769 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4771 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4772 redirect_edge_var_map_clear (re);
4773 if (single_pred_p (fin_bb))
4774 while (1)
4776 psi = gsi_start_phis (fin_bb);
4777 if (gsi_end_p (psi))
4778 break;
4779 remove_phi_node (&psi, false);
4782 /* Make phi node for trip. */
4783 phi = create_phi_node (trip_main, iter_part_bb);
4784 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4785 UNKNOWN_LOCATION);
4786 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4787 UNKNOWN_LOCATION);
4790 if (!broken_loop)
4791 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4792 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4793 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4794 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4795 recompute_dominator (CDI_DOMINATORS, fin_bb));
4796 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4797 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4798 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4799 recompute_dominator (CDI_DOMINATORS, body_bb));
4801 if (!broken_loop)
4803 struct loop *loop = body_bb->loop_father;
4804 struct loop *trip_loop = alloc_loop ();
4805 trip_loop->header = iter_part_bb;
4806 trip_loop->latch = trip_update_bb;
4807 add_loop (trip_loop, iter_part_bb->loop_father);
4809 if (loop != entry_bb->loop_father)
4811 gcc_assert (loop->header == body_bb);
4812 gcc_assert (loop->latch == region->cont
4813 || single_pred (loop->latch) == region->cont);
4814 trip_loop->inner = loop;
4815 return;
4818 if (!gimple_omp_for_combined_p (fd->for_stmt))
4820 loop = alloc_loop ();
4821 loop->header = body_bb;
4822 if (collapse_bb == NULL)
4823 loop->latch = cont_bb;
4824 add_loop (loop, trip_loop);
4829 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4830 loop. Given parameters:
4832 for (V = N1; V cond N2; V += STEP) BODY;
4834 where COND is "<" or ">", we generate pseudocode
4836 V = N1;
4837 goto L1;
4838 L0:
4839 BODY;
4840 V += STEP;
4841 L1:
4842 if (V cond N2) goto L0; else goto L2;
4843 L2:
4845 For collapsed loops, given parameters:
4846 collapse(3)
4847 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4848 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4849 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4850 BODY;
4852 we generate pseudocode
4854 if (cond3 is <)
4855 adj = STEP3 - 1;
4856 else
4857 adj = STEP3 + 1;
4858 count3 = (adj + N32 - N31) / STEP3;
4859 if (cond2 is <)
4860 adj = STEP2 - 1;
4861 else
4862 adj = STEP2 + 1;
4863 count2 = (adj + N22 - N21) / STEP2;
4864 if (cond1 is <)
4865 adj = STEP1 - 1;
4866 else
4867 adj = STEP1 + 1;
4868 count1 = (adj + N12 - N11) / STEP1;
4869 count = count1 * count2 * count3;
4870 V = 0;
4871 V1 = N11;
4872 V2 = N21;
4873 V3 = N31;
4874 goto L1;
4875 L0:
4876 BODY;
4877 V += 1;
4878 V3 += STEP3;
4879 V2 += (V3 cond3 N32) ? 0 : STEP2;
4880 V3 = (V3 cond3 N32) ? V3 : N31;
4881 V1 += (V2 cond2 N22) ? 0 : STEP1;
4882 V2 = (V2 cond2 N22) ? V2 : N21;
4883 L1:
4884 if (V < count) goto L0; else goto L2;
4885 L2:
4887 */
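/* For illustration only (small constant bounds picked as an example):
   for collapse(3) over

	for (i = 0; i < 2; i++)
	  for (j = 0; j < 3; j++)
	    for (k = 0; k < 2; k++)

   count1 = 2, count2 = 3, count3 = 2, so count = 12 and V runs from 0
   to 11.  After the body at (i, j, k) = (0, 0, 1) the update gives
   k = 2, which fails k < 2, so j is stepped to 1 and k is reset to 0;
   i is stepped only when j in turn wraps past 3.  */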
4889 static void
4890 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4892 tree type, t;
4893 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4894 gimple_stmt_iterator gsi;
4895 gimple *stmt;
4896 gcond *cond_stmt;
4897 bool broken_loop = region->cont == NULL;
4898 edge e, ne;
4899 tree *counts = NULL;
4900 int i;
4901 int safelen_int = INT_MAX;
4902 bool dont_vectorize = false;
4903 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4904 OMP_CLAUSE_SAFELEN);
4905 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4906 OMP_CLAUSE__SIMDUID_);
4907 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4908 OMP_CLAUSE_IF);
4909 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4910 OMP_CLAUSE_SIMDLEN);
4911 tree n1, n2;
4913 if (safelen)
4915 poly_uint64 val;
4916 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4917 if (!poly_int_tree_p (safelen, &val))
4918 safelen_int = 0;
4919 else
4920 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4921 if (safelen_int == 1)
4922 safelen_int = 0;
4924 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4925 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4927 safelen_int = 0;
4928 dont_vectorize = true;
4930 type = TREE_TYPE (fd->loop.v);
4931 entry_bb = region->entry;
4932 cont_bb = region->cont;
4933 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4934 gcc_assert (broken_loop
4935 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4936 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4937 if (!broken_loop)
4939 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4940 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4941 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4942 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4944 else
4946 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4947 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4948 l2_bb = single_succ (l1_bb);
4950 exit_bb = region->exit;
4951 l2_dom_bb = NULL;
4953 gsi = gsi_last_nondebug_bb (entry_bb);
4955 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4956 /* Not needed in SSA form right now. */
4957 gcc_assert (!gimple_in_ssa_p (cfun));
4958 if (fd->collapse > 1)
4960 int first_zero_iter = -1, dummy = -1;
4961 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4963 counts = XALLOCAVEC (tree, fd->collapse);
4964 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4965 zero_iter_bb, first_zero_iter,
4966 dummy_bb, dummy, l2_dom_bb);
4968 if (l2_dom_bb == NULL)
4969 l2_dom_bb = l1_bb;
4971 n1 = fd->loop.n1;
4972 n2 = fd->loop.n2;
4973 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4975 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4976 OMP_CLAUSE__LOOPTEMP_);
4977 gcc_assert (innerc);
4978 n1 = OMP_CLAUSE_DECL (innerc);
4979 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4980 OMP_CLAUSE__LOOPTEMP_);
4981 gcc_assert (innerc);
4982 n2 = OMP_CLAUSE_DECL (innerc);
4984 tree step = fd->loop.step;
4986 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4987 OMP_CLAUSE__SIMT_);
4988 if (is_simt)
4990 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4991 is_simt = safelen_int > 1;
4993 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4994 if (is_simt)
4996 simt_lane = create_tmp_var (unsigned_type_node);
4997 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4998 gimple_call_set_lhs (g, simt_lane);
4999 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5000 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5001 fold_convert (TREE_TYPE (step), simt_lane));
5002 n1 = fold_convert (type, n1);
5003 if (POINTER_TYPE_P (type))
5004 n1 = fold_build_pointer_plus (n1, offset);
5005 else
5006 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5008 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5009 if (fd->collapse > 1)
5010 simt_maxlane = build_one_cst (unsigned_type_node);
5011 else if (safelen_int < omp_max_simt_vf ())
5012 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5013 tree vf
5014 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5015 unsigned_type_node, 0);
5016 if (simt_maxlane)
5017 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5018 vf = fold_convert (TREE_TYPE (step), vf);
5019 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
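/* At this point (assuming a SIMT target, where the internal functions
   above expand to per-lane values), lane L starts at N1 + L * STEP and
   advances by STEP * VF per iteration, so the VF lanes interleave over
   the iteration space; simt_maxlane, when set, caps the number of
   lanes that enter the loop.  */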
5022 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5023 if (fd->collapse > 1)
5025 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5027 gsi_prev (&gsi);
5028 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5029 gsi_next (&gsi);
5031 else
5032 for (i = 0; i < fd->collapse; i++)
5034 tree itype = TREE_TYPE (fd->loops[i].v);
5035 if (POINTER_TYPE_P (itype))
5036 itype = signed_type_for (itype);
5037 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5038 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5042 /* Remove the GIMPLE_OMP_FOR statement. */
5043 gsi_remove (&gsi, true);
5045 if (!broken_loop)
5047 /* Code to control the increment goes in the CONT_BB. */
5048 gsi = gsi_last_nondebug_bb (cont_bb);
5049 stmt = gsi_stmt (gsi);
5050 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5052 if (POINTER_TYPE_P (type))
5053 t = fold_build_pointer_plus (fd->loop.v, step);
5054 else
5055 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5056 expand_omp_build_assign (&gsi, fd->loop.v, t);
5058 if (fd->collapse > 1)
5060 i = fd->collapse - 1;
5061 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5063 t = fold_convert (sizetype, fd->loops[i].step);
5064 t = fold_build_pointer_plus (fd->loops[i].v, t);
5066 else
5068 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5069 fd->loops[i].step);
5070 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5071 fd->loops[i].v, t);
5073 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
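/* Starting from the innermost dimension, wrap each loop variable that
   has run past its bound back to its start value and step the
   next-outer variable, mirroring the "V2 += (V3 cond3 N32) ? 0 : STEP2"
   updates in the pseudocode above.  */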
5075 for (i = fd->collapse - 1; i > 0; i--)
5077 tree itype = TREE_TYPE (fd->loops[i].v);
5078 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5079 if (POINTER_TYPE_P (itype2))
5080 itype2 = signed_type_for (itype2);
5081 t = fold_convert (itype2, fd->loops[i - 1].step);
5082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5083 GSI_SAME_STMT);
5084 t = build3 (COND_EXPR, itype2,
5085 build2 (fd->loops[i].cond_code, boolean_type_node,
5086 fd->loops[i].v,
5087 fold_convert (itype, fd->loops[i].n2)),
5088 build_int_cst (itype2, 0), t);
5089 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5090 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5091 else
5092 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5093 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5095 t = fold_convert (itype, fd->loops[i].n1);
5096 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5097 GSI_SAME_STMT);
5098 t = build3 (COND_EXPR, itype,
5099 build2 (fd->loops[i].cond_code, boolean_type_node,
5100 fd->loops[i].v,
5101 fold_convert (itype, fd->loops[i].n2)),
5102 fd->loops[i].v, t);
5103 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5107 /* Remove GIMPLE_OMP_CONTINUE. */
5108 gsi_remove (&gsi, true);
5111 /* Emit the condition in L1_BB. */
5112 gsi = gsi_start_bb (l1_bb);
5114 t = fold_convert (type, n2);
5115 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5116 false, GSI_CONTINUE_LINKING);
5117 tree v = fd->loop.v;
5118 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5119 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5120 false, GSI_CONTINUE_LINKING);
5121 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5122 cond_stmt = gimple_build_cond_empty (t);
5123 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5124 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5125 NULL, NULL)
5126 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5127 NULL, NULL))
5129 gsi = gsi_for_stmt (cond_stmt);
5130 gimple_regimplify_operands (cond_stmt, &gsi);
5133 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5134 if (is_simt)
5136 gsi = gsi_start_bb (l2_bb);
5137 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5138 if (POINTER_TYPE_P (type))
5139 t = fold_build_pointer_plus (fd->loop.v, step);
5140 else
5141 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5142 expand_omp_build_assign (&gsi, fd->loop.v, t);
5145 /* Remove GIMPLE_OMP_RETURN. */
5146 gsi = gsi_last_nondebug_bb (exit_bb);
5147 gsi_remove (&gsi, true);
5149 /* Connect the new blocks. */
5150 remove_edge (FALLTHRU_EDGE (entry_bb));
5152 if (!broken_loop)
5154 remove_edge (BRANCH_EDGE (entry_bb));
5155 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5157 e = BRANCH_EDGE (l1_bb);
5158 ne = FALLTHRU_EDGE (l1_bb);
5159 e->flags = EDGE_TRUE_VALUE;
5161 else
5163 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5165 ne = single_succ_edge (l1_bb);
5166 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5169 ne->flags = EDGE_FALSE_VALUE;
5170 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5171 ne->probability = e->probability.invert ();
5173 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5174 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5176 if (simt_maxlane)
5178 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5179 NULL_TREE, NULL_TREE);
5180 gsi = gsi_last_bb (entry_bb);
5181 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5182 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5183 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5184 FALLTHRU_EDGE (entry_bb)->probability
5185 = profile_probability::guessed_always ().apply_scale (7, 8);
5186 BRANCH_EDGE (entry_bb)->probability
5187 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5188 l2_dom_bb = entry_bb;
5190 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5192 if (!broken_loop)
5194 struct loop *loop = alloc_loop ();
5195 loop->header = l1_bb;
5196 loop->latch = cont_bb;
5197 add_loop (loop, l1_bb->loop_father);
5198 loop->safelen = safelen_int;
5199 if (simduid)
5201 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5202 cfun->has_simduid_loops = true;
5204 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5205 the loop. */
5206 if ((flag_tree_loop_vectorize
5207 || !global_options_set.x_flag_tree_loop_vectorize)
5208 && flag_tree_loop_optimize
5209 && loop->safelen > 1)
5211 loop->force_vectorize = true;
5212 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5214 unsigned HOST_WIDE_INT v
5215 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5216 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5217 loop->simdlen = v;
5219 cfun->has_force_vectorize_loops = true;
5221 else if (dont_vectorize)
5222 loop->dont_vectorize = true;
5224 else if (simduid)
5225 cfun->has_simduid_loops = true;
5228 /* Taskloop construct is represented after gimplification with
5229 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5230 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5231 which should just compute all the needed loop temporaries
5232 for GIMPLE_OMP_TASK. */
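/* A sketch of the nesting (not an exact GIMPLE dump): a
   "#pragma omp taskloop" over "for (i = a; i < b; i++)" reaches this
   point as

	GIMPLE_OMP_FOR		<- outer, expanded here; only computes
	  GIMPLE_OMP_TASK	   the _looptemp_ start/end temporaries
	    GIMPLE_OMP_FOR	<- inner, performs the actual iteration
	      BODY

   so no loop code is emitted here for the outer construct itself.  */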
5234 static void
5235 expand_omp_taskloop_for_outer (struct omp_region *region,
5236 struct omp_for_data *fd,
5237 gimple *inner_stmt)
5239 tree type, bias = NULL_TREE;
5240 basic_block entry_bb, cont_bb, exit_bb;
5241 gimple_stmt_iterator gsi;
5242 gassign *assign_stmt;
5243 tree *counts = NULL;
5244 int i;
5246 gcc_assert (inner_stmt);
5247 gcc_assert (region->cont);
5248 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5249 && gimple_omp_task_taskloop_p (inner_stmt));
5250 type = TREE_TYPE (fd->loop.v);
5252 /* See if we need to bias by LLONG_MIN. */
5253 if (fd->iter_type == long_long_unsigned_type_node
5254 && TREE_CODE (type) == INTEGER_TYPE
5255 && !TYPE_UNSIGNED (type))
5257 tree n1, n2;
5259 if (fd->loop.cond_code == LT_EXPR)
5261 n1 = fd->loop.n1;
5262 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5264 else
5266 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5267 n2 = fd->loop.n1;
5269 if (TREE_CODE (n1) != INTEGER_CST
5270 || TREE_CODE (n2) != INTEGER_CST
5271 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5272 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5275 entry_bb = region->entry;
5276 cont_bb = region->cont;
5277 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5278 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5279 exit_bb = region->exit;
5281 gsi = gsi_last_nondebug_bb (entry_bb);
5282 gimple *for_stmt = gsi_stmt (gsi);
5283 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5284 if (fd->collapse > 1)
5286 int first_zero_iter = -1, dummy = -1;
5287 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5289 counts = XALLOCAVEC (tree, fd->collapse);
5290 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5291 zero_iter_bb, first_zero_iter,
5292 dummy_bb, dummy, l2_dom_bb);
5294 if (zero_iter_bb)
5296 /* Some counts[i] vars might be uninitialized if
5297 some loop has zero iterations. But the body shouldn't
5298 be executed in that case, so just avoid uninit warnings. */
5299 for (i = first_zero_iter; i < fd->collapse; i++)
5300 if (SSA_VAR_P (counts[i]))
5301 TREE_NO_WARNING (counts[i]) = 1;
5302 gsi_prev (&gsi);
5303 edge e = split_block (entry_bb, gsi_stmt (gsi));
5304 entry_bb = e->dest;
5305 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5306 gsi = gsi_last_bb (entry_bb);
5307 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5308 get_immediate_dominator (CDI_DOMINATORS,
5309 zero_iter_bb));
5313 tree t0, t1;
5314 t1 = fd->loop.n2;
5315 t0 = fd->loop.n1;
5316 if (POINTER_TYPE_P (TREE_TYPE (t0))
5317 && TYPE_PRECISION (TREE_TYPE (t0))
5318 != TYPE_PRECISION (fd->iter_type))
5320 /* Avoid casting pointers to an integer of a different size. */
5321 tree itype = signed_type_for (type);
5322 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5323 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5325 else
5327 t1 = fold_convert (fd->iter_type, t1);
5328 t0 = fold_convert (fd->iter_type, t0);
5330 if (bias)
5332 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5333 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5336 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5337 OMP_CLAUSE__LOOPTEMP_);
5338 gcc_assert (innerc);
5339 tree startvar = OMP_CLAUSE_DECL (innerc);
5340 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5341 gcc_assert (innerc);
5342 tree endvar = OMP_CLAUSE_DECL (innerc);
5343 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5345 gcc_assert (innerc);
5346 for (i = 1; i < fd->collapse; i++)
5348 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5349 OMP_CLAUSE__LOOPTEMP_);
5350 gcc_assert (innerc);
5352 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5353 OMP_CLAUSE__LOOPTEMP_);
5354 if (innerc)
5356 /* If needed (inner taskloop has lastprivate clause), propagate
5357 down the total number of iterations. */
5358 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5359 NULL_TREE, false,
5360 GSI_CONTINUE_LINKING);
5361 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5362 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5366 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5367 GSI_CONTINUE_LINKING);
5368 assign_stmt = gimple_build_assign (startvar, t0);
5369 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5371 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5372 GSI_CONTINUE_LINKING);
5373 assign_stmt = gimple_build_assign (endvar, t1);
5374 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5375 if (fd->collapse > 1)
5376 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5378 /* Remove the GIMPLE_OMP_FOR statement. */
5379 gsi = gsi_for_stmt (for_stmt);
5380 gsi_remove (&gsi, true);
5382 gsi = gsi_last_nondebug_bb (cont_bb);
5383 gsi_remove (&gsi, true);
5385 gsi = gsi_last_nondebug_bb (exit_bb);
5386 gsi_remove (&gsi, true);
5388 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5389 remove_edge (BRANCH_EDGE (entry_bb));
5390 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5391 remove_edge (BRANCH_EDGE (cont_bb));
5392 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5393 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5394 recompute_dominator (CDI_DOMINATORS, region->entry));
5397 /* Taskloop construct is represented after gimplification with
5398 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5399 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5400 GOMP_taskloop{,_ull} function arranges for each task to be given just
5401 a single range of iterations. */
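/* Illustrative sketch: each task created by GOMP_taskloop{,_ull} is
   handed its own start and end through the two _looptemp_ clauses, so
   inside a task the construct reduces to roughly

     for (V = start; V cond end; V += STEP)
       BODY;

   with no further calls into libgomp.  */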
5403 static void
5404 expand_omp_taskloop_for_inner (struct omp_region *region,
5405 struct omp_for_data *fd,
5406 gimple *inner_stmt)
5408 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5409 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5410 basic_block fin_bb;
5411 gimple_stmt_iterator gsi;
5412 edge ep;
5413 bool broken_loop = region->cont == NULL;
5414 tree *counts = NULL;
5415 tree n1, n2, step;
5417 itype = type = TREE_TYPE (fd->loop.v);
5418 if (POINTER_TYPE_P (type))
5419 itype = signed_type_for (type);
5421 /* See if we need to bias by LLONG_MIN. */
5422 if (fd->iter_type == long_long_unsigned_type_node
5423 && TREE_CODE (type) == INTEGER_TYPE
5424 && !TYPE_UNSIGNED (type))
5426 tree n1, n2;
5428 if (fd->loop.cond_code == LT_EXPR)
5430 n1 = fd->loop.n1;
5431 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5433 else
5435 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5436 n2 = fd->loop.n1;
5438 if (TREE_CODE (n1) != INTEGER_CST
5439 || TREE_CODE (n2) != INTEGER_CST
5440 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5441 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5444 entry_bb = region->entry;
5445 cont_bb = region->cont;
5446 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5447 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5448 gcc_assert (broken_loop
5449 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5450 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5451 if (!broken_loop)
5453 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5454 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5456 exit_bb = region->exit;
5458 /* Iteration space partitioning goes in ENTRY_BB. */
5459 gsi = gsi_last_nondebug_bb (entry_bb);
5460 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5462 if (fd->collapse > 1)
5464 int first_zero_iter = -1, dummy = -1;
5465 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5467 counts = XALLOCAVEC (tree, fd->collapse);
5468 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5469 fin_bb, first_zero_iter,
5470 dummy_bb, dummy, l2_dom_bb);
5471 t = NULL_TREE;
5473 else
5474 t = integer_one_node;
5476 step = fd->loop.step;
5477 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5478 OMP_CLAUSE__LOOPTEMP_);
5479 gcc_assert (innerc);
5480 n1 = OMP_CLAUSE_DECL (innerc);
5481 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5482 gcc_assert (innerc);
5483 n2 = OMP_CLAUSE_DECL (innerc);
5484 if (bias)
5486 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5487 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5489 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5490 true, NULL_TREE, true, GSI_SAME_STMT);
5491 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5492 true, NULL_TREE, true, GSI_SAME_STMT);
5493 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5494 true, NULL_TREE, true, GSI_SAME_STMT);
5496 tree startvar = fd->loop.v;
5497 tree endvar = NULL_TREE;
5499 if (gimple_omp_for_combined_p (fd->for_stmt))
5501 tree clauses = gimple_omp_for_clauses (inner_stmt);
5502 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5503 gcc_assert (innerc);
5504 startvar = OMP_CLAUSE_DECL (innerc);
5505 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5506 OMP_CLAUSE__LOOPTEMP_);
5507 gcc_assert (innerc);
5508 endvar = OMP_CLAUSE_DECL (innerc);
5510 t = fold_convert (TREE_TYPE (startvar), n1);
5511 t = force_gimple_operand_gsi (&gsi, t,
5512 DECL_P (startvar)
5513 && TREE_ADDRESSABLE (startvar),
5514 NULL_TREE, false, GSI_CONTINUE_LINKING);
5515 gimple *assign_stmt = gimple_build_assign (startvar, t);
5516 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5518 t = fold_convert (TREE_TYPE (startvar), n2);
5519 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5520 false, GSI_CONTINUE_LINKING);
5521 if (endvar)
5523 assign_stmt = gimple_build_assign (endvar, e);
5524 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5525 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5526 assign_stmt = gimple_build_assign (fd->loop.v, e);
5527 else
5528 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5529 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5531 if (fd->collapse > 1)
5532 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5534 if (!broken_loop)
5536 /* The code controlling the sequential loop replaces the
5537 GIMPLE_OMP_CONTINUE. */
5538 gsi = gsi_last_nondebug_bb (cont_bb);
5539 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5540 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5541 vmain = gimple_omp_continue_control_use (cont_stmt);
5542 vback = gimple_omp_continue_control_def (cont_stmt);
5544 if (!gimple_omp_for_combined_p (fd->for_stmt))
5546 if (POINTER_TYPE_P (type))
5547 t = fold_build_pointer_plus (vmain, step);
5548 else
5549 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5550 t = force_gimple_operand_gsi (&gsi, t,
5551 DECL_P (vback)
5552 && TREE_ADDRESSABLE (vback),
5553 NULL_TREE, true, GSI_SAME_STMT);
5554 assign_stmt = gimple_build_assign (vback, t);
5555 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5557 t = build2 (fd->loop.cond_code, boolean_type_node,
5558 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5559 ? t : vback, e);
5560 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5563 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5564 gsi_remove (&gsi, true);
5566 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5567 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5570 /* Remove the GIMPLE_OMP_FOR statement. */
5571 gsi = gsi_for_stmt (fd->for_stmt);
5572 gsi_remove (&gsi, true);
5574 /* Remove the GIMPLE_OMP_RETURN statement. */
5575 gsi = gsi_last_nondebug_bb (exit_bb);
5576 gsi_remove (&gsi, true);
5578 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5579 if (!broken_loop)
5580 remove_edge (BRANCH_EDGE (entry_bb));
5581 else
5583 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5584 region->outer->cont = NULL;
5587 /* Connect all the blocks. */
5588 if (!broken_loop)
5590 ep = find_edge (cont_bb, body_bb);
5591 if (gimple_omp_for_combined_p (fd->for_stmt))
5593 remove_edge (ep);
5594 ep = NULL;
5596 else if (fd->collapse > 1)
5598 remove_edge (ep);
5599 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5601 else
5602 ep->flags = EDGE_TRUE_VALUE;
5603 find_edge (cont_bb, fin_bb)->flags
5604 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5607 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5608 recompute_dominator (CDI_DOMINATORS, body_bb));
5609 if (!broken_loop)
5610 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5611 recompute_dominator (CDI_DOMINATORS, fin_bb));
5613 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5615 struct loop *loop = alloc_loop ();
5616 loop->header = body_bb;
5617 if (collapse_bb == NULL)
5618 loop->latch = cont_bb;
5619 add_loop (loop, body_bb->loop_father);
5623 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5624 partitioned loop. The lowering here is abstracted, in that the
5625 loop parameters are passed through internal functions, which are
5626 further lowered by oacc_device_lower, once we get to the target
5627 compiler. The loop is of the form:
5629 for (V = B; V LTGT E; V += S) {BODY}
5631 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5632 (constant 0 for no chunking) and we will have a GWV partitioning
5633 mask, specifying dimensions over which the loop is to be
5634 partitioned (see note below). We generate code that looks like
5635 (this ignores tiling):
5637 <entry_bb> [incoming FALL->body, BRANCH->exit]
5638 typedef signedintify (typeof (V)) T; // underlying signed integral type
5639 T range = E - B;
5640 T chunk_no = 0;
5641 T DIR = LTGT == '<' ? +1 : -1;
5642 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5643 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5645 <head_bb> [created by splitting end of entry_bb]
5646 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5647 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5648 if (!(offset LTGT bound)) goto bottom_bb;
5650 <body_bb> [incoming]
5651 V = B + offset;
5652 {BODY}
5654 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5655 offset += step;
5656 if (offset LTGT bound) goto body_bb; [*]
5658 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5659 chunk_no++;
5660 if (chunk_no < chunk_max) goto head_bb;
5662 <exit_bb> [incoming]
5663 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5665 [*] Needed if V live at end of loop. */
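/* A simplified example of what this handles: assuming a
   gang/vector-partitioned loop such as

     #pragma acc parallel loop gang vector
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   the GWV mask encodes the gang/vector dimensions, and the
   IFN_GOACC_LOOP_{CHUNKS,STEP,OFFSET,BOUND} calls generated below carry
   the loop parameters; oacc_device_lower later replaces those calls with
   the target-specific iteration assignment.  */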
5667 static void
5668 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5670 tree v = fd->loop.v;
5671 enum tree_code cond_code = fd->loop.cond_code;
5672 enum tree_code plus_code = PLUS_EXPR;
5674 tree chunk_size = integer_minus_one_node;
5675 tree gwv = integer_zero_node;
5676 tree iter_type = TREE_TYPE (v);
5677 tree diff_type = iter_type;
5678 tree plus_type = iter_type;
5679 struct oacc_collapse *counts = NULL;
5681 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5682 == GF_OMP_FOR_KIND_OACC_LOOP);
5683 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5684 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5686 if (POINTER_TYPE_P (iter_type))
5688 plus_code = POINTER_PLUS_EXPR;
5689 plus_type = sizetype;
5691 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5692 diff_type = signed_type_for (diff_type);
5693 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5694 diff_type = integer_type_node;
5696 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5697 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5698 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5699 basic_block bottom_bb = NULL;
5701 /* entry_bb has two successors; the branch edge is to the exit
5702 block, fallthrough edge to body. */
5703 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5704 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5706 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5707 body_bb, or to a block whose only successor is the body_bb. Its
5708 fallthrough successor is the final block (same as the branch
5709 successor of the entry_bb). */
5710 if (cont_bb)
5712 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5713 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5715 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5716 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5718 else
5719 gcc_assert (!gimple_in_ssa_p (cfun));
5721 /* The exit block only has entry_bb and cont_bb as predecessors. */
5722 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5724 tree chunk_no;
5725 tree chunk_max = NULL_TREE;
5726 tree bound, offset;
5727 tree step = create_tmp_var (diff_type, ".step");
5728 bool up = cond_code == LT_EXPR;
5729 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5730 bool chunking = !gimple_in_ssa_p (cfun);
5731 bool negating;
5733 /* Tiling vars. */
5734 tree tile_size = NULL_TREE;
5735 tree element_s = NULL_TREE;
5736 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5737 basic_block elem_body_bb = NULL;
5738 basic_block elem_cont_bb = NULL;
5740 /* SSA instances. */
5741 tree offset_incr = NULL_TREE;
5742 tree offset_init = NULL_TREE;
5744 gimple_stmt_iterator gsi;
5745 gassign *ass;
5746 gcall *call;
5747 gimple *stmt;
5748 tree expr;
5749 location_t loc;
5750 edge split, be, fte;
5752 /* Split the end of entry_bb to create head_bb. */
5753 split = split_block (entry_bb, last_stmt (entry_bb));
5754 basic_block head_bb = split->dest;
5755 entry_bb = split->src;
5757 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5758 gsi = gsi_last_nondebug_bb (entry_bb);
5759 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5760 loc = gimple_location (for_stmt);
5762 if (gimple_in_ssa_p (cfun))
5764 offset_init = gimple_omp_for_index (for_stmt, 0);
5765 gcc_assert (integer_zerop (fd->loop.n1));
5766 /* The SSA parallelizer does gang parallelism. */
5767 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5770 if (fd->collapse > 1 || fd->tiling)
5772 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5773 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5774 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5775 TREE_TYPE (fd->loop.n2), loc);
5777 if (SSA_VAR_P (fd->loop.n2))
5779 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5780 true, GSI_SAME_STMT);
5781 ass = gimple_build_assign (fd->loop.n2, total);
5782 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5786 tree b = fd->loop.n1;
5787 tree e = fd->loop.n2;
5788 tree s = fd->loop.step;
5790 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5791 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5793 /* Convert the step, avoiding possible unsigned->signed overflow. */
5794 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5795 if (negating)
5796 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5797 s = fold_convert (diff_type, s);
5798 if (negating)
5799 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5800 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5802 if (!chunking)
5803 chunk_size = integer_zero_node;
5804 expr = fold_convert (diff_type, chunk_size);
5805 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5806 NULL_TREE, true, GSI_SAME_STMT);
5808 if (fd->tiling)
5810 /* Determine the tile size and element step,
5811 modify the outer loop step size. */
5812 tile_size = create_tmp_var (diff_type, ".tile_size");
5813 expr = build_int_cst (diff_type, 1);
5814 for (int ix = 0; ix < fd->collapse; ix++)
5815 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5816 expr = force_gimple_operand_gsi (&gsi, expr, true,
5817 NULL_TREE, true, GSI_SAME_STMT);
5818 ass = gimple_build_assign (tile_size, expr);
5819 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5821 element_s = create_tmp_var (diff_type, ".element_s");
5822 ass = gimple_build_assign (element_s, s);
5823 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5825 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5826 s = force_gimple_operand_gsi (&gsi, expr, true,
5827 NULL_TREE, true, GSI_SAME_STMT);
5830 /* Determine the range, avoiding possible unsigned->signed overflow. */
5831 negating = !up && TYPE_UNSIGNED (iter_type);
5832 expr = fold_build2 (MINUS_EXPR, plus_type,
5833 fold_convert (plus_type, negating ? b : e),
5834 fold_convert (plus_type, negating ? e : b));
5835 expr = fold_convert (diff_type, expr);
5836 if (negating)
5837 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5838 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5839 NULL_TREE, true, GSI_SAME_STMT);
5841 chunk_no = build_int_cst (diff_type, 0);
5842 if (chunking)
5844 gcc_assert (!gimple_in_ssa_p (cfun));
5846 expr = chunk_no;
5847 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5848 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5850 ass = gimple_build_assign (chunk_no, expr);
5851 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5853 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5854 build_int_cst (integer_type_node,
5855 IFN_GOACC_LOOP_CHUNKS),
5856 dir, range, s, chunk_size, gwv);
5857 gimple_call_set_lhs (call, chunk_max);
5858 gimple_set_location (call, loc);
5859 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5861 else
5862 chunk_size = chunk_no;
5864 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5865 build_int_cst (integer_type_node,
5866 IFN_GOACC_LOOP_STEP),
5867 dir, range, s, chunk_size, gwv);
5868 gimple_call_set_lhs (call, step);
5869 gimple_set_location (call, loc);
5870 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5872 /* Remove the GIMPLE_OMP_FOR. */
5873 gsi_remove (&gsi, true);
5875 /* Fixup edges from head_bb. */
5876 be = BRANCH_EDGE (head_bb);
5877 fte = FALLTHRU_EDGE (head_bb);
5878 be->flags |= EDGE_FALSE_VALUE;
5879 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5881 basic_block body_bb = fte->dest;
5883 if (gimple_in_ssa_p (cfun))
5885 gsi = gsi_last_nondebug_bb (cont_bb);
5886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5888 offset = gimple_omp_continue_control_use (cont_stmt);
5889 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5891 else
5893 offset = create_tmp_var (diff_type, ".offset");
5894 offset_init = offset_incr = offset;
5896 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5898 /* Loop offset & bound go into head_bb. */
5899 gsi = gsi_start_bb (head_bb);
5901 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5902 build_int_cst (integer_type_node,
5903 IFN_GOACC_LOOP_OFFSET),
5904 dir, range, s,
5905 chunk_size, gwv, chunk_no);
5906 gimple_call_set_lhs (call, offset_init);
5907 gimple_set_location (call, loc);
5908 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5910 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5911 build_int_cst (integer_type_node,
5912 IFN_GOACC_LOOP_BOUND),
5913 dir, range, s,
5914 chunk_size, gwv, offset_init);
5915 gimple_call_set_lhs (call, bound);
5916 gimple_set_location (call, loc);
5917 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5919 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5920 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5921 GSI_CONTINUE_LINKING);
5923 /* V assignment goes into body_bb. */
5924 if (!gimple_in_ssa_p (cfun))
5926 gsi = gsi_start_bb (body_bb);
5928 expr = build2 (plus_code, iter_type, b,
5929 fold_convert (plus_type, offset));
5930 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5931 true, GSI_SAME_STMT);
5932 ass = gimple_build_assign (v, expr);
5933 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5935 if (fd->collapse > 1 || fd->tiling)
5936 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5938 if (fd->tiling)
5940 /* Determine the range of the element loop -- usually simply
5941 the tile_size, but could be smaller if the final
5942 iteration of the outer loop is a partial tile. */
5943 tree e_range = create_tmp_var (diff_type, ".e_range");
5945 expr = build2 (MIN_EXPR, diff_type,
5946 build2 (MINUS_EXPR, diff_type, bound, offset),
5947 build2 (MULT_EXPR, diff_type, tile_size,
5948 element_s));
5949 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5950 true, GSI_SAME_STMT);
5951 ass = gimple_build_assign (e_range, expr);
5952 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5954 /* Determine bound, offset & step of inner loop. */
5955 e_bound = create_tmp_var (diff_type, ".e_bound");
5956 e_offset = create_tmp_var (diff_type, ".e_offset");
5957 e_step = create_tmp_var (diff_type, ".e_step");
5959 /* Mark these as element loops. */
5960 tree t, e_gwv = integer_minus_one_node;
5961 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5963 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5964 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5965 element_s, chunk, e_gwv, chunk);
5966 gimple_call_set_lhs (call, e_offset);
5967 gimple_set_location (call, loc);
5968 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5970 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5971 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5972 element_s, chunk, e_gwv, e_offset);
5973 gimple_call_set_lhs (call, e_bound);
5974 gimple_set_location (call, loc);
5975 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5977 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5978 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5979 element_s, chunk, e_gwv);
5980 gimple_call_set_lhs (call, e_step);
5981 gimple_set_location (call, loc);
5982 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5984 /* Add test and split block. */
5985 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5986 stmt = gimple_build_cond_empty (expr);
5987 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5988 split = split_block (body_bb, stmt);
5989 elem_body_bb = split->dest;
5990 if (cont_bb == body_bb)
5991 cont_bb = elem_body_bb;
5992 body_bb = split->src;
5994 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5996 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5997 if (cont_bb == NULL)
5999 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6000 e->probability = profile_probability::even ();
6001 split->probability = profile_probability::even ();
6004 /* Initialize the user's loop vars. */
6005 gsi = gsi_start_bb (elem_body_bb);
6006 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6010 /* Loop increment goes into cont_bb. If this is not a loop, we
6011 will have spawned threads as if it was, and each one will
6012 execute one iteration. The specification is not explicit about
6013 whether such constructs are ill-formed or not, and they can
6014 occur, especially when noreturn routines are involved. */
6015 if (cont_bb)
6017 gsi = gsi_last_nondebug_bb (cont_bb);
6018 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6019 loc = gimple_location (cont_stmt);
6021 if (fd->tiling)
6023 /* Insert element loop increment and test. */
6024 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6025 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6026 true, GSI_SAME_STMT);
6027 ass = gimple_build_assign (e_offset, expr);
6028 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6029 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6031 stmt = gimple_build_cond_empty (expr);
6032 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6033 split = split_block (cont_bb, stmt);
6034 elem_cont_bb = split->src;
6035 cont_bb = split->dest;
6037 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6038 split->probability = profile_probability::unlikely ().guessed ();
6039 edge latch_edge
6040 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6041 latch_edge->probability = profile_probability::likely ().guessed ();
6043 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6044 skip_edge->probability = profile_probability::unlikely ().guessed ();
6045 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6046 loop_entry_edge->probability
6047 = profile_probability::likely ().guessed ();
6049 gsi = gsi_for_stmt (cont_stmt);
6052 /* Increment offset. */
6053 if (gimple_in_ssa_p (cfun))
6054 expr = build2 (plus_code, iter_type, offset,
6055 fold_convert (plus_type, step));
6056 else
6057 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6058 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6059 true, GSI_SAME_STMT);
6060 ass = gimple_build_assign (offset_incr, expr);
6061 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6062 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6063 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6065 /* Remove the GIMPLE_OMP_CONTINUE. */
6066 gsi_remove (&gsi, true);
6068 /* Fixup edges from cont_bb. */
6069 be = BRANCH_EDGE (cont_bb);
6070 fte = FALLTHRU_EDGE (cont_bb);
6071 be->flags |= EDGE_TRUE_VALUE;
6072 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6074 if (chunking)
6076 /* Split the beginning of exit_bb to make bottom_bb. We
6077 need to insert a nop at the start, because splitting is
6078 after a stmt, not before. */
6079 gsi = gsi_start_bb (exit_bb);
6080 stmt = gimple_build_nop ();
6081 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6082 split = split_block (exit_bb, stmt);
6083 bottom_bb = split->src;
6084 exit_bb = split->dest;
6085 gsi = gsi_last_bb (bottom_bb);
6087 /* Chunk increment and test goes into bottom_bb. */
6088 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6089 build_int_cst (diff_type, 1));
6090 ass = gimple_build_assign (chunk_no, expr);
6091 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6093 /* Chunk test at end of bottom_bb. */
6094 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6095 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6096 GSI_CONTINUE_LINKING);
6098 /* Fixup edges from bottom_bb. */
6099 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6100 split->probability = profile_probability::unlikely ().guessed ();
6101 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6102 latch_edge->probability = profile_probability::likely ().guessed ();
6106 gsi = gsi_last_nondebug_bb (exit_bb);
6107 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6108 loc = gimple_location (gsi_stmt (gsi));
6110 if (!gimple_in_ssa_p (cfun))
6112 /* Insert the final value of V, in case it is live. This is the
6113 value for the only thread that survives past the join. */
6114 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6115 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6116 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6117 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6118 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6119 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6120 true, GSI_SAME_STMT);
6121 ass = gimple_build_assign (v, expr);
6122 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6125 /* Remove the OMP_RETURN. */
6126 gsi_remove (&gsi, true);
6128 if (cont_bb)
6130 /* We now have one, two or three nested loops. Update the loop
6131 structures. */
6132 struct loop *parent = entry_bb->loop_father;
6133 struct loop *body = body_bb->loop_father;
6135 if (chunking)
6137 struct loop *chunk_loop = alloc_loop ();
6138 chunk_loop->header = head_bb;
6139 chunk_loop->latch = bottom_bb;
6140 add_loop (chunk_loop, parent);
6141 parent = chunk_loop;
6143 else if (parent != body)
6145 gcc_assert (body->header == body_bb);
6146 gcc_assert (body->latch == cont_bb
6147 || single_pred (body->latch) == cont_bb);
6148 parent = NULL;
6151 if (parent)
6153 struct loop *body_loop = alloc_loop ();
6154 body_loop->header = body_bb;
6155 body_loop->latch = cont_bb;
6156 add_loop (body_loop, parent);
6158 if (fd->tiling)
6160 /* Insert tiling's element loop. */
6161 struct loop *inner_loop = alloc_loop ();
6162 inner_loop->header = elem_body_bb;
6163 inner_loop->latch = elem_cont_bb;
6164 add_loop (inner_loop, body_loop);
6170 /* Expand the OMP loop defined by REGION. */
6172 static void
6173 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6175 struct omp_for_data fd;
6176 struct omp_for_data_loop *loops;
6178 loops
6179 = (struct omp_for_data_loop *)
6180 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6181 * sizeof (struct omp_for_data_loop));
6182 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6183 &fd, loops);
6184 region->sched_kind = fd.sched_kind;
6185 region->sched_modifiers = fd.sched_modifiers;
6187 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6188 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6189 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6190 if (region->cont)
6192 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6193 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6194 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6196 else
6197 /* If there isn't a continue then this is a degenerate case where
6198 the introduction of abnormal edges during lowering will prevent
6199 original loops from being detected. Fix that up. */
6200 loops_state_set (LOOPS_NEED_FIXUP);
6202 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6203 expand_omp_simd (region, &fd);
6204 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6206 gcc_assert (!inner_stmt);
6207 expand_oacc_for (region, &fd);
6209 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6211 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6212 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6213 else
6214 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6216 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6217 && !fd.have_ordered)
6219 if (fd.chunk_size == NULL)
6220 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6221 else
6222 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6224 else
6226 int fn_index, start_ix, next_ix;
6227 unsigned HOST_WIDE_INT sched = 0;
6228 tree sched_arg = NULL_TREE;
6230 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6231 == GF_OMP_FOR_KIND_FOR);
6232 if (fd.chunk_size == NULL
6233 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6234 fd.chunk_size = integer_zero_node;
6235 switch (fd.sched_kind)
6237 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6238 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
6240 gcc_assert (!fd.have_ordered);
6241 fn_index = 6;
6242 sched = 4;
6244 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6245 && !fd.have_ordered)
6246 fn_index = 7;
6247 else
6249 fn_index = 3;
6250 sched = (HOST_WIDE_INT_1U << 31);
6252 break;
6253 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6254 case OMP_CLAUSE_SCHEDULE_GUIDED:
6255 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6256 && !fd.have_ordered)
6258 fn_index = 3 + fd.sched_kind;
6259 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6260 break;
6262 fn_index = fd.sched_kind;
6263 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6264 sched += (HOST_WIDE_INT_1U << 31);
6265 break;
6266 case OMP_CLAUSE_SCHEDULE_STATIC:
6267 gcc_assert (fd.have_ordered);
6268 fn_index = 0;
6269 sched = (HOST_WIDE_INT_1U << 31) + 1;
6270 break;
6271 default:
6272 gcc_unreachable ();
6274 if (!fd.ordered)
6275 fn_index += fd.have_ordered * 8;
6276 if (fd.ordered)
6277 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6278 else
6279 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6280 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6281 if (fd.have_reductemp || fd.have_pointer_condtemp)
6283 if (fd.ordered)
6284 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6285 else if (fd.have_ordered)
6286 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6287 else
6288 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6289 sched_arg = build_int_cstu (long_integer_type_node, sched);
6290 if (!fd.chunk_size)
6291 fd.chunk_size = integer_zero_node;
6293 if (fd.iter_type == long_long_unsigned_type_node)
6295 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6296 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6297 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6298 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6300 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6301 (enum built_in_function) next_ix, sched_arg,
6302 inner_stmt);
6305 if (gimple_in_ssa_p (cfun))
6306 update_ssa (TODO_update_ssa_only_virtuals);
6309 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6311 v = GOMP_sections_start (n);
6313 switch (v)
6315 case 0:
6316 goto L2;
6317 case 1:
6318 section 1;
6319 goto L1;
6320 case 2:
6322 case n:
6324 default:
6325 abort ();
6328 v = GOMP_sections_next ();
6329 goto L0;
6331 reduction;
6333 If this is a combined parallel sections, replace the call to
6334 GOMP_sections_start with call to GOMP_sections_next. */
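/* For example (source form only; the actual IL is GIMPLE):

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   yields a switch whose case 1 and case 2 labels branch to the blocks
   holding foo () and bar (), whose case 0 branches past the loop to L2,
   and whose default case traps.  */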
6336 static void
6337 expand_omp_sections (struct omp_region *region)
6339 tree t, u, vin = NULL, vmain, vnext, l2;
6340 unsigned len;
6341 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6342 gimple_stmt_iterator si, switch_si;
6343 gomp_sections *sections_stmt;
6344 gimple *stmt;
6345 gomp_continue *cont;
6346 edge_iterator ei;
6347 edge e;
6348 struct omp_region *inner;
6349 unsigned i, casei;
6350 bool exit_reachable = region->cont != NULL;
6352 gcc_assert (region->exit != NULL);
6353 entry_bb = region->entry;
6354 l0_bb = single_succ (entry_bb);
6355 l1_bb = region->cont;
6356 l2_bb = region->exit;
6357 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6358 l2 = gimple_block_label (l2_bb);
6359 else
6361 /* This can happen if there are reductions. */
6362 len = EDGE_COUNT (l0_bb->succs);
6363 gcc_assert (len > 0);
6364 e = EDGE_SUCC (l0_bb, len - 1);
6365 si = gsi_last_nondebug_bb (e->dest);
6366 l2 = NULL_TREE;
6367 if (gsi_end_p (si)
6368 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6369 l2 = gimple_block_label (e->dest);
6370 else
6371 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6373 si = gsi_last_nondebug_bb (e->dest);
6374 if (gsi_end_p (si)
6375 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6377 l2 = gimple_block_label (e->dest);
6378 break;
6382 if (exit_reachable)
6383 default_bb = create_empty_bb (l1_bb->prev_bb);
6384 else
6385 default_bb = create_empty_bb (l0_bb);
6387 /* We will build a switch() with enough cases for all the
6388 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6389 and a default case to abort if something goes wrong. */
6390 len = EDGE_COUNT (l0_bb->succs);
6392 /* Use vec::quick_push on label_vec throughout, since we know the size
6393 in advance. */
6394 auto_vec<tree> label_vec (len);
6396 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6397 GIMPLE_OMP_SECTIONS statement. */
6398 si = gsi_last_nondebug_bb (entry_bb);
6399 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6400 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6401 vin = gimple_omp_sections_control (sections_stmt);
6402 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6403 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6404 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6405 tree cond_var = NULL_TREE;
6406 if (reductmp || condtmp)
6408 tree reductions = null_pointer_node, mem = null_pointer_node;
6409 tree memv = NULL_TREE, condtemp = NULL_TREE;
6410 gimple_stmt_iterator gsi = gsi_none ();
6411 gimple *g = NULL;
6412 if (reductmp)
6414 reductions = OMP_CLAUSE_DECL (reductmp);
6415 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6416 g = SSA_NAME_DEF_STMT (reductions);
6417 reductions = gimple_assign_rhs1 (g);
6418 OMP_CLAUSE_DECL (reductmp) = reductions;
6419 gsi = gsi_for_stmt (g);
6421 else
6422 gsi = si;
6423 if (condtmp)
6425 condtemp = OMP_CLAUSE_DECL (condtmp);
6426 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6427 OMP_CLAUSE__CONDTEMP_);
6428 cond_var = OMP_CLAUSE_DECL (c);
6429 tree type = TREE_TYPE (condtemp);
6430 memv = create_tmp_var (type);
6431 TREE_ADDRESSABLE (memv) = 1;
6432 unsigned cnt = 0;
6433 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6434 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6435 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6436 ++cnt;
6437 unsigned HOST_WIDE_INT sz
6438 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6439 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6440 false);
6441 mem = build_fold_addr_expr (memv);
6443 t = build_int_cst (unsigned_type_node, len - 1);
6444 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6445 stmt = gimple_build_call (u, 3, t, reductions, mem);
6446 gimple_call_set_lhs (stmt, vin);
6447 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6448 if (condtmp)
6450 expand_omp_build_assign (&gsi, condtemp, memv, false);
6451 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6452 vin, build_one_cst (TREE_TYPE (cond_var)));
6453 expand_omp_build_assign (&gsi, cond_var, t, false);
6455 if (reductmp)
6457 gsi_remove (&gsi, true);
6458 release_ssa_name (gimple_assign_lhs (g));
6461 else if (!is_combined_parallel (region))
6463 /* If we are not inside a combined parallel+sections region,
6464 call GOMP_sections_start. */
6465 t = build_int_cst (unsigned_type_node, len - 1);
6466 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6467 stmt = gimple_build_call (u, 1, t);
6469 else
6471 /* Otherwise, call GOMP_sections_next. */
6472 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6473 stmt = gimple_build_call (u, 0);
6475 if (!reductmp && !condtmp)
6477 gimple_call_set_lhs (stmt, vin);
6478 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6480 gsi_remove (&si, true);
6482 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6483 L0_BB. */
6484 switch_si = gsi_last_nondebug_bb (l0_bb);
6485 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6486 if (exit_reachable)
6488 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6489 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6490 vmain = gimple_omp_continue_control_use (cont);
6491 vnext = gimple_omp_continue_control_def (cont);
6493 else
6495 vmain = vin;
6496 vnext = NULL_TREE;
6499 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6500 label_vec.quick_push (t);
6501 i = 1;
6503 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6504 for (inner = region->inner, casei = 1;
6505 inner;
6506 inner = inner->next, i++, casei++)
6508 basic_block s_entry_bb, s_exit_bb;
6510 /* Skip optional reduction region. */
6511 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6513 --i;
6514 --casei;
6515 continue;
6518 s_entry_bb = inner->entry;
6519 s_exit_bb = inner->exit;
6521 t = gimple_block_label (s_entry_bb);
6522 u = build_int_cst (unsigned_type_node, casei);
6523 u = build_case_label (u, NULL, t);
6524 label_vec.quick_push (u);
6526 si = gsi_last_nondebug_bb (s_entry_bb);
6527 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6528 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6529 gsi_remove (&si, true);
6530 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6532 if (s_exit_bb == NULL)
6533 continue;
6535 si = gsi_last_nondebug_bb (s_exit_bb);
6536 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6537 gsi_remove (&si, true);
6539 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6542 /* Error handling code goes in DEFAULT_BB. */
6543 t = gimple_block_label (default_bb);
6544 u = build_case_label (NULL, NULL, t);
6545 make_edge (l0_bb, default_bb, 0);
6546 add_bb_to_loop (default_bb, current_loops->tree_root);
6548 stmt = gimple_build_switch (vmain, u, label_vec);
6549 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6550 gsi_remove (&switch_si, true);
6552 si = gsi_start_bb (default_bb);
6553 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6554 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6556 if (exit_reachable)
6558 tree bfn_decl;
6560 /* Code to get the next section goes in L1_BB. */
6561 si = gsi_last_nondebug_bb (l1_bb);
6562 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6564 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6565 stmt = gimple_build_call (bfn_decl, 0);
6566 gimple_call_set_lhs (stmt, vnext);
6567 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6568 if (cond_var)
6570 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6571 vnext, build_one_cst (TREE_TYPE (cond_var)));
6572 expand_omp_build_assign (&si, cond_var, t, false);
6574 gsi_remove (&si, true);
6576 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6579 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6580 si = gsi_last_nondebug_bb (l2_bb);
6581 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6582 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6583 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6584 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6585 else
6586 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6587 stmt = gimple_build_call (t, 0);
6588 if (gimple_omp_return_lhs (gsi_stmt (si)))
6589 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6590 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6591 gsi_remove (&si, true);
6593 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6596 /* Expand code for an OpenMP single directive. We've already expanded
6597 much of the code, here we simply place the GOMP_barrier call. */
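/* E.g. (a sketch): for '#pragma omp single { body (); }' without a
   nowait clause, the GIMPLE_OMP_RETURN at the region exit is replaced by
   a call to GOMP_barrier (GOMP_barrier_cancel if its result is needed
   for cancellation checks).  */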
6599 static void
6600 expand_omp_single (struct omp_region *region)
6602 basic_block entry_bb, exit_bb;
6603 gimple_stmt_iterator si;
6605 entry_bb = region->entry;
6606 exit_bb = region->exit;
6608 si = gsi_last_nondebug_bb (entry_bb);
6609 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6610 gsi_remove (&si, true);
6611 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6613 si = gsi_last_nondebug_bb (exit_bb);
6614 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6616 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6617 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6619 gsi_remove (&si, true);
6620 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6623 /* Generic expansion for OpenMP synchronization directives: master,
6624 ordered and critical. All we need to do here is remove the entry
6625 and exit markers for REGION. */
6627 static void
6628 expand_omp_synch (struct omp_region *region)
6630 basic_block entry_bb, exit_bb;
6631 gimple_stmt_iterator si;
6633 entry_bb = region->entry;
6634 exit_bb = region->exit;
6636 si = gsi_last_nondebug_bb (entry_bb);
6637 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6638 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6639 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6640 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6641 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6642 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6643 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6644 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6646 expand_omp_taskreg (region);
6647 return;
6649 gsi_remove (&si, true);
6650 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6652 if (exit_bb)
6654 si = gsi_last_nondebug_bb (exit_bb);
6655 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6656 gsi_remove (&si, true);
6657 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6661 /* Translate enum omp_memory_order to enum memmodel. The two enums
6662 use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6663 is 0. */
6665 static enum memmodel
6666 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6668 switch (mo)
6670 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6671 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6672 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6673 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6674 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6675 default: gcc_unreachable ();
6679 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6680 operation as a normal volatile load. */
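/* E.g. (a sketch): '#pragma omp atomic read  v = *p;' on a 4-byte type
   becomes a call to __atomic_load_4 (p, mo), whose result is assigned to
   v; mo is the memmodel obtained via omp_memory_order_to_memmodel from
   the directive's memory-order clause.  */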
6682 static bool
6683 expand_omp_atomic_load (basic_block load_bb, tree addr,
6684 tree loaded_val, int index)
6686 enum built_in_function tmpbase;
6687 gimple_stmt_iterator gsi;
6688 basic_block store_bb;
6689 location_t loc;
6690 gimple *stmt;
6691 tree decl, call, type, itype;
6693 gsi = gsi_last_nondebug_bb (load_bb);
6694 stmt = gsi_stmt (gsi);
6695 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6696 loc = gimple_location (stmt);
6698 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6699 is smaller than word size, then expand_atomic_load assumes that the load
6700 is atomic. We could avoid the builtin entirely in this case. */
6702 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6703 decl = builtin_decl_explicit (tmpbase);
6704 if (decl == NULL_TREE)
6705 return false;
6707 type = TREE_TYPE (loaded_val);
6708 itype = TREE_TYPE (TREE_TYPE (decl));
6710 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6711 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6712 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6713 if (!useless_type_conversion_p (type, itype))
6714 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6715 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6717 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6718 gsi_remove (&gsi, true);
6720 store_bb = single_succ (load_bb);
6721 gsi = gsi_last_nondebug_bb (store_bb);
6722 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6723 gsi_remove (&gsi, true);
6725 if (gimple_in_ssa_p (cfun))
6726 update_ssa (TODO_update_ssa_no_phi);
6728 return true;
6731 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6732 operation as a normal volatile store. */
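/* E.g. (a sketch): '#pragma omp atomic write  *p = v;' on an 8-byte type
   becomes __atomic_store_8 (p, v, mo); if the old value is needed as
   well (an exchange), __atomic_exchange_8 is used instead, subject to
   can_atomic_exchange_p.  */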
6734 static bool
6735 expand_omp_atomic_store (basic_block load_bb, tree addr,
6736 tree loaded_val, tree stored_val, int index)
6738 enum built_in_function tmpbase;
6739 gimple_stmt_iterator gsi;
6740 basic_block store_bb = single_succ (load_bb);
6741 location_t loc;
6742 gimple *stmt;
6743 tree decl, call, type, itype;
6744 machine_mode imode;
6745 bool exchange;
6747 gsi = gsi_last_nondebug_bb (load_bb);
6748 stmt = gsi_stmt (gsi);
6749 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6751 /* If the load value is needed, then this isn't a store but an exchange. */
6752 exchange = gimple_omp_atomic_need_value_p (stmt);
6754 gsi = gsi_last_nondebug_bb (store_bb);
6755 stmt = gsi_stmt (gsi);
6756 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6757 loc = gimple_location (stmt);
6759 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6760 is smaller than word size, then expand_atomic_store assumes that the store
6761 is atomic. We could avoid the builtin entirely in this case. */
6763 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6764 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6765 decl = builtin_decl_explicit (tmpbase);
6766 if (decl == NULL_TREE)
6767 return false;
6769 type = TREE_TYPE (stored_val);
6771 /* Dig out the type of the function's second argument. */
6772 itype = TREE_TYPE (decl);
6773 itype = TYPE_ARG_TYPES (itype);
6774 itype = TREE_CHAIN (itype);
6775 itype = TREE_VALUE (itype);
6776 imode = TYPE_MODE (itype);
6778 if (exchange && !can_atomic_exchange_p (imode, true))
6779 return false;
6781 if (!useless_type_conversion_p (itype, type))
6782 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6783 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6784 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6785 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6786 if (exchange)
6788 if (!useless_type_conversion_p (type, itype))
6789 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6790 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6793 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6794 gsi_remove (&gsi, true);
6796 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6797 gsi = gsi_last_nondebug_bb (load_bb);
6798 gsi_remove (&gsi, true);
6800 if (gimple_in_ssa_p (cfun))
6801 update_ssa (TODO_update_ssa_no_phi);
6803 return true;
6806 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6807 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6808 size of the data type, and thus usable to find the index of the builtin
6809 decl. Returns false if the expression is not of the proper form. */
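/* Illustrative only: '#pragma omp atomic  x = x + 1;' on a 4-byte
   integer matches the PLUS_EXPR case below and is emitted as
   __atomic_fetch_add_4 (&x, 1, mo); if instead the updated value is
   needed, the corresponding ADD_FETCH builtin is used.  */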
6811 static bool
6812 expand_omp_atomic_fetch_op (basic_block load_bb,
6813 tree addr, tree loaded_val,
6814 tree stored_val, int index)
6816 enum built_in_function oldbase, newbase, tmpbase;
6817 tree decl, itype, call;
6818 tree lhs, rhs;
6819 basic_block store_bb = single_succ (load_bb);
6820 gimple_stmt_iterator gsi;
6821 gimple *stmt;
6822 location_t loc;
6823 enum tree_code code;
6824 bool need_old, need_new;
6825 machine_mode imode;
6827 /* We expect to find the following sequences:
6829 load_bb:
6830 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6832 store_bb:
6833 val = tmp OP something; (or: something OP tmp)
6834 GIMPLE_OMP_STORE (val)
6836 ???FIXME: Allow a more flexible sequence.
6837 Perhaps use data flow to pick the statements.
6841 gsi = gsi_after_labels (store_bb);
6842 stmt = gsi_stmt (gsi);
6843 if (is_gimple_debug (stmt))
6845 gsi_next_nondebug (&gsi);
6846 if (gsi_end_p (gsi))
6847 return false;
6848 stmt = gsi_stmt (gsi);
6850 loc = gimple_location (stmt);
6851 if (!is_gimple_assign (stmt))
6852 return false;
6853 gsi_next_nondebug (&gsi);
6854 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6855 return false;
6856 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6857 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6858 enum omp_memory_order omo
6859 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6860 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6861 gcc_checking_assert (!need_old || !need_new);
6863 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6864 return false;
6866 /* Check for one of the supported fetch-op operations. */
6867 code = gimple_assign_rhs_code (stmt);
6868 switch (code)
6870 case PLUS_EXPR:
6871 case POINTER_PLUS_EXPR:
6872 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6873 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6874 break;
6875 case MINUS_EXPR:
6876 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6877 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6878 break;
6879 case BIT_AND_EXPR:
6880 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6881 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6882 break;
6883 case BIT_IOR_EXPR:
6884 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6885 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6886 break;
6887 case BIT_XOR_EXPR:
6888 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6889 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6890 break;
6891 default:
6892 return false;
6895 /* Make sure the expression is of the proper form. */
6896 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6897 rhs = gimple_assign_rhs2 (stmt);
6898 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6899 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6900 rhs = gimple_assign_rhs1 (stmt);
6901 else
6902 return false;
6904 tmpbase = ((enum built_in_function)
6905 ((need_new ? newbase : oldbase) + index + 1));
6906 decl = builtin_decl_explicit (tmpbase);
6907 if (decl == NULL_TREE)
6908 return false;
6909 itype = TREE_TYPE (TREE_TYPE (decl));
6910 imode = TYPE_MODE (itype);
6912 /* We could test all of the various optabs involved, but the fact of the
6913 matter is that (with the exception of i486 vs i586 and xadd) all targets
6914 that support any atomic operation optab also implement compare-and-swap.
6915 Let optabs.c take care of expanding any compare-and-swap loop. */
6916 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6917 return false;
6919 gsi = gsi_last_nondebug_bb (load_bb);
6920 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6922 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6923 It only requires that the operation happen atomically. Thus we can
6924 use the RELAXED memory model. */
6925 call = build_call_expr_loc (loc, decl, 3, addr,
6926 fold_convert_loc (loc, itype, rhs),
6927 build_int_cst (NULL, mo));
6929 if (need_old || need_new)
6931 lhs = need_old ? loaded_val : stored_val;
6932 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6933 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6935 else
6936 call = fold_convert_loc (loc, void_type_node, call);
6937 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6938 gsi_remove (&gsi, true);
6940 gsi = gsi_last_nondebug_bb (store_bb);
6941 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6942 gsi_remove (&gsi, true);
6943 gsi = gsi_last_nondebug_bb (store_bb);
6944 stmt = gsi_stmt (gsi);
6945 gsi_remove (&gsi, true);
6947 if (gimple_in_ssa_p (cfun))
6949 release_defs (stmt);
6950 update_ssa (TODO_update_ssa_no_phi);
6953 return true;
6956 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6958 oldval = *addr;
6959 repeat:
6960 newval = rhs; // with oldval replacing *addr in rhs
6961 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6962 if (oldval != newval)
6963 goto repeat;
6965 INDEX is log2 of the size of the data type, and thus usable to find the
6966 index of the builtin decl. */
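/* As a rough C-level sketch (an illustrative assumption, not code from this
   file), for a floating-point LHS the loop built below behaves like

       itype old = VIEW_CONVERT (itype, *addr);
       for (;;)
	 {
	   itype desired = VIEW_CONVERT (itype,
					 VIEW_CONVERT (type, old) OP rhs);
	   itype prev = __sync_val_compare_and_swap (addr, old, desired);
	   if (prev == old)
	     break;
	   old = prev;
	 }

   with the comparison done on the integer images of the values.  */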
6968 static bool
6969 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6970 tree addr, tree loaded_val, tree stored_val,
6971 int index)
6973 tree loadedi, storedi, initial, new_storedi, old_vali;
6974 tree type, itype, cmpxchg, iaddr, atype;
6975 gimple_stmt_iterator si;
6976 basic_block loop_header = single_succ (load_bb);
6977 gimple *phi, *stmt;
6978 edge e;
6979 enum built_in_function fncode;
6981 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6982 order to use the RELAXED memory model effectively. */
6983 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6984 + index + 1);
6985 cmpxchg = builtin_decl_explicit (fncode);
6986 if (cmpxchg == NULL_TREE)
6987 return false;
6988 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6989 atype = type;
6990 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6992 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6993 || !can_atomic_load_p (TYPE_MODE (itype)))
6994 return false;
6996 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6997 si = gsi_last_nondebug_bb (load_bb);
6998 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7000 /* For floating-point values, we'll need to view-convert them to integers
7001 so that we can perform the atomic compare and swap. Simplify the
7002 following code by always setting up the "i"ntegral variables. */
7003 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7005 tree iaddr_val;
7007 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7008 true));
7009 atype = itype;
7010 iaddr_val
7011 = force_gimple_operand_gsi (&si,
7012 fold_convert (TREE_TYPE (iaddr), addr),
7013 false, NULL_TREE, true, GSI_SAME_STMT);
7014 stmt = gimple_build_assign (iaddr, iaddr_val);
7015 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7016 loadedi = create_tmp_var (itype);
7017 if (gimple_in_ssa_p (cfun))
7018 loadedi = make_ssa_name (loadedi);
7020 else
7022 iaddr = addr;
7023 loadedi = loaded_val;
7026 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7027 tree loaddecl = builtin_decl_explicit (fncode);
7028 if (loaddecl)
7029 initial
7030 = fold_convert (atype,
7031 build_call_expr (loaddecl, 2, iaddr,
7032 build_int_cst (NULL_TREE,
7033 MEMMODEL_RELAXED)));
7034 else
7036 tree off
7037 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7038 true), 0);
7039 initial = build2 (MEM_REF, atype, iaddr, off);
7042 initial
7043 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7044 GSI_SAME_STMT);
7046 /* Move the value to the LOADEDI temporary. */
7047 if (gimple_in_ssa_p (cfun))
7049 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7050 phi = create_phi_node (loadedi, loop_header);
7051 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7052 initial);
7054 else
7055 gsi_insert_before (&si,
7056 gimple_build_assign (loadedi, initial),
7057 GSI_SAME_STMT);
7058 if (loadedi != loaded_val)
7060 gimple_stmt_iterator gsi2;
7061 tree x;
7063 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7064 gsi2 = gsi_start_bb (loop_header);
7065 if (gimple_in_ssa_p (cfun))
7067 gassign *stmt;
7068 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7069 true, GSI_SAME_STMT);
7070 stmt = gimple_build_assign (loaded_val, x);
7071 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7073 else
7075 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7076 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7077 true, GSI_SAME_STMT);
7080 gsi_remove (&si, true);
7082 si = gsi_last_nondebug_bb (store_bb);
7083 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7085 if (iaddr == addr)
7086 storedi = stored_val;
7087 else
7088 storedi
7089 = force_gimple_operand_gsi (&si,
7090 build1 (VIEW_CONVERT_EXPR, itype,
7091 stored_val), true, NULL_TREE, true,
7092 GSI_SAME_STMT);
7094 /* Build the compare&swap statement. */
7095 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7096 new_storedi = force_gimple_operand_gsi (&si,
7097 fold_convert (TREE_TYPE (loadedi),
7098 new_storedi),
7099 true, NULL_TREE,
7100 true, GSI_SAME_STMT);
7102 if (gimple_in_ssa_p (cfun))
7103 old_vali = loadedi;
7104 else
7106 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7107 stmt = gimple_build_assign (old_vali, loadedi);
7108 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7110 stmt = gimple_build_assign (loadedi, new_storedi);
7111 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7114 /* Note that we always perform the comparison as an integer, even for
7115 floating point. This allows the atomic operation to properly
7116 succeed even with NaNs and -0.0. */
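  /* Concretely (illustration): if the memory location holds a NaN, a
     floating-point != comparison of the compare-and-swap result with the
     expected value would always be true (NaN != NaN), so the retry loop could
     never exit; conversely, an expected -0.0 and an observed +0.0 would
     compare equal as floats, letting the loop exit even though the swap
     failed.  Comparing the integer images avoids both problems.  */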
7117 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7118 stmt = gimple_build_cond_empty (ne);
7119 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7121 /* Update cfg. */
7122 e = single_succ_edge (store_bb);
7123 e->flags &= ~EDGE_FALLTHRU;
7124 e->flags |= EDGE_FALSE_VALUE;
7125 /* Expect no looping. */
7126 e->probability = profile_probability::guessed_always ();
7128 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7129 e->probability = profile_probability::guessed_never ();
7131 /* Copy the new value to loadedi (we already did that before the condition
7132 if we are not in SSA). */
7133 if (gimple_in_ssa_p (cfun))
7135 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7136 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7139 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7140 gsi_remove (&si, true);
7142 struct loop *loop = alloc_loop ();
7143 loop->header = loop_header;
7144 loop->latch = store_bb;
7145 add_loop (loop, loop_header->loop_father);
7147 if (gimple_in_ssa_p (cfun))
7148 update_ssa (TODO_update_ssa_no_phi);
7150 return true;
7153 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7155 GOMP_atomic_start ();
7156 *addr = rhs;
7157 GOMP_atomic_end ();
7159 The result is not globally atomic, but works so long as all parallel
7160 references are within #pragma omp atomic directives. According to
7161 responses received from omp@openmp.org, this appears to be within spec,
7162 which makes sense, since that's how several other compilers handle
7163 this situation as well.
7164 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7165 expanding. STORED_VAL is the operand of the matching
7166 GIMPLE_OMP_ATOMIC_STORE.
7168 We replace
7169 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7170 loaded_val = *addr;
7172 and replace
7173 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7174 *addr = stored_val;
7177 static bool
7178 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7179 tree addr, tree loaded_val, tree stored_val)
7181 gimple_stmt_iterator si;
7182 gassign *stmt;
7183 tree t;
7185 si = gsi_last_nondebug_bb (load_bb);
7186 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7188 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7189 t = build_call_expr (t, 0);
7190 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7192 tree mem = build_simple_mem_ref (addr);
7193 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7194 TREE_OPERAND (mem, 1)
7195 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7196 true),
7197 TREE_OPERAND (mem, 1));
7198 stmt = gimple_build_assign (loaded_val, mem);
7199 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7200 gsi_remove (&si, true);
7202 si = gsi_last_nondebug_bb (store_bb);
7203 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7205 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7206 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7208 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7209 t = build_call_expr (t, 0);
7210 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7211 gsi_remove (&si, true);
7213 if (gimple_in_ssa_p (cfun))
7214 update_ssa (TODO_update_ssa_no_phi);
7215 return true;
7218 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
7219 using expand_omp_atomic_fetch_op. If that fails, we try to
7220 call expand_omp_atomic_pipeline, and if that fails too, the
7221 ultimate fallback is wrapping the operation in a mutex
7222 (expand_omp_atomic_mutex). REGION is the atomic region built
7223 by build_omp_regions_1(). */
7225 static void
7226 expand_omp_atomic (struct omp_region *region)
7228 basic_block load_bb = region->entry, store_bb = region->exit;
7229 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7230 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7231 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7232 tree addr = gimple_omp_atomic_load_rhs (load);
7233 tree stored_val = gimple_omp_atomic_store_val (store);
7234 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7235 HOST_WIDE_INT index;
7237 /* Make sure the type is one of the supported sizes. */
7238 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7239 index = exact_log2 (index);
7240 if (index >= 0 && index <= 4)
7242 unsigned int align = TYPE_ALIGN_UNIT (type);
7244 /* __sync builtins require strict data alignment. */
7245 if (exact_log2 (align) >= index)
7247 /* Atomic load. */
7248 scalar_mode smode;
7249 if (loaded_val == stored_val
7250 && (is_int_mode (TYPE_MODE (type), &smode)
7251 || is_float_mode (TYPE_MODE (type), &smode))
7252 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7253 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7254 return;
7256 /* Atomic store. */
7257 if ((is_int_mode (TYPE_MODE (type), &smode)
7258 || is_float_mode (TYPE_MODE (type), &smode))
7259 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7260 && store_bb == single_succ (load_bb)
7261 && first_stmt (store_bb) == store
7262 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7263 stored_val, index))
7264 return;
7266 /* When possible, use specialized atomic update functions. */
7267 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7268 && store_bb == single_succ (load_bb)
7269 && expand_omp_atomic_fetch_op (load_bb, addr,
7270 loaded_val, stored_val, index))
7271 return;
7273 /* If we don't have specialized __sync builtins, try to implement it
7274 as a compare and swap loop. */
7275 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7276 loaded_val, stored_val, index))
7277 return;
7281 /* The ultimate fallback is wrapping the operation in a mutex. */
7282 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7285 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7286 at REGION_EXIT. */
7288 static void
7289 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7290 basic_block region_exit)
7292 struct loop *outer = region_entry->loop_father;
7293 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7295 /* Don't parallelize the kernels region if it contains more than one outer
7296 loop. */
7297 unsigned int nr_outer_loops = 0;
7298 struct loop *single_outer = NULL;
7299 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7301 gcc_assert (loop_outer (loop) == outer);
7303 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7304 continue;
7306 if (region_exit != NULL
7307 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7308 continue;
7310 nr_outer_loops++;
7311 single_outer = loop;
7313 if (nr_outer_loops != 1)
7314 return;
7316 for (struct loop *loop = single_outer->inner;
7317 loop != NULL;
7318 loop = loop->inner)
7319 if (loop->next)
7320 return;
7322 /* Mark the loops in the region. */
7323 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7324 loop->in_oacc_kernels_region = true;
7327 /* Types used to pass grid and workgroup sizes to kernel invocation. */
7329 struct GTY(()) grid_launch_attributes_trees
7331 tree kernel_dim_array_type;
7332 tree kernel_lattrs_dimnum_decl;
7333 tree kernel_lattrs_grid_decl;
7334 tree kernel_lattrs_group_decl;
7335 tree kernel_launch_attributes_type;
7338 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7340 /* Create types used to pass kernel launch attributes to target. */
7342 static void
7343 grid_create_kernel_launch_attr_types (void)
7345 if (grid_attr_trees)
7346 return;
7347 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7349 tree dim_arr_index_type
7350 = build_index_type (build_int_cst (integer_type_node, 2));
7351 grid_attr_trees->kernel_dim_array_type
7352 = build_array_type (uint32_type_node, dim_arr_index_type);
7354 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7355 grid_attr_trees->kernel_lattrs_dimnum_decl
7356 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7357 uint32_type_node);
7358 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7360 grid_attr_trees->kernel_lattrs_grid_decl
7361 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7362 grid_attr_trees->kernel_dim_array_type);
7363 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7364 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7365 grid_attr_trees->kernel_lattrs_group_decl
7366 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7367 grid_attr_trees->kernel_dim_array_type);
7368 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7369 = grid_attr_trees->kernel_lattrs_grid_decl;
7370 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7371 "__gomp_kernel_launch_attributes",
7372 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
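  /* The record built above corresponds, as a sketch, to

	 struct __gomp_kernel_launch_attributes
	 {
	   uint32_t ndim;
	   uint32_t grid_size[3];
	   uint32_t group_size[3];
	 };

     with three elements per dimension array because dim_arr_index_type
     covers indices 0..2.  */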
7375 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7376 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7377 of type uint32_type_node. */
7379 static void
7380 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7381 tree fld_decl, int index, tree value)
7383 tree ref = build4 (ARRAY_REF, uint32_type_node,
7384 build3 (COMPONENT_REF,
7385 grid_attr_trees->kernel_dim_array_type,
7386 range_var, fld_decl, NULL_TREE),
7387 build_int_cst (integer_type_node, index),
7388 NULL_TREE, NULL_TREE);
7389 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7392 /* Return a tree representation of a pointer to a structure with grid and
7393 work-group size information. Statements filling that information will be
7394 inserted before GSI; TGT_STMT is the target statement which has the
7395 necessary information in it. */
7397 static tree
7398 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7399 gomp_target *tgt_stmt)
7401 grid_create_kernel_launch_attr_types ();
7402 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7403 "__kernel_launch_attrs");
7405 unsigned max_dim = 0;
7406 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7407 clause;
7408 clause = OMP_CLAUSE_CHAIN (clause))
7410 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7411 continue;
7413 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7414 max_dim = MAX (dim, max_dim);
7416 grid_insert_store_range_dim (gsi, lattrs,
7417 grid_attr_trees->kernel_lattrs_grid_decl,
7418 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7419 grid_insert_store_range_dim (gsi, lattrs,
7420 grid_attr_trees->kernel_lattrs_group_decl,
7421 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7424 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7425 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7426 gcc_checking_assert (max_dim <= 2);
7427 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7428 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7429 GSI_SAME_STMT);
7430 TREE_ADDRESSABLE (lattrs) = 1;
7431 return build_fold_addr_expr (lattrs);
7434 /* Build target argument identifier from the DEVICE identifier, value
7435 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7437 static tree
7438 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7440 tree t = build_int_cst (integer_type_node, device);
7441 if (subseqent_param)
7442 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7443 build_int_cst (integer_type_node,
7444 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7445 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7446 build_int_cst (integer_type_node, id));
7447 return t;
7450 /* Like above, but return it in a type that can be directly stored as an
7451 element of the argument array. */
7453 static tree
7454 get_target_argument_identifier (int device, bool subseqent_param, int id)
7456 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7457 return fold_convert (ptr_type_node, t);
7460 /* Return a target argument consisting of DEVICE identifier, value identifier
7461 ID, and the actual VALUE. */
7463 static tree
7464 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7465 tree value)
7467 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7468 fold_convert (integer_type_node, value),
7469 build_int_cst (unsigned_type_node,
7470 GOMP_TARGET_ARG_VALUE_SHIFT));
7471 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7472 get_target_argument_identifier_1 (device, false, id));
7473 t = fold_convert (ptr_type_node, t);
7474 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7477 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7478 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
7479 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7480 arguments. */
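/* Worked example (illustration only, assuming GOMP_TARGET_ARG_VALUE_SHIFT is
   16): a compile-time num_teams value of 4 fits in 16 bits, so it is pushed
   as the single element

       (4 << 16) | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   whereas a value only known at run time is pushed as an identifier element
   with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself in
   the next array slot.  */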
7482 static void
7483 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7484 int id, tree value, vec <tree> *args)
7486 if (tree_fits_shwi_p (value)
7487 && tree_to_shwi (value) > -(1 << 15)
7488 && tree_to_shwi (value) < (1 << 15))
7489 args->quick_push (get_target_argument_value (gsi, device, id, value));
7490 else
7492 args->quick_push (get_target_argument_identifier (device, true, id));
7493 value = fold_convert (ptr_type_node, value);
7494 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7495 GSI_SAME_STMT);
7496 args->quick_push (value);
7500 /* Create an array of arguments that is then passed to GOMP_target. */
7502 static tree
7503 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7505 auto_vec <tree, 6> args;
7506 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7507 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7508 if (c)
7509 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7510 else
7511 t = integer_minus_one_node;
7512 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7513 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7515 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7516 if (c)
7517 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7518 else
7519 t = integer_minus_one_node;
7520 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7521 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7522 &args);
7524 /* Add HSA-specific grid sizes, if available. */
7525 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7526 OMP_CLAUSE__GRIDDIM_))
7528 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7529 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7530 args.quick_push (t);
7531 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7534 /* Produce more, perhaps device specific, arguments here. */
7536 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7537 args.length () + 1),
7538 ".omp_target_args");
7539 for (unsigned i = 0; i < args.length (); i++)
7541 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7542 build_int_cst (integer_type_node, i),
7543 NULL_TREE, NULL_TREE);
7544 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7545 GSI_SAME_STMT);
7547 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7548 build_int_cst (integer_type_node, args.length ()),
7549 NULL_TREE, NULL_TREE);
7550 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7551 GSI_SAME_STMT);
7552 TREE_ADDRESSABLE (argarray) = 1;
7553 return build_fold_addr_expr (argarray);
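  /* For example (a sketch with assumed clause values): a construct such as
     "#pragma omp target teams num_teams (4) thread_limit (64)" would yield
     an .omp_target_args array of roughly

	 { <DEVICE_ALL | NUM_TEAMS | 4 << 16>,
	   <DEVICE_ALL | THREAD_LIMIT | 64 << 16>,
	   NULL }

     with the trailing null pointer terminating the list for the runtime.  */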
7556 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7558 static void
7559 expand_omp_target (struct omp_region *region)
7561 basic_block entry_bb, exit_bb, new_bb;
7562 struct function *child_cfun;
7563 tree child_fn, block, t;
7564 gimple_stmt_iterator gsi;
7565 gomp_target *entry_stmt;
7566 gimple *stmt;
7567 edge e;
7568 bool offloaded, data_region;
7570 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7571 new_bb = region->entry;
7573 offloaded = is_gimple_omp_offloaded (entry_stmt);
7574 switch (gimple_omp_target_kind (entry_stmt))
7576 case GF_OMP_TARGET_KIND_REGION:
7577 case GF_OMP_TARGET_KIND_UPDATE:
7578 case GF_OMP_TARGET_KIND_ENTER_DATA:
7579 case GF_OMP_TARGET_KIND_EXIT_DATA:
7580 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7581 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7582 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7583 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7584 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7585 data_region = false;
7586 break;
7587 case GF_OMP_TARGET_KIND_DATA:
7588 case GF_OMP_TARGET_KIND_OACC_DATA:
7589 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7590 data_region = true;
7591 break;
7592 default:
7593 gcc_unreachable ();
7596 child_fn = NULL_TREE;
7597 child_cfun = NULL;
7598 if (offloaded)
7600 child_fn = gimple_omp_target_child_fn (entry_stmt);
7601 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7604 /* Supported by expand_omp_taskreg, but not here. */
7605 if (child_cfun != NULL)
7606 gcc_checking_assert (!child_cfun->cfg);
7607 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7609 entry_bb = region->entry;
7610 exit_bb = region->exit;
7612 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7614 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7616 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7617 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7618 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7619 DECL_ATTRIBUTES (child_fn)
7620 = tree_cons (get_identifier ("oacc kernels"),
7621 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7624 if (offloaded)
7626 unsigned srcidx, dstidx, num;
7628 /* If the offloading region needs data sent from the parent
7629 function, then the very first statement (except possible
7630 tree profile counter updates) of the offloading body
7631 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7632 &.OMP_DATA_O is passed as an argument to the child function,
7633 we need to replace it with the argument as seen by the child
7634 function.
7636 In most cases, this will end up being the identity assignment
7637 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7638 a function call that has been inlined, the original PARM_DECL
7639 .OMP_DATA_I may have been converted into a different local
7640 variable. In that case, we need to keep the assignment. */
7641 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7642 if (data_arg)
7644 basic_block entry_succ_bb = single_succ (entry_bb);
7645 gimple_stmt_iterator gsi;
7646 tree arg;
7647 gimple *tgtcopy_stmt = NULL;
7648 tree sender = TREE_VEC_ELT (data_arg, 0);
7650 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7652 gcc_assert (!gsi_end_p (gsi));
7653 stmt = gsi_stmt (gsi);
7654 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7655 continue;
7657 if (gimple_num_ops (stmt) == 2)
7659 tree arg = gimple_assign_rhs1 (stmt);
7661 /* We're ignoring the subcode because we're
7662 effectively doing a STRIP_NOPS. */
7664 if (TREE_CODE (arg) == ADDR_EXPR
7665 && TREE_OPERAND (arg, 0) == sender)
7667 tgtcopy_stmt = stmt;
7668 break;
7673 gcc_assert (tgtcopy_stmt != NULL);
7674 arg = DECL_ARGUMENTS (child_fn);
7676 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7677 gsi_remove (&gsi, true);
7680 /* Declare local variables needed in CHILD_CFUN. */
7681 block = DECL_INITIAL (child_fn);
7682 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7683 /* The gimplifier could record temporaries in the offloading block
7684 rather than in the containing function's local_decls chain,
7685 which would mean cgraph missed finalizing them. Do it now. */
7686 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7687 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7688 varpool_node::finalize_decl (t);
7689 DECL_SAVED_TREE (child_fn) = NULL;
7690 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7691 gimple_set_body (child_fn, NULL);
7692 TREE_USED (block) = 1;
7694 /* Reset DECL_CONTEXT on function arguments. */
7695 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7696 DECL_CONTEXT (t) = child_fn;
7698 /* Split ENTRY_BB at GIMPLE_*,
7699 so that it can be moved to the child function. */
7700 gsi = gsi_last_nondebug_bb (entry_bb);
7701 stmt = gsi_stmt (gsi);
7702 gcc_assert (stmt
7703 && gimple_code (stmt) == gimple_code (entry_stmt));
7704 e = split_block (entry_bb, stmt);
7705 gsi_remove (&gsi, true);
7706 entry_bb = e->dest;
7707 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7709 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7710 if (exit_bb)
7712 gsi = gsi_last_nondebug_bb (exit_bb);
7713 gcc_assert (!gsi_end_p (gsi)
7714 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7715 stmt = gimple_build_return (NULL);
7716 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7717 gsi_remove (&gsi, true);
7720 /* Move the offloading region into CHILD_CFUN. */
7722 block = gimple_block (entry_stmt);
7724 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7725 if (exit_bb)
7726 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7727 /* When the OMP expansion process cannot guarantee an up-to-date
7728 loop tree, arrange for the child function to fix up loops. */
7729 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7730 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7732 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7733 num = vec_safe_length (child_cfun->local_decls);
7734 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7736 t = (*child_cfun->local_decls)[srcidx];
7737 if (DECL_CONTEXT (t) == cfun->decl)
7738 continue;
7739 if (srcidx != dstidx)
7740 (*child_cfun->local_decls)[dstidx] = t;
7741 dstidx++;
7743 if (dstidx != num)
7744 vec_safe_truncate (child_cfun->local_decls, dstidx);
7746 /* Inform the callgraph about the new function. */
7747 child_cfun->curr_properties = cfun->curr_properties;
7748 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7749 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7750 cgraph_node *node = cgraph_node::get_create (child_fn);
7751 node->parallelized_function = 1;
7752 cgraph_node::add_new_function (child_fn, true);
7754 /* Add the new function to the offload table. */
7755 if (ENABLE_OFFLOADING)
7757 if (in_lto_p)
7758 DECL_PRESERVE_P (child_fn) = 1;
7759 vec_safe_push (offload_funcs, child_fn);
7762 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7763 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7765 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7766 fixed in a following pass. */
7767 push_cfun (child_cfun);
7768 if (need_asm)
7769 assign_assembler_name_if_needed (child_fn);
7770 cgraph_edge::rebuild_edges ();
7772 /* Some EH regions might become dead, see PR34608. If
7773 pass_cleanup_cfg isn't the first pass to happen with the
7774 new child, these dead EH edges might cause problems.
7775 Clean them up now. */
7776 if (flag_exceptions)
7778 basic_block bb;
7779 bool changed = false;
7781 FOR_EACH_BB_FN (bb, cfun)
7782 changed |= gimple_purge_dead_eh_edges (bb);
7783 if (changed)
7784 cleanup_tree_cfg ();
7786 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7787 verify_loop_structure ();
7788 pop_cfun ();
7790 if (dump_file && !gimple_in_ssa_p (cfun))
7792 omp_any_child_fn_dumped = true;
7793 dump_function_header (dump_file, child_fn, dump_flags);
7794 dump_function_to_file (child_fn, dump_file, dump_flags);
7797 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7800 /* Emit a library call to launch the offloading region, or do data
7801 transfers. */
7802 tree t1, t2, t3, t4, depend, c, clauses;
7803 enum built_in_function start_ix;
7804 unsigned int flags_i = 0;
7806 switch (gimple_omp_target_kind (entry_stmt))
7808 case GF_OMP_TARGET_KIND_REGION:
7809 start_ix = BUILT_IN_GOMP_TARGET;
7810 break;
7811 case GF_OMP_TARGET_KIND_DATA:
7812 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7813 break;
7814 case GF_OMP_TARGET_KIND_UPDATE:
7815 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7816 break;
7817 case GF_OMP_TARGET_KIND_ENTER_DATA:
7818 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7819 break;
7820 case GF_OMP_TARGET_KIND_EXIT_DATA:
7821 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7822 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7823 break;
7824 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7825 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7826 start_ix = BUILT_IN_GOACC_PARALLEL;
7827 break;
7828 case GF_OMP_TARGET_KIND_OACC_DATA:
7829 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7830 start_ix = BUILT_IN_GOACC_DATA_START;
7831 break;
7832 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7833 start_ix = BUILT_IN_GOACC_UPDATE;
7834 break;
7835 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7836 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7837 break;
7838 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7839 start_ix = BUILT_IN_GOACC_DECLARE;
7840 break;
7841 default:
7842 gcc_unreachable ();
7845 clauses = gimple_omp_target_clauses (entry_stmt);
7847 tree device = NULL_TREE;
7848 location_t device_loc = UNKNOWN_LOCATION;
7849 tree goacc_flags = NULL_TREE;
7850 if (is_gimple_omp_oacc (entry_stmt))
7852 /* By default, no GOACC_FLAGs are set. */
7853 goacc_flags = integer_zero_node;
7855 else
7857 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7858 if (c)
7860 device = OMP_CLAUSE_DEVICE_ID (c);
7861 device_loc = OMP_CLAUSE_LOCATION (c);
7863 else
7865 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7866 library choose). */
7867 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7868 device_loc = gimple_location (entry_stmt);
7871 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7872 if (c)
7873 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7876 /* By default, there is no conditional. */
7877 tree cond = NULL_TREE;
7878 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7879 if (c)
7880 cond = OMP_CLAUSE_IF_EXPR (c);
7881 /* If we found the clause 'if (cond)', build:
7882 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
7883 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
7884 if (cond)
7886 tree *tp;
7887 if (is_gimple_omp_oacc (entry_stmt))
7888 tp = &goacc_flags;
7889 else
7891 /* Ensure 'device' is of the correct type. */
7892 device = fold_convert_loc (device_loc, integer_type_node, device);
7894 tp = &device;
7897 cond = gimple_boolify (cond);
7899 basic_block cond_bb, then_bb, else_bb;
7900 edge e;
7901 tree tmp_var;
7903 tmp_var = create_tmp_var (TREE_TYPE (*tp));
7904 if (offloaded)
7905 e = split_block_after_labels (new_bb);
7906 else
7908 gsi = gsi_last_nondebug_bb (new_bb);
7909 gsi_prev (&gsi);
7910 e = split_block (new_bb, gsi_stmt (gsi));
7912 cond_bb = e->src;
7913 new_bb = e->dest;
7914 remove_edge (e);
7916 then_bb = create_empty_bb (cond_bb);
7917 else_bb = create_empty_bb (then_bb);
7918 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7919 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7921 stmt = gimple_build_cond_empty (cond);
7922 gsi = gsi_last_bb (cond_bb);
7923 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7925 gsi = gsi_start_bb (then_bb);
7926 stmt = gimple_build_assign (tmp_var, *tp);
7927 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7929 gsi = gsi_start_bb (else_bb);
7930 if (is_gimple_omp_oacc (entry_stmt))
7931 stmt = gimple_build_assign (tmp_var,
7932 BIT_IOR_EXPR,
7933 *tp,
7934 build_int_cst (integer_type_node,
7935 GOACC_FLAG_HOST_FALLBACK));
7936 else
7937 stmt = gimple_build_assign (tmp_var,
7938 build_int_cst (integer_type_node,
7939 GOMP_DEVICE_HOST_FALLBACK));
7940 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7942 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7943 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7944 add_bb_to_loop (then_bb, cond_bb->loop_father);
7945 add_bb_to_loop (else_bb, cond_bb->loop_father);
7946 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7947 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7949 *tp = tmp_var;
7951 gsi = gsi_last_nondebug_bb (new_bb);
7953 else
7955 gsi = gsi_last_nondebug_bb (new_bb);
7957 if (device != NULL_TREE)
7958 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7959 true, GSI_SAME_STMT);
7962 t = gimple_omp_target_data_arg (entry_stmt);
7963 if (t == NULL)
7965 t1 = size_zero_node;
7966 t2 = build_zero_cst (ptr_type_node);
7967 t3 = t2;
7968 t4 = t2;
7970 else
7972 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7973 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7974 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7975 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7976 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7979 gimple *g;
7980 bool tagging = false;
7981 /* The maximum number of arguments used by any start_ix, without varargs. */
7982 auto_vec<tree, 11> args;
7983 if (is_gimple_omp_oacc (entry_stmt))
7985 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7986 TREE_TYPE (goacc_flags), goacc_flags);
7987 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7988 NULL_TREE, true,
7989 GSI_SAME_STMT);
7990 args.quick_push (goacc_flags_m);
7992 else
7993 args.quick_push (device);
7994 if (offloaded)
7995 args.quick_push (build_fold_addr_expr (child_fn));
7996 args.quick_push (t1);
7997 args.quick_push (t2);
7998 args.quick_push (t3);
7999 args.quick_push (t4);
8000 switch (start_ix)
8002 case BUILT_IN_GOACC_DATA_START:
8003 case BUILT_IN_GOACC_DECLARE:
8004 case BUILT_IN_GOMP_TARGET_DATA:
8005 break;
8006 case BUILT_IN_GOMP_TARGET:
8007 case BUILT_IN_GOMP_TARGET_UPDATE:
8008 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8009 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8010 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8011 if (c)
8012 depend = OMP_CLAUSE_DECL (c);
8013 else
8014 depend = build_int_cst (ptr_type_node, 0);
8015 args.quick_push (depend);
8016 if (start_ix == BUILT_IN_GOMP_TARGET)
8017 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8018 break;
8019 case BUILT_IN_GOACC_PARALLEL:
8020 oacc_set_fn_attrib (child_fn, clauses, &args);
8021 tagging = true;
8022 /* FALLTHRU */
8023 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8024 case BUILT_IN_GOACC_UPDATE:
8026 tree t_async = NULL_TREE;
8028 /* If present, use the value specified by the respective
8029 clause, making sure that it is of the correct type. */
8030 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8031 if (c)
8032 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8033 integer_type_node,
8034 OMP_CLAUSE_ASYNC_EXPR (c));
8035 else if (!tagging)
8036 /* Default values for t_async. */
8037 t_async = fold_convert_loc (gimple_location (entry_stmt),
8038 integer_type_node,
8039 build_int_cst (integer_type_node,
8040 GOMP_ASYNC_SYNC));
8041 if (tagging && t_async)
8043 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8045 if (TREE_CODE (t_async) == INTEGER_CST)
8047 /* See if we can pack the async arg into the tag's
8048 operand. */
8049 i_async = TREE_INT_CST_LOW (t_async);
8050 if (i_async < GOMP_LAUNCH_OP_MAX)
8051 t_async = NULL_TREE;
8052 else
8053 i_async = GOMP_LAUNCH_OP_MAX;
8055 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8056 i_async));
8058 if (t_async)
8059 args.safe_push (t_async);
8061 /* Save the argument index, and ... */
8062 unsigned t_wait_idx = args.length ();
8063 unsigned num_waits = 0;
8064 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8065 if (!tagging || c)
8066 /* ... push a placeholder. */
8067 args.safe_push (integer_zero_node);
8069 for (; c; c = OMP_CLAUSE_CHAIN (c))
8070 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8072 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8073 integer_type_node,
8074 OMP_CLAUSE_WAIT_EXPR (c)));
8075 num_waits++;
8078 if (!tagging || num_waits)
8080 tree len;
8082 /* Now that we know the number, update the placeholder. */
8083 if (tagging)
8084 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8085 else
8086 len = build_int_cst (integer_type_node, num_waits);
8087 len = fold_convert_loc (gimple_location (entry_stmt),
8088 unsigned_type_node, len);
8089 args[t_wait_idx] = len;
8092 break;
8093 default:
8094 gcc_unreachable ();
8096 if (tagging)
8097 /* Push terminal marker - zero. */
8098 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8100 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8101 gimple_set_location (g, gimple_location (entry_stmt));
8102 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8103 if (!offloaded)
8105 g = gsi_stmt (gsi);
8106 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8107 gsi_remove (&gsi, true);
8109 if (data_region && region->exit)
8111 gsi = gsi_last_nondebug_bb (region->exit);
8112 g = gsi_stmt (gsi);
8113 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8114 gsi_remove (&gsi, true);
8118 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
8119 the iteration variable derived from the thread number. INTRA_GROUP means this
8120 is an expansion of a loop iterating over work-items within a separate
8121 iteration over groups. */
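/* In effect (an illustrative sketch, not code from this file), a gridified
   loop

       for (i = N1; i cond N2; i += STEP)
	 body;

   loses its control flow entirely: each HSA work item executes only

       i = N1 + <its id in dimension 0> * STEP;
       body;

   relying on the separately computed grid and group sizes to cover exactly
   the original iteration space.  */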
8123 static void
8124 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8126 gimple_stmt_iterator gsi;
8127 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8128 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8129 == GF_OMP_FOR_KIND_GRID_LOOP);
8130 size_t collapse = gimple_omp_for_collapse (for_stmt);
8131 struct omp_for_data_loop *loops
8132 = XALLOCAVEC (struct omp_for_data_loop,
8133 gimple_omp_for_collapse (for_stmt));
8134 struct omp_for_data fd;
8136 remove_edge (BRANCH_EDGE (kfor->entry));
8137 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8139 gcc_assert (kfor->cont);
8140 omp_extract_for_data (for_stmt, &fd, loops);
8142 gsi = gsi_start_bb (body_bb);
8144 for (size_t dim = 0; dim < collapse; dim++)
8146 tree type, itype;
8147 itype = type = TREE_TYPE (fd.loops[dim].v);
8148 if (POINTER_TYPE_P (type))
8149 itype = signed_type_for (type);
8151 tree n1 = fd.loops[dim].n1;
8152 tree step = fd.loops[dim].step;
8153 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8154 true, NULL_TREE, true, GSI_SAME_STMT);
8155 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8156 true, NULL_TREE, true, GSI_SAME_STMT);
8157 tree threadid;
8158 if (gimple_omp_for_grid_group_iter (for_stmt))
8160 gcc_checking_assert (!intra_group);
8161 threadid = build_call_expr (builtin_decl_explicit
8162 (BUILT_IN_HSA_WORKGROUPID), 1,
8163 build_int_cstu (unsigned_type_node, dim));
8165 else if (intra_group)
8166 threadid = build_call_expr (builtin_decl_explicit
8167 (BUILT_IN_HSA_WORKITEMID), 1,
8168 build_int_cstu (unsigned_type_node, dim));
8169 else
8170 threadid = build_call_expr (builtin_decl_explicit
8171 (BUILT_IN_HSA_WORKITEMABSID), 1,
8172 build_int_cstu (unsigned_type_node, dim));
8173 threadid = fold_convert (itype, threadid);
8174 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8175 true, GSI_SAME_STMT);
8177 tree startvar = fd.loops[dim].v;
8178 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8179 if (POINTER_TYPE_P (type))
8180 t = fold_build_pointer_plus (n1, t);
8181 else
8182 t = fold_build2 (PLUS_EXPR, type, t, n1);
8183 t = fold_convert (type, t);
8184 t = force_gimple_operand_gsi (&gsi, t,
8185 DECL_P (startvar)
8186 && TREE_ADDRESSABLE (startvar),
8187 NULL_TREE, true, GSI_SAME_STMT);
8188 gassign *assign_stmt = gimple_build_assign (startvar, t);
8189 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8191 /* Remove the omp for statement. */
8192 gsi = gsi_last_nondebug_bb (kfor->entry);
8193 gsi_remove (&gsi, true);
8195 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8196 gsi = gsi_last_nondebug_bb (kfor->cont);
8197 gcc_assert (!gsi_end_p (gsi)
8198 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8199 gsi_remove (&gsi, true);
8201 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8202 gsi = gsi_last_nondebug_bb (kfor->exit);
8203 gcc_assert (!gsi_end_p (gsi)
8204 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8205 if (intra_group)
8206 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8207 gsi_remove (&gsi, true);
8209 /* Fixup the much simpler CFG. */
8210 remove_edge (find_edge (kfor->cont, body_bb));
8212 if (kfor->cont != body_bb)
8213 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8214 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8217 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8218 argument_decls. */
8220 struct grid_arg_decl_map
8222 tree old_arg;
8223 tree new_arg;
8226 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8227 pertaining to the kernel function. */
8229 static tree
8230 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8232 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8233 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8234 tree t = *tp;
8236 if (t == adm->old_arg)
8237 *tp = adm->new_arg;
8238 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8239 return NULL_TREE;
8242 /* If the TARGET region contains a kernel body for-loop, remove its region
8243 from the TARGET and expand it in HSA gridified kernel fashion. */
8245 static void
8246 grid_expand_target_grid_body (struct omp_region *target)
8248 if (!hsa_gen_requested_p ())
8249 return;
8251 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8252 struct omp_region **pp;
8254 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8255 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8256 break;
8258 struct omp_region *gpukernel = *pp;
8260 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8261 if (!gpukernel)
8263 /* HSA cannot handle OACC stuff. */
8264 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8265 return;
8266 gcc_checking_assert (orig_child_fndecl);
8267 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8268 OMP_CLAUSE__GRIDDIM_));
8269 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8271 hsa_register_kernel (n);
8272 return;
8275 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8276 OMP_CLAUSE__GRIDDIM_));
8277 tree inside_block
8278 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8279 *pp = gpukernel->next;
8280 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8281 if ((*pp)->type == GIMPLE_OMP_FOR)
8282 break;
8284 struct omp_region *kfor = *pp;
8285 gcc_assert (kfor);
8286 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8287 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8288 *pp = kfor->next;
8289 if (kfor->inner)
8291 if (gimple_omp_for_grid_group_iter (for_stmt))
8293 struct omp_region **next_pp;
8294 for (pp = &kfor->inner; *pp; pp = next_pp)
8296 next_pp = &(*pp)->next;
8297 if ((*pp)->type != GIMPLE_OMP_FOR)
8298 continue;
8299 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8300 gcc_assert (gimple_omp_for_kind (inner)
8301 == GF_OMP_FOR_KIND_GRID_LOOP);
8302 grid_expand_omp_for_loop (*pp, true);
8303 *pp = (*pp)->next;
8304 next_pp = pp;
8307 expand_omp (kfor->inner);
8309 if (gpukernel->inner)
8310 expand_omp (gpukernel->inner);
8312 tree kern_fndecl = copy_node (orig_child_fndecl);
8313 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8314 "kernel");
8315 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8316 tree tgtblock = gimple_block (tgt_stmt);
8317 tree fniniblock = make_node (BLOCK);
8318 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8319 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8320 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8321 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8322 DECL_INITIAL (kern_fndecl) = fniniblock;
8323 push_struct_function (kern_fndecl);
8324 cfun->function_end_locus = gimple_location (tgt_stmt);
8325 init_tree_ssa (cfun);
8326 pop_cfun ();
8328 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8329 gcc_assert (!DECL_CHAIN (old_parm_decl));
8330 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8331 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8332 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8333 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8334 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8335 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8336 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8337 kern_cfun->curr_properties = cfun->curr_properties;
8339 grid_expand_omp_for_loop (kfor, false);
8341 /* Remove the omp for statement. */
8342 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8343 gsi_remove (&gsi, true);
8344 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8345 return. */
8346 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8347 gcc_assert (!gsi_end_p (gsi)
8348 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8349 gimple *ret_stmt = gimple_build_return (NULL);
8350 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8351 gsi_remove (&gsi, true);
8353 /* Statements in the first BB in the target construct have been produced by
8354 target lowering and must be copied inside the GPUKERNEL, with the two
8355 exceptions of the first OMP statement and the OMP_DATA assignment
8356 statement. */
8357 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8358 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8359 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8360 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8361 !gsi_end_p (tsi); gsi_next (&tsi))
8363 gimple *stmt = gsi_stmt (tsi);
8364 if (is_gimple_omp (stmt))
8365 break;
8366 if (sender
8367 && is_gimple_assign (stmt)
8368 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8369 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8370 continue;
8371 gimple *copy = gimple_copy (stmt);
8372 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8373 gimple_set_block (copy, fniniblock);
8376 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8377 gpukernel->exit, inside_block);
8379 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8380 kcn->mark_force_output ();
8381 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8383 hsa_register_kernel (kcn, orig_child);
8385 cgraph_node::add_new_function (kern_fndecl, true);
8386 push_cfun (kern_cfun);
8387 cgraph_edge::rebuild_edges ();
8389 /* Re-map any mention of the PARM_DECL of the original function to the
8390 PARM_DECL of the new one.
8392 TODO: It would be great if lowering produced references into the GPU
8393 kernel decl straight away and we did not have to do this. */
8394 struct grid_arg_decl_map adm;
8395 adm.old_arg = old_parm_decl;
8396 adm.new_arg = new_parm_decl;
8397 basic_block bb;
8398 FOR_EACH_BB_FN (bb, kern_cfun)
8400 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8402 gimple *stmt = gsi_stmt (gsi);
8403 struct walk_stmt_info wi;
8404 memset (&wi, 0, sizeof (wi));
8405 wi.info = &adm;
8406 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8409 pop_cfun ();
8411 return;
8414 /* Expand the parallel region tree rooted at REGION. Expansion
8415 proceeds in depth-first order. Innermost regions are expanded
8416 first. This way, parallel regions that require a new function to
8417 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8418 internal dependencies in their body. */
8420 static void
8421 expand_omp (struct omp_region *region)
8423 omp_any_child_fn_dumped = false;
8424 while (region)
8426 location_t saved_location;
8427 gimple *inner_stmt = NULL;
8429 /* First, determine whether this is a combined parallel+workshare
8430 region. */
8431 if (region->type == GIMPLE_OMP_PARALLEL)
8432 determine_parallel_type (region);
8433 else if (region->type == GIMPLE_OMP_TARGET)
8434 grid_expand_target_grid_body (region);
8436 if (region->type == GIMPLE_OMP_FOR
8437 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8438 inner_stmt = last_stmt (region->inner->entry);
8440 if (region->inner)
8441 expand_omp (region->inner);
8443 saved_location = input_location;
8444 if (gimple_has_location (last_stmt (region->entry)))
8445 input_location = gimple_location (last_stmt (region->entry));
8447 switch (region->type)
8449 case GIMPLE_OMP_PARALLEL:
8450 case GIMPLE_OMP_TASK:
8451 expand_omp_taskreg (region);
8452 break;
8454 case GIMPLE_OMP_FOR:
8455 expand_omp_for (region, inner_stmt);
8456 break;
8458 case GIMPLE_OMP_SECTIONS:
8459 expand_omp_sections (region);
8460 break;
8462 case GIMPLE_OMP_SECTION:
8463 /* Individual omp sections are handled together with their
8464 parent GIMPLE_OMP_SECTIONS region. */
8465 break;
8467 case GIMPLE_OMP_SINGLE:
8468 expand_omp_single (region);
8469 break;
8471 case GIMPLE_OMP_ORDERED:
8473 gomp_ordered *ord_stmt
8474 = as_a <gomp_ordered *> (last_stmt (region->entry));
8475 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8476 OMP_CLAUSE_DEPEND))
8478 /* We'll expand these when expanding corresponding
8479 worksharing region with ordered(n) clause. */
8480 gcc_assert (region->outer
8481 && region->outer->type == GIMPLE_OMP_FOR);
8482 region->ord_stmt = ord_stmt;
8483 break;
8486 /* FALLTHRU */
8487 case GIMPLE_OMP_MASTER:
8488 case GIMPLE_OMP_TASKGROUP:
8489 case GIMPLE_OMP_CRITICAL:
8490 case GIMPLE_OMP_TEAMS:
8491 expand_omp_synch (region);
8492 break;
8494 case GIMPLE_OMP_ATOMIC_LOAD:
8495 expand_omp_atomic (region);
8496 break;
8498 case GIMPLE_OMP_TARGET:
8499 expand_omp_target (region);
8500 break;
8502 default:
8503 gcc_unreachable ();
8506 input_location = saved_location;
8507 region = region->next;
8509 if (omp_any_child_fn_dumped)
8511 if (dump_file)
8512 dump_function_header (dump_file, current_function_decl, dump_flags);
8513 omp_any_child_fn_dumped = false;
8517 /* Helper for build_omp_regions. Scan the dominator tree starting at
8518 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8519 true, the function ends once a single tree is built (otherwise, a whole
8520 forest of OMP constructs may be built). */
8522 static void
8523 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8524 bool single_tree)
8526 gimple_stmt_iterator gsi;
8527 gimple *stmt;
8528 basic_block son;
8530 gsi = gsi_last_nondebug_bb (bb);
8531 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8533 struct omp_region *region;
8534 enum gimple_code code;
8536 stmt = gsi_stmt (gsi);
8537 code = gimple_code (stmt);
8538 if (code == GIMPLE_OMP_RETURN)
8540 /* STMT is the return point out of region PARENT. Mark it
8541 as the exit point and make PARENT the immediately
8542 enclosing region. */
8543 gcc_assert (parent);
8544 region = parent;
8545 region->exit = bb;
8546 parent = parent->outer;
8548 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8550 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8551 GIMPLE_OMP_RETURN, but matches with
8552 GIMPLE_OMP_ATOMIC_LOAD. */
8553 gcc_assert (parent);
8554 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8555 region = parent;
8556 region->exit = bb;
8557 parent = parent->outer;
8559 else if (code == GIMPLE_OMP_CONTINUE)
8561 gcc_assert (parent);
8562 parent->cont = bb;
8564 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8566 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8567 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8569 else
8571 region = new_omp_region (bb, code, parent);
8572 /* Otherwise... */
8573 if (code == GIMPLE_OMP_TARGET)
8575 switch (gimple_omp_target_kind (stmt))
8577 case GF_OMP_TARGET_KIND_REGION:
8578 case GF_OMP_TARGET_KIND_DATA:
8579 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8580 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8581 case GF_OMP_TARGET_KIND_OACC_DATA:
8582 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8583 break;
8584 case GF_OMP_TARGET_KIND_UPDATE:
8585 case GF_OMP_TARGET_KIND_ENTER_DATA:
8586 case GF_OMP_TARGET_KIND_EXIT_DATA:
8587 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8588 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8589 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8590 /* ..., other than for those stand-alone directives... */
8591 region = NULL;
8592 break;
8593 default:
8594 gcc_unreachable ();
8597 else if (code == GIMPLE_OMP_ORDERED
8598 && omp_find_clause (gimple_omp_ordered_clauses
8599 (as_a <gomp_ordered *> (stmt)),
8600 OMP_CLAUSE_DEPEND))
8601 /* #pragma omp ordered depend is also just a stand-alone
8602 directive. */
8603 region = NULL;
8604 else if (code == GIMPLE_OMP_TASK
8605 && gimple_omp_task_taskwait_p (stmt))
8606 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8607 region = NULL;
8608 /* ..., this directive becomes the parent for a new region. */
8609 if (region)
8610 parent = region;
8614 if (single_tree && !parent)
8615 return;
8617 for (son = first_dom_son (CDI_DOMINATORS, bb);
8618 son;
8619 son = next_dom_son (CDI_DOMINATORS, son))
8620 build_omp_regions_1 (son, parent, single_tree);
8623 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8624 root_omp_region. */
8626 static void
8627 build_omp_regions_root (basic_block root)
8629 gcc_assert (root_omp_region == NULL);
8630 build_omp_regions_1 (root, NULL, true);
8631 gcc_assert (root_omp_region != NULL);
8634 /* Expand the OMP construct (and its subconstructs) starting in HEAD. */
8636 void
8637 omp_expand_local (basic_block head)
8639 build_omp_regions_root (head);
8640 if (dump_file && (dump_flags & TDF_DETAILS))
8642 fprintf (dump_file, "\nOMP region tree\n\n");
8643 dump_omp_region (dump_file, root_omp_region, 0);
8644 fprintf (dump_file, "\n");
8647 remove_exit_barriers (root_omp_region);
8648 expand_omp (root_omp_region);
8650 omp_free_regions ();
8653 /* Scan the CFG and build a tree of OMP regions, storing the root in
8654 root_omp_region. */
8656 static void
8657 build_omp_regions (void)
8659 gcc_assert (root_omp_region == NULL);
8660 calculate_dominance_info (CDI_DOMINATORS);
8661 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8664 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8666 static unsigned int
8667 execute_expand_omp (void)
8669 build_omp_regions ();
8671 if (!root_omp_region)
8672 return 0;
8674 if (dump_file)
8676 fprintf (dump_file, "\nOMP region tree\n\n");
8677 dump_omp_region (dump_file, root_omp_region, 0);
8678 fprintf (dump_file, "\n");
8681 remove_exit_barriers (root_omp_region);
8683 expand_omp (root_omp_region);
8685 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8686 verify_loop_structure ();
8687 cleanup_tree_cfg ();
8689 omp_free_regions ();
8691 return 0;
8694 /* OMP expansion -- the default pass, run before creation of SSA form. */
8696 namespace {
8698 const pass_data pass_data_expand_omp =
8700 GIMPLE_PASS, /* type */
8701 "ompexp", /* name */
8702 OPTGROUP_OMP, /* optinfo_flags */
8703 TV_NONE, /* tv_id */
8704 PROP_gimple_any, /* properties_required */
8705 PROP_gimple_eomp, /* properties_provided */
8706 0, /* properties_destroyed */
8707 0, /* todo_flags_start */
8708 0, /* todo_flags_finish */

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
                    || flag_openmp_simd != 0)
                   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
         But often, there is nothing to do.  */
      if (!gate)
        return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};
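
/* This variant runs on functions that are already in SSA form, so the
   CFG is cleaned up and alias information rebuilt once expansion is
   done (see todo_flags_finish above).  */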

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;
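
    /* A taskwait with a depend clause is represented as a GIMPLE_OMP_TASK
       with gimple_omp_task_taskwait_p set; like the other stand-alone
       directives it does not enclose the following blocks, so step back
       out to the enclosing region.  */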
    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
        cur_region = cur_region->outer;
      break;
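
    /* Likewise, "#pragma omp ordered depend (...)" is stand-alone and
       does not enclose the following blocks.  */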
    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
                             (as_a <gomp_ordered *> (last)),
                           OMP_CLAUSE_DEPEND))
        cur_region = cur_region->outer;
      break;
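
    /* Target constructs with a body (including the OpenACC compute and
       data constructs) enclose the following blocks; the stand-alone
       data-movement forms handled below step back out immediately.  */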
    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;
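
    /* The block holding GIMPLE_OMP_SECTIONS_SWITCH gets its outgoing
       edges (to each section and to the exit) only when the matching
       GIMPLE_OMP_CONTINUE is processed below, so no fallthru edge is
       created here.  */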
    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark the successor edges of GIMPLE_OMP_FOR and
             GIMPLE_OMP_CONTINUE as abnormal to prevent them from
             being split.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.  */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i ; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;
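
        /* For a task body, control simply falls through to the code
           after the GIMPLE_OMP_CONTINUE.  */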
        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}
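
/* Garbage collection roots for this file, generated by gengtype.  */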
#include "gt-omp-expand.h"