1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
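/* For illustration, assuming a hypothetical vectorization factor vf of 8:
   a chunk_size of 10 becomes (10 + 8 - 1) & -8 == 16, i.e. the chunk size
   is rounded up to the next multiple of vf so that a simd chunk never
   splits a vector.  */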
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 /* Give up for task reductions on the parallel; while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
331 if (region->inner->type == GIMPLE_OMP_FOR)
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
349 || omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
350 return;
352 else if (region->inner->type == GIMPLE_OMP_SECTIONS
353 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
354 OMP_CLAUSE__REDUCTEMP_))
355 return;
357 region->is_combined_parallel = true;
358 region->inner->is_combined_parallel = true;
359 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
363 /* Debugging dumps for parallel regions. */
364 void dump_omp_region (FILE *, struct omp_region *, int);
365 void debug_omp_region (struct omp_region *);
366 void debug_all_omp_regions (void);
368 /* Dump the parallel region tree rooted at REGION. */
370 void
371 dump_omp_region (FILE *file, struct omp_region *region, int indent)
373 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
374 gimple_code_name[region->type]);
376 if (region->inner)
377 dump_omp_region (file, region->inner, indent + 4);
379 if (region->cont)
381 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
382 region->cont->index);
385 if (region->exit)
386 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
387 region->exit->index);
388 else
389 fprintf (file, "%*s[no exit marker]\n", indent, "");
391 if (region->next)
392 dump_omp_region (file, region->next, indent);
395 DEBUG_FUNCTION void
396 debug_omp_region (struct omp_region *region)
398 dump_omp_region (stderr, region, 0);
401 DEBUG_FUNCTION void
402 debug_all_omp_regions (void)
404 dump_omp_region (stderr, root_omp_region, 0);
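/* For a parallel region containing a single worksharing loop, the dump has
   roughly this shape (the basic block numbers are only an example):

     bb 2: GIMPLE_OMP_PARALLEL
         bb 3: GIMPLE_OMP_FOR
         bb 5: GIMPLE_OMP_CONTINUE
         bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */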
407 /* Create a new omp region with entry block BB and directive kind TYPE, inside region PARENT. */
409 static struct omp_region *
410 new_omp_region (basic_block bb, enum gimple_code type,
411 struct omp_region *parent)
413 struct omp_region *region = XCNEW (struct omp_region);
415 region->outer = parent;
416 region->entry = bb;
417 region->type = type;
419 if (parent)
421 /* This is a nested region. Add it to the list of inner
422 regions in PARENT. */
423 region->next = parent->inner;
424 parent->inner = region;
426 else
428 /* This is a toplevel region. Add it to the list of toplevel
429 regions in ROOT_OMP_REGION. */
430 region->next = root_omp_region;
431 root_omp_region = region;
434 return region;
437 /* Release the memory associated with the region tree rooted at REGION. */
439 static void
440 free_omp_region_1 (struct omp_region *region)
442 struct omp_region *i, *n;
444 for (i = region->inner; i ; i = n)
446 n = i->next;
447 free_omp_region_1 (i);
450 free (region);
453 /* Release the memory for the entire omp region tree. */
455 void
456 omp_free_regions (void)
458 struct omp_region *r, *n;
459 for (r = root_omp_region; r ; r = n)
461 n = r->next;
462 free_omp_region_1 (r);
464 root_omp_region = NULL;
467 /* A convenience function to build an empty GIMPLE_COND with just the
468 condition. */
470 static gcond *
471 gimple_build_cond_empty (tree cond)
473 enum tree_code pred_code;
474 tree lhs, rhs;
476 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
477 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
480 /* Return true if a parallel REGION is within a declare target function or
481 within a target region and is not a part of a gridified target. */
483 static bool
484 parallel_needs_hsa_kernel_p (struct omp_region *region)
486 bool indirect = false;
487 for (region = region->outer; region; region = region->outer)
489 if (region->type == GIMPLE_OMP_PARALLEL)
490 indirect = true;
491 else if (region->type == GIMPLE_OMP_TARGET)
493 gomp_target *tgt_stmt
494 = as_a <gomp_target *> (last_stmt (region->entry));
496 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
497 OMP_CLAUSE__GRIDDIM_))
498 return indirect;
499 else
500 return true;
504 if (lookup_attribute ("omp declare target",
505 DECL_ATTRIBUTES (current_function_decl)))
506 return true;
508 return false;
511 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
512 Add CHILD_FNDECL to decl chain of the supercontext of the block
513 ENTRY_BLOCK - this is the block which originally contained the
514 code from which CHILD_FNDECL was created.
516 Together, these actions ensure that the debug info for the outlined
517 function will be emitted with the correct lexical scope. */
519 static void
520 adjust_context_and_scope (struct omp_region *region, tree entry_block,
521 tree child_fndecl)
523 tree parent_fndecl = NULL_TREE;
524 gimple *entry_stmt;
525 /* OMP expansion expands inner regions before outer ones, so if
526 we e.g. have explicit task region nested in parallel region, when
527 expanding the task region current_function_decl will be the original
528 source function, but we actually want to use as context the child
529 function of the parallel. */
530 for (region = region->outer;
531 region && parent_fndecl == NULL_TREE; region = region->outer)
532 switch (region->type)
534 case GIMPLE_OMP_PARALLEL:
535 case GIMPLE_OMP_TASK:
536 case GIMPLE_OMP_TEAMS:
537 entry_stmt = last_stmt (region->entry);
538 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
539 break;
540 case GIMPLE_OMP_TARGET:
541 entry_stmt = last_stmt (region->entry);
542 parent_fndecl
543 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
544 break;
545 default:
546 break;
549 if (parent_fndecl == NULL_TREE)
550 parent_fndecl = current_function_decl;
551 DECL_CONTEXT (child_fndecl) = parent_fndecl;
553 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
555 tree b = BLOCK_SUPERCONTEXT (entry_block);
556 if (TREE_CODE (b) == BLOCK)
558 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
559 BLOCK_VARS (b) = child_fndecl;
564 /* Build the function calls to GOMP_parallel etc to actually
565 generate the parallel operation. REGION is the parallel region
566 being expanded. BB is the block where to insert the code. WS_ARGS
567 will be set if this is a call to a combined parallel+workshare
568 construct, it contains the list of additional arguments needed by
569 the workshare construct. */
571 static void
572 expand_parallel_call (struct omp_region *region, basic_block bb,
573 gomp_parallel *entry_stmt,
574 vec<tree, va_gc> *ws_args)
576 tree t, t1, t2, val, cond, c, clauses, flags;
577 gimple_stmt_iterator gsi;
578 gimple *stmt;
579 enum built_in_function start_ix;
580 int start_ix2;
581 location_t clause_loc;
582 vec<tree, va_gc> *args;
584 clauses = gimple_omp_parallel_clauses (entry_stmt);
586 /* Determine what flavor of GOMP_parallel we will be
587 emitting. */
588 start_ix = BUILT_IN_GOMP_PARALLEL;
589 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
590 if (rtmp)
591 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
592 else if (is_combined_parallel (region))
594 switch (region->inner->type)
596 case GIMPLE_OMP_FOR:
597 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
598 switch (region->inner->sched_kind)
600 case OMP_CLAUSE_SCHEDULE_RUNTIME:
601 if ((region->inner->sched_modifiers
602 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
603 start_ix2 = 6;
604 else if ((region->inner->sched_modifiers
605 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
606 start_ix2 = 7;
607 else
608 start_ix2 = 3;
609 break;
610 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
611 case OMP_CLAUSE_SCHEDULE_GUIDED:
612 if ((region->inner->sched_modifiers
613 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
615 start_ix2 = 3 + region->inner->sched_kind;
616 break;
618 /* FALLTHRU */
619 default:
620 start_ix2 = region->inner->sched_kind;
621 break;
623 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
624 start_ix = (enum built_in_function) start_ix2;
625 break;
626 case GIMPLE_OMP_SECTIONS:
627 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
628 break;
629 default:
630 gcc_unreachable ();
634 /* By default, the value of NUM_THREADS is zero (selected at run time)
635 and there is no conditional. */
636 cond = NULL_TREE;
637 val = build_int_cst (unsigned_type_node, 0);
638 flags = build_int_cst (unsigned_type_node, 0);
640 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
641 if (c)
642 cond = OMP_CLAUSE_IF_EXPR (c);
644 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
645 if (c)
647 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
648 clause_loc = OMP_CLAUSE_LOCATION (c);
650 else
651 clause_loc = gimple_location (entry_stmt);
653 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
654 if (c)
655 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
657 /* Ensure 'val' is of the correct type. */
658 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
660 /* If we found the clause 'if (cond)', build either
661 (cond != 0) or (cond ? val : 1u). */
662 if (cond)
664 cond = gimple_boolify (cond);
666 if (integer_zerop (val))
667 val = fold_build2_loc (clause_loc,
668 EQ_EXPR, unsigned_type_node, cond,
669 build_int_cst (TREE_TYPE (cond), 0));
670 else
672 basic_block cond_bb, then_bb, else_bb;
673 edge e, e_then, e_else;
674 tree tmp_then, tmp_else, tmp_join, tmp_var;
676 tmp_var = create_tmp_var (TREE_TYPE (val));
677 if (gimple_in_ssa_p (cfun))
679 tmp_then = make_ssa_name (tmp_var);
680 tmp_else = make_ssa_name (tmp_var);
681 tmp_join = make_ssa_name (tmp_var);
683 else
685 tmp_then = tmp_var;
686 tmp_else = tmp_var;
687 tmp_join = tmp_var;
690 e = split_block_after_labels (bb);
691 cond_bb = e->src;
692 bb = e->dest;
693 remove_edge (e);
695 then_bb = create_empty_bb (cond_bb);
696 else_bb = create_empty_bb (then_bb);
697 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
698 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
700 stmt = gimple_build_cond_empty (cond);
701 gsi = gsi_start_bb (cond_bb);
702 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
704 gsi = gsi_start_bb (then_bb);
705 expand_omp_build_assign (&gsi, tmp_then, val, true);
707 gsi = gsi_start_bb (else_bb);
708 expand_omp_build_assign (&gsi, tmp_else,
709 build_int_cst (unsigned_type_node, 1),
710 true);
712 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
713 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
714 add_bb_to_loop (then_bb, cond_bb->loop_father);
715 add_bb_to_loop (else_bb, cond_bb->loop_father);
716 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
717 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
719 if (gimple_in_ssa_p (cfun))
721 gphi *phi = create_phi_node (tmp_join, bb);
722 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
723 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
726 val = tmp_join;
729 gsi = gsi_start_bb (bb);
730 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
731 false, GSI_CONTINUE_LINKING);
734 gsi = gsi_last_nondebug_bb (bb);
735 t = gimple_omp_parallel_data_arg (entry_stmt);
736 if (t == NULL)
737 t1 = null_pointer_node;
738 else
739 t1 = build_fold_addr_expr (t);
740 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
741 t2 = build_fold_addr_expr (child_fndecl);
743 vec_alloc (args, 4 + vec_safe_length (ws_args));
744 args->quick_push (t2);
745 args->quick_push (t1);
746 args->quick_push (val);
747 if (ws_args)
748 args->splice (*ws_args);
749 args->quick_push (flags);
751 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
752 builtin_decl_explicit (start_ix), args);
754 if (rtmp)
756 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
757 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
758 fold_convert (type,
759 fold_convert (pointer_sized_int_node, t)));
761 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
762 false, GSI_CONTINUE_LINKING);
764 if (hsa_gen_requested_p ()
765 && parallel_needs_hsa_kernel_p (region))
767 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
768 hsa_register_kernel (child_cnode);
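/* The call emitted above has roughly one of these shapes (the argument
   names are only illustrative):

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   or, for a combined parallel+workshare region such as a guided-schedule
   loop, with the WS_ARGS collected by get_ws_args_for spliced in:

     GOMP_parallel_loop_guided (child_fn, &.omp_data_o, num_threads,
                                n1, n2, step, chunk_size, flags);  */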
772 /* Build the function call to GOMP_task to actually
773 generate the task operation. BB is the block where to insert the code. */
775 static void
776 expand_task_call (struct omp_region *region, basic_block bb,
777 gomp_task *entry_stmt)
779 tree t1, t2, t3;
780 gimple_stmt_iterator gsi;
781 location_t loc = gimple_location (entry_stmt);
783 tree clauses = gimple_omp_task_clauses (entry_stmt);
785 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
786 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
787 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
788 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
789 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
790 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
792 unsigned int iflags
793 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
794 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
795 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
797 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
798 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
799 tree num_tasks = NULL_TREE;
800 bool ull = false;
801 if (taskloop_p)
803 gimple *g = last_stmt (region->outer->entry);
804 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
805 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
806 struct omp_for_data fd;
807 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
808 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
809 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
810 OMP_CLAUSE__LOOPTEMP_);
811 startvar = OMP_CLAUSE_DECL (startvar);
812 endvar = OMP_CLAUSE_DECL (endvar);
813 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
814 if (fd.loop.cond_code == LT_EXPR)
815 iflags |= GOMP_TASK_FLAG_UP;
816 tree tclauses = gimple_omp_for_clauses (g);
817 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
818 if (num_tasks)
819 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
820 else
822 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
823 if (num_tasks)
825 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
826 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
828 else
829 num_tasks = integer_zero_node;
831 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
832 if (ifc == NULL_TREE)
833 iflags |= GOMP_TASK_FLAG_IF;
834 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
835 iflags |= GOMP_TASK_FLAG_NOGROUP;
836 ull = fd.iter_type == long_long_unsigned_type_node;
837 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
838 iflags |= GOMP_TASK_FLAG_REDUCTION;
840 else if (priority)
841 iflags |= GOMP_TASK_FLAG_PRIORITY;
843 tree flags = build_int_cst (unsigned_type_node, iflags);
845 tree cond = boolean_true_node;
846 if (ifc)
848 if (taskloop_p)
850 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
851 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
852 build_int_cst (unsigned_type_node,
853 GOMP_TASK_FLAG_IF),
854 build_int_cst (unsigned_type_node, 0));
855 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
856 flags, t);
858 else
859 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
862 if (finalc)
864 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
865 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
866 build_int_cst (unsigned_type_node,
867 GOMP_TASK_FLAG_FINAL),
868 build_int_cst (unsigned_type_node, 0));
869 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
871 if (depend)
872 depend = OMP_CLAUSE_DECL (depend);
873 else
874 depend = build_int_cst (ptr_type_node, 0);
875 if (priority)
876 priority = fold_convert (integer_type_node,
877 OMP_CLAUSE_PRIORITY_EXPR (priority));
878 else
879 priority = integer_zero_node;
881 gsi = gsi_last_nondebug_bb (bb);
882 tree t = gimple_omp_task_data_arg (entry_stmt);
883 if (t == NULL)
884 t2 = null_pointer_node;
885 else
886 t2 = build_fold_addr_expr_loc (loc, t);
887 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
888 t = gimple_omp_task_copy_fn (entry_stmt);
889 if (t == NULL)
890 t3 = null_pointer_node;
891 else
892 t3 = build_fold_addr_expr_loc (loc, t);
894 if (taskloop_p)
895 t = build_call_expr (ull
896 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
897 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
898 11, t1, t2, t3,
899 gimple_omp_task_arg_size (entry_stmt),
900 gimple_omp_task_arg_align (entry_stmt), flags,
901 num_tasks, priority, startvar, endvar, step);
902 else
903 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
904 9, t1, t2, t3,
905 gimple_omp_task_arg_size (entry_stmt),
906 gimple_omp_task_arg_align (entry_stmt), cond, flags,
907 depend, priority);
909 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
910 false, GSI_CONTINUE_LINKING);
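/* The generated call is roughly (argument names only illustrative):

     GOMP_task (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
                if_clause, flags, depend, priority);

   or, for a taskloop:

     GOMP_taskloop (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
                    flags, num_tasks, priority, start, end, step);  */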
913 /* Build the function call to GOMP_taskwait_depend to actually
914 generate the taskwait operation. BB is the block where to insert the
915 code. */
917 static void
918 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
920 tree clauses = gimple_omp_task_clauses (entry_stmt);
921 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
922 if (depend == NULL_TREE)
923 return;
925 depend = OMP_CLAUSE_DECL (depend);
927 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
928 tree t
929 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
930 1, depend);
932 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
933 false, GSI_CONTINUE_LINKING);
936 /* Build the function call to GOMP_teams_reg to actually
937 generate the host teams operation. REGION is the teams region
938 being expanded. BB is the block where to insert the code. */
940 static void
941 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
943 tree clauses = gimple_omp_teams_clauses (entry_stmt);
944 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
945 if (num_teams == NULL_TREE)
946 num_teams = build_int_cst (unsigned_type_node, 0);
947 else
949 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
950 num_teams = fold_convert (unsigned_type_node, num_teams);
952 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
953 if (thread_limit == NULL_TREE)
954 thread_limit = build_int_cst (unsigned_type_node, 0);
955 else
957 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
958 thread_limit = fold_convert (unsigned_type_node, thread_limit);
961 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
962 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
963 if (t == NULL)
964 t1 = null_pointer_node;
965 else
966 t1 = build_fold_addr_expr (t);
967 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
968 tree t2 = build_fold_addr_expr (child_fndecl);
970 vec<tree, va_gc> *args;
971 vec_alloc (args, 5);
972 args->quick_push (t2);
973 args->quick_push (t1);
974 args->quick_push (num_teams);
975 args->quick_push (thread_limit);
976 /* For future extensibility. */
977 args->quick_push (build_zero_cst (unsigned_type_node));
979 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
980 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
981 args);
983 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
984 false, GSI_CONTINUE_LINKING);
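/* The generated call is roughly:

     GOMP_teams_reg (child_fn, &.omp_data_o, num_teams, thread_limit, 0);

   where the trailing zero is the argument reserved for future
   extensibility.  */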
987 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
989 static tree
990 vec2chain (vec<tree, va_gc> *v)
992 tree chain = NULL_TREE, t;
993 unsigned ix;
995 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
997 DECL_CHAIN (t) = chain;
998 chain = t;
1001 return chain;
1004 /* Remove barriers in REGION->EXIT's block. Note that this is only
1005 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1006 is an implicit barrier, any barrier that a workshare inside the
1007 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
1008 can now be removed. */
1010 static void
1011 remove_exit_barrier (struct omp_region *region)
1013 gimple_stmt_iterator gsi;
1014 basic_block exit_bb;
1015 edge_iterator ei;
1016 edge e;
1017 gimple *stmt;
1018 int any_addressable_vars = -1;
1020 exit_bb = region->exit;
1022 /* If the parallel region doesn't return, we don't have REGION->EXIT
1023 block at all. */
1024 if (! exit_bb)
1025 return;
1027 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1028 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1029 statements that can appear in between are extremely limited -- no
1030 memory operations at all. Here, we allow nothing at all, so the
1031 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1032 gsi = gsi_last_nondebug_bb (exit_bb);
1033 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1034 gsi_prev_nondebug (&gsi);
1035 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1036 return;
1038 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1040 gsi = gsi_last_nondebug_bb (e->src);
1041 if (gsi_end_p (gsi))
1042 continue;
1043 stmt = gsi_stmt (gsi);
1044 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1045 && !gimple_omp_return_nowait_p (stmt))
1047 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1048 in many cases. If there could be tasks queued, the barrier
1049 might be needed to let the tasks run before some local
1050 variable of the parallel that the task uses as shared
1051 runs out of scope. The task can be spawned either
1052 from within current function (this would be easy to check)
1053 or from some function it calls and gets passed an address
1054 of such a variable. */
1055 if (any_addressable_vars < 0)
1057 gomp_parallel *parallel_stmt
1058 = as_a <gomp_parallel *> (last_stmt (region->entry));
1059 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1060 tree local_decls, block, decl;
1061 unsigned ix;
1063 any_addressable_vars = 0;
1064 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1065 if (TREE_ADDRESSABLE (decl))
1067 any_addressable_vars = 1;
1068 break;
1070 for (block = gimple_block (stmt);
1071 !any_addressable_vars
1072 && block
1073 && TREE_CODE (block) == BLOCK;
1074 block = BLOCK_SUPERCONTEXT (block))
1076 for (local_decls = BLOCK_VARS (block);
1077 local_decls;
1078 local_decls = DECL_CHAIN (local_decls))
1079 if (TREE_ADDRESSABLE (local_decls))
1081 any_addressable_vars = 1;
1082 break;
1084 if (block == gimple_block (parallel_stmt))
1085 break;
1088 if (!any_addressable_vars)
1089 gimple_omp_return_set_nowait (stmt);
1094 static void
1095 remove_exit_barriers (struct omp_region *region)
1097 if (region->type == GIMPLE_OMP_PARALLEL)
1098 remove_exit_barrier (region);
1100 if (region->inner)
1102 region = region->inner;
1103 remove_exit_barriers (region);
1104 while (region->next)
1106 region = region->next;
1107 remove_exit_barriers (region);
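/* As an example of the optimization above: in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         ...
     }

   the worksharing loop's implicit barrier is immediately followed by the
   parallel region's own barrier, so it can be marked nowait, provided no
   queued task could still reference an addressable local of the parallel
   (the check performed in remove_exit_barrier above).  */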
1112 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1113 calls. These can't be declared as const functions, but
1114 within one parallel body they are constant, so they can be
1115 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1116 which are declared const. Similarly for task body, except
1117 that in untied task omp_get_thread_num () can change at any task
1118 scheduling point. */
1120 static void
1121 optimize_omp_library_calls (gimple *entry_stmt)
1123 basic_block bb;
1124 gimple_stmt_iterator gsi;
1125 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1126 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1127 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1128 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1129 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1130 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1131 OMP_CLAUSE_UNTIED) != NULL);
1133 FOR_EACH_BB_FN (bb, cfun)
1134 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1136 gimple *call = gsi_stmt (gsi);
1137 tree decl;
1139 if (is_gimple_call (call)
1140 && (decl = gimple_call_fndecl (call))
1141 && DECL_EXTERNAL (decl)
1142 && TREE_PUBLIC (decl)
1143 && DECL_INITIAL (decl) == NULL)
1145 tree built_in;
1147 if (DECL_NAME (decl) == thr_num_id)
1149 /* In #pragma omp task untied omp_get_thread_num () can change
1150 during the execution of the task region. */
1151 if (untied_task)
1152 continue;
1153 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1155 else if (DECL_NAME (decl) == num_thr_id)
1156 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1157 else
1158 continue;
1160 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1161 || gimple_call_num_args (call) != 0)
1162 continue;
1164 if (flag_exceptions && !TREE_NOTHROW (decl))
1165 continue;
1167 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1168 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1169 TREE_TYPE (TREE_TYPE (built_in))))
1170 continue;
1172 gimple_call_set_fndecl (call, built_in);
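/* E.g. a call to the library function omp_get_num_threads () inside the
   outlined parallel body is redirected to __builtin_omp_get_num_threads (),
   which is declared const and can therefore be CSEd by later passes.  */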
1177 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1178 regimplified. */
1180 static tree
1181 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1183 tree t = *tp;
1185 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1186 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1187 return t;
1189 if (TREE_CODE (t) == ADDR_EXPR)
1190 recompute_tree_invariant_for_addr_expr (t);
1192 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1193 return NULL_TREE;
1196 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1198 static void
1199 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1200 bool after)
1202 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1203 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1204 !after, after ? GSI_CONTINUE_LINKING
1205 : GSI_SAME_STMT);
1206 gimple *stmt = gimple_build_assign (to, from);
1207 if (after)
1208 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1209 else
1210 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1211 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1212 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1214 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1215 gimple_regimplify_operands (stmt, &gsi);
1219 /* Expand the OpenMP parallel or task directive starting at REGION. */
1221 static void
1222 expand_omp_taskreg (struct omp_region *region)
1224 basic_block entry_bb, exit_bb, new_bb;
1225 struct function *child_cfun;
1226 tree child_fn, block, t;
1227 gimple_stmt_iterator gsi;
1228 gimple *entry_stmt, *stmt;
1229 edge e;
1230 vec<tree, va_gc> *ws_args;
1232 entry_stmt = last_stmt (region->entry);
1233 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1234 && gimple_omp_task_taskwait_p (entry_stmt))
1236 new_bb = region->entry;
1237 gsi = gsi_last_nondebug_bb (region->entry);
1238 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1239 gsi_remove (&gsi, true);
1240 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1241 return;
1244 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1245 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1247 entry_bb = region->entry;
1248 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1249 exit_bb = region->cont;
1250 else
1251 exit_bb = region->exit;
1253 if (is_combined_parallel (region))
1254 ws_args = region->ws_args;
1255 else
1256 ws_args = NULL;
1258 if (child_cfun->cfg)
1260 /* Due to inlining, it may happen that we have already outlined
1261 the region, in which case all we need to do is make the
1262 sub-graph unreachable and emit the parallel call. */
1263 edge entry_succ_e, exit_succ_e;
1265 entry_succ_e = single_succ_edge (entry_bb);
1267 gsi = gsi_last_nondebug_bb (entry_bb);
1268 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1269 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1270 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1271 gsi_remove (&gsi, true);
1273 new_bb = entry_bb;
1274 if (exit_bb)
1276 exit_succ_e = single_succ_edge (exit_bb);
1277 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1279 remove_edge_and_dominated_blocks (entry_succ_e);
1281 else
1283 unsigned srcidx, dstidx, num;
1285 /* If the parallel region needs data sent from the parent
1286 function, then the very first statement (except possible
1287 tree profile counter updates) of the parallel body
1288 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1289 &.OMP_DATA_O is passed as an argument to the child function,
1290 we need to replace it with the argument as seen by the child
1291 function.
1293 In most cases, this will end up being the identity assignment
1294 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1295 a function call that has been inlined, the original PARM_DECL
1296 .OMP_DATA_I may have been converted into a different local
1297 variable. In which case, we need to keep the assignment. */
1298 if (gimple_omp_taskreg_data_arg (entry_stmt))
1300 basic_block entry_succ_bb
1301 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1302 : FALLTHRU_EDGE (entry_bb)->dest;
1303 tree arg;
1304 gimple *parcopy_stmt = NULL;
1306 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1308 gimple *stmt;
1310 gcc_assert (!gsi_end_p (gsi));
1311 stmt = gsi_stmt (gsi);
1312 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1313 continue;
1315 if (gimple_num_ops (stmt) == 2)
1317 tree arg = gimple_assign_rhs1 (stmt);
1319 /* We're ignoring the subcode because we're
1320 effectively doing a STRIP_NOPS. */
1322 if (TREE_CODE (arg) == ADDR_EXPR
1323 && (TREE_OPERAND (arg, 0)
1324 == gimple_omp_taskreg_data_arg (entry_stmt)))
1326 parcopy_stmt = stmt;
1327 break;
1332 gcc_assert (parcopy_stmt != NULL);
1333 arg = DECL_ARGUMENTS (child_fn);
1335 if (!gimple_in_ssa_p (cfun))
1337 if (gimple_assign_lhs (parcopy_stmt) == arg)
1338 gsi_remove (&gsi, true);
1339 else
1341 /* ?? Is setting the subcode really necessary ?? */
1342 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1343 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1346 else
1348 tree lhs = gimple_assign_lhs (parcopy_stmt);
1349 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1350 /* We'd like to set the rhs to the default def in the child_fn,
1351 but it's too early to create ssa names in the child_fn.
1352 Instead, we set the rhs to the parm. In
1353 move_sese_region_to_fn, we introduce a default def for the
1354 parm, map the parm to its default def, and once we encounter
1355 this stmt, replace the parm with the default def. */
1356 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1357 update_stmt (parcopy_stmt);
1361 /* Declare local variables needed in CHILD_CFUN. */
1362 block = DECL_INITIAL (child_fn);
1363 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1364 /* The gimplifier could record temporaries in parallel/task block
1365 rather than in containing function's local_decls chain,
1366 which would mean cgraph missed finalizing them. Do it now. */
1367 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1368 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1369 varpool_node::finalize_decl (t);
1370 DECL_SAVED_TREE (child_fn) = NULL;
1371 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1372 gimple_set_body (child_fn, NULL);
1373 TREE_USED (block) = 1;
1375 /* Reset DECL_CONTEXT on function arguments. */
1376 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1377 DECL_CONTEXT (t) = child_fn;
1379 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1380 so that it can be moved to the child function. */
1381 gsi = gsi_last_nondebug_bb (entry_bb);
1382 stmt = gsi_stmt (gsi);
1383 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1384 || gimple_code (stmt) == GIMPLE_OMP_TASK
1385 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1386 e = split_block (entry_bb, stmt);
1387 gsi_remove (&gsi, true);
1388 entry_bb = e->dest;
1389 edge e2 = NULL;
1390 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1391 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1392 else
1394 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1395 gcc_assert (e2->dest == region->exit);
1396 remove_edge (BRANCH_EDGE (entry_bb));
1397 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1398 gsi = gsi_last_nondebug_bb (region->exit);
1399 gcc_assert (!gsi_end_p (gsi)
1400 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1401 gsi_remove (&gsi, true);
1404 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1405 if (exit_bb)
1407 gsi = gsi_last_nondebug_bb (exit_bb);
1408 gcc_assert (!gsi_end_p (gsi)
1409 && (gimple_code (gsi_stmt (gsi))
1410 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1411 stmt = gimple_build_return (NULL);
1412 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1413 gsi_remove (&gsi, true);
1416 /* Move the parallel region into CHILD_CFUN. */
1418 if (gimple_in_ssa_p (cfun))
1420 init_tree_ssa (child_cfun);
1421 init_ssa_operands (child_cfun);
1422 child_cfun->gimple_df->in_ssa_p = true;
1423 block = NULL_TREE;
1425 else
1426 block = gimple_block (entry_stmt);
1428 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1429 if (exit_bb)
1430 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1431 if (e2)
1433 basic_block dest_bb = e2->dest;
1434 if (!exit_bb)
1435 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1436 remove_edge (e2);
1437 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1439 /* When the OMP expansion process cannot guarantee an up-to-date
1440 loop tree, arrange for the child function to fix up loops. */
1441 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1442 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1444 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1445 num = vec_safe_length (child_cfun->local_decls);
1446 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1448 t = (*child_cfun->local_decls)[srcidx];
1449 if (DECL_CONTEXT (t) == cfun->decl)
1450 continue;
1451 if (srcidx != dstidx)
1452 (*child_cfun->local_decls)[dstidx] = t;
1453 dstidx++;
1455 if (dstidx != num)
1456 vec_safe_truncate (child_cfun->local_decls, dstidx);
1458 /* Inform the callgraph about the new function. */
1459 child_cfun->curr_properties = cfun->curr_properties;
1460 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1461 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1462 cgraph_node *node = cgraph_node::get_create (child_fn);
1463 node->parallelized_function = 1;
1464 cgraph_node::add_new_function (child_fn, true);
1466 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1467 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1469 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1470 fixed in a following pass. */
1471 push_cfun (child_cfun);
1472 if (need_asm)
1473 assign_assembler_name_if_needed (child_fn);
1475 if (optimize)
1476 optimize_omp_library_calls (entry_stmt);
1477 update_max_bb_count ();
1478 cgraph_edge::rebuild_edges ();
1480 /* Some EH regions might become dead, see PR34608. If
1481 pass_cleanup_cfg isn't the first pass to happen with the
1482 new child, these dead EH edges might cause problems.
1483 Clean them up now. */
1484 if (flag_exceptions)
1486 basic_block bb;
1487 bool changed = false;
1489 FOR_EACH_BB_FN (bb, cfun)
1490 changed |= gimple_purge_dead_eh_edges (bb);
1491 if (changed)
1492 cleanup_tree_cfg ();
1494 if (gimple_in_ssa_p (cfun))
1495 update_ssa (TODO_update_ssa);
1496 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1497 verify_loop_structure ();
1498 pop_cfun ();
1500 if (dump_file && !gimple_in_ssa_p (cfun))
1502 omp_any_child_fn_dumped = true;
1503 dump_function_header (dump_file, child_fn, dump_flags);
1504 dump_function_to_file (child_fn, dump_file, dump_flags);
1508 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1510 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1511 expand_parallel_call (region, new_bb,
1512 as_a <gomp_parallel *> (entry_stmt), ws_args);
1513 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1514 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1515 else
1516 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1517 if (gimple_in_ssa_p (cfun))
1518 update_ssa (TODO_update_ssa_only_virtuals);
1521 /* Information about members of an OpenACC collapsed loop nest. */
1523 struct oacc_collapse
1525 tree base; /* Base value. */
1526 tree iters; /* Number of steps. */
1527 tree step; /* Step size. */
1528 tree tile; /* Tile increment (if tiled). */
1529 tree outer; /* Tile iterator var. */
1532 /* Helper for expand_oacc_for. Determine collapsed loop information.
1533 Fill in COUNTS array. Emit any initialization code before GSI.
1534 Return the calculated outer loop bound of BOUND_TYPE. */
1536 static tree
1537 expand_oacc_collapse_init (const struct omp_for_data *fd,
1538 gimple_stmt_iterator *gsi,
1539 oacc_collapse *counts, tree bound_type,
1540 location_t loc)
1542 tree tiling = fd->tiling;
1543 tree total = build_int_cst (bound_type, 1);
1544 int ix;
1546 gcc_assert (integer_onep (fd->loop.step));
1547 gcc_assert (integer_zerop (fd->loop.n1));
1549 /* When tiling, the first operand of the tile clause applies to the
1550 innermost loop, and we work outwards from there. Seems
1551 backwards, but whatever. */
1552 for (ix = fd->collapse; ix--;)
1554 const omp_for_data_loop *loop = &fd->loops[ix];
1556 tree iter_type = TREE_TYPE (loop->v);
1557 tree diff_type = iter_type;
1558 tree plus_type = iter_type;
1560 gcc_assert (loop->cond_code == fd->loop.cond_code);
1562 if (POINTER_TYPE_P (iter_type))
1563 plus_type = sizetype;
1564 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1565 diff_type = signed_type_for (diff_type);
1566 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1567 diff_type = integer_type_node;
1569 if (tiling)
1571 tree num = build_int_cst (integer_type_node, fd->collapse);
1572 tree loop_no = build_int_cst (integer_type_node, ix);
1573 tree tile = TREE_VALUE (tiling);
1574 gcall *call
1575 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1576 /* gwv-outer=*/integer_zero_node,
1577 /* gwv-inner=*/integer_zero_node);
1579 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1580 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1581 gimple_call_set_lhs (call, counts[ix].tile);
1582 gimple_set_location (call, loc);
1583 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1585 tiling = TREE_CHAIN (tiling);
1587 else
1589 counts[ix].tile = NULL;
1590 counts[ix].outer = loop->v;
1593 tree b = loop->n1;
1594 tree e = loop->n2;
1595 tree s = loop->step;
1596 bool up = loop->cond_code == LT_EXPR;
1597 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1598 bool negating;
1599 tree expr;
1601 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1602 true, GSI_SAME_STMT);
1603 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1604 true, GSI_SAME_STMT);
1606 /* Convert the step, avoiding possible unsigned->signed overflow. */
1607 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1608 if (negating)
1609 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1610 s = fold_convert (diff_type, s);
1611 if (negating)
1612 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1613 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1614 true, GSI_SAME_STMT);
1616 /* Determine the range, avoiding possible unsigned->signed overflow. */
1617 negating = !up && TYPE_UNSIGNED (iter_type);
1618 expr = fold_build2 (MINUS_EXPR, plus_type,
1619 fold_convert (plus_type, negating ? b : e),
1620 fold_convert (plus_type, negating ? e : b));
1621 expr = fold_convert (diff_type, expr);
1622 if (negating)
1623 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1624 tree range = force_gimple_operand_gsi
1625 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1627 /* Determine number of iterations. */
1628 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1629 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1630 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1632 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1633 true, GSI_SAME_STMT);
1635 counts[ix].base = b;
1636 counts[ix].iters = iters;
1637 counts[ix].step = s;
1639 total = fold_build2 (MULT_EXPR, bound_type, total,
1640 fold_convert (bound_type, iters));
1643 return total;
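/* Worked example of the iteration count computed above, with hypothetical
   values: for an up-counting loop with base b == 0, end e == 10 and step
   s == 3, range == 10 and iters == (10 - 1 + 3) / 3 == 4, matching the
   iterations i == 0, 3, 6, 9.  */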
1646 /* Emit initializers for collapsed loop members. INNER is true if
1647 this is for the element loop of a TILE. IVAR is the outer
1648 loop iteration variable, from which collapsed loop iteration values
1649 are calculated. COUNTS array has been initialized by
1650 expand_oacc_collapse_init. */
1652 static void
1653 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1654 gimple_stmt_iterator *gsi,
1655 const oacc_collapse *counts, tree ivar)
1657 tree ivar_type = TREE_TYPE (ivar);
1659 /* The most rapidly changing iteration variable is the innermost
1660 one. */
1661 for (int ix = fd->collapse; ix--;)
1663 const omp_for_data_loop *loop = &fd->loops[ix];
1664 const oacc_collapse *collapse = &counts[ix];
1665 tree v = inner ? loop->v : collapse->outer;
1666 tree iter_type = TREE_TYPE (v);
1667 tree diff_type = TREE_TYPE (collapse->step);
1668 tree plus_type = iter_type;
1669 enum tree_code plus_code = PLUS_EXPR;
1670 tree expr;
1672 if (POINTER_TYPE_P (iter_type))
1674 plus_code = POINTER_PLUS_EXPR;
1675 plus_type = sizetype;
1678 expr = ivar;
1679 if (ix)
1681 tree mod = fold_convert (ivar_type, collapse->iters);
1682 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1683 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1684 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1685 true, GSI_SAME_STMT);
1688 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1689 collapse->step);
1690 expr = fold_build2 (plus_code, iter_type,
1691 inner ? collapse->outer : collapse->base,
1692 fold_convert (plus_type, expr));
1693 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1694 true, GSI_SAME_STMT);
1695 gassign *ass = gimple_build_assign (v, expr);
1696 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1700 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1701 of the combined collapse > 1 loop constructs, generate code like:
1702 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1703 if (cond3 is <)
1704 adj = STEP3 - 1;
1705 else
1706 adj = STEP3 + 1;
1707 count3 = (adj + N32 - N31) / STEP3;
1708 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1709 if (cond2 is <)
1710 adj = STEP2 - 1;
1711 else
1712 adj = STEP2 + 1;
1713 count2 = (adj + N22 - N21) / STEP2;
1714 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1715 if (cond1 is <)
1716 adj = STEP1 - 1;
1717 else
1718 adj = STEP1 + 1;
1719 count1 = (adj + N12 - N11) / STEP1;
1720 count = count1 * count2 * count3;
1721 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1722 count = 0;
1723 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1724 of the combined loop constructs, just initialize COUNTS array
1725 from the _looptemp_ clauses. */
1727 /* NOTE: It *could* be better to moosh all of the BBs together,
1728 creating one larger BB with all the computation and the unexpected
1729 jump at the end. I.e.
1731 bool zero3, zero2, zero1, zero;
1733 zero3 = N32 c3 N31;
1734 count3 = (N32 - N31) /[cl] STEP3;
1735 zero2 = N22 c2 N21;
1736 count2 = (N22 - N21) /[cl] STEP2;
1737 zero1 = N12 c1 N11;
1738 count1 = (N12 - N11) /[cl] STEP1;
1739 zero = zero3 || zero2 || zero1;
1740 count = count1 * count2 * count3;
1741 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1743 After all, we expect zero to be false, and thus we expect to have to
1744 evaluate all of the comparison expressions, so short-circuiting
1745 oughtn't be a win. Since the condition isn't protecting a
1746 denominator, we're not concerned about divide-by-zero, so we can
1747 fully evaluate count even if a numerator turned out to be wrong.
1749 It seems like putting this all together would create much better
1750 scheduling opportunities, and less pressure on the chip's branch
1751 predictor. */
1753 static void
1754 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1755 basic_block &entry_bb, tree *counts,
1756 basic_block &zero_iter1_bb, int &first_zero_iter1,
1757 basic_block &zero_iter2_bb, int &first_zero_iter2,
1758 basic_block &l2_dom_bb)
1760 tree t, type = TREE_TYPE (fd->loop.v);
1761 edge e, ne;
1762 int i;
1764 /* Collapsed loops need work for expansion into SSA form. */
1765 gcc_assert (!gimple_in_ssa_p (cfun));
1767 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1768 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1770 gcc_assert (fd->ordered == 0);
1771 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1772 isn't supposed to be handled, as the inner loop doesn't
1773 use it. */
1774 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1775 OMP_CLAUSE__LOOPTEMP_);
1776 gcc_assert (innerc);
1777 for (i = 0; i < fd->collapse; i++)
1779 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1780 OMP_CLAUSE__LOOPTEMP_);
1781 gcc_assert (innerc);
1782 if (i)
1783 counts[i] = OMP_CLAUSE_DECL (innerc);
1784 else
1785 counts[0] = NULL_TREE;
1787 return;
1790 for (i = fd->collapse; i < fd->ordered; i++)
1792 tree itype = TREE_TYPE (fd->loops[i].v);
1793 counts[i] = NULL_TREE;
1794 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1795 fold_convert (itype, fd->loops[i].n1),
1796 fold_convert (itype, fd->loops[i].n2));
1797 if (t && integer_zerop (t))
1799 for (i = fd->collapse; i < fd->ordered; i++)
1800 counts[i] = build_int_cst (type, 0);
1801 break;
1804 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1806 tree itype = TREE_TYPE (fd->loops[i].v);
1808 if (i >= fd->collapse && counts[i])
1809 continue;
1810 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1811 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1812 fold_convert (itype, fd->loops[i].n1),
1813 fold_convert (itype, fd->loops[i].n2)))
1814 == NULL_TREE || !integer_onep (t)))
1816 gcond *cond_stmt;
1817 tree n1, n2;
1818 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1819 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1820 true, GSI_SAME_STMT);
1821 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1822 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1823 true, GSI_SAME_STMT);
1824 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1825 NULL_TREE, NULL_TREE);
1826 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1828 expand_omp_regimplify_p, NULL, NULL)
1829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1830 expand_omp_regimplify_p, NULL, NULL))
1832 *gsi = gsi_for_stmt (cond_stmt);
1833 gimple_regimplify_operands (cond_stmt, gsi);
1835 e = split_block (entry_bb, cond_stmt);
1836 basic_block &zero_iter_bb
1837 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1838 int &first_zero_iter
1839 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1840 if (zero_iter_bb == NULL)
1842 gassign *assign_stmt;
1843 first_zero_iter = i;
1844 zero_iter_bb = create_empty_bb (entry_bb);
1845 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1846 *gsi = gsi_after_labels (zero_iter_bb);
1847 if (i < fd->collapse)
1848 assign_stmt = gimple_build_assign (fd->loop.n2,
1849 build_zero_cst (type));
1850 else
1852 counts[i] = create_tmp_reg (type, ".count");
1853 assign_stmt
1854 = gimple_build_assign (counts[i], build_zero_cst (type));
1856 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1857 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1858 entry_bb);
1860 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1861 ne->probability = profile_probability::very_unlikely ();
1862 e->flags = EDGE_TRUE_VALUE;
1863 e->probability = ne->probability.invert ();
1864 if (l2_dom_bb == NULL)
1865 l2_dom_bb = entry_bb;
1866 entry_bb = e->dest;
1867 *gsi = gsi_last_nondebug_bb (entry_bb);
1870 if (POINTER_TYPE_P (itype))
1871 itype = signed_type_for (itype);
1872 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1873 ? -1 : 1));
1874 t = fold_build2 (PLUS_EXPR, itype,
1875 fold_convert (itype, fd->loops[i].step), t);
1876 t = fold_build2 (PLUS_EXPR, itype, t,
1877 fold_convert (itype, fd->loops[i].n2));
1878 t = fold_build2 (MINUS_EXPR, itype, t,
1879 fold_convert (itype, fd->loops[i].n1));
1880 /* ?? We could probably use CEIL_DIV_EXPR instead of
1881 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1882 generate the same code in the end because generically we
1883 don't know that the values involved must be negative for
1884 GT?? */
1885 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1886 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1887 fold_build1 (NEGATE_EXPR, itype, t),
1888 fold_build1 (NEGATE_EXPR, itype,
1889 fold_convert (itype,
1890 fd->loops[i].step)));
1891 else
1892 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1893 fold_convert (itype, fd->loops[i].step));
1894 t = fold_convert (type, t);
1895 if (TREE_CODE (t) == INTEGER_CST)
1896 counts[i] = t;
1897 else
1899 if (i < fd->collapse || i != first_zero_iter2)
1900 counts[i] = create_tmp_reg (type, ".count");
1901 expand_omp_build_assign (gsi, counts[i], t);
1903 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1905 if (i == 0)
1906 t = counts[0];
1907 else
1908 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1909 expand_omp_build_assign (gsi, fd->loop.n2, t);
1914 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1915 T = V;
1916 V3 = N31 + (T % count3) * STEP3;
1917 T = T / count3;
1918 V2 = N21 + (T % count2) * STEP2;
1919 T = T / count2;
1920 V1 = N11 + T * STEP1;
1921 if this loop doesn't have an inner loop construct combined with it.
1922 If it does have an inner loop construct combined with it and the
1923 iteration count isn't known constant, store values from counts array
1924 into its _looptemp_ temporaries instead. */
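/* Continuing the collapse(2) example above (count1 == 4 outer iterations
   of i, count2 == 4 inner iterations of j), logical iteration V == 9 is
   mapped back to the original iteration variables as

     T  = 9
     V2 = N21 + (T % count2) * STEP2  = 20 + 1 * -4  = 16
     T  = T / count2                  = 2
     V1 = N11 + T * STEP1             = 0 + 2 * 3    = 6

   i.e. the pair (i, j) == (6, 16), the tenth iteration in source order.  */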
1926 static void
1927 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1928 tree *counts, gimple *inner_stmt, tree startvar)
1930 int i;
1931 if (gimple_omp_for_combined_p (fd->for_stmt))
1933 /* If fd->loop.n2 is constant, then no propagation of the counts
1934 is needed, they are constant. */
1935 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1936 return;
1938 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1939 ? gimple_omp_taskreg_clauses (inner_stmt)
1940 : gimple_omp_for_clauses (inner_stmt);
1941 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1942 isn't supposed to be handled, as the inner loop doesn't
1943 use it. */
1944 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1945 gcc_assert (innerc);
1946 for (i = 0; i < fd->collapse; i++)
1948 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1949 OMP_CLAUSE__LOOPTEMP_);
1950 gcc_assert (innerc);
1951 if (i)
1953 tree tem = OMP_CLAUSE_DECL (innerc);
1954 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1955 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1956 false, GSI_CONTINUE_LINKING);
1957 gassign *stmt = gimple_build_assign (tem, t);
1958 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1961 return;
1964 tree type = TREE_TYPE (fd->loop.v);
1965 tree tem = create_tmp_reg (type, ".tem");
1966 gassign *stmt = gimple_build_assign (tem, startvar);
1967 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1969 for (i = fd->collapse - 1; i >= 0; i--)
1971 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1972 itype = vtype;
1973 if (POINTER_TYPE_P (vtype))
1974 itype = signed_type_for (vtype);
1975 if (i != 0)
1976 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1977 else
1978 t = tem;
1979 t = fold_convert (itype, t);
1980 t = fold_build2 (MULT_EXPR, itype, t,
1981 fold_convert (itype, fd->loops[i].step));
1982 if (POINTER_TYPE_P (vtype))
1983 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1984 else
1985 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1986 t = force_gimple_operand_gsi (gsi, t,
1987 DECL_P (fd->loops[i].v)
1988 && TREE_ADDRESSABLE (fd->loops[i].v),
1989 NULL_TREE, false,
1990 GSI_CONTINUE_LINKING);
1991 stmt = gimple_build_assign (fd->loops[i].v, t);
1992 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1993 if (i != 0)
1995 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1996 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1997 false, GSI_CONTINUE_LINKING);
1998 stmt = gimple_build_assign (tem, t);
1999 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2004 /* Helper function for expand_omp_for_*. Generate code like:
2005 L10:
2006 V3 += STEP3;
2007 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2008 L11:
2009 V3 = N31;
2010 V2 += STEP2;
2011 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2012 L12:
2013 V2 = N21;
2014 V1 += STEP1;
2015 goto BODY_BB; */
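/* With the same collapse(2) example, suppose the body has just run for
   (i, j) == (6, 8).  The code generated here then executes

     V2 += STEP2;                j becomes 4
     if (V2 cond2 N22) ...       4 > 5 is false, so fall through
     V2 = N21;                   j is reset to 20
     V1 += STEP1;                i becomes 9
     goto BODY_BB;               next logical iteration is (9, 20)

   i.e. only the exhausted inner counters are reset and the next outer
   variable is stepped before re-entering the body.  */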
2017 static basic_block
2018 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2019 basic_block body_bb)
2021 basic_block last_bb, bb, collapse_bb = NULL;
2022 int i;
2023 gimple_stmt_iterator gsi;
2024 edge e;
2025 tree t;
2026 gimple *stmt;
2028 last_bb = cont_bb;
2029 for (i = fd->collapse - 1; i >= 0; i--)
2031 tree vtype = TREE_TYPE (fd->loops[i].v);
2033 bb = create_empty_bb (last_bb);
2034 add_bb_to_loop (bb, last_bb->loop_father);
2035 gsi = gsi_start_bb (bb);
2037 if (i < fd->collapse - 1)
2039 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2040 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2042 t = fd->loops[i + 1].n1;
2043 t = force_gimple_operand_gsi (&gsi, t,
2044 DECL_P (fd->loops[i + 1].v)
2045 && TREE_ADDRESSABLE (fd->loops[i
2046 + 1].v),
2047 NULL_TREE, false,
2048 GSI_CONTINUE_LINKING);
2049 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2050 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2052 else
2053 collapse_bb = bb;
2055 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2057 if (POINTER_TYPE_P (vtype))
2058 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2059 else
2060 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2061 t = force_gimple_operand_gsi (&gsi, t,
2062 DECL_P (fd->loops[i].v)
2063 && TREE_ADDRESSABLE (fd->loops[i].v),
2064 NULL_TREE, false, GSI_CONTINUE_LINKING);
2065 stmt = gimple_build_assign (fd->loops[i].v, t);
2066 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2068 if (i > 0)
2070 t = fd->loops[i].n2;
2071 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2072 false, GSI_CONTINUE_LINKING);
2073 tree v = fd->loops[i].v;
2074 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2075 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2076 false, GSI_CONTINUE_LINKING);
2077 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2078 stmt = gimple_build_cond_empty (t);
2079 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2080 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2081 expand_omp_regimplify_p, NULL, NULL)
2082 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2083 expand_omp_regimplify_p, NULL, NULL))
2084 gimple_regimplify_operands (stmt, &gsi);
2085 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2086 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2088 else
2089 make_edge (bb, body_bb, EDGE_FALLTHRU);
2090 last_bb = bb;
2093 return collapse_bb;
2096 /* Expand #pragma omp ordered depend(source). */
2098 static void
2099 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2100 tree *counts, location_t loc)
2102 enum built_in_function source_ix
2103 = fd->iter_type == long_integer_type_node
2104 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2105 gimple *g
2106 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2107 build_fold_addr_expr (counts[fd->ordered]));
2108 gimple_set_location (g, loc);
2109 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2112 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
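/* E.g. for

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         {
           #pragma omp ordered depend(sink: i-1, j)
           ... use data produced by iteration (i-1, j) ...
           #pragma omp ordered depend(source)
         }

   the depend(sink: i-1, j) clause is lowered below into a call to
   GOMP_doacross_wait (GOMP_doacross_ull_wait for unsigned long long
   iterators) passing the iteration numbers of (i-1, j), guarded by a
   runtime check that the referenced iteration lies in the iteration
   space at all (here, that i >= 1).  A sink offset pointing at a
   lexically later iteration, e.g. depend(sink: i+1, j), would instead
   trigger the warning emitted below.  */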
2114 static void
2115 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2116 tree *counts, tree c, location_t loc)
2118 auto_vec<tree, 10> args;
2119 enum built_in_function sink_ix
2120 = fd->iter_type == long_integer_type_node
2121 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2122 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2123 int i;
2124 gimple_stmt_iterator gsi2 = *gsi;
2125 bool warned_step = false;
2127 for (i = 0; i < fd->ordered; i++)
2129 tree step = NULL_TREE;
2130 off = TREE_PURPOSE (deps);
2131 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2133 step = TREE_OPERAND (off, 1);
2134 off = TREE_OPERAND (off, 0);
2136 if (!integer_zerop (off))
2138 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2139 || fd->loops[i].cond_code == GT_EXPR);
2140 bool forward = fd->loops[i].cond_code == LT_EXPR;
2141 if (step)
2143 /* Non-simple Fortran DO loops. If step is variable,
2144 we don't even know the direction at compile time, so
2145 we can't warn. */
2146 if (TREE_CODE (step) != INTEGER_CST)
2147 break;
2148 forward = tree_int_cst_sgn (step) != -1;
2150 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2151 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2152 "waiting for lexically later iteration");
2153 break;
2155 deps = TREE_CHAIN (deps);
2157 /* If all offsets corresponding to the collapsed loops are zero,
2158 this depend clause can be ignored. FIXME: but there is still a
2159 flush needed. We need to emit one __sync_synchronize () for it
2160 though (perhaps conditionally)? Solve this together with the
2161 conservative dependence folding optimization.
2162 if (i >= fd->collapse)
2163 return; */
2165 deps = OMP_CLAUSE_DECL (c);
2166 gsi_prev (&gsi2);
2167 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2168 edge e2 = split_block_after_labels (e1->dest);
2170 gsi2 = gsi_after_labels (e1->dest);
2171 *gsi = gsi_last_bb (e1->src);
2172 for (i = 0; i < fd->ordered; i++)
2174 tree itype = TREE_TYPE (fd->loops[i].v);
2175 tree step = NULL_TREE;
2176 tree orig_off = NULL_TREE;
2177 if (POINTER_TYPE_P (itype))
2178 itype = sizetype;
2179 if (i)
2180 deps = TREE_CHAIN (deps);
2181 off = TREE_PURPOSE (deps);
2182 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2184 step = TREE_OPERAND (off, 1);
2185 off = TREE_OPERAND (off, 0);
2186 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2187 && integer_onep (fd->loops[i].step)
2188 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2190 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2191 if (step)
2193 off = fold_convert_loc (loc, itype, off);
2194 orig_off = off;
2195 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2198 if (integer_zerop (off))
2199 t = boolean_true_node;
2200 else
2202 tree a;
2203 tree co = fold_convert_loc (loc, itype, off);
2204 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2206 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2207 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2208 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2209 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2210 co);
2212 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2213 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2214 fd->loops[i].v, co);
2215 else
2216 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2217 fd->loops[i].v, co);
2218 if (step)
2220 tree t1, t2;
2221 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2222 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2223 fd->loops[i].n1);
2224 else
2225 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2226 fd->loops[i].n2);
2227 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2228 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2229 fd->loops[i].n2);
2230 else
2231 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2232 fd->loops[i].n1);
2233 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2234 step, build_int_cst (TREE_TYPE (step), 0));
2235 if (TREE_CODE (step) != INTEGER_CST)
2237 t1 = unshare_expr (t1);
2238 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2239 false, GSI_CONTINUE_LINKING);
2240 t2 = unshare_expr (t2);
2241 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2242 false, GSI_CONTINUE_LINKING);
2244 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2245 t, t2, t1);
2247 else if (fd->loops[i].cond_code == LT_EXPR)
2249 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2250 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2251 fd->loops[i].n1);
2252 else
2253 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2254 fd->loops[i].n2);
2256 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2257 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2258 fd->loops[i].n2);
2259 else
2260 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2261 fd->loops[i].n1);
2263 if (cond)
2264 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2265 else
2266 cond = t;
2268 off = fold_convert_loc (loc, itype, off);
2270 if (step
2271 || (fd->loops[i].cond_code == LT_EXPR
2272 ? !integer_onep (fd->loops[i].step)
2273 : !integer_minus_onep (fd->loops[i].step)))
2275 if (step == NULL_TREE
2276 && TYPE_UNSIGNED (itype)
2277 && fd->loops[i].cond_code == GT_EXPR)
2278 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2279 fold_build1_loc (loc, NEGATE_EXPR, itype,
2280 s));
2281 else
2282 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2283 orig_off ? orig_off : off, s);
2284 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2285 build_int_cst (itype, 0));
2286 if (integer_zerop (t) && !warned_step)
2288 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2289 "refers to iteration never in the iteration "
2290 "space");
2291 warned_step = true;
2293 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2294 cond, t);
2297 if (i <= fd->collapse - 1 && fd->collapse > 1)
2298 t = fd->loop.v;
2299 else if (counts[i])
2300 t = counts[i];
2301 else
2303 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2304 fd->loops[i].v, fd->loops[i].n1);
2305 t = fold_convert_loc (loc, fd->iter_type, t);
2307 if (step)
2308 /* We have divided off by step already earlier. */;
2309 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2310 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2311 fold_build1_loc (loc, NEGATE_EXPR, itype,
2312 s));
2313 else
2314 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2315 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2316 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2317 off = fold_convert_loc (loc, fd->iter_type, off);
2318 if (i <= fd->collapse - 1 && fd->collapse > 1)
2320 if (i)
2321 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2322 off);
2323 if (i < fd->collapse - 1)
2325 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2326 counts[i]);
2327 continue;
2330 off = unshare_expr (off);
2331 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2332 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2333 true, GSI_SAME_STMT);
2334 args.safe_push (t);
2336 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2337 gimple_set_location (g, loc);
2338 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2340 cond = unshare_expr (cond);
2341 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2342 GSI_CONTINUE_LINKING);
2343 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2344 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2345 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2346 e1->probability = e3->probability.invert ();
2347 e1->flags = EDGE_TRUE_VALUE;
2348 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2350 *gsi = gsi_after_labels (e2->dest);
2353 /* Expand all #pragma omp ordered depend(source) and
2354 #pragma omp ordered depend(sink:...) constructs in the current
2355 #pragma omp for ordered(n) region. */
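/* For the ordered(2) example shown before expand_omp_ordered_sink, this
   allocates a local array ".orditera" of fd->ordered - fd->collapse + 1
   elements (here 2: one slot for the collapsed dimension(s), one for the
   j loop) which the surrounding loop code keeps updated with the current
   iteration vector.  Each depend(source) then lowers into a
   GOMP_doacross_post call taking the address of that array, each
   depend(sink: ...) into the guarded GOMP_doacross_wait call described
   above, and the GIMPLE_OMP_ORDERED statements themselves are removed.  */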
2357 static void
2358 expand_omp_ordered_source_sink (struct omp_region *region,
2359 struct omp_for_data *fd, tree *counts,
2360 basic_block cont_bb)
2362 struct omp_region *inner;
2363 int i;
2364 for (i = fd->collapse - 1; i < fd->ordered; i++)
2365 if (i == fd->collapse - 1 && fd->collapse > 1)
2366 counts[i] = NULL_TREE;
2367 else if (i >= fd->collapse && !cont_bb)
2368 counts[i] = build_zero_cst (fd->iter_type);
2369 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2370 && integer_onep (fd->loops[i].step))
2371 counts[i] = NULL_TREE;
2372 else
2373 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2374 tree atype
2375 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2376 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2377 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2379 for (inner = region->inner; inner; inner = inner->next)
2380 if (inner->type == GIMPLE_OMP_ORDERED)
2382 gomp_ordered *ord_stmt = inner->ord_stmt;
2383 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2384 location_t loc = gimple_location (ord_stmt);
2385 tree c;
2386 for (c = gimple_omp_ordered_clauses (ord_stmt);
2387 c; c = OMP_CLAUSE_CHAIN (c))
2388 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2389 break;
2390 if (c)
2391 expand_omp_ordered_source (&gsi, fd, counts, loc);
2392 for (c = gimple_omp_ordered_clauses (ord_stmt);
2393 c; c = OMP_CLAUSE_CHAIN (c))
2394 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2395 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2396 gsi_remove (&gsi, true);
2400 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2401 collapsed. */
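/* E.g. with ordered(2) and the default collapse(1), only the outer i loop
   is handled by the worksharing machinery; the inner j loop is recreated
   here around the body, and on every increment the matching element of
   the ".orditera" array is refreshed (either from a separate ".orditer"
   counter or directly from the iteration variable when the step is 1) so
   that doacross posts and waits see the current iteration vector.  */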
2403 static basic_block
2404 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2405 basic_block cont_bb, basic_block body_bb,
2406 bool ordered_lastprivate)
2408 if (fd->ordered == fd->collapse)
2409 return cont_bb;
2411 if (!cont_bb)
2413 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2414 for (int i = fd->collapse; i < fd->ordered; i++)
2416 tree type = TREE_TYPE (fd->loops[i].v);
2417 tree n1 = fold_convert (type, fd->loops[i].n1);
2418 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2419 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2420 size_int (i - fd->collapse + 1),
2421 NULL_TREE, NULL_TREE);
2422 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2424 return NULL;
2427 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2429 tree t, type = TREE_TYPE (fd->loops[i].v);
2430 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2431 expand_omp_build_assign (&gsi, fd->loops[i].v,
2432 fold_convert (type, fd->loops[i].n1));
2433 if (counts[i])
2434 expand_omp_build_assign (&gsi, counts[i],
2435 build_zero_cst (fd->iter_type));
2436 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2437 size_int (i - fd->collapse + 1),
2438 NULL_TREE, NULL_TREE);
2439 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2440 if (!gsi_end_p (gsi))
2441 gsi_prev (&gsi);
2442 else
2443 gsi = gsi_last_bb (body_bb);
2444 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2445 basic_block new_body = e1->dest;
2446 if (body_bb == cont_bb)
2447 cont_bb = new_body;
2448 edge e2 = NULL;
2449 basic_block new_header;
2450 if (EDGE_COUNT (cont_bb->preds) > 0)
2452 gsi = gsi_last_bb (cont_bb);
2453 if (POINTER_TYPE_P (type))
2454 t = fold_build_pointer_plus (fd->loops[i].v,
2455 fold_convert (sizetype,
2456 fd->loops[i].step));
2457 else
2458 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2459 fold_convert (type, fd->loops[i].step));
2460 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2461 if (counts[i])
2463 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2464 build_int_cst (fd->iter_type, 1));
2465 expand_omp_build_assign (&gsi, counts[i], t);
2466 t = counts[i];
2468 else
2470 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2471 fd->loops[i].v, fd->loops[i].n1);
2472 t = fold_convert (fd->iter_type, t);
2473 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2474 true, GSI_SAME_STMT);
2476 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2477 size_int (i - fd->collapse + 1),
2478 NULL_TREE, NULL_TREE);
2479 expand_omp_build_assign (&gsi, aref, t);
2480 gsi_prev (&gsi);
2481 e2 = split_block (cont_bb, gsi_stmt (gsi));
2482 new_header = e2->dest;
2484 else
2485 new_header = cont_bb;
2486 gsi = gsi_after_labels (new_header);
2487 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2488 true, GSI_SAME_STMT);
2489 tree n2
2490 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2491 true, NULL_TREE, true, GSI_SAME_STMT);
2492 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2493 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2494 edge e3 = split_block (new_header, gsi_stmt (gsi));
2495 cont_bb = e3->dest;
2496 remove_edge (e1);
2497 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2498 e3->flags = EDGE_FALSE_VALUE;
2499 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2500 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2501 e1->probability = e3->probability.invert ();
2503 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2504 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2506 if (e2)
2508 struct loop *loop = alloc_loop ();
2509 loop->header = new_header;
2510 loop->latch = e2->src;
2511 add_loop (loop, body_bb->loop_father);
2515 /* If there are any lastprivate clauses and it is possible some loops
2516 might have zero iterations, ensure all the decls are initialized,
2517 otherwise we could crash evaluating C++ class iterators with lastprivate
2518 clauses. */
2519 bool need_inits = false;
2520 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2521 if (need_inits)
2523 tree type = TREE_TYPE (fd->loops[i].v);
2524 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2525 expand_omp_build_assign (&gsi, fd->loops[i].v,
2526 fold_convert (type, fd->loops[i].n1));
2528 else
2530 tree type = TREE_TYPE (fd->loops[i].v);
2531 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2532 boolean_type_node,
2533 fold_convert (type, fd->loops[i].n1),
2534 fold_convert (type, fd->loops[i].n2));
2535 if (!integer_onep (this_cond))
2536 need_inits = true;
2539 return cont_bb;
2542 /* A subroutine of expand_omp_for. Generate code for a parallel
2543 loop with any schedule. Given parameters:
2545 for (V = N1; V cond N2; V += STEP) BODY;
2547 where COND is "<" or ">", we generate pseudocode
2549 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2550 if (more) goto L0; else goto L3;
2552 V = istart0;
2553 iend = iend0;
2555 BODY;
2556 V += STEP;
2557 if (V cond iend) goto L1; else goto L2;
2559 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2562 If this is a combined omp parallel loop, instead of the call to
2563 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2564 If this is gimple_omp_for_combined_p loop, then instead of assigning
2565 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2566 inner GIMPLE_OMP_FOR and V += STEP; and
2567 if (V cond iend) goto L1; else goto L2; are removed.
2569 For collapsed loops, given parameters:
2570 collapse(3)
2571 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2572 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2573 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2574 BODY;
2576 we generate pseudocode
2578 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2579 if (cond3 is <)
2580 adj = STEP3 - 1;
2581 else
2582 adj = STEP3 + 1;
2583 count3 = (adj + N32 - N31) / STEP3;
2584 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2585 if (cond2 is <)
2586 adj = STEP2 - 1;
2587 else
2588 adj = STEP2 + 1;
2589 count2 = (adj + N22 - N21) / STEP2;
2590 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2591 if (cond1 is <)
2592 adj = STEP1 - 1;
2593 else
2594 adj = STEP1 + 1;
2595 count1 = (adj + N12 - N11) / STEP1;
2596 count = count1 * count2 * count3;
2597 goto Z1;
2599 count = 0;
2601 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2602 if (more) goto L0; else goto L3;
2604 V = istart0;
2605 T = V;
2606 V3 = N31 + (T % count3) * STEP3;
2607 T = T / count3;
2608 V2 = N21 + (T % count2) * STEP2;
2609 T = T / count2;
2610 V1 = N11 + T * STEP1;
2611 iend = iend0;
2613 BODY;
2614 V += 1;
2615 if (V < iend) goto L10; else goto L2;
2616 L10:
2617 V3 += STEP3;
2618 if (V3 cond3 N32) goto L1; else goto L11;
2619 L11:
2620 V3 = N31;
2621 V2 += STEP2;
2622 if (V2 cond2 N22) goto L1; else goto L12;
2623 L12:
2624 V2 = N21;
2625 V1 += STEP1;
2626 goto L1;
2628 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
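/* In plain C the non-collapsed case corresponds roughly to the following
   sketch, with start_fn/next_fn being for instance
   GOMP_loop_dynamic_start/GOMP_loop_dynamic_next for a monotonic
   dynamic schedule:

     long istart0, iend0;
     bool more = GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK,
                                          &istart0, &iend0);
     while (more)
       {
         for (V = istart0; V cond iend0; V += STEP)
           BODY;
         more = GOMP_loop_dynamic_next (&istart0, &iend0);
       }
     GOMP_loop_end ();    (or the _nowait/_cancel variant, see the exit
                           block handling further down)

   i.e. each thread repeatedly grabs [istart0, iend0) chunks from the
   runtime until no work is left.  */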
2633 static void
2634 expand_omp_for_generic (struct omp_region *region,
2635 struct omp_for_data *fd,
2636 enum built_in_function start_fn,
2637 enum built_in_function next_fn,
2638 tree sched_arg,
2639 gimple *inner_stmt)
2641 tree type, istart0, iend0, iend;
2642 tree t, vmain, vback, bias = NULL_TREE;
2643 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2644 basic_block l2_bb = NULL, l3_bb = NULL;
2645 gimple_stmt_iterator gsi;
2646 gassign *assign_stmt;
2647 bool in_combined_parallel = is_combined_parallel (region);
2648 bool broken_loop = region->cont == NULL;
2649 edge e, ne;
2650 tree *counts = NULL;
2651 int i;
2652 bool ordered_lastprivate = false;
2654 gcc_assert (!broken_loop || !in_combined_parallel);
2655 gcc_assert (fd->iter_type == long_integer_type_node
2656 || !in_combined_parallel);
2658 entry_bb = region->entry;
2659 cont_bb = region->cont;
2660 collapse_bb = NULL;
2661 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2662 gcc_assert (broken_loop
2663 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2664 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2665 l1_bb = single_succ (l0_bb);
2666 if (!broken_loop)
2668 l2_bb = create_empty_bb (cont_bb);
2669 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2670 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2671 == l1_bb));
2672 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2674 else
2675 l2_bb = NULL;
2676 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2677 exit_bb = region->exit;
2679 gsi = gsi_last_nondebug_bb (entry_bb);
2681 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2682 if (fd->ordered
2683 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2684 OMP_CLAUSE_LASTPRIVATE))
2685 ordered_lastprivate = true;
2686 tree reductions = NULL_TREE;
2687 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2688 tree memv = NULL_TREE;
2689 if (sched_arg)
2691 if (fd->have_reductemp)
2693 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2694 OMP_CLAUSE__REDUCTEMP_);
2695 reductions = OMP_CLAUSE_DECL (c);
2696 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2697 gimple *g = SSA_NAME_DEF_STMT (reductions);
2698 reductions = gimple_assign_rhs1 (g);
2699 OMP_CLAUSE_DECL (c) = reductions;
2700 entry_bb = gimple_bb (g);
2701 edge e = split_block (entry_bb, g);
2702 if (region->entry == entry_bb)
2703 region->entry = e->dest;
2704 gsi = gsi_last_bb (entry_bb);
2706 else
2707 reductions = null_pointer_node;
2708 if (fd->lastprivate_conditional)
2710 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2711 OMP_CLAUSE__CONDTEMP_);
2712 condtemp = OMP_CLAUSE_DECL (c);
2713 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2714 cond_var = OMP_CLAUSE_DECL (c);
2715 tree type = TREE_TYPE (condtemp);
2716 memv = create_tmp_var (type);
2717 TREE_ADDRESSABLE (memv) = 1;
2718 unsigned HOST_WIDE_INT sz
2719 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2720 sz *= fd->lastprivate_conditional;
2721 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2722 false);
2723 mem = build_fold_addr_expr (memv);
2725 else
2726 mem = null_pointer_node;
2728 if (fd->collapse > 1 || fd->ordered)
2730 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2731 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2733 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2734 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2735 zero_iter1_bb, first_zero_iter1,
2736 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2738 if (zero_iter1_bb)
2740 /* Some counts[i] vars might be uninitialized if
2741 some loop has zero iterations. But the body shouldn't
2742 be executed in that case, so just avoid uninit warnings. */
2743 for (i = first_zero_iter1;
2744 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2745 if (SSA_VAR_P (counts[i]))
2746 TREE_NO_WARNING (counts[i]) = 1;
2747 gsi_prev (&gsi);
2748 e = split_block (entry_bb, gsi_stmt (gsi));
2749 entry_bb = e->dest;
2750 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2751 gsi = gsi_last_nondebug_bb (entry_bb);
2752 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2753 get_immediate_dominator (CDI_DOMINATORS,
2754 zero_iter1_bb));
2756 if (zero_iter2_bb)
2758 /* Some counts[i] vars might be uninitialized if
2759 some loop has zero iterations. But the body shouldn't
2760 be executed in that case, so just avoid uninit warnings. */
2761 for (i = first_zero_iter2; i < fd->ordered; i++)
2762 if (SSA_VAR_P (counts[i]))
2763 TREE_NO_WARNING (counts[i]) = 1;
2764 if (zero_iter1_bb)
2765 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2766 else
2768 gsi_prev (&gsi);
2769 e = split_block (entry_bb, gsi_stmt (gsi));
2770 entry_bb = e->dest;
2771 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2772 gsi = gsi_last_nondebug_bb (entry_bb);
2773 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2774 get_immediate_dominator
2775 (CDI_DOMINATORS, zero_iter2_bb));
2778 if (fd->collapse == 1)
2780 counts[0] = fd->loop.n2;
2781 fd->loop = fd->loops[0];
2785 type = TREE_TYPE (fd->loop.v);
2786 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2787 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2788 TREE_ADDRESSABLE (istart0) = 1;
2789 TREE_ADDRESSABLE (iend0) = 1;
2791 /* See if we need to bias by LLONG_MIN. */
2792 if (fd->iter_type == long_long_unsigned_type_node
2793 && TREE_CODE (type) == INTEGER_TYPE
2794 && !TYPE_UNSIGNED (type)
2795 && fd->ordered == 0)
2797 tree n1, n2;
2799 if (fd->loop.cond_code == LT_EXPR)
2801 n1 = fd->loop.n1;
2802 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2804 else
2806 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2807 n2 = fd->loop.n1;
2809 if (TREE_CODE (n1) != INTEGER_CST
2810 || TREE_CODE (n2) != INTEGER_CST
2811 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2812 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
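/* E.g. a signed long long iterator running from -5 to 5 would, seen as
   unsigned long long by the runtime, jump from 0xfff...ffb down to 0 in
   the middle of the range; adding bias == 0x8000000000000000 turns the
   bounds into 0x7ffffffffffffffb and 0x8000000000000005, which compare
   correctly as unsigned, and the bias is subtracted from istart0/iend0
   again below.  */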
2815 gimple_stmt_iterator gsif = gsi;
2816 gsi_prev (&gsif);
2818 tree arr = NULL_TREE;
2819 if (in_combined_parallel)
2821 gcc_assert (fd->ordered == 0);
2822 /* In a combined parallel loop, emit a call to
2823 GOMP_loop_foo_next. */
2824 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2825 build_fold_addr_expr (istart0),
2826 build_fold_addr_expr (iend0));
2828 else
2830 tree t0, t1, t2, t3, t4;
2831 /* If this is not a combined parallel loop, emit a call to
2832 GOMP_loop_foo_start in ENTRY_BB. */
2833 t4 = build_fold_addr_expr (iend0);
2834 t3 = build_fold_addr_expr (istart0);
2835 if (fd->ordered)
2837 t0 = build_int_cst (unsigned_type_node,
2838 fd->ordered - fd->collapse + 1);
2839 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2840 fd->ordered
2841 - fd->collapse + 1),
2842 ".omp_counts");
2843 DECL_NAMELESS (arr) = 1;
2844 TREE_ADDRESSABLE (arr) = 1;
2845 TREE_STATIC (arr) = 1;
2846 vec<constructor_elt, va_gc> *v;
2847 vec_alloc (v, fd->ordered - fd->collapse + 1);
2848 int idx;
2850 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2852 tree c;
2853 if (idx == 0 && fd->collapse > 1)
2854 c = fd->loop.n2;
2855 else
2856 c = counts[idx + fd->collapse - 1];
2857 tree purpose = size_int (idx);
2858 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2859 if (TREE_CODE (c) != INTEGER_CST)
2860 TREE_STATIC (arr) = 0;
2863 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2864 if (!TREE_STATIC (arr))
2865 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2866 void_type_node, arr),
2867 true, NULL_TREE, true, GSI_SAME_STMT);
2868 t1 = build_fold_addr_expr (arr);
2869 t2 = NULL_TREE;
2871 else
2873 t2 = fold_convert (fd->iter_type, fd->loop.step);
2874 t1 = fd->loop.n2;
2875 t0 = fd->loop.n1;
2876 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2878 tree innerc
2879 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2880 OMP_CLAUSE__LOOPTEMP_);
2881 gcc_assert (innerc);
2882 t0 = OMP_CLAUSE_DECL (innerc);
2883 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2884 OMP_CLAUSE__LOOPTEMP_);
2885 gcc_assert (innerc);
2886 t1 = OMP_CLAUSE_DECL (innerc);
2888 if (POINTER_TYPE_P (TREE_TYPE (t0))
2889 && TYPE_PRECISION (TREE_TYPE (t0))
2890 != TYPE_PRECISION (fd->iter_type))
2892 /* Avoid casting pointers to integer of a different size. */
2893 tree itype = signed_type_for (type);
2894 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2895 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2897 else
2899 t1 = fold_convert (fd->iter_type, t1);
2900 t0 = fold_convert (fd->iter_type, t0);
2902 if (bias)
2904 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2905 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2908 if (fd->iter_type == long_integer_type_node || fd->ordered)
2910 if (fd->chunk_size)
2912 t = fold_convert (fd->iter_type, fd->chunk_size);
2913 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2914 if (sched_arg)
2916 if (fd->ordered)
2917 t = build_call_expr (builtin_decl_explicit (start_fn),
2918 8, t0, t1, sched_arg, t, t3, t4,
2919 reductions, mem);
2920 else
2921 t = build_call_expr (builtin_decl_explicit (start_fn),
2922 9, t0, t1, t2, sched_arg, t, t3, t4,
2923 reductions, mem);
2925 else if (fd->ordered)
2926 t = build_call_expr (builtin_decl_explicit (start_fn),
2927 5, t0, t1, t, t3, t4);
2928 else
2929 t = build_call_expr (builtin_decl_explicit (start_fn),
2930 6, t0, t1, t2, t, t3, t4);
2932 else if (fd->ordered)
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 4, t0, t1, t3, t4);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 5, t0, t1, t2, t3, t4);
2939 else
2941 tree t5;
2942 tree c_bool_type;
2943 tree bfn_decl;
2945 /* The GOMP_loop_ull_*start functions have additional boolean
2946 argument, true for < loops and false for > loops.
2947 In Fortran, the C bool type can be different from
2948 boolean_type_node. */
2949 bfn_decl = builtin_decl_explicit (start_fn);
2950 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2951 t5 = build_int_cst (c_bool_type,
2952 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2953 if (fd->chunk_size)
2955 tree bfn_decl = builtin_decl_explicit (start_fn);
2956 t = fold_convert (fd->iter_type, fd->chunk_size);
2957 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2958 if (sched_arg)
2959 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2960 t, t3, t4, reductions, mem);
2961 else
2962 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2964 else
2965 t = build_call_expr (builtin_decl_explicit (start_fn),
2966 6, t5, t0, t1, t2, t3, t4);
2969 if (TREE_TYPE (t) != boolean_type_node)
2970 t = fold_build2 (NE_EXPR, boolean_type_node,
2971 t, build_int_cst (TREE_TYPE (t), 0));
2972 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2973 true, GSI_SAME_STMT);
2974 if (arr && !TREE_STATIC (arr))
2976 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2977 TREE_THIS_VOLATILE (clobber) = 1;
2978 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2979 GSI_SAME_STMT);
2981 if (fd->lastprivate_conditional)
2982 expand_omp_build_assign (&gsi, condtemp, memv, false);
2983 if (fd->have_reductemp)
2985 gimple *g = gsi_stmt (gsi);
2986 gsi_remove (&gsi, true);
2987 release_ssa_name (gimple_assign_lhs (g));
2989 entry_bb = region->entry;
2990 gsi = gsi_last_nondebug_bb (entry_bb);
2992 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2994 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2996 /* Remove the GIMPLE_OMP_FOR statement. */
2997 gsi_remove (&gsi, true);
2999 if (gsi_end_p (gsif))
3000 gsif = gsi_after_labels (gsi_bb (gsif));
3001 gsi_next (&gsif);
3003 /* Iteration setup for sequential loop goes in L0_BB. */
3004 tree startvar = fd->loop.v;
3005 tree endvar = NULL_TREE;
3007 if (gimple_omp_for_combined_p (fd->for_stmt))
3009 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3010 && gimple_omp_for_kind (inner_stmt)
3011 == GF_OMP_FOR_KIND_SIMD);
3012 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3013 OMP_CLAUSE__LOOPTEMP_);
3014 gcc_assert (innerc);
3015 startvar = OMP_CLAUSE_DECL (innerc);
3016 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3017 OMP_CLAUSE__LOOPTEMP_);
3018 gcc_assert (innerc);
3019 endvar = OMP_CLAUSE_DECL (innerc);
3022 gsi = gsi_start_bb (l0_bb);
3023 t = istart0;
3024 if (fd->ordered && fd->collapse == 1)
3025 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3026 fold_convert (fd->iter_type, fd->loop.step));
3027 else if (bias)
3028 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3029 if (fd->ordered && fd->collapse == 1)
3031 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3032 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3033 fd->loop.n1, fold_convert (sizetype, t));
3034 else
3036 t = fold_convert (TREE_TYPE (startvar), t);
3037 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3038 fd->loop.n1, t);
3041 else
3043 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3044 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3045 t = fold_convert (TREE_TYPE (startvar), t);
3047 t = force_gimple_operand_gsi (&gsi, t,
3048 DECL_P (startvar)
3049 && TREE_ADDRESSABLE (startvar),
3050 NULL_TREE, false, GSI_CONTINUE_LINKING);
3051 assign_stmt = gimple_build_assign (startvar, t);
3052 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3053 if (cond_var)
3055 tree itype = TREE_TYPE (cond_var);
3056 /* For a lastprivate(conditional:) itervar we need some iteration
3057 counter that starts at a nonzero unsigned value and increases.
3058 Prefer as few IVs as possible: if we can, use startvar
3059 itself, or startvar + constant (those would be
3060 incremented with step); as a last resort use s0 + 1,
3061 incremented by 1. */
3062 if ((fd->ordered && fd->collapse == 1)
3063 || bias
3064 || POINTER_TYPE_P (type)
3065 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3066 || fd->loop.cond_code != LT_EXPR)
3067 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3068 build_int_cst (itype, 1));
3069 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3070 t = fold_convert (itype, t);
3071 else
3073 tree c = fold_convert (itype, fd->loop.n1);
3074 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3075 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3077 t = force_gimple_operand_gsi (&gsi, t, false,
3078 NULL_TREE, false, GSI_CONTINUE_LINKING);
3079 assign_stmt = gimple_build_assign (cond_var, t);
3080 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3083 t = iend0;
3084 if (fd->ordered && fd->collapse == 1)
3085 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3086 fold_convert (fd->iter_type, fd->loop.step));
3087 else if (bias)
3088 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3089 if (fd->ordered && fd->collapse == 1)
3091 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3092 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3093 fd->loop.n1, fold_convert (sizetype, t));
3094 else
3096 t = fold_convert (TREE_TYPE (startvar), t);
3097 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3098 fd->loop.n1, t);
3101 else
3103 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3104 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3105 t = fold_convert (TREE_TYPE (startvar), t);
3107 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3108 false, GSI_CONTINUE_LINKING);
3109 if (endvar)
3111 assign_stmt = gimple_build_assign (endvar, iend);
3112 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3113 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3114 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3115 else
3116 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3117 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3119 /* Handle linear clause adjustments. */
3120 tree itercnt = NULL_TREE;
3121 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3122 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3123 c; c = OMP_CLAUSE_CHAIN (c))
3124 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3125 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3127 tree d = OMP_CLAUSE_DECL (c);
3128 bool is_ref = omp_is_reference (d);
3129 tree t = d, a, dest;
3130 if (is_ref)
3131 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3132 tree type = TREE_TYPE (t);
3133 if (POINTER_TYPE_P (type))
3134 type = sizetype;
3135 dest = unshare_expr (t);
3136 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3137 expand_omp_build_assign (&gsif, v, t);
3138 if (itercnt == NULL_TREE)
3140 itercnt = startvar;
3141 tree n1 = fd->loop.n1;
3142 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3144 itercnt
3145 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3146 itercnt);
3147 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3149 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3150 itercnt, n1);
3151 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3152 itercnt, fd->loop.step);
3153 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3154 NULL_TREE, false,
3155 GSI_CONTINUE_LINKING);
3157 a = fold_build2 (MULT_EXPR, type,
3158 fold_convert (type, itercnt),
3159 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3160 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3161 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3162 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3163 false, GSI_CONTINUE_LINKING);
3164 assign_stmt = gimple_build_assign (dest, t);
3165 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3167 if (fd->collapse > 1)
3168 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3170 if (fd->ordered)
3172 /* Until now, the counts array held, for the ith loop, either the
3173 number of iterations or a variable containing it. From now on we
3174 need those counts only for the collapsed loops, and only from the
3175 2nd to the last collapsed one. Move them one element earlier;
3176 we'll use counts[fd->collapse - 1] for the first source/sink
3177 iteration counter and so on, and counts[fd->ordered]
3178 as the array holding the current counter values for
3179 depend(source). */
3180 if (fd->collapse > 1)
3181 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3182 if (broken_loop)
3184 int i;
3185 for (i = fd->collapse; i < fd->ordered; i++)
3187 tree type = TREE_TYPE (fd->loops[i].v);
3188 tree this_cond
3189 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3190 fold_convert (type, fd->loops[i].n1),
3191 fold_convert (type, fd->loops[i].n2));
3192 if (!integer_onep (this_cond))
3193 break;
3195 if (i < fd->ordered)
3197 cont_bb
3198 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3199 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3200 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3201 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3202 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3203 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3204 make_edge (cont_bb, l1_bb, 0);
3205 l2_bb = create_empty_bb (cont_bb);
3206 broken_loop = false;
3209 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3210 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3211 ordered_lastprivate);
3212 if (counts[fd->collapse - 1])
3214 gcc_assert (fd->collapse == 1);
3215 gsi = gsi_last_bb (l0_bb);
3216 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3217 istart0, true);
3218 gsi = gsi_last_bb (cont_bb);
3219 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3220 build_int_cst (fd->iter_type, 1));
3221 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3222 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3223 size_zero_node, NULL_TREE, NULL_TREE);
3224 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3225 t = counts[fd->collapse - 1];
3227 else if (fd->collapse > 1)
3228 t = fd->loop.v;
3229 else
3231 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3232 fd->loops[0].v, fd->loops[0].n1);
3233 t = fold_convert (fd->iter_type, t);
3235 gsi = gsi_last_bb (l0_bb);
3236 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3237 size_zero_node, NULL_TREE, NULL_TREE);
3238 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3239 false, GSI_CONTINUE_LINKING);
3240 expand_omp_build_assign (&gsi, aref, t, true);
3243 if (!broken_loop)
3245 /* Code to control the increment and predicate for the sequential
3246 loop goes in the CONT_BB. */
3247 gsi = gsi_last_nondebug_bb (cont_bb);
3248 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3249 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3250 vmain = gimple_omp_continue_control_use (cont_stmt);
3251 vback = gimple_omp_continue_control_def (cont_stmt);
3253 if (!gimple_omp_for_combined_p (fd->for_stmt))
3255 if (POINTER_TYPE_P (type))
3256 t = fold_build_pointer_plus (vmain, fd->loop.step);
3257 else
3258 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3259 t = force_gimple_operand_gsi (&gsi, t,
3260 DECL_P (vback)
3261 && TREE_ADDRESSABLE (vback),
3262 NULL_TREE, true, GSI_SAME_STMT);
3263 assign_stmt = gimple_build_assign (vback, t);
3264 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3266 if (cond_var)
3268 tree itype = TREE_TYPE (cond_var);
3269 tree t2;
3270 if ((fd->ordered && fd->collapse == 1)
3271 || bias
3272 || POINTER_TYPE_P (type)
3273 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3274 || fd->loop.cond_code != LT_EXPR)
3275 t2 = build_int_cst (itype, 1);
3276 else
3277 t2 = fold_convert (itype, fd->loop.step);
3278 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3279 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3280 NULL_TREE, true, GSI_SAME_STMT);
3281 assign_stmt = gimple_build_assign (cond_var, t2);
3282 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3287 tree tem;
3288 if (fd->collapse > 1)
3289 tem = fd->loop.v;
3290 else
3292 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3293 fd->loops[0].v, fd->loops[0].n1);
3294 tem = fold_convert (fd->iter_type, tem);
3296 tree aref = build4 (ARRAY_REF, fd->iter_type,
3297 counts[fd->ordered], size_zero_node,
3298 NULL_TREE, NULL_TREE);
3299 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3300 true, GSI_SAME_STMT);
3301 expand_omp_build_assign (&gsi, aref, tem);
3304 t = build2 (fd->loop.cond_code, boolean_type_node,
3305 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3306 iend);
3307 gcond *cond_stmt = gimple_build_cond_empty (t);
3308 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3311 /* Remove GIMPLE_OMP_CONTINUE. */
3312 gsi_remove (&gsi, true);
3314 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3315 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3317 /* Emit code to get the next parallel iteration in L2_BB. */
3318 gsi = gsi_start_bb (l2_bb);
3320 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3321 build_fold_addr_expr (istart0),
3322 build_fold_addr_expr (iend0));
3323 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3324 false, GSI_CONTINUE_LINKING);
3325 if (TREE_TYPE (t) != boolean_type_node)
3326 t = fold_build2 (NE_EXPR, boolean_type_node,
3327 t, build_int_cst (TREE_TYPE (t), 0));
3328 gcond *cond_stmt = gimple_build_cond_empty (t);
3329 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3332 /* Add the loop cleanup function. */
3333 gsi = gsi_last_nondebug_bb (exit_bb);
3334 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3335 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3336 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3337 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3338 else
3339 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3340 gcall *call_stmt = gimple_build_call (t, 0);
3341 if (fd->ordered)
3343 tree arr = counts[fd->ordered];
3344 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3345 TREE_THIS_VOLATILE (clobber) = 1;
3346 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3347 GSI_SAME_STMT);
3349 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3351 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3352 if (fd->have_reductemp)
3354 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3355 gimple_call_lhs (call_stmt));
3356 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3359 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3360 gsi_remove (&gsi, true);
3362 /* Connect the new blocks. */
3363 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3364 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3366 if (!broken_loop)
3368 gimple_seq phis;
3370 e = find_edge (cont_bb, l3_bb);
3371 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3373 phis = phi_nodes (l3_bb);
3374 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3376 gimple *phi = gsi_stmt (gsi);
3377 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3378 PHI_ARG_DEF_FROM_EDGE (phi, e));
3380 remove_edge (e);
3382 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3383 e = find_edge (cont_bb, l1_bb);
3384 if (e == NULL)
3386 e = BRANCH_EDGE (cont_bb);
3387 gcc_assert (single_succ (e->dest) == l1_bb);
3389 if (gimple_omp_for_combined_p (fd->for_stmt))
3391 remove_edge (e);
3392 e = NULL;
3394 else if (fd->collapse > 1)
3396 remove_edge (e);
3397 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3399 else
3400 e->flags = EDGE_TRUE_VALUE;
3401 if (e)
3403 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3404 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3406 else
3408 e = find_edge (cont_bb, l2_bb);
3409 e->flags = EDGE_FALLTHRU;
3411 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3413 if (gimple_in_ssa_p (cfun))
3415 /* Add phis to the outer loop that connect to the phis in the inner,
3416 original loop, and move the loop entry value of the inner phi to
3417 the loop entry value of the outer phi. */
3418 gphi_iterator psi;
3419 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3421 location_t locus;
3422 gphi *nphi;
3423 gphi *exit_phi = psi.phi ();
3425 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3426 continue;
3428 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3429 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3431 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3432 edge latch_to_l1 = find_edge (latch, l1_bb);
3433 gphi *inner_phi
3434 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3436 tree t = gimple_phi_result (exit_phi);
3437 tree new_res = copy_ssa_name (t, NULL);
3438 nphi = create_phi_node (new_res, l0_bb);
3440 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3441 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3442 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3443 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3444 add_phi_arg (nphi, t, entry_to_l0, locus);
3446 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3447 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3449 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3453 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3454 recompute_dominator (CDI_DOMINATORS, l2_bb));
3455 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3456 recompute_dominator (CDI_DOMINATORS, l3_bb));
3457 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3458 recompute_dominator (CDI_DOMINATORS, l0_bb));
3459 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3460 recompute_dominator (CDI_DOMINATORS, l1_bb));
3462 /* We enter expand_omp_for_generic with a loop. This original loop may
3463 have its own loop struct, or it may be part of an outer loop struct
3464 (which may be the fake loop). */
3465 struct loop *outer_loop = entry_bb->loop_father;
3466 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3468 add_bb_to_loop (l2_bb, outer_loop);
3470 /* We've added a new loop around the original loop. Allocate the
3471 corresponding loop struct. */
3472 struct loop *new_loop = alloc_loop ();
3473 new_loop->header = l0_bb;
3474 new_loop->latch = l2_bb;
3475 add_loop (new_loop, outer_loop);
3477 /* Allocate a loop structure for the original loop unless we already
3478 had one. */
3479 if (!orig_loop_has_loop_struct
3480 && !gimple_omp_for_combined_p (fd->for_stmt))
3482 struct loop *orig_loop = alloc_loop ();
3483 orig_loop->header = l1_bb;
3484 /* The loop may have multiple latches. */
3485 add_loop (orig_loop, new_loop);
3490 /* A subroutine of expand_omp_for. Generate code for a parallel
3491 loop with static schedule and no specified chunk size. Given
3492 parameters:
3494 for (V = N1; V cond N2; V += STEP) BODY;
3496 where COND is "<" or ">", we generate pseudocode
3498 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3499 if (cond is <)
3500 adj = STEP - 1;
3501 else
3502 adj = STEP + 1;
3503 if ((__typeof (V)) -1 > 0 && cond is >)
3504 n = -(adj + N2 - N1) / -STEP;
3505 else
3506 n = (adj + N2 - N1) / STEP;
3507 q = n / nthreads;
3508 tt = n % nthreads;
3509 if (threadid < tt) goto L3; else goto L4;
3511 tt = 0;
3512 q = q + 1;
3514 s0 = q * threadid + tt;
3515 e0 = s0 + q;
3516 V = s0 * STEP + N1;
3517 if (s0 >= e0) goto L2; else goto L0;
3519 e = e0 * STEP + N1;
3521 BODY;
3522 V += STEP;
3523 if (V cond e) goto L1;
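/* For example, n == 10 iterations over nthreads == 4 threads gives
   q == 2 and tt == 2, so the first tt threads get one extra iteration:

     threadid 0: s0 = 0, e0 = 3    (3 iterations)
     threadid 1: s0 = 3, e0 = 6    (3 iterations)
     threadid 2: s0 = 6, e0 = 8    (2 iterations)
     threadid 3: s0 = 8, e0 = 10   (2 iterations)

   covering 0 .. 9 exactly once; each thread then runs its [s0, e0) slice
   with V = s0 * STEP + N1 and upper bound e = e0 * STEP + N1.  */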
3527 static void
3528 expand_omp_for_static_nochunk (struct omp_region *region,
3529 struct omp_for_data *fd,
3530 gimple *inner_stmt)
3532 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3533 tree type, itype, vmain, vback;
3534 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3535 basic_block body_bb, cont_bb, collapse_bb = NULL;
3536 basic_block fin_bb;
3537 gimple_stmt_iterator gsi, gsip;
3538 edge ep;
3539 bool broken_loop = region->cont == NULL;
3540 tree *counts = NULL;
3541 tree n1, n2, step;
3542 tree reductions = NULL_TREE;
3543 tree cond_var = NULL_TREE;
3545 itype = type = TREE_TYPE (fd->loop.v);
3546 if (POINTER_TYPE_P (type))
3547 itype = signed_type_for (type);
3549 entry_bb = region->entry;
3550 cont_bb = region->cont;
3551 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3552 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3553 gcc_assert (broken_loop
3554 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3555 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3556 body_bb = single_succ (seq_start_bb);
3557 if (!broken_loop)
3559 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3560 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3561 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3563 exit_bb = region->exit;
3565 /* Iteration space partitioning goes in ENTRY_BB. */
3566 gsi = gsi_last_nondebug_bb (entry_bb);
3567 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3568 gsip = gsi;
3569 gsi_prev (&gsip);
3571 if (fd->collapse > 1)
3573 int first_zero_iter = -1, dummy = -1;
3574 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3576 counts = XALLOCAVEC (tree, fd->collapse);
3577 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3578 fin_bb, first_zero_iter,
3579 dummy_bb, dummy, l2_dom_bb);
3580 t = NULL_TREE;
3582 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3583 t = integer_one_node;
3584 else
3585 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3586 fold_convert (type, fd->loop.n1),
3587 fold_convert (type, fd->loop.n2));
3588 if (fd->collapse == 1
3589 && TYPE_UNSIGNED (type)
3590 && (t == NULL_TREE || !integer_onep (t)))
3592 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3593 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3594 true, GSI_SAME_STMT);
3595 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3596 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3597 true, GSI_SAME_STMT);
3598 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3599 NULL_TREE, NULL_TREE);
3600 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3601 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3602 expand_omp_regimplify_p, NULL, NULL)
3603 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3604 expand_omp_regimplify_p, NULL, NULL))
3606 gsi = gsi_for_stmt (cond_stmt);
3607 gimple_regimplify_operands (cond_stmt, &gsi);
3609 ep = split_block (entry_bb, cond_stmt);
3610 ep->flags = EDGE_TRUE_VALUE;
3611 entry_bb = ep->dest;
3612 ep->probability = profile_probability::very_likely ();
3613 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3614 ep->probability = profile_probability::very_unlikely ();
3615 if (gimple_in_ssa_p (cfun))
3617 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3618 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3619 !gsi_end_p (gpi); gsi_next (&gpi))
3621 gphi *phi = gpi.phi ();
3622 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3623 ep, UNKNOWN_LOCATION);
3626 gsi = gsi_last_bb (entry_bb);
3629 if (fd->have_reductemp || fd->lastprivate_conditional)
3631 tree t1 = build_int_cst (long_integer_type_node, 0);
3632 tree t2 = build_int_cst (long_integer_type_node, 1);
3633 tree t3 = build_int_cstu (long_integer_type_node,
3634 (HOST_WIDE_INT_1U << 31) + 1);
3635 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3636 gimple_stmt_iterator gsi2 = gsi_none ();
3637 gimple *g = NULL;
3638 tree mem = null_pointer_node, memv = NULL_TREE;
3639 tree condtemp = NULL_TREE;
3640 if (fd->have_reductemp)
3642 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3643 reductions = OMP_CLAUSE_DECL (c);
3644 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3645 g = SSA_NAME_DEF_STMT (reductions);
3646 reductions = gimple_assign_rhs1 (g);
3647 OMP_CLAUSE_DECL (c) = reductions;
3648 gsi2 = gsi_for_stmt (g);
3650 else
3652 if (gsi_end_p (gsip))
3653 gsi2 = gsi_after_labels (region->entry);
3654 else
3655 gsi2 = gsip;
3656 reductions = null_pointer_node;
3658 if (fd->lastprivate_conditional)
3660 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3661 condtemp = OMP_CLAUSE_DECL (c);
3662 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3663 cond_var = OMP_CLAUSE_DECL (c);
3664 tree type = TREE_TYPE (condtemp);
3665 memv = create_tmp_var (type);
3666 TREE_ADDRESSABLE (memv) = 1;
3667 unsigned HOST_WIDE_INT sz
3668 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3669 sz *= fd->lastprivate_conditional;
3670 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
3671 false);
3672 mem = build_fold_addr_expr (memv);
3674 tree t
3675 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3676 9, t1, t2, t2, t3, t1, null_pointer_node,
3677 null_pointer_node, reductions, mem);
3678 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3679 true, GSI_SAME_STMT);
3680 if (fd->lastprivate_conditional)
3681 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3682 if (fd->have_reductemp)
3684 gsi_remove (&gsi2, true);
3685 release_ssa_name (gimple_assign_lhs (g));
3688 switch (gimple_omp_for_kind (fd->for_stmt))
3690 case GF_OMP_FOR_KIND_FOR:
3691 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3692 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3693 break;
3694 case GF_OMP_FOR_KIND_DISTRIBUTE:
3695 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3696 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3697 break;
3698 default:
3699 gcc_unreachable ();
3701 nthreads = build_call_expr (nthreads, 0);
3702 nthreads = fold_convert (itype, nthreads);
3703 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3704 true, GSI_SAME_STMT);
3705 threadid = build_call_expr (threadid, 0);
3706 threadid = fold_convert (itype, threadid);
3707 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3708 true, GSI_SAME_STMT);
3710 n1 = fd->loop.n1;
3711 n2 = fd->loop.n2;
3712 step = fd->loop.step;
3713 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3715 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3716 OMP_CLAUSE__LOOPTEMP_);
3717 gcc_assert (innerc);
3718 n1 = OMP_CLAUSE_DECL (innerc);
3719 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3720 OMP_CLAUSE__LOOPTEMP_);
3721 gcc_assert (innerc);
3722 n2 = OMP_CLAUSE_DECL (innerc);
3724 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3725 true, NULL_TREE, true, GSI_SAME_STMT);
3726 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3727 true, NULL_TREE, true, GSI_SAME_STMT);
3728 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3729 true, NULL_TREE, true, GSI_SAME_STMT);
3731 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3732 t = fold_build2 (PLUS_EXPR, itype, step, t);
3733 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3734 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3735 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3736 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3737 fold_build1 (NEGATE_EXPR, itype, t),
3738 fold_build1 (NEGATE_EXPR, itype, step));
3739 else
3740 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3741 t = fold_convert (itype, t);
3742 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3744 q = create_tmp_reg (itype, "q");
3745 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3746 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3747 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3749 tt = create_tmp_reg (itype, "tt");
3750 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3751 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3752 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3754 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3755 gcond *cond_stmt = gimple_build_cond_empty (t);
3756 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3758 second_bb = split_block (entry_bb, cond_stmt)->dest;
3759 gsi = gsi_last_nondebug_bb (second_bb);
3760 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3762 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3763 GSI_SAME_STMT);
3764 gassign *assign_stmt
3765 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3766 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3768 third_bb = split_block (second_bb, assign_stmt)->dest;
3769 gsi = gsi_last_nondebug_bb (third_bb);
3770 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3772 t = build2 (MULT_EXPR, itype, q, threadid);
3773 t = build2 (PLUS_EXPR, itype, t, tt);
3774 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3776 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3777 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3779 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3780 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3782 /* Remove the GIMPLE_OMP_FOR statement. */
3783 gsi_remove (&gsi, true);
3785 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3786 gsi = gsi_start_bb (seq_start_bb);
3788 tree startvar = fd->loop.v;
3789 tree endvar = NULL_TREE;
3791 if (gimple_omp_for_combined_p (fd->for_stmt))
3793 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3794 ? gimple_omp_parallel_clauses (inner_stmt)
3795 : gimple_omp_for_clauses (inner_stmt);
3796 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3797 gcc_assert (innerc);
3798 startvar = OMP_CLAUSE_DECL (innerc);
3799 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3800 OMP_CLAUSE__LOOPTEMP_);
3801 gcc_assert (innerc);
3802 endvar = OMP_CLAUSE_DECL (innerc);
3803 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3804 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3806 int i;
3807 for (i = 1; i < fd->collapse; i++)
3809 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3810 OMP_CLAUSE__LOOPTEMP_);
3811 gcc_assert (innerc);
3813 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3814 OMP_CLAUSE__LOOPTEMP_);
3815 if (innerc)
3817 /* If needed (distribute parallel for with lastprivate),
3818 propagate down the total number of iterations. */
3819 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3820 fd->loop.n2);
3821 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3822 GSI_CONTINUE_LINKING);
3823 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3824 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3828 t = fold_convert (itype, s0);
3829 t = fold_build2 (MULT_EXPR, itype, t, step);
3830 if (POINTER_TYPE_P (type))
3832 t = fold_build_pointer_plus (n1, t);
3833 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3834 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3835 t = fold_convert (signed_type_for (type), t);
3837 else
3838 t = fold_build2 (PLUS_EXPR, type, t, n1);
3839 t = fold_convert (TREE_TYPE (startvar), t);
3840 t = force_gimple_operand_gsi (&gsi, t,
3841 DECL_P (startvar)
3842 && TREE_ADDRESSABLE (startvar),
3843 NULL_TREE, false, GSI_CONTINUE_LINKING);
3844 assign_stmt = gimple_build_assign (startvar, t);
3845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3846 if (cond_var)
3848 tree itype = TREE_TYPE (cond_var);
3849 /* For lastprivate(conditional:) itervar, we need some iteration
3850 counter that starts at a non-zero unsigned value and increases.
3851 Prefer as few IVs as possible, so if we can use startvar
3852 itself, use that, or startvar + constant (those would be
3853 incremented with step), and as a last resort use s0 + 1,
3854 incremented by 1 each iteration. */
3855 if (POINTER_TYPE_P (type)
3856 || TREE_CODE (n1) != INTEGER_CST
3857 || fd->loop.cond_code != LT_EXPR)
3858 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
3859 build_int_cst (itype, 1));
3860 else if (tree_int_cst_sgn (n1) == 1)
3861 t = fold_convert (itype, t);
3862 else
3864 tree c = fold_convert (itype, n1);
3865 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3866 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3868 t = force_gimple_operand_gsi (&gsi, t, false,
3869 NULL_TREE, false, GSI_CONTINUE_LINKING);
3870 assign_stmt = gimple_build_assign (cond_var, t);
3871 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
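/* Editorial illustration of the counter chosen above (assumed concrete
   values, not generated code): with N1 = -3, STEP = 1 and cond "<", the
   last branch picks c = 1 - N1 = 4, so the counter equals V + 4, which is
   1 on the first logical iteration and strictly increasing from there;
   with N1 = 5 the middle branch can use V itself, since it already starts
   at a positive value.  */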
3874 t = fold_convert (itype, e0);
3875 t = fold_build2 (MULT_EXPR, itype, t, step);
3876 if (POINTER_TYPE_P (type))
3878 t = fold_build_pointer_plus (n1, t);
3879 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3880 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3881 t = fold_convert (signed_type_for (type), t);
3883 else
3884 t = fold_build2 (PLUS_EXPR, type, t, n1);
3885 t = fold_convert (TREE_TYPE (startvar), t);
3886 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3887 false, GSI_CONTINUE_LINKING);
3888 if (endvar)
3890 assign_stmt = gimple_build_assign (endvar, e);
3891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3892 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3893 assign_stmt = gimple_build_assign (fd->loop.v, e);
3894 else
3895 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3896 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3898 /* Handle linear clause adjustments. */
3899 tree itercnt = NULL_TREE;
3900 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3901 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3902 c; c = OMP_CLAUSE_CHAIN (c))
3903 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3904 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3906 tree d = OMP_CLAUSE_DECL (c);
3907 bool is_ref = omp_is_reference (d);
3908 tree t = d, a, dest;
3909 if (is_ref)
3910 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3911 if (itercnt == NULL_TREE)
3913 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3915 itercnt = fold_build2 (MINUS_EXPR, itype,
3916 fold_convert (itype, n1),
3917 fold_convert (itype, fd->loop.n1));
3918 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3919 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3920 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3921 NULL_TREE, false,
3922 GSI_CONTINUE_LINKING);
3924 else
3925 itercnt = s0;
3927 tree type = TREE_TYPE (t);
3928 if (POINTER_TYPE_P (type))
3929 type = sizetype;
3930 a = fold_build2 (MULT_EXPR, type,
3931 fold_convert (type, itercnt),
3932 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3933 dest = unshare_expr (t);
3934 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3935 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3936 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3937 false, GSI_CONTINUE_LINKING);
3938 assign_stmt = gimple_build_assign (dest, t);
3939 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
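/* Editorial illustration (assumed example, not generated code): for
   #pragma omp for linear(x:2), the adjustment above makes each thread
   enter its sub-range with x set to the copied-in value plus twice the
   logical iteration number it starts at; e.g. the thread whose range
   starts at logical iteration s0 = 40 begins with x = x_orig + 40 * 2.  */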
3941 if (fd->collapse > 1)
3942 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3944 if (!broken_loop)
3946 /* The code controlling the sequential loop replaces the
3947 GIMPLE_OMP_CONTINUE. */
3948 gsi = gsi_last_nondebug_bb (cont_bb);
3949 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3950 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3951 vmain = gimple_omp_continue_control_use (cont_stmt);
3952 vback = gimple_omp_continue_control_def (cont_stmt);
3954 if (!gimple_omp_for_combined_p (fd->for_stmt))
3956 if (POINTER_TYPE_P (type))
3957 t = fold_build_pointer_plus (vmain, step);
3958 else
3959 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3960 t = force_gimple_operand_gsi (&gsi, t,
3961 DECL_P (vback)
3962 && TREE_ADDRESSABLE (vback),
3963 NULL_TREE, true, GSI_SAME_STMT);
3964 assign_stmt = gimple_build_assign (vback, t);
3965 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3967 if (cond_var)
3969 tree itype = TREE_TYPE (cond_var);
3970 tree t2;
3971 if (POINTER_TYPE_P (type)
3972 || TREE_CODE (n1) != INTEGER_CST
3973 || fd->loop.cond_code != LT_EXPR)
3974 t2 = build_int_cst (itype, 1);
3975 else
3976 t2 = fold_convert (itype, step);
3977 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3978 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3979 NULL_TREE, true, GSI_SAME_STMT);
3980 assign_stmt = gimple_build_assign (cond_var, t2);
3981 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3984 t = build2 (fd->loop.cond_code, boolean_type_node,
3985 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3986 ? t : vback, e);
3987 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3990 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3991 gsi_remove (&gsi, true);
3993 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3994 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3997 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3998 gsi = gsi_last_nondebug_bb (exit_bb);
3999 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4001 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4002 if (fd->have_reductemp || fd->lastprivate_conditional)
4004 tree fn;
4005 if (t)
4006 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4007 else
4008 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4009 gcall *g = gimple_build_call (fn, 0);
4010 if (t)
4012 gimple_call_set_lhs (g, t);
4013 if (fd->have_reductemp)
4014 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4015 NOP_EXPR, t),
4016 GSI_SAME_STMT);
4018 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4020 else
4021 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4023 gsi_remove (&gsi, true);
4025 /* Connect all the blocks. */
4026 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4027 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4028 ep = find_edge (entry_bb, second_bb);
4029 ep->flags = EDGE_TRUE_VALUE;
4030 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4031 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4032 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4034 if (!broken_loop)
4036 ep = find_edge (cont_bb, body_bb);
4037 if (ep == NULL)
4039 ep = BRANCH_EDGE (cont_bb);
4040 gcc_assert (single_succ (ep->dest) == body_bb);
4042 if (gimple_omp_for_combined_p (fd->for_stmt))
4044 remove_edge (ep);
4045 ep = NULL;
4047 else if (fd->collapse > 1)
4049 remove_edge (ep);
4050 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4052 else
4053 ep->flags = EDGE_TRUE_VALUE;
4054 find_edge (cont_bb, fin_bb)->flags
4055 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4058 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4059 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4060 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
4062 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4063 recompute_dominator (CDI_DOMINATORS, body_bb));
4064 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4065 recompute_dominator (CDI_DOMINATORS, fin_bb));
4067 struct loop *loop = body_bb->loop_father;
4068 if (loop != entry_bb->loop_father)
4070 gcc_assert (broken_loop || loop->header == body_bb);
4071 gcc_assert (broken_loop
4072 || loop->latch == region->cont
4073 || single_pred (loop->latch) == region->cont);
4074 return;
4077 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4079 loop = alloc_loop ();
4080 loop->header = body_bb;
4081 if (collapse_bb == NULL)
4082 loop->latch = cont_bb;
4083 add_loop (loop, body_bb->loop_father);
4087 /* Return phi in E->DEST with ARG on edge E. */
4089 static gphi *
4090 find_phi_with_arg_on_edge (tree arg, edge e)
4092 basic_block bb = e->dest;
4094 for (gphi_iterator gpi = gsi_start_phis (bb);
4095 !gsi_end_p (gpi);
4096 gsi_next (&gpi))
4098 gphi *phi = gpi.phi ();
4099 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4100 return phi;
4103 return NULL;
4106 /* A subroutine of expand_omp_for. Generate code for a parallel
4107 loop with static schedule and a specified chunk size. Given
4108 parameters:
4110 for (V = N1; V cond N2; V += STEP) BODY;
4112 where COND is "<" or ">", we generate pseudocode
4114 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
4115 if (cond is <)
4116 adj = STEP - 1;
4117 else
4118 adj = STEP + 1;
4119 if ((__typeof (V)) -1 > 0 && cond is >)
4120 n = -(adj + N2 - N1) / -STEP;
4121 else
4122 n = (adj + N2 - N1) / STEP;
4123 trip = 0;
4124 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4125 here so that V is defined
4126 if the loop is not entered
4127 L0:
4128 s0 = (trip * nthreads + threadid) * CHUNK;
4129 e0 = min (s0 + CHUNK, n);
4130 if (s0 < n) goto L1; else goto L4;
4131 L1:
4132 V = s0 * STEP + N1;
4133 e = e0 * STEP + N1;
4134 L2:
4135 BODY;
4136 V += STEP;
4137 if (V cond e) goto L2; else goto L3;
4138 L3:
4139 trip += 1;
4140 goto L0;
4141 L4:
4142 */
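/* Illustrative example (editorial note with assumed values, not part of the
   generated code): chunks are handed out cyclically, CHUNK iterations at a
   time.  With n = 10, nthreads = 2 and CHUNK = 3, thread 0 runs trips 0 and
   1 with s0 = 0 and 6 (ranges [0,3) and [6,9)), and thread 1 runs trips 0
   and 1 with s0 = 3 and 9 (ranges [3,6) and [9,10)); on the next trip
   s0 >= n, so each thread falls through to L4.  */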
4144 static void
4145 expand_omp_for_static_chunk (struct omp_region *region,
4146 struct omp_for_data *fd, gimple *inner_stmt)
4148 tree n, s0, e0, e, t;
4149 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4150 tree type, itype, vmain, vback, vextra;
4151 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4152 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4153 gimple_stmt_iterator gsi, gsip;
4154 edge se;
4155 bool broken_loop = region->cont == NULL;
4156 tree *counts = NULL;
4157 tree n1, n2, step;
4158 tree reductions = NULL_TREE;
4159 tree cond_var = NULL_TREE;
4161 itype = type = TREE_TYPE (fd->loop.v);
4162 if (POINTER_TYPE_P (type))
4163 itype = signed_type_for (type);
4165 entry_bb = region->entry;
4166 se = split_block (entry_bb, last_stmt (entry_bb));
4167 entry_bb = se->src;
4168 iter_part_bb = se->dest;
4169 cont_bb = region->cont;
4170 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4171 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4172 gcc_assert (broken_loop
4173 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4174 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4175 body_bb = single_succ (seq_start_bb);
4176 if (!broken_loop)
4178 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4179 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4180 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4181 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4183 exit_bb = region->exit;
4185 /* Trip and adjustment setup goes in ENTRY_BB. */
4186 gsi = gsi_last_nondebug_bb (entry_bb);
4187 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4188 gsip = gsi;
4189 gsi_prev (&gsip);
4191 if (fd->collapse > 1)
4193 int first_zero_iter = -1, dummy = -1;
4194 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4196 counts = XALLOCAVEC (tree, fd->collapse);
4197 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4198 fin_bb, first_zero_iter,
4199 dummy_bb, dummy, l2_dom_bb);
4200 t = NULL_TREE;
4202 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4203 t = integer_one_node;
4204 else
4205 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4206 fold_convert (type, fd->loop.n1),
4207 fold_convert (type, fd->loop.n2));
4208 if (fd->collapse == 1
4209 && TYPE_UNSIGNED (type)
4210 && (t == NULL_TREE || !integer_onep (t)))
4212 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4213 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4214 true, GSI_SAME_STMT);
4215 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4216 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4217 true, GSI_SAME_STMT);
4218 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4219 NULL_TREE, NULL_TREE);
4220 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4221 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4222 expand_omp_regimplify_p, NULL, NULL)
4223 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4224 expand_omp_regimplify_p, NULL, NULL))
4226 gsi = gsi_for_stmt (cond_stmt);
4227 gimple_regimplify_operands (cond_stmt, &gsi);
4229 se = split_block (entry_bb, cond_stmt);
4230 se->flags = EDGE_TRUE_VALUE;
4231 entry_bb = se->dest;
4232 se->probability = profile_probability::very_likely ();
4233 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4234 se->probability = profile_probability::very_unlikely ();
4235 if (gimple_in_ssa_p (cfun))
4237 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4238 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4239 !gsi_end_p (gpi); gsi_next (&gpi))
4241 gphi *phi = gpi.phi ();
4242 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4243 se, UNKNOWN_LOCATION);
4246 gsi = gsi_last_bb (entry_bb);
4249 if (fd->have_reductemp || fd->lastprivate_conditional)
4251 tree t1 = build_int_cst (long_integer_type_node, 0);
4252 tree t2 = build_int_cst (long_integer_type_node, 1);
4253 tree t3 = build_int_cstu (long_integer_type_node,
4254 (HOST_WIDE_INT_1U << 31) + 1);
4255 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4256 gimple_stmt_iterator gsi2 = gsi_none ();
4257 gimple *g = NULL;
4258 tree mem = null_pointer_node, memv = NULL_TREE;
4259 tree condtemp = NULL_TREE;
4260 if (fd->have_reductemp)
4262 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4263 reductions = OMP_CLAUSE_DECL (c);
4264 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4265 g = SSA_NAME_DEF_STMT (reductions);
4266 reductions = gimple_assign_rhs1 (g);
4267 OMP_CLAUSE_DECL (c) = reductions;
4268 gsi2 = gsi_for_stmt (g);
4270 else
4272 if (gsi_end_p (gsip))
4273 gsi2 = gsi_after_labels (region->entry);
4274 else
4275 gsi2 = gsip;
4276 reductions = null_pointer_node;
4278 if (fd->lastprivate_conditional)
4280 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4281 condtemp = OMP_CLAUSE_DECL (c);
4282 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4283 cond_var = OMP_CLAUSE_DECL (c);
4284 tree type = TREE_TYPE (condtemp);
4285 memv = create_tmp_var (type);
4286 TREE_ADDRESSABLE (memv) = 1;
4287 unsigned HOST_WIDE_INT sz
4288 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4289 sz *= fd->lastprivate_conditional;
4290 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4291 false);
4292 mem = build_fold_addr_expr (memv);
4294 tree t
4295 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4296 9, t1, t2, t2, t3, t1, null_pointer_node,
4297 null_pointer_node, reductions, mem);
4298 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4299 true, GSI_SAME_STMT);
4300 if (fd->lastprivate_conditional)
4301 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4302 if (fd->have_reductemp)
4304 gsi_remove (&gsi2, true);
4305 release_ssa_name (gimple_assign_lhs (g));
4308 switch (gimple_omp_for_kind (fd->for_stmt))
4310 case GF_OMP_FOR_KIND_FOR:
4311 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4312 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4313 break;
4314 case GF_OMP_FOR_KIND_DISTRIBUTE:
4315 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4316 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4317 break;
4318 default:
4319 gcc_unreachable ();
4321 nthreads = build_call_expr (nthreads, 0);
4322 nthreads = fold_convert (itype, nthreads);
4323 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4324 true, GSI_SAME_STMT);
4325 threadid = build_call_expr (threadid, 0);
4326 threadid = fold_convert (itype, threadid);
4327 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4328 true, GSI_SAME_STMT);
4330 n1 = fd->loop.n1;
4331 n2 = fd->loop.n2;
4332 step = fd->loop.step;
4333 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4335 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4336 OMP_CLAUSE__LOOPTEMP_);
4337 gcc_assert (innerc);
4338 n1 = OMP_CLAUSE_DECL (innerc);
4339 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4340 OMP_CLAUSE__LOOPTEMP_);
4341 gcc_assert (innerc);
4342 n2 = OMP_CLAUSE_DECL (innerc);
4344 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4345 true, NULL_TREE, true, GSI_SAME_STMT);
4346 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4347 true, NULL_TREE, true, GSI_SAME_STMT);
4348 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4349 true, NULL_TREE, true, GSI_SAME_STMT);
4350 tree chunk_size = fold_convert (itype, fd->chunk_size);
4351 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4352 chunk_size
4353 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4354 GSI_SAME_STMT);
4356 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4357 t = fold_build2 (PLUS_EXPR, itype, step, t);
4358 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4359 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4360 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4361 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4362 fold_build1 (NEGATE_EXPR, itype, t),
4363 fold_build1 (NEGATE_EXPR, itype, step));
4364 else
4365 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4366 t = fold_convert (itype, t);
4367 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4368 true, GSI_SAME_STMT);
4370 trip_var = create_tmp_reg (itype, ".trip");
4371 if (gimple_in_ssa_p (cfun))
4373 trip_init = make_ssa_name (trip_var);
4374 trip_main = make_ssa_name (trip_var);
4375 trip_back = make_ssa_name (trip_var);
4377 else
4379 trip_init = trip_var;
4380 trip_main = trip_var;
4381 trip_back = trip_var;
4384 gassign *assign_stmt
4385 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4386 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4388 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4389 t = fold_build2 (MULT_EXPR, itype, t, step);
4390 if (POINTER_TYPE_P (type))
4391 t = fold_build_pointer_plus (n1, t);
4392 else
4393 t = fold_build2 (PLUS_EXPR, type, t, n1);
4394 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4395 true, GSI_SAME_STMT);
4397 /* Remove the GIMPLE_OMP_FOR. */
4398 gsi_remove (&gsi, true);
4400 gimple_stmt_iterator gsif = gsi;
4402 /* Iteration space partitioning goes in ITER_PART_BB. */
4403 gsi = gsi_last_bb (iter_part_bb);
4405 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4406 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4407 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4408 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4409 false, GSI_CONTINUE_LINKING);
4411 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4412 t = fold_build2 (MIN_EXPR, itype, t, n);
4413 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4414 false, GSI_CONTINUE_LINKING);
4416 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4417 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4419 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4420 gsi = gsi_start_bb (seq_start_bb);
4422 tree startvar = fd->loop.v;
4423 tree endvar = NULL_TREE;
4425 if (gimple_omp_for_combined_p (fd->for_stmt))
4427 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4428 ? gimple_omp_parallel_clauses (inner_stmt)
4429 : gimple_omp_for_clauses (inner_stmt);
4430 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4431 gcc_assert (innerc);
4432 startvar = OMP_CLAUSE_DECL (innerc);
4433 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4434 OMP_CLAUSE__LOOPTEMP_);
4435 gcc_assert (innerc);
4436 endvar = OMP_CLAUSE_DECL (innerc);
4437 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4438 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4440 int i;
4441 for (i = 1; i < fd->collapse; i++)
4443 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4444 OMP_CLAUSE__LOOPTEMP_);
4445 gcc_assert (innerc);
4447 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4448 OMP_CLAUSE__LOOPTEMP_);
4449 if (innerc)
4451 /* If needed (distribute parallel for with lastprivate),
4452 propagate down the total number of iterations. */
4453 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4454 fd->loop.n2);
4455 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4456 GSI_CONTINUE_LINKING);
4457 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4458 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4463 t = fold_convert (itype, s0);
4464 t = fold_build2 (MULT_EXPR, itype, t, step);
4465 if (POINTER_TYPE_P (type))
4467 t = fold_build_pointer_plus (n1, t);
4468 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4469 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4470 t = fold_convert (signed_type_for (type), t);
4472 else
4473 t = fold_build2 (PLUS_EXPR, type, t, n1);
4474 t = fold_convert (TREE_TYPE (startvar), t);
4475 t = force_gimple_operand_gsi (&gsi, t,
4476 DECL_P (startvar)
4477 && TREE_ADDRESSABLE (startvar),
4478 NULL_TREE, false, GSI_CONTINUE_LINKING);
4479 assign_stmt = gimple_build_assign (startvar, t);
4480 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4481 if (cond_var)
4483 tree itype = TREE_TYPE (cond_var);
4484 /* For lastprivate(conditional:) itervar, we need some iteration
4485 counter that starts at a non-zero unsigned value and increases.
4486 Prefer as few IVs as possible, so if we can use startvar
4487 itself, use that, or startvar + constant (those would be
4488 incremented with step), and as a last resort use s0 + 1,
4489 incremented by 1 each iteration. */
4490 if (POINTER_TYPE_P (type)
4491 || TREE_CODE (n1) != INTEGER_CST
4492 || fd->loop.cond_code != LT_EXPR)
4493 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4494 build_int_cst (itype, 1));
4495 else if (tree_int_cst_sgn (n1) == 1)
4496 t = fold_convert (itype, t);
4497 else
4499 tree c = fold_convert (itype, n1);
4500 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4501 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4503 t = force_gimple_operand_gsi (&gsi, t, false,
4504 NULL_TREE, false, GSI_CONTINUE_LINKING);
4505 assign_stmt = gimple_build_assign (cond_var, t);
4506 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4509 t = fold_convert (itype, e0);
4510 t = fold_build2 (MULT_EXPR, itype, t, step);
4511 if (POINTER_TYPE_P (type))
4513 t = fold_build_pointer_plus (n1, t);
4514 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4515 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4516 t = fold_convert (signed_type_for (type), t);
4518 else
4519 t = fold_build2 (PLUS_EXPR, type, t, n1);
4520 t = fold_convert (TREE_TYPE (startvar), t);
4521 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4522 false, GSI_CONTINUE_LINKING);
4523 if (endvar)
4525 assign_stmt = gimple_build_assign (endvar, e);
4526 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4527 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4528 assign_stmt = gimple_build_assign (fd->loop.v, e);
4529 else
4530 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4531 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4533 /* Handle linear clause adjustments. */
4534 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4535 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4536 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4537 c; c = OMP_CLAUSE_CHAIN (c))
4538 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4539 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4541 tree d = OMP_CLAUSE_DECL (c);
4542 bool is_ref = omp_is_reference (d);
4543 tree t = d, a, dest;
4544 if (is_ref)
4545 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4546 tree type = TREE_TYPE (t);
4547 if (POINTER_TYPE_P (type))
4548 type = sizetype;
4549 dest = unshare_expr (t);
4550 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4551 expand_omp_build_assign (&gsif, v, t);
4552 if (itercnt == NULL_TREE)
4554 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4556 itercntbias
4557 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4558 fold_convert (itype, fd->loop.n1));
4559 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4560 itercntbias, step);
4561 itercntbias
4562 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4563 NULL_TREE, true,
4564 GSI_SAME_STMT);
4565 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4566 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4567 NULL_TREE, false,
4568 GSI_CONTINUE_LINKING);
4570 else
4571 itercnt = s0;
4573 a = fold_build2 (MULT_EXPR, type,
4574 fold_convert (type, itercnt),
4575 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4576 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4577 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4578 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4579 false, GSI_CONTINUE_LINKING);
4580 assign_stmt = gimple_build_assign (dest, t);
4581 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4583 if (fd->collapse > 1)
4584 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4586 if (!broken_loop)
4588 /* The code controlling the sequential loop goes in CONT_BB,
4589 replacing the GIMPLE_OMP_CONTINUE. */
4590 gsi = gsi_last_nondebug_bb (cont_bb);
4591 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4592 vmain = gimple_omp_continue_control_use (cont_stmt);
4593 vback = gimple_omp_continue_control_def (cont_stmt);
4595 if (!gimple_omp_for_combined_p (fd->for_stmt))
4597 if (POINTER_TYPE_P (type))
4598 t = fold_build_pointer_plus (vmain, step);
4599 else
4600 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4601 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4602 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4603 true, GSI_SAME_STMT);
4604 assign_stmt = gimple_build_assign (vback, t);
4605 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4607 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4608 t = build2 (EQ_EXPR, boolean_type_node,
4609 build_int_cst (itype, 0),
4610 build_int_cst (itype, 1));
4611 else
4612 t = build2 (fd->loop.cond_code, boolean_type_node,
4613 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4614 ? t : vback, e);
4615 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4618 /* Remove GIMPLE_OMP_CONTINUE. */
4619 gsi_remove (&gsi, true);
4621 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4622 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4624 /* Trip update code goes into TRIP_UPDATE_BB. */
4625 gsi = gsi_start_bb (trip_update_bb);
4627 t = build_int_cst (itype, 1);
4628 t = build2 (PLUS_EXPR, itype, trip_main, t);
4629 assign_stmt = gimple_build_assign (trip_back, t);
4630 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4633 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4634 gsi = gsi_last_nondebug_bb (exit_bb);
4635 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4637 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4638 if (fd->have_reductemp || fd->lastprivate_conditional)
4640 tree fn;
4641 if (t)
4642 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4643 else
4644 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4645 gcall *g = gimple_build_call (fn, 0);
4646 if (t)
4648 gimple_call_set_lhs (g, t);
4649 if (fd->have_reductemp)
4650 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4651 NOP_EXPR, t),
4652 GSI_SAME_STMT);
4654 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4656 else
4657 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4659 gsi_remove (&gsi, true);
4661 /* Connect the new blocks. */
4662 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4663 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4665 if (!broken_loop)
4667 se = find_edge (cont_bb, body_bb);
4668 if (se == NULL)
4670 se = BRANCH_EDGE (cont_bb);
4671 gcc_assert (single_succ (se->dest) == body_bb);
4673 if (gimple_omp_for_combined_p (fd->for_stmt))
4675 remove_edge (se);
4676 se = NULL;
4678 else if (fd->collapse > 1)
4680 remove_edge (se);
4681 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4683 else
4684 se->flags = EDGE_TRUE_VALUE;
4685 find_edge (cont_bb, trip_update_bb)->flags
4686 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4688 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4689 iter_part_bb);
4692 if (gimple_in_ssa_p (cfun))
4694 gphi_iterator psi;
4695 gphi *phi;
4696 edge re, ene;
4697 edge_var_map *vm;
4698 size_t i;
4700 gcc_assert (fd->collapse == 1 && !broken_loop);
4702 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4703 remove arguments of the phi nodes in fin_bb. We need to create
4704 appropriate phi nodes in iter_part_bb instead. */
4705 se = find_edge (iter_part_bb, fin_bb);
4706 re = single_succ_edge (trip_update_bb);
4707 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4708 ene = single_succ_edge (entry_bb);
4710 psi = gsi_start_phis (fin_bb);
4711 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4712 gsi_next (&psi), ++i)
4714 gphi *nphi;
4715 location_t locus;
4717 phi = psi.phi ();
4718 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4719 redirect_edge_var_map_def (vm), 0))
4720 continue;
4722 t = gimple_phi_result (phi);
4723 gcc_assert (t == redirect_edge_var_map_result (vm));
4725 if (!single_pred_p (fin_bb))
4726 t = copy_ssa_name (t, phi);
4728 nphi = create_phi_node (t, iter_part_bb);
4730 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4731 locus = gimple_phi_arg_location_from_edge (phi, se);
4733 /* A special case -- fd->loop.v is not yet computed in
4734 iter_part_bb, so we need to use vextra instead. */
4735 if (t == fd->loop.v)
4736 t = vextra;
4737 add_phi_arg (nphi, t, ene, locus);
4738 locus = redirect_edge_var_map_location (vm);
4739 tree back_arg = redirect_edge_var_map_def (vm);
4740 add_phi_arg (nphi, back_arg, re, locus);
4741 edge ce = find_edge (cont_bb, body_bb);
4742 if (ce == NULL)
4744 ce = BRANCH_EDGE (cont_bb);
4745 gcc_assert (single_succ (ce->dest) == body_bb);
4746 ce = single_succ_edge (ce->dest);
4748 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4749 gcc_assert (inner_loop_phi != NULL);
4750 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4751 find_edge (seq_start_bb, body_bb), locus);
4753 if (!single_pred_p (fin_bb))
4754 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4756 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4757 redirect_edge_var_map_clear (re);
4758 if (single_pred_p (fin_bb))
4759 while (1)
4761 psi = gsi_start_phis (fin_bb);
4762 if (gsi_end_p (psi))
4763 break;
4764 remove_phi_node (&psi, false);
4767 /* Make phi node for trip. */
4768 phi = create_phi_node (trip_main, iter_part_bb);
4769 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4770 UNKNOWN_LOCATION);
4771 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4772 UNKNOWN_LOCATION);
4775 if (!broken_loop)
4776 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4777 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4778 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4779 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4780 recompute_dominator (CDI_DOMINATORS, fin_bb));
4781 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4782 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4783 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4784 recompute_dominator (CDI_DOMINATORS, body_bb));
4786 if (!broken_loop)
4788 struct loop *loop = body_bb->loop_father;
4789 struct loop *trip_loop = alloc_loop ();
4790 trip_loop->header = iter_part_bb;
4791 trip_loop->latch = trip_update_bb;
4792 add_loop (trip_loop, iter_part_bb->loop_father);
4794 if (loop != entry_bb->loop_father)
4796 gcc_assert (loop->header == body_bb);
4797 gcc_assert (loop->latch == region->cont
4798 || single_pred (loop->latch) == region->cont);
4799 trip_loop->inner = loop;
4800 return;
4803 if (!gimple_omp_for_combined_p (fd->for_stmt))
4805 loop = alloc_loop ();
4806 loop->header = body_bb;
4807 if (collapse_bb == NULL)
4808 loop->latch = cont_bb;
4809 add_loop (loop, trip_loop);
4814 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4815 loop. Given parameters:
4817 for (V = N1; V cond N2; V += STEP) BODY;
4819 where COND is "<" or ">", we generate pseudocode
4821 V = N1;
4822 goto L1;
4823 L0:
4824 BODY;
4825 V += STEP;
4826 L1:
4827 if (V cond N2) goto L0; else goto L2;
4828 L2:
4830 For collapsed loops, given parameters:
4831 collapse(3)
4832 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4833 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4834 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4835 BODY;
4837 we generate pseudocode
4839 if (cond3 is <)
4840 adj = STEP3 - 1;
4841 else
4842 adj = STEP3 + 1;
4843 count3 = (adj + N32 - N31) / STEP3;
4844 if (cond2 is <)
4845 adj = STEP2 - 1;
4846 else
4847 adj = STEP2 + 1;
4848 count2 = (adj + N22 - N21) / STEP2;
4849 if (cond1 is <)
4850 adj = STEP1 - 1;
4851 else
4852 adj = STEP1 + 1;
4853 count1 = (adj + N12 - N11) / STEP1;
4854 count = count1 * count2 * count3;
4855 V = 0;
4856 V1 = N11;
4857 V2 = N21;
4858 V3 = N31;
4859 goto L1;
4860 L0:
4861 BODY;
4862 V += 1;
4863 V3 += STEP3;
4864 V2 += (V3 cond3 N32) ? 0 : STEP2;
4865 V3 = (V3 cond3 N32) ? V3 : N31;
4866 V1 += (V2 cond2 N22) ? 0 : STEP1;
4867 V2 = (V2 cond2 N22) ? V2 : N21;
4868 L1:
4869 if (V < count) goto L0; else goto L2;
4870 L2:
4872 */
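/* Illustrative example (editorial note with assumed values, not part of the
   generated code): for collapse(2) with count1 = 2 and count2 = 3, we get
   count = 6 and the single linear IV V takes the values 0..5 while the
   inner variable V2 steps through N21, N21+STEP2, N21+2*STEP2 and is reset
   to N21 (with V1 advanced by STEP1) each time it runs past N22; the loop
   exits once V reaches count.  */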
4874 static void
4875 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4877 tree type, t;
4878 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4879 gimple_stmt_iterator gsi;
4880 gimple *stmt;
4881 gcond *cond_stmt;
4882 bool broken_loop = region->cont == NULL;
4883 edge e, ne;
4884 tree *counts = NULL;
4885 int i;
4886 int safelen_int = INT_MAX;
4887 bool dont_vectorize = false;
4888 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4889 OMP_CLAUSE_SAFELEN);
4890 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4891 OMP_CLAUSE__SIMDUID_);
4892 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4893 OMP_CLAUSE_IF);
4894 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4895 OMP_CLAUSE_SIMDLEN);
4896 tree n1, n2;
4898 if (safelen)
4900 poly_uint64 val;
4901 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4902 if (!poly_int_tree_p (safelen, &val))
4903 safelen_int = 0;
4904 else
4905 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4906 if (safelen_int == 1)
4907 safelen_int = 0;
4909 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4910 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4912 safelen_int = 0;
4913 dont_vectorize = true;
4915 type = TREE_TYPE (fd->loop.v);
4916 entry_bb = region->entry;
4917 cont_bb = region->cont;
4918 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4919 gcc_assert (broken_loop
4920 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4921 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4922 if (!broken_loop)
4924 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4925 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4926 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4927 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4929 else
4931 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4932 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4933 l2_bb = single_succ (l1_bb);
4935 exit_bb = region->exit;
4936 l2_dom_bb = NULL;
4938 gsi = gsi_last_nondebug_bb (entry_bb);
4940 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4941 /* Not needed in SSA form right now. */
4942 gcc_assert (!gimple_in_ssa_p (cfun));
4943 if (fd->collapse > 1)
4945 int first_zero_iter = -1, dummy = -1;
4946 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4948 counts = XALLOCAVEC (tree, fd->collapse);
4949 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4950 zero_iter_bb, first_zero_iter,
4951 dummy_bb, dummy, l2_dom_bb);
4953 if (l2_dom_bb == NULL)
4954 l2_dom_bb = l1_bb;
4956 n1 = fd->loop.n1;
4957 n2 = fd->loop.n2;
4958 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4960 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4961 OMP_CLAUSE__LOOPTEMP_);
4962 gcc_assert (innerc);
4963 n1 = OMP_CLAUSE_DECL (innerc);
4964 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4965 OMP_CLAUSE__LOOPTEMP_);
4966 gcc_assert (innerc);
4967 n2 = OMP_CLAUSE_DECL (innerc);
4969 tree step = fd->loop.step;
4971 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4972 OMP_CLAUSE__SIMT_);
4973 if (is_simt)
4975 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4976 is_simt = safelen_int > 1;
4978 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4979 if (is_simt)
4981 simt_lane = create_tmp_var (unsigned_type_node);
4982 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4983 gimple_call_set_lhs (g, simt_lane);
4984 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4985 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4986 fold_convert (TREE_TYPE (step), simt_lane));
4987 n1 = fold_convert (type, n1);
4988 if (POINTER_TYPE_P (type))
4989 n1 = fold_build_pointer_plus (n1, offset);
4990 else
4991 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4993 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4994 if (fd->collapse > 1)
4995 simt_maxlane = build_one_cst (unsigned_type_node);
4996 else if (safelen_int < omp_max_simt_vf ())
4997 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4998 tree vf
4999 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5000 unsigned_type_node, 0);
5001 if (simt_maxlane)
5002 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5003 vf = fold_convert (TREE_TYPE (step), vf);
5004 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
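/* Editorial note on the SIMT transformation above (illustration only,
   assumed values): each of the VF lanes gets its own starting point
   N1 + lane * STEP and then strides by VF * STEP, so the iterations are
   dealt out to the lanes round-robin; e.g. with VF = 32 and STEP = 1,
   lane 0 executes iterations 0, 32, 64, ... and lane 5 executes
   5, 37, 69, ...  */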
5007 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5008 if (fd->collapse > 1)
5010 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5012 gsi_prev (&gsi);
5013 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5014 gsi_next (&gsi);
5016 else
5017 for (i = 0; i < fd->collapse; i++)
5019 tree itype = TREE_TYPE (fd->loops[i].v);
5020 if (POINTER_TYPE_P (itype))
5021 itype = signed_type_for (itype);
5022 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5023 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5027 /* Remove the GIMPLE_OMP_FOR statement. */
5028 gsi_remove (&gsi, true);
5030 if (!broken_loop)
5032 /* Code to control the increment goes in the CONT_BB. */
5033 gsi = gsi_last_nondebug_bb (cont_bb);
5034 stmt = gsi_stmt (gsi);
5035 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5037 if (POINTER_TYPE_P (type))
5038 t = fold_build_pointer_plus (fd->loop.v, step);
5039 else
5040 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5041 expand_omp_build_assign (&gsi, fd->loop.v, t);
5043 if (fd->collapse > 1)
5045 i = fd->collapse - 1;
5046 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5048 t = fold_convert (sizetype, fd->loops[i].step);
5049 t = fold_build_pointer_plus (fd->loops[i].v, t);
5051 else
5053 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5054 fd->loops[i].step);
5055 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5056 fd->loops[i].v, t);
5058 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5060 for (i = fd->collapse - 1; i > 0; i--)
5062 tree itype = TREE_TYPE (fd->loops[i].v);
5063 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5064 if (POINTER_TYPE_P (itype2))
5065 itype2 = signed_type_for (itype2);
5066 t = fold_convert (itype2, fd->loops[i - 1].step);
5067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5068 GSI_SAME_STMT);
5069 t = build3 (COND_EXPR, itype2,
5070 build2 (fd->loops[i].cond_code, boolean_type_node,
5071 fd->loops[i].v,
5072 fold_convert (itype, fd->loops[i].n2)),
5073 build_int_cst (itype2, 0), t);
5074 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5075 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5076 else
5077 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5078 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5080 t = fold_convert (itype, fd->loops[i].n1);
5081 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5082 GSI_SAME_STMT);
5083 t = build3 (COND_EXPR, itype,
5084 build2 (fd->loops[i].cond_code, boolean_type_node,
5085 fd->loops[i].v,
5086 fold_convert (itype, fd->loops[i].n2)),
5087 fd->loops[i].v, t);
5088 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5092 /* Remove GIMPLE_OMP_CONTINUE. */
5093 gsi_remove (&gsi, true);
5096 /* Emit the condition in L1_BB. */
5097 gsi = gsi_start_bb (l1_bb);
5099 t = fold_convert (type, n2);
5100 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5101 false, GSI_CONTINUE_LINKING);
5102 tree v = fd->loop.v;
5103 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5104 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5105 false, GSI_CONTINUE_LINKING);
5106 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5107 cond_stmt = gimple_build_cond_empty (t);
5108 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5109 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5110 NULL, NULL)
5111 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5112 NULL, NULL))
5114 gsi = gsi_for_stmt (cond_stmt);
5115 gimple_regimplify_operands (cond_stmt, &gsi);
5118 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5119 if (is_simt)
5121 gsi = gsi_start_bb (l2_bb);
5122 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5123 if (POINTER_TYPE_P (type))
5124 t = fold_build_pointer_plus (fd->loop.v, step);
5125 else
5126 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5127 expand_omp_build_assign (&gsi, fd->loop.v, t);
5130 /* Remove GIMPLE_OMP_RETURN. */
5131 gsi = gsi_last_nondebug_bb (exit_bb);
5132 gsi_remove (&gsi, true);
5134 /* Connect the new blocks. */
5135 remove_edge (FALLTHRU_EDGE (entry_bb));
5137 if (!broken_loop)
5139 remove_edge (BRANCH_EDGE (entry_bb));
5140 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5142 e = BRANCH_EDGE (l1_bb);
5143 ne = FALLTHRU_EDGE (l1_bb);
5144 e->flags = EDGE_TRUE_VALUE;
5146 else
5148 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5150 ne = single_succ_edge (l1_bb);
5151 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5154 ne->flags = EDGE_FALSE_VALUE;
5155 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5156 ne->probability = e->probability.invert ();
5158 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5159 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5161 if (simt_maxlane)
5163 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5164 NULL_TREE, NULL_TREE);
5165 gsi = gsi_last_bb (entry_bb);
5166 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5167 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5168 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5169 FALLTHRU_EDGE (entry_bb)->probability
5170 = profile_probability::guessed_always ().apply_scale (7, 8);
5171 BRANCH_EDGE (entry_bb)->probability
5172 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5173 l2_dom_bb = entry_bb;
5175 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5177 if (!broken_loop)
5179 struct loop *loop = alloc_loop ();
5180 loop->header = l1_bb;
5181 loop->latch = cont_bb;
5182 add_loop (loop, l1_bb->loop_father);
5183 loop->safelen = safelen_int;
5184 if (simduid)
5186 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5187 cfun->has_simduid_loops = true;
5189 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5190 the loop. */
5191 if ((flag_tree_loop_vectorize
5192 || !global_options_set.x_flag_tree_loop_vectorize)
5193 && flag_tree_loop_optimize
5194 && loop->safelen > 1)
5196 loop->force_vectorize = true;
5197 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5199 unsigned HOST_WIDE_INT v
5200 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5201 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5202 loop->simdlen = v;
5204 cfun->has_force_vectorize_loops = true;
5206 else if (dont_vectorize)
5207 loop->dont_vectorize = true;
5209 else if (simduid)
5210 cfun->has_simduid_loops = true;
5213 /* Taskloop construct is represented after gimplification with
5214 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5215 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5216 which should just compute all the needed loop temporaries
5217 for GIMPLE_OMP_TASK. */
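/* Editorial sketch of that nesting (assumed source snippet, layout only):

     #pragma omp taskloop
     for (i = a; i < b; i++) BODY;

   is represented roughly as

     GIMPLE_OMP_FOR          <- outer loop, expanded by this function;
       GIMPLE_OMP_TASK          it only computes the _looptemp_ bounds
         GIMPLE_OMP_FOR       <- inner loop, iterated inside each task
           BODY  */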
5219 static void
5220 expand_omp_taskloop_for_outer (struct omp_region *region,
5221 struct omp_for_data *fd,
5222 gimple *inner_stmt)
5224 tree type, bias = NULL_TREE;
5225 basic_block entry_bb, cont_bb, exit_bb;
5226 gimple_stmt_iterator gsi;
5227 gassign *assign_stmt;
5228 tree *counts = NULL;
5229 int i;
5231 gcc_assert (inner_stmt);
5232 gcc_assert (region->cont);
5233 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5234 && gimple_omp_task_taskloop_p (inner_stmt));
5235 type = TREE_TYPE (fd->loop.v);
5237 /* See if we need to bias by LLONG_MIN. */
5238 if (fd->iter_type == long_long_unsigned_type_node
5239 && TREE_CODE (type) == INTEGER_TYPE
5240 && !TYPE_UNSIGNED (type))
5242 tree n1, n2;
5244 if (fd->loop.cond_code == LT_EXPR)
5246 n1 = fd->loop.n1;
5247 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5249 else
5251 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5252 n2 = fd->loop.n1;
5254 if (TREE_CODE (n1) != INTEGER_CST
5255 || TREE_CODE (n2) != INTEGER_CST
5256 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5257 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5260 entry_bb = region->entry;
5261 cont_bb = region->cont;
5262 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5263 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5264 exit_bb = region->exit;
5266 gsi = gsi_last_nondebug_bb (entry_bb);
5267 gimple *for_stmt = gsi_stmt (gsi);
5268 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5269 if (fd->collapse > 1)
5271 int first_zero_iter = -1, dummy = -1;
5272 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5274 counts = XALLOCAVEC (tree, fd->collapse);
5275 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5276 zero_iter_bb, first_zero_iter,
5277 dummy_bb, dummy, l2_dom_bb);
5279 if (zero_iter_bb)
5281 /* Some counts[i] vars might be uninitialized if
5282 some loop has zero iterations. But the body shouldn't
5283 be executed in that case, so just avoid uninit warnings. */
5284 for (i = first_zero_iter; i < fd->collapse; i++)
5285 if (SSA_VAR_P (counts[i]))
5286 TREE_NO_WARNING (counts[i]) = 1;
5287 gsi_prev (&gsi);
5288 edge e = split_block (entry_bb, gsi_stmt (gsi));
5289 entry_bb = e->dest;
5290 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5291 gsi = gsi_last_bb (entry_bb);
5292 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5293 get_immediate_dominator (CDI_DOMINATORS,
5294 zero_iter_bb));
5298 tree t0, t1;
5299 t1 = fd->loop.n2;
5300 t0 = fd->loop.n1;
5301 if (POINTER_TYPE_P (TREE_TYPE (t0))
5302 && TYPE_PRECISION (TREE_TYPE (t0))
5303 != TYPE_PRECISION (fd->iter_type))
5306 /* Avoid casting pointers to integers of a different size. */
5306 tree itype = signed_type_for (type);
5307 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5308 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5310 else
5312 t1 = fold_convert (fd->iter_type, t1);
5313 t0 = fold_convert (fd->iter_type, t0);
5315 if (bias)
5317 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5318 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5321 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5322 OMP_CLAUSE__LOOPTEMP_);
5323 gcc_assert (innerc);
5324 tree startvar = OMP_CLAUSE_DECL (innerc);
5325 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5326 gcc_assert (innerc);
5327 tree endvar = OMP_CLAUSE_DECL (innerc);
5328 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5330 gcc_assert (innerc);
5331 for (i = 1; i < fd->collapse; i++)
5333 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5334 OMP_CLAUSE__LOOPTEMP_);
5335 gcc_assert (innerc);
5337 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5338 OMP_CLAUSE__LOOPTEMP_);
5339 if (innerc)
5341 /* If needed (inner taskloop has lastprivate clause), propagate
5342 down the total number of iterations. */
5343 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5344 NULL_TREE, false,
5345 GSI_CONTINUE_LINKING);
5346 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5347 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5351 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5352 GSI_CONTINUE_LINKING);
5353 assign_stmt = gimple_build_assign (startvar, t0);
5354 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5356 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5357 GSI_CONTINUE_LINKING);
5358 assign_stmt = gimple_build_assign (endvar, t1);
5359 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5360 if (fd->collapse > 1)
5361 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5363 /* Remove the GIMPLE_OMP_FOR statement. */
5364 gsi = gsi_for_stmt (for_stmt);
5365 gsi_remove (&gsi, true);
5367 gsi = gsi_last_nondebug_bb (cont_bb);
5368 gsi_remove (&gsi, true);
5370 gsi = gsi_last_nondebug_bb (exit_bb);
5371 gsi_remove (&gsi, true);
5373 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5374 remove_edge (BRANCH_EDGE (entry_bb));
5375 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5376 remove_edge (BRANCH_EDGE (cont_bb));
5377 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5378 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5379 recompute_dominator (CDI_DOMINATORS, region->entry));
5382 /* Taskloop construct is represented after gimplification with
5383 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5384 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5385 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5386 a single range of iterations. */
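/* Illustrative sketch only: once GOMP_taskloop{,_ull} hands a task its
   subrange through the two _looptemp_ variables (call them START and END
   here), the loop expanded below degenerates into a plain sequential

     for (V = START; V cond END; V += STEP)
       body (V);

   with no further calls into libgomp.  */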
5388 static void
5389 expand_omp_taskloop_for_inner (struct omp_region *region,
5390 struct omp_for_data *fd,
5391 gimple *inner_stmt)
5393 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5394 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5395 basic_block fin_bb;
5396 gimple_stmt_iterator gsi;
5397 edge ep;
5398 bool broken_loop = region->cont == NULL;
5399 tree *counts = NULL;
5400 tree n1, n2, step;
5402 itype = type = TREE_TYPE (fd->loop.v);
5403 if (POINTER_TYPE_P (type))
5404 itype = signed_type_for (type);
5406 /* See if we need to bias by LLONG_MIN. */
5407 if (fd->iter_type == long_long_unsigned_type_node
5408 && TREE_CODE (type) == INTEGER_TYPE
5409 && !TYPE_UNSIGNED (type))
5411 tree n1, n2;
5413 if (fd->loop.cond_code == LT_EXPR)
5415 n1 = fd->loop.n1;
5416 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5418 else
5420 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5421 n2 = fd->loop.n1;
5423 if (TREE_CODE (n1) != INTEGER_CST
5424 || TREE_CODE (n2) != INTEGER_CST
5425 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5426 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5429 entry_bb = region->entry;
5430 cont_bb = region->cont;
5431 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5432 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5433 gcc_assert (broken_loop
5434 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5435 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5436 if (!broken_loop)
5438 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5439 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5441 exit_bb = region->exit;
5443 /* Iteration space partitioning goes in ENTRY_BB. */
5444 gsi = gsi_last_nondebug_bb (entry_bb);
5445 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5447 if (fd->collapse > 1)
5449 int first_zero_iter = -1, dummy = -1;
5450 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5452 counts = XALLOCAVEC (tree, fd->collapse);
5453 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5454 fin_bb, first_zero_iter,
5455 dummy_bb, dummy, l2_dom_bb);
5456 t = NULL_TREE;
5458 else
5459 t = integer_one_node;
5461 step = fd->loop.step;
5462 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5463 OMP_CLAUSE__LOOPTEMP_);
5464 gcc_assert (innerc);
5465 n1 = OMP_CLAUSE_DECL (innerc);
5466 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5467 gcc_assert (innerc);
5468 n2 = OMP_CLAUSE_DECL (innerc);
5469 if (bias)
5471 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5472 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5474 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5475 true, NULL_TREE, true, GSI_SAME_STMT);
5476 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5477 true, NULL_TREE, true, GSI_SAME_STMT);
5478 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5479 true, NULL_TREE, true, GSI_SAME_STMT);
5481 tree startvar = fd->loop.v;
5482 tree endvar = NULL_TREE;
5484 if (gimple_omp_for_combined_p (fd->for_stmt))
5486 tree clauses = gimple_omp_for_clauses (inner_stmt);
5487 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5488 gcc_assert (innerc);
5489 startvar = OMP_CLAUSE_DECL (innerc);
5490 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5491 OMP_CLAUSE__LOOPTEMP_);
5492 gcc_assert (innerc);
5493 endvar = OMP_CLAUSE_DECL (innerc);
5495 t = fold_convert (TREE_TYPE (startvar), n1);
5496 t = force_gimple_operand_gsi (&gsi, t,
5497 DECL_P (startvar)
5498 && TREE_ADDRESSABLE (startvar),
5499 NULL_TREE, false, GSI_CONTINUE_LINKING);
5500 gimple *assign_stmt = gimple_build_assign (startvar, t);
5501 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5503 t = fold_convert (TREE_TYPE (startvar), n2);
5504 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5505 false, GSI_CONTINUE_LINKING);
5506 if (endvar)
5508 assign_stmt = gimple_build_assign (endvar, e);
5509 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5510 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5511 assign_stmt = gimple_build_assign (fd->loop.v, e);
5512 else
5513 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5514 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5516 if (fd->collapse > 1)
5517 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5519 if (!broken_loop)
5521 /* The code controlling the sequential loop replaces the
5522 GIMPLE_OMP_CONTINUE. */
5523 gsi = gsi_last_nondebug_bb (cont_bb);
5524 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5525 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5526 vmain = gimple_omp_continue_control_use (cont_stmt);
5527 vback = gimple_omp_continue_control_def (cont_stmt);
5529 if (!gimple_omp_for_combined_p (fd->for_stmt))
5531 if (POINTER_TYPE_P (type))
5532 t = fold_build_pointer_plus (vmain, step);
5533 else
5534 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5535 t = force_gimple_operand_gsi (&gsi, t,
5536 DECL_P (vback)
5537 && TREE_ADDRESSABLE (vback),
5538 NULL_TREE, true, GSI_SAME_STMT);
5539 assign_stmt = gimple_build_assign (vback, t);
5540 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5542 t = build2 (fd->loop.cond_code, boolean_type_node,
5543 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5544 ? t : vback, e);
5545 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5548 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5549 gsi_remove (&gsi, true);
5551 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5552 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5555 /* Remove the GIMPLE_OMP_FOR statement. */
5556 gsi = gsi_for_stmt (fd->for_stmt);
5557 gsi_remove (&gsi, true);
5559 /* Remove the GIMPLE_OMP_RETURN statement. */
5560 gsi = gsi_last_nondebug_bb (exit_bb);
5561 gsi_remove (&gsi, true);
5563 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5564 if (!broken_loop)
5565 remove_edge (BRANCH_EDGE (entry_bb));
5566 else
5568 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5569 region->outer->cont = NULL;
5572 /* Connect all the blocks. */
5573 if (!broken_loop)
5575 ep = find_edge (cont_bb, body_bb);
5576 if (gimple_omp_for_combined_p (fd->for_stmt))
5578 remove_edge (ep);
5579 ep = NULL;
5581 else if (fd->collapse > 1)
5583 remove_edge (ep);
5584 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5586 else
5587 ep->flags = EDGE_TRUE_VALUE;
5588 find_edge (cont_bb, fin_bb)->flags
5589 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5592 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5593 recompute_dominator (CDI_DOMINATORS, body_bb));
5594 if (!broken_loop)
5595 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5596 recompute_dominator (CDI_DOMINATORS, fin_bb));
5598 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5600 struct loop *loop = alloc_loop ();
5601 loop->header = body_bb;
5602 if (collapse_bb == NULL)
5603 loop->latch = cont_bb;
5604 add_loop (loop, body_bb->loop_father);
5608 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5609 partitioned loop. The lowering here is abstracted, in that the
5610 loop parameters are passed through internal functions, which are
5611 further lowered by oacc_device_lower, once we get to the target
5612 compiler. The loop is of the form:
5614 for (V = B; V LTGT E; V += S) {BODY}
5616 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5617 (constant 0 for no chunking) and we will have a GWV partitioning
5618 mask, specifying dimensions over which the loop is to be
5619 partitioned (see note below). We generate code that looks like
5620 (this ignores tiling):
5622 <entry_bb> [incoming FALL->body, BRANCH->exit]
5623 typedef signedintify (typeof (V)) T; // underlying signed integral type
5624 T range = E - B;
5625 T chunk_no = 0;
5626 T DIR = LTGT == '<' ? +1 : -1;
5627 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5628 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5630 <head_bb> [created by splitting end of entry_bb]
5631 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5632 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5633 if (!(offset LTGT bound)) goto bottom_bb;
5635 <body_bb> [incoming]
5636 V = B + offset;
5637 {BODY}
5639 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5640 offset += step;
5641 if (offset LTGT bound) goto body_bb; [*]
5643 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5644 chunk_no++;
5645 if (chunk_no < chunk_max) goto head_bb;
5647 <exit_bb> [incoming]
5648 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5650 [*] Needed if V live at end of loop. */
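/* A hypothetical example for illustration: for

     #pragma acc parallel loop
     for (i = 0; i < n; i++)
       body (i);

   we have B = 0, E = n and S = 1.  The IFN_GOACC_LOOP_* calls emitted below
   merely package these values together with the chunking and GWV
   partitioning parameters; oacc_device_lower later rewrites them into
   target-specific offset/bound arithmetic for each partitioned element.  */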
5652 static void
5653 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5655 tree v = fd->loop.v;
5656 enum tree_code cond_code = fd->loop.cond_code;
5657 enum tree_code plus_code = PLUS_EXPR;
5659 tree chunk_size = integer_minus_one_node;
5660 tree gwv = integer_zero_node;
5661 tree iter_type = TREE_TYPE (v);
5662 tree diff_type = iter_type;
5663 tree plus_type = iter_type;
5664 struct oacc_collapse *counts = NULL;
5666 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5667 == GF_OMP_FOR_KIND_OACC_LOOP);
5668 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5669 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5671 if (POINTER_TYPE_P (iter_type))
5673 plus_code = POINTER_PLUS_EXPR;
5674 plus_type = sizetype;
5676 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5677 diff_type = signed_type_for (diff_type);
5678 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5679 diff_type = integer_type_node;
5681 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5682 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5683 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5684 basic_block bottom_bb = NULL;
5686 /* entry_bb has two successors; the branch edge is to the exit
5687 block, the fallthrough edge to the body. */
5688 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5689 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5691 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5692 body_bb, or a block whose only successor is body_bb. Its
5693 fallthrough successor is the final block (the same as the branch
5694 successor of entry_bb). */
5695 if (cont_bb)
5697 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5698 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5700 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5701 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5703 else
5704 gcc_assert (!gimple_in_ssa_p (cfun));
5706 /* The exit block only has entry_bb and cont_bb as predecessors. */
5707 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5709 tree chunk_no;
5710 tree chunk_max = NULL_TREE;
5711 tree bound, offset;
5712 tree step = create_tmp_var (diff_type, ".step");
5713 bool up = cond_code == LT_EXPR;
5714 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5715 bool chunking = !gimple_in_ssa_p (cfun);
5716 bool negating;
5718 /* Tiling vars. */
5719 tree tile_size = NULL_TREE;
5720 tree element_s = NULL_TREE;
5721 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5722 basic_block elem_body_bb = NULL;
5723 basic_block elem_cont_bb = NULL;
5725 /* SSA instances. */
5726 tree offset_incr = NULL_TREE;
5727 tree offset_init = NULL_TREE;
5729 gimple_stmt_iterator gsi;
5730 gassign *ass;
5731 gcall *call;
5732 gimple *stmt;
5733 tree expr;
5734 location_t loc;
5735 edge split, be, fte;
5737 /* Split the end of entry_bb to create head_bb. */
5738 split = split_block (entry_bb, last_stmt (entry_bb));
5739 basic_block head_bb = split->dest;
5740 entry_bb = split->src;
5742 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5743 gsi = gsi_last_nondebug_bb (entry_bb);
5744 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5745 loc = gimple_location (for_stmt);
5747 if (gimple_in_ssa_p (cfun))
5749 offset_init = gimple_omp_for_index (for_stmt, 0);
5750 gcc_assert (integer_zerop (fd->loop.n1));
5751 /* The SSA parallelizer does gang parallelism. */
5752 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5755 if (fd->collapse > 1 || fd->tiling)
5757 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5758 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5759 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5760 TREE_TYPE (fd->loop.n2), loc);
5762 if (SSA_VAR_P (fd->loop.n2))
5764 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5765 true, GSI_SAME_STMT);
5766 ass = gimple_build_assign (fd->loop.n2, total);
5767 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5771 tree b = fd->loop.n1;
5772 tree e = fd->loop.n2;
5773 tree s = fd->loop.step;
5775 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5776 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5778 /* Convert the step, avoiding possible unsigned->signed overflow. */
5779 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5780 if (negating)
5781 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5782 s = fold_convert (diff_type, s);
5783 if (negating)
5784 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5785 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5787 if (!chunking)
5788 chunk_size = integer_zero_node;
5789 expr = fold_convert (diff_type, chunk_size);
5790 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5791 NULL_TREE, true, GSI_SAME_STMT);
5793 if (fd->tiling)
5795 /* Determine the tile size and element step,
5796 modify the outer loop step size. */
5797 tile_size = create_tmp_var (diff_type, ".tile_size");
5798 expr = build_int_cst (diff_type, 1);
5799 for (int ix = 0; ix < fd->collapse; ix++)
5800 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5801 expr = force_gimple_operand_gsi (&gsi, expr, true,
5802 NULL_TREE, true, GSI_SAME_STMT);
5803 ass = gimple_build_assign (tile_size, expr);
5804 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5806 element_s = create_tmp_var (diff_type, ".element_s");
5807 ass = gimple_build_assign (element_s, s);
5808 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5810 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5811 s = force_gimple_operand_gsi (&gsi, expr, true,
5812 NULL_TREE, true, GSI_SAME_STMT);
5815 /* Determine the range, avoiding possible unsigned->signed overflow. */
5816 negating = !up && TYPE_UNSIGNED (iter_type);
5817 expr = fold_build2 (MINUS_EXPR, plus_type,
5818 fold_convert (plus_type, negating ? b : e),
5819 fold_convert (plus_type, negating ? e : b));
5820 expr = fold_convert (diff_type, expr);
5821 if (negating)
5822 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5823 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5824 NULL_TREE, true, GSI_SAME_STMT);
5826 chunk_no = build_int_cst (diff_type, 0);
5827 if (chunking)
5829 gcc_assert (!gimple_in_ssa_p (cfun));
5831 expr = chunk_no;
5832 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5833 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5835 ass = gimple_build_assign (chunk_no, expr);
5836 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5838 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5839 build_int_cst (integer_type_node,
5840 IFN_GOACC_LOOP_CHUNKS),
5841 dir, range, s, chunk_size, gwv);
5842 gimple_call_set_lhs (call, chunk_max);
5843 gimple_set_location (call, loc);
5844 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5846 else
5847 chunk_size = chunk_no;
5849 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5850 build_int_cst (integer_type_node,
5851 IFN_GOACC_LOOP_STEP),
5852 dir, range, s, chunk_size, gwv);
5853 gimple_call_set_lhs (call, step);
5854 gimple_set_location (call, loc);
5855 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5857 /* Remove the GIMPLE_OMP_FOR. */
5858 gsi_remove (&gsi, true);
5860 /* Fixup edges from head_bb. */
5861 be = BRANCH_EDGE (head_bb);
5862 fte = FALLTHRU_EDGE (head_bb);
5863 be->flags |= EDGE_FALSE_VALUE;
5864 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5866 basic_block body_bb = fte->dest;
5868 if (gimple_in_ssa_p (cfun))
5870 gsi = gsi_last_nondebug_bb (cont_bb);
5871 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5873 offset = gimple_omp_continue_control_use (cont_stmt);
5874 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5876 else
5878 offset = create_tmp_var (diff_type, ".offset");
5879 offset_init = offset_incr = offset;
5881 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5883 /* Loop offset & bound go into head_bb. */
5884 gsi = gsi_start_bb (head_bb);
5886 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5887 build_int_cst (integer_type_node,
5888 IFN_GOACC_LOOP_OFFSET),
5889 dir, range, s,
5890 chunk_size, gwv, chunk_no);
5891 gimple_call_set_lhs (call, offset_init);
5892 gimple_set_location (call, loc);
5893 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5895 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5896 build_int_cst (integer_type_node,
5897 IFN_GOACC_LOOP_BOUND),
5898 dir, range, s,
5899 chunk_size, gwv, offset_init);
5900 gimple_call_set_lhs (call, bound);
5901 gimple_set_location (call, loc);
5902 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5904 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5905 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5906 GSI_CONTINUE_LINKING);
5908 /* V assignment goes into body_bb. */
5909 if (!gimple_in_ssa_p (cfun))
5911 gsi = gsi_start_bb (body_bb);
5913 expr = build2 (plus_code, iter_type, b,
5914 fold_convert (plus_type, offset));
5915 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5916 true, GSI_SAME_STMT);
5917 ass = gimple_build_assign (v, expr);
5918 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5920 if (fd->collapse > 1 || fd->tiling)
5921 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5923 if (fd->tiling)
5925 /* Determine the range of the element loop -- usually simply
5926 the tile_size, but could be smaller if the final
5927 iteration of the outer loop is a partial tile. */
5928 tree e_range = create_tmp_var (diff_type, ".e_range");
5930 expr = build2 (MIN_EXPR, diff_type,
5931 build2 (MINUS_EXPR, diff_type, bound, offset),
5932 build2 (MULT_EXPR, diff_type, tile_size,
5933 element_s));
5934 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5935 true, GSI_SAME_STMT);
5936 ass = gimple_build_assign (e_range, expr);
5937 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5939 /* Determine bound, offset & step of inner loop. */
5940 e_bound = create_tmp_var (diff_type, ".e_bound");
5941 e_offset = create_tmp_var (diff_type, ".e_offset");
5942 e_step = create_tmp_var (diff_type, ".e_step");
5944 /* Mark these as element loops. */
5945 tree t, e_gwv = integer_minus_one_node;
5946 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5948 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5949 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5950 element_s, chunk, e_gwv, chunk);
5951 gimple_call_set_lhs (call, e_offset);
5952 gimple_set_location (call, loc);
5953 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5955 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5956 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5957 element_s, chunk, e_gwv, e_offset);
5958 gimple_call_set_lhs (call, e_bound);
5959 gimple_set_location (call, loc);
5960 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5962 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5963 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5964 element_s, chunk, e_gwv);
5965 gimple_call_set_lhs (call, e_step);
5966 gimple_set_location (call, loc);
5967 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5969 /* Add test and split block. */
5970 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5971 stmt = gimple_build_cond_empty (expr);
5972 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5973 split = split_block (body_bb, stmt);
5974 elem_body_bb = split->dest;
5975 if (cont_bb == body_bb)
5976 cont_bb = elem_body_bb;
5977 body_bb = split->src;
5979 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5981 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5982 if (cont_bb == NULL)
5984 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5985 e->probability = profile_probability::even ();
5986 split->probability = profile_probability::even ();
5989 /* Initialize the user's loop vars. */
5990 gsi = gsi_start_bb (elem_body_bb);
5991 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5995 /* Loop increment goes into cont_bb. If this is not a loop, we
5996 will have spawned threads as if it was, and each one will
5997 execute one iteration. The specification is not explicit about
5998 whether such constructs are ill-formed or not, and they can
5999 occur, especially when noreturn routines are involved. */
6000 if (cont_bb)
6002 gsi = gsi_last_nondebug_bb (cont_bb);
6003 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6004 loc = gimple_location (cont_stmt);
6006 if (fd->tiling)
6008 /* Insert element loop increment and test. */
6009 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6010 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6011 true, GSI_SAME_STMT);
6012 ass = gimple_build_assign (e_offset, expr);
6013 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6014 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6016 stmt = gimple_build_cond_empty (expr);
6017 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6018 split = split_block (cont_bb, stmt);
6019 elem_cont_bb = split->src;
6020 cont_bb = split->dest;
6022 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6023 split->probability = profile_probability::unlikely ().guessed ();
6024 edge latch_edge
6025 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6026 latch_edge->probability = profile_probability::likely ().guessed ();
6028 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6029 skip_edge->probability = profile_probability::unlikely ().guessed ();
6030 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6031 loop_entry_edge->probability
6032 = profile_probability::likely ().guessed ();
6034 gsi = gsi_for_stmt (cont_stmt);
6037 /* Increment offset. */
6038 if (gimple_in_ssa_p (cfun))
6039 expr = build2 (plus_code, iter_type, offset,
6040 fold_convert (plus_type, step));
6041 else
6042 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6043 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6044 true, GSI_SAME_STMT);
6045 ass = gimple_build_assign (offset_incr, expr);
6046 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6047 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6048 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6050 /* Remove the GIMPLE_OMP_CONTINUE. */
6051 gsi_remove (&gsi, true);
6053 /* Fixup edges from cont_bb. */
6054 be = BRANCH_EDGE (cont_bb);
6055 fte = FALLTHRU_EDGE (cont_bb);
6056 be->flags |= EDGE_TRUE_VALUE;
6057 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6059 if (chunking)
6061 /* Split the beginning of exit_bb to make bottom_bb. We
6062 need to insert a nop at the start, because splitting is
6063 after a stmt, not before. */
6064 gsi = gsi_start_bb (exit_bb);
6065 stmt = gimple_build_nop ();
6066 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6067 split = split_block (exit_bb, stmt);
6068 bottom_bb = split->src;
6069 exit_bb = split->dest;
6070 gsi = gsi_last_bb (bottom_bb);
6072 /* Chunk increment and test go into bottom_bb. */
6073 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6074 build_int_cst (diff_type, 1));
6075 ass = gimple_build_assign (chunk_no, expr);
6076 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6078 /* Chunk test at end of bottom_bb. */
6079 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6080 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6081 GSI_CONTINUE_LINKING);
6083 /* Fixup edges from bottom_bb. */
6084 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6085 split->probability = profile_probability::unlikely ().guessed ();
6086 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6087 latch_edge->probability = profile_probability::likely ().guessed ();
6091 gsi = gsi_last_nondebug_bb (exit_bb);
6092 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6093 loc = gimple_location (gsi_stmt (gsi));
6095 if (!gimple_in_ssa_p (cfun))
6097 /* Insert the final value of V, in case it is live. This is the
6098 value for the only thread that survives past the join. */
6099 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6100 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6101 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6102 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6103 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6104 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6105 true, GSI_SAME_STMT);
6106 ass = gimple_build_assign (v, expr);
6107 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6110 /* Remove the OMP_RETURN. */
6111 gsi_remove (&gsi, true);
6113 if (cont_bb)
6115 /* We now have one, two or three nested loops. Update the loop
6116 structures. */
6117 struct loop *parent = entry_bb->loop_father;
6118 struct loop *body = body_bb->loop_father;
6120 if (chunking)
6122 struct loop *chunk_loop = alloc_loop ();
6123 chunk_loop->header = head_bb;
6124 chunk_loop->latch = bottom_bb;
6125 add_loop (chunk_loop, parent);
6126 parent = chunk_loop;
6128 else if (parent != body)
6130 gcc_assert (body->header == body_bb);
6131 gcc_assert (body->latch == cont_bb
6132 || single_pred (body->latch) == cont_bb);
6133 parent = NULL;
6136 if (parent)
6138 struct loop *body_loop = alloc_loop ();
6139 body_loop->header = body_bb;
6140 body_loop->latch = cont_bb;
6141 add_loop (body_loop, parent);
6143 if (fd->tiling)
6145 /* Insert tiling's element loop. */
6146 struct loop *inner_loop = alloc_loop ();
6147 inner_loop->header = elem_body_bb;
6148 inner_loop->latch = elem_cont_bb;
6149 add_loop (inner_loop, body_loop);
6155 /* Expand the OMP loop defined by REGION. */
6157 static void
6158 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6160 struct omp_for_data fd;
6161 struct omp_for_data_loop *loops;
6163 loops
6164 = (struct omp_for_data_loop *)
6165 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6166 * sizeof (struct omp_for_data_loop));
6167 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6168 &fd, loops);
6169 region->sched_kind = fd.sched_kind;
6170 region->sched_modifiers = fd.sched_modifiers;
6172 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6173 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6174 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6175 if (region->cont)
6177 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6178 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6179 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6181 else
6182 /* If there isn't a continue then this is a degenerate case where
6183 the introduction of abnormal edges during lowering will prevent
6184 original loops from being detected. Fix that up. */
6185 loops_state_set (LOOPS_NEED_FIXUP);
6187 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6188 expand_omp_simd (region, &fd);
6189 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6191 gcc_assert (!inner_stmt);
6192 expand_oacc_for (region, &fd);
6194 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6196 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6197 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6198 else
6199 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6201 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6202 && !fd.have_ordered)
6204 if (fd.chunk_size == NULL)
6205 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6206 else
6207 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6209 else
6211 int fn_index, start_ix, next_ix;
6212 unsigned HOST_WIDE_INT sched = 0;
6213 tree sched_arg = NULL_TREE;
6215 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6216 == GF_OMP_FOR_KIND_FOR);
6217 if (fd.chunk_size == NULL
6218 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6219 fd.chunk_size = integer_zero_node;
6220 switch (fd.sched_kind)
6222 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6223 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
6225 gcc_assert (!fd.have_ordered);
6226 fn_index = 6;
6227 sched = 4;
6229 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6230 && !fd.have_ordered)
6231 fn_index = 7;
6232 else
6234 fn_index = 3;
6235 sched = (HOST_WIDE_INT_1U << 31);
6237 break;
6238 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6239 case OMP_CLAUSE_SCHEDULE_GUIDED:
6240 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6241 && !fd.have_ordered)
6243 fn_index = 3 + fd.sched_kind;
6244 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6245 break;
6247 fn_index = fd.sched_kind;
6248 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6249 sched += (HOST_WIDE_INT_1U << 31);
6250 break;
6251 case OMP_CLAUSE_SCHEDULE_STATIC:
6252 gcc_assert (fd.have_ordered);
6253 fn_index = 0;
6254 sched = (HOST_WIDE_INT_1U << 31) + 1;
6255 break;
6256 default:
6257 gcc_unreachable ();
6259 if (!fd.ordered)
6260 fn_index += fd.have_ordered * 8;
6261 if (fd.ordered)
6262 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6263 else
6264 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6265 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6266 if (fd.have_reductemp || fd.lastprivate_conditional)
6268 if (fd.ordered)
6269 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6270 else if (fd.have_ordered)
6271 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6272 else
6273 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6274 sched_arg = build_int_cstu (long_integer_type_node, sched);
6275 if (!fd.chunk_size)
6276 fd.chunk_size = integer_zero_node;
6278 if (fd.iter_type == long_long_unsigned_type_node)
6280 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6281 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6282 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6283 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6285 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6286 (enum built_in_function) next_ix, sched_arg,
6287 inner_stmt);
6290 if (gimple_in_ssa_p (cfun))
6291 update_ssa (TODO_update_ssa_only_virtuals);
6294 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6296 v = GOMP_sections_start (n);
6297 L0:
6298 switch (v)
6299 {
6300 case 0:
6301 goto L2;
6302 case 1:
6303 section 1;
6304 goto L1;
6305 case 2:
6306 ...
6307 case n:
6308 ...
6309 default:
6310 abort ();
6311 }
6312 L1:
6313 v = GOMP_sections_next ();
6314 goto L0;
6315 L2:
6316 reduction;
6318 If this is a combined parallel+sections region, replace the call to
6319 GOMP_sections_start with a call to GOMP_sections_next. */
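/* A hypothetical example (not from the original sources): for

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   each thread calls GOMP_sections_start (2) (or GOMP_sections_next ()) and
   the returned value drives the switch above: 1 runs foo (), 2 runs bar (),
   and 0 leaves the construct.  */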
6321 static void
6322 expand_omp_sections (struct omp_region *region)
6324 tree t, u, vin = NULL, vmain, vnext, l2;
6325 unsigned len;
6326 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6327 gimple_stmt_iterator si, switch_si;
6328 gomp_sections *sections_stmt;
6329 gimple *stmt;
6330 gomp_continue *cont;
6331 edge_iterator ei;
6332 edge e;
6333 struct omp_region *inner;
6334 unsigned i, casei;
6335 bool exit_reachable = region->cont != NULL;
6337 gcc_assert (region->exit != NULL);
6338 entry_bb = region->entry;
6339 l0_bb = single_succ (entry_bb);
6340 l1_bb = region->cont;
6341 l2_bb = region->exit;
6342 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6343 l2 = gimple_block_label (l2_bb);
6344 else
6346 /* This can happen if there are reductions. */
6347 len = EDGE_COUNT (l0_bb->succs);
6348 gcc_assert (len > 0);
6349 e = EDGE_SUCC (l0_bb, len - 1);
6350 si = gsi_last_nondebug_bb (e->dest);
6351 l2 = NULL_TREE;
6352 if (gsi_end_p (si)
6353 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6354 l2 = gimple_block_label (e->dest);
6355 else
6356 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6358 si = gsi_last_nondebug_bb (e->dest);
6359 if (gsi_end_p (si)
6360 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6362 l2 = gimple_block_label (e->dest);
6363 break;
6367 if (exit_reachable)
6368 default_bb = create_empty_bb (l1_bb->prev_bb);
6369 else
6370 default_bb = create_empty_bb (l0_bb);
6372 /* We will build a switch() with enough cases for all the
6373 GIMPLE_OMP_SECTION regions, a '0' case to handle reaching the end of the
6374 available work, and a default case to abort if something goes wrong. */
6375 len = EDGE_COUNT (l0_bb->succs);
6377 /* Use vec::quick_push on label_vec throughout, since we know the size
6378 in advance. */
6379 auto_vec<tree> label_vec (len);
6381 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6382 GIMPLE_OMP_SECTIONS statement. */
6383 si = gsi_last_nondebug_bb (entry_bb);
6384 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6385 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6386 vin = gimple_omp_sections_control (sections_stmt);
6387 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6388 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6389 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6390 tree cond_var = NULL_TREE;
6391 if (reductmp || condtmp)
6393 tree reductions = null_pointer_node, mem = null_pointer_node;
6394 tree memv = NULL_TREE, condtemp = NULL_TREE;
6395 gimple_stmt_iterator gsi = gsi_none ();
6396 gimple *g = NULL;
6397 if (reductmp)
6399 reductions = OMP_CLAUSE_DECL (reductmp);
6400 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6401 g = SSA_NAME_DEF_STMT (reductions);
6402 reductions = gimple_assign_rhs1 (g);
6403 OMP_CLAUSE_DECL (reductmp) = reductions;
6404 gsi = gsi_for_stmt (g);
6406 else
6407 gsi = si;
6408 if (condtmp)
6410 condtemp = OMP_CLAUSE_DECL (condtmp);
6411 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6412 OMP_CLAUSE__CONDTEMP_);
6413 cond_var = OMP_CLAUSE_DECL (c);
6414 tree type = TREE_TYPE (condtemp);
6415 memv = create_tmp_var (type);
6416 TREE_ADDRESSABLE (memv) = 1;
6417 unsigned cnt = 0;
6418 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6419 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6420 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6421 ++cnt;
6422 unsigned HOST_WIDE_INT sz
6423 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6424 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6425 false);
6426 mem = build_fold_addr_expr (memv);
6428 t = build_int_cst (unsigned_type_node, len - 1);
6429 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6430 stmt = gimple_build_call (u, 3, t, reductions, mem);
6431 gimple_call_set_lhs (stmt, vin);
6432 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6433 if (condtmp)
6435 expand_omp_build_assign (&gsi, condtemp, memv, false);
6436 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6437 vin, build_one_cst (TREE_TYPE (cond_var)));
6438 expand_omp_build_assign (&gsi, cond_var, t, false);
6440 if (reductmp)
6442 gsi_remove (&gsi, true);
6443 release_ssa_name (gimple_assign_lhs (g));
6446 else if (!is_combined_parallel (region))
6448 /* If we are not inside a combined parallel+sections region,
6449 call GOMP_sections_start. */
6450 t = build_int_cst (unsigned_type_node, len - 1);
6451 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6452 stmt = gimple_build_call (u, 1, t);
6454 else
6456 /* Otherwise, call GOMP_sections_next. */
6457 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6458 stmt = gimple_build_call (u, 0);
6460 if (!reductmp && !condtmp)
6462 gimple_call_set_lhs (stmt, vin);
6463 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6465 gsi_remove (&si, true);
6467 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6468 L0_BB. */
6469 switch_si = gsi_last_nondebug_bb (l0_bb);
6470 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6471 if (exit_reachable)
6473 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6474 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6475 vmain = gimple_omp_continue_control_use (cont);
6476 vnext = gimple_omp_continue_control_def (cont);
6478 else
6480 vmain = vin;
6481 vnext = NULL_TREE;
6484 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6485 label_vec.quick_push (t);
6486 i = 1;
6488 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6489 for (inner = region->inner, casei = 1;
6490 inner;
6491 inner = inner->next, i++, casei++)
6493 basic_block s_entry_bb, s_exit_bb;
6495 /* Skip optional reduction region. */
6496 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6498 --i;
6499 --casei;
6500 continue;
6503 s_entry_bb = inner->entry;
6504 s_exit_bb = inner->exit;
6506 t = gimple_block_label (s_entry_bb);
6507 u = build_int_cst (unsigned_type_node, casei);
6508 u = build_case_label (u, NULL, t);
6509 label_vec.quick_push (u);
6511 si = gsi_last_nondebug_bb (s_entry_bb);
6512 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6513 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6514 gsi_remove (&si, true);
6515 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6517 if (s_exit_bb == NULL)
6518 continue;
6520 si = gsi_last_nondebug_bb (s_exit_bb);
6521 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6522 gsi_remove (&si, true);
6524 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6527 /* Error handling code goes in DEFAULT_BB. */
6528 t = gimple_block_label (default_bb);
6529 u = build_case_label (NULL, NULL, t);
6530 make_edge (l0_bb, default_bb, 0);
6531 add_bb_to_loop (default_bb, current_loops->tree_root);
6533 stmt = gimple_build_switch (vmain, u, label_vec);
6534 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6535 gsi_remove (&switch_si, true);
6537 si = gsi_start_bb (default_bb);
6538 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6539 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6541 if (exit_reachable)
6543 tree bfn_decl;
6545 /* Code to get the next section goes in L1_BB. */
6546 si = gsi_last_nondebug_bb (l1_bb);
6547 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6549 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6550 stmt = gimple_build_call (bfn_decl, 0);
6551 gimple_call_set_lhs (stmt, vnext);
6552 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6553 if (cond_var)
6555 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6556 vnext, build_one_cst (TREE_TYPE (cond_var)));
6557 expand_omp_build_assign (&si, cond_var, t, false);
6559 gsi_remove (&si, true);
6561 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6564 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6565 si = gsi_last_nondebug_bb (l2_bb);
6566 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6567 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6568 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6569 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6570 else
6571 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6572 stmt = gimple_build_call (t, 0);
6573 if (gimple_omp_return_lhs (gsi_stmt (si)))
6574 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6575 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6576 gsi_remove (&si, true);
6578 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6581 /* Expand code for an OpenMP single directive. We've already expanded
6582 much of the code; here we simply place the GOMP_barrier call. */
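/* Sketch for illustration: by this point lowering has already rewritten

     #pragma omp single
       foo ();

   into roughly "if (GOMP_single_start ()) foo ();" (or the copyprivate
   variant), so all that remains is to drop the region markers and, unless
   nowait was given, emit the GOMP_barrier call at the region's exit.  */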
6584 static void
6585 expand_omp_single (struct omp_region *region)
6587 basic_block entry_bb, exit_bb;
6588 gimple_stmt_iterator si;
6590 entry_bb = region->entry;
6591 exit_bb = region->exit;
6593 si = gsi_last_nondebug_bb (entry_bb);
6594 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6595 gsi_remove (&si, true);
6596 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6598 si = gsi_last_nondebug_bb (exit_bb);
6599 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6601 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6602 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6604 gsi_remove (&si, true);
6605 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6608 /* Generic expansion for OpenMP synchronization directives: master,
6609 ordered and critical. All we need to do here is remove the entry
6610 and exit markers for REGION. */
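/* For example (illustration only), a "#pragma omp critical" region already
   had GOMP_critical_start ()/GOMP_critical_end () calls (or their named
   variants) emitted during lowering; the GIMPLE_OMP_CRITICAL and
   GIMPLE_OMP_RETURN statements removed below are just the markers left
   around that code.  */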
6612 static void
6613 expand_omp_synch (struct omp_region *region)
6615 basic_block entry_bb, exit_bb;
6616 gimple_stmt_iterator si;
6618 entry_bb = region->entry;
6619 exit_bb = region->exit;
6621 si = gsi_last_nondebug_bb (entry_bb);
6622 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6623 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6624 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6625 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6626 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6627 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6628 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6629 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6631 expand_omp_taskreg (region);
6632 return;
6634 gsi_remove (&si, true);
6635 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6637 if (exit_bb)
6639 si = gsi_last_nondebug_bb (exit_bb);
6640 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6641 gsi_remove (&si, true);
6642 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6646 /* Translate enum omp_memory_order to enum memmodel. The two enums
6647 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
6648 is 0. */
6650 static enum memmodel
6651 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6653 switch (mo)
6655 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6656 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6657 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6658 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6659 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6660 default: gcc_unreachable ();
6664 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6665 operation as a normal volatile load. */
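/* Illustrative sketch: for a 4-byte V and P,

     #pragma omp atomic read
       v = *p;

   becomes roughly "v = __atomic_load_4 (p, mo);", with MO the memmodel
   derived from the construct's memory-order clause by the function above.  */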
6667 static bool
6668 expand_omp_atomic_load (basic_block load_bb, tree addr,
6669 tree loaded_val, int index)
6671 enum built_in_function tmpbase;
6672 gimple_stmt_iterator gsi;
6673 basic_block store_bb;
6674 location_t loc;
6675 gimple *stmt;
6676 tree decl, call, type, itype;
6678 gsi = gsi_last_nondebug_bb (load_bb);
6679 stmt = gsi_stmt (gsi);
6680 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6681 loc = gimple_location (stmt);
6683 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6684 is smaller than word size, then expand_atomic_load assumes that the load
6685 is atomic. We could avoid the builtin entirely in this case. */
6687 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6688 decl = builtin_decl_explicit (tmpbase);
6689 if (decl == NULL_TREE)
6690 return false;
6692 type = TREE_TYPE (loaded_val);
6693 itype = TREE_TYPE (TREE_TYPE (decl));
6695 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6696 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6697 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6698 if (!useless_type_conversion_p (type, itype))
6699 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6700 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6702 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6703 gsi_remove (&gsi, true);
6705 store_bb = single_succ (load_bb);
6706 gsi = gsi_last_nondebug_bb (store_bb);
6707 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6708 gsi_remove (&gsi, true);
6710 if (gimple_in_ssa_p (cfun))
6711 update_ssa (TODO_update_ssa_no_phi);
6713 return true;
6716 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6717 operation as a normal volatile store. */
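/* Illustrative sketch: "#pragma omp atomic write  *p = x;" becomes roughly
   "__atomic_store_4 (p, x, mo)" for a 4-byte type; when the old value is
   also needed the operation is an exchange instead, roughly
   "v = __atomic_exchange_4 (p, x, mo)", provided the target can do an
   atomic exchange in that mode.  */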
6719 static bool
6720 expand_omp_atomic_store (basic_block load_bb, tree addr,
6721 tree loaded_val, tree stored_val, int index)
6723 enum built_in_function tmpbase;
6724 gimple_stmt_iterator gsi;
6725 basic_block store_bb = single_succ (load_bb);
6726 location_t loc;
6727 gimple *stmt;
6728 tree decl, call, type, itype;
6729 machine_mode imode;
6730 bool exchange;
6732 gsi = gsi_last_nondebug_bb (load_bb);
6733 stmt = gsi_stmt (gsi);
6734 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6736 /* If the load value is needed, then this isn't a store but an exchange. */
6737 exchange = gimple_omp_atomic_need_value_p (stmt);
6739 gsi = gsi_last_nondebug_bb (store_bb);
6740 stmt = gsi_stmt (gsi);
6741 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6742 loc = gimple_location (stmt);
6744 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6745 is smaller than word size, then expand_atomic_store assumes that the store
6746 is atomic. We could avoid the builtin entirely in this case. */
6748 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6749 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6750 decl = builtin_decl_explicit (tmpbase);
6751 if (decl == NULL_TREE)
6752 return false;
6754 type = TREE_TYPE (stored_val);
6756 /* Dig out the type of the function's second argument. */
6757 itype = TREE_TYPE (decl);
6758 itype = TYPE_ARG_TYPES (itype);
6759 itype = TREE_CHAIN (itype);
6760 itype = TREE_VALUE (itype);
6761 imode = TYPE_MODE (itype);
6763 if (exchange && !can_atomic_exchange_p (imode, true))
6764 return false;
6766 if (!useless_type_conversion_p (itype, type))
6767 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6768 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6769 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6770 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6771 if (exchange)
6773 if (!useless_type_conversion_p (type, itype))
6774 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6775 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6778 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6779 gsi_remove (&gsi, true);
6781 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6782 gsi = gsi_last_nondebug_bb (load_bb);
6783 gsi_remove (&gsi, true);
6785 if (gimple_in_ssa_p (cfun))
6786 update_ssa (TODO_update_ssa_no_phi);
6788 return true;
6791 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6792 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6793 size of the data type, and thus usable to find the index of the builtin
6794 decl. Returns false if the expression is not of the proper form. */
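/* Illustrative sketch: for a 4-byte int X,

     #pragma omp atomic update
       x = x + 1;

   matches the PLUS_EXPR case below and becomes a single
   "__atomic_fetch_add_4 (&x, 1, mo)" call (or the ADD_FETCH variant when
   the updated value is needed), with no compare-and-swap loop.  */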
6796 static bool
6797 expand_omp_atomic_fetch_op (basic_block load_bb,
6798 tree addr, tree loaded_val,
6799 tree stored_val, int index)
6801 enum built_in_function oldbase, newbase, tmpbase;
6802 tree decl, itype, call;
6803 tree lhs, rhs;
6804 basic_block store_bb = single_succ (load_bb);
6805 gimple_stmt_iterator gsi;
6806 gimple *stmt;
6807 location_t loc;
6808 enum tree_code code;
6809 bool need_old, need_new;
6810 machine_mode imode;
6812 /* We expect to find the following sequences:
6814 load_bb:
6815 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6817 store_bb:
6818 val = tmp OP something; (or: something OP tmp)
6819 GIMPLE_OMP_ATOMIC_STORE (val)
6821 ???FIXME: Allow a more flexible sequence.
6822 Perhaps use data flow to pick the statements. */
6826 gsi = gsi_after_labels (store_bb);
6827 stmt = gsi_stmt (gsi);
6828 if (is_gimple_debug (stmt))
6830 gsi_next_nondebug (&gsi);
6831 if (gsi_end_p (gsi))
6832 return false;
6833 stmt = gsi_stmt (gsi);
6835 loc = gimple_location (stmt);
6836 if (!is_gimple_assign (stmt))
6837 return false;
6838 gsi_next_nondebug (&gsi);
6839 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6840 return false;
6841 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6842 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6843 enum omp_memory_order omo
6844 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6845 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6846 gcc_checking_assert (!need_old || !need_new);
6848 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6849 return false;
6851 /* Check for one of the supported fetch-op operations. */
6852 code = gimple_assign_rhs_code (stmt);
6853 switch (code)
6855 case PLUS_EXPR:
6856 case POINTER_PLUS_EXPR:
6857 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6858 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6859 break;
6860 case MINUS_EXPR:
6861 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6862 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6863 break;
6864 case BIT_AND_EXPR:
6865 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6866 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6867 break;
6868 case BIT_IOR_EXPR:
6869 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6870 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6871 break;
6872 case BIT_XOR_EXPR:
6873 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6874 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6875 break;
6876 default:
6877 return false;
6880 /* Make sure the expression is of the proper form. */
6881 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6882 rhs = gimple_assign_rhs2 (stmt);
6883 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6884 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6885 rhs = gimple_assign_rhs1 (stmt);
6886 else
6887 return false;
6889 tmpbase = ((enum built_in_function)
6890 ((need_new ? newbase : oldbase) + index + 1));
6891 decl = builtin_decl_explicit (tmpbase);
6892 if (decl == NULL_TREE)
6893 return false;
6894 itype = TREE_TYPE (TREE_TYPE (decl));
6895 imode = TYPE_MODE (itype);
6897 /* We could test all of the various optabs involved, but the fact of the
6898 matter is that (with the exception of i486 vs i586 and xadd) all targets
6899 that support any atomic operation optab also implement compare-and-swap.
6900 Let optabs.c take care of expanding any compare-and-swap loop. */
6901 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6902 return false;
6904 gsi = gsi_last_nondebug_bb (load_bb);
6905 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6907 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6908 It only requires that the operation happen atomically. Thus we can
6909 use the RELAXED memory model. */
6910 call = build_call_expr_loc (loc, decl, 3, addr,
6911 fold_convert_loc (loc, itype, rhs),
6912 build_int_cst (NULL, mo));
6914 if (need_old || need_new)
6916 lhs = need_old ? loaded_val : stored_val;
6917 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6918 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6920 else
6921 call = fold_convert_loc (loc, void_type_node, call);
6922 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6923 gsi_remove (&gsi, true);
6925 gsi = gsi_last_nondebug_bb (store_bb);
6926 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6927 gsi_remove (&gsi, true);
6928 gsi = gsi_last_nondebug_bb (store_bb);
6929 stmt = gsi_stmt (gsi);
6930 gsi_remove (&gsi, true);
6932 if (gimple_in_ssa_p (cfun))
6934 release_defs (stmt);
6935 update_ssa (TODO_update_ssa_no_phi);
6938 return true;
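/* For illustration (a sketch of the intent, not the exact GIMPLE produced):
   when the fetch-op path above succeeds for

     #pragma omp atomic
     x += n;

   on a 4-byte int, the load/store pair collapses into a single call along
   the lines of __atomic_fetch_add_4 (&x, n, mo) -- or the add_fetch variant
   when the updated value is needed -- with no compare-and-swap loop.  */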
6941 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6943 oldval = *addr;
6944 repeat:
6945 newval = rhs; // with oldval replacing *addr in rhs
6946 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6947 if (oldval != newval)
6948 goto repeat;
6950 INDEX is log2 of the size of the data type, and thus usable to find the
6951 index of the builtin decl. */
6953 static bool
6954 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6955 tree addr, tree loaded_val, tree stored_val,
6956 int index)
6958 tree loadedi, storedi, initial, new_storedi, old_vali;
6959 tree type, itype, cmpxchg, iaddr, atype;
6960 gimple_stmt_iterator si;
6961 basic_block loop_header = single_succ (load_bb);
6962 gimple *phi, *stmt;
6963 edge e;
6964 enum built_in_function fncode;
6966 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6967 order to use the RELAXED memory model effectively. */
6968 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6969 + index + 1);
6970 cmpxchg = builtin_decl_explicit (fncode);
6971 if (cmpxchg == NULL_TREE)
6972 return false;
6973 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6974 atype = type;
6975 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6977 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6978 || !can_atomic_load_p (TYPE_MODE (itype)))
6979 return false;
6981 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6982 si = gsi_last_nondebug_bb (load_bb);
6983 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6985 /* For floating-point values, we'll need to view-convert them to integers
6986 so that we can perform the atomic compare and swap. Simplify the
6987 following code by always setting up the "i"ntegral variables. */
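  /* E.g. (sketch) for a 4-byte float, ITYPE is the 32-bit integer type of
     the CAS builtin, IADDR holds ADDR converted to a pointer to that integer
     type, and LOADEDI/STOREDI below carry the float's bit pattern
     reinterpreted as that integer via VIEW_CONVERT_EXPR.  */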
6988 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6990 tree iaddr_val;
6992 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6993 true));
6994 atype = itype;
6995 iaddr_val
6996 = force_gimple_operand_gsi (&si,
6997 fold_convert (TREE_TYPE (iaddr), addr),
6998 false, NULL_TREE, true, GSI_SAME_STMT);
6999 stmt = gimple_build_assign (iaddr, iaddr_val);
7000 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7001 loadedi = create_tmp_var (itype);
7002 if (gimple_in_ssa_p (cfun))
7003 loadedi = make_ssa_name (loadedi);
7005 else
7007 iaddr = addr;
7008 loadedi = loaded_val;
7011 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7012 tree loaddecl = builtin_decl_explicit (fncode);
7013 if (loaddecl)
7014 initial
7015 = fold_convert (atype,
7016 build_call_expr (loaddecl, 2, iaddr,
7017 build_int_cst (NULL_TREE,
7018 MEMMODEL_RELAXED)));
7019 else
7021 tree off
7022 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7023 true), 0);
7024 initial = build2 (MEM_REF, atype, iaddr, off);
7027 initial
7028 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7029 GSI_SAME_STMT);
7031 /* Move the value to the LOADEDI temporary. */
7032 if (gimple_in_ssa_p (cfun))
7034 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7035 phi = create_phi_node (loadedi, loop_header);
7036 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7037 initial);
7039 else
7040 gsi_insert_before (&si,
7041 gimple_build_assign (loadedi, initial),
7042 GSI_SAME_STMT);
7043 if (loadedi != loaded_val)
7045 gimple_stmt_iterator gsi2;
7046 tree x;
7048 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7049 gsi2 = gsi_start_bb (loop_header);
7050 if (gimple_in_ssa_p (cfun))
7052 gassign *stmt;
7053 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7054 true, GSI_SAME_STMT);
7055 stmt = gimple_build_assign (loaded_val, x);
7056 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7058 else
7060 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7061 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7062 true, GSI_SAME_STMT);
7065 gsi_remove (&si, true);
7067 si = gsi_last_nondebug_bb (store_bb);
7068 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7070 if (iaddr == addr)
7071 storedi = stored_val;
7072 else
7073 storedi
7074 = force_gimple_operand_gsi (&si,
7075 build1 (VIEW_CONVERT_EXPR, itype,
7076 stored_val), true, NULL_TREE, true,
7077 GSI_SAME_STMT);
7079 /* Build the compare&swap statement. */
7080 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7081 new_storedi = force_gimple_operand_gsi (&si,
7082 fold_convert (TREE_TYPE (loadedi),
7083 new_storedi),
7084 true, NULL_TREE,
7085 true, GSI_SAME_STMT);
7087 if (gimple_in_ssa_p (cfun))
7088 old_vali = loadedi;
7089 else
7091 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7092 stmt = gimple_build_assign (old_vali, loadedi);
7093 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7095 stmt = gimple_build_assign (loadedi, new_storedi);
7096 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7099 /* Note that we always perform the comparison as an integer, even for
7100 floating point. This allows the atomic operation to properly
7101 succeed even with NaNs and -0.0. */
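  /* Sketch of the reasoning: the compare-and-swap returns the value that was
     in memory.  If memory held -0.0 while LOADEDI held +0.0, a floating-point
     compare would call them equal and we would leave the loop even though the
     swap did not happen; comparing the integer images reports exactly what
     the hardware CAS decided.  */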
7102 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7103 stmt = gimple_build_cond_empty (ne);
7104 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7106 /* Update cfg. */
7107 e = single_succ_edge (store_bb);
7108 e->flags &= ~EDGE_FALLTHRU;
7109 e->flags |= EDGE_FALSE_VALUE;
7110 /* Expect no looping. */
7111 e->probability = profile_probability::guessed_always ();
7113 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7114 e->probability = profile_probability::guessed_never ();
7116 /* Copy the new value to loadedi (we already did that before the condition
7117 if we are not in SSA). */
7118 if (gimple_in_ssa_p (cfun))
7120 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7121 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7124 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7125 gsi_remove (&si, true);
7127 struct loop *loop = alloc_loop ();
7128 loop->header = loop_header;
7129 loop->latch = store_bb;
7130 add_loop (loop, loop_header->loop_father);
7132 if (gimple_in_ssa_p (cfun))
7133 update_ssa (TODO_update_ssa_no_phi);
7135 return true;
7138 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7140 GOMP_atomic_start ();
7141 *addr = rhs;
7142 GOMP_atomic_end ();
7144 The result is not globally atomic, but works so long as all parallel
7145 references are within #pragma omp atomic directives. According to
7146 responses received from omp@openmp.org, this appears to be within spec,
7147 which makes sense, since that's how several other compilers handle
7148 this situation as well.
7149 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7150 expanding. STORED_VAL is the operand of the matching
7151 GIMPLE_OMP_ATOMIC_STORE.
7153 We replace
7154 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7155 loaded_val = *addr;
7157 and replace
7158 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7159 *addr = stored_val;
7162 static bool
7163 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7164 tree addr, tree loaded_val, tree stored_val)
7166 gimple_stmt_iterator si;
7167 gassign *stmt;
7168 tree t;
7170 si = gsi_last_nondebug_bb (load_bb);
7171 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7173 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7174 t = build_call_expr (t, 0);
7175 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7177 tree mem = build_simple_mem_ref (addr);
7178 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7179 TREE_OPERAND (mem, 1)
7180 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7181 true),
7182 TREE_OPERAND (mem, 1));
7183 stmt = gimple_build_assign (loaded_val, mem);
7184 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7185 gsi_remove (&si, true);
7187 si = gsi_last_nondebug_bb (store_bb);
7188 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7190 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7191 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7193 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7194 t = build_call_expr (t, 0);
7195 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7196 gsi_remove (&si, true);
7198 if (gimple_in_ssa_p (cfun))
7199 update_ssa (TODO_update_ssa_no_phi);
7200 return true;
7203 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
7204 using expand_omp_atomic_fetch_op. If that fails, we try to
7205 call expand_omp_atomic_pipeline, and if that fails too, the
7206 ultimate fallback is wrapping the operation in a mutex
7207 (expand_omp_atomic_mutex). REGION is the atomic region built
7208 by build_omp_regions_1(). */
7210 static void
7211 expand_omp_atomic (struct omp_region *region)
7213 basic_block load_bb = region->entry, store_bb = region->exit;
7214 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7215 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7216 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7217 tree addr = gimple_omp_atomic_load_rhs (load);
7218 tree stored_val = gimple_omp_atomic_store_val (store);
7219 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7220 HOST_WIDE_INT index;
7222 /* Make sure the type is one of the supported sizes. */
7223 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7224 index = exact_log2 (index);
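  /* E.g. a 4-byte int gives index 2 and a 16-byte type gives index 4, the
     largest size (1, 2, 4, 8 or 16 bytes) covered by the sized __atomic and
     __sync builtins.  */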
7225 if (index >= 0 && index <= 4)
7227 unsigned int align = TYPE_ALIGN_UNIT (type);
7229 /* __sync builtins require strict data alignment. */
7230 if (exact_log2 (align) >= index)
7232 /* Atomic load. */
7233 scalar_mode smode;
7234 if (loaded_val == stored_val
7235 && (is_int_mode (TYPE_MODE (type), &smode)
7236 || is_float_mode (TYPE_MODE (type), &smode))
7237 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7238 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7239 return;
7241 /* Atomic store. */
7242 if ((is_int_mode (TYPE_MODE (type), &smode)
7243 || is_float_mode (TYPE_MODE (type), &smode))
7244 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7245 && store_bb == single_succ (load_bb)
7246 && first_stmt (store_bb) == store
7247 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7248 stored_val, index))
7249 return;
7251 /* When possible, use specialized atomic update functions. */
7252 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7253 && store_bb == single_succ (load_bb)
7254 && expand_omp_atomic_fetch_op (load_bb, addr,
7255 loaded_val, stored_val, index))
7256 return;
7258 /* If we don't have specialized __sync builtins, try to implement it
7259 as a compare and swap loop. */
7260 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7261 loaded_val, stored_val, index))
7262 return;
7266 /* The ultimate fallback is wrapping the operation in a mutex. */
7267 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
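/* A rough illustration of the strategy above for "#pragma omp atomic" on an
   int: "x += 1" is handled by the fetch-op path, "x = x * 2" (no matching
   fetch-op builtin) falls back to the compare-and-swap pipeline, and a type
   larger than 16 bytes or with insufficient alignment ends up bracketed by
   GOMP_atomic_start/GOMP_atomic_end.  */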
7270 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7271 at REGION_EXIT. */
7273 static void
7274 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7275 basic_block region_exit)
7277 struct loop *outer = region_entry->loop_father;
7278 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7280 /* Don't parallelize the kernels region if it contains more than one outer
7281 loop. */
7282 unsigned int nr_outer_loops = 0;
7283 struct loop *single_outer = NULL;
7284 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7286 gcc_assert (loop_outer (loop) == outer);
7288 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7289 continue;
7291 if (region_exit != NULL
7292 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7293 continue;
7295 nr_outer_loops++;
7296 single_outer = loop;
7298 if (nr_outer_loops != 1)
7299 return;
7301 for (struct loop *loop = single_outer->inner;
7302 loop != NULL;
7303 loop = loop->inner)
7304 if (loop->next)
7305 return;
7307 /* Mark the loops in the region. */
7308 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7309 loop->in_oacc_kernels_region = true;
7312 /* Types used to pass grid and workgroup sizes to kernel invocation. */
7314 struct GTY(()) grid_launch_attributes_trees
7316 tree kernel_dim_array_type;
7317 tree kernel_lattrs_dimnum_decl;
7318 tree kernel_lattrs_grid_decl;
7319 tree kernel_lattrs_group_decl;
7320 tree kernel_launch_attributes_type;
7323 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7325 /* Create types used to pass kernel launch attributes to target. */
7327 static void
7328 grid_create_kernel_launch_attr_types (void)
7330 if (grid_attr_trees)
7331 return;
7332 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7334 tree dim_arr_index_type
7335 = build_index_type (build_int_cst (integer_type_node, 2));
7336 grid_attr_trees->kernel_dim_array_type
7337 = build_array_type (uint32_type_node, dim_arr_index_type);
7339 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7340 grid_attr_trees->kernel_lattrs_dimnum_decl
7341 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7342 uint32_type_node);
7343 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7345 grid_attr_trees->kernel_lattrs_grid_decl
7346 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7347 grid_attr_trees->kernel_dim_array_type);
7348 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7349 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7350 grid_attr_trees->kernel_lattrs_group_decl
7351 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7352 grid_attr_trees->kernel_dim_array_type);
7353 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7354 = grid_attr_trees->kernel_lattrs_grid_decl;
7355 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7356 "__gomp_kernel_launch_attributes",
7357 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
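/* For reference (a sketch only, not a declaration used anywhere), the record
   built above corresponds roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   i.e. a dimension count followed by per-dimension grid and work-group
   sizes.  */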
7360 /* Insert before the current statement in GSI a store of VALUE to element INDEX
7361 of the array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7362 of type uint32_type_node. */
7364 static void
7365 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7366 tree fld_decl, int index, tree value)
7368 tree ref = build4 (ARRAY_REF, uint32_type_node,
7369 build3 (COMPONENT_REF,
7370 grid_attr_trees->kernel_dim_array_type,
7371 range_var, fld_decl, NULL_TREE),
7372 build_int_cst (integer_type_node, index),
7373 NULL_TREE, NULL_TREE);
7374 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7377 /* Return a tree representation of a pointer to a structure with grid and
7378 work-group size information. Statements filling that information will be
7379 inserted before GSI; TGT_STMT is the target statement which has the
7380 necessary information in it. */
7382 static tree
7383 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7384 gomp_target *tgt_stmt)
7386 grid_create_kernel_launch_attr_types ();
7387 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7388 "__kernel_launch_attrs");
7390 unsigned max_dim = 0;
7391 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7392 clause;
7393 clause = OMP_CLAUSE_CHAIN (clause))
7395 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7396 continue;
7398 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7399 max_dim = MAX (dim, max_dim);
7401 grid_insert_store_range_dim (gsi, lattrs,
7402 grid_attr_trees->kernel_lattrs_grid_decl,
7403 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7404 grid_insert_store_range_dim (gsi, lattrs,
7405 grid_attr_trees->kernel_lattrs_group_decl,
7406 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7409 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7410 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7411 gcc_checking_assert (max_dim <= 2);
7412 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7413 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7414 GSI_SAME_STMT);
7415 TREE_ADDRESSABLE (lattrs) = 1;
7416 return build_fold_addr_expr (lattrs);
7419 /* Build target argument identifier from the DEVICE identifier, value
7420 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7422 static tree
7423 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
7425 tree t = build_int_cst (integer_type_node, device);
7426 if (subsequent_param)
7427 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7428 build_int_cst (integer_type_node,
7429 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7430 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7431 build_int_cst (integer_type_node, id));
7432 return t;
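/* The identifier is thus just the bitwise OR of its pieces; e.g. the HSA
   launch-attribute entry pushed by get_target_arguments below is
   GOMP_DEVICE_HSA | GOMP_TARGET_ARG_SUBSEQUENT_PARAM
   | GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES.  */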
7435 /* Like above, but return it in a type that can be directly stored as an element
7436 of the argument array. */
7438 static tree
7439 get_target_argument_identifier (int device, bool subsequent_param, int id)
7441 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
7442 return fold_convert (ptr_type_node, t);
7445 /* Return a target argument consisting of DEVICE identifier, value identifier
7446 ID, and the actual VALUE. */
7448 static tree
7449 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7450 tree value)
7452 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7453 fold_convert (integer_type_node, value),
7454 build_int_cst (unsigned_type_node,
7455 GOMP_TARGET_ARG_VALUE_SHIFT));
7456 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7457 get_target_argument_identifier_1 (device, false, id));
7458 t = fold_convert (ptr_type_node, t);
7459 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7462 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7463 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
7464 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7465 arguments. */
7467 static void
7468 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7469 int id, tree value, vec <tree> *args)
7471 if (tree_fits_shwi_p (value)
7472 && tree_to_shwi (value) > -(1 << 15)
7473 && tree_to_shwi (value) < (1 << 15))
7474 args->quick_push (get_target_argument_value (gsi, device, id, value));
7475 else
7477 args->quick_push (get_target_argument_identifier (device, true, id));
7478 value = fold_convert (ptr_type_node, value);
7479 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7480 GSI_SAME_STMT);
7481 args->quick_push (value);
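/* Illustration: a constant such as num_teams (4) fits in the signed 16-bit
   range, so a single element encodes (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
   | identifier; a value only known at run time instead produces two elements,
   the identifier (with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set) followed by the
   value cast to a pointer.  */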
7485 /* Create an array of arguments that is then passed to GOMP_target. */
7487 static tree
7488 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7490 auto_vec <tree, 6> args;
7491 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7492 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7493 if (c)
7494 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7495 else
7496 t = integer_minus_one_node;
7497 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7498 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7500 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7501 if (c)
7502 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7503 else
7504 t = integer_minus_one_node;
7505 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7506 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7507 &args);
7509 /* Add HSA-specific grid sizes, if available. */
7510 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7511 OMP_CLAUSE__GRIDDIM_))
7513 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7514 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7515 args.quick_push (t);
7516 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7519 /* Produce more, perhaps device specific, arguments here. */
7521 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7522 args.length () + 1),
7523 ".omp_target_args");
7524 for (unsigned i = 0; i < args.length (); i++)
7526 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7527 build_int_cst (integer_type_node, i),
7528 NULL_TREE, NULL_TREE);
7529 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7530 GSI_SAME_STMT);
7532 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7533 build_int_cst (integer_type_node, args.length ()),
7534 NULL_TREE, NULL_TREE);
7535 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7536 GSI_SAME_STMT);
7537 TREE_ADDRESSABLE (argarray) = 1;
7538 return build_fold_addr_expr (argarray);
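/* For a typical "#pragma omp target teams" without _griddim_ clauses the
   array built above is roughly { <num_teams entry>, <thread_limit entry>,
   NULL }, with an extra GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES identifier and
   launch-attribute pointer inserted before the terminating NULL when grid
   dimensions are present.  */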
7541 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7543 static void
7544 expand_omp_target (struct omp_region *region)
7546 basic_block entry_bb, exit_bb, new_bb;
7547 struct function *child_cfun;
7548 tree child_fn, block, t;
7549 gimple_stmt_iterator gsi;
7550 gomp_target *entry_stmt;
7551 gimple *stmt;
7552 edge e;
7553 bool offloaded, data_region;
7555 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7556 new_bb = region->entry;
7558 offloaded = is_gimple_omp_offloaded (entry_stmt);
7559 switch (gimple_omp_target_kind (entry_stmt))
7561 case GF_OMP_TARGET_KIND_REGION:
7562 case GF_OMP_TARGET_KIND_UPDATE:
7563 case GF_OMP_TARGET_KIND_ENTER_DATA:
7564 case GF_OMP_TARGET_KIND_EXIT_DATA:
7565 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7566 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7567 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7568 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7569 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7570 data_region = false;
7571 break;
7572 case GF_OMP_TARGET_KIND_DATA:
7573 case GF_OMP_TARGET_KIND_OACC_DATA:
7574 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7575 data_region = true;
7576 break;
7577 default:
7578 gcc_unreachable ();
7581 child_fn = NULL_TREE;
7582 child_cfun = NULL;
7583 if (offloaded)
7585 child_fn = gimple_omp_target_child_fn (entry_stmt);
7586 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7589 /* Supported by expand_omp_taskreg, but not here. */
7590 if (child_cfun != NULL)
7591 gcc_checking_assert (!child_cfun->cfg);
7592 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7594 entry_bb = region->entry;
7595 exit_bb = region->exit;
7597 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7599 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7601 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7602 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7603 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7604 DECL_ATTRIBUTES (child_fn)
7605 = tree_cons (get_identifier ("oacc kernels"),
7606 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7609 if (offloaded)
7611 unsigned srcidx, dstidx, num;
7613 /* If the offloading region needs data sent from the parent
7614 function, then the very first statement (except possible
7615 tree profile counter updates) of the offloading body
7616 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7617 &.OMP_DATA_O is passed as an argument to the child function,
7618 we need to replace it with the argument as seen by the child
7619 function.
7621 In most cases, this will end up being the identity assignment
7622 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7623 a function call that has been inlined, the original PARM_DECL
7624 .OMP_DATA_I may have been converted into a different local
7625 variable. In which case, we need to keep the assignment. */
7626 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7627 if (data_arg)
7629 basic_block entry_succ_bb = single_succ (entry_bb);
7630 gimple_stmt_iterator gsi;
7631 tree arg;
7632 gimple *tgtcopy_stmt = NULL;
7633 tree sender = TREE_VEC_ELT (data_arg, 0);
7635 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7637 gcc_assert (!gsi_end_p (gsi));
7638 stmt = gsi_stmt (gsi);
7639 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7640 continue;
7642 if (gimple_num_ops (stmt) == 2)
7644 tree arg = gimple_assign_rhs1 (stmt);
7646 /* We're ignoring the subcode because we're
7647 effectively doing a STRIP_NOPS. */
7649 if (TREE_CODE (arg) == ADDR_EXPR
7650 && TREE_OPERAND (arg, 0) == sender)
7652 tgtcopy_stmt = stmt;
7653 break;
7658 gcc_assert (tgtcopy_stmt != NULL);
7659 arg = DECL_ARGUMENTS (child_fn);
7661 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7662 gsi_remove (&gsi, true);
7665 /* Declare local variables needed in CHILD_CFUN. */
7666 block = DECL_INITIAL (child_fn);
7667 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7668 /* The gimplifier could record temporaries in the offloading block
7669 rather than in containing function's local_decls chain,
7670 which would mean cgraph missed finalizing them. Do it now. */
7671 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7672 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7673 varpool_node::finalize_decl (t);
7674 DECL_SAVED_TREE (child_fn) = NULL;
7675 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7676 gimple_set_body (child_fn, NULL);
7677 TREE_USED (block) = 1;
7679 /* Reset DECL_CONTEXT on function arguments. */
7680 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7681 DECL_CONTEXT (t) = child_fn;
7683 /* Split ENTRY_BB at GIMPLE_*,
7684 so that it can be moved to the child function. */
7685 gsi = gsi_last_nondebug_bb (entry_bb);
7686 stmt = gsi_stmt (gsi);
7687 gcc_assert (stmt
7688 && gimple_code (stmt) == gimple_code (entry_stmt));
7689 e = split_block (entry_bb, stmt);
7690 gsi_remove (&gsi, true);
7691 entry_bb = e->dest;
7692 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7694 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7695 if (exit_bb)
7697 gsi = gsi_last_nondebug_bb (exit_bb);
7698 gcc_assert (!gsi_end_p (gsi)
7699 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7700 stmt = gimple_build_return (NULL);
7701 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7702 gsi_remove (&gsi, true);
7705 /* Move the offloading region into CHILD_CFUN. */
7707 block = gimple_block (entry_stmt);
7709 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7710 if (exit_bb)
7711 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7712 /* When the OMP expansion process cannot guarantee an up-to-date
7713 loop tree, arrange for the child function to fix up loops. */
7714 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7715 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7717 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7718 num = vec_safe_length (child_cfun->local_decls);
7719 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7721 t = (*child_cfun->local_decls)[srcidx];
7722 if (DECL_CONTEXT (t) == cfun->decl)
7723 continue;
7724 if (srcidx != dstidx)
7725 (*child_cfun->local_decls)[dstidx] = t;
7726 dstidx++;
7728 if (dstidx != num)
7729 vec_safe_truncate (child_cfun->local_decls, dstidx);
7731 /* Inform the callgraph about the new function. */
7732 child_cfun->curr_properties = cfun->curr_properties;
7733 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7734 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7735 cgraph_node *node = cgraph_node::get_create (child_fn);
7736 node->parallelized_function = 1;
7737 cgraph_node::add_new_function (child_fn, true);
7739 /* Add the new function to the offload table. */
7740 if (ENABLE_OFFLOADING)
7742 if (in_lto_p)
7743 DECL_PRESERVE_P (child_fn) = 1;
7744 vec_safe_push (offload_funcs, child_fn);
7747 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7748 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7750 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7751 fixed in a following pass. */
7752 push_cfun (child_cfun);
7753 if (need_asm)
7754 assign_assembler_name_if_needed (child_fn);
7755 cgraph_edge::rebuild_edges ();
7757 /* Some EH regions might become dead, see PR34608. If
7758 pass_cleanup_cfg isn't the first pass to happen with the
7759 new child, these dead EH edges might cause problems.
7760 Clean them up now. */
7761 if (flag_exceptions)
7763 basic_block bb;
7764 bool changed = false;
7766 FOR_EACH_BB_FN (bb, cfun)
7767 changed |= gimple_purge_dead_eh_edges (bb);
7768 if (changed)
7769 cleanup_tree_cfg ();
7771 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7772 verify_loop_structure ();
7773 pop_cfun ();
7775 if (dump_file && !gimple_in_ssa_p (cfun))
7777 omp_any_child_fn_dumped = true;
7778 dump_function_header (dump_file, child_fn, dump_flags);
7779 dump_function_to_file (child_fn, dump_file, dump_flags);
7782 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7785 /* Emit a library call to launch the offloading region, or do data
7786 transfers. */
7787 tree t1, t2, t3, t4, depend, c, clauses;
7788 enum built_in_function start_ix;
7789 unsigned int flags_i = 0;
7791 switch (gimple_omp_target_kind (entry_stmt))
7793 case GF_OMP_TARGET_KIND_REGION:
7794 start_ix = BUILT_IN_GOMP_TARGET;
7795 break;
7796 case GF_OMP_TARGET_KIND_DATA:
7797 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7798 break;
7799 case GF_OMP_TARGET_KIND_UPDATE:
7800 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7801 break;
7802 case GF_OMP_TARGET_KIND_ENTER_DATA:
7803 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7804 break;
7805 case GF_OMP_TARGET_KIND_EXIT_DATA:
7806 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7807 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7808 break;
7809 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7810 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7811 start_ix = BUILT_IN_GOACC_PARALLEL;
7812 break;
7813 case GF_OMP_TARGET_KIND_OACC_DATA:
7814 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7815 start_ix = BUILT_IN_GOACC_DATA_START;
7816 break;
7817 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7818 start_ix = BUILT_IN_GOACC_UPDATE;
7819 break;
7820 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7821 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7822 break;
7823 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7824 start_ix = BUILT_IN_GOACC_DECLARE;
7825 break;
7826 default:
7827 gcc_unreachable ();
7830 clauses = gimple_omp_target_clauses (entry_stmt);
7832 tree device = NULL_TREE;
7833 location_t device_loc = UNKNOWN_LOCATION;
7834 tree goacc_flags = NULL_TREE;
7835 if (is_gimple_omp_oacc (entry_stmt))
7837 /* By default, no GOACC_FLAGs are set. */
7838 goacc_flags = integer_zero_node;
7840 else
7842 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7843 if (c)
7845 device = OMP_CLAUSE_DEVICE_ID (c);
7846 device_loc = OMP_CLAUSE_LOCATION (c);
7848 else
7850 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7851 library choose). */
7852 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7853 device_loc = gimple_location (entry_stmt);
7856 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7857 if (c)
7858 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7861 /* By default, there is no conditional. */
7862 tree cond = NULL_TREE;
7863 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7864 if (c)
7865 cond = OMP_CLAUSE_IF_EXPR (c);
7866 /* If we found the clause 'if (cond)', build:
7867 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
7868 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
7869 if (cond)
7871 tree *tp;
7872 if (is_gimple_omp_oacc (entry_stmt))
7873 tp = &goacc_flags;
7874 else
7876 /* Ensure 'device' is of the correct type. */
7877 device = fold_convert_loc (device_loc, integer_type_node, device);
7879 tp = &device;
7882 cond = gimple_boolify (cond);
7884 basic_block cond_bb, then_bb, else_bb;
7885 edge e;
7886 tree tmp_var;
7888 tmp_var = create_tmp_var (TREE_TYPE (*tp));
7889 if (offloaded)
7890 e = split_block_after_labels (new_bb);
7891 else
7893 gsi = gsi_last_nondebug_bb (new_bb);
7894 gsi_prev (&gsi);
7895 e = split_block (new_bb, gsi_stmt (gsi));
7897 cond_bb = e->src;
7898 new_bb = e->dest;
7899 remove_edge (e);
7901 then_bb = create_empty_bb (cond_bb);
7902 else_bb = create_empty_bb (then_bb);
7903 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7904 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7906 stmt = gimple_build_cond_empty (cond);
7907 gsi = gsi_last_bb (cond_bb);
7908 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7910 gsi = gsi_start_bb (then_bb);
7911 stmt = gimple_build_assign (tmp_var, *tp);
7912 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7914 gsi = gsi_start_bb (else_bb);
7915 if (is_gimple_omp_oacc (entry_stmt))
7916 stmt = gimple_build_assign (tmp_var,
7917 BIT_IOR_EXPR,
7918 *tp,
7919 build_int_cst (integer_type_node,
7920 GOACC_FLAG_HOST_FALLBACK));
7921 else
7922 stmt = gimple_build_assign (tmp_var,
7923 build_int_cst (integer_type_node,
7924 GOMP_DEVICE_HOST_FALLBACK));
7925 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7927 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7928 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7929 add_bb_to_loop (then_bb, cond_bb->loop_father);
7930 add_bb_to_loop (else_bb, cond_bb->loop_father);
7931 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7932 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7934 *tp = tmp_var;
7936 gsi = gsi_last_nondebug_bb (new_bb);
7938 else
7940 gsi = gsi_last_nondebug_bb (new_bb);
7942 if (device != NULL_TREE)
7943 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7944 true, GSI_SAME_STMT);
7947 t = gimple_omp_target_data_arg (entry_stmt);
7948 if (t == NULL)
7950 t1 = size_zero_node;
7951 t2 = build_zero_cst (ptr_type_node);
7952 t3 = t2;
7953 t4 = t2;
7955 else
7957 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7958 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7959 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7960 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7961 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7964 gimple *g;
7965 bool tagging = false;
7966 /* The maximum number of arguments used by any start_ix, without varargs. */
7967 auto_vec<tree, 11> args;
7968 if (is_gimple_omp_oacc (entry_stmt))
7970 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
7971 TREE_TYPE (goacc_flags), goacc_flags);
7972 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
7973 NULL_TREE, true,
7974 GSI_SAME_STMT);
7975 args.quick_push (goacc_flags_m);
7977 else
7978 args.quick_push (device);
7979 if (offloaded)
7980 args.quick_push (build_fold_addr_expr (child_fn));
7981 args.quick_push (t1);
7982 args.quick_push (t2);
7983 args.quick_push (t3);
7984 args.quick_push (t4);
7985 switch (start_ix)
7987 case BUILT_IN_GOACC_DATA_START:
7988 case BUILT_IN_GOACC_DECLARE:
7989 case BUILT_IN_GOMP_TARGET_DATA:
7990 break;
7991 case BUILT_IN_GOMP_TARGET:
7992 case BUILT_IN_GOMP_TARGET_UPDATE:
7993 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7994 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7995 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7996 if (c)
7997 depend = OMP_CLAUSE_DECL (c);
7998 else
7999 depend = build_int_cst (ptr_type_node, 0);
8000 args.quick_push (depend);
8001 if (start_ix == BUILT_IN_GOMP_TARGET)
8002 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8003 break;
8004 case BUILT_IN_GOACC_PARALLEL:
8005 oacc_set_fn_attrib (child_fn, clauses, &args);
8006 tagging = true;
8007 /* FALLTHRU */
8008 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8009 case BUILT_IN_GOACC_UPDATE:
8011 tree t_async = NULL_TREE;
8013 /* If present, use the value specified by the respective
8014 clause, making sure that it is of the correct type. */
8015 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8016 if (c)
8017 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8018 integer_type_node,
8019 OMP_CLAUSE_ASYNC_EXPR (c));
8020 else if (!tagging)
8021 /* Default values for t_async. */
8022 t_async = fold_convert_loc (gimple_location (entry_stmt),
8023 integer_type_node,
8024 build_int_cst (integer_type_node,
8025 GOMP_ASYNC_SYNC));
8026 if (tagging && t_async)
8028 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8030 if (TREE_CODE (t_async) == INTEGER_CST)
8032 /* See if we can pack the async arg into the tag's
8033 operand. */
8034 i_async = TREE_INT_CST_LOW (t_async);
8035 if (i_async < GOMP_LAUNCH_OP_MAX)
8036 t_async = NULL_TREE;
8037 else
8038 i_async = GOMP_LAUNCH_OP_MAX;
8040 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8041 i_async));
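          /* E.g. a literal "async (2)" is folded into the GOMP_LAUNCH_ASYNC
             tag word itself; only a non-constant or out-of-range async
             expression is passed as a separate trailing argument (T_ASYNC
             left non-NULL above).  */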
8043 if (t_async)
8044 args.safe_push (t_async);
8046 /* Save the argument index, and ... */
8047 unsigned t_wait_idx = args.length ();
8048 unsigned num_waits = 0;
8049 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8050 if (!tagging || c)
8051 /* ... push a placeholder. */
8052 args.safe_push (integer_zero_node);
8054 for (; c; c = OMP_CLAUSE_CHAIN (c))
8055 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8057 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8058 integer_type_node,
8059 OMP_CLAUSE_WAIT_EXPR (c)));
8060 num_waits++;
8063 if (!tagging || num_waits)
8065 tree len;
8067 /* Now that we know the number, update the placeholder. */
8068 if (tagging)
8069 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8070 else
8071 len = build_int_cst (integer_type_node, num_waits);
8072 len = fold_convert_loc (gimple_location (entry_stmt),
8073 unsigned_type_node, len);
8074 args[t_wait_idx] = len;
8077 break;
8078 default:
8079 gcc_unreachable ();
8081 if (tagging)
8082 /* Push terminal marker - zero. */
8083 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8085 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8086 gimple_set_location (g, gimple_location (entry_stmt));
8087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8088 if (!offloaded)
8090 g = gsi_stmt (gsi);
8091 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8092 gsi_remove (&gsi, true);
8094 if (data_region && region->exit)
8096 gsi = gsi_last_nondebug_bb (region->exit);
8097 g = gsi_stmt (gsi);
8098 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8099 gsi_remove (&gsi, true);
8103 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with the
8104 iteration variable derived from the thread number. INTRA_GROUP means this
8105 is an expansion of a loop iterating over work-items within a separate
8106 iteration over groups. */
8108 static void
8109 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8111 gimple_stmt_iterator gsi;
8112 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8113 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8114 == GF_OMP_FOR_KIND_GRID_LOOP);
8115 size_t collapse = gimple_omp_for_collapse (for_stmt);
8116 struct omp_for_data_loop *loops
8117 = XALLOCAVEC (struct omp_for_data_loop,
8118 gimple_omp_for_collapse (for_stmt));
8119 struct omp_for_data fd;
8121 remove_edge (BRANCH_EDGE (kfor->entry));
8122 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8124 gcc_assert (kfor->cont);
8125 omp_extract_for_data (for_stmt, &fd, loops);
8127 gsi = gsi_start_bb (body_bb);
8129 for (size_t dim = 0; dim < collapse; dim++)
8131 tree type, itype;
8132 itype = type = TREE_TYPE (fd.loops[dim].v);
8133 if (POINTER_TYPE_P (type))
8134 itype = signed_type_for (type);
8136 tree n1 = fd.loops[dim].n1;
8137 tree step = fd.loops[dim].step;
8138 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8139 true, NULL_TREE, true, GSI_SAME_STMT);
8140 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8141 true, NULL_TREE, true, GSI_SAME_STMT);
8142 tree threadid;
8143 if (gimple_omp_for_grid_group_iter (for_stmt))
8145 gcc_checking_assert (!intra_group);
8146 threadid = build_call_expr (builtin_decl_explicit
8147 (BUILT_IN_HSA_WORKGROUPID), 1,
8148 build_int_cstu (unsigned_type_node, dim));
8150 else if (intra_group)
8151 threadid = build_call_expr (builtin_decl_explicit
8152 (BUILT_IN_HSA_WORKITEMID), 1,
8153 build_int_cstu (unsigned_type_node, dim));
8154 else
8155 threadid = build_call_expr (builtin_decl_explicit
8156 (BUILT_IN_HSA_WORKITEMABSID), 1,
8157 build_int_cstu (unsigned_type_node, dim));
8158 threadid = fold_convert (itype, threadid);
8159 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8160 true, GSI_SAME_STMT);
8162 tree startvar = fd.loops[dim].v;
8163 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8164 if (POINTER_TYPE_P (type))
8165 t = fold_build_pointer_plus (n1, t);
8166 else
8167 t = fold_build2 (PLUS_EXPR, type, t, n1);
8168 t = fold_convert (type, t);
8169 t = force_gimple_operand_gsi (&gsi, t,
8170 DECL_P (startvar)
8171 && TREE_ADDRESSABLE (startvar),
8172 NULL_TREE, true, GSI_SAME_STMT);
8173 gassign *assign_stmt = gimple_build_assign (startvar, t);
8174 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8176 /* Remove the omp for statement. */
8177 gsi = gsi_last_nondebug_bb (kfor->entry);
8178 gsi_remove (&gsi, true);
8180 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8181 gsi = gsi_last_nondebug_bb (kfor->cont);
8182 gcc_assert (!gsi_end_p (gsi)
8183 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8184 gsi_remove (&gsi, true);
8186 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8187 gsi = gsi_last_nondebug_bb (kfor->exit);
8188 gcc_assert (!gsi_end_p (gsi)
8189 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8190 if (intra_group)
8191 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8192 gsi_remove (&gsi, true);
8194 /* Fixup the much simpler CFG. */
8195 remove_edge (find_edge (kfor->cont, body_bb));
8197 if (kfor->cont != body_bb)
8198 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8199 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
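/* In effect (a sketch): after the transformation above each collapsed
   dimension's iteration variable is initialized directly from the HSA
   work-item/work-group id,

     v = n1 + (itype) id (dim) * step;

   and the GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and the loop back edge are
   gone, so the former loop body executes exactly once per thread.  */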
8202 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8203 argument_decls. */
8205 struct grid_arg_decl_map
8207 tree old_arg;
8208 tree new_arg;
8211 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8212 pertaining to the kernel function. */
8214 static tree
8215 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8217 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8218 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8219 tree t = *tp;
8221 if (t == adm->old_arg)
8222 *tp = adm->new_arg;
8223 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8224 return NULL_TREE;
8227 /* If TARGET region contains a kernel body for loop, remove its region from the
8228 TARGET and expand it in HSA gridified kernel fashion. */
8230 static void
8231 grid_expand_target_grid_body (struct omp_region *target)
8233 if (!hsa_gen_requested_p ())
8234 return;
8236 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8237 struct omp_region **pp;
8239 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8240 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8241 break;
8243 struct omp_region *gpukernel = *pp;
8245 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8246 if (!gpukernel)
8248 /* HSA cannot handle OACC stuff. */
8249 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8250 return;
8251 gcc_checking_assert (orig_child_fndecl);
8252 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8253 OMP_CLAUSE__GRIDDIM_));
8254 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8256 hsa_register_kernel (n);
8257 return;
8260 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8261 OMP_CLAUSE__GRIDDIM_));
8262 tree inside_block
8263 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8264 *pp = gpukernel->next;
8265 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8266 if ((*pp)->type == GIMPLE_OMP_FOR)
8267 break;
8269 struct omp_region *kfor = *pp;
8270 gcc_assert (kfor);
8271 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8272 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8273 *pp = kfor->next;
8274 if (kfor->inner)
8276 if (gimple_omp_for_grid_group_iter (for_stmt))
8278 struct omp_region **next_pp;
8279 for (pp = &kfor->inner; *pp; pp = next_pp)
8281 next_pp = &(*pp)->next;
8282 if ((*pp)->type != GIMPLE_OMP_FOR)
8283 continue;
8284 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8285 gcc_assert (gimple_omp_for_kind (inner)
8286 == GF_OMP_FOR_KIND_GRID_LOOP);
8287 grid_expand_omp_for_loop (*pp, true);
8288 *pp = (*pp)->next;
8289 next_pp = pp;
8292 expand_omp (kfor->inner);
8294 if (gpukernel->inner)
8295 expand_omp (gpukernel->inner);
8297 tree kern_fndecl = copy_node (orig_child_fndecl);
8298 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8299 "kernel");
8300 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8301 tree tgtblock = gimple_block (tgt_stmt);
8302 tree fniniblock = make_node (BLOCK);
8303 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8304 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8305 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8306 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8307 DECL_INITIAL (kern_fndecl) = fniniblock;
8308 push_struct_function (kern_fndecl);
8309 cfun->function_end_locus = gimple_location (tgt_stmt);
8310 init_tree_ssa (cfun);
8311 pop_cfun ();
8313 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8314 gcc_assert (!DECL_CHAIN (old_parm_decl));
8315 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8316 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8317 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8318 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8319 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8320 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8321 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8322 kern_cfun->curr_properties = cfun->curr_properties;
8324 grid_expand_omp_for_loop (kfor, false);
8326 /* Remove the omp for statement. */
8327 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8328 gsi_remove (&gsi, true);
8329 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8330 return. */
8331 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8332 gcc_assert (!gsi_end_p (gsi)
8333 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8334 gimple *ret_stmt = gimple_build_return (NULL);
8335 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8336 gsi_remove (&gsi, true);
8338 /* Statements in the first BB in the target construct have been produced by
8339 target lowering and must be copied inside the GPUKERNEL, with the two
8340 exceptions of the first OMP statement and the OMP_DATA assignment
8341 statement. */
8342 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8343 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8344 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8345 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8346 !gsi_end_p (tsi); gsi_next (&tsi))
8348 gimple *stmt = gsi_stmt (tsi);
8349 if (is_gimple_omp (stmt))
8350 break;
8351 if (sender
8352 && is_gimple_assign (stmt)
8353 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8354 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8355 continue;
8356 gimple *copy = gimple_copy (stmt);
8357 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8358 gimple_set_block (copy, fniniblock);
8361 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8362 gpukernel->exit, inside_block);
8364 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8365 kcn->mark_force_output ();
8366 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8368 hsa_register_kernel (kcn, orig_child);
8370 cgraph_node::add_new_function (kern_fndecl, true);
8371 push_cfun (kern_cfun);
8372 cgraph_edge::rebuild_edges ();
8374 /* Re-map any mention of the PARM_DECL of the original function to the
8375 PARM_DECL of the new one.
8377 TODO: It would be great if lowering produced references into the GPU
8378 kernel decl straight away and we did not have to do this. */
8379 struct grid_arg_decl_map adm;
8380 adm.old_arg = old_parm_decl;
8381 adm.new_arg = new_parm_decl;
8382 basic_block bb;
8383 FOR_EACH_BB_FN (bb, kern_cfun)
8385 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8387 gimple *stmt = gsi_stmt (gsi);
8388 struct walk_stmt_info wi;
8389 memset (&wi, 0, sizeof (wi));
8390 wi.info = &adm;
8391 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8394 pop_cfun ();
8396 return;
8399 /* Expand the parallel region tree rooted at REGION. Expansion
8400 proceeds in depth-first order. Innermost regions are expanded
8401 first. This way, parallel regions that require a new function to
8402 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8403 internal dependencies in their body. */
8405 static void
8406 expand_omp (struct omp_region *region)
8408 omp_any_child_fn_dumped = false;
8409 while (region)
8411 location_t saved_location;
8412 gimple *inner_stmt = NULL;
8414 /* First, determine whether this is a combined parallel+workshare
8415 region. */
8416 if (region->type == GIMPLE_OMP_PARALLEL)
8417 determine_parallel_type (region);
8418 else if (region->type == GIMPLE_OMP_TARGET)
8419 grid_expand_target_grid_body (region);
8421 if (region->type == GIMPLE_OMP_FOR
8422 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8423 inner_stmt = last_stmt (region->inner->entry);
8425 if (region->inner)
8426 expand_omp (region->inner);
8428 saved_location = input_location;
8429 if (gimple_has_location (last_stmt (region->entry)))
8430 input_location = gimple_location (last_stmt (region->entry));
8432 switch (region->type)
8434 case GIMPLE_OMP_PARALLEL:
8435 case GIMPLE_OMP_TASK:
8436 expand_omp_taskreg (region);
8437 break;
8439 case GIMPLE_OMP_FOR:
8440 expand_omp_for (region, inner_stmt);
8441 break;
8443 case GIMPLE_OMP_SECTIONS:
8444 expand_omp_sections (region);
8445 break;
8447 case GIMPLE_OMP_SECTION:
8448 /* Individual omp sections are handled together with their
8449 parent GIMPLE_OMP_SECTIONS region. */
8450 break;
8452 case GIMPLE_OMP_SINGLE:
8453 expand_omp_single (region);
8454 break;
8456 case GIMPLE_OMP_ORDERED:
8458 gomp_ordered *ord_stmt
8459 = as_a <gomp_ordered *> (last_stmt (region->entry));
8460 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8461 OMP_CLAUSE_DEPEND))
8463 /* We'll expand these when expanding corresponding
8464 worksharing region with ordered(n) clause. */
8465 gcc_assert (region->outer
8466 && region->outer->type == GIMPLE_OMP_FOR);
8467 region->ord_stmt = ord_stmt;
8468 break;
8471 /* FALLTHRU */
8472 case GIMPLE_OMP_MASTER:
8473 case GIMPLE_OMP_TASKGROUP:
8474 case GIMPLE_OMP_CRITICAL:
8475 case GIMPLE_OMP_TEAMS:
8476 expand_omp_synch (region);
8477 break;
8479 case GIMPLE_OMP_ATOMIC_LOAD:
8480 expand_omp_atomic (region);
8481 break;
8483 case GIMPLE_OMP_TARGET:
8484 expand_omp_target (region);
8485 break;
8487 default:
8488 gcc_unreachable ();
8491 input_location = saved_location;
8492 region = region->next;
8494 if (omp_any_child_fn_dumped)
8496 if (dump_file)
8497 dump_function_header (dump_file, current_function_decl, dump_flags);
8498 omp_any_child_fn_dumped = false;
8502 /* Helper for build_omp_regions. Scan the dominator tree starting at
8503 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8504 true, the function ends once a single tree is built (otherwise, a whole
8505 forest of OMP constructs may be built). */
8507 static void
8508 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8509 bool single_tree)
8511 gimple_stmt_iterator gsi;
8512 gimple *stmt;
8513 basic_block son;
8515 gsi = gsi_last_nondebug_bb (bb);
8516 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8518 struct omp_region *region;
8519 enum gimple_code code;
8521 stmt = gsi_stmt (gsi);
8522 code = gimple_code (stmt);
8523 if (code == GIMPLE_OMP_RETURN)
8525 /* STMT is the return point out of region PARENT. Mark it
8526 as the exit point and make PARENT the immediately
8527 enclosing region. */
8528 gcc_assert (parent);
8529 region = parent;
8530 region->exit = bb;
8531 parent = parent->outer;
8533 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8535 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8536 GIMPLE_OMP_RETURN, but matches with
8537 GIMPLE_OMP_ATOMIC_LOAD. */
8538 gcc_assert (parent);
8539 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8540 region = parent;
8541 region->exit = bb;
8542 parent = parent->outer;
8544 else if (code == GIMPLE_OMP_CONTINUE)
8546 gcc_assert (parent);
8547 parent->cont = bb;
8549 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8551 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8552 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8554 else
8556 region = new_omp_region (bb, code, parent);
8557 /* Otherwise... */
8558 if (code == GIMPLE_OMP_TARGET)
8560 switch (gimple_omp_target_kind (stmt))
8562 case GF_OMP_TARGET_KIND_REGION:
8563 case GF_OMP_TARGET_KIND_DATA:
8564 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8565 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8566 case GF_OMP_TARGET_KIND_OACC_DATA:
8567 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8568 break;
8569 case GF_OMP_TARGET_KIND_UPDATE:
8570 case GF_OMP_TARGET_KIND_ENTER_DATA:
8571 case GF_OMP_TARGET_KIND_EXIT_DATA:
8572 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8573 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8574 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8575 /* ..., other than for those stand-alone directives... */
8576 region = NULL;
8577 break;
8578 default:
8579 gcc_unreachable ();
8582 else if (code == GIMPLE_OMP_ORDERED
8583 && omp_find_clause (gimple_omp_ordered_clauses
8584 (as_a <gomp_ordered *> (stmt)),
8585 OMP_CLAUSE_DEPEND))
8586 /* #pragma omp ordered depend is also just a stand-alone
8587 directive. */
8588 region = NULL;
8589 else if (code == GIMPLE_OMP_TASK
8590 && gimple_omp_task_taskwait_p (stmt))
8591 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8592 region = NULL;
8593 /* ..., this directive becomes the parent for a new region. */
8594 if (region)
8595 parent = region;
8599 if (single_tree && !parent)
8600 return;
8602 for (son = first_dom_son (CDI_DOMINATORS, bb);
8603 son;
8604 son = next_dom_son (CDI_DOMINATORS, son))
8605 build_omp_regions_1 (son, parent, single_tree);
8608 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8609 root_omp_region. */
8611 static void
8612 build_omp_regions_root (basic_block root)
8614 gcc_assert (root_omp_region == NULL);
8615 build_omp_regions_1 (root, NULL, true);
8616 gcc_assert (root_omp_region != NULL);
8619 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8621 void
8622 omp_expand_local (basic_block head)
8624 build_omp_regions_root (head);
8625 if (dump_file && (dump_flags & TDF_DETAILS))
8627 fprintf (dump_file, "\nOMP region tree\n\n");
8628 dump_omp_region (dump_file, root_omp_region, 0);
8629 fprintf (dump_file, "\n");
8632 remove_exit_barriers (root_omp_region);
8633 expand_omp (root_omp_region);
8635 omp_free_regions ();
8638 /* Scan the CFG and build a tree of OMP regions, storing the root of
8639 the OMP region tree in root_omp_region. */
8641 static void
8642 build_omp_regions (void)
8644 gcc_assert (root_omp_region == NULL);
8645 calculate_dominance_info (CDI_DOMINATORS);
8646 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8649 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8651 static unsigned int
8652 execute_expand_omp (void)
8654 build_omp_regions ();
8656 if (!root_omp_region)
8657 return 0;
8659 if (dump_file)
8661 fprintf (dump_file, "\nOMP region tree\n\n");
8662 dump_omp_region (dump_file, root_omp_region, 0);
8663 fprintf (dump_file, "\n");
8666 remove_exit_barriers (root_omp_region);
8668 expand_omp (root_omp_region);
8670 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8671 verify_loop_structure ();
8672 cleanup_tree_cfg ();
8674 omp_free_regions ();
8676 return 0;

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
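
/* In short: "ompexp" runs unconditionally before SSA and always provides
   PROP_gimple_eomp, even when the gate variable above decides there is
   nothing to expand; "ompexpssa" is gated on PROP_gimple_eomp still being
   absent, so it only fires for functions that reach this point in SSA form
   without having gone through "ompexp", and its todo_flags_finish then
   cleans up the CFG and rebuilds alias information.  */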

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;
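
      /* For example, a stand-alone "#pragma omp ordered depend (sink: i - 1)"
	 carries an OMP_CLAUSE_DEPEND, so the region opened just above is
	 popped again immediately; only the block-associated form of
	 "#pragma omp ordered" keeps its region open until the matching
	 GIMPLE_OMP_RETURN.  */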

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;
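
      /* The second group of target kinds above are the stand-alone
	 directives ("#pragma omp target update", "enter data", "exit data"
	 and their OpenACC counterparts, including OpenACC declare): they
	 have no structured block and hence no GIMPLE_OMP_RETURN, so the
	 region is closed again right away.  */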

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;
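
	  /* Schematically, the GIMPLE_OMP_FOR case above wires up

	       OMP_FOR entry bb -------------.
		  |                          |  (abnormal edge for the
		  v                          |   zero-iterations case)
	       loop body  <---------.        |
		  |                 |        |
		  v                 |        |
	       OMP_CONTINUE bb -----'        |  (abnormal loopback)
		  |                          |
		  `-----> bb->next_bb  <-----'

	     with every created or touched edge flagged EDGE_ABNORMAL (the
	     continue-to-next_bb edge also EDGE_FALLTHRU) so that later CFG
	     manipulation does not split them.  */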

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;
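
	  /* Schematically, for a "#pragma omp sections" with two sections
	     the edges created above form

	       OMP_SECTIONS entry bb
		       |
	       SECTIONS_SWITCH bb  <-----------------.
		  /        |          \              |
	     section 1  section 2      \             |
		  \        |            v            |
		   v       v        bb->next_bb      |
		  OMP_CONTINUE bb -------------------'  (loopback)

	     where each section's exit falls through into the
	     GIMPLE_OMP_CONTINUE block.  */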

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"