Document gcov-io (PR gcov-profile/84735).
[official-gcc.git] / gcc / omp-expand.c
blob bb204906ea64efa94a78b42bbc606c0e61f29192
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
117 /* Return true if REGION is a combined parallel+workshare region. */
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
122 return region->is_combined_parallel;
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
139 Is lowered into:
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
196 return true;
199 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 if (!simd_schedule)
206 return chunk_size;
208 poly_uint64 vf = omp_max_vf ();
209 if (known_eq (vf, 1U))
210 return chunk_size;
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 struct omp_for_data fd;
233 tree n1, n2;
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
239 if (gimple_omp_for_combined_into_p (for_stmt))
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
263 if (fd.chunk_size)
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
270 return ws_args;
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
284 gcc_unreachable ();
287 /* Discover whether REGION is a combined parallel+workshare region. */
289 static void
290 determine_parallel_type (struct omp_region *region)
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
323 if (region->inner->type == GIMPLE_OMP_FOR)
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
358 /* Dump the parallel region tree rooted at REGION. */
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
369 if (region->cont)
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
388 dump_omp_region (stderr, region, 0);
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
394 dump_omp_region (stderr, root_omp_region, 0);
397 /* Create a new parallel region starting at STMT inside region PARENT. */
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
403 struct omp_region *region = XCNEW (struct omp_region);
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
409 if (parent)
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
416 else
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
424 return region;
427 /* Release the memory associated with the region tree rooted at REGION. */
429 static void
430 free_omp_region_1 (struct omp_region *region)
432 struct omp_region *i, *n;
434 for (i = region->inner; i ; i = n)
436 n = i->next;
437 free_omp_region_1 (i);
440 free (region);
443 /* Release the memory for the entire omp region tree. */
445 void
446 omp_free_regions (void)
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
451 n = r->next;
452 free_omp_region_1 (r);
454 root_omp_region = NULL;
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
460 static gcond *
461 gimple_build_cond_empty (tree cond)
463 enum tree_code pred_code;
464 tree lhs, rhs;
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
498 return false;
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502 Add CHILD_FNDECL to decl chain of the supercontext of the block
503 ENTRY_BLOCK - this is the block which originally contained the
504 code from which CHILD_FNDECL was created.
506 Together, these actions ensure that the debug info for the outlined
507 function will be emitted with the correct lexical scope. */
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
514 tree b = BLOCK_SUPERCONTEXT (entry_block);
516 if (TREE_CODE (b) == BLOCK)
518 tree parent_fndecl;
520 /* Follow supercontext chain until the parent fndecl
521 is found. */
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 TREE_CODE (parent_fndecl) == BLOCK;
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
529 DECL_CONTEXT (child_fndecl) = parent_fndecl;
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 BLOCK_VARS (b) = child_fndecl;
537 /* Build the function calls to GOMP_parallel_start etc to actually
538 generate the parallel operation. REGION is the parallel region
539 being expanded. BB is the block where to insert the code. WS_ARGS
540 will be set if this is a call to a combined parallel+workshare
541 construct, it contains the list of additional arguments needed by
542 the workshare construct. */
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 gomp_parallel *entry_stmt,
547 vec<tree, va_gc> *ws_args)
549 tree t, t1, t2, val, cond, c, clauses, flags;
550 gimple_stmt_iterator gsi;
551 gimple *stmt;
552 enum built_in_function start_ix;
553 int start_ix2;
554 location_t clause_loc;
555 vec<tree, va_gc> *args;
557 clauses = gimple_omp_parallel_clauses (entry_stmt);
559 /* Determine what flavor of GOMP_parallel we will be
560 emitting. */
561 start_ix = BUILT_IN_GOMP_PARALLEL;
562 if (is_combined_parallel (region))
564 switch (region->inner->type)
566 case GIMPLE_OMP_FOR:
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 switch (region->inner->sched_kind)
570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 start_ix2 = 3;
572 break;
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 case OMP_CLAUSE_SCHEDULE_GUIDED:
575 if (region->inner->sched_modifiers
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
578 start_ix2 = 3 + region->inner->sched_kind;
579 break;
581 /* FALLTHRU */
582 default:
583 start_ix2 = region->inner->sched_kind;
584 break;
586 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 start_ix = (enum built_in_function) start_ix2;
588 break;
589 case GIMPLE_OMP_SECTIONS:
590 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 break;
592 default:
593 gcc_unreachable ();
597 /* By default, the value of NUM_THREADS is zero (selected at run time)
598 and there is no conditional. */
599 cond = NULL_TREE;
600 val = build_int_cst (unsigned_type_node, 0);
601 flags = build_int_cst (unsigned_type_node, 0);
603 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604 if (c)
605 cond = OMP_CLAUSE_IF_EXPR (c);
607 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608 if (c)
610 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611 clause_loc = OMP_CLAUSE_LOCATION (c);
613 else
614 clause_loc = gimple_location (entry_stmt);
616 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617 if (c)
618 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
620 /* Ensure 'val' is of the correct type. */
621 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
623 /* If we found the clause 'if (cond)', build either
624 (cond != 0) or (cond ? val : 1u). */
625 if (cond)
627 cond = gimple_boolify (cond);
629 if (integer_zerop (val))
630 val = fold_build2_loc (clause_loc,
631 EQ_EXPR, unsigned_type_node, cond,
632 build_int_cst (TREE_TYPE (cond), 0));
633 else
635 basic_block cond_bb, then_bb, else_bb;
636 edge e, e_then, e_else;
637 tree tmp_then, tmp_else, tmp_join, tmp_var;
639 tmp_var = create_tmp_var (TREE_TYPE (val));
640 if (gimple_in_ssa_p (cfun))
642 tmp_then = make_ssa_name (tmp_var);
643 tmp_else = make_ssa_name (tmp_var);
644 tmp_join = make_ssa_name (tmp_var);
646 else
648 tmp_then = tmp_var;
649 tmp_else = tmp_var;
650 tmp_join = tmp_var;
653 e = split_block_after_labels (bb);
654 cond_bb = e->src;
655 bb = e->dest;
656 remove_edge (e);
658 then_bb = create_empty_bb (cond_bb);
659 else_bb = create_empty_bb (then_bb);
660 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
663 stmt = gimple_build_cond_empty (cond);
664 gsi = gsi_start_bb (cond_bb);
665 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
667 gsi = gsi_start_bb (then_bb);
668 expand_omp_build_assign (&gsi, tmp_then, val, true);
670 gsi = gsi_start_bb (else_bb);
671 expand_omp_build_assign (&gsi, tmp_else,
672 build_int_cst (unsigned_type_node, 1),
673 true);
675 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 add_bb_to_loop (then_bb, cond_bb->loop_father);
678 add_bb_to_loop (else_bb, cond_bb->loop_father);
679 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
682 if (gimple_in_ssa_p (cfun))
684 gphi *phi = create_phi_node (tmp_join, bb);
685 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
689 val = tmp_join;
692 gsi = gsi_start_bb (bb);
693 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 false, GSI_CONTINUE_LINKING);
697 gsi = gsi_last_nondebug_bb (bb);
698 t = gimple_omp_parallel_data_arg (entry_stmt);
699 if (t == NULL)
700 t1 = null_pointer_node;
701 else
702 t1 = build_fold_addr_expr (t);
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704 t2 = build_fold_addr_expr (child_fndecl);
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
708 vec_alloc (args, 4 + vec_safe_length (ws_args));
709 args->quick_push (t2);
710 args->quick_push (t1);
711 args->quick_push (val);
712 if (ws_args)
713 args->splice (*ws_args);
714 args->quick_push (flags);
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 builtin_decl_explicit (start_ix), args);
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 false, GSI_CONTINUE_LINKING);
722 if (hsa_gen_requested_p ()
723 && parallel_needs_hsa_kernel_p (region))
725 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726 hsa_register_kernel (child_cnode);
730 /* Build the function call to GOMP_task to actually
731 generate the task operation. BB is the block where to insert the code. */
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 else
787 num_tasks = integer_zero_node;
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
799 tree flags = build_int_cst (unsigned_type_node, iflags);
801 tree cond = boolean_true_node;
802 if (ifc)
804 if (taskloop_p)
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 if (finalc)
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
837 gsi = gsi_last_nondebug_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
874 tree chain = NULL_TREE, t;
875 unsigned ix;
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 DECL_CHAIN (t) = chain;
880 chain = t;
883 return chain;
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
892 static void
893 remove_exit_barrier (struct omp_region *region)
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
902 exit_bb = region->exit;
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_nondebug_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev_nondebug (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 gsi = gsi_last_nondebug_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
949 any_addressable_vars = 1;
950 break;
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
963 any_addressable_vars = 1;
964 break;
966 if (block == gimple_block (parallel_stmt))
967 break;
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
976 static void
977 remove_exit_barriers (struct omp_region *region)
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
982 if (region->inner)
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
988 region = region->next;
989 remove_exit_barriers (region);
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for task body, except
999 that in untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1027 tree built_in;
1029 if (DECL_NAME (decl) == thr_num_id)
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1054 gimple_call_set_fndecl (call, built_in);
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 tree t = *tp;
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1101 /* Expand the OpenMP parallel or task directive starting at REGION.
   If the child function already has a CFG, the region sub-graph is
   removed from the current function.  Otherwise the region body is
   moved into the child function with move_sese_region_to_fn and the
   child is registered with the callgraph.  In both cases the directive
   is then replaced through expand_parallel_call or expand_task_call. */
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
/* The directive is the last statement of the region entry block; it
   carries the outlined child function. */
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 entry_bb = region->entry;
/* A task region is bounded by region->cont, a parallel region by
   region->exit. */
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
/* ws_args is only taken from the region for a combined parallel. */
1124 if (is_combined_parallel (region))
1125 ws_args = region->ws_args;
1126 else
1127 ws_args = NULL;
1129 if (child_cfun->cfg)
1131 /* Due to inlining, it may happen that we have already outlined
1132 the region, in which case all we need to do is make the
1133 sub-graph unreachable and emit the parallel call. */
1134 edge entry_succ_e, exit_succ_e;
1136 entry_succ_e = single_succ_edge (entry_bb);
1138 gsi = gsi_last_nondebug_bb (entry_bb);
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141 gsi_remove (&gsi, true);
1143 new_bb = entry_bb;
1144 if (exit_bb)
1146 exit_succ_e = single_succ_edge (exit_bb);
1147 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1149 remove_edge_and_dominated_blocks (entry_succ_e);
1151 else
1153 unsigned srcidx, dstidx, num;
1155 /* If the parallel region needs data sent from the parent
1156 function, then the very first statement (except possible
1157 tree profile counter updates) of the parallel body
1158 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1159 &.OMP_DATA_O is passed as an argument to the child function,
1160 we need to replace it with the argument as seen by the child
1161 function.
1163 In most cases, this will end up being the identity assignment
1164 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1165 a function call that has been inlined, the original PARM_DECL
1166 .OMP_DATA_I may have been converted into a different local
1167 variable. In which case, we need to keep the assignment. */
1168 if (gimple_omp_taskreg_data_arg (entry_stmt))
1170 basic_block entry_succ_bb
1171 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 : FALLTHRU_EDGE (entry_bb)->dest;
1173 tree arg;
1174 gimple *parcopy_stmt = NULL;
1176 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1178 gimple *stmt;
1180 gcc_assert (!gsi_end_p (gsi));
1181 stmt = gsi_stmt (gsi);
1182 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 continue;
1185 if (gimple_num_ops (stmt) == 2)
1187 tree arg = gimple_assign_rhs1 (stmt);
1189 /* We ignore the subcode because we are
1190 effectively doing a STRIP_NOPS. */
1192 if (TREE_CODE (arg) == ADDR_EXPR
1193 && TREE_OPERAND (arg, 0)
1194 == gimple_omp_taskreg_data_arg (entry_stmt))
1196 parcopy_stmt = stmt;
1197 break;
1202 gcc_assert (parcopy_stmt != NULL);
1203 arg = DECL_ARGUMENTS (child_fn);
/* Outside SSA form the copy is dropped when it is already the identity;
   otherwise its right-hand side becomes the child parameter. */
1205 if (!gimple_in_ssa_p (cfun))
1207 if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 gsi_remove (&gsi, true);
1209 else
1211 /* ?? Is setting the subcode really necessary ?? */
1212 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1216 else
1218 tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 /* We'd like to set the rhs to the default def in the child_fn,
1221 but it's too early to create ssa names in the child_fn.
1222 Instead, we set the rhs to the parm. In
1223 move_sese_region_to_fn, we introduce a default def for the
1224 parm, map the parm to its default def, and once we encounter
1225 this stmt, replace the parm with the default def. */
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 update_stmt (parcopy_stmt);
1231 /* Declare local variables needed in CHILD_CFUN. */
1232 block = DECL_INITIAL (child_fn);
1233 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234 /* The gimplifier could record temporaries in parallel/task block
1235 rather than in containing function's local_decls chain,
1236 which would mean cgraph missed finalizing them. Do it now. */
1237 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 varpool_node::finalize_decl (t);
1240 DECL_SAVED_TREE (child_fn) = NULL;
1241 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1242 gimple_set_body (child_fn, NULL);
1243 TREE_USED (block) = 1;
1245 /* Reset DECL_CONTEXT on function arguments. */
1246 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 DECL_CONTEXT (t) = child_fn;
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 so that it can be moved to the child function. */
1251 gsi = gsi_last_nondebug_bb (entry_bb);
1252 stmt = gsi_stmt (gsi);
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255 e = split_block (entry_bb, stmt);
1256 gsi_remove (&gsi, true);
1257 entry_bb = e->dest;
/* e2 stays NULL for a parallel.  For a task it is a temporary abnormal
   edge from the split-off block to region->exit, removed again once the
   region has been moved. */
1258 edge e2 = NULL;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261 else
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 gcc_assert (e2->dest == region->exit);
1265 remove_edge (BRANCH_EDGE (entry_bb));
1266 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 gsi = gsi_last_nondebug_bb (region->exit);
1268 gcc_assert (!gsi_end_p (gsi)
1269 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 gsi_remove (&gsi, true);
1273 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1274 if (exit_bb)
1276 gsi = gsi_last_nondebug_bb (exit_bb);
1277 gcc_assert (!gsi_end_p (gsi)
1278 && (gimple_code (gsi_stmt (gsi))
1279 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 stmt = gimple_build_return (NULL);
1281 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 gsi_remove (&gsi, true);
1285 /* Move the parallel region into CHILD_CFUN. */
1287 if (gimple_in_ssa_p (cfun))
1289 init_tree_ssa (child_cfun);
1290 init_ssa_operands (child_cfun);
1291 child_cfun->gimple_df->in_ssa_p = true;
1292 block = NULL_TREE;
1294 else
1295 block = gimple_block (entry_stmt);
1297 /* Make sure to generate early debug for the function before
1298 outlining anything. */
1299 if (! gimple_in_ssa_p (cfun))
1300 (*debug_hooks->early_global_decl) (cfun->decl);
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303 if (exit_bb)
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305 if (e2)
1307 basic_block dest_bb = e2->dest;
1308 if (!exit_bb)
1309 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 remove_edge (e2);
1311 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1313 /* When the OMP expansion process cannot guarantee an up-to-date
1314 loop tree arrange for the child function to fixup loops. */
1315 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1318 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1319 num = vec_safe_length (child_cfun->local_decls);
1320 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1322 t = (*child_cfun->local_decls)[srcidx];
1323 if (DECL_CONTEXT (t) == cfun->decl)
1324 continue;
1325 if (srcidx != dstidx)
1326 (*child_cfun->local_decls)[dstidx] = t;
1327 dstidx++;
1329 if (dstidx != num)
1330 vec_safe_truncate (child_cfun->local_decls, dstidx);
1332 /* Inform the callgraph about the new function. */
1333 child_cfun->curr_properties = cfun->curr_properties;
1334 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336 cgraph_node *node = cgraph_node::get_create (child_fn);
1337 node->parallelized_function = 1;
1338 cgraph_node::add_new_function (child_fn, true);
/* need_asm is computed here, before push_cfun (child_cfun) below. */
1340 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1343 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1344 fixed in a following pass. */
1345 push_cfun (child_cfun);
1346 if (need_asm)
1347 assign_assembler_name_if_needed (child_fn);
1349 if (optimize)
1350 optimize_omp_library_calls (entry_stmt);
1351 update_max_bb_count ();
1352 cgraph_edge::rebuild_edges ();
1354 /* Some EH regions might become dead, see PR34608. If
1355 pass_cleanup_cfg isn't the first pass to happen with the
1356 new child, these dead EH edges might cause problems.
1357 Clean them up now. */
1358 if (flag_exceptions)
1360 basic_block bb;
1361 bool changed = false;
1363 FOR_EACH_BB_FN (bb, cfun)
1364 changed |= gimple_purge_dead_eh_edges (bb);
1365 if (changed)
1366 cleanup_tree_cfg ();
1368 if (gimple_in_ssa_p (cfun))
1369 update_ssa (TODO_update_ssa);
1370 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 verify_loop_structure ();
1372 pop_cfun ();
1374 if (dump_file && !gimple_in_ssa_p (cfun))
1376 omp_any_child_fn_dumped = true;
1377 dump_function_header (dump_file, child_fn, dump_flags);
1378 dump_function_to_file (child_fn, dump_file, dump_flags);
/* Emit the parallel or task call for the region, depending on the kind
   of directive. */
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383 expand_parallel_call (region, new_bb,
1384 as_a <gomp_parallel *> (entry_stmt), ws_args);
1385 else
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387 if (gimple_in_ssa_p (cfun))
1388 update_ssa (TODO_update_ssa_only_virtuals);
1391 /* Information about members of an OpenACC collapsed loop nest. */
1393 struct oacc_collapse
1395 tree base; /* Base value. */
1396 tree iters; /* Number of steps. */
1397 tree step; /* Step size. */
1398 tree tile; /* Tile increment (if tiled). */
1399 tree outer; /* Tile iterator var. */
1402 /* Helper for expand_oacc_for. Determine collapsed loop information.
1403 Fill in COUNTS array. Emit any initialization code before GSI.
1404 Return the calculated outer loop bound of BOUND_TYPE. */
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 gimple_stmt_iterator *gsi,
1409 oacc_collapse *counts, tree bound_type,
1410 location_t loc)
1412 tree tiling = fd->tiling;
1413 tree total = build_int_cst (bound_type, 1);
1414 int ix;
1416 gcc_assert (integer_onep (fd->loop.step));
1417 gcc_assert (integer_zerop (fd->loop.n1));
1419 /* When tiling, the first operand of the tile clause applies to the
1420 innermost loop, and we work outwards from there. Seems
1421 backwards, but whatever. */
1422 for (ix = fd->collapse; ix--;)
1424 const omp_for_data_loop *loop = &fd->loops[ix];
1426 tree iter_type = TREE_TYPE (loop->v);
1427 tree diff_type = iter_type;
1428 tree plus_type = iter_type;
1430 gcc_assert (loop->cond_code == fd->loop.cond_code);
1432 if (POINTER_TYPE_P (iter_type))
1433 plus_type = sizetype;
1434 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 diff_type = signed_type_for (diff_type);
1436 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1437 diff_type = integer_type_node;
1439 if (tiling)
1441 tree num = build_int_cst (integer_type_node, fd->collapse);
1442 tree loop_no = build_int_cst (integer_type_node, ix);
1443 tree tile = TREE_VALUE (tiling);
1444 gcall *call
1445 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1446 /* gwv-outer=*/integer_zero_node,
1447 /* gwv-inner=*/integer_zero_node);
1449 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1450 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1451 gimple_call_set_lhs (call, counts[ix].tile);
1452 gimple_set_location (call, loc);
1453 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1455 tiling = TREE_CHAIN (tiling);
1457 else
1459 counts[ix].tile = NULL;
1460 counts[ix].outer = loop->v;
1463 tree b = loop->n1;
1464 tree e = loop->n2;
1465 tree s = loop->step;
1466 bool up = loop->cond_code == LT_EXPR;
1467 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1468 bool negating;
1469 tree expr;
1471 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1472 true, GSI_SAME_STMT);
1473 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1474 true, GSI_SAME_STMT);
1476 /* Convert the step, avoiding possible unsigned->signed overflow. */
1477 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1478 if (negating)
1479 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1480 s = fold_convert (diff_type, s);
1481 if (negating)
1482 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1483 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1484 true, GSI_SAME_STMT);
1486 /* Determine the range, avoiding possible unsigned->signed overflow. */
1487 negating = !up && TYPE_UNSIGNED (iter_type);
1488 expr = fold_build2 (MINUS_EXPR, plus_type,
1489 fold_convert (plus_type, negating ? b : e),
1490 fold_convert (plus_type, negating ? e : b));
1491 expr = fold_convert (diff_type, expr);
1492 if (negating)
1493 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1494 tree range = force_gimple_operand_gsi
1495 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1497 /* Determine number of iterations. */
1498 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1499 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1500 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1502 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1503 true, GSI_SAME_STMT);
1505 counts[ix].base = b;
1506 counts[ix].iters = iters;
1507 counts[ix].step = s;
1509 total = fold_build2 (MULT_EXPR, bound_type, total,
1510 fold_convert (bound_type, iters));
1513 return total;
1516 /* Emit initializers for collapsed loop members. INNER is true if
1517 this is for the element loop of a TILE. IVAR is the outer
1518 loop iteration variable, from which collapsed loop iteration values
1519 are calculated. COUNTS array has been initialized by
1520 expand_oacc_collapse_inits. */
1522 static void
1523 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1524 gimple_stmt_iterator *gsi,
1525 const oacc_collapse *counts, tree ivar)
1527 tree ivar_type = TREE_TYPE (ivar);
1529 /* The most rapidly changing iteration variable is the innermost
1530 one. */
1531 for (int ix = fd->collapse; ix--;)
1533 const omp_for_data_loop *loop = &fd->loops[ix];
1534 const oacc_collapse *collapse = &counts[ix];
1535 tree v = inner ? loop->v : collapse->outer;
1536 tree iter_type = TREE_TYPE (v);
1537 tree diff_type = TREE_TYPE (collapse->step);
1538 tree plus_type = iter_type;
1539 enum tree_code plus_code = PLUS_EXPR;
1540 tree expr;
1542 if (POINTER_TYPE_P (iter_type))
1544 plus_code = POINTER_PLUS_EXPR;
1545 plus_type = sizetype;
1548 expr = ivar;
1549 if (ix)
1551 tree mod = fold_convert (ivar_type, collapse->iters);
1552 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1553 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1554 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1555 true, GSI_SAME_STMT);
1558 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1559 collapse->step);
1560 expr = fold_build2 (plus_code, iter_type,
1561 inner ? collapse->outer : collapse->base,
1562 fold_convert (plus_type, expr));
1563 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1564 true, GSI_SAME_STMT);
1565 gassign *ass = gimple_build_assign (v, expr);
1566 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1570 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1571 of the combined collapse > 1 loop constructs, generate code like:
1572 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1573 if (cond3 is <)
1574 adj = STEP3 - 1;
1575 else
1576 adj = STEP3 + 1;
1577 count3 = (adj + N32 - N31) / STEP3;
1578 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1579 if (cond2 is <)
1580 adj = STEP2 - 1;
1581 else
1582 adj = STEP2 + 1;
1583 count2 = (adj + N22 - N21) / STEP2;
1584 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1585 if (cond1 is <)
1586 adj = STEP1 - 1;
1587 else
1588 adj = STEP1 + 1;
1589 count1 = (adj + N12 - N11) / STEP1;
1590 count = count1 * count2 * count3;
1591 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1592 count = 0;
1593 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1594 of the combined loop constructs, just initialize COUNTS array
1595 from the _looptemp_ clauses. */
1597 /* NOTE: It *could* be better to moosh all of the BBs together,
1598 creating one larger BB with all the computation and the unexpected
1599 jump at the end. I.e.
1601 bool zero3, zero2, zero1, zero;
1603 zero3 = N32 c3 N31;
1604 count3 = (N32 - N31) /[cl] STEP3;
1605 zero2 = N22 c2 N21;
1606 count2 = (N22 - N21) /[cl] STEP2;
1607 zero1 = N12 c1 N11;
1608 count1 = (N12 - N11) /[cl] STEP1;
1609 zero = zero3 || zero2 || zero1;
1610 count = count1 * count2 * count3;
1611 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1613 After all, we expect the zero=false, and thus we expect to have to
1614 evaluate all of the comparison expressions, so short-circuiting
1615 oughtn't be a win. Since the condition isn't protecting a
1616 denominator, we're not concerned about divide-by-zero, so we can
1617 fully evaluate count even if a numerator turned out to be wrong.
1619 It seems like putting this all together would create much better
1620 scheduling opportunities, and less pressure on the chip's branch
1621 predictor. */
/* ENTRY_BB and *GSI are updated every time a zero-iteration check
   splits the entry block.  ZERO_ITER1_BB and FIRST_ZERO_ITER1 are used
   for loops with an index below fd->collapse, ZERO_ITER2_BB and
   FIRST_ZERO_ITER2 for the remaining ordered loops.  L2_DOM_BB is set
   to the current entry block when a check is emitted while it is still
   NULL. */
1623 static void
1624 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1625 basic_block &entry_bb, tree *counts,
1626 basic_block &zero_iter1_bb, int &first_zero_iter1,
1627 basic_block &zero_iter2_bb, int &first_zero_iter2,
1628 basic_block &l2_dom_bb)
1630 tree t, type = TREE_TYPE (fd->loop.v);
1631 edge e, ne;
1632 int i;
1634 /* Collapsed loops need work for expansion into SSA form. */
1635 gcc_assert (!gimple_in_ssa_p (cfun));
1637 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1638 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1640 gcc_assert (fd->ordered == 0);
1641 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1642 isn't supposed to be handled, as the inner loop doesn't
1643 use it. */
1644 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1645 OMP_CLAUSE__LOOPTEMP_);
1646 gcc_assert (innerc);
1647 for (i = 0; i < fd->collapse; i++)
1649 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1650 OMP_CLAUSE__LOOPTEMP_);
1651 gcc_assert (innerc);
1652 if (i)
1653 counts[i] = OMP_CLAUSE_DECL (innerc);
1654 else
1655 counts[0] = NULL_TREE;
1657 return;
/* For the ordered loops beyond the collapsed ones: clear their counts,
   and if any of them is known at compile time to run zero times, give
   all of them a constant zero count. */
1660 for (i = fd->collapse; i < fd->ordered; i++)
1662 tree itype = TREE_TYPE (fd->loops[i].v);
1663 counts[i] = NULL_TREE;
1664 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1665 fold_convert (itype, fd->loops[i].n1),
1666 fold_convert (itype, fd->loops[i].n2));
1667 if (t && integer_zerop (t))
1669 for (i = fd->collapse; i < fd->ordered; i++)
1670 counts[i] = build_int_cst (type, 0);
1671 break;
1674 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1678 if (i >= fd->collapse && counts[i])
1679 continue;
/* Emit a run-time zero-iteration check when fd->loop.n2 is a variable
   or this is an ordered-only loop, unless the loop condition folds to
   true at compile time. */
1680 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1681 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1682 fold_convert (itype, fd->loops[i].n1),
1683 fold_convert (itype, fd->loops[i].n2)))
1684 == NULL_TREE || !integer_onep (t)))
1686 gcond *cond_stmt;
1687 tree n1, n2;
1688 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1689 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1690 true, GSI_SAME_STMT);
1691 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1692 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1695 NULL_TREE, NULL_TREE);
1696 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1697 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1698 expand_omp_regimplify_p, NULL, NULL)
1699 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1700 expand_omp_regimplify_p, NULL, NULL))
1702 *gsi = gsi_for_stmt (cond_stmt);
1703 gimple_regimplify_operands (cond_stmt, gsi);
1705 e = split_block (entry_bb, cond_stmt);
1706 basic_block &zero_iter_bb
1707 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1708 int &first_zero_iter
1709 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1710 if (zero_iter_bb == NULL)
1712 gassign *assign_stmt;
1713 first_zero_iter = i;
1714 zero_iter_bb = create_empty_bb (entry_bb);
1715 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1716 *gsi = gsi_after_labels (zero_iter_bb);
1717 if (i < fd->collapse)
1718 assign_stmt = gimple_build_assign (fd->loop.n2,
1719 build_zero_cst (type));
1720 else
1722 counts[i] = create_tmp_reg (type, ".count");
1723 assign_stmt
1724 = gimple_build_assign (counts[i], build_zero_cst (type));
1726 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1727 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1728 entry_bb);
1730 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1731 ne->probability = profile_probability::very_unlikely ();
1732 e->flags = EDGE_TRUE_VALUE;
1733 e->probability = ne->probability.invert ();
1734 if (l2_dom_bb == NULL)
1735 l2_dom_bb = entry_bb;
1736 entry_bb = e->dest;
1737 *gsi = gsi_last_nondebug_bb (entry_bb);
/* Build the iteration count (step + adj + n2 - n1) / step, where adj
   is -1 for LT_EXPR and 1 otherwise. */
1740 if (POINTER_TYPE_P (itype))
1741 itype = signed_type_for (itype);
1742 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1743 ? -1 : 1));
1744 t = fold_build2 (PLUS_EXPR, itype,
1745 fold_convert (itype, fd->loops[i].step), t);
1746 t = fold_build2 (PLUS_EXPR, itype, t,
1747 fold_convert (itype, fd->loops[i].n2));
1748 t = fold_build2 (MINUS_EXPR, itype, t,
1749 fold_convert (itype, fd->loops[i].n1));
1750 /* ?? We could probably use CEIL_DIV_EXPR instead of
1751 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1752 generate the same code in the end because generically we
1753 don't know that the values involved must be negative for
1754 GT?? */
1755 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1756 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1757 fold_build1 (NEGATE_EXPR, itype, t),
1758 fold_build1 (NEGATE_EXPR, itype,
1759 fold_convert (itype,
1760 fd->loops[i].step)));
1761 else
1762 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].step));
1764 t = fold_convert (type, t);
1765 if (TREE_CODE (t) == INTEGER_CST)
1766 counts[i] = t;
1767 else
1769 if (i < fd->collapse || i != first_zero_iter2)
1770 counts[i] = create_tmp_reg (type, ".count");
1771 expand_omp_build_assign (gsi, counts[i], t);
/* Accumulate the product of the collapsed counts into fd->loop.n2. */
1773 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1775 if (i == 0)
1776 t = counts[0];
1777 else
1778 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1779 expand_omp_build_assign (gsi, fd->loop.n2, t);
1784 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1785 T = V;
1786 V3 = N31 + (T % count3) * STEP3;
1787 T = T / count3;
1788 V2 = N21 + (T % count2) * STEP2;
1789 T = T / count2;
1790 V1 = N11 + T * STEP1;
1791 if this loop doesn't have an inner loop construct combined with it.
1792 If it does have an inner loop construct combined with it and the
1793 iteration count isn't known constant, store values from counts array
1794 into its _looptemp_ temporaries instead. */
1796 static void
1797 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 tree *counts, gimple *inner_stmt, tree startvar)
1800 int i;
1801 if (gimple_omp_for_combined_p (fd->for_stmt))
1803 /* If fd->loop.n2 is constant, then no propagation of the counts
1804 is needed, they are constant. */
1805 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1806 return;
1808 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1809 ? gimple_omp_taskreg_clauses (inner_stmt)
1810 : gimple_omp_for_clauses (inner_stmt);
1811 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1812 isn't supposed to be handled, as the inner loop doesn't
1813 use it. */
1814 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1815 gcc_assert (innerc);
1816 for (i = 0; i < fd->collapse; i++)
1818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1819 OMP_CLAUSE__LOOPTEMP_);
1820 gcc_assert (innerc);
1821 if (i)
1823 tree tem = OMP_CLAUSE_DECL (innerc);
1824 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1825 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1826 false, GSI_CONTINUE_LINKING);
1827 gassign *stmt = gimple_build_assign (tem, t);
1828 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1831 return;
1834 tree type = TREE_TYPE (fd->loop.v);
1835 tree tem = create_tmp_reg (type, ".tem");
1836 gassign *stmt = gimple_build_assign (tem, startvar);
1837 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1839 for (i = fd->collapse - 1; i >= 0; i--)
1841 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1842 itype = vtype;
1843 if (POINTER_TYPE_P (vtype))
1844 itype = signed_type_for (vtype);
1845 if (i != 0)
1846 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1847 else
1848 t = tem;
1849 t = fold_convert (itype, t);
1850 t = fold_build2 (MULT_EXPR, itype, t,
1851 fold_convert (itype, fd->loops[i].step));
1852 if (POINTER_TYPE_P (vtype))
1853 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1854 else
1855 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1856 t = force_gimple_operand_gsi (gsi, t,
1857 DECL_P (fd->loops[i].v)
1858 && TREE_ADDRESSABLE (fd->loops[i].v),
1859 NULL_TREE, false,
1860 GSI_CONTINUE_LINKING);
1861 stmt = gimple_build_assign (fd->loops[i].v, t);
1862 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1863 if (i != 0)
1865 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1866 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1867 false, GSI_CONTINUE_LINKING);
1868 stmt = gimple_build_assign (tem, t);
1869 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1874 /* Helper function for expand_omp_for_*. Generate code like:
1875 L10:
1876 V3 += STEP3;
1877 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1878 L11:
1879 V3 = N31;
1880 V2 += STEP2;
1881 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1882 L12:
1883 V2 = N21;
1884 V1 += STEP1;
1885 goto BODY_BB; */
1887 static basic_block
1888 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1889 basic_block body_bb)
1891 basic_block last_bb, bb, collapse_bb = NULL;
1892 int i;
1893 gimple_stmt_iterator gsi;
1894 edge e;
1895 tree t;
1896 gimple *stmt;
1898 last_bb = cont_bb;
1899 for (i = fd->collapse - 1; i >= 0; i--)
1901 tree vtype = TREE_TYPE (fd->loops[i].v);
1903 bb = create_empty_bb (last_bb);
1904 add_bb_to_loop (bb, last_bb->loop_father);
1905 gsi = gsi_start_bb (bb);
1907 if (i < fd->collapse - 1)
1909 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1910 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1912 t = fd->loops[i + 1].n1;
1913 t = force_gimple_operand_gsi (&gsi, t,
1914 DECL_P (fd->loops[i + 1].v)
1915 && TREE_ADDRESSABLE (fd->loops[i
1916 + 1].v),
1917 NULL_TREE, false,
1918 GSI_CONTINUE_LINKING);
1919 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1920 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1922 else
1923 collapse_bb = bb;
1925 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1927 if (POINTER_TYPE_P (vtype))
1928 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1929 else
1930 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1931 t = force_gimple_operand_gsi (&gsi, t,
1932 DECL_P (fd->loops[i].v)
1933 && TREE_ADDRESSABLE (fd->loops[i].v),
1934 NULL_TREE, false, GSI_CONTINUE_LINKING);
1935 stmt = gimple_build_assign (fd->loops[i].v, t);
1936 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1938 if (i > 0)
1940 t = fd->loops[i].n2;
1941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1942 false, GSI_CONTINUE_LINKING);
1943 tree v = fd->loops[i].v;
1944 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1945 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1946 false, GSI_CONTINUE_LINKING);
1947 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1948 stmt = gimple_build_cond_empty (t);
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1951 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1953 else
1954 make_edge (bb, body_bb, EDGE_FALLTHRU);
1955 last_bb = bb;
1958 return collapse_bb;
1961 /* Expand #pragma omp ordered depend(source). */
1963 static void
1964 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1965 tree *counts, location_t loc)
1967 enum built_in_function source_ix
1968 = fd->iter_type == long_integer_type_node
1969 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1970 gimple *g
1971 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1972 build_fold_addr_expr (counts[fd->ordered]));
1973 gimple_set_location (g, loc);
1974 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1977 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1979 static void
1980 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1981 tree *counts, tree c, location_t loc)
1983 auto_vec<tree, 10> args;
1984 enum built_in_function sink_ix
1985 = fd->iter_type == long_integer_type_node
1986 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1987 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1988 int i;
1989 gimple_stmt_iterator gsi2 = *gsi;
1990 bool warned_step = false;
1992 for (i = 0; i < fd->ordered; i++)
1994 tree step = NULL_TREE;
1995 off = TREE_PURPOSE (deps);
1996 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1998 step = TREE_OPERAND (off, 1);
1999 off = TREE_OPERAND (off, 0);
2001 if (!integer_zerop (off))
2003 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2004 || fd->loops[i].cond_code == GT_EXPR);
2005 bool forward = fd->loops[i].cond_code == LT_EXPR;
2006 if (step)
2008 /* Non-simple Fortran DO loops. If step is variable,
2009 we don't know at compile even the direction, so can't
2010 warn. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 break;
2013 forward = tree_int_cst_sgn (step) != -1;
2015 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2016 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2017 "lexically later iteration");
2018 break;
2020 deps = TREE_CHAIN (deps);
2022 /* If all offsets corresponding to the collapsed loops are zero,
2023 this depend clause can be ignored. FIXME: but there is still a
2024 flush needed. We need to emit one __sync_synchronize () for it
2025 though (perhaps conditionally)? Solve this together with the
2026 conservative dependence folding optimization.
2027 if (i >= fd->collapse)
2028 return; */
2030 deps = OMP_CLAUSE_DECL (c);
2031 gsi_prev (&gsi2);
2032 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2033 edge e2 = split_block_after_labels (e1->dest);
2035 gsi2 = gsi_after_labels (e1->dest);
2036 *gsi = gsi_last_bb (e1->src);
2037 for (i = 0; i < fd->ordered; i++)
2039 tree itype = TREE_TYPE (fd->loops[i].v);
2040 tree step = NULL_TREE;
2041 tree orig_off = NULL_TREE;
2042 if (POINTER_TYPE_P (itype))
2043 itype = sizetype;
2044 if (i)
2045 deps = TREE_CHAIN (deps);
2046 off = TREE_PURPOSE (deps);
2047 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2049 step = TREE_OPERAND (off, 1);
2050 off = TREE_OPERAND (off, 0);
2051 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2052 && integer_onep (fd->loops[i].step)
2053 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2055 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2056 if (step)
2058 off = fold_convert_loc (loc, itype, off);
2059 orig_off = off;
2060 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2063 if (integer_zerop (off))
2064 t = boolean_true_node;
2065 else
2067 tree a;
2068 tree co = fold_convert_loc (loc, itype, off);
2069 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2071 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2072 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2073 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2074 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2075 co);
2077 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2078 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2079 fd->loops[i].v, co);
2080 else
2081 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2082 fd->loops[i].v, co);
2083 if (step)
2085 tree t1, t2;
2086 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2088 fd->loops[i].n1);
2089 else
2090 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2091 fd->loops[i].n2);
2092 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2094 fd->loops[i].n2);
2095 else
2096 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2097 fd->loops[i].n1);
2098 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2099 step, build_int_cst (TREE_TYPE (step), 0));
2100 if (TREE_CODE (step) != INTEGER_CST)
2102 t1 = unshare_expr (t1);
2103 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2104 false, GSI_CONTINUE_LINKING);
2105 t2 = unshare_expr (t2);
2106 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2107 false, GSI_CONTINUE_LINKING);
2109 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2110 t, t2, t1);
2112 else if (fd->loops[i].cond_code == LT_EXPR)
2114 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2115 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2116 fd->loops[i].n1);
2117 else
2118 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2119 fd->loops[i].n2);
2121 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2122 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2123 fd->loops[i].n2);
2124 else
2125 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2126 fd->loops[i].n1);
2128 if (cond)
2129 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2130 else
2131 cond = t;
2133 off = fold_convert_loc (loc, itype, off);
2135 if (step
2136 || (fd->loops[i].cond_code == LT_EXPR
2137 ? !integer_onep (fd->loops[i].step)
2138 : !integer_minus_onep (fd->loops[i].step)))
2140 if (step == NULL_TREE
2141 && TYPE_UNSIGNED (itype)
2142 && fd->loops[i].cond_code == GT_EXPR)
2143 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2144 fold_build1_loc (loc, NEGATE_EXPR, itype,
2145 s));
2146 else
2147 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2148 orig_off ? orig_off : off, s);
2149 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2150 build_int_cst (itype, 0));
2151 if (integer_zerop (t) && !warned_step)
2153 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2154 "in the iteration space");
2155 warned_step = true;
2157 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2158 cond, t);
2161 if (i <= fd->collapse - 1 && fd->collapse > 1)
2162 t = fd->loop.v;
2163 else if (counts[i])
2164 t = counts[i];
2165 else
2167 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2168 fd->loops[i].v, fd->loops[i].n1);
2169 t = fold_convert_loc (loc, fd->iter_type, t);
2171 if (step)
2172 /* We have divided off by step already earlier. */;
2173 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2174 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2175 fold_build1_loc (loc, NEGATE_EXPR, itype,
2176 s));
2177 else
2178 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2179 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2180 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2181 off = fold_convert_loc (loc, fd->iter_type, off);
2182 if (i <= fd->collapse - 1 && fd->collapse > 1)
2184 if (i)
2185 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2186 off);
2187 if (i < fd->collapse - 1)
2189 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2190 counts[i]);
2191 continue;
2194 off = unshare_expr (off);
2195 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2196 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2197 true, GSI_SAME_STMT);
2198 args.safe_push (t);
2200 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2201 gimple_set_location (g, loc);
2202 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2204 cond = unshare_expr (cond);
2205 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2206 GSI_CONTINUE_LINKING);
2207 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2208 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2209 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2210 e1->probability = e3->probability.invert ();
2211 e1->flags = EDGE_TRUE_VALUE;
2212 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2214 *gsi = gsi_after_labels (e2->dest);
2217 /* Expand all #pragma omp ordered depend(source) and
2218 #pragma omp ordered depend(sink:...) constructs in the current
2219 #pragma omp for ordered(n) region. */
2221 static void
2222 expand_omp_ordered_source_sink (struct omp_region *region,
2223 struct omp_for_data *fd, tree *counts,
2224 basic_block cont_bb)
2226 struct omp_region *inner;
2227 int i;
2228 for (i = fd->collapse - 1; i < fd->ordered; i++)
2229 if (i == fd->collapse - 1 && fd->collapse > 1)
2230 counts[i] = NULL_TREE;
2231 else if (i >= fd->collapse && !cont_bb)
2232 counts[i] = build_zero_cst (fd->iter_type);
2233 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2234 && integer_onep (fd->loops[i].step))
2235 counts[i] = NULL_TREE;
2236 else
2237 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2238 tree atype
2239 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2240 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2241 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2243 for (inner = region->inner; inner; inner = inner->next)
2244 if (inner->type == GIMPLE_OMP_ORDERED)
2246 gomp_ordered *ord_stmt = inner->ord_stmt;
2247 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2248 location_t loc = gimple_location (ord_stmt);
2249 tree c;
2250 for (c = gimple_omp_ordered_clauses (ord_stmt);
2251 c; c = OMP_CLAUSE_CHAIN (c))
2252 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2253 break;
2254 if (c)
2255 expand_omp_ordered_source (&gsi, fd, counts, loc);
2256 for (c = gimple_omp_ordered_clauses (ord_stmt);
2257 c; c = OMP_CLAUSE_CHAIN (c))
2258 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2259 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2260 gsi_remove (&gsi, true);
2264 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2265 collapsed. */
2267 static basic_block
2268 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2269 basic_block cont_bb, basic_block body_bb,
2270 bool ordered_lastprivate)
2272 if (fd->ordered == fd->collapse)
2273 return cont_bb;
2275 if (!cont_bb)
2277 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2278 for (int i = fd->collapse; i < fd->ordered; i++)
2280 tree type = TREE_TYPE (fd->loops[i].v);
2281 tree n1 = fold_convert (type, fd->loops[i].n1);
2282 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2283 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2284 size_int (i - fd->collapse + 1),
2285 NULL_TREE, NULL_TREE);
2286 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2288 return NULL;
2291 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2293 tree t, type = TREE_TYPE (fd->loops[i].v);
2294 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2295 expand_omp_build_assign (&gsi, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].n1));
2297 if (counts[i])
2298 expand_omp_build_assign (&gsi, counts[i],
2299 build_zero_cst (fd->iter_type));
2300 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2301 size_int (i - fd->collapse + 1),
2302 NULL_TREE, NULL_TREE);
2303 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2304 if (!gsi_end_p (gsi))
2305 gsi_prev (&gsi);
2306 else
2307 gsi = gsi_last_bb (body_bb);
2308 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2309 basic_block new_body = e1->dest;
2310 if (body_bb == cont_bb)
2311 cont_bb = new_body;
2312 edge e2 = NULL;
2313 basic_block new_header;
2314 if (EDGE_COUNT (cont_bb->preds) > 0)
2316 gsi = gsi_last_bb (cont_bb);
2317 if (POINTER_TYPE_P (type))
2318 t = fold_build_pointer_plus (fd->loops[i].v,
2319 fold_convert (sizetype,
2320 fd->loops[i].step));
2321 else
2322 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2323 fold_convert (type, fd->loops[i].step));
2324 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2325 if (counts[i])
2327 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2328 build_int_cst (fd->iter_type, 1));
2329 expand_omp_build_assign (&gsi, counts[i], t);
2330 t = counts[i];
2332 else
2334 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2335 fd->loops[i].v, fd->loops[i].n1);
2336 t = fold_convert (fd->iter_type, t);
2337 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2338 true, GSI_SAME_STMT);
2340 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2341 size_int (i - fd->collapse + 1),
2342 NULL_TREE, NULL_TREE);
2343 expand_omp_build_assign (&gsi, aref, t);
2344 gsi_prev (&gsi);
2345 e2 = split_block (cont_bb, gsi_stmt (gsi));
2346 new_header = e2->dest;
2348 else
2349 new_header = cont_bb;
2350 gsi = gsi_after_labels (new_header);
2351 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2353 tree n2
2354 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2355 true, NULL_TREE, true, GSI_SAME_STMT);
2356 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2357 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2358 edge e3 = split_block (new_header, gsi_stmt (gsi));
2359 cont_bb = e3->dest;
2360 remove_edge (e1);
2361 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2362 e3->flags = EDGE_FALSE_VALUE;
2363 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2364 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2365 e1->probability = e3->probability.invert ();
2367 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2368 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2370 if (e2)
2372 struct loop *loop = alloc_loop ();
2373 loop->header = new_header;
2374 loop->latch = e2->src;
2375 add_loop (loop, body_bb->loop_father);
2379 /* If there are any lastprivate clauses and it is possible some loops
2380 might have zero iterations, ensure all the decls are initialized,
2381 otherwise we could crash evaluating C++ class iterators with lastprivate
2382 clauses. */
2383 bool need_inits = false;
2384 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2385 if (need_inits)
2387 tree type = TREE_TYPE (fd->loops[i].v);
2388 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2389 expand_omp_build_assign (&gsi, fd->loops[i].v,
2390 fold_convert (type, fd->loops[i].n1));
2392 else
2394 tree type = TREE_TYPE (fd->loops[i].v);
2395 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2396 boolean_type_node,
2397 fold_convert (type, fd->loops[i].n1),
2398 fold_convert (type, fd->loops[i].n2));
2399 if (!integer_onep (this_cond))
2400 need_inits = true;
2403 return cont_bb;
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

        for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

        more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
        if (more) goto L0; else goto L3;
    L0:
        V = istart0;
        iend = iend0;
    L1:
        BODY;
        V += STEP;
        if (V cond iend) goto L1; else goto L2;
    L2:
        if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
        for (V2 = N21; V2 cond2 N22; V2 += STEP2)
          for (V3 = N31; V3 cond3 N32; V3 += STEP3)
            BODY;

    we generate pseudocode

        if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
        if (cond3 is <)
          adj = STEP3 - 1;
        else
          adj = STEP3 + 1;
        count3 = (adj + N32 - N31) / STEP3;
        if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
        if (cond2 is <)
          adj = STEP2 - 1;
        else
          adj = STEP2 + 1;
        count2 = (adj + N22 - N21) / STEP2;
        if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
        if (cond1 is <)
          adj = STEP1 - 1;
        else
          adj = STEP1 + 1;
        count1 = (adj + N12 - N11) / STEP1;
        count = count1 * count2 * count3;
        goto Z1;
    Z0:
        count = 0;
    Z1:
        more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
        if (more) goto L0; else goto L3;
    L0:
        V = istart0;
        T = V;
        V3 = N31 + (T % count3) * STEP3;
        T = T / count3;
        V2 = N21 + (T % count2) * STEP2;
        T = T / count2;
        V1 = N11 + T * STEP1;
        iend = iend0;
    L1:
        BODY;
        V += 1;
        if (V < iend) goto L10; else goto L2;
    L10:
        V3 += STEP3;
        if (V3 cond3 N32) goto L1; else goto L11;
    L11:
        V3 = N31;
        V2 += STEP2;
        if (V2 cond2 N22) goto L1; else goto L12;
    L12:
        V2 = N21;
        V1 += STEP1;
        goto L1;
    L2:
        if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

      */

2497 static void
2498 expand_omp_for_generic (struct omp_region *region,
2499 struct omp_for_data *fd,
2500 enum built_in_function start_fn,
2501 enum built_in_function next_fn,
2502 gimple *inner_stmt)
2504 tree type, istart0, iend0, iend;
2505 tree t, vmain, vback, bias = NULL_TREE;
2506 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2507 basic_block l2_bb = NULL, l3_bb = NULL;
2508 gimple_stmt_iterator gsi;
2509 gassign *assign_stmt;
2510 bool in_combined_parallel = is_combined_parallel (region);
2511 bool broken_loop = region->cont == NULL;
2512 edge e, ne;
2513 tree *counts = NULL;
2514 int i;
2515 bool ordered_lastprivate = false;
2517 gcc_assert (!broken_loop || !in_combined_parallel);
2518 gcc_assert (fd->iter_type == long_integer_type_node
2519 || !in_combined_parallel);
2521 entry_bb = region->entry;
2522 cont_bb = region->cont;
2523 collapse_bb = NULL;
2524 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2525 gcc_assert (broken_loop
2526 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2527 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2528 l1_bb = single_succ (l0_bb);
2529 if (!broken_loop)
2531 l2_bb = create_empty_bb (cont_bb);
2532 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2533 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2534 == l1_bb));
2535 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2537 else
2538 l2_bb = NULL;
2539 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2540 exit_bb = region->exit;
2542 gsi = gsi_last_nondebug_bb (entry_bb);
2544 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2545 if (fd->ordered
2546 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2547 OMP_CLAUSE_LASTPRIVATE))
2548 ordered_lastprivate = false;
2549 if (fd->collapse > 1 || fd->ordered)
2551 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2552 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2554 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2555 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2556 zero_iter1_bb, first_zero_iter1,
2557 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2559 if (zero_iter1_bb)
2561 /* Some counts[i] vars might be uninitialized if
2562 some loop has zero iterations. But the body shouldn't
2563 be executed in that case, so just avoid uninit warnings. */
2564 for (i = first_zero_iter1;
2565 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2566 if (SSA_VAR_P (counts[i]))
2567 TREE_NO_WARNING (counts[i]) = 1;
2568 gsi_prev (&gsi);
2569 e = split_block (entry_bb, gsi_stmt (gsi));
2570 entry_bb = e->dest;
2571 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2572 gsi = gsi_last_nondebug_bb (entry_bb);
2573 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2574 get_immediate_dominator (CDI_DOMINATORS,
2575 zero_iter1_bb));
2577 if (zero_iter2_bb)
2579 /* Some counts[i] vars might be uninitialized if
2580 some loop has zero iterations. But the body shouldn't
2581 be executed in that case, so just avoid uninit warnings. */
2582 for (i = first_zero_iter2; i < fd->ordered; i++)
2583 if (SSA_VAR_P (counts[i]))
2584 TREE_NO_WARNING (counts[i]) = 1;
2585 if (zero_iter1_bb)
2586 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2587 else
2589 gsi_prev (&gsi);
2590 e = split_block (entry_bb, gsi_stmt (gsi));
2591 entry_bb = e->dest;
2592 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2593 gsi = gsi_last_nondebug_bb (entry_bb);
2594 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2595 get_immediate_dominator
2596 (CDI_DOMINATORS, zero_iter2_bb));
2599 if (fd->collapse == 1)
2601 counts[0] = fd->loop.n2;
2602 fd->loop = fd->loops[0];
2606 type = TREE_TYPE (fd->loop.v);
2607 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2608 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2609 TREE_ADDRESSABLE (istart0) = 1;
2610 TREE_ADDRESSABLE (iend0) = 1;
2612 /* See if we need to bias by LLONG_MIN. */
2613 if (fd->iter_type == long_long_unsigned_type_node
2614 && TREE_CODE (type) == INTEGER_TYPE
2615 && !TYPE_UNSIGNED (type)
2616 && fd->ordered == 0)
2618 tree n1, n2;
2620 if (fd->loop.cond_code == LT_EXPR)
2622 n1 = fd->loop.n1;
2623 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2625 else
2627 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2628 n2 = fd->loop.n1;
2630 if (TREE_CODE (n1) != INTEGER_CST
2631 || TREE_CODE (n2) != INTEGER_CST
2632 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2633 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2636 gimple_stmt_iterator gsif = gsi;
2637 gsi_prev (&gsif);
2639 tree arr = NULL_TREE;
2640 if (in_combined_parallel)
2642 gcc_assert (fd->ordered == 0);
2643 /* In a combined parallel loop, emit a call to
2644 GOMP_loop_foo_next. */
2645 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2646 build_fold_addr_expr (istart0),
2647 build_fold_addr_expr (iend0));
2649 else
2651 tree t0, t1, t2, t3, t4;
2652 /* If this is not a combined parallel loop, emit a call to
2653 GOMP_loop_foo_start in ENTRY_BB. */
2654 t4 = build_fold_addr_expr (iend0);
2655 t3 = build_fold_addr_expr (istart0);
2656 if (fd->ordered)
2658 t0 = build_int_cst (unsigned_type_node,
2659 fd->ordered - fd->collapse + 1);
2660 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2661 fd->ordered
2662 - fd->collapse + 1),
2663 ".omp_counts");
2664 DECL_NAMELESS (arr) = 1;
2665 TREE_ADDRESSABLE (arr) = 1;
2666 TREE_STATIC (arr) = 1;
2667 vec<constructor_elt, va_gc> *v;
2668 vec_alloc (v, fd->ordered - fd->collapse + 1);
2669 int idx;
2671 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2673 tree c;
2674 if (idx == 0 && fd->collapse > 1)
2675 c = fd->loop.n2;
2676 else
2677 c = counts[idx + fd->collapse - 1];
2678 tree purpose = size_int (idx);
2679 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2680 if (TREE_CODE (c) != INTEGER_CST)
2681 TREE_STATIC (arr) = 0;
2684 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2685 if (!TREE_STATIC (arr))
2686 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2687 void_type_node, arr),
2688 true, NULL_TREE, true, GSI_SAME_STMT);
2689 t1 = build_fold_addr_expr (arr);
2690 t2 = NULL_TREE;
2692 else
2694 t2 = fold_convert (fd->iter_type, fd->loop.step);
2695 t1 = fd->loop.n2;
2696 t0 = fd->loop.n1;
2697 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2699 tree innerc
2700 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2701 OMP_CLAUSE__LOOPTEMP_);
2702 gcc_assert (innerc);
2703 t0 = OMP_CLAUSE_DECL (innerc);
2704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2705 OMP_CLAUSE__LOOPTEMP_);
2706 gcc_assert (innerc);
2707 t1 = OMP_CLAUSE_DECL (innerc);
2709 if (POINTER_TYPE_P (TREE_TYPE (t0))
2710 && TYPE_PRECISION (TREE_TYPE (t0))
2711 != TYPE_PRECISION (fd->iter_type))
2713 /* Avoid casting pointers to integer of a different size. */
2714 tree itype = signed_type_for (type);
2715 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2716 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2718 else
2720 t1 = fold_convert (fd->iter_type, t1);
2721 t0 = fold_convert (fd->iter_type, t0);
2723 if (bias)
2725 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2726 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2729 if (fd->iter_type == long_integer_type_node || fd->ordered)
2731 if (fd->chunk_size)
2733 t = fold_convert (fd->iter_type, fd->chunk_size);
2734 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2735 if (fd->ordered)
2736 t = build_call_expr (builtin_decl_explicit (start_fn),
2737 5, t0, t1, t, t3, t4);
2738 else
2739 t = build_call_expr (builtin_decl_explicit (start_fn),
2740 6, t0, t1, t2, t, t3, t4);
2742 else if (fd->ordered)
2743 t = build_call_expr (builtin_decl_explicit (start_fn),
2744 4, t0, t1, t3, t4);
2745 else
2746 t = build_call_expr (builtin_decl_explicit (start_fn),
2747 5, t0, t1, t2, t3, t4);
2749 else
2751 tree t5;
2752 tree c_bool_type;
2753 tree bfn_decl;
2755 /* The GOMP_loop_ull_*start functions have additional boolean
2756 argument, true for < loops and false for > loops.
2757 In Fortran, the C bool type can be different from
2758 boolean_type_node. */
2759 bfn_decl = builtin_decl_explicit (start_fn);
2760 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2761 t5 = build_int_cst (c_bool_type,
2762 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2763 if (fd->chunk_size)
2765 tree bfn_decl = builtin_decl_explicit (start_fn);
2766 t = fold_convert (fd->iter_type, fd->chunk_size);
2767 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2768 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2770 else
2771 t = build_call_expr (builtin_decl_explicit (start_fn),
2772 6, t5, t0, t1, t2, t3, t4);
2775 if (TREE_TYPE (t) != boolean_type_node)
2776 t = fold_build2 (NE_EXPR, boolean_type_node,
2777 t, build_int_cst (TREE_TYPE (t), 0));
2778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2779 true, GSI_SAME_STMT);
2780 if (arr && !TREE_STATIC (arr))
2782 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2783 TREE_THIS_VOLATILE (clobber) = 1;
2784 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2785 GSI_SAME_STMT);
2787 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2789 /* Remove the GIMPLE_OMP_FOR statement. */
2790 gsi_remove (&gsi, true);
2792 if (gsi_end_p (gsif))
2793 gsif = gsi_after_labels (gsi_bb (gsif));
2794 gsi_next (&gsif);
2796 /* Iteration setup for sequential loop goes in L0_BB. */
2797 tree startvar = fd->loop.v;
2798 tree endvar = NULL_TREE;
2800 if (gimple_omp_for_combined_p (fd->for_stmt))
2802 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2803 && gimple_omp_for_kind (inner_stmt)
2804 == GF_OMP_FOR_KIND_SIMD);
2805 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2806 OMP_CLAUSE__LOOPTEMP_);
2807 gcc_assert (innerc);
2808 startvar = OMP_CLAUSE_DECL (innerc);
2809 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2810 OMP_CLAUSE__LOOPTEMP_);
2811 gcc_assert (innerc);
2812 endvar = OMP_CLAUSE_DECL (innerc);
2815 gsi = gsi_start_bb (l0_bb);
2816 t = istart0;
2817 if (fd->ordered && fd->collapse == 1)
2818 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2819 fold_convert (fd->iter_type, fd->loop.step));
2820 else if (bias)
2821 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2822 if (fd->ordered && fd->collapse == 1)
2824 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2825 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2826 fd->loop.n1, fold_convert (sizetype, t));
2827 else
2829 t = fold_convert (TREE_TYPE (startvar), t);
2830 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2831 fd->loop.n1, t);
2834 else
2836 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2837 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2838 t = fold_convert (TREE_TYPE (startvar), t);
2840 t = force_gimple_operand_gsi (&gsi, t,
2841 DECL_P (startvar)
2842 && TREE_ADDRESSABLE (startvar),
2843 NULL_TREE, false, GSI_CONTINUE_LINKING);
2844 assign_stmt = gimple_build_assign (startvar, t);
2845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2847 t = iend0;
2848 if (fd->ordered && fd->collapse == 1)
2849 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2850 fold_convert (fd->iter_type, fd->loop.step));
2851 else if (bias)
2852 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2853 if (fd->ordered && fd->collapse == 1)
2855 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2856 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2857 fd->loop.n1, fold_convert (sizetype, t));
2858 else
2860 t = fold_convert (TREE_TYPE (startvar), t);
2861 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2862 fd->loop.n1, t);
2865 else
2867 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2868 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2869 t = fold_convert (TREE_TYPE (startvar), t);
2871 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2872 false, GSI_CONTINUE_LINKING);
2873 if (endvar)
2875 assign_stmt = gimple_build_assign (endvar, iend);
2876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2877 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2878 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2879 else
2880 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2881 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2883 /* Handle linear clause adjustments. */
2884 tree itercnt = NULL_TREE;
2885 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2886 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2887 c; c = OMP_CLAUSE_CHAIN (c))
2888 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2889 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2891 tree d = OMP_CLAUSE_DECL (c);
2892 bool is_ref = omp_is_reference (d);
2893 tree t = d, a, dest;
2894 if (is_ref)
2895 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2896 tree type = TREE_TYPE (t);
2897 if (POINTER_TYPE_P (type))
2898 type = sizetype;
2899 dest = unshare_expr (t);
2900 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2901 expand_omp_build_assign (&gsif, v, t);
2902 if (itercnt == NULL_TREE)
2904 itercnt = startvar;
2905 tree n1 = fd->loop.n1;
2906 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2908 itercnt
2909 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2910 itercnt);
2911 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2913 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2914 itercnt, n1);
2915 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2916 itercnt, fd->loop.step);
2917 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2918 NULL_TREE, false,
2919 GSI_CONTINUE_LINKING);
2921 a = fold_build2 (MULT_EXPR, type,
2922 fold_convert (type, itercnt),
2923 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2924 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2925 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2926 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2927 false, GSI_CONTINUE_LINKING);
2928 assign_stmt = gimple_build_assign (dest, t);
2929 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2931 if (fd->collapse > 1)
2932 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2934 if (fd->ordered)
2936 /* Until now, counts array contained number of iterations or
2937 variable containing it for ith loop. From now on, we need
2938 those counts only for collapsed loops, and only for the 2nd
2939 till the last collapsed one. Move those one element earlier,
2940 we'll use counts[fd->collapse - 1] for the first source/sink
2941 iteration counter and so on and counts[fd->ordered]
2942 as the array holding the current counter values for
2943 depend(source). */
2944 if (fd->collapse > 1)
2945 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2946 if (broken_loop)
2948 int i;
2949 for (i = fd->collapse; i < fd->ordered; i++)
2951 tree type = TREE_TYPE (fd->loops[i].v);
2952 tree this_cond
2953 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2954 fold_convert (type, fd->loops[i].n1),
2955 fold_convert (type, fd->loops[i].n2));
2956 if (!integer_onep (this_cond))
2957 break;
2959 if (i < fd->ordered)
2961 cont_bb
2962 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2963 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2964 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2965 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2966 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2967 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2968 make_edge (cont_bb, l1_bb, 0);
2969 l2_bb = create_empty_bb (cont_bb);
2970 broken_loop = false;
2973 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2974 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2975 ordered_lastprivate);
2976 if (counts[fd->collapse - 1])
2978 gcc_assert (fd->collapse == 1);
2979 gsi = gsi_last_bb (l0_bb);
2980 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2981 istart0, true);
2982 gsi = gsi_last_bb (cont_bb);
2983 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2984 build_int_cst (fd->iter_type, 1));
2985 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2986 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2987 size_zero_node, NULL_TREE, NULL_TREE);
2988 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2989 t = counts[fd->collapse - 1];
2991 else if (fd->collapse > 1)
2992 t = fd->loop.v;
2993 else
2995 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2996 fd->loops[0].v, fd->loops[0].n1);
2997 t = fold_convert (fd->iter_type, t);
2999 gsi = gsi_last_bb (l0_bb);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3003 false, GSI_CONTINUE_LINKING);
3004 expand_omp_build_assign (&gsi, aref, t, true);
3007 if (!broken_loop)
3009 /* Code to control the increment and predicate for the sequential
3010 loop goes in the CONT_BB. */
3011 gsi = gsi_last_nondebug_bb (cont_bb);
3012 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3013 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3014 vmain = gimple_omp_continue_control_use (cont_stmt);
3015 vback = gimple_omp_continue_control_def (cont_stmt);
3017 if (!gimple_omp_for_combined_p (fd->for_stmt))
3019 if (POINTER_TYPE_P (type))
3020 t = fold_build_pointer_plus (vmain, fd->loop.step);
3021 else
3022 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3023 t = force_gimple_operand_gsi (&gsi, t,
3024 DECL_P (vback)
3025 && TREE_ADDRESSABLE (vback),
3026 NULL_TREE, true, GSI_SAME_STMT);
3027 assign_stmt = gimple_build_assign (vback, t);
3028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3030 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3032 if (fd->collapse > 1)
3033 t = fd->loop.v;
3034 else
3036 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3037 fd->loops[0].v, fd->loops[0].n1);
3038 t = fold_convert (fd->iter_type, t);
3040 tree aref = build4 (ARRAY_REF, fd->iter_type,
3041 counts[fd->ordered], size_zero_node,
3042 NULL_TREE, NULL_TREE);
3043 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3044 true, GSI_SAME_STMT);
3045 expand_omp_build_assign (&gsi, aref, t);
3048 t = build2 (fd->loop.cond_code, boolean_type_node,
3049 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3050 iend);
3051 gcond *cond_stmt = gimple_build_cond_empty (t);
3052 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3055 /* Remove GIMPLE_OMP_CONTINUE. */
3056 gsi_remove (&gsi, true);
3058 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3059 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3061 /* Emit code to get the next parallel iteration in L2_BB. */
3062 gsi = gsi_start_bb (l2_bb);
3064 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3065 build_fold_addr_expr (istart0),
3066 build_fold_addr_expr (iend0));
3067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3068 false, GSI_CONTINUE_LINKING);
3069 if (TREE_TYPE (t) != boolean_type_node)
3070 t = fold_build2 (NE_EXPR, boolean_type_node,
3071 t, build_int_cst (TREE_TYPE (t), 0));
3072 gcond *cond_stmt = gimple_build_cond_empty (t);
3073 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3076 /* Add the loop cleanup function. */
3077 gsi = gsi_last_nondebug_bb (exit_bb);
3078 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3079 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3080 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3082 else
3083 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3084 gcall *call_stmt = gimple_build_call (t, 0);
3085 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3086 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3087 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3088 if (fd->ordered)
3090 tree arr = counts[fd->ordered];
3091 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3092 TREE_THIS_VOLATILE (clobber) = 1;
3093 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3094 GSI_SAME_STMT);
3096 gsi_remove (&gsi, true);
3098 /* Connect the new blocks. */
3099 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3100 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3102 if (!broken_loop)
3104 gimple_seq phis;
3106 e = find_edge (cont_bb, l3_bb);
3107 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3109 phis = phi_nodes (l3_bb);
3110 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3112 gimple *phi = gsi_stmt (gsi);
3113 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3114 PHI_ARG_DEF_FROM_EDGE (phi, e));
3116 remove_edge (e);
3118 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3119 e = find_edge (cont_bb, l1_bb);
3120 if (e == NULL)
3122 e = BRANCH_EDGE (cont_bb);
3123 gcc_assert (single_succ (e->dest) == l1_bb);
3125 if (gimple_omp_for_combined_p (fd->for_stmt))
3127 remove_edge (e);
3128 e = NULL;
3130 else if (fd->collapse > 1)
3132 remove_edge (e);
3133 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3135 else
3136 e->flags = EDGE_TRUE_VALUE;
3137 if (e)
3139 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3140 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3142 else
3144 e = find_edge (cont_bb, l2_bb);
3145 e->flags = EDGE_FALLTHRU;
3147 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3149 if (gimple_in_ssa_p (cfun))
3151 /* Add phis to the outer loop that connect to the phis in the inner,
3152 original loop, and move the loop entry value of the inner phi to
3153 the loop entry value of the outer phi. */
3154 gphi_iterator psi;
3155 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3157 source_location locus;
3158 gphi *nphi;
3159 gphi *exit_phi = psi.phi ();
3161 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3162 continue;
3164 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3165 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3167 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3168 edge latch_to_l1 = find_edge (latch, l1_bb);
3169 gphi *inner_phi
3170 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3172 tree t = gimple_phi_result (exit_phi);
3173 tree new_res = copy_ssa_name (t, NULL);
3174 nphi = create_phi_node (new_res, l0_bb);
3176 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3177 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3178 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3179 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3180 add_phi_arg (nphi, t, entry_to_l0, locus);
3182 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3183 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3185 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3189 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3190 recompute_dominator (CDI_DOMINATORS, l2_bb));
3191 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3192 recompute_dominator (CDI_DOMINATORS, l3_bb));
3193 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3194 recompute_dominator (CDI_DOMINATORS, l0_bb));
3195 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3196 recompute_dominator (CDI_DOMINATORS, l1_bb));
3198 /* We enter expand_omp_for_generic with a loop. This original loop may
3199 have its own loop struct, or it may be part of an outer loop struct
3200 (which may be the fake loop). */
3201 struct loop *outer_loop = entry_bb->loop_father;
3202 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3204 add_bb_to_loop (l2_bb, outer_loop);
3206 /* We've added a new loop around the original loop. Allocate the
3207 corresponding loop struct. */
3208 struct loop *new_loop = alloc_loop ();
3209 new_loop->header = l0_bb;
3210 new_loop->latch = l2_bb;
3211 add_loop (new_loop, outer_loop);
3213 /* Allocate a loop structure for the original loop unless we already
3214 had one. */
3215 if (!orig_loop_has_loop_struct
3216 && !gimple_omp_for_combined_p (fd->for_stmt))
3218 struct loop *orig_loop = alloc_loop ();
3219 orig_loop->header = l1_bb;
3220 /* The loop may have multiple latches. */
3221 add_loop (orig_loop, new_loop);
3226 /* A subroutine of expand_omp_for. Generate code for a parallel
3227 loop with static schedule and no specified chunk size. Given
3228 parameters:
3230 for (V = N1; V cond N2; V += STEP) BODY;
3232 where COND is "<" or ">", we generate pseudocode
3234 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3235 if (cond is <)
3236 adj = STEP - 1;
3237 else
3238 adj = STEP + 1;
3239 if ((__typeof (V)) -1 > 0 && cond is >)
3240 n = -(adj + N2 - N1) / -STEP;
3241 else
3242 n = (adj + N2 - N1) / STEP;
3243 q = n / nthreads;
3244 tt = n % nthreads;
3245 if (threadid < tt) goto L3; else goto L4;
3246 L3:
3247 tt = 0;
3248 q = q + 1;
3249 L4:
3250 s0 = q * threadid + tt;
3251 e0 = s0 + q;
3252 V = s0 * STEP + N1;
3253 if (s0 >= e0) goto L2; else goto L0;
3254 L0:
3255 e = e0 * STEP + N1;
3256 L1:
3257 BODY;
3258 V += STEP;
3259 if (V cond e) goto L1;
3260 L2:
3261 */
/* Expand the GIMPLE_OMP_FOR that ends REGION->entry for a static schedule
   with no chunk size.  The function emits code computing the iteration
   count n, derives q = n / nthreads and tt = n % nthreads, computes this
   thread's (or team's) half-open range [s0, e0), rewrites the
   GIMPLE_OMP_CONTINUE into the sequential increment and test, replaces the
   GIMPLE_OMP_RETURN by a barrier unless it is marked nowait, and finally
   fixes up edges, dominators and loop structures.

   REGION supplies the entry, cont and exit blocks.  FD describes the loop
   (bounds, step, condition code, collapse depth).  INNER_STMT is only read
   when the loop is combined (gimple_omp_for_combined_p), to locate the
   _looptemp_ clauses that receive the start and end values.

   NOTE(review): this listing carries the upstream line number at the start
   of every line, and the numbering skips wherever a brace-only or blank
   line would be; the block structure must be restored before this text
   can be compiled.  */
3263 static void
3264 expand_omp_for_static_nochunk (struct omp_region *region,
3265 struct omp_for_data *fd,
3266 gimple *inner_stmt)
3268 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3269 tree type, itype, vmain, vback;
3270 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3271 basic_block body_bb, cont_bb, collapse_bb = NULL;
3272 basic_block fin_bb;
3273 gimple_stmt_iterator gsi;
3274 edge ep;
/* A region without a continue block is treated as a "broken" loop.  */
3275 bool broken_loop = region->cont == NULL;
3276 tree *counts = NULL;
3277 tree n1, n2, step;
/* TYPE is the type of the iteration variable; ITYPE is used for the
   iteration arithmetic and is the signed integer type for TYPE when TYPE
   is a pointer.  */
3279 itype = type = TREE_TYPE (fd->loop.v);
3280 if (POINTER_TYPE_P (type))
3281 itype = signed_type_for (type);
/* Pick up the region's blocks.  FIN_BB is the destination of ENTRY_BB's
   branch edge; SEQ_START_BB is a new block split onto ENTRY_BB's fallthru
   edge, and BODY_BB is its single successor.  */
3283 entry_bb = region->entry;
3284 cont_bb = region->cont;
3285 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3286 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3287 gcc_assert (broken_loop
3288 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3289 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3290 body_bb = single_succ (seq_start_bb);
3291 if (!broken_loop)
3293 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3294 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3295 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3297 exit_bb = region->exit;
3299 /* Iteration space partitioning goes in ENTRY_BB. */
3300 gsi = gsi_last_nondebug_bb (entry_bb);
3301 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
/* For collapsed loops the counts are set up by expand_omp_for_init_counts
   and T is left NULL_TREE.  Otherwise T becomes integer_one_node for a loop
   combined into an outer construct, or the result of trying to fold
   "N1 cond N2" (fold_binary's result is tested against NULL_TREE below).  */
3303 if (fd->collapse > 1)
3305 int first_zero_iter = -1, dummy = -1;
3306 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3308 counts = XALLOCAVEC (tree, fd->collapse);
3309 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3310 fin_bb, first_zero_iter,
3311 dummy_bb, dummy, l2_dom_bb);
3312 t = NULL_TREE;
3314 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3315 t = integer_one_node;
3316 else
3317 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3318 fold_convert (type, fd->loop.n1),
3319 fold_convert (type, fd->loop.n2));
/* Non-collapsed loop with unsigned TYPE whose condition did not fold to
   one: emit an explicit "N1 cond N2" test before the GIMPLE_OMP_FOR.  The
   block is split after the test; the true edge continues into the new
   ENTRY_BB and a new false edge goes straight to FIN_BB.  */
3320 if (fd->collapse == 1
3321 && TYPE_UNSIGNED (type)
3322 && (t == NULL_TREE || !integer_onep (t)))
3324 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3325 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3326 true, GSI_SAME_STMT);
3327 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3328 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3329 true, GSI_SAME_STMT);
3330 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3331 NULL_TREE, NULL_TREE);
3332 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
/* Regimplify the condition operands if either of them needs it.  */
3333 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3334 expand_omp_regimplify_p, NULL, NULL)
3335 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3336 expand_omp_regimplify_p, NULL, NULL))
3338 gsi = gsi_for_stmt (cond_stmt);
3339 gimple_regimplify_operands (cond_stmt, &gsi);
3341 ep = split_block (entry_bb, cond_stmt);
3342 ep->flags = EDGE_TRUE_VALUE;
3343 entry_bb = ep->dest;
3344 ep->probability = profile_probability::very_likely ();
3345 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3346 ep->probability = profile_probability::very_unlikely ();
/* In SSA form every PHI in FIN_BB needs an argument for the new edge; it is
   copied from the argument on the existing ENTRY_BB -> FIN_BB edge.  */
3347 if (gimple_in_ssa_p (cfun))
3349 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3350 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3351 !gsi_end_p (gpi); gsi_next (&gpi))
3353 gphi *phi = gpi.phi ();
3354 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3355 ep, UNKNOWN_LOCATION);
3358 gsi = gsi_last_bb (entry_bb);
/* Select the builtins giving the number of participants and this
   participant's index: threads for a worksharing for, teams for
   distribute.  Any other kind is unreachable here.  */
3361 switch (gimple_omp_for_kind (fd->for_stmt))
3363 case GF_OMP_FOR_KIND_FOR:
3364 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3365 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3366 break;
3367 case GF_OMP_FOR_KIND_DISTRIBUTE:
3368 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3369 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3370 break;
3371 default:
3372 gcc_unreachable ();
/* Emit the two calls and convert their results to ITYPE.  */
3374 nthreads = build_call_expr (nthreads, 0);
3375 nthreads = fold_convert (itype, nthreads);
3376 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3377 true, GSI_SAME_STMT);
3378 threadid = build_call_expr (threadid, 0);
3379 threadid = fold_convert (itype, threadid);
3380 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3381 true, GSI_SAME_STMT);
/* Bounds and step default to FD->loop; when this loop is combined into an
   outer construct, N1 and N2 are instead the decls of the first two
   _looptemp_ clauses on the for statement.  */
3383 n1 = fd->loop.n1;
3384 n2 = fd->loop.n2;
3385 step = fd->loop.step;
3386 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3388 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3389 OMP_CLAUSE__LOOPTEMP_);
3390 gcc_assert (innerc);
3391 n1 = OMP_CLAUSE_DECL (innerc);
3392 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3393 OMP_CLAUSE__LOOPTEMP_);
3394 gcc_assert (innerc);
3395 n2 = OMP_CLAUSE_DECL (innerc);
/* Note that N1 is converted to TYPE while N2 and STEP are converted to
   ITYPE.  */
3397 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3398 true, NULL_TREE, true, GSI_SAME_STMT);
3399 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3400 true, NULL_TREE, true, GSI_SAME_STMT);
3401 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3402 true, NULL_TREE, true, GSI_SAME_STMT);
/* Iteration count: n = (STEP + adj + N2 - N1) / STEP, where adj is -1 when
   the condition code is LT_EXPR and +1 otherwise.  For unsigned ITYPE with
   GT_EXPR both the dividend and STEP are negated before the division.  */
3404 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3405 t = fold_build2 (PLUS_EXPR, itype, step, t);
3406 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3407 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3408 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3409 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3410 fold_build1 (NEGATE_EXPR, itype, t),
3411 fold_build1 (NEGATE_EXPR, itype, step));
3412 else
3413 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3414 t = fold_convert (itype, t);
3415 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
/* q = n / nthreads.  */
3417 q = create_tmp_reg (itype, "q");
3418 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3419 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3420 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
/* tt = n % nthreads.  */
3422 tt = create_tmp_reg (itype, "tt");
3423 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3424 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3425 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
/* Branch on threadid < tt; ENTRY_BB is split right after this test.  */
3427 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3428 gcond *cond_stmt = gimple_build_cond_empty (t);
3429 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
/* SECOND_BB receives "tt = 0; q = q + 1"; it is reached on the true edge of
   the test above (the edge flags are set when the blocks are connected
   further down).  */
3431 second_bb = split_block (entry_bb, cond_stmt)->dest;
3432 gsi = gsi_last_nondebug_bb (second_bb);
3433 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3435 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3436 GSI_SAME_STMT);
3437 gassign *assign_stmt
3438 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3439 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
/* THIRD_BB computes s0 = q * threadid + tt and e0 = s0 + q, then tests
   s0 >= e0.  */
3441 third_bb = split_block (second_bb, assign_stmt)->dest;
3442 gsi = gsi_last_nondebug_bb (third_bb);
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3445 t = build2 (MULT_EXPR, itype, q, threadid);
3446 t = build2 (PLUS_EXPR, itype, t, tt);
3447 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3449 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3450 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3452 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3453 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3455 /* Remove the GIMPLE_OMP_FOR statement. */
3456 gsi_remove (&gsi, true);
3458 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3459 gsi = gsi_start_bb (seq_start_bb);
/* By default the start value is stored into the loop variable itself and
   there is no separate end variable.  */
3461 tree startvar = fd->loop.v;
3462 tree endvar = NULL_TREE;
/* For a combined construct, the start and end values go into the decls of
   the first two _looptemp_ clauses of INNER_STMT (a parallel or a for).  */
3464 if (gimple_omp_for_combined_p (fd->for_stmt))
3466 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3467 ? gimple_omp_parallel_clauses (inner_stmt)
3468 : gimple_omp_for_clauses (inner_stmt);
3469 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3470 gcc_assert (innerc);
3471 startvar = OMP_CLAUSE_DECL (innerc);
3472 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3473 OMP_CLAUSE__LOOPTEMP_);
3474 gcc_assert (innerc);
3475 endvar = OMP_CLAUSE_DECL (innerc);
/* Collapsed distribute with a non-constant total count: step over
   FD->collapse - 1 further _looptemp_ clauses, then look for one more.  */
3476 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3477 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3479 int i;
3480 for (i = 1; i < fd->collapse; i++)
3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 OMP_CLAUSE__LOOPTEMP_);
3484 gcc_assert (innerc);
3486 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3487 OMP_CLAUSE__LOOPTEMP_);
3488 if (innerc)
3490 /* If needed (distribute parallel for with lastprivate),
3491 propagate down the total number of iterations. */
3492 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3493 fd->loop.n2);
3494 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3495 GSI_CONTINUE_LINKING);
3496 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* STARTVAR = s0 * STEP + N1, using pointer-plus when TYPE is a pointer.  */
3501 t = fold_convert (itype, s0);
3502 t = fold_build2 (MULT_EXPR, itype, t, step);
3503 if (POINTER_TYPE_P (type))
3504 t = fold_build_pointer_plus (n1, t);
3505 else
3506 t = fold_build2 (PLUS_EXPR, type, t, n1);
3507 t = fold_convert (TREE_TYPE (startvar), t);
3508 t = force_gimple_operand_gsi (&gsi, t,
3509 DECL_P (startvar)
3510 && TREE_ADDRESSABLE (startvar),
3511 NULL_TREE, false, GSI_CONTINUE_LINKING);
3512 assign_stmt = gimple_build_assign (startvar, t);
3513 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* E = e0 * STEP + N1, the end value used by the sequential loop test.  */
3515 t = fold_convert (itype, e0);
3516 t = fold_build2 (MULT_EXPR, itype, t, step);
3517 if (POINTER_TYPE_P (type))
3518 t = fold_build_pointer_plus (n1, t);
3519 else
3520 t = fold_build2 (PLUS_EXPR, type, t, n1);
3521 t = fold_convert (TREE_TYPE (startvar), t);
3522 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3523 false, GSI_CONTINUE_LINKING);
/* With an end variable, store E there and also into FD->loop.v, going
   through a NOP_EXPR when the types are not trivially convertible.  */
3524 if (endvar)
3526 assign_stmt = gimple_build_assign (endvar, e);
3527 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3528 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3529 assign_stmt = gimple_build_assign (fd->loop.v, e);
3530 else
3531 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3532 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3534 /* Handle linear clause adjustments. */
/* ITERCNT is computed lazily, at the first linear clause that needs it,
   and reused for the following ones.  */
3535 tree itercnt = NULL_TREE;
3536 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3537 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3538 c; c = OMP_CLAUSE_CHAIN (c))
3539 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3540 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3542 tree d = OMP_CLAUSE_DECL (c);
3543 bool is_ref = omp_is_reference (d);
3544 tree t = d, a, dest;
3545 if (is_ref)
3546 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3547 if (itercnt == NULL_TREE)
/* When combined into an outer construct, the count is
   (N1 - FD->loop.n1) / STEP + s0; otherwise it is just s0.  */
3549 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3551 itercnt = fold_build2 (MINUS_EXPR, itype,
3552 fold_convert (itype, n1),
3553 fold_convert (itype, fd->loop.n1));
3554 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3555 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3556 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3557 NULL_TREE, false,
3558 GSI_CONTINUE_LINKING);
3560 else
3561 itercnt = s0;
/* Advance the linear variable by ITERCNT * linear-step.  This local TYPE
   shadows the function-level TYPE; pointers are offset in sizetype with
   POINTER_PLUS_EXPR.  */
3563 tree type = TREE_TYPE (t);
3564 if (POINTER_TYPE_P (type))
3565 type = sizetype;
3566 a = fold_build2 (MULT_EXPR, type,
3567 fold_convert (type, itercnt),
3568 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3569 dest = unshare_expr (t);
3570 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3571 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3572 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3573 false, GSI_CONTINUE_LINKING);
3574 assign_stmt = gimple_build_assign (dest, t);
3575 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* Collapsed loops additionally need their individual iteration variables
   initialised.  */
3577 if (fd->collapse > 1)
3578 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3580 if (!broken_loop)
3582 /* The code controlling the sequential loop replaces the
3583 GIMPLE_OMP_CONTINUE. */
3584 gsi = gsi_last_nondebug_bb (cont_bb);
3585 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3586 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3587 vmain = gimple_omp_continue_control_use (cont_stmt);
3588 vback = gimple_omp_continue_control_def (cont_stmt);
/* Unless the loop is combined, emit "VBACK = VMAIN + STEP" followed by the
   test "VBACK cond E" (the freshly computed value is tested directly when
   VBACK is an addressable decl).  */
3590 if (!gimple_omp_for_combined_p (fd->for_stmt))
3592 if (POINTER_TYPE_P (type))
3593 t = fold_build_pointer_plus (vmain, step);
3594 else
3595 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3596 t = force_gimple_operand_gsi (&gsi, t,
3597 DECL_P (vback)
3598 && TREE_ADDRESSABLE (vback),
3599 NULL_TREE, true, GSI_SAME_STMT);
3600 assign_stmt = gimple_build_assign (vback, t);
3601 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3603 t = build2 (fd->loop.cond_code, boolean_type_node,
3604 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3605 ? t : vback, e);
3606 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3609 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3610 gsi_remove (&gsi, true);
3612 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3613 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3616 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3617 gsi = gsi_last_nondebug_bb (exit_bb);
3618 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3620 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3621 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3623 gsi_remove (&gsi, true);
3625 /* Connect all the blocks. */
/* ENTRY_BB ends in the threadid < tt test: a new false edge goes to
   THIRD_BB and the existing edge to SECOND_BB becomes the true edge.
   THIRD_BB ends in the s0 >= e0 test: true leads to FIN_BB, false to
   SEQ_START_BB.  */
3626 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3627 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3628 ep = find_edge (entry_bb, second_bb);
3629 ep->flags = EDGE_TRUE_VALUE;
3630 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3631 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3632 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3634 if (!broken_loop)
/* Find the back edge from CONT_BB; if it does not target BODY_BB directly,
   use the branch edge, whose destination must lead solely to BODY_BB.  */
3636 ep = find_edge (cont_bb, body_bb);
3637 if (ep == NULL)
3639 ep = BRANCH_EDGE (cont_bb);
3640 gcc_assert (single_succ (ep->dest) == body_bb);
/* Combined loop: drop the back edge.  Collapsed loop: redirect it through
   COLLAPSE_BB.  Plain loop: keep it as the true edge.  */
3642 if (gimple_omp_for_combined_p (fd->for_stmt))
3644 remove_edge (ep);
3645 ep = NULL;
3647 else if (fd->collapse > 1)
3649 remove_edge (ep);
3650 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3652 else
3653 ep->flags = EDGE_TRUE_VALUE;
/* The exit edge is the false edge when a back edge remains, otherwise a
   plain fallthru.  */
3654 find_edge (cont_bb, fin_bb)->flags
3655 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
/* Update dominators: SECOND_BB and THIRD_BB are set to be dominated by
   ENTRY_BB, SEQ_START_BB by THIRD_BB; BODY_BB and FIN_BB are recomputed.  */
3658 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3659 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3660 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3662 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3663 recompute_dominator (CDI_DOMINATORS, body_bb));
3664 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3665 recompute_dominator (CDI_DOMINATORS, fin_bb));
/* If BODY_BB already sits in a loop different from ENTRY_BB's, only
   sanity-check that loop's header and latch and return.  */
3667 struct loop *loop = body_bb->loop_father;
3668 if (loop != entry_bb->loop_father)
3670 gcc_assert (broken_loop || loop->header == body_bb);
3671 gcc_assert (broken_loop
3672 || loop->latch == region->cont
3673 || single_pred (loop->latch) == region->cont);
3674 return;
/* Otherwise allocate a loop structure headed by BODY_BB for a non-broken,
   non-combined loop; the latch is set to CONT_BB only when no collapse
   block was created.  */
3677 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3679 loop = alloc_loop ();
3680 loop->header = body_bb;
3681 if (collapse_bb == NULL)
3682 loop->latch = cont_bb;
3683 add_loop (loop, body_bb->loop_father);
3687 /* Return the first PHI node in E->DEST whose argument on edge E
   compares equal (with ==) to ARG, or NULL when no PHI node in that
   block has ARG on E.
   NOTE(review): the embedded line numbers skip where brace-only lines
   would be in this listing; restore the block structure before
   compiling.  */
3689 static gphi *
3690 find_phi_with_arg_on_edge (tree arg, edge e)
3692 basic_block bb = e->dest;
/* Walk the PHI nodes of the destination block in order.  */
3694 for (gphi_iterator gpi = gsi_start_phis (bb);
3695 !gsi_end_p (gpi);
3696 gsi_next (&gpi))
3698 gphi *phi = gpi.phi ();
3699 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3700 return phi;
/* No PHI node carries ARG on E.  */
3703 return NULL;
3706 /* A subroutine of expand_omp_for. Generate code for a parallel
3707 loop with static schedule and a specified chunk size. Given
3708 parameters:
3710 for (V = N1; V cond N2; V += STEP) BODY;
3712 where COND is "<" or ">", we generate pseudocode
3714 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3715 if (cond is <)
3716 adj = STEP - 1;
3717 else
3718 adj = STEP + 1;
3719 if ((__typeof (V)) -1 > 0 && cond is >)
3720 n = -(adj + N2 - N1) / -STEP;
3721 else
3722 n = (adj + N2 - N1) / STEP;
3723 trip = 0;
3724 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3725 here so that V is defined
3726 if the loop is not entered
3727 L0:
3728 s0 = (trip * nthreads + threadid) * CHUNK;
3729 e0 = min (s0 + CHUNK, n);
3730 if (s0 < n) goto L1; else goto L4;
3731 L1:
3732 V = s0 * STEP + N1;
3733 e = e0 * STEP + N1;
3734 L2:
3735 BODY;
3736 V += STEP;
3737 if (V cond e) goto L2; else goto L3;
3738 L3:
3739 trip += 1;
3740 goto L0;
3741 L4:
3742 */
3744 static void
3745 expand_omp_for_static_chunk (struct omp_region *region,
3746 struct omp_for_data *fd, gimple *inner_stmt)
3748 tree n, s0, e0, e, t;
3749 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3750 tree type, itype, vmain, vback, vextra;
3751 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3752 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3753 gimple_stmt_iterator gsi;
3754 edge se;
3755 bool broken_loop = region->cont == NULL;
3756 tree *counts = NULL;
3757 tree n1, n2, step;
3759 itype = type = TREE_TYPE (fd->loop.v);
3760 if (POINTER_TYPE_P (type))
3761 itype = signed_type_for (type);
3763 entry_bb = region->entry;
3764 se = split_block (entry_bb, last_stmt (entry_bb));
3765 entry_bb = se->src;
3766 iter_part_bb = se->dest;
3767 cont_bb = region->cont;
3768 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3769 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3770 gcc_assert (broken_loop
3771 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3772 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3773 body_bb = single_succ (seq_start_bb);
3774 if (!broken_loop)
3776 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3777 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3778 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3779 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3781 exit_bb = region->exit;
3783 /* Trip and adjustment setup goes in ENTRY_BB. */
3784 gsi = gsi_last_nondebug_bb (entry_bb);
3785 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3787 if (fd->collapse > 1)
3789 int first_zero_iter = -1, dummy = -1;
3790 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3792 counts = XALLOCAVEC (tree, fd->collapse);
3793 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3794 fin_bb, first_zero_iter,
3795 dummy_bb, dummy, l2_dom_bb);
3796 t = NULL_TREE;
3798 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3799 t = integer_one_node;
3800 else
3801 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3802 fold_convert (type, fd->loop.n1),
3803 fold_convert (type, fd->loop.n2));
3804 if (fd->collapse == 1
3805 && TYPE_UNSIGNED (type)
3806 && (t == NULL_TREE || !integer_onep (t)))
3808 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3809 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3810 true, GSI_SAME_STMT);
3811 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3812 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3813 true, GSI_SAME_STMT);
3814 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3815 NULL_TREE, NULL_TREE);
3816 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3817 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3818 expand_omp_regimplify_p, NULL, NULL)
3819 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3820 expand_omp_regimplify_p, NULL, NULL))
3822 gsi = gsi_for_stmt (cond_stmt);
3823 gimple_regimplify_operands (cond_stmt, &gsi);
3825 se = split_block (entry_bb, cond_stmt);
3826 se->flags = EDGE_TRUE_VALUE;
3827 entry_bb = se->dest;
3828 se->probability = profile_probability::very_likely ();
3829 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3830 se->probability = profile_probability::very_unlikely ();
3831 if (gimple_in_ssa_p (cfun))
3833 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3834 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3835 !gsi_end_p (gpi); gsi_next (&gpi))
3837 gphi *phi = gpi.phi ();
3838 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3839 se, UNKNOWN_LOCATION);
3842 gsi = gsi_last_bb (entry_bb);
3845 switch (gimple_omp_for_kind (fd->for_stmt))
3847 case GF_OMP_FOR_KIND_FOR:
3848 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3849 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3850 break;
3851 case GF_OMP_FOR_KIND_DISTRIBUTE:
3852 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3853 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3854 break;
3855 default:
3856 gcc_unreachable ();
3858 nthreads = build_call_expr (nthreads, 0);
3859 nthreads = fold_convert (itype, nthreads);
3860 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3861 true, GSI_SAME_STMT);
3862 threadid = build_call_expr (threadid, 0);
3863 threadid = fold_convert (itype, threadid);
3864 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3865 true, GSI_SAME_STMT);
3867 n1 = fd->loop.n1;
3868 n2 = fd->loop.n2;
3869 step = fd->loop.step;
3870 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3872 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 OMP_CLAUSE__LOOPTEMP_);
3874 gcc_assert (innerc);
3875 n1 = OMP_CLAUSE_DECL (innerc);
3876 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3877 OMP_CLAUSE__LOOPTEMP_);
3878 gcc_assert (innerc);
3879 n2 = OMP_CLAUSE_DECL (innerc);
3881 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3882 true, NULL_TREE, true, GSI_SAME_STMT);
3883 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3884 true, NULL_TREE, true, GSI_SAME_STMT);
3885 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3886 true, NULL_TREE, true, GSI_SAME_STMT);
3887 tree chunk_size = fold_convert (itype, fd->chunk_size);
3888 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3889 chunk_size
3890 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3891 GSI_SAME_STMT);
3893 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3894 t = fold_build2 (PLUS_EXPR, itype, step, t);
3895 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3896 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3897 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3898 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3899 fold_build1 (NEGATE_EXPR, itype, t),
3900 fold_build1 (NEGATE_EXPR, itype, step));
3901 else
3902 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3903 t = fold_convert (itype, t);
3904 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3905 true, GSI_SAME_STMT);
3907 trip_var = create_tmp_reg (itype, ".trip");
3908 if (gimple_in_ssa_p (cfun))
3910 trip_init = make_ssa_name (trip_var);
3911 trip_main = make_ssa_name (trip_var);
3912 trip_back = make_ssa_name (trip_var);
3914 else
3916 trip_init = trip_var;
3917 trip_main = trip_var;
3918 trip_back = trip_var;
3921 gassign *assign_stmt
3922 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3923 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3925 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3926 t = fold_build2 (MULT_EXPR, itype, t, step);
3927 if (POINTER_TYPE_P (type))
3928 t = fold_build_pointer_plus (n1, t);
3929 else
3930 t = fold_build2 (PLUS_EXPR, type, t, n1);
3931 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3932 true, GSI_SAME_STMT);
3934 /* Remove the GIMPLE_OMP_FOR. */
3935 gsi_remove (&gsi, true);
3937 gimple_stmt_iterator gsif = gsi;
3939 /* Iteration space partitioning goes in ITER_PART_BB. */
3940 gsi = gsi_last_bb (iter_part_bb);
3942 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3943 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3944 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3945 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3946 false, GSI_CONTINUE_LINKING);
3948 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3949 t = fold_build2 (MIN_EXPR, itype, t, n);
3950 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3951 false, GSI_CONTINUE_LINKING);
3953 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3954 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3956 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3957 gsi = gsi_start_bb (seq_start_bb);
3959 tree startvar = fd->loop.v;
3960 tree endvar = NULL_TREE;
3962 if (gimple_omp_for_combined_p (fd->for_stmt))
3964 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3965 ? gimple_omp_parallel_clauses (inner_stmt)
3966 : gimple_omp_for_clauses (inner_stmt);
3967 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3968 gcc_assert (innerc);
3969 startvar = OMP_CLAUSE_DECL (innerc);
3970 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3971 OMP_CLAUSE__LOOPTEMP_);
3972 gcc_assert (innerc);
3973 endvar = OMP_CLAUSE_DECL (innerc);
3974 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3975 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3977 int i;
3978 for (i = 1; i < fd->collapse; i++)
3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 OMP_CLAUSE__LOOPTEMP_);
3982 gcc_assert (innerc);
3984 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3985 OMP_CLAUSE__LOOPTEMP_);
3986 if (innerc)
3988 /* If needed (distribute parallel for with lastprivate),
3989 propagate down the total number of iterations. */
3990 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3991 fd->loop.n2);
3992 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3993 GSI_CONTINUE_LINKING);
3994 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3995 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4000 t = fold_convert (itype, s0);
4001 t = fold_build2 (MULT_EXPR, itype, t, step);
4002 if (POINTER_TYPE_P (type))
4003 t = fold_build_pointer_plus (n1, t);
4004 else
4005 t = fold_build2 (PLUS_EXPR, type, t, n1);
4006 t = fold_convert (TREE_TYPE (startvar), t);
4007 t = force_gimple_operand_gsi (&gsi, t,
4008 DECL_P (startvar)
4009 && TREE_ADDRESSABLE (startvar),
4010 NULL_TREE, false, GSI_CONTINUE_LINKING);
4011 assign_stmt = gimple_build_assign (startvar, t);
4012 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4014 t = fold_convert (itype, e0);
4015 t = fold_build2 (MULT_EXPR, itype, t, step);
4016 if (POINTER_TYPE_P (type))
4017 t = fold_build_pointer_plus (n1, t);
4018 else
4019 t = fold_build2 (PLUS_EXPR, type, t, n1);
4020 t = fold_convert (TREE_TYPE (startvar), t);
4021 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4022 false, GSI_CONTINUE_LINKING);
4023 if (endvar)
4025 assign_stmt = gimple_build_assign (endvar, e);
4026 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4027 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4028 assign_stmt = gimple_build_assign (fd->loop.v, e);
4029 else
4030 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4031 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4033 /* Handle linear clause adjustments. */
4034 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4035 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4036 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4037 c; c = OMP_CLAUSE_CHAIN (c))
4038 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4039 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4041 tree d = OMP_CLAUSE_DECL (c);
4042 bool is_ref = omp_is_reference (d);
4043 tree t = d, a, dest;
4044 if (is_ref)
4045 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4046 tree type = TREE_TYPE (t);
4047 if (POINTER_TYPE_P (type))
4048 type = sizetype;
4049 dest = unshare_expr (t);
4050 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4051 expand_omp_build_assign (&gsif, v, t);
4052 if (itercnt == NULL_TREE)
4054 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4056 itercntbias
4057 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4058 fold_convert (itype, fd->loop.n1));
4059 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4060 itercntbias, step);
4061 itercntbias
4062 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4063 NULL_TREE, true,
4064 GSI_SAME_STMT);
4065 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4066 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4067 NULL_TREE, false,
4068 GSI_CONTINUE_LINKING);
4070 else
4071 itercnt = s0;
4073 a = fold_build2 (MULT_EXPR, type,
4074 fold_convert (type, itercnt),
4075 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4076 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4077 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4079 false, GSI_CONTINUE_LINKING);
4080 assign_stmt = gimple_build_assign (dest, t);
4081 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4083 if (fd->collapse > 1)
4084 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4086 if (!broken_loop)
4088 /* The code controlling the sequential loop goes in CONT_BB,
4089 replacing the GIMPLE_OMP_CONTINUE. */
4090 gsi = gsi_last_nondebug_bb (cont_bb);
4091 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4092 vmain = gimple_omp_continue_control_use (cont_stmt);
4093 vback = gimple_omp_continue_control_def (cont_stmt);
4095 if (!gimple_omp_for_combined_p (fd->for_stmt))
4097 if (POINTER_TYPE_P (type))
4098 t = fold_build_pointer_plus (vmain, step);
4099 else
4100 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4101 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4102 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4103 true, GSI_SAME_STMT);
4104 assign_stmt = gimple_build_assign (vback, t);
4105 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4107 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4108 t = build2 (EQ_EXPR, boolean_type_node,
4109 build_int_cst (itype, 0),
4110 build_int_cst (itype, 1));
4111 else
4112 t = build2 (fd->loop.cond_code, boolean_type_node,
4113 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4114 ? t : vback, e);
4115 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4118 /* Remove GIMPLE_OMP_CONTINUE. */
4119 gsi_remove (&gsi, true);
4121 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4122 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4124 /* Trip update code goes into TRIP_UPDATE_BB. */
4125 gsi = gsi_start_bb (trip_update_bb);
4127 t = build_int_cst (itype, 1);
4128 t = build2 (PLUS_EXPR, itype, trip_main, t);
4129 assign_stmt = gimple_build_assign (trip_back, t);
4130 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4133 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4134 gsi = gsi_last_nondebug_bb (exit_bb);
4135 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4137 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4138 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4140 gsi_remove (&gsi, true);
4142 /* Connect the new blocks. */
4143 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4144 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4146 if (!broken_loop)
4148 se = find_edge (cont_bb, body_bb);
4149 if (se == NULL)
4151 se = BRANCH_EDGE (cont_bb);
4152 gcc_assert (single_succ (se->dest) == body_bb);
4154 if (gimple_omp_for_combined_p (fd->for_stmt))
4156 remove_edge (se);
4157 se = NULL;
4159 else if (fd->collapse > 1)
4161 remove_edge (se);
4162 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4164 else
4165 se->flags = EDGE_TRUE_VALUE;
4166 find_edge (cont_bb, trip_update_bb)->flags
4167 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4169 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4170 iter_part_bb);
4173 if (gimple_in_ssa_p (cfun))
4175 gphi_iterator psi;
4176 gphi *phi;
4177 edge re, ene;
4178 edge_var_map *vm;
4179 size_t i;
4181 gcc_assert (fd->collapse == 1 && !broken_loop);
4183 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4184 remove arguments of the phi nodes in fin_bb. We need to create
4185 appropriate phi nodes in iter_part_bb instead. */
4186 se = find_edge (iter_part_bb, fin_bb);
4187 re = single_succ_edge (trip_update_bb);
4188 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4189 ene = single_succ_edge (entry_bb);
4191 psi = gsi_start_phis (fin_bb);
4192 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4193 gsi_next (&psi), ++i)
4195 gphi *nphi;
4196 source_location locus;
4198 phi = psi.phi ();
4199 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4200 redirect_edge_var_map_def (vm), 0))
4201 continue;
4203 t = gimple_phi_result (phi);
4204 gcc_assert (t == redirect_edge_var_map_result (vm));
4206 if (!single_pred_p (fin_bb))
4207 t = copy_ssa_name (t, phi);
4209 nphi = create_phi_node (t, iter_part_bb);
4211 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4212 locus = gimple_phi_arg_location_from_edge (phi, se);
4214 /* A special case -- fd->loop.v is not yet computed in
4215 iter_part_bb, we need to use vextra instead. */
4216 if (t == fd->loop.v)
4217 t = vextra;
4218 add_phi_arg (nphi, t, ene, locus);
4219 locus = redirect_edge_var_map_location (vm);
4220 tree back_arg = redirect_edge_var_map_def (vm);
4221 add_phi_arg (nphi, back_arg, re, locus);
4222 edge ce = find_edge (cont_bb, body_bb);
4223 if (ce == NULL)
4225 ce = BRANCH_EDGE (cont_bb);
4226 gcc_assert (single_succ (ce->dest) == body_bb);
4227 ce = single_succ_edge (ce->dest);
4229 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4230 gcc_assert (inner_loop_phi != NULL);
4231 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4232 find_edge (seq_start_bb, body_bb), locus);
4234 if (!single_pred_p (fin_bb))
4235 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4237 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4238 redirect_edge_var_map_clear (re);
4239 if (single_pred_p (fin_bb))
4240 while (1)
4242 psi = gsi_start_phis (fin_bb);
4243 if (gsi_end_p (psi))
4244 break;
4245 remove_phi_node (&psi, false);
4248 /* Make phi node for trip. */
4249 phi = create_phi_node (trip_main, iter_part_bb);
4250 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4251 UNKNOWN_LOCATION);
4252 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4253 UNKNOWN_LOCATION);
4256 if (!broken_loop)
4257 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4258 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4259 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4260 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4261 recompute_dominator (CDI_DOMINATORS, fin_bb));
4262 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4263 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4264 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4265 recompute_dominator (CDI_DOMINATORS, body_bb));
4267 if (!broken_loop)
4269 struct loop *loop = body_bb->loop_father;
4270 struct loop *trip_loop = alloc_loop ();
4271 trip_loop->header = iter_part_bb;
4272 trip_loop->latch = trip_update_bb;
4273 add_loop (trip_loop, iter_part_bb->loop_father);
4275 if (loop != entry_bb->loop_father)
4277 gcc_assert (loop->header == body_bb);
4278 gcc_assert (loop->latch == region->cont
4279 || single_pred (loop->latch) == region->cont);
4280 trip_loop->inner = loop;
4281 return;
4284 if (!gimple_omp_for_combined_p (fd->for_stmt))
4286 loop = alloc_loop ();
4287 loop->header = body_bb;
4288 if (collapse_bb == NULL)
4289 loop->latch = cont_bb;
4290 add_loop (loop, trip_loop);
4295 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4296 loop. Given parameters:
4298 for (V = N1; V cond N2; V += STEP) BODY;
4300 where COND is "<" or ">", we generate pseudocode
4302 V = N1;
4303 goto L1;
4305 BODY;
4306 V += STEP;
4308 if (V cond N2) goto L0; else goto L2;
4311 For collapsed loops, given parameters:
4312 collapse(3)
4313 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4314 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4315 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4316 BODY;
4318 we generate pseudocode
4320 if (cond3 is <)
4321 adj = STEP3 - 1;
4322 else
4323 adj = STEP3 + 1;
4324 count3 = (adj + N32 - N31) / STEP3;
4325 if (cond2 is <)
4326 adj = STEP2 - 1;
4327 else
4328 adj = STEP2 + 1;
4329 count2 = (adj + N22 - N21) / STEP2;
4330 if (cond1 is <)
4331 adj = STEP1 - 1;
4332 else
4333 adj = STEP1 + 1;
4334 count1 = (adj + N12 - N11) / STEP1;
4335 count = count1 * count2 * count3;
4336 V = 0;
4337 V1 = N11;
4338 V2 = N21;
4339 V3 = N31;
4340 goto L1;
4342 BODY;
4343 V += 1;
4344 V3 += STEP3;
4345 V2 += (V3 cond3 N32) ? 0 : STEP2;
4346 V3 = (V3 cond3 N32) ? V3 : N31;
4347 V1 += (V2 cond2 N22) ? 0 : STEP1;
4348 V2 = (V2 cond2 N22) ? V2 : N21;
4350 if (V < count) goto L0; else goto L2;
4355 static void
4356 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4358 tree type, t;
4359 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4360 gimple_stmt_iterator gsi;
4361 gimple *stmt;
4362 gcond *cond_stmt;
4363 bool broken_loop = region->cont == NULL;
4364 edge e, ne;
4365 tree *counts = NULL;
4366 int i;
4367 int safelen_int = INT_MAX;
4368 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4369 OMP_CLAUSE_SAFELEN);
4370 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4371 OMP_CLAUSE__SIMDUID_);
4372 tree n1, n2;
4374 if (safelen)
4376 poly_uint64 val;
4377 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4378 if (!poly_int_tree_p (safelen, &val))
4379 safelen_int = 0;
4380 else
4381 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4382 if (safelen_int == 1)
4383 safelen_int = 0;
4385 type = TREE_TYPE (fd->loop.v);
4386 entry_bb = region->entry;
4387 cont_bb = region->cont;
4388 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4389 gcc_assert (broken_loop
4390 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4391 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4392 if (!broken_loop)
4394 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4395 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4396 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4397 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4399 else
4401 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4402 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4403 l2_bb = single_succ (l1_bb);
4405 exit_bb = region->exit;
4406 l2_dom_bb = NULL;
4408 gsi = gsi_last_nondebug_bb (entry_bb);
4410 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4411 /* Not needed in SSA form right now. */
4412 gcc_assert (!gimple_in_ssa_p (cfun));
4413 if (fd->collapse > 1)
4415 int first_zero_iter = -1, dummy = -1;
4416 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4418 counts = XALLOCAVEC (tree, fd->collapse);
4419 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4420 zero_iter_bb, first_zero_iter,
4421 dummy_bb, dummy, l2_dom_bb);
4423 if (l2_dom_bb == NULL)
4424 l2_dom_bb = l1_bb;
4426 n1 = fd->loop.n1;
4427 n2 = fd->loop.n2;
4428 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4430 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4431 OMP_CLAUSE__LOOPTEMP_);
4432 gcc_assert (innerc);
4433 n1 = OMP_CLAUSE_DECL (innerc);
4434 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4435 OMP_CLAUSE__LOOPTEMP_);
4436 gcc_assert (innerc);
4437 n2 = OMP_CLAUSE_DECL (innerc);
4439 tree step = fd->loop.step;
4441 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4442 OMP_CLAUSE__SIMT_);
4443 if (is_simt)
4445 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4446 is_simt = safelen_int > 1;
4448 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4449 if (is_simt)
4451 simt_lane = create_tmp_var (unsigned_type_node);
4452 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4453 gimple_call_set_lhs (g, simt_lane);
4454 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4455 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4456 fold_convert (TREE_TYPE (step), simt_lane));
4457 n1 = fold_convert (type, n1);
4458 if (POINTER_TYPE_P (type))
4459 n1 = fold_build_pointer_plus (n1, offset);
4460 else
4461 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4463 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4464 if (fd->collapse > 1)
4465 simt_maxlane = build_one_cst (unsigned_type_node);
4466 else if (safelen_int < omp_max_simt_vf ())
4467 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4468 tree vf
4469 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4470 unsigned_type_node, 0);
4471 if (simt_maxlane)
4472 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4473 vf = fold_convert (TREE_TYPE (step), vf);
4474 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4477 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4478 if (fd->collapse > 1)
4480 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4482 gsi_prev (&gsi);
4483 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4484 gsi_next (&gsi);
4486 else
4487 for (i = 0; i < fd->collapse; i++)
4489 tree itype = TREE_TYPE (fd->loops[i].v);
4490 if (POINTER_TYPE_P (itype))
4491 itype = signed_type_for (itype);
4492 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4493 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4497 /* Remove the GIMPLE_OMP_FOR statement. */
4498 gsi_remove (&gsi, true);
4500 if (!broken_loop)
4502 /* Code to control the increment goes in the CONT_BB. */
4503 gsi = gsi_last_nondebug_bb (cont_bb);
4504 stmt = gsi_stmt (gsi);
4505 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4507 if (POINTER_TYPE_P (type))
4508 t = fold_build_pointer_plus (fd->loop.v, step);
4509 else
4510 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4511 expand_omp_build_assign (&gsi, fd->loop.v, t);
4513 if (fd->collapse > 1)
4515 i = fd->collapse - 1;
4516 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4518 t = fold_convert (sizetype, fd->loops[i].step);
4519 t = fold_build_pointer_plus (fd->loops[i].v, t);
4521 else
4523 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4524 fd->loops[i].step);
4525 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4526 fd->loops[i].v, t);
4528 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4530 for (i = fd->collapse - 1; i > 0; i--)
4532 tree itype = TREE_TYPE (fd->loops[i].v);
4533 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4534 if (POINTER_TYPE_P (itype2))
4535 itype2 = signed_type_for (itype2);
4536 t = fold_convert (itype2, fd->loops[i - 1].step);
4537 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4538 GSI_SAME_STMT);
4539 t = build3 (COND_EXPR, itype2,
4540 build2 (fd->loops[i].cond_code, boolean_type_node,
4541 fd->loops[i].v,
4542 fold_convert (itype, fd->loops[i].n2)),
4543 build_int_cst (itype2, 0), t);
4544 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4545 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4546 else
4547 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4548 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4550 t = fold_convert (itype, fd->loops[i].n1);
4551 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4552 GSI_SAME_STMT);
4553 t = build3 (COND_EXPR, itype,
4554 build2 (fd->loops[i].cond_code, boolean_type_node,
4555 fd->loops[i].v,
4556 fold_convert (itype, fd->loops[i].n2)),
4557 fd->loops[i].v, t);
4558 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4562 /* Remove GIMPLE_OMP_CONTINUE. */
4563 gsi_remove (&gsi, true);
4566 /* Emit the condition in L1_BB. */
4567 gsi = gsi_start_bb (l1_bb);
4569 t = fold_convert (type, n2);
4570 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4571 false, GSI_CONTINUE_LINKING);
4572 tree v = fd->loop.v;
4573 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4574 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4575 false, GSI_CONTINUE_LINKING);
4576 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4577 cond_stmt = gimple_build_cond_empty (t);
4578 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4579 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4580 NULL, NULL)
4581 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4582 NULL, NULL))
4584 gsi = gsi_for_stmt (cond_stmt);
4585 gimple_regimplify_operands (cond_stmt, &gsi);
4588 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4589 if (is_simt)
4591 gsi = gsi_start_bb (l2_bb);
4592 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4593 if (POINTER_TYPE_P (type))
4594 t = fold_build_pointer_plus (fd->loop.v, step);
4595 else
4596 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4597 expand_omp_build_assign (&gsi, fd->loop.v, t);
4600 /* Remove GIMPLE_OMP_RETURN. */
4601 gsi = gsi_last_nondebug_bb (exit_bb);
4602 gsi_remove (&gsi, true);
4604 /* Connect the new blocks. */
4605 remove_edge (FALLTHRU_EDGE (entry_bb));
4607 if (!broken_loop)
4609 remove_edge (BRANCH_EDGE (entry_bb));
4610 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4612 e = BRANCH_EDGE (l1_bb);
4613 ne = FALLTHRU_EDGE (l1_bb);
4614 e->flags = EDGE_TRUE_VALUE;
4616 else
4618 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4620 ne = single_succ_edge (l1_bb);
4621 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4624 ne->flags = EDGE_FALSE_VALUE;
4625 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4626 ne->probability = e->probability.invert ();
4628 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4629 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4631 if (simt_maxlane)
4633 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4634 NULL_TREE, NULL_TREE);
4635 gsi = gsi_last_bb (entry_bb);
4636 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4637 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4638 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4639 FALLTHRU_EDGE (entry_bb)->probability
4640 = profile_probability::guessed_always ().apply_scale (7, 8);
4641 BRANCH_EDGE (entry_bb)->probability
4642 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4643 l2_dom_bb = entry_bb;
4645 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4647 if (!broken_loop)
4649 struct loop *loop = alloc_loop ();
4650 loop->header = l1_bb;
4651 loop->latch = cont_bb;
4652 add_loop (loop, l1_bb->loop_father);
4653 loop->safelen = safelen_int;
4654 if (simduid)
4656 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4657 cfun->has_simduid_loops = true;
4659 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4660 the loop. */
4661 if ((flag_tree_loop_vectorize
4662 || !global_options_set.x_flag_tree_loop_vectorize)
4663 && flag_tree_loop_optimize
4664 && loop->safelen > 1)
4666 loop->force_vectorize = true;
4667 cfun->has_force_vectorize_loops = true;
4670 else if (simduid)
4671 cfun->has_simduid_loops = true;
4674 /* Taskloop construct is represented after gimplification with
4675 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4676 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4677 which should just compute all the needed loop temporaries
4678 for GIMPLE_OMP_TASK. */
4680 static void
4681 expand_omp_taskloop_for_outer (struct omp_region *region,
4682 struct omp_for_data *fd,
4683 gimple *inner_stmt)
4685 tree type, bias = NULL_TREE;
4686 basic_block entry_bb, cont_bb, exit_bb;
4687 gimple_stmt_iterator gsi;
4688 gassign *assign_stmt;
4689 tree *counts = NULL;
4690 int i;
4692 gcc_assert (inner_stmt);
4693 gcc_assert (region->cont);
4694 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4695 && gimple_omp_task_taskloop_p (inner_stmt));
4696 type = TREE_TYPE (fd->loop.v);
4698 /* See if we need to bias by LLONG_MIN. */
4699 if (fd->iter_type == long_long_unsigned_type_node
4700 && TREE_CODE (type) == INTEGER_TYPE
4701 && !TYPE_UNSIGNED (type))
4703 tree n1, n2;
4705 if (fd->loop.cond_code == LT_EXPR)
4707 n1 = fd->loop.n1;
4708 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4710 else
4712 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4713 n2 = fd->loop.n1;
4715 if (TREE_CODE (n1) != INTEGER_CST
4716 || TREE_CODE (n2) != INTEGER_CST
4717 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4718 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4721 entry_bb = region->entry;
4722 cont_bb = region->cont;
4723 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4724 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4725 exit_bb = region->exit;
4727 gsi = gsi_last_nondebug_bb (entry_bb);
4728 gimple *for_stmt = gsi_stmt (gsi);
4729 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4730 if (fd->collapse > 1)
4732 int first_zero_iter = -1, dummy = -1;
4733 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4735 counts = XALLOCAVEC (tree, fd->collapse);
4736 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4737 zero_iter_bb, first_zero_iter,
4738 dummy_bb, dummy, l2_dom_bb);
4740 if (zero_iter_bb)
4742 /* Some counts[i] vars might be uninitialized if
4743 some loop has zero iterations. But the body shouldn't
4744 be executed in that case, so just avoid uninit warnings. */
4745 for (i = first_zero_iter; i < fd->collapse; i++)
4746 if (SSA_VAR_P (counts[i]))
4747 TREE_NO_WARNING (counts[i]) = 1;
4748 gsi_prev (&gsi);
4749 edge e = split_block (entry_bb, gsi_stmt (gsi));
4750 entry_bb = e->dest;
4751 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4752 gsi = gsi_last_bb (entry_bb);
4753 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4754 get_immediate_dominator (CDI_DOMINATORS,
4755 zero_iter_bb));
4759 tree t0, t1;
4760 t1 = fd->loop.n2;
4761 t0 = fd->loop.n1;
4762 if (POINTER_TYPE_P (TREE_TYPE (t0))
4763 && TYPE_PRECISION (TREE_TYPE (t0))
4764 != TYPE_PRECISION (fd->iter_type))
4766 /* Avoid casting pointers to integer of a different size. */
4767 tree itype = signed_type_for (type);
4768 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4769 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4771 else
4773 t1 = fold_convert (fd->iter_type, t1);
4774 t0 = fold_convert (fd->iter_type, t0);
4776 if (bias)
4778 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4779 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4782 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4783 OMP_CLAUSE__LOOPTEMP_);
4784 gcc_assert (innerc);
4785 tree startvar = OMP_CLAUSE_DECL (innerc);
4786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4787 gcc_assert (innerc);
4788 tree endvar = OMP_CLAUSE_DECL (innerc);
4789 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4791 gcc_assert (innerc);
4792 for (i = 1; i < fd->collapse; i++)
4794 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4795 OMP_CLAUSE__LOOPTEMP_);
4796 gcc_assert (innerc);
4798 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4799 OMP_CLAUSE__LOOPTEMP_);
4800 if (innerc)
4802 /* If needed (inner taskloop has lastprivate clause), propagate
4803 down the total number of iterations. */
4804 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4805 NULL_TREE, false,
4806 GSI_CONTINUE_LINKING);
4807 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4808 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4812 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4813 GSI_CONTINUE_LINKING);
4814 assign_stmt = gimple_build_assign (startvar, t0);
4815 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4817 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4818 GSI_CONTINUE_LINKING);
4819 assign_stmt = gimple_build_assign (endvar, t1);
4820 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4821 if (fd->collapse > 1)
4822 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4824 /* Remove the GIMPLE_OMP_FOR statement. */
4825 gsi = gsi_for_stmt (for_stmt);
4826 gsi_remove (&gsi, true);
4828 gsi = gsi_last_nondebug_bb (cont_bb);
4829 gsi_remove (&gsi, true);
4831 gsi = gsi_last_nondebug_bb (exit_bb);
4832 gsi_remove (&gsi, true);
4834 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4835 remove_edge (BRANCH_EDGE (entry_bb));
4836 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4837 remove_edge (BRANCH_EDGE (cont_bb));
4838 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4839 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4840 recompute_dominator (CDI_DOMINATORS, region->entry));
4843 /* Taskloop construct is represented after gimplification with
4844 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4845 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4846 GOMP_taskloop{,_ull} function arranges for each task to be given just
4847 a single range of iterations. */
4849 static void
4850 expand_omp_taskloop_for_inner (struct omp_region *region,
4851 struct omp_for_data *fd,
4852 gimple *inner_stmt)
4854 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4855 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4856 basic_block fin_bb;
4857 gimple_stmt_iterator gsi;
4858 edge ep;
4859 bool broken_loop = region->cont == NULL;
4860 tree *counts = NULL;
4861 tree n1, n2, step;
4863 itype = type = TREE_TYPE (fd->loop.v);
4864 if (POINTER_TYPE_P (type))
4865 itype = signed_type_for (type);
4867 /* See if we need to bias by LLONG_MIN. */
4868 if (fd->iter_type == long_long_unsigned_type_node
4869 && TREE_CODE (type) == INTEGER_TYPE
4870 && !TYPE_UNSIGNED (type))
4872 tree n1, n2;
4874 if (fd->loop.cond_code == LT_EXPR)
4876 n1 = fd->loop.n1;
4877 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4879 else
4881 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4882 n2 = fd->loop.n1;
4884 if (TREE_CODE (n1) != INTEGER_CST
4885 || TREE_CODE (n2) != INTEGER_CST
4886 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4887 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4890 entry_bb = region->entry;
4891 cont_bb = region->cont;
4892 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4893 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4894 gcc_assert (broken_loop
4895 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4896 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4897 if (!broken_loop)
4899 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4900 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4902 exit_bb = region->exit;
4904 /* Iteration space partitioning goes in ENTRY_BB. */
4905 gsi = gsi_last_nondebug_bb (entry_bb);
4906 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4908 if (fd->collapse > 1)
4910 int first_zero_iter = -1, dummy = -1;
4911 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4913 counts = XALLOCAVEC (tree, fd->collapse);
4914 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4915 fin_bb, first_zero_iter,
4916 dummy_bb, dummy, l2_dom_bb);
4917 t = NULL_TREE;
4919 else
4920 t = integer_one_node;
4922 step = fd->loop.step;
4923 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4924 OMP_CLAUSE__LOOPTEMP_);
4925 gcc_assert (innerc);
4926 n1 = OMP_CLAUSE_DECL (innerc);
4927 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4928 gcc_assert (innerc);
4929 n2 = OMP_CLAUSE_DECL (innerc);
4930 if (bias)
4932 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4933 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4935 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4936 true, NULL_TREE, true, GSI_SAME_STMT);
4937 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4938 true, NULL_TREE, true, GSI_SAME_STMT);
4939 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4940 true, NULL_TREE, true, GSI_SAME_STMT);
4942 tree startvar = fd->loop.v;
4943 tree endvar = NULL_TREE;
4945 if (gimple_omp_for_combined_p (fd->for_stmt))
4947 tree clauses = gimple_omp_for_clauses (inner_stmt);
4948 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4949 gcc_assert (innerc);
4950 startvar = OMP_CLAUSE_DECL (innerc);
4951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4952 OMP_CLAUSE__LOOPTEMP_);
4953 gcc_assert (innerc);
4954 endvar = OMP_CLAUSE_DECL (innerc);
4956 t = fold_convert (TREE_TYPE (startvar), n1);
4957 t = force_gimple_operand_gsi (&gsi, t,
4958 DECL_P (startvar)
4959 && TREE_ADDRESSABLE (startvar),
4960 NULL_TREE, false, GSI_CONTINUE_LINKING);
4961 gimple *assign_stmt = gimple_build_assign (startvar, t);
4962 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4964 t = fold_convert (TREE_TYPE (startvar), n2);
4965 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4966 false, GSI_CONTINUE_LINKING);
4967 if (endvar)
4969 assign_stmt = gimple_build_assign (endvar, e);
4970 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4971 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4972 assign_stmt = gimple_build_assign (fd->loop.v, e);
4973 else
4974 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4975 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4977 if (fd->collapse > 1)
4978 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4980 if (!broken_loop)
4982 /* The code controlling the sequential loop replaces the
4983 GIMPLE_OMP_CONTINUE. */
4984 gsi = gsi_last_nondebug_bb (cont_bb);
4985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4987 vmain = gimple_omp_continue_control_use (cont_stmt);
4988 vback = gimple_omp_continue_control_def (cont_stmt);
4990 if (!gimple_omp_for_combined_p (fd->for_stmt))
4992 if (POINTER_TYPE_P (type))
4993 t = fold_build_pointer_plus (vmain, step);
4994 else
4995 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4996 t = force_gimple_operand_gsi (&gsi, t,
4997 DECL_P (vback)
4998 && TREE_ADDRESSABLE (vback),
4999 NULL_TREE, true, GSI_SAME_STMT);
5000 assign_stmt = gimple_build_assign (vback, t);
5001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5003 t = build2 (fd->loop.cond_code, boolean_type_node,
5004 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5005 ? t : vback, e);
5006 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5009 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5010 gsi_remove (&gsi, true);
5012 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5013 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5016 /* Remove the GIMPLE_OMP_FOR statement. */
5017 gsi = gsi_for_stmt (fd->for_stmt);
5018 gsi_remove (&gsi, true);
5020 /* Remove the GIMPLE_OMP_RETURN statement. */
5021 gsi = gsi_last_nondebug_bb (exit_bb);
5022 gsi_remove (&gsi, true);
5024 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5025 if (!broken_loop)
5026 remove_edge (BRANCH_EDGE (entry_bb));
5027 else
5029 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5030 region->outer->cont = NULL;
5033 /* Connect all the blocks. */
5034 if (!broken_loop)
5036 ep = find_edge (cont_bb, body_bb);
5037 if (gimple_omp_for_combined_p (fd->for_stmt))
5039 remove_edge (ep);
5040 ep = NULL;
5042 else if (fd->collapse > 1)
5044 remove_edge (ep);
5045 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5047 else
5048 ep->flags = EDGE_TRUE_VALUE;
5049 find_edge (cont_bb, fin_bb)->flags
5050 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5053 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5054 recompute_dominator (CDI_DOMINATORS, body_bb));
5055 if (!broken_loop)
5056 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5057 recompute_dominator (CDI_DOMINATORS, fin_bb));
5059 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5061 struct loop *loop = alloc_loop ();
5062 loop->header = body_bb;
5063 if (collapse_bb == NULL)
5064 loop->latch = cont_bb;
5065 add_loop (loop, body_bb->loop_father);
5069 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5070 partitioned loop. The lowering here is abstracted, in that the
5071 loop parameters are passed through internal functions, which are
5072 further lowered by oacc_device_lower, once we get to the target
5073 compiler. The loop is of the form:
5075 for (V = B; V LTGT E; V += S) {BODY}
5077 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5078 (constant 0 for no chunking) and we will have a GWV partitioning
5079 mask, specifying dimensions over which the loop is to be
5080 partitioned (see note below). We generate code that looks like
5081 (this ignores tiling):
5083 <entry_bb> [incoming FALL->body, BRANCH->exit]
5084 typedef signedintify (typeof (V)) T; // underlying signed integral type
5085 T range = E - B;
5086 T chunk_no = 0;
5087 T dir = LTGT == '<' ? +1 : -1;
5088 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5089 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5091 <head_bb> [created by splitting end of entry_bb]
5092 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5093 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5094 if (!(offset LTGT bound)) goto bottom_bb;
5096 <body_bb> [incoming]
5097 V = B + offset;
5098 {BODY}
5100 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5101 offset += step;
5102 if (offset LTGT bound) goto body_bb; [*]
5104 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5105 chunk_no++;
5106 if (chunk_no < chunk_max) goto head_bb;
5108 <exit_bb> [incoming]
5109 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5111 [*] Needed if V live at end of loop. */
/* REGION supplies the entry, exit and continue blocks of the loop
   (region->entry, region->exit, region->cont; the latter may be NULL).
   FD supplies the loop variable, condition code, bounds and step
   (fd->loop) together with the collapse count and the tiling flag.
   Nothing is returned; the statements and edges of the region are
   rewritten in place.  */
5113 static void
5114 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5116 tree v = fd->loop.v;
5117 enum tree_code cond_code = fd->loop.cond_code;
5118 enum tree_code plus_code = PLUS_EXPR;
/* Defaults: a chunk size of -1 and a zero GWV mask.  The chunk size is
   replaced by zero below when no chunking is done, and the mask is
   replaced below when the function is in SSA form.  */
5120 tree chunk_size = integer_minus_one_node;
5121 tree gwv = integer_zero_node;
5122 tree iter_type = TREE_TYPE (v);
5123 tree diff_type = iter_type;
5124 tree plus_type = iter_type;
5125 struct oacc_collapse *counts = NULL;
5127 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5128 == GF_OMP_FOR_KIND_OACC_LOOP);
5129 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5130 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
/* Pointer iteration variables are advanced with POINTER_PLUS_EXPR and a
   sizetype addend.  */
5132 if (POINTER_TYPE_P (iter_type))
5134 plus_code = POINTER_PLUS_EXPR;
5135 plus_type = sizetype;
/* Range, offset and step arithmetic is done in DIFF_TYPE: a signed type
   with at least the precision of int.  */
5137 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5138 diff_type = signed_type_for (diff_type);
5139 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5140 diff_type = integer_type_node;
5142 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5143 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5144 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5145 basic_block bottom_bb = NULL;
5147 /* entry_bb has two successors; the branch edge is to the exit
5148 block, fallthrough edge to body. */
5149 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5150 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5152 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5153 body_bb, or to a block whose only successor is the body_bb. Its
5154 fallthrough successor is the final block (same as the branch
5155 successor of the entry_bb). */
5156 if (cont_bb)
5158 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5159 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5161 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5162 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5164 else
/* A region without a continue block is only accepted outside SSA form.  */
5165 gcc_assert (!gimple_in_ssa_p (cfun));
5167 /* The exit block only has entry_bb and cont_bb as predecessors. */
5168 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5170 tree chunk_no;
5171 tree chunk_max = NULL_TREE;
5172 tree bound, offset;
5173 tree step = create_tmp_var (diff_type, ".step");
5174 bool up = cond_code == LT_EXPR;
5175 tree dir = build_int_cst (diff_type, up ? +1 : -1);
/* Chunking is only done when the function is not in SSA form.  */
5176 bool chunking = !gimple_in_ssa_p (cfun);
5177 bool negating;
5179 /* Tiling vars. */
5180 tree tile_size = NULL_TREE;
5181 tree element_s = NULL_TREE;
5182 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5183 basic_block elem_body_bb = NULL;
5184 basic_block elem_cont_bb = NULL;
5186 /* SSA instances. */
5187 tree offset_incr = NULL_TREE;
5188 tree offset_init = NULL_TREE;
5190 gimple_stmt_iterator gsi;
5191 gassign *ass;
5192 gcall *call;
5193 gimple *stmt;
5194 tree expr;
5195 location_t loc;
5196 edge split, be, fte;
5198 /* Split the end of entry_bb to create head_bb. */
5199 split = split_block (entry_bb, last_stmt (entry_bb));
5200 basic_block head_bb = split->dest;
5201 entry_bb = split->src;
5203 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5204 gsi = gsi_last_nondebug_bb (entry_bb);
5205 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5206 loc = gimple_location (for_stmt);
5208 if (gimple_in_ssa_p (cfun))
5210 offset_init = gimple_omp_for_index (for_stmt, 0);
5211 gcc_assert (integer_zerop (fd->loop.n1));
5212 /* The SSA parallelizer does gang parallelism. */
5213 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
/* Collapsed or tiled loops are only handled outside SSA form and for
   upward-counting loops.  The total computed by
   expand_oacc_collapse_init is stored into fd->loop.n2 when that is a
   variable.  */
5216 if (fd->collapse > 1 || fd->tiling)
5218 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5219 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5220 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5221 TREE_TYPE (fd->loop.n2), loc);
5223 if (SSA_VAR_P (fd->loop.n2))
5225 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5226 true, GSI_SAME_STMT);
5227 ass = gimple_build_assign (fd->loop.n2, total);
5228 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5232 tree b = fd->loop.n1;
5233 tree e = fd->loop.n2;
5234 tree s = fd->loop.step;
5236 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5237 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5239 /* Convert the step, avoiding possible unsigned->signed overflow. */
5240 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5241 if (negating)
5242 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5243 s = fold_convert (diff_type, s);
5244 if (negating)
5245 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5246 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5248 if (!chunking)
5249 chunk_size = integer_zero_node;
5250 expr = fold_convert (diff_type, chunk_size);
5251 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5252 NULL_TREE, true, GSI_SAME_STMT);
5254 if (fd->tiling)
5256 /* Determine the tile size and element step,
5257 modify the outer loop step size. */
5258 tile_size = create_tmp_var (diff_type, ".tile_size");
5259 expr = build_int_cst (diff_type, 1);
5260 for (int ix = 0; ix < fd->collapse; ix++)
5261 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5262 expr = force_gimple_operand_gsi (&gsi, expr, true,
5263 NULL_TREE, true, GSI_SAME_STMT);
5264 ass = gimple_build_assign (tile_size, expr);
5265 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
/* ELEMENT_S keeps the original step; S becomes the step multiplied by
   the tile size.  */
5267 element_s = create_tmp_var (diff_type, ".element_s");
5268 ass = gimple_build_assign (element_s, s);
5269 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5271 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5272 s = force_gimple_operand_gsi (&gsi, expr, true,
5273 NULL_TREE, true, GSI_SAME_STMT);
5276 /* Determine the range, avoiding possible unsigned->signed overflow. */
5277 negating = !up && TYPE_UNSIGNED (iter_type);
5278 expr = fold_build2 (MINUS_EXPR, plus_type,
5279 fold_convert (plus_type, negating ? b : e),
5280 fold_convert (plus_type, negating ? e : b));
5281 expr = fold_convert (diff_type, expr);
5282 if (negating)
5283 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5284 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5285 NULL_TREE, true, GSI_SAME_STMT);
/* CHUNK_NO starts as the constant zero.  When chunking it is replaced by
   a temporary initialized to that zero, and the number of chunks is
   requested through IFN_GOACC_LOOP.  Otherwise the zero constant is
   reused as the chunk size.  */
5287 chunk_no = build_int_cst (diff_type, 0);
5288 if (chunking)
5290 gcc_assert (!gimple_in_ssa_p (cfun));
5292 expr = chunk_no;
5293 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5294 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5296 ass = gimple_build_assign (chunk_no, expr);
5297 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5299 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5300 build_int_cst (integer_type_node,
5301 IFN_GOACC_LOOP_CHUNKS),
5302 dir, range, s, chunk_size, gwv);
5303 gimple_call_set_lhs (call, chunk_max);
5304 gimple_set_location (call, loc);
5305 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5307 else
5308 chunk_size = chunk_no;
5310 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5311 build_int_cst (integer_type_node,
5312 IFN_GOACC_LOOP_STEP),
5313 dir, range, s, chunk_size, gwv);
5314 gimple_call_set_lhs (call, step);
5315 gimple_set_location (call, loc);
5316 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5318 /* Remove the GIMPLE_OMP_FOR. */
5319 gsi_remove (&gsi, true);
5321 /* Fixup edges from head_bb. */
/* The branch edge gains EDGE_FALSE_VALUE; EDGE_FALLTHRU and
   EDGE_TRUE_VALUE are toggled on the fallthrough edge (presumably
   swapping fallthrough for true -- the edge is expected to carry only
   EDGE_FALLTHRU at this point).  */
5322 be = BRANCH_EDGE (head_bb);
5323 fte = FALLTHRU_EDGE (head_bb);
5324 be->flags |= EDGE_FALSE_VALUE;
5325 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5327 basic_block body_bb = fte->dest;
/* In SSA form the offset use/def come from the GIMPLE_OMP_CONTINUE;
   otherwise a single temporary serves as initial value, use and
   incremented value.  */
5329 if (gimple_in_ssa_p (cfun))
5331 gsi = gsi_last_nondebug_bb (cont_bb);
5332 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5334 offset = gimple_omp_continue_control_use (cont_stmt);
5335 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5337 else
5339 offset = create_tmp_var (diff_type, ".offset");
5340 offset_init = offset_incr = offset;
5342 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5344 /* Loop offset & bound go into head_bb. */
5345 gsi = gsi_start_bb (head_bb);
5347 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5348 build_int_cst (integer_type_node,
5349 IFN_GOACC_LOOP_OFFSET),
5350 dir, range, s,
5351 chunk_size, gwv, chunk_no);
5352 gimple_call_set_lhs (call, offset_init);
5353 gimple_set_location (call, loc);
5354 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5356 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5357 build_int_cst (integer_type_node,
5358 IFN_GOACC_LOOP_BOUND),
5359 dir, range, s,
5360 chunk_size, gwv, offset_init);
5361 gimple_call_set_lhs (call, bound);
5362 gimple_set_location (call, loc);
5363 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5365 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5366 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5367 GSI_CONTINUE_LINKING);
5369 /* V assignment goes into body_bb. */
5370 if (!gimple_in_ssa_p (cfun))
5372 gsi = gsi_start_bb (body_bb);
5374 expr = build2 (plus_code, iter_type, b,
5375 fold_convert (plus_type, offset));
5376 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5377 true, GSI_SAME_STMT);
5378 ass = gimple_build_assign (v, expr);
5379 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5381 if (fd->collapse > 1 || fd->tiling)
5382 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5384 if (fd->tiling)
5386 /* Determine the range of the element loop -- usually simply
5387 the tile_size, but could be smaller if the final
5388 iteration of the outer loop is a partial tile. */
5389 tree e_range = create_tmp_var (diff_type, ".e_range");
5391 expr = build2 (MIN_EXPR, diff_type,
5392 build2 (MINUS_EXPR, diff_type, bound, offset),
5393 build2 (MULT_EXPR, diff_type, tile_size,
5394 element_s));
5395 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5396 true, GSI_SAME_STMT);
5397 ass = gimple_build_assign (e_range, expr);
5398 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5400 /* Determine bound, offset & step of inner loop. */
5401 e_bound = create_tmp_var (diff_type, ".e_bound");
5402 e_offset = create_tmp_var (diff_type, ".e_offset");
5403 e_step = create_tmp_var (diff_type, ".e_step");
5405 /* Mark these as element loops. */
5406 tree t, e_gwv = integer_minus_one_node;
5407 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5409 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5410 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5411 element_s, chunk, e_gwv, chunk);
5412 gimple_call_set_lhs (call, e_offset);
5413 gimple_set_location (call, loc);
5414 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5416 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5417 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5418 element_s, chunk, e_gwv, e_offset);
5419 gimple_call_set_lhs (call, e_bound);
5420 gimple_set_location (call, loc);
5421 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5423 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5424 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5425 element_s, chunk, e_gwv);
5426 gimple_call_set_lhs (call, e_step);
5427 gimple_set_location (call, loc);
5428 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5430 /* Add test and split block. */
5431 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5432 stmt = gimple_build_cond_empty (expr);
5433 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5434 split = split_block (body_bb, stmt);
5435 elem_body_bb = split->dest;
/* If the continue block was the body block itself, it is now the second
   half of the split.  */
5436 if (cont_bb == body_bb)
5437 cont_bb = elem_body_bb;
5438 body_bb = split->src;
5440 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5442 /* Initialize the user's loop vars. */
5443 gsi = gsi_start_bb (elem_body_bb);
5444 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5448 /* Loop increment goes into cont_bb. If this is not a loop, we
5449 will have spawned threads as if it was, and each one will
5450 execute one iteration. The specification is not explicit about
5451 whether such constructs are ill-formed or not, and they can
5452 occur, especially when noreturn routines are involved. */
5453 if (cont_bb)
5455 gsi = gsi_last_nondebug_bb (cont_bb);
5456 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5457 loc = gimple_location (cont_stmt);
5459 if (fd->tiling)
5461 /* Insert element loop increment and test. */
5462 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5463 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5464 true, GSI_SAME_STMT);
5465 ass = gimple_build_assign (e_offset, expr);
5466 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5467 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5469 stmt = gimple_build_cond_empty (expr);
5470 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5471 split = split_block (cont_bb, stmt);
5472 elem_cont_bb = split->src;
5473 cont_bb = split->dest;
5475 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5476 split->probability = profile_probability::unlikely ().guessed ();
5477 edge latch_edge
5478 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5479 latch_edge->probability = profile_probability::likely ().guessed ();
/* NOTE(review): dest_idx looks like an index into the destination's
   predecessor list, yet it is used here to select the other successor
   of body_bb.  Confirm that the two indices are guaranteed to agree.  */
5481 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5482 skip_edge->probability = profile_probability::unlikely ().guessed ();
5483 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5484 loop_entry_edge->probability
5485 = profile_probability::likely ().guessed ();
/* Splitting moved the continue statement into the new cont_bb; point
   the iterator at it again.  */
5487 gsi = gsi_for_stmt (cont_stmt);
5490 /* Increment offset. */
5491 if (gimple_in_ssa_p (cfun))
5492 expr = build2 (plus_code, iter_type, offset,
5493 fold_convert (plus_type, step));
5494 else
5495 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5496 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5497 true, GSI_SAME_STMT);
5498 ass = gimple_build_assign (offset_incr, expr);
5499 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5500 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5501 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5503 /* Remove the GIMPLE_OMP_CONTINUE. */
5504 gsi_remove (&gsi, true);
5506 /* Fixup edges from cont_bb. */
5507 be = BRANCH_EDGE (cont_bb);
5508 fte = FALLTHRU_EDGE (cont_bb);
5509 be->flags |= EDGE_TRUE_VALUE;
5510 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5512 if (chunking)
5514 /* Split the beginning of exit_bb to make bottom_bb. We
5515 need to insert a nop at the start, because splitting is
5516 after a stmt, not before. */
5517 gsi = gsi_start_bb (exit_bb);
5518 stmt = gimple_build_nop ();
5519 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5520 split = split_block (exit_bb, stmt);
5521 bottom_bb = split->src;
5522 exit_bb = split->dest;
5523 gsi = gsi_last_bb (bottom_bb);
5525 /* Chunk increment and test goes into bottom_bb. */
5526 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5527 build_int_cst (diff_type, 1));
5528 ass = gimple_build_assign (chunk_no, expr);
5529 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5531 /* Chunk test at end of bottom_bb. */
5532 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5533 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5534 GSI_CONTINUE_LINKING);
5536 /* Fixup edges from bottom_bb. */
5537 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5538 split->probability = profile_probability::unlikely ().guessed ();
5539 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5540 latch_edge->probability = profile_probability::likely ().guessed ();
5544 gsi = gsi_last_nondebug_bb (exit_bb);
5545 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5546 loc = gimple_location (gsi_stmt (gsi));
5548 if (!gimple_in_ssa_p (cfun))
5550 /* Insert the final value of V, in case it is live. This is the
5551 value for the only thread that survives past the join. */
/* Computes B + ((range - dir + S) / S) * S, with truncating division.  */
5552 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5553 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5554 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5555 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5556 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5557 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5558 true, GSI_SAME_STMT);
5559 ass = gimple_build_assign (v, expr);
5560 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5563 /* Remove the OMP_RETURN. */
5564 gsi_remove (&gsi, true);
5566 if (cont_bb)
5568 /* We now have one, two or three nested loops. Update the loop
5569 structures. */
5570 struct loop *parent = entry_bb->loop_father;
5571 struct loop *body = body_bb->loop_father;
/* When chunking, a new loop headed by head_bb is registered and becomes
   the parent of the body loop.  Otherwise, if the body already sits in
   a loop different from the entry block's, that loop is checked and no
   new body loop is created.  */
5573 if (chunking)
5575 struct loop *chunk_loop = alloc_loop ();
5576 chunk_loop->header = head_bb;
5577 chunk_loop->latch = bottom_bb;
5578 add_loop (chunk_loop, parent);
5579 parent = chunk_loop;
5581 else if (parent != body)
5583 gcc_assert (body->header == body_bb);
5584 gcc_assert (body->latch == cont_bb
5585 || single_pred (body->latch) == cont_bb);
5586 parent = NULL;
5589 if (parent)
5591 struct loop *body_loop = alloc_loop ();
5592 body_loop->header = body_bb;
5593 body_loop->latch = cont_bb;
5594 add_loop (body_loop, parent);
5596 if (fd->tiling)
5598 /* Insert tiling's element loop. */
5599 struct loop *inner_loop = alloc_loop ();
5600 inner_loop->header = elem_body_bb;
5601 inner_loop->latch = elem_cont_bb;
5602 add_loop (inner_loop, body_loop);
5608 /* Expand the OMP loop defined by REGION. */
5610 static void
5611 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5613 struct omp_for_data fd;
5614 struct omp_for_data_loop *loops;
5616 loops
5617 = (struct omp_for_data_loop *)
5618 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5619 * sizeof (struct omp_for_data_loop));
5620 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5621 &fd, loops);
5622 region->sched_kind = fd.sched_kind;
5623 region->sched_modifiers = fd.sched_modifiers;
5625 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5626 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5627 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5628 if (region->cont)
5630 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5631 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5632 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5634 else
5635 /* If there isn't a continue then this is a degerate case where
5636 the introduction of abnormal edges during lowering will prevent
5637 original loops from being detected. Fix that up. */
5638 loops_state_set (LOOPS_NEED_FIXUP);
5640 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5641 expand_omp_simd (region, &fd);
5642 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5644 gcc_assert (!inner_stmt);
5645 expand_oacc_for (region, &fd);
5647 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5649 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5650 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5651 else
5652 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5654 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5655 && !fd.have_ordered)
5657 if (fd.chunk_size == NULL)
5658 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5659 else
5660 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5662 else
5664 int fn_index, start_ix, next_ix;
5666 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5667 == GF_OMP_FOR_KIND_FOR);
5668 if (fd.chunk_size == NULL
5669 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5670 fd.chunk_size = integer_zero_node;
5671 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5672 switch (fd.sched_kind)
5674 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5675 fn_index = 3;
5676 break;
5677 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5678 case OMP_CLAUSE_SCHEDULE_GUIDED:
5679 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5680 && !fd.ordered
5681 && !fd.have_ordered)
5683 fn_index = 3 + fd.sched_kind;
5684 break;
5686 /* FALLTHRU */
5687 default:
5688 fn_index = fd.sched_kind;
5689 break;
5691 if (!fd.ordered)
5692 fn_index += fd.have_ordered * 6;
5693 if (fd.ordered)
5694 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5695 else
5696 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5697 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5698 if (fd.iter_type == long_long_unsigned_type_node)
5700 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5701 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5702 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5703 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5705 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5706 (enum built_in_function) next_ix, inner_stmt);
5709 if (gimple_in_ssa_p (cfun))
5710 update_ssa (TODO_update_ssa_only_virtuals);
5713 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5715 v = GOMP_sections_start (n);
L0:
5717 switch (v)
5719 case 0:
5720 goto L2;
5721 case 1:
5722 section 1;
5723 goto L1;
5724 case 2:
...
5726 case n:
...
5728 default:
5729 abort ();
L1:
5732 v = GOMP_sections_next ();
5733 goto L0;
L2:
5735 reduction;
5737 If this is a combined parallel sections, replace the call to
5738 GOMP_sections_start with call to GOMP_sections_next. */
/* REGION supplies the entry block (ending in GIMPLE_OMP_SECTIONS), the
   continue block (L1, may be NULL), the exit block (L2) and the list of
   inner section regions.  Nothing is returned; the region is rewritten
   in place.  */
5740 static void
5741 expand_omp_sections (struct omp_region *region)
5743 tree t, u, vin = NULL, vmain, vnext, l2;
5744 unsigned len;
5745 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5746 gimple_stmt_iterator si, switch_si;
5747 gomp_sections *sections_stmt;
5748 gimple *stmt;
5749 gomp_continue *cont;
5750 edge_iterator ei;
5751 edge e;
5752 struct omp_region *inner;
5753 unsigned i, casei;
/* The exit is treated as reachable exactly when the region has a
   continue block.  */
5754 bool exit_reachable = region->cont != NULL;
5756 gcc_assert (region->exit != NULL);
5757 entry_bb = region->entry;
5758 l0_bb = single_succ (entry_bb);
5759 l1_bb = region->cont;
5760 l2_bb = region->exit;
/* Pick the label that case 0 jumps to.  If L2_BB's only predecessor is
   L0_BB, that is L2_BB itself.  Otherwise look among the successors of
   L0_BB for a block that does not end in GIMPLE_OMP_SECTION, trying the
   last successor first.  */
5761 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5762 l2 = gimple_block_label (l2_bb);
5763 else
5765 /* This can happen if there are reductions. */
5766 len = EDGE_COUNT (l0_bb->succs);
5767 gcc_assert (len > 0);
5768 e = EDGE_SUCC (l0_bb, len - 1);
5769 si = gsi_last_nondebug_bb (e->dest);
5770 l2 = NULL_TREE;
5771 if (gsi_end_p (si)
5772 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5773 l2 = gimple_block_label (e->dest);
5774 else
5775 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5777 si = gsi_last_nondebug_bb (e->dest);
5778 if (gsi_end_p (si)
5779 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5781 l2 = gimple_block_label (e->dest);
5782 break;
/* The block for the default case is created after the block preceding
   L1_BB when the exit is reachable, and after L0_BB otherwise.  */
5786 if (exit_reachable)
5787 default_bb = create_empty_bb (l1_bb->prev_bb);
5788 else
5789 default_bb = create_empty_bb (l0_bb);
5791 /* We will build a switch() with enough cases for all the
5792 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5793 and a default case to abort if something goes wrong. */
5794 len = EDGE_COUNT (l0_bb->succs);
5796 /* Use vec::quick_push on label_vec throughout, since we know the size
5797 in advance. */
5798 auto_vec<tree> label_vec (len);
5800 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5801 GIMPLE_OMP_SECTIONS statement. */
5802 si = gsi_last_nondebug_bb (entry_bb);
5803 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5804 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5805 vin = gimple_omp_sections_control (sections_stmt);
5806 if (!is_combined_parallel (region))
5808 /* If we are not inside a combined parallel+sections region,
5809 call GOMP_sections_start. */
/* LEN - 1 is passed as the argument -- presumably one successor of
   L0_BB is the edge towards L2 rather than a section; confirm.  */
5810 t = build_int_cst (unsigned_type_node, len - 1);
5811 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5812 stmt = gimple_build_call (u, 1, t);
5814 else
5816 /* Otherwise, call GOMP_sections_next. */
5817 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5818 stmt = gimple_build_call (u, 0);
5820 gimple_call_set_lhs (stmt, vin);
5821 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5822 gsi_remove (&si, true);
5824 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5825 L0_BB. */
5826 switch_si = gsi_last_nondebug_bb (l0_bb);
5827 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
/* With a continue block, the switch operand and the result of the
   "next" call are the use and def of the GIMPLE_OMP_CONTINUE control
   variable.  Without one, the switch reads the start call's result
   directly and there is no "next" call.  */
5828 if (exit_reachable)
5830 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5831 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5832 vmain = gimple_omp_continue_control_use (cont);
5833 vnext = gimple_omp_continue_control_def (cont);
5835 else
5837 vmain = vin;
5838 vnext = NULL_TREE;
/* Case 0 jumps to the L2 label chosen above.  */
5841 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5842 label_vec.quick_push (t);
5843 i = 1;
5845 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5846 for (inner = region->inner, casei = 1;
5847 inner;
5848 inner = inner->next, i++, casei++)
5850 basic_block s_entry_bb, s_exit_bb;
5852 /* Skip optional reduction region. */
/* The counters are decremented first so that the loop's own increment
   leaves them unchanged for a skipped region.  */
5853 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5855 --i;
5856 --casei;
5857 continue;
5860 s_entry_bb = inner->entry;
5861 s_exit_bb = inner->exit;
5863 t = gimple_block_label (s_entry_bb);
5864 u = build_int_cst (unsigned_type_node, casei);
5865 u = build_case_label (u, NULL, t);
5866 label_vec.quick_push (u);
5868 si = gsi_last_nondebug_bb (s_entry_bb);
5869 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5870 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5871 gsi_remove (&si, true);
5872 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
/* A section without an exit block has no GIMPLE_OMP_RETURN to remove.  */
5874 if (s_exit_bb == NULL)
5875 continue;
5877 si = gsi_last_nondebug_bb (s_exit_bb);
5878 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5879 gsi_remove (&si, true);
5881 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5884 /* Error handling code goes in DEFAULT_BB. */
5885 t = gimple_block_label (default_bb);
5886 u = build_case_label (NULL, NULL, t);
5887 make_edge (l0_bb, default_bb, 0);
5888 add_bb_to_loop (default_bb, current_loops->tree_root);
5890 stmt = gimple_build_switch (vmain, u, label_vec);
5891 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5892 gsi_remove (&switch_si, true);
/* The pseudo code above says abort (); the call emitted here is the
   trap builtin.  */
5894 si = gsi_start_bb (default_bb);
5895 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5896 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5898 if (exit_reachable)
5900 tree bfn_decl;
5902 /* Code to get the next section goes in L1_BB. */
5903 si = gsi_last_nondebug_bb (l1_bb);
5904 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5906 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5907 stmt = gimple_build_call (bfn_decl, 0);
5908 gimple_call_set_lhs (stmt, vnext);
5909 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5910 gsi_remove (&si, true);
5912 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5915 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
/* The end builtin is chosen from the return statement: the NOWAIT
   variant if it is marked nowait, else the CANCEL variant if it has a
   lhs (which then receives the call's result), else the plain one.  */
5916 si = gsi_last_nondebug_bb (l2_bb);
5917 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5918 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5919 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5920 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5921 else
5922 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5923 stmt = gimple_build_call (t, 0);
5924 if (gimple_omp_return_lhs (gsi_stmt (si)))
5925 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5926 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5927 gsi_remove (&si, true);
5929 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5932 /* Expand code for an OpenMP single directive. We've already expanded
5933 much of the code, here we simply place the GOMP_barrier call. */
5935 static void
5936 expand_omp_single (struct omp_region *region)
5938 basic_block entry_bb, exit_bb;
5939 gimple_stmt_iterator si;
5941 entry_bb = region->entry;
5942 exit_bb = region->exit;
5944 si = gsi_last_nondebug_bb (entry_bb);
5945 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5946 gsi_remove (&si, true);
5947 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5949 si = gsi_last_nondebug_bb (exit_bb);
5950 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5952 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5953 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5955 gsi_remove (&si, true);
5956 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5959 /* Generic expansion for OpenMP synchronization directives: master,
5960 ordered and critical. All we need to do here is remove the entry
5961 and exit markers for REGION. */
5963 static void
5964 expand_omp_synch (struct omp_region *region)
5966 basic_block entry_bb, exit_bb;
5967 gimple_stmt_iterator si;
5969 entry_bb = region->entry;
5970 exit_bb = region->exit;
5972 si = gsi_last_nondebug_bb (entry_bb);
5973 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5977 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5978 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5979 gsi_remove (&si, true);
5980 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5982 if (exit_bb)
5984 si = gsi_last_nondebug_bb (exit_bb);
5985 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5986 gsi_remove (&si, true);
5987 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5991 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
5992 operation as a normal volatile load. */
5994 static bool
5995 expand_omp_atomic_load (basic_block load_bb, tree addr,
5996 tree loaded_val, int index)
5998 enum built_in_function tmpbase;
5999 gimple_stmt_iterator gsi;
6000 basic_block store_bb;
6001 location_t loc;
6002 gimple *stmt;
6003 tree decl, call, type, itype;
6005 gsi = gsi_last_nondebug_bb (load_bb);
6006 stmt = gsi_stmt (gsi);
6007 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6008 loc = gimple_location (stmt);
6010 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6011 is smaller than word size, then expand_atomic_load assumes that the load
6012 is atomic. We could avoid the builtin entirely in this case. */
6014 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6015 decl = builtin_decl_explicit (tmpbase);
6016 if (decl == NULL_TREE)
6017 return false;
6019 type = TREE_TYPE (loaded_val);
6020 itype = TREE_TYPE (TREE_TYPE (decl));
6022 call = build_call_expr_loc (loc, decl, 2, addr,
6023 build_int_cst (NULL,
6024 gimple_omp_atomic_seq_cst_p (stmt)
6025 ? MEMMODEL_SEQ_CST
6026 : MEMMODEL_RELAXED));
6027 if (!useless_type_conversion_p (type, itype))
6028 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6029 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6031 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6032 gsi_remove (&gsi, true);
6034 store_bb = single_succ (load_bb);
6035 gsi = gsi_last_nondebug_bb (store_bb);
6036 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6037 gsi_remove (&gsi, true);
6039 if (gimple_in_ssa_p (cfun))
6040 update_ssa (TODO_update_ssa_no_phi);
6042 return true;
6045 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6046 operation as a normal volatile store. */
6048 static bool
6049 expand_omp_atomic_store (basic_block load_bb, tree addr,
6050 tree loaded_val, tree stored_val, int index)
6052 enum built_in_function tmpbase;
6053 gimple_stmt_iterator gsi;
6054 basic_block store_bb = single_succ (load_bb);
6055 location_t loc;
6056 gimple *stmt;
6057 tree decl, call, type, itype;
6058 machine_mode imode;
6059 bool exchange;
6061 gsi = gsi_last_nondebug_bb (load_bb);
6062 stmt = gsi_stmt (gsi);
6063 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6065 /* If the load value is needed, then this isn't a store but an exchange. */
6066 exchange = gimple_omp_atomic_need_value_p (stmt);
6068 gsi = gsi_last_nondebug_bb (store_bb);
6069 stmt = gsi_stmt (gsi);
6070 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6071 loc = gimple_location (stmt);
6073 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6074 is smaller than word size, then expand_atomic_store assumes that the store
6075 is atomic. We could avoid the builtin entirely in this case. */
6077 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6078 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6079 decl = builtin_decl_explicit (tmpbase);
6080 if (decl == NULL_TREE)
6081 return false;
6083 type = TREE_TYPE (stored_val);
6085 /* Dig out the type of the function's second argument. */
6086 itype = TREE_TYPE (decl);
6087 itype = TYPE_ARG_TYPES (itype);
6088 itype = TREE_CHAIN (itype);
6089 itype = TREE_VALUE (itype);
6090 imode = TYPE_MODE (itype);
6092 if (exchange && !can_atomic_exchange_p (imode, true))
6093 return false;
6095 if (!useless_type_conversion_p (itype, type))
6096 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6097 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6098 build_int_cst (NULL,
6099 gimple_omp_atomic_seq_cst_p (stmt)
6100 ? MEMMODEL_SEQ_CST
6101 : MEMMODEL_RELAXED));
6102 if (exchange)
6104 if (!useless_type_conversion_p (type, itype))
6105 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6106 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6109 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6110 gsi_remove (&gsi, true);
6112 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6113 gsi = gsi_last_nondebug_bb (load_bb);
6114 gsi_remove (&gsi, true);
6116 if (gimple_in_ssa_p (cfun))
6117 update_ssa (TODO_update_ssa_no_phi);
6119 return true;
6122 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6123 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6124 size of the data type, and thus usable to find the index of the builtin
6125 decl. Returns false if the expression is not of the proper form. */
6127 static bool
6128 expand_omp_atomic_fetch_op (basic_block load_bb,
6129 tree addr, tree loaded_val,
6130 tree stored_val, int index)
6132 enum built_in_function oldbase, newbase, tmpbase;
6133 tree decl, itype, call;
6134 tree lhs, rhs;
6135 basic_block store_bb = single_succ (load_bb);
6136 gimple_stmt_iterator gsi;
6137 gimple *stmt;
6138 location_t loc;
6139 enum tree_code code;
6140 bool need_old, need_new;
6141 machine_mode imode;
6142 bool seq_cst;
6144 /* We expect to find the following sequences:
6146 load_bb:
6147 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6149 store_bb:
6150 val = tmp OP something; (or: something OP tmp)
6151 GIMPLE_OMP_STORE (val)
6153 ???FIXME: Allow a more flexible sequence.
6154 Perhaps use data flow to pick the statements.
6158 gsi = gsi_after_labels (store_bb);
6159 stmt = gsi_stmt (gsi);
6160 if (is_gimple_debug (stmt))
6162 gsi_next_nondebug (&gsi);
6163 if (gsi_end_p (gsi))
6164 return false;
6165 stmt = gsi_stmt (gsi);
6167 loc = gimple_location (stmt);
6168 if (!is_gimple_assign (stmt))
6169 return false;
6170 gsi_next_nondebug (&gsi);
6171 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6172 return false;
6173 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6174 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6175 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6176 gcc_checking_assert (!need_old || !need_new);
6178 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6179 return false;
6181 /* Check for one of the supported fetch-op operations. */
6182 code = gimple_assign_rhs_code (stmt);
6183 switch (code)
6185 case PLUS_EXPR:
6186 case POINTER_PLUS_EXPR:
6187 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6188 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6189 break;
6190 case MINUS_EXPR:
6191 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6192 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6193 break;
6194 case BIT_AND_EXPR:
6195 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6196 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6197 break;
6198 case BIT_IOR_EXPR:
6199 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6200 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6201 break;
6202 case BIT_XOR_EXPR:
6203 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6204 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6205 break;
6206 default:
6207 return false;
6210 /* Make sure the expression is of the proper form. */
6211 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6212 rhs = gimple_assign_rhs2 (stmt);
6213 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6214 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6215 rhs = gimple_assign_rhs1 (stmt);
6216 else
6217 return false;
6219 tmpbase = ((enum built_in_function)
6220 ((need_new ? newbase : oldbase) + index + 1));
6221 decl = builtin_decl_explicit (tmpbase);
6222 if (decl == NULL_TREE)
6223 return false;
6224 itype = TREE_TYPE (TREE_TYPE (decl));
6225 imode = TYPE_MODE (itype);
6227 /* We could test all of the various optabs involved, but the fact of the
6228 matter is that (with the exception of i486 vs i586 and xadd) all targets
6229 that support any atomic operaton optab also implements compare-and-swap.
6230 Let optabs.c take care of expanding any compare-and-swap loop. */
6231 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6232 return false;
6234 gsi = gsi_last_nondebug_bb (load_bb);
6235 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6237 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6238 It only requires that the operation happen atomically. Thus we can
6239 use the RELAXED memory model. */
6240 call = build_call_expr_loc (loc, decl, 3, addr,
6241 fold_convert_loc (loc, itype, rhs),
6242 build_int_cst (NULL,
6243 seq_cst ? MEMMODEL_SEQ_CST
6244 : MEMMODEL_RELAXED));
6246 if (need_old || need_new)
6248 lhs = need_old ? loaded_val : stored_val;
6249 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6250 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6252 else
6253 call = fold_convert_loc (loc, void_type_node, call);
6254 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6255 gsi_remove (&gsi, true);
6257 gsi = gsi_last_nondebug_bb (store_bb);
6258 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6259 gsi_remove (&gsi, true);
6260 gsi = gsi_last_nondebug_bb (store_bb);
6261 stmt = gsi_stmt (gsi);
6262 gsi_remove (&gsi, true);
6264 if (gimple_in_ssa_p (cfun))
6266 release_defs (stmt);
6267 update_ssa (TODO_update_ssa_no_phi);
6270 return true;
6273 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6275 oldval = *addr;
6276 repeat:
6277 newval = rhs; // with oldval replacing *addr in rhs
6278 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6279 if (oldval != newval)
6280 goto repeat;
6282 INDEX is log2 of the size of the data type, and thus usable to find the
6283 index of the builtin decl. */
6285 static bool
6286 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6287 tree addr, tree loaded_val, tree stored_val,
6288 int index)
6290 tree loadedi, storedi, initial, new_storedi, old_vali;
6291 tree type, itype, cmpxchg, iaddr, atype;
6292 gimple_stmt_iterator si;
6293 basic_block loop_header = single_succ (load_bb);
6294 gimple *phi, *stmt;
6295 edge e;
6296 enum built_in_function fncode;
6298 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6299 order to use the RELAXED memory model effectively. */
6300 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6301 + index + 1);
6302 cmpxchg = builtin_decl_explicit (fncode);
6303 if (cmpxchg == NULL_TREE)
6304 return false;
6305 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6306 atype = type;
6307 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6309 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6310 || !can_atomic_load_p (TYPE_MODE (itype)))
6311 return false;
6313 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6314 si = gsi_last_nondebug_bb (load_bb);
6315 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6317 /* For floating-point values, we'll need to view-convert them to integers
6318 so that we can perform the atomic compare and swap. Simplify the
6319 following code by always setting up the "i"ntegral variables. */
6320 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6322 tree iaddr_val;
6324 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6325 true));
6326 atype = itype;
6327 iaddr_val
6328 = force_gimple_operand_gsi (&si,
6329 fold_convert (TREE_TYPE (iaddr), addr),
6330 false, NULL_TREE, true, GSI_SAME_STMT);
6331 stmt = gimple_build_assign (iaddr, iaddr_val);
6332 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6333 loadedi = create_tmp_var (itype);
6334 if (gimple_in_ssa_p (cfun))
6335 loadedi = make_ssa_name (loadedi);
6337 else
6339 iaddr = addr;
6340 loadedi = loaded_val;
6343 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6344 tree loaddecl = builtin_decl_explicit (fncode);
6345 if (loaddecl)
6346 initial
6347 = fold_convert (atype,
6348 build_call_expr (loaddecl, 2, iaddr,
6349 build_int_cst (NULL_TREE,
6350 MEMMODEL_RELAXED)));
6351 else
6353 tree off
6354 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6355 true), 0);
6356 initial = build2 (MEM_REF, atype, iaddr, off);
6359 initial
6360 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6361 GSI_SAME_STMT);
6363 /* Move the value to the LOADEDI temporary. */
6364 if (gimple_in_ssa_p (cfun))
6366 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6367 phi = create_phi_node (loadedi, loop_header);
6368 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6369 initial);
6371 else
6372 gsi_insert_before (&si,
6373 gimple_build_assign (loadedi, initial),
6374 GSI_SAME_STMT);
6375 if (loadedi != loaded_val)
6377 gimple_stmt_iterator gsi2;
6378 tree x;
6380 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6381 gsi2 = gsi_start_bb (loop_header);
6382 if (gimple_in_ssa_p (cfun))
6384 gassign *stmt;
6385 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6386 true, GSI_SAME_STMT);
6387 stmt = gimple_build_assign (loaded_val, x);
6388 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6390 else
6392 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6393 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6394 true, GSI_SAME_STMT);
6397 gsi_remove (&si, true);
6399 si = gsi_last_nondebug_bb (store_bb);
6400 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6402 if (iaddr == addr)
6403 storedi = stored_val;
6404 else
6405 storedi
6406 = force_gimple_operand_gsi (&si,
6407 build1 (VIEW_CONVERT_EXPR, itype,
6408 stored_val), true, NULL_TREE, true,
6409 GSI_SAME_STMT);
6411 /* Build the compare&swap statement. */
6412 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6413 new_storedi = force_gimple_operand_gsi (&si,
6414 fold_convert (TREE_TYPE (loadedi),
6415 new_storedi),
6416 true, NULL_TREE,
6417 true, GSI_SAME_STMT);
6419 if (gimple_in_ssa_p (cfun))
6420 old_vali = loadedi;
6421 else
6423 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6424 stmt = gimple_build_assign (old_vali, loadedi);
6425 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6427 stmt = gimple_build_assign (loadedi, new_storedi);
6428 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6431 /* Note that we always perform the comparison as an integer, even for
6432 floating point. This allows the atomic operation to properly
6433 succeed even with NaNs and -0.0. */
6434 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6435 stmt = gimple_build_cond_empty (ne);
6436 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6438 /* Update cfg. */
6439 e = single_succ_edge (store_bb);
6440 e->flags &= ~EDGE_FALLTHRU;
6441 e->flags |= EDGE_FALSE_VALUE;
6442 /* Expect no looping. */
6443 e->probability = profile_probability::guessed_always ();
6445 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6446 e->probability = profile_probability::guessed_never ();
6448 /* Copy the new value to loadedi (we already did that before the condition
6449 if we are not in SSA). */
6450 if (gimple_in_ssa_p (cfun))
6452 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6453 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6456 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6457 gsi_remove (&si, true);
6459 struct loop *loop = alloc_loop ();
6460 loop->header = loop_header;
6461 loop->latch = store_bb;
6462 add_loop (loop, loop_header->loop_father);
6464 if (gimple_in_ssa_p (cfun))
6465 update_ssa (TODO_update_ssa_no_phi);
6467 return true;
6470 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6472 GOMP_atomic_start ();
6473 *addr = rhs;
6474 GOMP_atomic_end ();
6476 The result is not globally atomic, but works so long as all parallel
6477 references are within #pragma omp atomic directives. According to
6478 responses received from omp@openmp.org, appears to be within spec.
6479 Which makes sense, since that's how several other compilers handle
6480 this situation as well.
6481 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6482 expanding. STORED_VAL is the operand of the matching
6483 GIMPLE_OMP_ATOMIC_STORE.
6485 We replace
6486 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6487 loaded_val = *addr;
6489 and replace
6490 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6491 *addr = stored_val;
6494 static bool
6495 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6496 tree addr, tree loaded_val, tree stored_val)
6498 gimple_stmt_iterator si;
6499 gassign *stmt;
6500 tree t;
6502 si = gsi_last_nondebug_bb (load_bb);
6503 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6505 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6506 t = build_call_expr (t, 0);
6507 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6509 tree mem = build_simple_mem_ref (addr);
6510 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6511 TREE_OPERAND (mem, 1)
6512 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6513 true),
6514 TREE_OPERAND (mem, 1));
6515 stmt = gimple_build_assign (loaded_val, mem);
6516 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6517 gsi_remove (&si, true);
6519 si = gsi_last_nondebug_bb (store_bb);
6520 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6522 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6523 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6525 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6526 t = build_call_expr (t, 0);
6527 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6528 gsi_remove (&si, true);
6530 if (gimple_in_ssa_p (cfun))
6531 update_ssa (TODO_update_ssa_no_phi);
6532 return true;
6535 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6536 using expand_omp_atomic_fetch_op. If it failed, we try to
6537 call expand_omp_atomic_pipeline, and if it fails too, the
6538 ultimate fallback is wrapping the operation in a mutex
6539 (expand_omp_atomic_mutex). REGION is the atomic region built
6540 by build_omp_regions_1(). */
6542 static void
6543 expand_omp_atomic (struct omp_region *region)
6545 basic_block load_bb = region->entry, store_bb = region->exit;
6546 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6547 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6548 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6549 tree addr = gimple_omp_atomic_load_rhs (load);
6550 tree stored_val = gimple_omp_atomic_store_val (store);
6551 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6552 HOST_WIDE_INT index;
6554 /* Make sure the type is one of the supported sizes. */
6555 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6556 index = exact_log2 (index);
6557 if (index >= 0 && index <= 4)
6559 unsigned int align = TYPE_ALIGN_UNIT (type);
6561 /* __sync builtins require strict data alignment. */
6562 if (exact_log2 (align) >= index)
6564 /* Atomic load. */
6565 scalar_mode smode;
6566 if (loaded_val == stored_val
6567 && (is_int_mode (TYPE_MODE (type), &smode)
6568 || is_float_mode (TYPE_MODE (type), &smode))
6569 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6570 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6571 return;
6573 /* Atomic store. */
6574 if ((is_int_mode (TYPE_MODE (type), &smode)
6575 || is_float_mode (TYPE_MODE (type), &smode))
6576 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6577 && store_bb == single_succ (load_bb)
6578 && first_stmt (store_bb) == store
6579 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6580 stored_val, index))
6581 return;
6583 /* When possible, use specialized atomic update functions. */
6584 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6585 && store_bb == single_succ (load_bb)
6586 && expand_omp_atomic_fetch_op (load_bb, addr,
6587 loaded_val, stored_val, index))
6588 return;
6590 /* If we don't have specialized __sync builtins, try and implement
6591 as a compare and swap loop. */
6592 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6593 loaded_val, stored_val, index))
6594 return;
6598 /* The ultimate fallback is wrapping the operation in a mutex. */
6599 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6602 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6603 at REGION_EXIT. */
6605 static void
6606 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6607 basic_block region_exit)
6609 struct loop *outer = region_entry->loop_father;
6610 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6612 /* Don't parallelize the kernels region if it contains more than one outer
6613 loop. */
6614 unsigned int nr_outer_loops = 0;
6615 struct loop *single_outer = NULL;
6616 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6618 gcc_assert (loop_outer (loop) == outer);
6620 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6621 continue;
6623 if (region_exit != NULL
6624 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6625 continue;
6627 nr_outer_loops++;
6628 single_outer = loop;
6630 if (nr_outer_loops != 1)
6631 return;
6633 for (struct loop *loop = single_outer->inner;
6634 loop != NULL;
6635 loop = loop->inner)
6636 if (loop->next)
6637 return;
6639 /* Mark the loops in the region. */
6640 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6641 loop->in_oacc_kernels_region = true;
6644 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
6646 struct GTY(()) grid_launch_attributes_trees
6648 tree kernel_dim_array_type;
6649 tree kernel_lattrs_dimnum_decl;
6650 tree kernel_lattrs_grid_decl;
6651 tree kernel_lattrs_group_decl;
6652 tree kernel_launch_attributes_type;
6655 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6657 /* Create types used to pass kernel launch attributes to target. */
6659 static void
6660 grid_create_kernel_launch_attr_types (void)
6662 if (grid_attr_trees)
6663 return;
6664 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6666 tree dim_arr_index_type
6667 = build_index_type (build_int_cst (integer_type_node, 2));
6668 grid_attr_trees->kernel_dim_array_type
6669 = build_array_type (uint32_type_node, dim_arr_index_type);
6671 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6672 grid_attr_trees->kernel_lattrs_dimnum_decl
6673 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6674 uint32_type_node);
6675 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6677 grid_attr_trees->kernel_lattrs_grid_decl
6678 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6679 grid_attr_trees->kernel_dim_array_type);
6680 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6681 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6682 grid_attr_trees->kernel_lattrs_group_decl
6683 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6684 grid_attr_trees->kernel_dim_array_type);
6685 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6686 = grid_attr_trees->kernel_lattrs_grid_decl;
6687 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6688 "__gomp_kernel_launch_attributes",
6689 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6692 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6693 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6694 of type uint32_type_node. */
6696 static void
6697 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6698 tree fld_decl, int index, tree value)
6700 tree ref = build4 (ARRAY_REF, uint32_type_node,
6701 build3 (COMPONENT_REF,
6702 grid_attr_trees->kernel_dim_array_type,
6703 range_var, fld_decl, NULL_TREE),
6704 build_int_cst (integer_type_node, index),
6705 NULL_TREE, NULL_TREE);
6706 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6709 /* Return a tree representation of a pointer to a structure with grid and
6710 work-group size information. Statements filling that information will be
6711 inserted before GSI, TGT_STMT is the target statement which has the
6712 necessary information in it. */
6714 static tree
6715 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6716 gomp_target *tgt_stmt)
6718 grid_create_kernel_launch_attr_types ();
6719 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6720 "__kernel_launch_attrs");
6722 unsigned max_dim = 0;
6723 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6724 clause;
6725 clause = OMP_CLAUSE_CHAIN (clause))
6727 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6728 continue;
6730 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6731 max_dim = MAX (dim, max_dim);
6733 grid_insert_store_range_dim (gsi, lattrs,
6734 grid_attr_trees->kernel_lattrs_grid_decl,
6735 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6736 grid_insert_store_range_dim (gsi, lattrs,
6737 grid_attr_trees->kernel_lattrs_group_decl,
6738 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6741 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6742 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6743 gcc_checking_assert (max_dim <= 2);
6744 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6745 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6746 GSI_SAME_STMT);
6747 TREE_ADDRESSABLE (lattrs) = 1;
6748 return build_fold_addr_expr (lattrs);
6751 /* Build target argument identifier from the DEVICE identifier, value
6752 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6754 static tree
6755 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6757 tree t = build_int_cst (integer_type_node, device);
6758 if (subseqent_param)
6759 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6760 build_int_cst (integer_type_node,
6761 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6762 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6763 build_int_cst (integer_type_node, id));
6764 return t;
6767 /* Like above but return it in type that can be directly stored as an element
6768 of the argument array. */
6770 static tree
6771 get_target_argument_identifier (int device, bool subseqent_param, int id)
6773 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6774 return fold_convert (ptr_type_node, t);
6777 /* Return a target argument consisting of DEVICE identifier, value identifier
6778 ID, and the actual VALUE. */
6780 static tree
6781 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6782 tree value)
6784 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6785 fold_convert (integer_type_node, value),
6786 build_int_cst (unsigned_type_node,
6787 GOMP_TARGET_ARG_VALUE_SHIFT));
6788 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6789 get_target_argument_identifier_1 (device, false, id));
6790 t = fold_convert (ptr_type_node, t);
6791 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6794 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6795 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
6796 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6797 arguments. */
6799 static void
6800 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6801 int id, tree value, vec <tree> *args)
6803 if (tree_fits_shwi_p (value)
6804 && tree_to_shwi (value) > -(1 << 15)
6805 && tree_to_shwi (value) < (1 << 15))
6806 args->quick_push (get_target_argument_value (gsi, device, id, value));
6807 else
6809 args->quick_push (get_target_argument_identifier (device, true, id));
6810 value = fold_convert (ptr_type_node, value);
6811 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6812 GSI_SAME_STMT);
6813 args->quick_push (value);
6817 /* Create an array of arguments that is then passed to GOMP_target. */
6819 static tree
6820 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6822 auto_vec <tree, 6> args;
6823 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6824 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6825 if (c)
6826 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6827 else
6828 t = integer_minus_one_node;
6829 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6830 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6832 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6833 if (c)
6834 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6835 else
6836 t = integer_minus_one_node;
6837 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6838 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6839 &args);
6841 /* Add HSA-specific grid sizes, if available. */
6842 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6843 OMP_CLAUSE__GRIDDIM_))
6845 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6846 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6847 args.quick_push (t);
6848 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6851 /* Produce more, perhaps device specific, arguments here. */
6853 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6854 args.length () + 1),
6855 ".omp_target_args");
6856 for (unsigned i = 0; i < args.length (); i++)
6858 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6859 build_int_cst (integer_type_node, i),
6860 NULL_TREE, NULL_TREE);
6861 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6862 GSI_SAME_STMT);
6864 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6865 build_int_cst (integer_type_node, args.length ()),
6866 NULL_TREE, NULL_TREE);
6867 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6868 GSI_SAME_STMT);
6869 TREE_ADDRESSABLE (argarray) = 1;
6870 return build_fold_addr_expr (argarray);
6873 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
/* Outline of the code below:
   1. Read the directive from the end of REGION->entry and classify its kind
      (OFFLOADED, DATA_REGION).
   2. If OFFLOADED, move the region body into the directive's child function
      and hand that function to the callgraph (and to OFFLOAD_FUNCS when
      ENABLE_OFFLOADING).
   3. For every kind, build the argument list and insert into NEW_BB a call
      to the runtime built-in chosen from the kind (START_IX).
   4. If not OFFLOADED, remove the directive itself; for data regions also
      remove the GIMPLE_OMP_RETURN that closes REGION.  */
6875 static void
6876 expand_omp_target (struct omp_region *region)
6878 basic_block entry_bb, exit_bb, new_bb;
6879 struct function *child_cfun;
6880 tree child_fn, block, t;
6881 gimple_stmt_iterator gsi;
6882 gomp_target *entry_stmt;
6883 gimple *stmt;
6884 edge e;
6885 bool offloaded, data_region;
/* The directive is the last statement of the region's entry block.  NEW_BB
   is the block that later receives the runtime call; it starts out as the
   entry block and is overwritten below when the body is outlined.  */
6887 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6888 new_bb = region->entry;
/* DATA_REGION is set only for the structured data kinds; it is consulted at
   the very end to decide whether a closing GIMPLE_OMP_RETURN must go.  */
6890 offloaded = is_gimple_omp_offloaded (entry_stmt);
6891 switch (gimple_omp_target_kind (entry_stmt))
6893 case GF_OMP_TARGET_KIND_REGION:
6894 case GF_OMP_TARGET_KIND_UPDATE:
6895 case GF_OMP_TARGET_KIND_ENTER_DATA:
6896 case GF_OMP_TARGET_KIND_EXIT_DATA:
6897 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6898 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6899 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6900 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6901 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6902 data_region = false;
6903 break;
6904 case GF_OMP_TARGET_KIND_DATA:
6905 case GF_OMP_TARGET_KIND_OACC_DATA:
6906 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6907 data_region = true;
6908 break;
6909 default:
6910 gcc_unreachable ();
/* CHILD_FN and CHILD_CFUN are only set for offloaded kinds.  */
6913 child_fn = NULL_TREE;
6914 child_cfun = NULL;
6915 if (offloaded)
6917 child_fn = gimple_omp_target_child_fn (entry_stmt);
6918 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6921 /* Supported by expand_omp_taskreg, but not here. */
6922 if (child_cfun != NULL)
6923 gcc_checking_assert (!child_cfun->cfg);
6924 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6926 entry_bb = region->entry;
6927 exit_bb = region->exit;
/* OpenACC kernels regions get mark_loops_in_oacc_kernels_region run over
   them and their child function tagged before anything is moved.  */
6929 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6931 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6933 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6934 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6935 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6936 DECL_ATTRIBUTES (child_fn)
6937 = tree_cons (get_identifier ("oacc kernels"),
6938 NULL_TREE, DECL_ATTRIBUTES (child_fn));
/* Outline the region body into CHILD_FN.  */
6941 if (offloaded)
6943 unsigned srcidx, dstidx, num;
6945 /* If the offloading region needs data sent from the parent
6946 function, then the very first statement (except possible
6947 tree profile counter updates) of the offloading body
6948 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6949 &.OMP_DATA_O is passed as an argument to the child function,
6950 we need to replace it with the argument as seen by the child
6951 function.
6953 In most cases, this will end up being the identity assignment
6954 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6955 a function call that has been inlined, the original PARM_DECL
6956 .OMP_DATA_I may have been converted into a different local
6957 variable. In which case, we need to keep the assignment. */
6958 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6959 if (data_arg)
6961 basic_block entry_succ_bb = single_succ (entry_bb);
6962 gimple_stmt_iterator gsi;
6963 tree arg;
6964 gimple *tgtcopy_stmt = NULL;
6965 tree sender = TREE_VEC_ELT (data_arg, 0);
/* Walk the statements of the block following ENTRY_BB until the assignment
   whose right-hand side is &SENDER is found; running off the end of the
   block trips the assert.  */
6967 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6969 gcc_assert (!gsi_end_p (gsi));
6970 stmt = gsi_stmt (gsi);
6971 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6972 continue;
6974 if (gimple_num_ops (stmt) == 2)
6976 tree arg = gimple_assign_rhs1 (stmt);
6978 /* We're ignoring the subcode because we're
6979 effectively doing a STRIP_NOPS. */
6981 if (TREE_CODE (arg) == ADDR_EXPR
6982 && TREE_OPERAND (arg, 0) == sender)
6984 tgtcopy_stmt = stmt;
6985 break;
/* The copy is asserted to assign to the head of the child function's
   argument chain and is then removed.  GSI here is the iterator declared in
   this inner scope (it shadows the function-level GSI) and still points at
   TGTCOPY_STMT.  */
6990 gcc_assert (tgtcopy_stmt != NULL);
6991 arg = DECL_ARGUMENTS (child_fn);
6993 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6994 gsi_remove (&gsi, true);
6997 /* Declare local variables needed in CHILD_CFUN. */
6998 block = DECL_INITIAL (child_fn);
6999 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7000 /* The gimplifier could record temporaries in the offloading block
7001 rather than in containing function's local_decls chain,
7002 which would mean cgraph missed finalizing them. Do it now. */
7003 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7004 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7005 varpool_node::finalize_decl (t);
7006 DECL_SAVED_TREE (child_fn) = NULL;
7007 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7008 gimple_set_body (child_fn, NULL);
7009 TREE_USED (block) = 1;
7011 /* Reset DECL_CONTEXT on function arguments. */
7012 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7013 DECL_CONTEXT (t) = child_fn;
7015 /* Split ENTRY_BB at GIMPLE_*,
7016 so that it can be moved to the child function. */
7017 gsi = gsi_last_nondebug_bb (entry_bb);
7018 stmt = gsi_stmt (gsi);
7019 gcc_assert (stmt
7020 && gimple_code (stmt) == gimple_code (entry_stmt));
7021 e = split_block (entry_bb, stmt);
7022 gsi_remove (&gsi, true);
7023 entry_bb = e->dest;
7024 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7026 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7027 if (exit_bb)
7029 gsi = gsi_last_nondebug_bb (exit_bb);
7030 gcc_assert (!gsi_end_p (gsi)
7031 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7032 stmt = gimple_build_return (NULL);
7033 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7034 gsi_remove (&gsi, true);
7037 /* Make sure to generate early debug for the function before
7038 outlining anything. */
7039 if (! gimple_in_ssa_p (cfun))
7040 (*debug_hooks->early_global_decl) (cfun->decl);
7042 /* Move the offloading region into CHILD_CFUN. */
7044 block = gimple_block (entry_stmt);
7046 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7047 if (exit_bb)
7048 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7049 /* When the OMP expansion process cannot guarantee an up-to-date
7050 loop tree arrange for the child function to fixup loops. */
7051 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7052 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7054 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7055 num = vec_safe_length (child_cfun->local_decls);
/* In-place compaction: DSTIDX trails SRCIDX, decls whose DECL_CONTEXT is the
   current (parent) function are skipped, and the vector is truncated
   afterwards if anything was dropped.  */
7056 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7058 t = (*child_cfun->local_decls)[srcidx];
7059 if (DECL_CONTEXT (t) == cfun->decl)
7060 continue;
7061 if (srcidx != dstidx)
7062 (*child_cfun->local_decls)[dstidx] = t;
7063 dstidx++;
7065 if (dstidx != num)
7066 vec_safe_truncate (child_cfun->local_decls, dstidx);
7068 /* Inform the callgraph about the new function. */
7069 child_cfun->curr_properties = cfun->curr_properties;
7070 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7071 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7072 cgraph_node *node = cgraph_node::get_create (child_fn);
7073 node->parallelized_function = 1;
7074 cgraph_node::add_new_function (child_fn, true);
7076 /* Add the new function to the offload table. */
7077 if (ENABLE_OFFLOADING)
7079 if (in_lto_p)
7080 DECL_PRESERVE_P (child_fn) = 1;
7081 vec_safe_push (offload_funcs, child_fn);
/* An assembler name is assigned to CHILD_FN below only if the current
   function already has one and the child does not.  */
7084 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7085 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7087 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7088 fixed in a following pass. */
7089 push_cfun (child_cfun);
7090 if (need_asm)
7091 assign_assembler_name_if_needed (child_fn);
7092 cgraph_edge::rebuild_edges ();
7094 /* Some EH regions might become dead, see PR34608. If
7095 pass_cleanup_cfg isn't the first pass to happen with the
7096 new child, these dead EH edges might cause problems.
7097 Clean them up now. */
7098 if (flag_exceptions)
7100 basic_block bb;
7101 bool changed = false;
7103 FOR_EACH_BB_FN (bb, cfun)
7104 changed |= gimple_purge_dead_eh_edges (bb);
7105 if (changed)
7106 cleanup_tree_cfg ();
7108 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7109 verify_loop_structure ();
7110 pop_cfun ();
/* With a dump file open (and not in SSA form), dump the freshly outlined
   function and record that a child function was dumped.  */
7112 if (dump_file && !gimple_in_ssa_p (cfun))
7114 omp_any_child_fn_dumped = true;
7115 dump_function_header (dump_file, child_fn, dump_flags);
7116 dump_function_to_file (child_fn, dump_file, dump_flags);
7120 /* Emit a library call to launch the offloading region, or do data
7121 transfers. */
7122 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7123 enum built_in_function start_ix;
7124 location_t clause_loc;
7125 unsigned int flags_i = 0;
/* Select the runtime entry point from the kind.  EXIT_DATA shares its
   built-in with ENTER_DATA and is told apart by GOMP_TARGET_FLAG_EXIT_DATA
   in FLAGS_I; both OpenACC compute kinds map to BUILT_IN_GOACC_PARALLEL.  */
7127 switch (gimple_omp_target_kind (entry_stmt))
7129 case GF_OMP_TARGET_KIND_REGION:
7130 start_ix = BUILT_IN_GOMP_TARGET;
7131 break;
7132 case GF_OMP_TARGET_KIND_DATA:
7133 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7134 break;
7135 case GF_OMP_TARGET_KIND_UPDATE:
7136 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7137 break;
7138 case GF_OMP_TARGET_KIND_ENTER_DATA:
7139 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7140 break;
7141 case GF_OMP_TARGET_KIND_EXIT_DATA:
7142 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7143 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7144 break;
7145 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7146 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7147 start_ix = BUILT_IN_GOACC_PARALLEL;
7148 break;
7149 case GF_OMP_TARGET_KIND_OACC_DATA:
7150 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7151 start_ix = BUILT_IN_GOACC_DATA_START;
7152 break;
7153 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7154 start_ix = BUILT_IN_GOACC_UPDATE;
7155 break;
7156 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7157 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7158 break;
7159 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7160 start_ix = BUILT_IN_GOACC_DECLARE;
7161 break;
7162 default:
7163 gcc_unreachable ();
7166 clauses = gimple_omp_target_clauses (entry_stmt);
7168 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7169 library choose) and there is no conditional. */
7170 cond = NULL_TREE;
7171 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7173 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7174 if (c)
7175 cond = OMP_CLAUSE_IF_EXPR (c);
/* CLAUSE_LOC becomes the location of the device clause when there is one and
   the location of the directive otherwise.  */
7177 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7178 if (c)
7180 /* Even if we pass it to all library function calls, it is currently only
7181 defined/used for the OpenMP target ones. */
7182 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7183 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7184 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7185 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7187 device = OMP_CLAUSE_DEVICE_ID (c);
7188 clause_loc = OMP_CLAUSE_LOCATION (c);
7190 else
7191 clause_loc = gimple_location (entry_stmt);
7193 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7194 if (c)
7195 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7197 /* Ensure 'device' is of the correct type. */
7198 device = fold_convert_loc (clause_loc, integer_type_node, device);
7200 /* If we found the clause 'if (cond)', build
7201 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7202 if (cond)
/* The selection is materialized as a diamond in the CFG: COND_BB ends in the
   condition, THEN_BB stores DEVICE and ELSE_BB stores
   GOMP_DEVICE_HOST_FALLBACK into TMP_VAR, and both fall through to NEW_BB.  */
7204 cond = gimple_boolify (cond);
7206 basic_block cond_bb, then_bb, else_bb;
7207 edge e;
7208 tree tmp_var;
7210 tmp_var = create_tmp_var (TREE_TYPE (device));
/* Choose the split point: after the labels of NEW_BB for offloaded regions,
   otherwise at the statement preceding the last non-debug one.  */
7211 if (offloaded)
7212 e = split_block_after_labels (new_bb);
7213 else
7215 gsi = gsi_last_nondebug_bb (new_bb);
7216 gsi_prev (&gsi);
7217 e = split_block (new_bb, gsi_stmt (gsi));
7219 cond_bb = e->src;
7220 new_bb = e->dest;
7221 remove_edge (e);
7223 then_bb = create_empty_bb (cond_bb);
7224 else_bb = create_empty_bb (then_bb);
7225 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7226 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7228 stmt = gimple_build_cond_empty (cond);
7229 gsi = gsi_last_bb (cond_bb);
7230 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7232 gsi = gsi_start_bb (then_bb);
7233 stmt = gimple_build_assign (tmp_var, device);
7234 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7236 gsi = gsi_start_bb (else_bb);
7237 stmt = gimple_build_assign (tmp_var,
7238 build_int_cst (integer_type_node,
7239 GOMP_DEVICE_HOST_FALLBACK));
7240 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7242 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7243 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7244 add_bb_to_loop (then_bb, cond_bb->loop_father);
7245 add_bb_to_loop (else_bb, cond_bb->loop_father);
7246 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7247 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7249 device = tmp_var;
7250 gsi = gsi_last_nondebug_bb (new_bb);
7252 else
7254 gsi = gsi_last_nondebug_bb (new_bb);
7255 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7256 true, GSI_SAME_STMT);
/* T1..T4 describe the mapped data.  Without a data argument they are a zero
   size and three null pointers; otherwise T1 is the upper bound of the index
   domain of element 1's array type plus one, and T2..T4 are the addresses of
   elements 0, 1 and 2 of the data-argument vector (presumably the address,
   size and kind arrays -- confirm against the lowering pass).  */
7259 t = gimple_omp_target_data_arg (entry_stmt);
7260 if (t == NULL)
7262 t1 = size_zero_node;
7263 t2 = build_zero_cst (ptr_type_node);
7264 t3 = t2;
7265 t4 = t2;
7267 else
7269 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7270 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7271 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7272 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7273 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7276 gimple *g;
7277 bool tagging = false;
7278 /* The maximum number used by any start_ix, without varargs. */
7279 auto_vec<tree, 11> args;
/* Arguments common to every built-in: the device, the child function's
   address for offloaded kinds, then T1..T4.  */
7280 args.quick_push (device);
7281 if (offloaded)
7282 args.quick_push (build_fold_addr_expr (child_fn));
7283 args.quick_push (t1);
7284 args.quick_push (t2);
7285 args.quick_push (t3);
7286 args.quick_push (t4);
/* Append the arguments specific to START_IX.  */
7287 switch (start_ix)
7289 case BUILT_IN_GOACC_DATA_START:
7290 case BUILT_IN_GOACC_DECLARE:
7291 case BUILT_IN_GOMP_TARGET_DATA:
7292 break;
7293 case BUILT_IN_GOMP_TARGET:
7294 case BUILT_IN_GOMP_TARGET_UPDATE:
7295 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
/* These OpenMP entry points additionally take the flags and the depend
   clause's decl (or a null pointer constant); GOMP_target also takes the
   array built by get_target_arguments.  */
7296 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7297 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7298 if (c)
7299 depend = OMP_CLAUSE_DECL (c);
7300 else
7301 depend = build_int_cst (ptr_type_node, 0);
7302 args.quick_push (depend);
7303 if (start_ix == BUILT_IN_GOMP_TARGET)
7304 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7305 break;
7306 case BUILT_IN_GOACC_PARALLEL:
/* TAGGING switches the async/wait arguments below to the packed form
   produced by oacc_launch_pack.  */
7307 oacc_set_fn_attrib (child_fn, clauses, &args);
7308 tagging = true;
7309 /* FALLTHRU */
7310 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7311 case BUILT_IN_GOACC_UPDATE:
7313 tree t_async = NULL_TREE;
7315 /* If present, use the value specified by the respective
7316 clause, making sure that is of the correct type. */
7317 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7318 if (c)
7319 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7320 integer_type_node,
7321 OMP_CLAUSE_ASYNC_EXPR (c));
7322 else if (!tagging)
7323 /* Default values for t_async. */
7324 t_async = fold_convert_loc (gimple_location (entry_stmt),
7325 integer_type_node,
7326 build_int_cst (integer_type_node,
7327 GOMP_ASYNC_SYNC));
/* In tagging mode an async tag is pushed only if an async clause was given;
   a constant smaller than GOMP_LAUNCH_OP_MAX travels inside the tag,
   anything else is pushed as a separate argument right after it.  */
7328 if (tagging && t_async)
7330 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7332 if (TREE_CODE (t_async) == INTEGER_CST)
7334 /* See if we can pack the async arg in to the tag's
7335 operand. */
7336 i_async = TREE_INT_CST_LOW (t_async);
7337 if (i_async < GOMP_LAUNCH_OP_MAX)
7338 t_async = NULL_TREE;
7339 else
7340 i_async = GOMP_LAUNCH_OP_MAX;
7342 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7343 i_async));
7345 if (t_async)
7346 args.safe_push (t_async);
7348 /* Save the argument index, and ... */
7349 unsigned t_wait_idx = args.length ();
7350 unsigned num_waits = 0;
7351 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7352 if (!tagging || c)
7353 /* ... push a placeholder. */
7354 args.safe_push (integer_zero_node);
/* Collect every wait clause from the first one found to the end of the
   clause chain.  */
7356 for (; c; c = OMP_CLAUSE_CHAIN (c))
7357 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7359 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7360 integer_type_node,
7361 OMP_CLAUSE_WAIT_EXPR (c)));
7362 num_waits++;
7365 if (!tagging || num_waits)
7367 tree len;
7369 /* Now that we know the number, update the placeholder. */
7370 if (tagging)
7371 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7372 else
7373 len = build_int_cst (integer_type_node, num_waits);
7374 len = fold_convert_loc (gimple_location (entry_stmt),
7375 unsigned_type_node, len);
7376 args[t_wait_idx] = len;
7379 break;
7380 default:
7381 gcc_unreachable ();
7383 if (tagging)
7384 /* Push terminal marker - zero. */
7385 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
/* Emit the call before the statement GSI points at, carrying the
   directive's location.  */
7387 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7388 gimple_set_location (g, gimple_location (entry_stmt));
7389 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* For non-offloaded kinds GSI is asserted to still address the directive,
   which is removed now that the call precedes it.  */
7390 if (!offloaded)
7392 g = gsi_stmt (gsi);
7393 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7394 gsi_remove (&gsi, true);
/* A data region with an exit block also has an explicit end marker to
   remove.  */
7396 if (data_region && region->exit)
7398 gsi = gsi_last_nondebug_bb (region->exit);
7399 g = gsi_stmt (gsi);
7400 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7401 gsi_remove (&gsi, true);
7405 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7406 iteration variable derived from the thread number. INTRA_GROUP means this
7407 is an expansion of a loop iterating over work-items within a separate
7408 iteration over groups. */
7410 static void
7411 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7413 gimple_stmt_iterator gsi;
7414 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7415 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7416 == GF_OMP_FOR_KIND_GRID_LOOP);
7417 size_t collapse = gimple_omp_for_collapse (for_stmt);
7418 struct omp_for_data_loop *loops
7419 = XALLOCAVEC (struct omp_for_data_loop,
7420 gimple_omp_for_collapse (for_stmt));
7421 struct omp_for_data fd;
7423 remove_edge (BRANCH_EDGE (kfor->entry));
7424 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7426 gcc_assert (kfor->cont);
7427 omp_extract_for_data (for_stmt, &fd, loops);
7429 gsi = gsi_start_bb (body_bb);
7431 for (size_t dim = 0; dim < collapse; dim++)
7433 tree type, itype;
7434 itype = type = TREE_TYPE (fd.loops[dim].v);
7435 if (POINTER_TYPE_P (type))
7436 itype = signed_type_for (type);
7438 tree n1 = fd.loops[dim].n1;
7439 tree step = fd.loops[dim].step;
7440 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7441 true, NULL_TREE, true, GSI_SAME_STMT);
7442 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7443 true, NULL_TREE, true, GSI_SAME_STMT);
7444 tree threadid;
7445 if (gimple_omp_for_grid_group_iter (for_stmt))
7447 gcc_checking_assert (!intra_group);
7448 threadid = build_call_expr (builtin_decl_explicit
7449 (BUILT_IN_HSA_WORKGROUPID), 1,
7450 build_int_cstu (unsigned_type_node, dim));
7452 else if (intra_group)
7453 threadid = build_call_expr (builtin_decl_explicit
7454 (BUILT_IN_HSA_WORKITEMID), 1,
7455 build_int_cstu (unsigned_type_node, dim));
7456 else
7457 threadid = build_call_expr (builtin_decl_explicit
7458 (BUILT_IN_HSA_WORKITEMABSID), 1,
7459 build_int_cstu (unsigned_type_node, dim));
7460 threadid = fold_convert (itype, threadid);
7461 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7462 true, GSI_SAME_STMT);
7464 tree startvar = fd.loops[dim].v;
7465 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7466 if (POINTER_TYPE_P (type))
7467 t = fold_build_pointer_plus (n1, t);
7468 else
7469 t = fold_build2 (PLUS_EXPR, type, t, n1);
7470 t = fold_convert (type, t);
7471 t = force_gimple_operand_gsi (&gsi, t,
7472 DECL_P (startvar)
7473 && TREE_ADDRESSABLE (startvar),
7474 NULL_TREE, true, GSI_SAME_STMT);
7475 gassign *assign_stmt = gimple_build_assign (startvar, t);
7476 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7478 /* Remove the omp for statement. */
7479 gsi = gsi_last_nondebug_bb (kfor->entry);
7480 gsi_remove (&gsi, true);
7482 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7483 gsi = gsi_last_nondebug_bb (kfor->cont);
7484 gcc_assert (!gsi_end_p (gsi)
7485 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7486 gsi_remove (&gsi, true);
7488 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7489 gsi = gsi_last_nondebug_bb (kfor->exit);
7490 gcc_assert (!gsi_end_p (gsi)
7491 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7492 if (intra_group)
7493 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7494 gsi_remove (&gsi, true);
7496 /* Fixup the much simpler CFG. */
7497 remove_edge (find_edge (kfor->cont, body_bb));
7499 if (kfor->cont != body_bb)
7500 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7501 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7504 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7505 argument_decls. */
7507 struct grid_arg_decl_map
7509 tree old_arg;
7510 tree new_arg;
7513 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7514 pertaining to kernel function. */
7516 static tree
7517 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7519 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7520 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7521 tree t = *tp;
7523 if (t == adm->old_arg)
7524 *tp = adm->new_arg;
7525 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7526 return NULL_TREE;
7529 /* If TARGET region contains a kernel body for loop, remove its region from the
7530 TARGET and expand it in HSA gridified kernel fashion. */
/* Outline of the code below: nothing happens unless HSA code generation is
   requested.  A target without a GIMPLE_OMP_GRID_BODY inner region merely
   has its existing child function registered as an HSA kernel (plain OpenMP
   target regions only).  Otherwise the grid body region and the grid loop
   inside it are unlinked from the region tree, the loop is expanded by
   grid_expand_omp_for_loop, and the body is moved into a new function whose
   decl is a copy of the target's child function decl; that new function is
   registered as an HSA kernel together with the original child.  */
7532 static void
7533 grid_expand_target_grid_body (struct omp_region *target)
7535 if (!hsa_gen_requested_p ())
7536 return;
7538 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7539 struct omp_region **pp;
/* Look for the first inner region that is a grid body; PP ends up addressing
   the link that holds it, or the terminating null link.  */
7541 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7542 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7543 break;
7545 struct omp_region *gpukernel = *pp;
7547 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
/* No grid body: register the existing child function as the kernel and
   stop.  A _griddim_ clause is asserted to be absent in this case.  */
7548 if (!gpukernel)
7550 /* HSA cannot handle OACC stuff. */
7551 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7552 return;
7553 gcc_checking_assert (orig_child_fndecl);
7554 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7555 OMP_CLAUSE__GRIDDIM_));
7556 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7558 hsa_register_kernel (n);
7559 return;
/* From here on a grid body exists, and a _griddim_ clause must too.  */
7562 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7563 OMP_CLAUSE__GRIDDIM_));
7564 tree inside_block
7565 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
/* Unlink the grid body region from TARGET's list of inner regions, then look
   for the first loop region inside it.  */
7566 *pp = gpukernel->next;
7567 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7568 if ((*pp)->type == GIMPLE_OMP_FOR)
7569 break;
7571 struct omp_region *kfor = *pp;
7572 gcc_assert (kfor);
7573 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7574 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
/* The grid loop region is unlinked as well; it is expanded explicitly
   further down rather than through expand_omp.  */
7575 *pp = kfor->next;
/* When the kernel loop iterates over groups, every inner GIMPLE_OMP_FOR
   region is expanded as an intra-group grid loop and unlinked; whatever
   remains inside KFOR goes through the regular expand_omp.  */
7576 if (kfor->inner)
7578 if (gimple_omp_for_grid_group_iter (for_stmt))
7580 struct omp_region **next_pp;
7581 for (pp = &kfor->inner; *pp; pp = next_pp)
7583 next_pp = &(*pp)->next;
7584 if ((*pp)->type != GIMPLE_OMP_FOR)
7585 continue;
7586 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7587 gcc_assert (gimple_omp_for_kind (inner)
7588 == GF_OMP_FOR_KIND_GRID_LOOP);
7589 grid_expand_omp_for_loop (*pp, true);
/* Drop the region just expanded and stay on the same link so that its
   successor is examined next.  */
7590 *pp = (*pp)->next;
7591 next_pp = pp;
7594 expand_omp (kfor->inner);
7596 if (gpukernel->inner)
7597 expand_omp (gpukernel->inner);
/* Create the decl of the kernel function as a copy of the original child
   function's decl, give it a name of its own and a fresh outermost BLOCK,
   and set up its struct function.  */
7599 tree kern_fndecl = copy_node (orig_child_fndecl);
7600 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7601 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7602 tree tgtblock = gimple_block (tgt_stmt);
7603 tree fniniblock = make_node (BLOCK);
7604 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7605 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7606 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7607 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7608 DECL_INITIAL (kern_fndecl) = fniniblock;
7609 push_struct_function (kern_fndecl);
7610 cfun->function_end_locus = gimple_location (tgt_stmt);
7611 init_tree_ssa (cfun);
7612 pop_cfun ();
7614 /* Make sure to generate early debug for the function before
7615 outlining anything. */
7616 if (! gimple_in_ssa_p (cfun))
7617 (*debug_hooks->early_global_decl) (cfun->decl);
/* The kernel gets private copies of its parameter (asserted to be the only
   one) and of its void result decl, both re-parented to KERN_FNDECL.  */
7619 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7620 gcc_assert (!DECL_CHAIN (old_parm_decl));
7621 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7622 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7623 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7624 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7625 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7626 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7627 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7628 kern_cfun->curr_properties = cfun->curr_properties;
/* Expand the kernel loop itself (not as an intra-group loop).  */
7630 grid_expand_omp_for_loop (kfor, false);
7632 /* Remove the statement that opens the grid body region. */
7633 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7634 gsi_remove (&gsi, true);
7635 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7636 return. */
7637 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7638 gcc_assert (!gsi_end_p (gsi)
7639 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7640 gimple *ret_stmt = gimple_build_return (NULL);
7641 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7642 gsi_remove (&gsi, true);
7644 /* Statements in the first BB in the target construct have been produced by
7645 target lowering and must be copied inside the GPUKERNEL, with the two
7646 exceptions of the first OMP statement and the OMP_DATA assignment
7647 statement. */
7648 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7649 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7650 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7651 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7652 !gsi_end_p (tsi); gsi_next (&tsi))
7654 gimple *stmt = gsi_stmt (tsi);
7655 if (is_gimple_omp (stmt))
7656 break;
7657 if (sender
7658 && is_gimple_assign (stmt)
7659 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7660 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7661 continue;
7662 gimple *copy = gimple_copy (stmt);
7663 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7664 gimple_set_block (copy, fniniblock);
/* Move the kernel body into the new function.  */
7667 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7668 gpukernel->exit, inside_block);
/* Make the callgraph aware of the kernel, force its output and register it
   with HSA together with the original child function.  */
7670 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7671 kcn->mark_force_output ();
7672 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7674 hsa_register_kernel (kcn, orig_child);
7676 cgraph_node::add_new_function (kern_fndecl, true);
7677 push_cfun (kern_cfun);
7678 cgraph_edge::rebuild_edges ();
7680 /* Re-map any mention of the PARM_DECL of the original function to the
7681 PARM_DECL of the new one.
7683 TODO: It would be great if lowering produced references into the GPU
7684 kernel decl straight away and we did not have to do this. */
7685 struct grid_arg_decl_map adm;
7686 adm.old_arg = old_parm_decl;
7687 adm.new_arg = new_parm_decl;
7688 basic_block bb;
7689 FOR_EACH_BB_FN (bb, kern_cfun)
7691 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7693 gimple *stmt = gsi_stmt (gsi);
7694 struct walk_stmt_info wi;
7695 memset (&wi, 0, sizeof (wi));
7696 wi.info = &adm;
7697 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7700 pop_cfun ();
7702 return;
7705 /* Expand the parallel region tree rooted at REGION. Expansion
7706 proceeds in depth-first order. Innermost regions are expanded
7707 first. This way, parallel regions that require a new function to
7708 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7709 internal dependencies in their body. */
7711 static void
7712 expand_omp (struct omp_region *region)
7714 omp_any_child_fn_dumped = false;
7715 while (region)
7717 location_t saved_location;
7718 gimple *inner_stmt = NULL;
7720 /* First, determine whether this is a combined parallel+workshare
7721 region. */
7722 if (region->type == GIMPLE_OMP_PARALLEL)
7723 determine_parallel_type (region);
7724 else if (region->type == GIMPLE_OMP_TARGET)
7725 grid_expand_target_grid_body (region);
7727 if (region->type == GIMPLE_OMP_FOR
7728 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7729 inner_stmt = last_stmt (region->inner->entry);
7731 if (region->inner)
7732 expand_omp (region->inner);
7734 saved_location = input_location;
7735 if (gimple_has_location (last_stmt (region->entry)))
7736 input_location = gimple_location (last_stmt (region->entry));
7738 switch (region->type)
7740 case GIMPLE_OMP_PARALLEL:
7741 case GIMPLE_OMP_TASK:
7742 expand_omp_taskreg (region);
7743 break;
7745 case GIMPLE_OMP_FOR:
7746 expand_omp_for (region, inner_stmt);
7747 break;
7749 case GIMPLE_OMP_SECTIONS:
7750 expand_omp_sections (region);
7751 break;
7753 case GIMPLE_OMP_SECTION:
7754 /* Individual omp sections are handled together with their
7755 parent GIMPLE_OMP_SECTIONS region. */
7756 break;
7758 case GIMPLE_OMP_SINGLE:
7759 expand_omp_single (region);
7760 break;
7762 case GIMPLE_OMP_ORDERED:
7764 gomp_ordered *ord_stmt
7765 = as_a <gomp_ordered *> (last_stmt (region->entry));
7766 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7767 OMP_CLAUSE_DEPEND))
7769 /* We'll expand these when expanding corresponding
7770 worksharing region with ordered(n) clause. */
7771 gcc_assert (region->outer
7772 && region->outer->type == GIMPLE_OMP_FOR);
7773 region->ord_stmt = ord_stmt;
7774 break;
7777 /* FALLTHRU */
7778 case GIMPLE_OMP_MASTER:
7779 case GIMPLE_OMP_TASKGROUP:
7780 case GIMPLE_OMP_CRITICAL:
7781 case GIMPLE_OMP_TEAMS:
7782 expand_omp_synch (region);
7783 break;
7785 case GIMPLE_OMP_ATOMIC_LOAD:
7786 expand_omp_atomic (region);
7787 break;
7789 case GIMPLE_OMP_TARGET:
7790 expand_omp_target (region);
7791 break;
7793 default:
7794 gcc_unreachable ();
7797 input_location = saved_location;
7798 region = region->next;
7800 if (omp_any_child_fn_dumped)
7802 if (dump_file)
7803 dump_function_header (dump_file, current_function_decl, dump_flags);
7804 omp_any_child_fn_dumped = false;
7808 /* Helper for build_omp_regions. Scan the dominator tree starting at
7809 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7810 true, the function ends once a single tree is built (otherwise, whole
7811 forest of OMP constructs may be built). */
7813 static void
7814 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7815 bool single_tree)
7817 gimple_stmt_iterator gsi;
7818 gimple *stmt;
7819 basic_block son;
7821 gsi = gsi_last_nondebug_bb (bb);
7822 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7824 struct omp_region *region;
7825 enum gimple_code code;
7827 stmt = gsi_stmt (gsi);
7828 code = gimple_code (stmt);
7829 if (code == GIMPLE_OMP_RETURN)
7831 /* STMT is the return point out of region PARENT. Mark it
7832 as the exit point and make PARENT the immediately
7833 enclosing region. */
7834 gcc_assert (parent);
7835 region = parent;
7836 region->exit = bb;
7837 parent = parent->outer;
7839 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7841 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7842 GIMPLE_OMP_RETURN, but matches with
7843 GIMPLE_OMP_ATOMIC_LOAD. */
7844 gcc_assert (parent);
7845 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7846 region = parent;
7847 region->exit = bb;
7848 parent = parent->outer;
7850 else if (code == GIMPLE_OMP_CONTINUE)
7852 gcc_assert (parent);
7853 parent->cont = bb;
7855 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7857 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7858 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7860 else
7862 region = new_omp_region (bb, code, parent);
7863 /* Otherwise... */
7864 if (code == GIMPLE_OMP_TARGET)
7866 switch (gimple_omp_target_kind (stmt))
7868 case GF_OMP_TARGET_KIND_REGION:
7869 case GF_OMP_TARGET_KIND_DATA:
7870 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7871 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7872 case GF_OMP_TARGET_KIND_OACC_DATA:
7873 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7874 break;
7875 case GF_OMP_TARGET_KIND_UPDATE:
7876 case GF_OMP_TARGET_KIND_ENTER_DATA:
7877 case GF_OMP_TARGET_KIND_EXIT_DATA:
7878 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7879 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7880 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7881 /* ..., other than for those stand-alone directives... */
7882 region = NULL;
7883 break;
7884 default:
7885 gcc_unreachable ();
7888 else if (code == GIMPLE_OMP_ORDERED
7889 && omp_find_clause (gimple_omp_ordered_clauses
7890 (as_a <gomp_ordered *> (stmt)),
7891 OMP_CLAUSE_DEPEND))
7892 /* #pragma omp ordered depend is also just a stand-alone
7893 directive. */
7894 region = NULL;
7895 /* ..., this directive becomes the parent for a new region. */
7896 if (region)
7897 parent = region;
7901 if (single_tree && !parent)
7902 return;
7904 for (son = first_dom_son (CDI_DOMINATORS, bb);
7905 son;
7906 son = next_dom_son (CDI_DOMINATORS, son))
7907 build_omp_regions_1 (son, parent, single_tree);
7910 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7911 root_omp_region. */
7913 static void
7914 build_omp_regions_root (basic_block root)
7916 gcc_assert (root_omp_region == NULL);
7917 build_omp_regions_1 (root, NULL, true);
7918 gcc_assert (root_omp_region != NULL);
7921 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7923 void
7924 omp_expand_local (basic_block head)
7926 build_omp_regions_root (head);
7927 if (dump_file && (dump_flags & TDF_DETAILS))
7929 fprintf (dump_file, "\nOMP region tree\n\n");
7930 dump_omp_region (dump_file, root_omp_region, 0);
7931 fprintf (dump_file, "\n");
7934 remove_exit_barriers (root_omp_region);
7935 expand_omp (root_omp_region);
7937 omp_free_regions ();
7940 /* Scan the CFG and build a tree of OMP regions. Return the root of
7941 the OMP region tree. */
7943 static void
7944 build_omp_regions (void)
7946 gcc_assert (root_omp_region == NULL);
7947 calculate_dominance_info (CDI_DOMINATORS);
7948 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7951 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7953 static unsigned int
7954 execute_expand_omp (void)
7956 build_omp_regions ();
7958 if (!root_omp_region)
7959 return 0;
7961 if (dump_file)
7963 fprintf (dump_file, "\nOMP region tree\n\n");
7964 dump_omp_region (dump_file, root_omp_region, 0);
7965 fprintf (dump_file, "\n");
7968 remove_exit_barriers (root_omp_region);
7970 expand_omp (root_omp_region);
7972 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7973 verify_loop_structure ();
7974 cleanup_tree_cfg ();
7976 omp_free_regions ();
7978 return 0;
7981 /* OMP expansion -- the default pass, run before creation of SSA form. */
7983 namespace {
7985 const pass_data pass_data_expand_omp =
7987 GIMPLE_PASS, /* type */
7988 "ompexp", /* name */
7989 OPTGROUP_OMP, /* optinfo_flags */
7990 TV_NONE, /* tv_id */
7991 PROP_gimple_any, /* properties_required */
7992 PROP_gimple_eomp, /* properties_provided */
7993 0, /* properties_destroyed */
7994 0, /* todo_flags_start */
7995 0, /* todo_flags_finish */
7998 class pass_expand_omp : public gimple_opt_pass
8000 public:
8001 pass_expand_omp (gcc::context *ctxt)
8002 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8005 /* opt_pass methods: */
8006 virtual unsigned int execute (function *)
8008 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8009 || flag_openmp_simd != 0)
8010 && !seen_error ());
8012 /* This pass always runs, to provide PROP_gimple_eomp.
8013 But often, there is nothing to do. */
8014 if (!gate)
8015 return 0;
8017 return execute_expand_omp ();
8020 }; // class pass_expand_omp
8022 } // anon namespace
8024 gimple_opt_pass *
8025 make_pass_expand_omp (gcc::context *ctxt)
8027 return new pass_expand_omp (ctxt);
8030 namespace {
8032 const pass_data pass_data_expand_omp_ssa =
8034 GIMPLE_PASS, /* type */
8035 "ompexpssa", /* name */
8036 OPTGROUP_OMP, /* optinfo_flags */
8037 TV_NONE, /* tv_id */
8038 PROP_cfg | PROP_ssa, /* properties_required */
8039 PROP_gimple_eomp, /* properties_provided */
8040 0, /* properties_destroyed */
8041 0, /* todo_flags_start */
8042 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8045 class pass_expand_omp_ssa : public gimple_opt_pass
8047 public:
8048 pass_expand_omp_ssa (gcc::context *ctxt)
8049 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8052 /* opt_pass methods: */
8053 virtual bool gate (function *fun)
8055 return !(fun->curr_properties & PROP_gimple_eomp);
8057 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8058 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8060 }; // class pass_expand_omp_ssa
8062 } // anon namespace
8064 gimple_opt_pass *
8065 make_pass_expand_omp_ssa (gcc::context *ctxt)
8067 return new pass_expand_omp_ssa (ctxt);
8070 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8071 GIMPLE_* codes. */
8073 bool
8074 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8075 int *region_idx)
8077 gimple *last = last_stmt (bb);
8078 enum gimple_code code = gimple_code (last);
8079 struct omp_region *cur_region = *region;
8080 bool fallthru = false;
8082 switch (code)
8084 case GIMPLE_OMP_PARALLEL:
8085 case GIMPLE_OMP_TASK:
8086 case GIMPLE_OMP_FOR:
8087 case GIMPLE_OMP_SINGLE:
8088 case GIMPLE_OMP_TEAMS:
8089 case GIMPLE_OMP_MASTER:
8090 case GIMPLE_OMP_TASKGROUP:
8091 case GIMPLE_OMP_CRITICAL:
8092 case GIMPLE_OMP_SECTION:
8093 case GIMPLE_OMP_GRID_BODY:
8094 cur_region = new_omp_region (bb, code, cur_region);
8095 fallthru = true;
8096 break;
8098 case GIMPLE_OMP_ORDERED:
8099 cur_region = new_omp_region (bb, code, cur_region);
8100 fallthru = true;
8101 if (omp_find_clause (gimple_omp_ordered_clauses
8102 (as_a <gomp_ordered *> (last)),
8103 OMP_CLAUSE_DEPEND))
8104 cur_region = cur_region->outer;
8105 break;
8107 case GIMPLE_OMP_TARGET:
8108 cur_region = new_omp_region (bb, code, cur_region);
8109 fallthru = true;
8110 switch (gimple_omp_target_kind (last))
8112 case GF_OMP_TARGET_KIND_REGION:
8113 case GF_OMP_TARGET_KIND_DATA:
8114 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8115 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8116 case GF_OMP_TARGET_KIND_OACC_DATA:
8117 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8118 break;
8119 case GF_OMP_TARGET_KIND_UPDATE:
8120 case GF_OMP_TARGET_KIND_ENTER_DATA:
8121 case GF_OMP_TARGET_KIND_EXIT_DATA:
8122 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8123 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8124 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8125 cur_region = cur_region->outer;
8126 break;
8127 default:
8128 gcc_unreachable ();
8130 break;
8132 case GIMPLE_OMP_SECTIONS:
8133 cur_region = new_omp_region (bb, code, cur_region);
8134 fallthru = true;
8135 break;
8137 case GIMPLE_OMP_SECTIONS_SWITCH:
8138 fallthru = false;
8139 break;
8141 case GIMPLE_OMP_ATOMIC_LOAD:
8142 case GIMPLE_OMP_ATOMIC_STORE:
8143 fallthru = true;
8144 break;
8146 case GIMPLE_OMP_RETURN:
8147 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8148 somewhere other than the next block. This will be
8149 created later. */
8150 cur_region->exit = bb;
8151 if (cur_region->type == GIMPLE_OMP_TASK)
8152 /* Add an edge corresponding to not scheduling the task
8153 immediately. */
8154 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8155 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8156 cur_region = cur_region->outer;
8157 break;
8159 case GIMPLE_OMP_CONTINUE:
8160 cur_region->cont = bb;
8161 switch (cur_region->type)
8163 case GIMPLE_OMP_FOR:
8164 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8165 succs edges as abnormal to prevent splitting
8166 them. */
8167 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8168 /* Make the loopback edge. */
8169 make_edge (bb, single_succ (cur_region->entry),
8170 EDGE_ABNORMAL);
8172 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8173 corresponds to the case that the body of the loop
8174 is not executed at all. */
8175 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8176 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8177 fallthru = false;
8178 break;
8180 case GIMPLE_OMP_SECTIONS:
8181 /* Wire up the edges into and out of the nested sections. */
8183 basic_block switch_bb = single_succ (cur_region->entry);
8185 struct omp_region *i;
8186 for (i = cur_region->inner; i ; i = i->next)
8188 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8189 make_edge (switch_bb, i->entry, 0);
8190 make_edge (i->exit, bb, EDGE_FALLTHRU);
8193 /* Make the loopback edge to the block with
8194 GIMPLE_OMP_SECTIONS_SWITCH. */
8195 make_edge (bb, switch_bb, 0);
8197 /* Make the edge from the switch to exit. */
8198 make_edge (switch_bb, bb->next_bb, 0);
8199 fallthru = false;
8201 break;
8203 case GIMPLE_OMP_TASK:
8204 fallthru = true;
8205 break;
8207 default:
8208 gcc_unreachable ();
8210 break;
8212 default:
8213 gcc_unreachable ();
8216 if (*region != cur_region)
8218 *region = cur_region;
8219 if (cur_region)
8220 *region_idx = cur_region->entry->index;
8221 else
8222 *region_idx = 0;
8225 return fallthru;
8228 #include "gt-omp-expand.h"