1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3      runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
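/* Illustrative example (a sketch, not generated output): for

	#pragma omp parallel
	{
	  #pragma omp for
	  for (i = 0; i < n; i++)
	    ...
	}

   the region tree built by this pass has an outer GIMPLE_OMP_PARALLEL
   region whose `inner' field points to a GIMPLE_OMP_FOR region; sibling
   directives at the same nesting level are chained through `next', and
   `entry', `cont' and `exit' record the blocks holding the directive,
   GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN markers respectively.  */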
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
117 /* Return true if REGION is a combined parallel+workshare region. */
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
122 return region->is_combined_parallel;
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
126    is the immediate dominator of WS_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
139 Is lowered into:
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
196 return true;
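/* Illustrative counterpart to the example above (a sketch, not taken
   from real dumps): when every operand of the loop header is invariant,
   e.g.

	#pragma omp parallel for schedule (dynamic, 4)
	for (j = 0; j < 1024; j++)
	  ...

   nothing in WS_ENTRY_BB has to be computed before the combined library
   call, so workshare_safe_to_combine_p returns true and the region can
   be emitted as a single combined parallel+workshare call.  */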
199 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 if (!simd_schedule)
206 return chunk_size;
208 poly_uint64 vf = omp_max_vf ();
209 if (known_eq (vf, 1U))
210 return chunk_size;
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
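/* Worked example for omp_adjust_chunk_size (illustrative only): with a
   vectorization factor VF == 8 and CHUNK_SIZE == 10 the result is
   (10 + (8 - 1)) & -8 == 16, i.e. the chunk size is rounded up to the
   next multiple of VF so that each chunk covers whole SIMD vectors.  */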
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 struct omp_for_data fd;
233 tree n1, n2;
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
239 if (gimple_omp_for_combined_into_p (for_stmt))
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
263 if (fd.chunk_size)
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
270 return ws_args;
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
284 gcc_unreachable ();
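/* Illustrative sketch of the vector built above (values are examples,
   not from real dumps): for

	#pragma omp parallel for schedule (dynamic, 4)
	for (i = a; i < b; i += c)

   WS_ARGS is roughly { (long) a, (long) b, (long) c, (long) 4 }, i.e.
   the bounds, step and (possibly SIMD-adjusted) chunk size that the
   combined GOMP_parallel_loop_* entry point takes in addition to the
   usual GOMP_parallel arguments.  For a sections region it is a single
   element holding the number of sections.  */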
287 /* Discover whether REGION is a combined parallel+workshare region. */
289 static void
290 determine_parallel_type (struct omp_region *region)
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
323 if (region->inner->type == GIMPLE_OMP_FOR)
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
358 /* Dump the parallel region tree rooted at REGION. */
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
369 if (region->cont)
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
388 dump_omp_region (stderr, region, 0);
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
394 dump_omp_region (stderr, root_omp_region, 0);
397 /* Create a new parallel region starting at STMT inside region PARENT. */
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
403 struct omp_region *region = XCNEW (struct omp_region);
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
409 if (parent)
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
416 else
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
424 return region;
427 /* Release the memory associated with the region tree rooted at REGION. */
429 static void
430 free_omp_region_1 (struct omp_region *region)
432 struct omp_region *i, *n;
434 for (i = region->inner; i ; i = n)
436 n = i->next;
437 free_omp_region_1 (i);
440 free (region);
443 /* Release the memory for the entire omp region tree. */
445 void
446 omp_free_regions (void)
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
451 n = r->next;
452 free_omp_region_1 (r);
454 root_omp_region = NULL;
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
460 static gcond *
461 gimple_build_cond_empty (tree cond)
463 enum tree_code pred_code;
464 tree lhs, rhs;
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
498 return false;
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502 Add CHILD_FNDECL to decl chain of the supercontext of the block
503 ENTRY_BLOCK - this is the block which originally contained the
504 code from which CHILD_FNDECL was created.
506 Together, these actions ensure that the debug info for the outlined
507 function will be emitted with the correct lexical scope. */
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
514 tree b = BLOCK_SUPERCONTEXT (entry_block);
516 if (TREE_CODE (b) == BLOCK)
518 tree parent_fndecl;
520 /* Follow supercontext chain until the parent fndecl
521 is found. */
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 TREE_CODE (parent_fndecl) == BLOCK;
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
529 DECL_CONTEXT (child_fndecl) = parent_fndecl;
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 BLOCK_VARS (b) = child_fndecl;
537 /* Build the function calls to GOMP_parallel_start etc to actually
538 generate the parallel operation. REGION is the parallel region
539 being expanded. BB is the block where to insert the code. WS_ARGS
540 will be set if this is a call to a combined parallel+workshare
541 construct, it contains the list of additional arguments needed by
542 the workshare construct. */
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 gomp_parallel *entry_stmt,
547 vec<tree, va_gc> *ws_args)
549 tree t, t1, t2, val, cond, c, clauses, flags;
550 gimple_stmt_iterator gsi;
551 gimple *stmt;
552 enum built_in_function start_ix;
553 int start_ix2;
554 location_t clause_loc;
555 vec<tree, va_gc> *args;
557 clauses = gimple_omp_parallel_clauses (entry_stmt);
559 /* Determine what flavor of GOMP_parallel we will be
560 emitting. */
561 start_ix = BUILT_IN_GOMP_PARALLEL;
562 if (is_combined_parallel (region))
564 switch (region->inner->type)
566 case GIMPLE_OMP_FOR:
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 switch (region->inner->sched_kind)
570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 start_ix2 = 3;
572 break;
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 case OMP_CLAUSE_SCHEDULE_GUIDED:
575 if (region->inner->sched_modifiers
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
578 start_ix2 = 3 + region->inner->sched_kind;
579 break;
581 /* FALLTHRU */
582 default:
583 start_ix2 = region->inner->sched_kind;
584 break;
586 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 start_ix = (enum built_in_function) start_ix2;
588 break;
589 case GIMPLE_OMP_SECTIONS:
590 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 break;
592 default:
593 gcc_unreachable ();
597 /* By default, the value of NUM_THREADS is zero (selected at run time)
598 and there is no conditional. */
599 cond = NULL_TREE;
600 val = build_int_cst (unsigned_type_node, 0);
601 flags = build_int_cst (unsigned_type_node, 0);
603 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604 if (c)
605 cond = OMP_CLAUSE_IF_EXPR (c);
607 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608 if (c)
610 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611 clause_loc = OMP_CLAUSE_LOCATION (c);
613 else
614 clause_loc = gimple_location (entry_stmt);
616 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617 if (c)
618 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
620 /* Ensure 'val' is of the correct type. */
621 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
623 /* If we found the clause 'if (cond)', build either
624 (cond != 0) or (cond ? val : 1u). */
625 if (cond)
627 cond = gimple_boolify (cond);
629 if (integer_zerop (val))
630 val = fold_build2_loc (clause_loc,
631 EQ_EXPR, unsigned_type_node, cond,
632 build_int_cst (TREE_TYPE (cond), 0));
633 else
635 basic_block cond_bb, then_bb, else_bb;
636 edge e, e_then, e_else;
637 tree tmp_then, tmp_else, tmp_join, tmp_var;
639 tmp_var = create_tmp_var (TREE_TYPE (val));
640 if (gimple_in_ssa_p (cfun))
642 tmp_then = make_ssa_name (tmp_var);
643 tmp_else = make_ssa_name (tmp_var);
644 tmp_join = make_ssa_name (tmp_var);
646 else
648 tmp_then = tmp_var;
649 tmp_else = tmp_var;
650 tmp_join = tmp_var;
653 e = split_block_after_labels (bb);
654 cond_bb = e->src;
655 bb = e->dest;
656 remove_edge (e);
658 then_bb = create_empty_bb (cond_bb);
659 else_bb = create_empty_bb (then_bb);
660 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
663 stmt = gimple_build_cond_empty (cond);
664 gsi = gsi_start_bb (cond_bb);
665 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
667 gsi = gsi_start_bb (then_bb);
668 expand_omp_build_assign (&gsi, tmp_then, val, true);
670 gsi = gsi_start_bb (else_bb);
671 expand_omp_build_assign (&gsi, tmp_else,
672 build_int_cst (unsigned_type_node, 1),
673 true);
675 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 add_bb_to_loop (then_bb, cond_bb->loop_father);
678 add_bb_to_loop (else_bb, cond_bb->loop_father);
679 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
682 if (gimple_in_ssa_p (cfun))
684 gphi *phi = create_phi_node (tmp_join, bb);
685 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
689 val = tmp_join;
692 gsi = gsi_start_bb (bb);
693 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 false, GSI_CONTINUE_LINKING);
697 gsi = gsi_last_nondebug_bb (bb);
698 t = gimple_omp_parallel_data_arg (entry_stmt);
699 if (t == NULL)
700 t1 = null_pointer_node;
701 else
702 t1 = build_fold_addr_expr (t);
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704 t2 = build_fold_addr_expr (child_fndecl);
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
708 vec_alloc (args, 4 + vec_safe_length (ws_args));
709 args->quick_push (t2);
710 args->quick_push (t1);
711 args->quick_push (val);
712 if (ws_args)
713 args->splice (*ws_args);
714 args->quick_push (flags);
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 builtin_decl_explicit (start_ix), args);
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 false, GSI_CONTINUE_LINKING);
722 if (hsa_gen_requested_p ()
723 && parallel_needs_hsa_kernel_p (region))
725 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726 hsa_register_kernel (child_cnode);
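/* Illustrative sketch of the call emitted above (argument values depend
   on the clauses; the child function name is just an example):

	GOMP_parallel (bar.omp_fn.0, &.omp_data_o, <num_threads>, <flags>);

   For a combined parallel+workshare region one of the
   GOMP_parallel_loop_* or GOMP_parallel_sections entry points is chosen
   instead, with the WS_ARGS collected by get_ws_args_for spliced in
   between <num_threads> and <flags>; an if (cond) clause is folded into
   <num_threads> beforehand as (cond != 0) or (cond ? val : 1u).  */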
730 /* Build the function call to GOMP_task to actually
731 generate the task operation. BB is the block where to insert the code. */
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 else
787 num_tasks = integer_zero_node;
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
799 tree flags = build_int_cst (unsigned_type_node, iflags);
801 tree cond = boolean_true_node;
802 if (ifc)
804 if (taskloop_p)
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 if (finalc)
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
837 gsi = gsi_last_nondebug_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
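/* Illustrative sketch of the call emitted above (names and values are
   placeholders):

	GOMP_task (bar.omp_fn.1, &.omp_data_o, <copy_fn>, <arg_size>,
		   <arg_align>, <if_cond>, <flags>, <depend>, <priority>);

   matching the nine operands built just above.  A taskloop instead calls
   GOMP_taskloop or GOMP_taskloop_ull, passing <num_tasks>, <priority>
   and the start/end/step temporaries in place of the if and depend
   operands.  */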
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
874 tree chain = NULL_TREE, t;
875 unsigned ix;
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 DECL_CHAIN (t) = chain;
880 chain = t;
883 return chain;
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
892 static void
893 remove_exit_barrier (struct omp_region *region)
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
902 exit_bb = region->exit;
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_nondebug_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev_nondebug (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 gsi = gsi_last_nondebug_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934		 from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
949 any_addressable_vars = 1;
950 break;
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
963 any_addressable_vars = 1;
964 break;
966 if (block == gimple_block (parallel_stmt))
967 break;
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
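/* Illustrative example for the barrier removal above (sketch): in

	#pragma omp parallel
	{
	  #pragma omp for
	  for (i = 0; i < n; i++)
	    a[i] = f (i);
	}

   the barrier implied at the end of the worksharing loop is immediately
   followed by the implicit barrier that ends the parallel region, so,
   provided no addressable locals might still be referenced by queued
   tasks, the inner GIMPLE_OMP_RETURN is marked nowait and only the
   outer barrier remains.  */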
976 static void
977 remove_exit_barriers (struct omp_region *region)
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
982 if (region->inner)
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
988 region = region->next;
989 remove_exit_barriers (region);
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for task body, except
999    that in an untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1027 tree built_in;
1029 if (DECL_NAME (decl) == thr_num_id)
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1054 gimple_call_set_fndecl (call, built_in);
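/* Illustrative before/after for the substitution above (sketch):

	D.1234 = omp_get_num_threads ();	becomes
	D.1234 = __builtin_omp_get_num_threads ();

   The built-in is declared const, so repeated queries inside one
   parallel body can be CSEd; the value cannot change while the team is
   executing the region.  */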
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 tree t = *tp;
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1124 if (is_combined_parallel (region))
1125 ws_args = region->ws_args;
1126 else
1127 ws_args = NULL;
1129 if (child_cfun->cfg)
1131 /* Due to inlining, it may happen that we have already outlined
1132 the region, in which case all we need to do is make the
1133 sub-graph unreachable and emit the parallel call. */
1134 edge entry_succ_e, exit_succ_e;
1136 entry_succ_e = single_succ_edge (entry_bb);
1138 gsi = gsi_last_nondebug_bb (entry_bb);
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141 gsi_remove (&gsi, true);
1143 new_bb = entry_bb;
1144 if (exit_bb)
1146 exit_succ_e = single_succ_edge (exit_bb);
1147 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1149 remove_edge_and_dominated_blocks (entry_succ_e);
1151 else
1153 unsigned srcidx, dstidx, num;
1155 /* If the parallel region needs data sent from the parent
1156 function, then the very first statement (except possible
1157 tree profile counter updates) of the parallel body
1158 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1159 &.OMP_DATA_O is passed as an argument to the child function,
1160 we need to replace it with the argument as seen by the child
1161 function.
1163 In most cases, this will end up being the identity assignment
1164 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1165 a function call that has been inlined, the original PARM_DECL
1166 .OMP_DATA_I may have been converted into a different local
1167 variable. In which case, we need to keep the assignment. */
1168 if (gimple_omp_taskreg_data_arg (entry_stmt))
1170 basic_block entry_succ_bb
1171 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 : FALLTHRU_EDGE (entry_bb)->dest;
1173 tree arg;
1174 gimple *parcopy_stmt = NULL;
1176 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1178 gimple *stmt;
1180 gcc_assert (!gsi_end_p (gsi));
1181 stmt = gsi_stmt (gsi);
1182 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 continue;
1185 if (gimple_num_ops (stmt) == 2)
1187 tree arg = gimple_assign_rhs1 (stmt);
1189			/* We're ignoring the subcode because we're
1190 effectively doing a STRIP_NOPS. */
1192 if (TREE_CODE (arg) == ADDR_EXPR
1193 && TREE_OPERAND (arg, 0)
1194 == gimple_omp_taskreg_data_arg (entry_stmt))
1196 parcopy_stmt = stmt;
1197 break;
1202 gcc_assert (parcopy_stmt != NULL);
1203 arg = DECL_ARGUMENTS (child_fn);
1205 if (!gimple_in_ssa_p (cfun))
1207 if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 gsi_remove (&gsi, true);
1209 else
1211 /* ?? Is setting the subcode really necessary ?? */
1212 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1216 else
1218 tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 /* We'd like to set the rhs to the default def in the child_fn,
1221 but it's too early to create ssa names in the child_fn.
1222 Instead, we set the rhs to the parm. In
1223 move_sese_region_to_fn, we introduce a default def for the
1224	     parm, map the parm to its default def, and once we encounter
1225 this stmt, replace the parm with the default def. */
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 update_stmt (parcopy_stmt);
1231 /* Declare local variables needed in CHILD_CFUN. */
1232 block = DECL_INITIAL (child_fn);
1233 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234 /* The gimplifier could record temporaries in parallel/task block
1235 rather than in containing function's local_decls chain,
1236 which would mean cgraph missed finalizing them. Do it now. */
1237 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 varpool_node::finalize_decl (t);
1240 DECL_SAVED_TREE (child_fn) = NULL;
1241 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1242 gimple_set_body (child_fn, NULL);
1243 TREE_USED (block) = 1;
1245 /* Reset DECL_CONTEXT on function arguments. */
1246 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 DECL_CONTEXT (t) = child_fn;
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 so that it can be moved to the child function. */
1251 gsi = gsi_last_nondebug_bb (entry_bb);
1252 stmt = gsi_stmt (gsi);
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255 e = split_block (entry_bb, stmt);
1256 gsi_remove (&gsi, true);
1257 entry_bb = e->dest;
1258 edge e2 = NULL;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261 else
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 gcc_assert (e2->dest == region->exit);
1265 remove_edge (BRANCH_EDGE (entry_bb));
1266 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 gsi = gsi_last_nondebug_bb (region->exit);
1268 gcc_assert (!gsi_end_p (gsi)
1269 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 gsi_remove (&gsi, true);
1273 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1274 if (exit_bb)
1276 gsi = gsi_last_nondebug_bb (exit_bb);
1277 gcc_assert (!gsi_end_p (gsi)
1278 && (gimple_code (gsi_stmt (gsi))
1279 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 stmt = gimple_build_return (NULL);
1281 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 gsi_remove (&gsi, true);
1285 /* Move the parallel region into CHILD_CFUN. */
1287 if (gimple_in_ssa_p (cfun))
1289 init_tree_ssa (child_cfun);
1290 init_ssa_operands (child_cfun);
1291 child_cfun->gimple_df->in_ssa_p = true;
1292 block = NULL_TREE;
1294 else
1295 block = gimple_block (entry_stmt);
1297 /* Make sure to generate early debug for the function before
1298 outlining anything. */
1299 if (! gimple_in_ssa_p (cfun))
1300 (*debug_hooks->early_global_decl) (cfun->decl);
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303 if (exit_bb)
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305 if (e2)
1307 basic_block dest_bb = e2->dest;
1308 if (!exit_bb)
1309 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 remove_edge (e2);
1311 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1313 /* When the OMP expansion process cannot guarantee an up-to-date
1314	 loop tree, arrange for the child function to fix up loops.  */
1315 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1318 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1319 num = vec_safe_length (child_cfun->local_decls);
1320 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1322 t = (*child_cfun->local_decls)[srcidx];
1323 if (DECL_CONTEXT (t) == cfun->decl)
1324 continue;
1325 if (srcidx != dstidx)
1326 (*child_cfun->local_decls)[dstidx] = t;
1327 dstidx++;
1329 if (dstidx != num)
1330 vec_safe_truncate (child_cfun->local_decls, dstidx);
1332 /* Inform the callgraph about the new function. */
1333 child_cfun->curr_properties = cfun->curr_properties;
1334 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336 cgraph_node *node = cgraph_node::get_create (child_fn);
1337 node->parallelized_function = 1;
1338 cgraph_node::add_new_function (child_fn, true);
1340 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1343 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1344 fixed in a following pass. */
1345 push_cfun (child_cfun);
1346 if (need_asm)
1347 assign_assembler_name_if_needed (child_fn);
1349 if (optimize)
1350 optimize_omp_library_calls (entry_stmt);
1351 update_max_bb_count ();
1352 cgraph_edge::rebuild_edges ();
1354 /* Some EH regions might become dead, see PR34608. If
1355 pass_cleanup_cfg isn't the first pass to happen with the
1356 new child, these dead EH edges might cause problems.
1357 Clean them up now. */
1358 if (flag_exceptions)
1360 basic_block bb;
1361 bool changed = false;
1363 FOR_EACH_BB_FN (bb, cfun)
1364 changed |= gimple_purge_dead_eh_edges (bb);
1365 if (changed)
1366 cleanup_tree_cfg ();
1368 if (gimple_in_ssa_p (cfun))
1369 update_ssa (TODO_update_ssa);
1370 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 verify_loop_structure ();
1372 pop_cfun ();
1374 if (dump_file && !gimple_in_ssa_p (cfun))
1376 omp_any_child_fn_dumped = true;
1377 dump_function_header (dump_file, child_fn, dump_flags);
1378 dump_function_to_file (child_fn, dump_file, dump_flags);
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383 expand_parallel_call (region, new_bb,
1384 as_a <gomp_parallel *> (entry_stmt), ws_args);
1385 else
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387 if (gimple_in_ssa_p (cfun))
1388 update_ssa (TODO_update_ssa_only_virtuals);
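/* Illustrative summary of the result (sketch, not literal dump output):
   after this function the parent contains only the data setup and the
   runtime call, e.g.

	.omp_data_o.n = n;
	GOMP_parallel (bar.omp_fn.0, &.omp_data_o, 0, 0);

   while the former region body lives in the outlined child function,
   whose leading .OMP_DATA_I = &.OMP_DATA_O assignment has been rewritten
   to use the child's incoming argument as described above.  */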
1391 /* Information about members of an OpenACC collapsed loop nest. */
1393 struct oacc_collapse
1395 tree base; /* Base value. */
1396 tree iters; /* Number of steps. */
1397 tree step; /* Step size. */
1398 tree tile; /* Tile increment (if tiled). */
1399 tree outer; /* Tile iterator var. */
1402 /* Helper for expand_oacc_for. Determine collapsed loop information.
1403 Fill in COUNTS array. Emit any initialization code before GSI.
1404 Return the calculated outer loop bound of BOUND_TYPE. */
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 gimple_stmt_iterator *gsi,
1409 oacc_collapse *counts, tree bound_type,
1410 location_t loc)
1412 tree tiling = fd->tiling;
1413 tree total = build_int_cst (bound_type, 1);
1414 int ix;
1416 gcc_assert (integer_onep (fd->loop.step));
1417 gcc_assert (integer_zerop (fd->loop.n1));
1419 /* When tiling, the first operand of the tile clause applies to the
1420 innermost loop, and we work outwards from there. Seems
1421 backwards, but whatever. */
1422 for (ix = fd->collapse; ix--;)
1424 const omp_for_data_loop *loop = &fd->loops[ix];
1426 tree iter_type = TREE_TYPE (loop->v);
1427 tree diff_type = iter_type;
1428 tree plus_type = iter_type;
1430 gcc_assert (loop->cond_code == fd->loop.cond_code);
1432 if (POINTER_TYPE_P (iter_type))
1433 plus_type = sizetype;
1434 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 diff_type = signed_type_for (diff_type);
1436 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1437 diff_type = integer_type_node;
1439 if (tiling)
1441 tree num = build_int_cst (integer_type_node, fd->collapse);
1442 tree loop_no = build_int_cst (integer_type_node, ix);
1443 tree tile = TREE_VALUE (tiling);
1444 gcall *call
1445 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1446 /* gwv-outer=*/integer_zero_node,
1447 /* gwv-inner=*/integer_zero_node);
1449 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1450 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1451 gimple_call_set_lhs (call, counts[ix].tile);
1452 gimple_set_location (call, loc);
1453 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1455 tiling = TREE_CHAIN (tiling);
1457 else
1459 counts[ix].tile = NULL;
1460 counts[ix].outer = loop->v;
1463 tree b = loop->n1;
1464 tree e = loop->n2;
1465 tree s = loop->step;
1466 bool up = loop->cond_code == LT_EXPR;
1467 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1468 bool negating;
1469 tree expr;
1471 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1472 true, GSI_SAME_STMT);
1473 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1474 true, GSI_SAME_STMT);
1476 /* Convert the step, avoiding possible unsigned->signed overflow. */
1477 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1478 if (negating)
1479 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1480 s = fold_convert (diff_type, s);
1481 if (negating)
1482 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1483 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1484 true, GSI_SAME_STMT);
1486 /* Determine the range, avoiding possible unsigned->signed overflow. */
1487 negating = !up && TYPE_UNSIGNED (iter_type);
1488 expr = fold_build2 (MINUS_EXPR, plus_type,
1489 fold_convert (plus_type, negating ? b : e),
1490 fold_convert (plus_type, negating ? e : b));
1491 expr = fold_convert (diff_type, expr);
1492 if (negating)
1493 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1494 tree range = force_gimple_operand_gsi
1495 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1497 /* Determine number of iterations. */
1498 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1499 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1500 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1502 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1503 true, GSI_SAME_STMT);
1505 counts[ix].base = b;
1506 counts[ix].iters = iters;
1507 counts[ix].step = s;
1509 total = fold_build2 (MULT_EXPR, bound_type, total,
1510 fold_convert (bound_type, iters));
1513 return total;
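/* Worked example for the iteration count above (illustrative): for a
   member loop for (i = 0; i < 10; i += 3) with cond_code LT_EXPR the
   range is 10 and ITERS is (10 - 1 + 3) / 3 == 4; TOTAL is the product
   of these counts over all collapsed members and becomes the bound of
   the single flattened loop.  */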
1516 /* Emit initializers for collapsed loop members. INNER is true if
1517 this is for the element loop of a TILE. IVAR is the outer
1518 loop iteration variable, from which collapsed loop iteration values
1519 are calculated. COUNTS array has been initialized by
1520    expand_oacc_collapse_init.  */
1522 static void
1523 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1524 gimple_stmt_iterator *gsi,
1525 const oacc_collapse *counts, tree ivar)
1527 tree ivar_type = TREE_TYPE (ivar);
1529 /* The most rapidly changing iteration variable is the innermost
1530 one. */
1531 for (int ix = fd->collapse; ix--;)
1533 const omp_for_data_loop *loop = &fd->loops[ix];
1534 const oacc_collapse *collapse = &counts[ix];
1535 tree v = inner ? loop->v : collapse->outer;
1536 tree iter_type = TREE_TYPE (v);
1537 tree diff_type = TREE_TYPE (collapse->step);
1538 tree plus_type = iter_type;
1539 enum tree_code plus_code = PLUS_EXPR;
1540 tree expr;
1542 if (POINTER_TYPE_P (iter_type))
1544 plus_code = POINTER_PLUS_EXPR;
1545 plus_type = sizetype;
1548 expr = ivar;
1549 if (ix)
1551 tree mod = fold_convert (ivar_type, collapse->iters);
1552 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1553 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1554 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1555 true, GSI_SAME_STMT);
1558 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1559 collapse->step);
1560 expr = fold_build2 (plus_code, iter_type,
1561 inner ? collapse->outer : collapse->base,
1562 fold_convert (plus_type, expr));
1563 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1564 true, GSI_SAME_STMT);
1565 gassign *ass = gimple_build_assign (v, expr);
1566 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
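/* Worked example for the recomputation above (illustrative): with
   collapse(2), counts[1].iters == 4 and IVAR == 7, the innermost member
   receives v2 = base2 + (7 % 4) * step2 and the outer one
   v1 = base1 + (7 / 4) * step1, i.e. flattened index 7 maps to outer
   iteration 1, inner iteration 3.  */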
1570 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1571 of the combined collapse > 1 loop constructs, generate code like:
1572 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1573 if (cond3 is <)
1574 adj = STEP3 - 1;
1575 else
1576 adj = STEP3 + 1;
1577 count3 = (adj + N32 - N31) / STEP3;
1578 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1579 if (cond2 is <)
1580 adj = STEP2 - 1;
1581 else
1582 adj = STEP2 + 1;
1583 count2 = (adj + N22 - N21) / STEP2;
1584 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1585 if (cond1 is <)
1586 adj = STEP1 - 1;
1587 else
1588 adj = STEP1 + 1;
1589 count1 = (adj + N12 - N11) / STEP1;
1590 count = count1 * count2 * count3;
1591 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1592 count = 0;
1593 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1594 of the combined loop constructs, just initialize COUNTS array
1595 from the _looptemp_ clauses. */
1597 /* NOTE: It *could* be better to moosh all of the BBs together,
1598 creating one larger BB with all the computation and the unexpected
1599 jump at the end. I.e.
1601 bool zero3, zero2, zero1, zero;
1603 zero3 = N32 c3 N31;
1604 count3 = (N32 - N31) /[cl] STEP3;
1605 zero2 = N22 c2 N21;
1606 count2 = (N22 - N21) /[cl] STEP2;
1607 zero1 = N12 c1 N11;
1608 count1 = (N12 - N11) /[cl] STEP1;
1609 zero = zero3 || zero2 || zero1;
1610 count = count1 * count2 * count3;
1611 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1613 After all, we expect the zero=false, and thus we expect to have to
1614 evaluate all of the comparison expressions, so short-circuiting
1615 oughtn't be a win. Since the condition isn't protecting a
1616 denominator, we're not concerned about divide-by-zero, so we can
1617 fully evaluate count even if a numerator turned out to be wrong.
1619 It seems like putting this all together would create much better
1620 scheduling opportunities, and less pressure on the chip's branch
1621 predictor. */
1623 static void
1624 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1625 basic_block &entry_bb, tree *counts,
1626 basic_block &zero_iter1_bb, int &first_zero_iter1,
1627 basic_block &zero_iter2_bb, int &first_zero_iter2,
1628 basic_block &l2_dom_bb)
1630 tree t, type = TREE_TYPE (fd->loop.v);
1631 edge e, ne;
1632 int i;
1634 /* Collapsed loops need work for expansion into SSA form. */
1635 gcc_assert (!gimple_in_ssa_p (cfun));
1637 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1638 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1640 gcc_assert (fd->ordered == 0);
1641 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1642 isn't supposed to be handled, as the inner loop doesn't
1643 use it. */
1644 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1645 OMP_CLAUSE__LOOPTEMP_);
1646 gcc_assert (innerc);
1647 for (i = 0; i < fd->collapse; i++)
1649 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1650 OMP_CLAUSE__LOOPTEMP_);
1651 gcc_assert (innerc);
1652 if (i)
1653 counts[i] = OMP_CLAUSE_DECL (innerc);
1654 else
1655 counts[0] = NULL_TREE;
1657 return;
1660 for (i = fd->collapse; i < fd->ordered; i++)
1662 tree itype = TREE_TYPE (fd->loops[i].v);
1663 counts[i] = NULL_TREE;
1664 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1665 fold_convert (itype, fd->loops[i].n1),
1666 fold_convert (itype, fd->loops[i].n2));
1667 if (t && integer_zerop (t))
1669 for (i = fd->collapse; i < fd->ordered; i++)
1670 counts[i] = build_int_cst (type, 0);
1671 break;
1674 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1678 if (i >= fd->collapse && counts[i])
1679 continue;
1680 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1681 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1682 fold_convert (itype, fd->loops[i].n1),
1683 fold_convert (itype, fd->loops[i].n2)))
1684 == NULL_TREE || !integer_onep (t)))
1686 gcond *cond_stmt;
1687 tree n1, n2;
1688 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1689 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1690 true, GSI_SAME_STMT);
1691 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1692 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1695 NULL_TREE, NULL_TREE);
1696 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1697 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1698 expand_omp_regimplify_p, NULL, NULL)
1699 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1700 expand_omp_regimplify_p, NULL, NULL))
1702 *gsi = gsi_for_stmt (cond_stmt);
1703 gimple_regimplify_operands (cond_stmt, gsi);
1705 e = split_block (entry_bb, cond_stmt);
1706 basic_block &zero_iter_bb
1707 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1708 int &first_zero_iter
1709 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1710 if (zero_iter_bb == NULL)
1712 gassign *assign_stmt;
1713 first_zero_iter = i;
1714 zero_iter_bb = create_empty_bb (entry_bb);
1715 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1716 *gsi = gsi_after_labels (zero_iter_bb);
1717 if (i < fd->collapse)
1718 assign_stmt = gimple_build_assign (fd->loop.n2,
1719 build_zero_cst (type));
1720 else
1722 counts[i] = create_tmp_reg (type, ".count");
1723 assign_stmt
1724 = gimple_build_assign (counts[i], build_zero_cst (type));
1726 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1727 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1728 entry_bb);
1730 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1731 ne->probability = profile_probability::very_unlikely ();
1732 e->flags = EDGE_TRUE_VALUE;
1733 e->probability = ne->probability.invert ();
1734 if (l2_dom_bb == NULL)
1735 l2_dom_bb = entry_bb;
1736 entry_bb = e->dest;
1737 *gsi = gsi_last_nondebug_bb (entry_bb);
1740 if (POINTER_TYPE_P (itype))
1741 itype = signed_type_for (itype);
1742 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1743 ? -1 : 1));
1744 t = fold_build2 (PLUS_EXPR, itype,
1745 fold_convert (itype, fd->loops[i].step), t);
1746 t = fold_build2 (PLUS_EXPR, itype, t,
1747 fold_convert (itype, fd->loops[i].n2));
1748 t = fold_build2 (MINUS_EXPR, itype, t,
1749 fold_convert (itype, fd->loops[i].n1));
1750 /* ?? We could probably use CEIL_DIV_EXPR instead of
1751 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1752 generate the same code in the end because generically we
1753 don't know that the values involved must be negative for
1754 GT?? */
1755 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1756 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1757 fold_build1 (NEGATE_EXPR, itype, t),
1758 fold_build1 (NEGATE_EXPR, itype,
1759 fold_convert (itype,
1760 fd->loops[i].step)));
1761 else
1762 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].step));
1764 t = fold_convert (type, t);
1765 if (TREE_CODE (t) == INTEGER_CST)
1766 counts[i] = t;
1767 else
1769 if (i < fd->collapse || i != first_zero_iter2)
1770 counts[i] = create_tmp_reg (type, ".count");
1771 expand_omp_build_assign (gsi, counts[i], t);
1773 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1775 if (i == 0)
1776 t = counts[0];
1777 else
1778 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1779 expand_omp_build_assign (gsi, fd->loop.n2, t);
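/* Worked example for the count computation above (illustrative): for a
   collapsed member for (i = 0; i < 10; i += 3) we get adj = STEP - 1 = 2
   and count = (2 + 10 - 0) / 3 == 4; when fd->loop.n2 is not a
   compile-time constant the member counts are multiplied together into
   it, so the expanded code iterates over a single 0 .. count-1 space.  */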
1784 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1785 T = V;
1786 V3 = N31 + (T % count3) * STEP3;
1787 T = T / count3;
1788 V2 = N21 + (T % count2) * STEP2;
1789 T = T / count2;
1790 V1 = N11 + T * STEP1;
1791 if this loop doesn't have an inner loop construct combined with it.
1792 If it does have an inner loop construct combined with it and the
1793 iteration count isn't known constant, store values from counts array
1794 into its _looptemp_ temporaries instead. */
1796 static void
1797 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 tree *counts, gimple *inner_stmt, tree startvar)
1800 int i;
1801 if (gimple_omp_for_combined_p (fd->for_stmt))
1803 /* If fd->loop.n2 is constant, then no propagation of the counts
1804 is needed, they are constant. */
1805 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1806 return;
1808 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1809 ? gimple_omp_taskreg_clauses (inner_stmt)
1810 : gimple_omp_for_clauses (inner_stmt);
1811 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1812 isn't supposed to be handled, as the inner loop doesn't
1813 use it. */
1814 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1815 gcc_assert (innerc);
1816 for (i = 0; i < fd->collapse; i++)
1818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1819 OMP_CLAUSE__LOOPTEMP_);
1820 gcc_assert (innerc);
1821 if (i)
1823 tree tem = OMP_CLAUSE_DECL (innerc);
1824 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1825 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1826 false, GSI_CONTINUE_LINKING);
1827 gassign *stmt = gimple_build_assign (tem, t);
1828 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1831 return;
1834 tree type = TREE_TYPE (fd->loop.v);
1835 tree tem = create_tmp_reg (type, ".tem");
1836 gassign *stmt = gimple_build_assign (tem, startvar);
1837 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1839 for (i = fd->collapse - 1; i >= 0; i--)
1841 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1842 itype = vtype;
1843 if (POINTER_TYPE_P (vtype))
1844 itype = signed_type_for (vtype);
1845 if (i != 0)
1846 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1847 else
1848 t = tem;
1849 t = fold_convert (itype, t);
1850 t = fold_build2 (MULT_EXPR, itype, t,
1851 fold_convert (itype, fd->loops[i].step));
1852 if (POINTER_TYPE_P (vtype))
1853 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1854 else
1855 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1856 t = force_gimple_operand_gsi (gsi, t,
1857 DECL_P (fd->loops[i].v)
1858 && TREE_ADDRESSABLE (fd->loops[i].v),
1859 NULL_TREE, false,
1860 GSI_CONTINUE_LINKING);
1861 stmt = gimple_build_assign (fd->loops[i].v, t);
1862 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1863 if (i != 0)
1865 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1866 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1867 false, GSI_CONTINUE_LINKING);
1868 stmt = gimple_build_assign (tem, t);
1869 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1874 /* Helper function for expand_omp_for_*. Generate code like:
1875 L10:
1876 V3 += STEP3;
1877 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1878 L11:
1879 V3 = N31;
1880 V2 += STEP2;
1881 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1882 L12:
1883 V2 = N21;
1884 V1 += STEP1;
1885 goto BODY_BB; */
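 /* E.g. (illustration only, assuming a collapse(2) nest over V1 and V2),
    the emitted update sequence behaves like

	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB;
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;

    i.e. the innermost variable is bumped first and each exhausted
    variable is reset before carrying into the next outer one.  */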
1887 static basic_block
1888 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1889 basic_block body_bb)
1891 basic_block last_bb, bb, collapse_bb = NULL;
1892 int i;
1893 gimple_stmt_iterator gsi;
1894 edge e;
1895 tree t;
1896 gimple *stmt;
1898 last_bb = cont_bb;
1899 for (i = fd->collapse - 1; i >= 0; i--)
1901 tree vtype = TREE_TYPE (fd->loops[i].v);
1903 bb = create_empty_bb (last_bb);
1904 add_bb_to_loop (bb, last_bb->loop_father);
1905 gsi = gsi_start_bb (bb);
1907 if (i < fd->collapse - 1)
1909 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1910 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1912 t = fd->loops[i + 1].n1;
1913 t = force_gimple_operand_gsi (&gsi, t,
1914 DECL_P (fd->loops[i + 1].v)
1915 && TREE_ADDRESSABLE (fd->loops[i
1916 + 1].v),
1917 NULL_TREE, false,
1918 GSI_CONTINUE_LINKING);
1919 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1920 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1922 else
1923 collapse_bb = bb;
1925 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1927 if (POINTER_TYPE_P (vtype))
1928 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1929 else
1930 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1931 t = force_gimple_operand_gsi (&gsi, t,
1932 DECL_P (fd->loops[i].v)
1933 && TREE_ADDRESSABLE (fd->loops[i].v),
1934 NULL_TREE, false, GSI_CONTINUE_LINKING);
1935 stmt = gimple_build_assign (fd->loops[i].v, t);
1936 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1938 if (i > 0)
1940 t = fd->loops[i].n2;
1941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1942 false, GSI_CONTINUE_LINKING);
1943 tree v = fd->loops[i].v;
1944 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1945 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1946 false, GSI_CONTINUE_LINKING);
1947 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1948 stmt = gimple_build_cond_empty (t);
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1951 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1953 else
1954 make_edge (bb, body_bb, EDGE_FALLTHRU);
1955 last_bb = bb;
1958 return collapse_bb;
1961 /* Expand #pragma omp ordered depend(source). */
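 /* For illustration (an assumed user-level example, not from this file):
    in a doacross loop such as

	#pragma omp for ordered(1)
	for (i = 1; i < n; i++)
	  {
	    #pragma omp ordered depend (sink: i - 1)
	    use (a[i - 1]);
	    a[i] = work (i);
	    #pragma omp ordered depend (source)
	  }

    the depend(source) directive is lowered to a call to
    GOMP_doacross_post (GOMP_doacross_ull_post for unsigned long long
    iterators), passing the address of the current iteration counter
    array built below.  */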
1963 static void
1964 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1965 tree *counts, location_t loc)
1967 enum built_in_function source_ix
1968 = fd->iter_type == long_integer_type_node
1969 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1970 gimple *g
1971 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1972 build_fold_addr_expr (counts[fd->ordered]));
1973 gimple_set_location (g, loc);
1974 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1977 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
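 /* Conceptually (a hedged sketch, not the literal emitted GIMPLE): for
    depend (sink: i - 2) in the example above this produces roughly

	if (i - 2 >= lb1)
	  GOMP_doacross_wait (i - 2);

    where lb1 stands for the lower bound of the i loop: the sink offsets
    are range-checked against the iteration space, folded to logical
    iteration numbers and passed to GOMP_doacross_wait
    (GOMP_doacross_ull_wait for unsigned long long iterators).  */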
1979 static void
1980 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1981 tree *counts, tree c, location_t loc)
1983 auto_vec<tree, 10> args;
1984 enum built_in_function sink_ix
1985 = fd->iter_type == long_integer_type_node
1986 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1987 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1988 int i;
1989 gimple_stmt_iterator gsi2 = *gsi;
1990 bool warned_step = false;
1992 for (i = 0; i < fd->ordered; i++)
1994 tree step = NULL_TREE;
1995 off = TREE_PURPOSE (deps);
1996 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1998 step = TREE_OPERAND (off, 1);
1999 off = TREE_OPERAND (off, 0);
2001 if (!integer_zerop (off))
2003 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2004 || fd->loops[i].cond_code == GT_EXPR);
2005 bool forward = fd->loops[i].cond_code == LT_EXPR;
2006 if (step)
 2008 /* Non-simple Fortran DO loops.  If the step is variable,
 2009 we don't even know the direction at compile time, so we
 2010 can't warn.  */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 break;
2013 forward = tree_int_cst_sgn (step) != -1;
2015 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2016 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2017 "lexically later iteration");
2018 break;
2020 deps = TREE_CHAIN (deps);
2022 /* If all offsets corresponding to the collapsed loops are zero,
2023 this depend clause can be ignored. FIXME: but there is still a
2024 flush needed. We need to emit one __sync_synchronize () for it
2025 though (perhaps conditionally)? Solve this together with the
2026 conservative dependence folding optimization.
2027 if (i >= fd->collapse)
2028 return; */
2030 deps = OMP_CLAUSE_DECL (c);
2031 gsi_prev (&gsi2);
2032 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2033 edge e2 = split_block_after_labels (e1->dest);
2035 gsi2 = gsi_after_labels (e1->dest);
2036 *gsi = gsi_last_bb (e1->src);
2037 for (i = 0; i < fd->ordered; i++)
2039 tree itype = TREE_TYPE (fd->loops[i].v);
2040 tree step = NULL_TREE;
2041 tree orig_off = NULL_TREE;
2042 if (POINTER_TYPE_P (itype))
2043 itype = sizetype;
2044 if (i)
2045 deps = TREE_CHAIN (deps);
2046 off = TREE_PURPOSE (deps);
2047 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2049 step = TREE_OPERAND (off, 1);
2050 off = TREE_OPERAND (off, 0);
2051 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2052 && integer_onep (fd->loops[i].step)
2053 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2055 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2056 if (step)
2058 off = fold_convert_loc (loc, itype, off);
2059 orig_off = off;
2060 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2063 if (integer_zerop (off))
2064 t = boolean_true_node;
2065 else
2067 tree a;
2068 tree co = fold_convert_loc (loc, itype, off);
2069 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2071 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2072 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2073 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2074 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2075 co);
2077 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2078 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2079 fd->loops[i].v, co);
2080 else
2081 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2082 fd->loops[i].v, co);
2083 if (step)
2085 tree t1, t2;
2086 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2088 fd->loops[i].n1);
2089 else
2090 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2091 fd->loops[i].n2);
2092 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2094 fd->loops[i].n2);
2095 else
2096 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2097 fd->loops[i].n1);
2098 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2099 step, build_int_cst (TREE_TYPE (step), 0));
2100 if (TREE_CODE (step) != INTEGER_CST)
2102 t1 = unshare_expr (t1);
2103 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2104 false, GSI_CONTINUE_LINKING);
2105 t2 = unshare_expr (t2);
2106 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2107 false, GSI_CONTINUE_LINKING);
2109 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2110 t, t2, t1);
2112 else if (fd->loops[i].cond_code == LT_EXPR)
2114 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2115 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2116 fd->loops[i].n1);
2117 else
2118 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2119 fd->loops[i].n2);
2121 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2122 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2123 fd->loops[i].n2);
2124 else
2125 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2126 fd->loops[i].n1);
2128 if (cond)
2129 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2130 else
2131 cond = t;
2133 off = fold_convert_loc (loc, itype, off);
2135 if (step
2136 || (fd->loops[i].cond_code == LT_EXPR
2137 ? !integer_onep (fd->loops[i].step)
2138 : !integer_minus_onep (fd->loops[i].step)))
2140 if (step == NULL_TREE
2141 && TYPE_UNSIGNED (itype)
2142 && fd->loops[i].cond_code == GT_EXPR)
2143 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2144 fold_build1_loc (loc, NEGATE_EXPR, itype,
2145 s));
2146 else
2147 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2148 orig_off ? orig_off : off, s);
2149 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2150 build_int_cst (itype, 0));
2151 if (integer_zerop (t) && !warned_step)
2153 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2154 "in the iteration space");
2155 warned_step = true;
2157 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2158 cond, t);
2161 if (i <= fd->collapse - 1 && fd->collapse > 1)
2162 t = fd->loop.v;
2163 else if (counts[i])
2164 t = counts[i];
2165 else
2167 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2168 fd->loops[i].v, fd->loops[i].n1);
2169 t = fold_convert_loc (loc, fd->iter_type, t);
2171 if (step)
2172 /* We have divided off by step already earlier. */;
2173 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2174 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2175 fold_build1_loc (loc, NEGATE_EXPR, itype,
2176 s));
2177 else
2178 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2179 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2180 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2181 off = fold_convert_loc (loc, fd->iter_type, off);
2182 if (i <= fd->collapse - 1 && fd->collapse > 1)
2184 if (i)
2185 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2186 off);
2187 if (i < fd->collapse - 1)
2189 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2190 counts[i]);
2191 continue;
2194 off = unshare_expr (off);
2195 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2196 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2197 true, GSI_SAME_STMT);
2198 args.safe_push (t);
2200 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2201 gimple_set_location (g, loc);
2202 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2204 cond = unshare_expr (cond);
2205 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2206 GSI_CONTINUE_LINKING);
2207 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2208 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2209 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2210 e1->probability = e3->probability.invert ();
2211 e1->flags = EDGE_TRUE_VALUE;
2212 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2214 *gsi = gsi_after_labels (e2->dest);
2217 /* Expand all #pragma omp ordered depend(source) and
2218 #pragma omp ordered depend(sink:...) constructs in the current
2219 #pragma omp for ordered(n) region. */
2221 static void
2222 expand_omp_ordered_source_sink (struct omp_region *region,
2223 struct omp_for_data *fd, tree *counts,
2224 basic_block cont_bb)
2226 struct omp_region *inner;
2227 int i;
2228 for (i = fd->collapse - 1; i < fd->ordered; i++)
2229 if (i == fd->collapse - 1 && fd->collapse > 1)
2230 counts[i] = NULL_TREE;
2231 else if (i >= fd->collapse && !cont_bb)
2232 counts[i] = build_zero_cst (fd->iter_type);
2233 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2234 && integer_onep (fd->loops[i].step))
2235 counts[i] = NULL_TREE;
2236 else
2237 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2238 tree atype
2239 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2240 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2241 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2243 for (inner = region->inner; inner; inner = inner->next)
2244 if (inner->type == GIMPLE_OMP_ORDERED)
2246 gomp_ordered *ord_stmt = inner->ord_stmt;
2247 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2248 location_t loc = gimple_location (ord_stmt);
2249 tree c;
2250 for (c = gimple_omp_ordered_clauses (ord_stmt);
2251 c; c = OMP_CLAUSE_CHAIN (c))
2252 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2253 break;
2254 if (c)
2255 expand_omp_ordered_source (&gsi, fd, counts, loc);
2256 for (c = gimple_omp_ordered_clauses (ord_stmt);
2257 c; c = OMP_CLAUSE_CHAIN (c))
2258 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2259 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2260 gsi_remove (&gsi, true);
2264 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2265 collapsed. */
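 /* E.g. (illustration, assuming ordered(2) with a collapse count of 1):
    only fd->loops[0] was collapsed, so the body is re-wrapped in an
    explicit loop over fd->loops[1], together with code maintaining the
    .orditera array of current iteration counters used by
    depend(source)/depend(sink).  */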
2267 static basic_block
2268 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2269 basic_block cont_bb, basic_block body_bb,
2270 bool ordered_lastprivate)
2272 if (fd->ordered == fd->collapse)
2273 return cont_bb;
2275 if (!cont_bb)
2277 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2278 for (int i = fd->collapse; i < fd->ordered; i++)
2280 tree type = TREE_TYPE (fd->loops[i].v);
2281 tree n1 = fold_convert (type, fd->loops[i].n1);
2282 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2283 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2284 size_int (i - fd->collapse + 1),
2285 NULL_TREE, NULL_TREE);
2286 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2288 return NULL;
2291 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2293 tree t, type = TREE_TYPE (fd->loops[i].v);
2294 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2295 expand_omp_build_assign (&gsi, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].n1));
2297 if (counts[i])
2298 expand_omp_build_assign (&gsi, counts[i],
2299 build_zero_cst (fd->iter_type));
2300 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2301 size_int (i - fd->collapse + 1),
2302 NULL_TREE, NULL_TREE);
2303 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2304 if (!gsi_end_p (gsi))
2305 gsi_prev (&gsi);
2306 else
2307 gsi = gsi_last_bb (body_bb);
2308 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2309 basic_block new_body = e1->dest;
2310 if (body_bb == cont_bb)
2311 cont_bb = new_body;
2312 edge e2 = NULL;
2313 basic_block new_header;
2314 if (EDGE_COUNT (cont_bb->preds) > 0)
2316 gsi = gsi_last_bb (cont_bb);
2317 if (POINTER_TYPE_P (type))
2318 t = fold_build_pointer_plus (fd->loops[i].v,
2319 fold_convert (sizetype,
2320 fd->loops[i].step));
2321 else
2322 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2323 fold_convert (type, fd->loops[i].step));
2324 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2325 if (counts[i])
2327 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2328 build_int_cst (fd->iter_type, 1));
2329 expand_omp_build_assign (&gsi, counts[i], t);
2330 t = counts[i];
2332 else
2334 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2335 fd->loops[i].v, fd->loops[i].n1);
2336 t = fold_convert (fd->iter_type, t);
2337 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2338 true, GSI_SAME_STMT);
2340 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2341 size_int (i - fd->collapse + 1),
2342 NULL_TREE, NULL_TREE);
2343 expand_omp_build_assign (&gsi, aref, t);
2344 gsi_prev (&gsi);
2345 e2 = split_block (cont_bb, gsi_stmt (gsi));
2346 new_header = e2->dest;
2348 else
2349 new_header = cont_bb;
2350 gsi = gsi_after_labels (new_header);
2351 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2353 tree n2
2354 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2355 true, NULL_TREE, true, GSI_SAME_STMT);
2356 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2357 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2358 edge e3 = split_block (new_header, gsi_stmt (gsi));
2359 cont_bb = e3->dest;
2360 remove_edge (e1);
2361 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2362 e3->flags = EDGE_FALSE_VALUE;
2363 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2364 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2365 e1->probability = e3->probability.invert ();
2367 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2368 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2370 if (e2)
2372 struct loop *loop = alloc_loop ();
2373 loop->header = new_header;
2374 loop->latch = e2->src;
2375 add_loop (loop, body_bb->loop_father);
2379 /* If there are any lastprivate clauses and it is possible some loops
2380 might have zero iterations, ensure all the decls are initialized,
2381 otherwise we could crash evaluating C++ class iterators with lastprivate
2382 clauses. */
2383 bool need_inits = false;
2384 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2385 if (need_inits)
2387 tree type = TREE_TYPE (fd->loops[i].v);
2388 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2389 expand_omp_build_assign (&gsi, fd->loops[i].v,
2390 fold_convert (type, fd->loops[i].n1));
2392 else
2394 tree type = TREE_TYPE (fd->loops[i].v);
2395 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2396 boolean_type_node,
2397 fold_convert (type, fd->loops[i].n1),
2398 fold_convert (type, fd->loops[i].n2));
2399 if (!integer_onep (this_cond))
2400 need_inits = true;
2403 return cont_bb;
2406 /* A subroutine of expand_omp_for. Generate code for a parallel
2407 loop with any schedule. Given parameters:
2409 for (V = N1; V cond N2; V += STEP) BODY;
2411 where COND is "<" or ">", we generate pseudocode
2413 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2414 if (more) goto L0; else goto L3;
2416 V = istart0;
2417 iend = iend0;
2419 BODY;
2420 V += STEP;
2421 if (V cond iend) goto L1; else goto L2;
2423 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
 2426 If this is a combined omp parallel loop, instead of the call to
 2427 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
 2428 If this is a gimple_omp_for_combined_p loop, then instead of assigning
 2429 V and iend in L0 we assign the first two _looptemp_ clause decls of the
 2430 inner GIMPLE_OMP_FOR, and both V += STEP; and
 2431 if (V cond iend) goto L1; else goto L2; are removed.
2433 For collapsed loops, given parameters:
2434 collapse(3)
2435 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2436 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2437 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2438 BODY;
2440 we generate pseudocode
2442 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2443 if (cond3 is <)
2444 adj = STEP3 - 1;
2445 else
2446 adj = STEP3 + 1;
2447 count3 = (adj + N32 - N31) / STEP3;
2448 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2449 if (cond2 is <)
2450 adj = STEP2 - 1;
2451 else
2452 adj = STEP2 + 1;
2453 count2 = (adj + N22 - N21) / STEP2;
2454 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2455 if (cond1 is <)
2456 adj = STEP1 - 1;
2457 else
2458 adj = STEP1 + 1;
2459 count1 = (adj + N12 - N11) / STEP1;
2460 count = count1 * count2 * count3;
2461 goto Z1;
2463 count = 0;
2465 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2466 if (more) goto L0; else goto L3;
2468 V = istart0;
2469 T = V;
2470 V3 = N31 + (T % count3) * STEP3;
2471 T = T / count3;
2472 V2 = N21 + (T % count2) * STEP2;
2473 T = T / count2;
2474 V1 = N11 + T * STEP1;
2475 iend = iend0;
2477 BODY;
2478 V += 1;
2479 if (V < iend) goto L10; else goto L2;
2480 L10:
2481 V3 += STEP3;
2482 if (V3 cond3 N32) goto L1; else goto L11;
2483 L11:
2484 V3 = N31;
2485 V2 += STEP2;
2486 if (V2 cond2 N22) goto L1; else goto L12;
2487 L12:
2488 V2 = N21;
2489 V1 += STEP1;
2490 goto L1;
2492 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
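 /* For reference (assumed libgomp prototypes, not declared in this
    file): GOMP_loop_foo_start / GOMP_loop_foo_next stand for runtime
    entry points along the lines of

	bool GOMP_loop_dynamic_start (long start, long end, long incr,
				      long chunk_size,
				      long *istart, long *iend);
	bool GOMP_loop_dynamic_next (long *istart, long *iend);

    which hand out the next chunk [*istart, *iend) and return false once
    the iteration space is exhausted; the _ull_ variants take unsigned
    long long bounds plus a leading bool selecting an upward (<) or
    downward (>) loop.  */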
2497 static void
2498 expand_omp_for_generic (struct omp_region *region,
2499 struct omp_for_data *fd,
2500 enum built_in_function start_fn,
2501 enum built_in_function next_fn,
2502 gimple *inner_stmt)
2504 tree type, istart0, iend0, iend;
2505 tree t, vmain, vback, bias = NULL_TREE;
2506 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2507 basic_block l2_bb = NULL, l3_bb = NULL;
2508 gimple_stmt_iterator gsi;
2509 gassign *assign_stmt;
2510 bool in_combined_parallel = is_combined_parallel (region);
2511 bool broken_loop = region->cont == NULL;
2512 edge e, ne;
2513 tree *counts = NULL;
2514 int i;
2515 bool ordered_lastprivate = false;
2517 gcc_assert (!broken_loop || !in_combined_parallel);
2518 gcc_assert (fd->iter_type == long_integer_type_node
2519 || !in_combined_parallel);
2521 entry_bb = region->entry;
2522 cont_bb = region->cont;
2523 collapse_bb = NULL;
2524 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2525 gcc_assert (broken_loop
2526 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2527 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2528 l1_bb = single_succ (l0_bb);
2529 if (!broken_loop)
2531 l2_bb = create_empty_bb (cont_bb);
2532 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2533 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2534 == l1_bb));
2535 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2537 else
2538 l2_bb = NULL;
2539 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2540 exit_bb = region->exit;
2542 gsi = gsi_last_nondebug_bb (entry_bb);
2544 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2545 if (fd->ordered
2546 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2547 OMP_CLAUSE_LASTPRIVATE))
 2548 ordered_lastprivate = true;
2549 if (fd->collapse > 1 || fd->ordered)
2551 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2552 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2554 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2555 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2556 zero_iter1_bb, first_zero_iter1,
2557 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2559 if (zero_iter1_bb)
2561 /* Some counts[i] vars might be uninitialized if
2562 some loop has zero iterations. But the body shouldn't
2563 be executed in that case, so just avoid uninit warnings. */
2564 for (i = first_zero_iter1;
2565 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2566 if (SSA_VAR_P (counts[i]))
2567 TREE_NO_WARNING (counts[i]) = 1;
2568 gsi_prev (&gsi);
2569 e = split_block (entry_bb, gsi_stmt (gsi));
2570 entry_bb = e->dest;
2571 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2572 gsi = gsi_last_nondebug_bb (entry_bb);
2573 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2574 get_immediate_dominator (CDI_DOMINATORS,
2575 zero_iter1_bb));
2577 if (zero_iter2_bb)
2579 /* Some counts[i] vars might be uninitialized if
2580 some loop has zero iterations. But the body shouldn't
2581 be executed in that case, so just avoid uninit warnings. */
2582 for (i = first_zero_iter2; i < fd->ordered; i++)
2583 if (SSA_VAR_P (counts[i]))
2584 TREE_NO_WARNING (counts[i]) = 1;
2585 if (zero_iter1_bb)
2586 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2587 else
2589 gsi_prev (&gsi);
2590 e = split_block (entry_bb, gsi_stmt (gsi));
2591 entry_bb = e->dest;
2592 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2593 gsi = gsi_last_nondebug_bb (entry_bb);
2594 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2595 get_immediate_dominator
2596 (CDI_DOMINATORS, zero_iter2_bb));
2599 if (fd->collapse == 1)
2601 counts[0] = fd->loop.n2;
2602 fd->loop = fd->loops[0];
2606 type = TREE_TYPE (fd->loop.v);
2607 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2608 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2609 TREE_ADDRESSABLE (istart0) = 1;
2610 TREE_ADDRESSABLE (iend0) = 1;
2612 /* See if we need to bias by LLONG_MIN. */
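 /* (Explanatory note, hedged: the runtime only iterates over an
    unsigned long long space, so when the signed iterator's range might
    cross zero the bounds are offset by this bias to map them
    monotonically onto unsigned values; the bias is subtracted again
    below when istart0/iend0 are copied back into the user's
    variables.)  */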
2613 if (fd->iter_type == long_long_unsigned_type_node
2614 && TREE_CODE (type) == INTEGER_TYPE
2615 && !TYPE_UNSIGNED (type)
2616 && fd->ordered == 0)
2618 tree n1, n2;
2620 if (fd->loop.cond_code == LT_EXPR)
2622 n1 = fd->loop.n1;
2623 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2625 else
2627 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2628 n2 = fd->loop.n1;
2630 if (TREE_CODE (n1) != INTEGER_CST
2631 || TREE_CODE (n2) != INTEGER_CST
2632 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2633 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2636 gimple_stmt_iterator gsif = gsi;
2637 gsi_prev (&gsif);
2639 tree arr = NULL_TREE;
2640 if (in_combined_parallel)
2642 gcc_assert (fd->ordered == 0);
2643 /* In a combined parallel loop, emit a call to
2644 GOMP_loop_foo_next. */
2645 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2646 build_fold_addr_expr (istart0),
2647 build_fold_addr_expr (iend0));
2649 else
2651 tree t0, t1, t2, t3, t4;
2652 /* If this is not a combined parallel loop, emit a call to
2653 GOMP_loop_foo_start in ENTRY_BB. */
2654 t4 = build_fold_addr_expr (iend0);
2655 t3 = build_fold_addr_expr (istart0);
2656 if (fd->ordered)
2658 t0 = build_int_cst (unsigned_type_node,
2659 fd->ordered - fd->collapse + 1);
2660 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2661 fd->ordered
2662 - fd->collapse + 1),
2663 ".omp_counts");
2664 DECL_NAMELESS (arr) = 1;
2665 TREE_ADDRESSABLE (arr) = 1;
2666 TREE_STATIC (arr) = 1;
2667 vec<constructor_elt, va_gc> *v;
2668 vec_alloc (v, fd->ordered - fd->collapse + 1);
2669 int idx;
2671 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2673 tree c;
2674 if (idx == 0 && fd->collapse > 1)
2675 c = fd->loop.n2;
2676 else
2677 c = counts[idx + fd->collapse - 1];
2678 tree purpose = size_int (idx);
2679 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2680 if (TREE_CODE (c) != INTEGER_CST)
2681 TREE_STATIC (arr) = 0;
2684 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2685 if (!TREE_STATIC (arr))
2686 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2687 void_type_node, arr),
2688 true, NULL_TREE, true, GSI_SAME_STMT);
2689 t1 = build_fold_addr_expr (arr);
2690 t2 = NULL_TREE;
2692 else
2694 t2 = fold_convert (fd->iter_type, fd->loop.step);
2695 t1 = fd->loop.n2;
2696 t0 = fd->loop.n1;
2697 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2699 tree innerc
2700 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2701 OMP_CLAUSE__LOOPTEMP_);
2702 gcc_assert (innerc);
2703 t0 = OMP_CLAUSE_DECL (innerc);
2704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2705 OMP_CLAUSE__LOOPTEMP_);
2706 gcc_assert (innerc);
2707 t1 = OMP_CLAUSE_DECL (innerc);
2709 if (POINTER_TYPE_P (TREE_TYPE (t0))
2710 && TYPE_PRECISION (TREE_TYPE (t0))
2711 != TYPE_PRECISION (fd->iter_type))
2713 /* Avoid casting pointers to integer of a different size. */
2714 tree itype = signed_type_for (type);
2715 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2716 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2718 else
2720 t1 = fold_convert (fd->iter_type, t1);
2721 t0 = fold_convert (fd->iter_type, t0);
2723 if (bias)
2725 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2726 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2729 if (fd->iter_type == long_integer_type_node || fd->ordered)
2731 if (fd->chunk_size)
2733 t = fold_convert (fd->iter_type, fd->chunk_size);
2734 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2735 if (fd->ordered)
2736 t = build_call_expr (builtin_decl_explicit (start_fn),
2737 5, t0, t1, t, t3, t4);
2738 else
2739 t = build_call_expr (builtin_decl_explicit (start_fn),
2740 6, t0, t1, t2, t, t3, t4);
2742 else if (fd->ordered)
2743 t = build_call_expr (builtin_decl_explicit (start_fn),
2744 4, t0, t1, t3, t4);
2745 else
2746 t = build_call_expr (builtin_decl_explicit (start_fn),
2747 5, t0, t1, t2, t3, t4);
2749 else
2751 tree t5;
2752 tree c_bool_type;
2753 tree bfn_decl;
 2755 /* The GOMP_loop_ull_*start functions have an additional boolean
 2756 argument, true for < loops and false for > loops.
 2757 In Fortran, the C bool type can be different from
 2758 boolean_type_node. */
2759 bfn_decl = builtin_decl_explicit (start_fn);
2760 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2761 t5 = build_int_cst (c_bool_type,
2762 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2763 if (fd->chunk_size)
2765 tree bfn_decl = builtin_decl_explicit (start_fn);
2766 t = fold_convert (fd->iter_type, fd->chunk_size);
2767 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2768 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2770 else
2771 t = build_call_expr (builtin_decl_explicit (start_fn),
2772 6, t5, t0, t1, t2, t3, t4);
2775 if (TREE_TYPE (t) != boolean_type_node)
2776 t = fold_build2 (NE_EXPR, boolean_type_node,
2777 t, build_int_cst (TREE_TYPE (t), 0));
2778 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2779 true, GSI_SAME_STMT);
2780 if (arr && !TREE_STATIC (arr))
2782 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2783 TREE_THIS_VOLATILE (clobber) = 1;
2784 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2785 GSI_SAME_STMT);
2787 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2789 /* Remove the GIMPLE_OMP_FOR statement. */
2790 gsi_remove (&gsi, true);
2792 if (gsi_end_p (gsif))
2793 gsif = gsi_after_labels (gsi_bb (gsif));
2794 gsi_next (&gsif);
2796 /* Iteration setup for sequential loop goes in L0_BB. */
2797 tree startvar = fd->loop.v;
2798 tree endvar = NULL_TREE;
2800 if (gimple_omp_for_combined_p (fd->for_stmt))
2802 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2803 && gimple_omp_for_kind (inner_stmt)
2804 == GF_OMP_FOR_KIND_SIMD);
2805 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2806 OMP_CLAUSE__LOOPTEMP_);
2807 gcc_assert (innerc);
2808 startvar = OMP_CLAUSE_DECL (innerc);
2809 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2810 OMP_CLAUSE__LOOPTEMP_);
2811 gcc_assert (innerc);
2812 endvar = OMP_CLAUSE_DECL (innerc);
2815 gsi = gsi_start_bb (l0_bb);
2816 t = istart0;
2817 if (fd->ordered && fd->collapse == 1)
2818 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2819 fold_convert (fd->iter_type, fd->loop.step));
2820 else if (bias)
2821 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2822 if (fd->ordered && fd->collapse == 1)
2824 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2825 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2826 fd->loop.n1, fold_convert (sizetype, t));
2827 else
2829 t = fold_convert (TREE_TYPE (startvar), t);
2830 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2831 fd->loop.n1, t);
2834 else
2836 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2837 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2838 t = fold_convert (TREE_TYPE (startvar), t);
2840 t = force_gimple_operand_gsi (&gsi, t,
2841 DECL_P (startvar)
2842 && TREE_ADDRESSABLE (startvar),
2843 NULL_TREE, false, GSI_CONTINUE_LINKING);
2844 assign_stmt = gimple_build_assign (startvar, t);
2845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2847 t = iend0;
2848 if (fd->ordered && fd->collapse == 1)
2849 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2850 fold_convert (fd->iter_type, fd->loop.step));
2851 else if (bias)
2852 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2853 if (fd->ordered && fd->collapse == 1)
2855 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2856 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2857 fd->loop.n1, fold_convert (sizetype, t));
2858 else
2860 t = fold_convert (TREE_TYPE (startvar), t);
2861 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2862 fd->loop.n1, t);
2865 else
2867 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2868 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2869 t = fold_convert (TREE_TYPE (startvar), t);
2871 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2872 false, GSI_CONTINUE_LINKING);
2873 if (endvar)
2875 assign_stmt = gimple_build_assign (endvar, iend);
2876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2877 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2878 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2879 else
2880 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2881 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2883 /* Handle linear clause adjustments. */
2884 tree itercnt = NULL_TREE;
2885 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2886 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2887 c; c = OMP_CLAUSE_CHAIN (c))
2888 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2889 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2891 tree d = OMP_CLAUSE_DECL (c);
2892 bool is_ref = omp_is_reference (d);
2893 tree t = d, a, dest;
2894 if (is_ref)
2895 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2896 tree type = TREE_TYPE (t);
2897 if (POINTER_TYPE_P (type))
2898 type = sizetype;
2899 dest = unshare_expr (t);
2900 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2901 expand_omp_build_assign (&gsif, v, t);
2902 if (itercnt == NULL_TREE)
2904 itercnt = startvar;
2905 tree n1 = fd->loop.n1;
2906 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2908 itercnt
2909 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2910 itercnt);
2911 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2913 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2914 itercnt, n1);
2915 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2916 itercnt, fd->loop.step);
2917 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2918 NULL_TREE, false,
2919 GSI_CONTINUE_LINKING);
2921 a = fold_build2 (MULT_EXPR, type,
2922 fold_convert (type, itercnt),
2923 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2924 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2925 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2926 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2927 false, GSI_CONTINUE_LINKING);
2928 assign_stmt = gimple_build_assign (dest, t);
2929 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2931 if (fd->collapse > 1)
2932 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2934 if (fd->ordered)
 2936 /* Until now, the counts array contained the number of iterations
 2937 (or a variable holding it) for the ith loop.  From now on, we need
 2938 those counts only for the collapsed loops, and only for the 2nd
 2939 through the last collapsed one.  Move them one element earlier;
 2940 we'll use counts[fd->collapse - 1] for the first source/sink
 2941 iteration counter and so on, and counts[fd->ordered]
 2942 as the array holding the current counter values for
 2943 depend(source). */
2944 if (fd->collapse > 1)
2945 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2946 if (broken_loop)
2948 int i;
2949 for (i = fd->collapse; i < fd->ordered; i++)
2951 tree type = TREE_TYPE (fd->loops[i].v);
2952 tree this_cond
2953 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2954 fold_convert (type, fd->loops[i].n1),
2955 fold_convert (type, fd->loops[i].n2));
2956 if (!integer_onep (this_cond))
2957 break;
2959 if (i < fd->ordered)
2961 cont_bb
2962 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2963 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2964 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2965 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2966 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2967 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2968 make_edge (cont_bb, l1_bb, 0);
2969 l2_bb = create_empty_bb (cont_bb);
2970 broken_loop = false;
2973 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2974 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2975 ordered_lastprivate);
2976 if (counts[fd->collapse - 1])
2978 gcc_assert (fd->collapse == 1);
2979 gsi = gsi_last_bb (l0_bb);
2980 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2981 istart0, true);
2982 gsi = gsi_last_bb (cont_bb);
2983 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2984 build_int_cst (fd->iter_type, 1));
2985 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2986 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2987 size_zero_node, NULL_TREE, NULL_TREE);
2988 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2989 t = counts[fd->collapse - 1];
2991 else if (fd->collapse > 1)
2992 t = fd->loop.v;
2993 else
2995 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2996 fd->loops[0].v, fd->loops[0].n1);
2997 t = fold_convert (fd->iter_type, t);
2999 gsi = gsi_last_bb (l0_bb);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3003 false, GSI_CONTINUE_LINKING);
3004 expand_omp_build_assign (&gsi, aref, t, true);
3007 if (!broken_loop)
3009 /* Code to control the increment and predicate for the sequential
3010 loop goes in the CONT_BB. */
3011 gsi = gsi_last_nondebug_bb (cont_bb);
3012 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3013 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3014 vmain = gimple_omp_continue_control_use (cont_stmt);
3015 vback = gimple_omp_continue_control_def (cont_stmt);
3017 if (!gimple_omp_for_combined_p (fd->for_stmt))
3019 if (POINTER_TYPE_P (type))
3020 t = fold_build_pointer_plus (vmain, fd->loop.step);
3021 else
3022 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3023 t = force_gimple_operand_gsi (&gsi, t,
3024 DECL_P (vback)
3025 && TREE_ADDRESSABLE (vback),
3026 NULL_TREE, true, GSI_SAME_STMT);
3027 assign_stmt = gimple_build_assign (vback, t);
3028 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3030 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3032 if (fd->collapse > 1)
3033 t = fd->loop.v;
3034 else
3036 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3037 fd->loops[0].v, fd->loops[0].n1);
3038 t = fold_convert (fd->iter_type, t);
3040 tree aref = build4 (ARRAY_REF, fd->iter_type,
3041 counts[fd->ordered], size_zero_node,
3042 NULL_TREE, NULL_TREE);
3043 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3044 true, GSI_SAME_STMT);
3045 expand_omp_build_assign (&gsi, aref, t);
3048 t = build2 (fd->loop.cond_code, boolean_type_node,
3049 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3050 iend);
3051 gcond *cond_stmt = gimple_build_cond_empty (t);
3052 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3055 /* Remove GIMPLE_OMP_CONTINUE. */
3056 gsi_remove (&gsi, true);
3058 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3059 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3061 /* Emit code to get the next parallel iteration in L2_BB. */
3062 gsi = gsi_start_bb (l2_bb);
3064 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3065 build_fold_addr_expr (istart0),
3066 build_fold_addr_expr (iend0));
3067 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3068 false, GSI_CONTINUE_LINKING);
3069 if (TREE_TYPE (t) != boolean_type_node)
3070 t = fold_build2 (NE_EXPR, boolean_type_node,
3071 t, build_int_cst (TREE_TYPE (t), 0));
3072 gcond *cond_stmt = gimple_build_cond_empty (t);
3073 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3076 /* Add the loop cleanup function. */
3077 gsi = gsi_last_nondebug_bb (exit_bb);
3078 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3079 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3080 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3082 else
3083 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3084 gcall *call_stmt = gimple_build_call (t, 0);
3085 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3086 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3087 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3088 if (fd->ordered)
3090 tree arr = counts[fd->ordered];
3091 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3092 TREE_THIS_VOLATILE (clobber) = 1;
3093 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3094 GSI_SAME_STMT);
3096 gsi_remove (&gsi, true);
3098 /* Connect the new blocks. */
3099 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3100 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3102 if (!broken_loop)
3104 gimple_seq phis;
3106 e = find_edge (cont_bb, l3_bb);
3107 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3109 phis = phi_nodes (l3_bb);
3110 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3112 gimple *phi = gsi_stmt (gsi);
3113 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3114 PHI_ARG_DEF_FROM_EDGE (phi, e));
3116 remove_edge (e);
3118 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3119 e = find_edge (cont_bb, l1_bb);
3120 if (e == NULL)
3122 e = BRANCH_EDGE (cont_bb);
3123 gcc_assert (single_succ (e->dest) == l1_bb);
3125 if (gimple_omp_for_combined_p (fd->for_stmt))
3127 remove_edge (e);
3128 e = NULL;
3130 else if (fd->collapse > 1)
3132 remove_edge (e);
3133 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3135 else
3136 e->flags = EDGE_TRUE_VALUE;
3137 if (e)
3139 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3140 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3142 else
3144 e = find_edge (cont_bb, l2_bb);
3145 e->flags = EDGE_FALLTHRU;
3147 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3149 if (gimple_in_ssa_p (cfun))
3151 /* Add phis to the outer loop that connect to the phis in the inner,
3152 original loop, and move the loop entry value of the inner phi to
3153 the loop entry value of the outer phi. */
3154 gphi_iterator psi;
3155 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3157 source_location locus;
3158 gphi *nphi;
3159 gphi *exit_phi = psi.phi ();
3161 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3162 continue;
3164 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3165 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3167 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3168 edge latch_to_l1 = find_edge (latch, l1_bb);
3169 gphi *inner_phi
3170 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3172 tree t = gimple_phi_result (exit_phi);
3173 tree new_res = copy_ssa_name (t, NULL);
3174 nphi = create_phi_node (new_res, l0_bb);
3176 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3177 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3178 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3179 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3180 add_phi_arg (nphi, t, entry_to_l0, locus);
3182 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3183 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3185 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3189 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3190 recompute_dominator (CDI_DOMINATORS, l2_bb));
3191 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3192 recompute_dominator (CDI_DOMINATORS, l3_bb));
3193 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3194 recompute_dominator (CDI_DOMINATORS, l0_bb));
3195 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3196 recompute_dominator (CDI_DOMINATORS, l1_bb));
3198 /* We enter expand_omp_for_generic with a loop. This original loop may
3199 have its own loop struct, or it may be part of an outer loop struct
3200 (which may be the fake loop). */
3201 struct loop *outer_loop = entry_bb->loop_father;
3202 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3204 add_bb_to_loop (l2_bb, outer_loop);
3206 /* We've added a new loop around the original loop. Allocate the
3207 corresponding loop struct. */
3208 struct loop *new_loop = alloc_loop ();
3209 new_loop->header = l0_bb;
3210 new_loop->latch = l2_bb;
3211 add_loop (new_loop, outer_loop);
3213 /* Allocate a loop structure for the original loop unless we already
3214 had one. */
3215 if (!orig_loop_has_loop_struct
3216 && !gimple_omp_for_combined_p (fd->for_stmt))
3218 struct loop *orig_loop = alloc_loop ();
3219 orig_loop->header = l1_bb;
3220 /* The loop may have multiple latches. */
3221 add_loop (orig_loop, new_loop);
3226 /* A subroutine of expand_omp_for. Generate code for a parallel
3227 loop with static schedule and no specified chunk size. Given
3228 parameters:
3230 for (V = N1; V cond N2; V += STEP) BODY;
3232 where COND is "<" or ">", we generate pseudocode
3234 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3235 if (cond is <)
3236 adj = STEP - 1;
3237 else
3238 adj = STEP + 1;
3239 if ((__typeof (V)) -1 > 0 && cond is >)
3240 n = -(adj + N2 - N1) / -STEP;
3241 else
3242 n = (adj + N2 - N1) / STEP;
3243 q = n / nthreads;
3244 tt = n % nthreads;
3245 if (threadid < tt) goto L3; else goto L4;
3247 tt = 0;
3248 q = q + 1;
3250 s0 = q * threadid + tt;
3251 e0 = s0 + q;
3252 V = s0 * STEP + N1;
3253 if (s0 >= e0) goto L2; else goto L0;
3255 e = e0 * STEP + N1;
3257 BODY;
3258 V += STEP;
3259 if (V cond e) goto L1;
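 /* Worked example (hedged; the numbers are invented for illustration):
    with n = 10 iterations and nthreads = 4 we get q = 2 and tt = 2.
    Threads 0 and 1 (threadid < tt) take q + 1 = 3 iterations each,
    s0 = 3 * threadid, giving [0,3) and [3,6); threads 2 and 3 keep
    q = 2 and tt = 2, s0 = 2 * threadid + 2, giving [6,8) and [8,10).
    Together that covers all 10 iterations exactly once.  */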
3263 static void
3264 expand_omp_for_static_nochunk (struct omp_region *region,
3265 struct omp_for_data *fd,
3266 gimple *inner_stmt)
3268 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3269 tree type, itype, vmain, vback;
3270 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3271 basic_block body_bb, cont_bb, collapse_bb = NULL;
3272 basic_block fin_bb;
3273 gimple_stmt_iterator gsi;
3274 edge ep;
3275 bool broken_loop = region->cont == NULL;
3276 tree *counts = NULL;
3277 tree n1, n2, step;
3279 itype = type = TREE_TYPE (fd->loop.v);
3280 if (POINTER_TYPE_P (type))
3281 itype = signed_type_for (type);
3283 entry_bb = region->entry;
3284 cont_bb = region->cont;
3285 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3286 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3287 gcc_assert (broken_loop
3288 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3289 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3290 body_bb = single_succ (seq_start_bb);
3291 if (!broken_loop)
3293 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3294 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3295 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3297 exit_bb = region->exit;
3299 /* Iteration space partitioning goes in ENTRY_BB. */
3300 gsi = gsi_last_nondebug_bb (entry_bb);
3301 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3303 if (fd->collapse > 1)
3305 int first_zero_iter = -1, dummy = -1;
3306 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3308 counts = XALLOCAVEC (tree, fd->collapse);
3309 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3310 fin_bb, first_zero_iter,
3311 dummy_bb, dummy, l2_dom_bb);
3312 t = NULL_TREE;
3314 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3315 t = integer_one_node;
3316 else
3317 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3318 fold_convert (type, fd->loop.n1),
3319 fold_convert (type, fd->loop.n2));
3320 if (fd->collapse == 1
3321 && TYPE_UNSIGNED (type)
3322 && (t == NULL_TREE || !integer_onep (t)))
3324 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3325 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3326 true, GSI_SAME_STMT);
3327 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3328 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3329 true, GSI_SAME_STMT);
3330 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3331 NULL_TREE, NULL_TREE);
3332 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3333 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3334 expand_omp_regimplify_p, NULL, NULL)
3335 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3336 expand_omp_regimplify_p, NULL, NULL))
3338 gsi = gsi_for_stmt (cond_stmt);
3339 gimple_regimplify_operands (cond_stmt, &gsi);
3341 ep = split_block (entry_bb, cond_stmt);
3342 ep->flags = EDGE_TRUE_VALUE;
3343 entry_bb = ep->dest;
3344 ep->probability = profile_probability::very_likely ();
3345 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3346 ep->probability = profile_probability::very_unlikely ();
3347 if (gimple_in_ssa_p (cfun))
3349 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3350 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3351 !gsi_end_p (gpi); gsi_next (&gpi))
3353 gphi *phi = gpi.phi ();
3354 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3355 ep, UNKNOWN_LOCATION);
3358 gsi = gsi_last_bb (entry_bb);
3361 switch (gimple_omp_for_kind (fd->for_stmt))
3363 case GF_OMP_FOR_KIND_FOR:
3364 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3365 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3366 break;
3367 case GF_OMP_FOR_KIND_DISTRIBUTE:
3368 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3369 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3370 break;
3371 default:
3372 gcc_unreachable ();
3374 nthreads = build_call_expr (nthreads, 0);
3375 nthreads = fold_convert (itype, nthreads);
3376 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3377 true, GSI_SAME_STMT);
3378 threadid = build_call_expr (threadid, 0);
3379 threadid = fold_convert (itype, threadid);
3380 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3381 true, GSI_SAME_STMT);
3383 n1 = fd->loop.n1;
3384 n2 = fd->loop.n2;
3385 step = fd->loop.step;
3386 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3388 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3389 OMP_CLAUSE__LOOPTEMP_);
3390 gcc_assert (innerc);
3391 n1 = OMP_CLAUSE_DECL (innerc);
3392 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3393 OMP_CLAUSE__LOOPTEMP_);
3394 gcc_assert (innerc);
3395 n2 = OMP_CLAUSE_DECL (innerc);
3397 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3398 true, NULL_TREE, true, GSI_SAME_STMT);
3399 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3400 true, NULL_TREE, true, GSI_SAME_STMT);
3401 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3402 true, NULL_TREE, true, GSI_SAME_STMT);
3404 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3405 t = fold_build2 (PLUS_EXPR, itype, step, t);
3406 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3407 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3408 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3409 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3410 fold_build1 (NEGATE_EXPR, itype, t),
3411 fold_build1 (NEGATE_EXPR, itype, step));
3412 else
3413 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3414 t = fold_convert (itype, t);
3415 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3417 q = create_tmp_reg (itype, "q");
3418 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3419 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3420 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3422 tt = create_tmp_reg (itype, "tt");
3423 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3424 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3425 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3427 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3428 gcond *cond_stmt = gimple_build_cond_empty (t);
3429 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3431 second_bb = split_block (entry_bb, cond_stmt)->dest;
3432 gsi = gsi_last_nondebug_bb (second_bb);
3433 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3435 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3436 GSI_SAME_STMT);
3437 gassign *assign_stmt
3438 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3439 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3441 third_bb = split_block (second_bb, assign_stmt)->dest;
3442 gsi = gsi_last_nondebug_bb (third_bb);
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3445 t = build2 (MULT_EXPR, itype, q, threadid);
3446 t = build2 (PLUS_EXPR, itype, t, tt);
3447 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3449 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3450 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3452 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3453 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3455 /* Remove the GIMPLE_OMP_FOR statement. */
3456 gsi_remove (&gsi, true);
3458 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3459 gsi = gsi_start_bb (seq_start_bb);
3461 tree startvar = fd->loop.v;
3462 tree endvar = NULL_TREE;
3464 if (gimple_omp_for_combined_p (fd->for_stmt))
3466 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3467 ? gimple_omp_parallel_clauses (inner_stmt)
3468 : gimple_omp_for_clauses (inner_stmt);
3469 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3470 gcc_assert (innerc);
3471 startvar = OMP_CLAUSE_DECL (innerc);
3472 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3473 OMP_CLAUSE__LOOPTEMP_);
3474 gcc_assert (innerc);
3475 endvar = OMP_CLAUSE_DECL (innerc);
3476 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3477 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3479 int i;
3480 for (i = 1; i < fd->collapse; i++)
3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 OMP_CLAUSE__LOOPTEMP_);
3484 gcc_assert (innerc);
3486 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3487 OMP_CLAUSE__LOOPTEMP_);
3488 if (innerc)
3490 /* If needed (distribute parallel for with lastprivate),
3491 propagate down the total number of iterations. */
3492 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3493 fd->loop.n2);
3494 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3495 GSI_CONTINUE_LINKING);
3496 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3501 t = fold_convert (itype, s0);
3502 t = fold_build2 (MULT_EXPR, itype, t, step);
3503 if (POINTER_TYPE_P (type))
3504 t = fold_build_pointer_plus (n1, t);
3505 else
3506 t = fold_build2 (PLUS_EXPR, type, t, n1);
3507 t = fold_convert (TREE_TYPE (startvar), t);
3508 t = force_gimple_operand_gsi (&gsi, t,
3509 DECL_P (startvar)
3510 && TREE_ADDRESSABLE (startvar),
3511 NULL_TREE, false, GSI_CONTINUE_LINKING);
3512 assign_stmt = gimple_build_assign (startvar, t);
3513 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3515 t = fold_convert (itype, e0);
3516 t = fold_build2 (MULT_EXPR, itype, t, step);
3517 if (POINTER_TYPE_P (type))
3518 t = fold_build_pointer_plus (n1, t);
3519 else
3520 t = fold_build2 (PLUS_EXPR, type, t, n1);
3521 t = fold_convert (TREE_TYPE (startvar), t);
3522 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3523 false, GSI_CONTINUE_LINKING);
3524 if (endvar)
3526 assign_stmt = gimple_build_assign (endvar, e);
3527 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3528 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3529 assign_stmt = gimple_build_assign (fd->loop.v, e);
3530 else
3531 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3532 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3534 /* Handle linear clause adjustments. */
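/* Informal note (an illustration, not from the original sources): a
   clause such as `linear (l : 2)' promises that l equals its initial
   value plus twice the logical iteration number at the top of each
   iteration.  The privatized copy still holds the copied-in value here,
   so it is advanced by the thread's starting iteration (s0, rebased
   against the outer bounds for combined constructs) times the linear
   step.  */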
3535 tree itercnt = NULL_TREE;
3536 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3537 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3538 c; c = OMP_CLAUSE_CHAIN (c))
3539 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3540 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3542 tree d = OMP_CLAUSE_DECL (c);
3543 bool is_ref = omp_is_reference (d);
3544 tree t = d, a, dest;
3545 if (is_ref)
3546 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3547 if (itercnt == NULL_TREE)
3549 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3551 itercnt = fold_build2 (MINUS_EXPR, itype,
3552 fold_convert (itype, n1),
3553 fold_convert (itype, fd->loop.n1));
3554 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3555 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3556 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3557 NULL_TREE, false,
3558 GSI_CONTINUE_LINKING);
3560 else
3561 itercnt = s0;
3563 tree type = TREE_TYPE (t);
3564 if (POINTER_TYPE_P (type))
3565 type = sizetype;
3566 a = fold_build2 (MULT_EXPR, type,
3567 fold_convert (type, itercnt),
3568 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3569 dest = unshare_expr (t);
3570 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3571 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3572 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3573 false, GSI_CONTINUE_LINKING);
3574 assign_stmt = gimple_build_assign (dest, t);
3575 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3577 if (fd->collapse > 1)
3578 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3580 if (!broken_loop)
3582 /* The code controlling the sequential loop replaces the
3583 GIMPLE_OMP_CONTINUE. */
3584 gsi = gsi_last_nondebug_bb (cont_bb);
3585 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3586 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3587 vmain = gimple_omp_continue_control_use (cont_stmt);
3588 vback = gimple_omp_continue_control_def (cont_stmt);
3590 if (!gimple_omp_for_combined_p (fd->for_stmt))
3592 if (POINTER_TYPE_P (type))
3593 t = fold_build_pointer_plus (vmain, step);
3594 else
3595 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3596 t = force_gimple_operand_gsi (&gsi, t,
3597 DECL_P (vback)
3598 && TREE_ADDRESSABLE (vback),
3599 NULL_TREE, true, GSI_SAME_STMT);
3600 assign_stmt = gimple_build_assign (vback, t);
3601 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3603 t = build2 (fd->loop.cond_code, boolean_type_node,
3604 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3605 ? t : vback, e);
3606 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3609 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3610 gsi_remove (&gsi, true);
3612 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3613 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3616 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3617 gsi = gsi_last_nondebug_bb (exit_bb);
3618 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3620 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3621 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3623 gsi_remove (&gsi, true);
3625 /* Connect all the blocks. */
3626 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3627 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3628 ep = find_edge (entry_bb, second_bb);
3629 ep->flags = EDGE_TRUE_VALUE;
3630 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3631 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3632 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3634 if (!broken_loop)
3636 ep = find_edge (cont_bb, body_bb);
3637 if (ep == NULL)
3639 ep = BRANCH_EDGE (cont_bb);
3640 gcc_assert (single_succ (ep->dest) == body_bb);
3642 if (gimple_omp_for_combined_p (fd->for_stmt))
3644 remove_edge (ep);
3645 ep = NULL;
3647 else if (fd->collapse > 1)
3649 remove_edge (ep);
3650 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3652 else
3653 ep->flags = EDGE_TRUE_VALUE;
3654 find_edge (cont_bb, fin_bb)->flags
3655 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3658 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3659 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3660 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3662 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3663 recompute_dominator (CDI_DOMINATORS, body_bb));
3664 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3665 recompute_dominator (CDI_DOMINATORS, fin_bb));
3667 struct loop *loop = body_bb->loop_father;
3668 if (loop != entry_bb->loop_father)
3670 gcc_assert (broken_loop || loop->header == body_bb);
3671 gcc_assert (broken_loop
3672 || loop->latch == region->cont
3673 || single_pred (loop->latch) == region->cont);
3674 return;
3677 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3679 loop = alloc_loop ();
3680 loop->header = body_bb;
3681 if (collapse_bb == NULL)
3682 loop->latch = cont_bb;
3683 add_loop (loop, body_bb->loop_father);
3687 /* Return phi in E->DEST with ARG on edge E. */
3689 static gphi *
3690 find_phi_with_arg_on_edge (tree arg, edge e)
3692 basic_block bb = e->dest;
3694 for (gphi_iterator gpi = gsi_start_phis (bb);
3695 !gsi_end_p (gpi);
3696 gsi_next (&gpi))
3698 gphi *phi = gpi.phi ();
3699 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3700 return phi;
3703 return NULL;
3706 /* A subroutine of expand_omp_for. Generate code for a parallel
3707 loop with static schedule and a specified chunk size. Given
3708 parameters:
3710 for (V = N1; V cond N2; V += STEP) BODY;
3712 where COND is "<" or ">", we generate pseudocode
3714 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
3715 if (cond is <)
3716 adj = STEP - 1;
3717 else
3718 adj = STEP + 1;
3719 if ((__typeof (V)) -1 > 0 && cond is >)
3720 n = -(adj + N2 - N1) / -STEP;
3721 else
3722 n = (adj + N2 - N1) / STEP;
3723 trip = 0;
3724 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3725 here so that V is defined
3726 if the loop is not entered
3727 L0:
3728 s0 = (trip * nthreads + threadid) * CHUNK;
3729 e0 = min (s0 + CHUNK, n);
3730 if (s0 < n) goto L1; else goto L4;
3731 L1:
3732 V = s0 * STEP + N1;
3733 e = e0 * STEP + N1;
3734 L2:
3735 BODY;
3736 V += STEP;
3737 if (V cond e) goto L2; else goto L3;
3738 L3:
3739 trip += 1;
3740 goto L0;
3741 L4:
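
   A hypothetical user-level loop (an illustration only, not part of this
   file) that would be expanded roughly along these lines:

	#include <stdio.h>

	int
	main (void)
	{
	  int a[64];
	#pragma omp parallel for schedule (static, 8)
	  for (int i = 0; i < 64; i++)
	    a[i] = 2 * i;
	  printf ("%d\n", a[63]);
	  return 0;
	}

   Built with -fopenmp, each thread repeatedly claims the 8-iteration
   chunks that the trip counter above assigns to it.  */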
3744 static void
3745 expand_omp_for_static_chunk (struct omp_region *region,
3746 struct omp_for_data *fd, gimple *inner_stmt)
3748 tree n, s0, e0, e, t;
3749 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3750 tree type, itype, vmain, vback, vextra;
3751 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3752 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3753 gimple_stmt_iterator gsi;
3754 edge se;
3755 bool broken_loop = region->cont == NULL;
3756 tree *counts = NULL;
3757 tree n1, n2, step;
3759 itype = type = TREE_TYPE (fd->loop.v);
3760 if (POINTER_TYPE_P (type))
3761 itype = signed_type_for (type);
3763 entry_bb = region->entry;
3764 se = split_block (entry_bb, last_stmt (entry_bb));
3765 entry_bb = se->src;
3766 iter_part_bb = se->dest;
3767 cont_bb = region->cont;
3768 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3769 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3770 gcc_assert (broken_loop
3771 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3772 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3773 body_bb = single_succ (seq_start_bb);
3774 if (!broken_loop)
3776 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3777 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3778 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3779 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3781 exit_bb = region->exit;
3783 /* Trip and adjustment setup goes in ENTRY_BB. */
3784 gsi = gsi_last_nondebug_bb (entry_bb);
3785 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3787 if (fd->collapse > 1)
3789 int first_zero_iter = -1, dummy = -1;
3790 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3792 counts = XALLOCAVEC (tree, fd->collapse);
3793 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3794 fin_bb, first_zero_iter,
3795 dummy_bb, dummy, l2_dom_bb);
3796 t = NULL_TREE;
3798 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3799 t = integer_one_node;
3800 else
3801 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3802 fold_convert (type, fd->loop.n1),
3803 fold_convert (type, fd->loop.n2));
3804 if (fd->collapse == 1
3805 && TYPE_UNSIGNED (type)
3806 && (t == NULL_TREE || !integer_onep (t)))
3808 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3809 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3810 true, GSI_SAME_STMT);
3811 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3812 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3813 true, GSI_SAME_STMT);
3814 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3815 NULL_TREE, NULL_TREE);
3816 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3817 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3818 expand_omp_regimplify_p, NULL, NULL)
3819 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3820 expand_omp_regimplify_p, NULL, NULL))
3822 gsi = gsi_for_stmt (cond_stmt);
3823 gimple_regimplify_operands (cond_stmt, &gsi);
3825 se = split_block (entry_bb, cond_stmt);
3826 se->flags = EDGE_TRUE_VALUE;
3827 entry_bb = se->dest;
3828 se->probability = profile_probability::very_likely ();
3829 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3830 se->probability = profile_probability::very_unlikely ();
3831 if (gimple_in_ssa_p (cfun))
3833 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3834 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3835 !gsi_end_p (gpi); gsi_next (&gpi))
3837 gphi *phi = gpi.phi ();
3838 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3839 se, UNKNOWN_LOCATION);
3842 gsi = gsi_last_bb (entry_bb);
3845 switch (gimple_omp_for_kind (fd->for_stmt))
3847 case GF_OMP_FOR_KIND_FOR:
3848 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3849 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3850 break;
3851 case GF_OMP_FOR_KIND_DISTRIBUTE:
3852 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3853 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3854 break;
3855 default:
3856 gcc_unreachable ();
3858 nthreads = build_call_expr (nthreads, 0);
3859 nthreads = fold_convert (itype, nthreads);
3860 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3861 true, GSI_SAME_STMT);
3862 threadid = build_call_expr (threadid, 0);
3863 threadid = fold_convert (itype, threadid);
3864 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3865 true, GSI_SAME_STMT);
3867 n1 = fd->loop.n1;
3868 n2 = fd->loop.n2;
3869 step = fd->loop.step;
3870 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3872 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 OMP_CLAUSE__LOOPTEMP_);
3874 gcc_assert (innerc);
3875 n1 = OMP_CLAUSE_DECL (innerc);
3876 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3877 OMP_CLAUSE__LOOPTEMP_);
3878 gcc_assert (innerc);
3879 n2 = OMP_CLAUSE_DECL (innerc);
3881 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3882 true, NULL_TREE, true, GSI_SAME_STMT);
3883 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3884 true, NULL_TREE, true, GSI_SAME_STMT);
3885 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3886 true, NULL_TREE, true, GSI_SAME_STMT);
3887 tree chunk_size = fold_convert (itype, fd->chunk_size);
3888 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3889 chunk_size
3890 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3891 GSI_SAME_STMT);
3893 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3894 t = fold_build2 (PLUS_EXPR, itype, step, t);
3895 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3896 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3897 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3898 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3899 fold_build1 (NEGATE_EXPR, itype, t),
3900 fold_build1 (NEGATE_EXPR, itype, step));
3901 else
3902 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3903 t = fold_convert (itype, t);
3904 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3905 true, GSI_SAME_STMT);
3907 trip_var = create_tmp_reg (itype, ".trip");
3908 if (gimple_in_ssa_p (cfun))
3910 trip_init = make_ssa_name (trip_var);
3911 trip_main = make_ssa_name (trip_var);
3912 trip_back = make_ssa_name (trip_var);
3914 else
3916 trip_init = trip_var;
3917 trip_main = trip_var;
3918 trip_back = trip_var;
3921 gassign *assign_stmt
3922 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3923 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3925 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3926 t = fold_build2 (MULT_EXPR, itype, t, step);
3927 if (POINTER_TYPE_P (type))
3928 t = fold_build_pointer_plus (n1, t);
3929 else
3930 t = fold_build2 (PLUS_EXPR, type, t, n1);
3931 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3932 true, GSI_SAME_STMT);
3934 /* Remove the GIMPLE_OMP_FOR. */
3935 gsi_remove (&gsi, true);
3937 gimple_stmt_iterator gsif = gsi;
3939 /* Iteration space partitioning goes in ITER_PART_BB. */
3940 gsi = gsi_last_bb (iter_part_bb);
3942 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3943 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3944 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3945 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3946 false, GSI_CONTINUE_LINKING);
3948 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3949 t = fold_build2 (MIN_EXPR, itype, t, n);
3950 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3951 false, GSI_CONTINUE_LINKING);
3953 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3954 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3956 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3957 gsi = gsi_start_bb (seq_start_bb);
3959 tree startvar = fd->loop.v;
3960 tree endvar = NULL_TREE;
3962 if (gimple_omp_for_combined_p (fd->for_stmt))
3964 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3965 ? gimple_omp_parallel_clauses (inner_stmt)
3966 : gimple_omp_for_clauses (inner_stmt);
3967 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3968 gcc_assert (innerc);
3969 startvar = OMP_CLAUSE_DECL (innerc);
3970 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3971 OMP_CLAUSE__LOOPTEMP_);
3972 gcc_assert (innerc);
3973 endvar = OMP_CLAUSE_DECL (innerc);
3974 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3975 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3977 int i;
3978 for (i = 1; i < fd->collapse; i++)
3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 OMP_CLAUSE__LOOPTEMP_);
3982 gcc_assert (innerc);
3984 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3985 OMP_CLAUSE__LOOPTEMP_);
3986 if (innerc)
3988 /* If needed (distribute parallel for with lastprivate),
3989 propagate down the total number of iterations. */
3990 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3991 fd->loop.n2);
3992 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3993 GSI_CONTINUE_LINKING);
3994 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3995 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4000 t = fold_convert (itype, s0);
4001 t = fold_build2 (MULT_EXPR, itype, t, step);
4002 if (POINTER_TYPE_P (type))
4003 t = fold_build_pointer_plus (n1, t);
4004 else
4005 t = fold_build2 (PLUS_EXPR, type, t, n1);
4006 t = fold_convert (TREE_TYPE (startvar), t);
4007 t = force_gimple_operand_gsi (&gsi, t,
4008 DECL_P (startvar)
4009 && TREE_ADDRESSABLE (startvar),
4010 NULL_TREE, false, GSI_CONTINUE_LINKING);
4011 assign_stmt = gimple_build_assign (startvar, t);
4012 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4014 t = fold_convert (itype, e0);
4015 t = fold_build2 (MULT_EXPR, itype, t, step);
4016 if (POINTER_TYPE_P (type))
4017 t = fold_build_pointer_plus (n1, t);
4018 else
4019 t = fold_build2 (PLUS_EXPR, type, t, n1);
4020 t = fold_convert (TREE_TYPE (startvar), t);
4021 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4022 false, GSI_CONTINUE_LINKING);
4023 if (endvar)
4025 assign_stmt = gimple_build_assign (endvar, e);
4026 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4027 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4028 assign_stmt = gimple_build_assign (fd->loop.v, e);
4029 else
4030 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4031 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4033 /* Handle linear clause adjustments. */
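/* Informal note (not from the original sources): unlike the unchunked
   variant above, SEQ_START_BB is entered once per chunk, so the original
   value of each linear variable is first saved into a temporary at the
   end of ENTRY_BB (through GSIF) and every chunk's starting value is
   computed from that snapshot rather than from the live, already
   adjusted variable.  */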
4034 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4035 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4036 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4037 c; c = OMP_CLAUSE_CHAIN (c))
4038 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4039 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4041 tree d = OMP_CLAUSE_DECL (c);
4042 bool is_ref = omp_is_reference (d);
4043 tree t = d, a, dest;
4044 if (is_ref)
4045 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4046 tree type = TREE_TYPE (t);
4047 if (POINTER_TYPE_P (type))
4048 type = sizetype;
4049 dest = unshare_expr (t);
4050 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4051 expand_omp_build_assign (&gsif, v, t);
4052 if (itercnt == NULL_TREE)
4054 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4056 itercntbias
4057 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4058 fold_convert (itype, fd->loop.n1));
4059 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4060 itercntbias, step);
4061 itercntbias
4062 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4063 NULL_TREE, true,
4064 GSI_SAME_STMT);
4065 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4066 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4067 NULL_TREE, false,
4068 GSI_CONTINUE_LINKING);
4070 else
4071 itercnt = s0;
4073 a = fold_build2 (MULT_EXPR, type,
4074 fold_convert (type, itercnt),
4075 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4076 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4077 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4079 false, GSI_CONTINUE_LINKING);
4080 assign_stmt = gimple_build_assign (dest, t);
4081 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4083 if (fd->collapse > 1)
4084 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4086 if (!broken_loop)
4088 /* The code controlling the sequential loop goes in CONT_BB,
4089 replacing the GIMPLE_OMP_CONTINUE. */
4090 gsi = gsi_last_nondebug_bb (cont_bb);
4091 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4092 vmain = gimple_omp_continue_control_use (cont_stmt);
4093 vback = gimple_omp_continue_control_def (cont_stmt);
4095 if (!gimple_omp_for_combined_p (fd->for_stmt))
4097 if (POINTER_TYPE_P (type))
4098 t = fold_build_pointer_plus (vmain, step);
4099 else
4100 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4101 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4102 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4103 true, GSI_SAME_STMT);
4104 assign_stmt = gimple_build_assign (vback, t);
4105 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
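/* Informal note (not from the original sources): with a literal chunk
   size of 1 each chunk holds exactly one iteration, so the back-edge
   test below is emitted as the constant-false comparison 0 == 1 and
   control always continues to the trip count update.  */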
4107 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4108 t = build2 (EQ_EXPR, boolean_type_node,
4109 build_int_cst (itype, 0),
4110 build_int_cst (itype, 1));
4111 else
4112 t = build2 (fd->loop.cond_code, boolean_type_node,
4113 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4114 ? t : vback, e);
4115 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4118 /* Remove GIMPLE_OMP_CONTINUE. */
4119 gsi_remove (&gsi, true);
4121 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4122 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4124 /* Trip update code goes into TRIP_UPDATE_BB. */
4125 gsi = gsi_start_bb (trip_update_bb);
4127 t = build_int_cst (itype, 1);
4128 t = build2 (PLUS_EXPR, itype, trip_main, t);
4129 assign_stmt = gimple_build_assign (trip_back, t);
4130 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4133 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4134 gsi = gsi_last_nondebug_bb (exit_bb);
4135 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4137 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4138 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4140 gsi_remove (&gsi, true);
4142 /* Connect the new blocks. */
4143 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4144 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4146 if (!broken_loop)
4148 se = find_edge (cont_bb, body_bb);
4149 if (se == NULL)
4151 se = BRANCH_EDGE (cont_bb);
4152 gcc_assert (single_succ (se->dest) == body_bb);
4154 if (gimple_omp_for_combined_p (fd->for_stmt))
4156 remove_edge (se);
4157 se = NULL;
4159 else if (fd->collapse > 1)
4161 remove_edge (se);
4162 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4164 else
4165 se->flags = EDGE_TRUE_VALUE;
4166 find_edge (cont_bb, trip_update_bb)->flags
4167 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4169 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4170 iter_part_bb);
4173 if (gimple_in_ssa_p (cfun))
4175 gphi_iterator psi;
4176 gphi *phi;
4177 edge re, ene;
4178 edge_var_map *vm;
4179 size_t i;
4181 gcc_assert (fd->collapse == 1 && !broken_loop);
4183 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4184 remove arguments of the phi nodes in fin_bb. We need to create
4185 appropriate phi nodes in iter_part_bb instead. */
4186 se = find_edge (iter_part_bb, fin_bb);
4187 re = single_succ_edge (trip_update_bb);
4188 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4189 ene = single_succ_edge (entry_bb);
4191 psi = gsi_start_phis (fin_bb);
4192 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4193 gsi_next (&psi), ++i)
4195 gphi *nphi;
4196 source_location locus;
4198 phi = psi.phi ();
4199 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4200 redirect_edge_var_map_def (vm), 0))
4201 continue;
4203 t = gimple_phi_result (phi);
4204 gcc_assert (t == redirect_edge_var_map_result (vm));
4206 if (!single_pred_p (fin_bb))
4207 t = copy_ssa_name (t, phi);
4209 nphi = create_phi_node (t, iter_part_bb);
4211 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4212 locus = gimple_phi_arg_location_from_edge (phi, se);
4214 /* A special case -- fd->loop.v is not yet computed in
4215 iter_part_bb, we need to use vextra instead. */
4216 if (t == fd->loop.v)
4217 t = vextra;
4218 add_phi_arg (nphi, t, ene, locus);
4219 locus = redirect_edge_var_map_location (vm);
4220 tree back_arg = redirect_edge_var_map_def (vm);
4221 add_phi_arg (nphi, back_arg, re, locus);
4222 edge ce = find_edge (cont_bb, body_bb);
4223 if (ce == NULL)
4225 ce = BRANCH_EDGE (cont_bb);
4226 gcc_assert (single_succ (ce->dest) == body_bb);
4227 ce = single_succ_edge (ce->dest);
4229 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4230 gcc_assert (inner_loop_phi != NULL);
4231 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4232 find_edge (seq_start_bb, body_bb), locus);
4234 if (!single_pred_p (fin_bb))
4235 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4237 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4238 redirect_edge_var_map_clear (re);
4239 if (single_pred_p (fin_bb))
4240 while (1)
4242 psi = gsi_start_phis (fin_bb);
4243 if (gsi_end_p (psi))
4244 break;
4245 remove_phi_node (&psi, false);
4248 /* Make phi node for trip. */
4249 phi = create_phi_node (trip_main, iter_part_bb);
4250 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4251 UNKNOWN_LOCATION);
4252 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4253 UNKNOWN_LOCATION);
4256 if (!broken_loop)
4257 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4258 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4259 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4260 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4261 recompute_dominator (CDI_DOMINATORS, fin_bb));
4262 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4263 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4264 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4265 recompute_dominator (CDI_DOMINATORS, body_bb));
4267 if (!broken_loop)
4269 struct loop *loop = body_bb->loop_father;
4270 struct loop *trip_loop = alloc_loop ();
4271 trip_loop->header = iter_part_bb;
4272 trip_loop->latch = trip_update_bb;
4273 add_loop (trip_loop, iter_part_bb->loop_father);
4275 if (loop != entry_bb->loop_father)
4277 gcc_assert (loop->header == body_bb);
4278 gcc_assert (loop->latch == region->cont
4279 || single_pred (loop->latch) == region->cont);
4280 trip_loop->inner = loop;
4281 return;
4284 if (!gimple_omp_for_combined_p (fd->for_stmt))
4286 loop = alloc_loop ();
4287 loop->header = body_bb;
4288 if (collapse_bb == NULL)
4289 loop->latch = cont_bb;
4290 add_loop (loop, trip_loop);
4295 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4296 loop. Given parameters:
4298 for (V = N1; V cond N2; V += STEP) BODY;
4300 where COND is "<" or ">", we generate pseudocode
4302 V = N1;
4303 goto L1;
4304 L0:
4305 BODY;
4306 V += STEP;
4307 L1:
4308 if (V cond N2) goto L0; else goto L2;
4309 L2:
4311 For collapsed loops, given parameters:
4312 collapse(3)
4313 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4314 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4315 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4316 BODY;
4318 we generate pseudocode
4320 if (cond3 is <)
4321 adj = STEP3 - 1;
4322 else
4323 adj = STEP3 + 1;
4324 count3 = (adj + N32 - N31) / STEP3;
4325 if (cond2 is <)
4326 adj = STEP2 - 1;
4327 else
4328 adj = STEP2 + 1;
4329 count2 = (adj + N22 - N21) / STEP2;
4330 if (cond1 is <)
4331 adj = STEP1 - 1;
4332 else
4333 adj = STEP1 + 1;
4334 count1 = (adj + N12 - N11) / STEP1;
4335 count = count1 * count2 * count3;
4336 V = 0;
4337 V1 = N11;
4338 V2 = N21;
4339 V3 = N31;
4340 goto L1;
4341 L0:
4342 BODY;
4343 V += 1;
4344 V3 += STEP3;
4345 V2 += (V3 cond3 N32) ? 0 : STEP2;
4346 V3 = (V3 cond3 N32) ? V3 : N31;
4347 V1 += (V2 cond2 N22) ? 0 : STEP1;
4348 V2 = (V2 cond2 N22) ? V2 : N21;
4349 L1:
4350 if (V < count) goto L0; else goto L2;
4351 L2:
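
   A hypothetical collapsed simd loop (an illustration only, not part of
   this file) that would be expanded roughly along these lines:

	#include <stdio.h>

	int
	main (void)
	{
	  int a[8][8];
	#pragma omp simd collapse (2)
	  for (int i = 0; i < 8; i++)
	    for (int j = 0; j < 8; j++)
	      a[i][j] = i * 8 + j;
	  printf ("%d\n", a[7][7]);
	  return 0;
	}

   The two loops are linearized into the single counter V of the
   pseudocode above, and the individual iterators are stepped in
   lockstep with it.  */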
4355 static void
4356 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4358 tree type, t;
4359 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4360 gimple_stmt_iterator gsi;
4361 gimple *stmt;
4362 gcond *cond_stmt;
4363 bool broken_loop = region->cont == NULL;
4364 edge e, ne;
4365 tree *counts = NULL;
4366 int i;
4367 int safelen_int = INT_MAX;
4368 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4369 OMP_CLAUSE_SAFELEN);
4370 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4371 OMP_CLAUSE__SIMDUID_);
4372 tree n1, n2;
4374 if (safelen)
4376 poly_uint64 val;
4377 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4378 if (!poly_int_tree_p (safelen, &val))
4379 safelen_int = 0;
4380 else
4381 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4382 if (safelen_int == 1)
4383 safelen_int = 0;
4385 type = TREE_TYPE (fd->loop.v);
4386 entry_bb = region->entry;
4387 cont_bb = region->cont;
4388 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4389 gcc_assert (broken_loop
4390 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4391 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4392 if (!broken_loop)
4394 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4395 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4396 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4397 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4399 else
4401 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4402 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4403 l2_bb = single_succ (l1_bb);
4405 exit_bb = region->exit;
4406 l2_dom_bb = NULL;
4408 gsi = gsi_last_nondebug_bb (entry_bb);
4410 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4411 /* Not needed in SSA form right now. */
4412 gcc_assert (!gimple_in_ssa_p (cfun));
4413 if (fd->collapse > 1)
4415 int first_zero_iter = -1, dummy = -1;
4416 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4418 counts = XALLOCAVEC (tree, fd->collapse);
4419 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4420 zero_iter_bb, first_zero_iter,
4421 dummy_bb, dummy, l2_dom_bb);
4423 if (l2_dom_bb == NULL)
4424 l2_dom_bb = l1_bb;
4426 n1 = fd->loop.n1;
4427 n2 = fd->loop.n2;
4428 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4430 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4431 OMP_CLAUSE__LOOPTEMP_);
4432 gcc_assert (innerc);
4433 n1 = OMP_CLAUSE_DECL (innerc);
4434 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4435 OMP_CLAUSE__LOOPTEMP_);
4436 gcc_assert (innerc);
4437 n2 = OMP_CLAUSE_DECL (innerc);
4439 tree step = fd->loop.step;
4441 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4442 OMP_CLAUSE__SIMT_);
4443 if (is_simt)
4445 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4446 is_simt = safelen_int > 1;
4448 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4449 if (is_simt)
4451 simt_lane = create_tmp_var (unsigned_type_node);
4452 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4453 gimple_call_set_lhs (g, simt_lane);
4454 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4455 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4456 fold_convert (TREE_TYPE (step), simt_lane));
4457 n1 = fold_convert (type, n1);
4458 if (POINTER_TYPE_P (type))
4459 n1 = fold_build_pointer_plus (n1, offset);
4460 else
4461 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4463 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4464 if (fd->collapse > 1)
4465 simt_maxlane = build_one_cst (unsigned_type_node);
4466 else if (safelen_int < omp_max_simt_vf ())
4467 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4468 tree vf
4469 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4470 unsigned_type_node, 0);
4471 if (simt_maxlane)
4472 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4473 vf = fold_convert (TREE_TYPE (step), vf);
4474 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
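/* Informal note (not from the original sources): under SIMT each lane
   starts at N1 + lane * STEP and the effective step becomes STEP * VF
   (with VF capped by the safelen-derived lane limit), so the lanes
   execute interleaved iterations; the fixup emitted after the loop
   below restores the scalar final value of V.  */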
4477 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4478 if (fd->collapse > 1)
4480 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4482 gsi_prev (&gsi);
4483 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4484 gsi_next (&gsi);
4486 else
4487 for (i = 0; i < fd->collapse; i++)
4489 tree itype = TREE_TYPE (fd->loops[i].v);
4490 if (POINTER_TYPE_P (itype))
4491 itype = signed_type_for (itype);
4492 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4493 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4497 /* Remove the GIMPLE_OMP_FOR statement. */
4498 gsi_remove (&gsi, true);
4500 if (!broken_loop)
4502 /* Code to control the increment goes in the CONT_BB. */
4503 gsi = gsi_last_nondebug_bb (cont_bb);
4504 stmt = gsi_stmt (gsi);
4505 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4507 if (POINTER_TYPE_P (type))
4508 t = fold_build_pointer_plus (fd->loop.v, step);
4509 else
4510 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4511 expand_omp_build_assign (&gsi, fd->loop.v, t);
4513 if (fd->collapse > 1)
4515 i = fd->collapse - 1;
4516 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4518 t = fold_convert (sizetype, fd->loops[i].step);
4519 t = fold_build_pointer_plus (fd->loops[i].v, t);
4521 else
4523 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4524 fd->loops[i].step);
4525 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4526 fd->loops[i].v, t);
4528 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4530 for (i = fd->collapse - 1; i > 0; i--)
4532 tree itype = TREE_TYPE (fd->loops[i].v);
4533 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4534 if (POINTER_TYPE_P (itype2))
4535 itype2 = signed_type_for (itype2);
4536 t = fold_convert (itype2, fd->loops[i - 1].step);
4537 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4538 GSI_SAME_STMT);
4539 t = build3 (COND_EXPR, itype2,
4540 build2 (fd->loops[i].cond_code, boolean_type_node,
4541 fd->loops[i].v,
4542 fold_convert (itype, fd->loops[i].n2)),
4543 build_int_cst (itype2, 0), t);
4544 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4545 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4546 else
4547 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4548 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4550 t = fold_convert (itype, fd->loops[i].n1);
4551 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4552 GSI_SAME_STMT);
4553 t = build3 (COND_EXPR, itype,
4554 build2 (fd->loops[i].cond_code, boolean_type_node,
4555 fd->loops[i].v,
4556 fold_convert (itype, fd->loops[i].n2)),
4557 fd->loops[i].v, t);
4558 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4562 /* Remove GIMPLE_OMP_CONTINUE. */
4563 gsi_remove (&gsi, true);
4566 /* Emit the condition in L1_BB. */
4567 gsi = gsi_start_bb (l1_bb);
4569 t = fold_convert (type, n2);
4570 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4571 false, GSI_CONTINUE_LINKING);
4572 tree v = fd->loop.v;
4573 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4574 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4575 false, GSI_CONTINUE_LINKING);
4576 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4577 cond_stmt = gimple_build_cond_empty (t);
4578 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4579 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4580 NULL, NULL)
4581 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4582 NULL, NULL))
4584 gsi = gsi_for_stmt (cond_stmt);
4585 gimple_regimplify_operands (cond_stmt, &gsi);
4588 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4589 if (is_simt)
4591 gsi = gsi_start_bb (l2_bb);
4592 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4593 if (POINTER_TYPE_P (type))
4594 t = fold_build_pointer_plus (fd->loop.v, step);
4595 else
4596 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4597 expand_omp_build_assign (&gsi, fd->loop.v, t);
4600 /* Remove GIMPLE_OMP_RETURN. */
4601 gsi = gsi_last_nondebug_bb (exit_bb);
4602 gsi_remove (&gsi, true);
4604 /* Connect the new blocks. */
4605 remove_edge (FALLTHRU_EDGE (entry_bb));
4607 if (!broken_loop)
4609 remove_edge (BRANCH_EDGE (entry_bb));
4610 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4612 e = BRANCH_EDGE (l1_bb);
4613 ne = FALLTHRU_EDGE (l1_bb);
4614 e->flags = EDGE_TRUE_VALUE;
4616 else
4618 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4620 ne = single_succ_edge (l1_bb);
4621 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4624 ne->flags = EDGE_FALSE_VALUE;
4625 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4626 ne->probability = e->probability.invert ();
4628 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4629 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4631 if (simt_maxlane)
4633 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4634 NULL_TREE, NULL_TREE);
4635 gsi = gsi_last_bb (entry_bb);
4636 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4637 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4638 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4639 FALLTHRU_EDGE (entry_bb)->probability
4640 = profile_probability::guessed_always ().apply_scale (7, 8);
4641 BRANCH_EDGE (entry_bb)->probability
4642 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4643 l2_dom_bb = entry_bb;
4645 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4647 if (!broken_loop)
4649 struct loop *loop = alloc_loop ();
4650 loop->header = l1_bb;
4651 loop->latch = cont_bb;
4652 add_loop (loop, l1_bb->loop_father);
4653 loop->safelen = safelen_int;
4654 if (simduid)
4656 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4657 cfun->has_simduid_loops = true;
4659 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4660 the loop. */
4661 if ((flag_tree_loop_vectorize
4662 || !global_options_set.x_flag_tree_loop_vectorize)
4663 && flag_tree_loop_optimize
4664 && loop->safelen > 1)
4666 loop->force_vectorize = true;
4667 cfun->has_force_vectorize_loops = true;
4670 else if (simduid)
4671 cfun->has_simduid_loops = true;
4674 /* After gimplification, a taskloop construct is represented as
4675 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4676 between them. This routine expands the outer GIMPLE_OMP_FOR,
4677 which should just compute all the needed loop temporaries
4678 for GIMPLE_OMP_TASK. */
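/* A hypothetical taskloop (an illustration only, not part of this file)
   that gets split into the two GIMPLE_OMP_FOR constructs described
   above:

	#include <stdio.h>

	int
	main (void)
	{
	  int a[1000];
	#pragma omp parallel
	#pragma omp single
	#pragma omp taskloop grainsize (100)
	  for (int i = 0; i < 1000; i++)
	    a[i] = i;
	  printf ("%d\n", a[999]);
	  return 0;
	}

   The routine below only materializes the bounds and step into
   _looptemp_ variables; the GOMP_taskloop{,_ull} call itself is emitted
   when the enclosing GIMPLE_OMP_TASK is expanded.  */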
4680 static void
4681 expand_omp_taskloop_for_outer (struct omp_region *region,
4682 struct omp_for_data *fd,
4683 gimple *inner_stmt)
4685 tree type, bias = NULL_TREE;
4686 basic_block entry_bb, cont_bb, exit_bb;
4687 gimple_stmt_iterator gsi;
4688 gassign *assign_stmt;
4689 tree *counts = NULL;
4690 int i;
4692 gcc_assert (inner_stmt);
4693 gcc_assert (region->cont);
4694 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4695 && gimple_omp_task_taskloop_p (inner_stmt));
4696 type = TREE_TYPE (fd->loop.v);
4698 /* See if we need to bias by LLONG_MIN. */
4699 if (fd->iter_type == long_long_unsigned_type_node
4700 && TREE_CODE (type) == INTEGER_TYPE
4701 && !TYPE_UNSIGNED (type))
4703 tree n1, n2;
4705 if (fd->loop.cond_code == LT_EXPR)
4707 n1 = fd->loop.n1;
4708 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4710 else
4712 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4713 n2 = fd->loop.n1;
4715 if (TREE_CODE (n1) != INTEGER_CST
4716 || TREE_CODE (n2) != INTEGER_CST
4717 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4718 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
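/* Informal note (not from the original sources): GOMP_taskloop_ull
   operates on unsigned long long bounds, so when the induction variable
   is signed 64-bit and a bound may be negative, both bounds are shifted
   by TYPE_MIN_VALUE here; the shift preserves the loop's ordering in
   the unsigned domain and is undone again, modulo 2^64, when the inner
   expansion applies the same bias to the per-task subrange.  */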
4721 entry_bb = region->entry;
4722 cont_bb = region->cont;
4723 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4724 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4725 exit_bb = region->exit;
4727 gsi = gsi_last_nondebug_bb (entry_bb);
4728 gimple *for_stmt = gsi_stmt (gsi);
4729 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4730 if (fd->collapse > 1)
4732 int first_zero_iter = -1, dummy = -1;
4733 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4735 counts = XALLOCAVEC (tree, fd->collapse);
4736 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4737 zero_iter_bb, first_zero_iter,
4738 dummy_bb, dummy, l2_dom_bb);
4740 if (zero_iter_bb)
4742 /* Some counts[i] vars might be uninitialized if
4743 some loop has zero iterations. But the body shouldn't
4744 be executed in that case, so just avoid uninit warnings. */
4745 for (i = first_zero_iter; i < fd->collapse; i++)
4746 if (SSA_VAR_P (counts[i]))
4747 TREE_NO_WARNING (counts[i]) = 1;
4748 gsi_prev (&gsi);
4749 edge e = split_block (entry_bb, gsi_stmt (gsi));
4750 entry_bb = e->dest;
4751 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4752 gsi = gsi_last_bb (entry_bb);
4753 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4754 get_immediate_dominator (CDI_DOMINATORS,
4755 zero_iter_bb));
4759 tree t0, t1;
4760 t1 = fd->loop.n2;
4761 t0 = fd->loop.n1;
4762 if (POINTER_TYPE_P (TREE_TYPE (t0))
4763 && TYPE_PRECISION (TREE_TYPE (t0))
4764 != TYPE_PRECISION (fd->iter_type))
4766 /* Avoid casting pointers to integer of a different size. */
4767 tree itype = signed_type_for (type);
4768 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4769 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4771 else
4773 t1 = fold_convert (fd->iter_type, t1);
4774 t0 = fold_convert (fd->iter_type, t0);
4776 if (bias)
4778 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4779 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4782 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4783 OMP_CLAUSE__LOOPTEMP_);
4784 gcc_assert (innerc);
4785 tree startvar = OMP_CLAUSE_DECL (innerc);
4786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4787 gcc_assert (innerc);
4788 tree endvar = OMP_CLAUSE_DECL (innerc);
4789 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4791 gcc_assert (innerc);
4792 for (i = 1; i < fd->collapse; i++)
4794 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4795 OMP_CLAUSE__LOOPTEMP_);
4796 gcc_assert (innerc);
4798 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4799 OMP_CLAUSE__LOOPTEMP_);
4800 if (innerc)
4802 /* If needed (inner taskloop has lastprivate clause), propagate
4803 down the total number of iterations. */
4804 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4805 NULL_TREE, false,
4806 GSI_CONTINUE_LINKING);
4807 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4808 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4812 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4813 GSI_CONTINUE_LINKING);
4814 assign_stmt = gimple_build_assign (startvar, t0);
4815 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4817 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4818 GSI_CONTINUE_LINKING);
4819 assign_stmt = gimple_build_assign (endvar, t1);
4820 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4821 if (fd->collapse > 1)
4822 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4824 /* Remove the GIMPLE_OMP_FOR statement. */
4825 gsi = gsi_for_stmt (for_stmt);
4826 gsi_remove (&gsi, true);
4828 gsi = gsi_last_nondebug_bb (cont_bb);
4829 gsi_remove (&gsi, true);
4831 gsi = gsi_last_nondebug_bb (exit_bb);
4832 gsi_remove (&gsi, true);
4834 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4835 remove_edge (BRANCH_EDGE (entry_bb));
4836 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4837 remove_edge (BRANCH_EDGE (cont_bb));
4838 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4839 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4840 recompute_dominator (CDI_DOMINATORS, region->entry));
4843 /* After gimplification, a taskloop construct is represented as
4844 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4845 between them. This routine expands the inner GIMPLE_OMP_FOR.
4846 The GOMP_taskloop{,_ull} function arranges for each task to be
4847 given just a single range of iterations. */
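/* Informal note (not from the original sources): the start and end of
   that single range reach the inner GIMPLE_OMP_FOR through its first
   two _LOOPTEMP_ clauses, whose task-local copies are filled in by the
   GOMP_taskloop{,_ull} runtime for every generated task.  */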
4849 static void
4850 expand_omp_taskloop_for_inner (struct omp_region *region,
4851 struct omp_for_data *fd,
4852 gimple *inner_stmt)
4854 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4855 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4856 basic_block fin_bb;
4857 gimple_stmt_iterator gsi;
4858 edge ep;
4859 bool broken_loop = region->cont == NULL;
4860 tree *counts = NULL;
4861 tree n1, n2, step;
4863 itype = type = TREE_TYPE (fd->loop.v);
4864 if (POINTER_TYPE_P (type))
4865 itype = signed_type_for (type);
4867 /* See if we need to bias by LLONG_MIN. */
4868 if (fd->iter_type == long_long_unsigned_type_node
4869 && TREE_CODE (type) == INTEGER_TYPE
4870 && !TYPE_UNSIGNED (type))
4872 tree n1, n2;
4874 if (fd->loop.cond_code == LT_EXPR)
4876 n1 = fd->loop.n1;
4877 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4879 else
4881 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4882 n2 = fd->loop.n1;
4884 if (TREE_CODE (n1) != INTEGER_CST
4885 || TREE_CODE (n2) != INTEGER_CST
4886 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4887 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4890 entry_bb = region->entry;
4891 cont_bb = region->cont;
4892 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4893 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4894 gcc_assert (broken_loop
4895 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4896 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4897 if (!broken_loop)
4899 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4900 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4902 exit_bb = region->exit;
4904 /* Iteration space partitioning goes in ENTRY_BB. */
4905 gsi = gsi_last_nondebug_bb (entry_bb);
4906 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4908 if (fd->collapse > 1)
4910 int first_zero_iter = -1, dummy = -1;
4911 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4913 counts = XALLOCAVEC (tree, fd->collapse);
4914 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4915 fin_bb, first_zero_iter,
4916 dummy_bb, dummy, l2_dom_bb);
4917 t = NULL_TREE;
4919 else
4920 t = integer_one_node;
4922 step = fd->loop.step;
4923 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4924 OMP_CLAUSE__LOOPTEMP_);
4925 gcc_assert (innerc);
4926 n1 = OMP_CLAUSE_DECL (innerc);
4927 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4928 gcc_assert (innerc);
4929 n2 = OMP_CLAUSE_DECL (innerc);
4930 if (bias)
4932 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4933 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4935 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4936 true, NULL_TREE, true, GSI_SAME_STMT);
4937 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4938 true, NULL_TREE, true, GSI_SAME_STMT);
4939 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4940 true, NULL_TREE, true, GSI_SAME_STMT);
4942 tree startvar = fd->loop.v;
4943 tree endvar = NULL_TREE;
4945 if (gimple_omp_for_combined_p (fd->for_stmt))
4947 tree clauses = gimple_omp_for_clauses (inner_stmt);
4948 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4949 gcc_assert (innerc);
4950 startvar = OMP_CLAUSE_DECL (innerc);
4951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4952 OMP_CLAUSE__LOOPTEMP_);
4953 gcc_assert (innerc);
4954 endvar = OMP_CLAUSE_DECL (innerc);
4956 t = fold_convert (TREE_TYPE (startvar), n1);
4957 t = force_gimple_operand_gsi (&gsi, t,
4958 DECL_P (startvar)
4959 && TREE_ADDRESSABLE (startvar),
4960 NULL_TREE, false, GSI_CONTINUE_LINKING);
4961 gimple *assign_stmt = gimple_build_assign (startvar, t);
4962 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4964 t = fold_convert (TREE_TYPE (startvar), n2);
4965 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4966 false, GSI_CONTINUE_LINKING);
4967 if (endvar)
4969 assign_stmt = gimple_build_assign (endvar, e);
4970 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4971 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4972 assign_stmt = gimple_build_assign (fd->loop.v, e);
4973 else
4974 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4975 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4977 if (fd->collapse > 1)
4978 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4980 if (!broken_loop)
4982 /* The code controlling the sequential loop replaces the
4983 GIMPLE_OMP_CONTINUE. */
4984 gsi = gsi_last_nondebug_bb (cont_bb);
4985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4987 vmain = gimple_omp_continue_control_use (cont_stmt);
4988 vback = gimple_omp_continue_control_def (cont_stmt);
4990 if (!gimple_omp_for_combined_p (fd->for_stmt))
4992 if (POINTER_TYPE_P (type))
4993 t = fold_build_pointer_plus (vmain, step);
4994 else
4995 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4996 t = force_gimple_operand_gsi (&gsi, t,
4997 DECL_P (vback)
4998 && TREE_ADDRESSABLE (vback),
4999 NULL_TREE, true, GSI_SAME_STMT);
5000 assign_stmt = gimple_build_assign (vback, t);
5001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5003 t = build2 (fd->loop.cond_code, boolean_type_node,
5004 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5005 ? t : vback, e);
5006 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5009 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5010 gsi_remove (&gsi, true);
5012 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5013 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5016 /* Remove the GIMPLE_OMP_FOR statement. */
5017 gsi = gsi_for_stmt (fd->for_stmt);
5018 gsi_remove (&gsi, true);
5020 /* Remove the GIMPLE_OMP_RETURN statement. */
5021 gsi = gsi_last_nondebug_bb (exit_bb);
5022 gsi_remove (&gsi, true);
5024 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5025 if (!broken_loop)
5026 remove_edge (BRANCH_EDGE (entry_bb));
5027 else
5029 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5030 region->outer->cont = NULL;
5033 /* Connect all the blocks. */
5034 if (!broken_loop)
5036 ep = find_edge (cont_bb, body_bb);
5037 if (gimple_omp_for_combined_p (fd->for_stmt))
5039 remove_edge (ep);
5040 ep = NULL;
5042 else if (fd->collapse > 1)
5044 remove_edge (ep);
5045 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5047 else
5048 ep->flags = EDGE_TRUE_VALUE;
5049 find_edge (cont_bb, fin_bb)->flags
5050 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5053 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5054 recompute_dominator (CDI_DOMINATORS, body_bb));
5055 if (!broken_loop)
5056 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5057 recompute_dominator (CDI_DOMINATORS, fin_bb));
5059 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5061 struct loop *loop = alloc_loop ();
5062 loop->header = body_bb;
5063 if (collapse_bb == NULL)
5064 loop->latch = cont_bb;
5065 add_loop (loop, body_bb->loop_father);
5069 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5070 partitioned loop. The lowering here is abstracted, in that the
5071 loop parameters are passed through internal functions, which are
5072 further lowered by oacc_device_lower, once we get to the target
5073 compiler. The loop is of the form:
5075 for (V = B; V LTGT E; V += S) {BODY}
5077 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5078 (constant 0 for no chunking) and we will have a GWV partitioning
5079 mask, specifying dimensions over which the loop is to be
5080 partitioned (see note below). We generate code that looks like
5081 (this ignores tiling):
5083 <entry_bb> [incoming FALL->body, BRANCH->exit]
5084 typedef signedintify (typeof (V)) T; // underlying signed integral type
5085 T range = E - B;
5086 T chunk_no = 0;
5087 T DIR = LTGT == '<' ? +1 : -1;
5088 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5089 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5091 <head_bb> [created by splitting end of entry_bb]
5092 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5093 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5094 if (!(offset LTGT bound)) goto bottom_bb;
5096 <body_bb> [incoming]
5097 V = B + offset;
5098 {BODY}
5100 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5101 offset += step;
5102 if (offset LTGT bound) goto body_bb; [*]
5104 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5105 chunk_no++;
5106 if (chunk_no < chunk_max) goto head_bb;
5108 <exit_bb> [incoming]
5109 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5111 [*] Needed if V live at end of loop. */
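/* A hypothetical OpenACC loop (an illustration only, not part of this
   file) that is lowered through this routine:

	#include <stdio.h>

	int
	main (void)
	{
	  int a[1024];
	#pragma acc parallel loop gang vector copyout (a)
	  for (int i = 0; i < 1024; i++)
	    a[i] = 2 * i;
	  printf ("%d\n", a[1023]);
	  return 0;
	}

   The gang/vector partitioning is encoded in the GWV mask, and the
   IFN_GOACC_LOOP_* internal calls emitted below are only turned into
   concrete offsets and strides later, by oacc_device_lower for the
   selected offload target.  */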
5113 static void
5114 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5116 tree v = fd->loop.v;
5117 enum tree_code cond_code = fd->loop.cond_code;
5118 enum tree_code plus_code = PLUS_EXPR;
5120 tree chunk_size = integer_minus_one_node;
5121 tree gwv = integer_zero_node;
5122 tree iter_type = TREE_TYPE (v);
5123 tree diff_type = iter_type;
5124 tree plus_type = iter_type;
5125 struct oacc_collapse *counts = NULL;
5127 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5128 == GF_OMP_FOR_KIND_OACC_LOOP);
5129 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5130 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5132 if (POINTER_TYPE_P (iter_type))
5134 plus_code = POINTER_PLUS_EXPR;
5135 plus_type = sizetype;
5137 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5138 diff_type = signed_type_for (diff_type);
5139 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5140 diff_type = integer_type_node;
5142 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5143 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5144 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5145 basic_block bottom_bb = NULL;
5147 /* entry_bb has two successors; the branch edge is to the exit
5148 block, fallthrough edge to body. */
5149 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5150 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5152 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5153 body_bb, or to a block whose only successor is the body_bb. Its
5154 fallthrough successor is the final block (same as the branch
5155 successor of the entry_bb). */
5156 if (cont_bb)
5158 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5159 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5161 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5162 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5164 else
5165 gcc_assert (!gimple_in_ssa_p (cfun));
5167 /* The exit block only has entry_bb and cont_bb as predecessors. */
5168 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5170 tree chunk_no;
5171 tree chunk_max = NULL_TREE;
5172 tree bound, offset;
5173 tree step = create_tmp_var (diff_type, ".step");
5174 bool up = cond_code == LT_EXPR;
5175 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5176 bool chunking = !gimple_in_ssa_p (cfun);
5177 bool negating;
5179 /* Tiling vars. */
5180 tree tile_size = NULL_TREE;
5181 tree element_s = NULL_TREE;
5182 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5183 basic_block elem_body_bb = NULL;
5184 basic_block elem_cont_bb = NULL;
5186 /* SSA instances. */
5187 tree offset_incr = NULL_TREE;
5188 tree offset_init = NULL_TREE;
5190 gimple_stmt_iterator gsi;
5191 gassign *ass;
5192 gcall *call;
5193 gimple *stmt;
5194 tree expr;
5195 location_t loc;
5196 edge split, be, fte;
5198 /* Split the end of entry_bb to create head_bb. */
5199 split = split_block (entry_bb, last_stmt (entry_bb));
5200 basic_block head_bb = split->dest;
5201 entry_bb = split->src;
5203 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5204 gsi = gsi_last_nondebug_bb (entry_bb);
5205 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5206 loc = gimple_location (for_stmt);
5208 if (gimple_in_ssa_p (cfun))
5210 offset_init = gimple_omp_for_index (for_stmt, 0);
5211 gcc_assert (integer_zerop (fd->loop.n1));
5212 /* The SSA parallelizer does gang parallelism. */
5213 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5216 if (fd->collapse > 1 || fd->tiling)
5218 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5219 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5220 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5221 TREE_TYPE (fd->loop.n2), loc);
5223 if (SSA_VAR_P (fd->loop.n2))
5225 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5226 true, GSI_SAME_STMT);
5227 ass = gimple_build_assign (fd->loop.n2, total);
5228 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5232 tree b = fd->loop.n1;
5233 tree e = fd->loop.n2;
5234 tree s = fd->loop.step;
5236 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5237 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5239 /* Convert the step, avoiding possible unsigned->signed overflow. */
5240 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5241 if (negating)
5242 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5243 s = fold_convert (diff_type, s);
5244 if (negating)
5245 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5246 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5248 if (!chunking)
5249 chunk_size = integer_zero_node;
5250 expr = fold_convert (diff_type, chunk_size);
5251 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5252 NULL_TREE, true, GSI_SAME_STMT);
5254 if (fd->tiling)
5256 /* Determine the tile size and element step,
5257 modify the outer loop step size. */
5258 tile_size = create_tmp_var (diff_type, ".tile_size");
5259 expr = build_int_cst (diff_type, 1);
5260 for (int ix = 0; ix < fd->collapse; ix++)
5261 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5262 expr = force_gimple_operand_gsi (&gsi, expr, true,
5263 NULL_TREE, true, GSI_SAME_STMT);
5264 ass = gimple_build_assign (tile_size, expr);
5265 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5267 element_s = create_tmp_var (diff_type, ".element_s");
5268 ass = gimple_build_assign (element_s, s);
5269 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5271 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5272 s = force_gimple_operand_gsi (&gsi, expr, true,
5273 NULL_TREE, true, GSI_SAME_STMT);
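/* Added worked example (not part of the original comments): assuming
   counts[ix].tile holds the per-dimension tile sizes, a nest annotated
   "tile (4, 2)" gives tile_size = 4 * 2 = 8; element_s keeps the original
   step, while the outer step s is scaled to a step per tile, e.g.
   1 * 8 = 8. */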
5276 /* Determine the range, avoiding possible unsigned->signed overflow. */
5277 negating = !up && TYPE_UNSIGNED (iter_type);
5278 expr = fold_build2 (MINUS_EXPR, plus_type,
5279 fold_convert (plus_type, negating ? b : e),
5280 fold_convert (plus_type, negating ? e : b));
5281 expr = fold_convert (diff_type, expr);
5282 if (negating)
5283 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5284 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5285 NULL_TREE, true, GSI_SAME_STMT);
5287 chunk_no = build_int_cst (diff_type, 0);
5288 if (chunking)
5290 gcc_assert (!gimple_in_ssa_p (cfun));
5292 expr = chunk_no;
5293 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5294 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5296 ass = gimple_build_assign (chunk_no, expr);
5297 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5299 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5300 build_int_cst (integer_type_node,
5301 IFN_GOACC_LOOP_CHUNKS),
5302 dir, range, s, chunk_size, gwv);
5303 gimple_call_set_lhs (call, chunk_max);
5304 gimple_set_location (call, loc);
5305 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5307 else
5308 chunk_size = chunk_no;
5310 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5311 build_int_cst (integer_type_node,
5312 IFN_GOACC_LOOP_STEP),
5313 dir, range, s, chunk_size, gwv);
5314 gimple_call_set_lhs (call, step);
5315 gimple_set_location (call, loc);
5316 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
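/* Added sketch (annotation, not from the original comments): the chunked
   loop is driven by IFN_GOACC_LOOP queries, roughly

     chunk_max = .GOACC_LOOP (CHUNKS, dir, range, s, chunk_size, gwv);
     step      = .GOACC_LOOP (STEP,   dir, range, s, chunk_size, gwv);
   head_bb:
     offset = .GOACC_LOOP (OFFSET, dir, range, s, chunk_size, gwv, chunk_no);
     bound  = .GOACC_LOOP (BOUND,  dir, range, s, chunk_size, gwv, offset);
     if (offset < bound)   // cond_code; '<' for an upward loop
       goto body_bb;

   The OFFSET and BOUND calls are emitted into head_bb further down. */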
5318 /* Remove the GIMPLE_OMP_FOR. */
5319 gsi_remove (&gsi, true);
5321 /* Fixup edges from head_bb. */
5322 be = BRANCH_EDGE (head_bb);
5323 fte = FALLTHRU_EDGE (head_bb);
5324 be->flags |= EDGE_FALSE_VALUE;
5325 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
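/* Added note: the XOR clears EDGE_FALLTHRU on the former fallthrough edge
   and sets EDGE_TRUE_VALUE on it, so head_bb's old fallthrough edge becomes
   the true arm of the new condition and the branch edge the false arm. */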
5327 basic_block body_bb = fte->dest;
5329 if (gimple_in_ssa_p (cfun))
5331 gsi = gsi_last_nondebug_bb (cont_bb);
5332 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5334 offset = gimple_omp_continue_control_use (cont_stmt);
5335 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5337 else
5339 offset = create_tmp_var (diff_type, ".offset");
5340 offset_init = offset_incr = offset;
5342 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5344 /* Loop offset & bound go into head_bb. */
5345 gsi = gsi_start_bb (head_bb);
5347 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5348 build_int_cst (integer_type_node,
5349 IFN_GOACC_LOOP_OFFSET),
5350 dir, range, s,
5351 chunk_size, gwv, chunk_no);
5352 gimple_call_set_lhs (call, offset_init);
5353 gimple_set_location (call, loc);
5354 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5356 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5357 build_int_cst (integer_type_node,
5358 IFN_GOACC_LOOP_BOUND),
5359 dir, range, s,
5360 chunk_size, gwv, offset_init);
5361 gimple_call_set_lhs (call, bound);
5362 gimple_set_location (call, loc);
5363 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5365 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5366 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5367 GSI_CONTINUE_LINKING);
5369 /* V assignment goes into body_bb. */
5370 if (!gimple_in_ssa_p (cfun))
5372 gsi = gsi_start_bb (body_bb);
5374 expr = build2 (plus_code, iter_type, b,
5375 fold_convert (plus_type, offset));
5376 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5377 true, GSI_SAME_STMT);
5378 ass = gimple_build_assign (v, expr);
5379 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5381 if (fd->collapse > 1 || fd->tiling)
5382 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5384 if (fd->tiling)
5386 /* Determine the range of the element loop -- usually simply
5387 the tile_size, but could be smaller if the final
5388 iteration of the outer loop is a partial tile. */
5389 tree e_range = create_tmp_var (diff_type, ".e_range");
5391 expr = build2 (MIN_EXPR, diff_type,
5392 build2 (MINUS_EXPR, diff_type, bound, offset),
5393 build2 (MULT_EXPR, diff_type, tile_size,
5394 element_s));
5395 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5396 true, GSI_SAME_STMT);
5397 ass = gimple_build_assign (e_range, expr);
5398 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5400 /* Determine bound, offset & step of inner loop. */
5401 e_bound = create_tmp_var (diff_type, ".e_bound");
5402 e_offset = create_tmp_var (diff_type, ".e_offset");
5403 e_step = create_tmp_var (diff_type, ".e_step");
5405 /* Mark these as element loops. */
5406 tree t, e_gwv = integer_minus_one_node;
5407 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5409 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5410 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5411 element_s, chunk, e_gwv, chunk);
5412 gimple_call_set_lhs (call, e_offset);
5413 gimple_set_location (call, loc);
5414 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5416 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5417 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5418 element_s, chunk, e_gwv, e_offset);
5419 gimple_call_set_lhs (call, e_bound);
5420 gimple_set_location (call, loc);
5421 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5423 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5424 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5425 element_s, chunk, e_gwv);
5426 gimple_call_set_lhs (call, e_step);
5427 gimple_set_location (call, loc);
5428 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5430 /* Add test and split block. */
5431 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5432 stmt = gimple_build_cond_empty (expr);
5433 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5434 split = split_block (body_bb, stmt);
5435 elem_body_bb = split->dest;
5436 if (cont_bb == body_bb)
5437 cont_bb = elem_body_bb;
5438 body_bb = split->src;
5440 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5442 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5443 if (cont_bb == NULL)
5445 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5446 e->probability = profile_probability::even ();
5447 split->probability = profile_probability::even ();
5450 /* Initialize the user's loop vars. */
5451 gsi = gsi_start_bb (elem_body_bb);
5452 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5456 /* Loop increment goes into cont_bb. If this is not a loop, we
5457 will have spawned threads as if it was, and each one will
5458 execute one iteration. The specification is not explicit about
5459 whether such constructs are ill-formed or not, and they can
5460 occur, especially when noreturn routines are involved. */
5461 if (cont_bb)
5463 gsi = gsi_last_nondebug_bb (cont_bb);
5464 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5465 loc = gimple_location (cont_stmt);
5467 if (fd->tiling)
5469 /* Insert element loop increment and test. */
5470 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5471 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5472 true, GSI_SAME_STMT);
5473 ass = gimple_build_assign (e_offset, expr);
5474 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5475 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5477 stmt = gimple_build_cond_empty (expr);
5478 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5479 split = split_block (cont_bb, stmt);
5480 elem_cont_bb = split->src;
5481 cont_bb = split->dest;
5483 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5484 split->probability = profile_probability::unlikely ().guessed ();
5485 edge latch_edge
5486 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5487 latch_edge->probability = profile_probability::likely ().guessed ();
5489 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5490 skip_edge->probability = profile_probability::unlikely ().guessed ();
5491 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5492 loop_entry_edge->probability
5493 = profile_probability::likely ().guessed ();
5495 gsi = gsi_for_stmt (cont_stmt);
5498 /* Increment offset. */
5499 if (gimple_in_ssa_p (cfun))
5500 expr = build2 (plus_code, iter_type, offset,
5501 fold_convert (plus_type, step));
5502 else
5503 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5504 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5505 true, GSI_SAME_STMT);
5506 ass = gimple_build_assign (offset_incr, expr);
5507 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5508 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5509 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5511 /* Remove the GIMPLE_OMP_CONTINUE. */
5512 gsi_remove (&gsi, true);
5514 /* Fixup edges from cont_bb. */
5515 be = BRANCH_EDGE (cont_bb);
5516 fte = FALLTHRU_EDGE (cont_bb);
5517 be->flags |= EDGE_TRUE_VALUE;
5518 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5520 if (chunking)
5522 /* Split the beginning of exit_bb to make bottom_bb. We
5523 need to insert a nop at the start, because splitting is
5524 after a stmt, not before. */
5525 gsi = gsi_start_bb (exit_bb);
5526 stmt = gimple_build_nop ();
5527 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5528 split = split_block (exit_bb, stmt);
5529 bottom_bb = split->src;
5530 exit_bb = split->dest;
5531 gsi = gsi_last_bb (bottom_bb);
5533 /* Chunk increment and test goes into bottom_bb. */
5534 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5535 build_int_cst (diff_type, 1));
5536 ass = gimple_build_assign (chunk_no, expr);
5537 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5539 /* Chunk test at end of bottom_bb. */
5540 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5541 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5542 GSI_CONTINUE_LINKING);
5544 /* Fixup edges from bottom_bb. */
5545 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5546 split->probability = profile_probability::unlikely ().guessed ();
5547 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5548 latch_edge->probability = profile_probability::likely ().guessed ();
5552 gsi = gsi_last_nondebug_bb (exit_bb);
5553 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5554 loc = gimple_location (gsi_stmt (gsi));
5556 if (!gimple_in_ssa_p (cfun))
5558 /* Insert the final value of V, in case it is live. This is the
5559 value for the only thread that survives past the join. */
5560 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5561 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5562 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5563 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5564 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5565 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5566 true, GSI_SAME_STMT);
5567 ass = gimple_build_assign (v, expr);
5568 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
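/* Added worked example: for an upward loop with b = 0, range = 10, s = 4
   and dir = 1 this computes ((10 - 1 + 4) / 4) * 4 = 12, i.e. the first
   multiple of the step at or past the range, so V receives the value it
   would hold after the final iteration (offsets 0, 4 and 8 execute and V
   finishes at 12). */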
5571 /* Remove the OMP_RETURN. */
5572 gsi_remove (&gsi, true);
5574 if (cont_bb)
5576 /* We now have one, two or three nested loops. Update the loop
5577 structures. */
5578 struct loop *parent = entry_bb->loop_father;
5579 struct loop *body = body_bb->loop_father;
5581 if (chunking)
5583 struct loop *chunk_loop = alloc_loop ();
5584 chunk_loop->header = head_bb;
5585 chunk_loop->latch = bottom_bb;
5586 add_loop (chunk_loop, parent);
5587 parent = chunk_loop;
5589 else if (parent != body)
5591 gcc_assert (body->header == body_bb);
5592 gcc_assert (body->latch == cont_bb
5593 || single_pred (body->latch) == cont_bb);
5594 parent = NULL;
5597 if (parent)
5599 struct loop *body_loop = alloc_loop ();
5600 body_loop->header = body_bb;
5601 body_loop->latch = cont_bb;
5602 add_loop (body_loop, parent);
5604 if (fd->tiling)
5606 /* Insert tiling's element loop. */
5607 struct loop *inner_loop = alloc_loop ();
5608 inner_loop->header = elem_body_bb;
5609 inner_loop->latch = elem_cont_bb;
5610 add_loop (inner_loop, body_loop);
5616 /* Expand the OMP loop defined by REGION. */
5618 static void
5619 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5621 struct omp_for_data fd;
5622 struct omp_for_data_loop *loops;
5624 loops
5625 = (struct omp_for_data_loop *)
5626 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5627 * sizeof (struct omp_for_data_loop));
5628 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5629 &fd, loops);
5630 region->sched_kind = fd.sched_kind;
5631 region->sched_modifiers = fd.sched_modifiers;
5633 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5634 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5635 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5636 if (region->cont)
5638 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5639 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5640 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5642 else
5643 /* If there isn't a continue then this is a degenerate case where
5644 the introduction of abnormal edges during lowering will prevent
5645 original loops from being detected. Fix that up. */
5646 loops_state_set (LOOPS_NEED_FIXUP);
5648 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5649 expand_omp_simd (region, &fd);
5650 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5652 gcc_assert (!inner_stmt);
5653 expand_oacc_for (region, &fd);
5655 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5657 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5658 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5659 else
5660 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5662 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5663 && !fd.have_ordered)
5665 if (fd.chunk_size == NULL)
5666 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5667 else
5668 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5670 else
5672 int fn_index, start_ix, next_ix;
5674 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5675 == GF_OMP_FOR_KIND_FOR);
5676 if (fd.chunk_size == NULL
5677 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5678 fd.chunk_size = integer_zero_node;
5679 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5680 switch (fd.sched_kind)
5682 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5683 fn_index = 3;
5684 break;
5685 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5686 case OMP_CLAUSE_SCHEDULE_GUIDED:
5687 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5688 && !fd.ordered
5689 && !fd.have_ordered)
5691 fn_index = 3 + fd.sched_kind;
5692 break;
5694 /* FALLTHRU */
5695 default:
5696 fn_index = fd.sched_kind;
5697 break;
5699 if (!fd.ordered)
5700 fn_index += fd.have_ordered * 6;
5701 if (fd.ordered)
5702 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5703 else
5704 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5705 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5706 if (fd.iter_type == long_long_unsigned_type_node)
5708 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5709 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5710 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5711 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
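/* Added illustrative note: a plain schedule(dynamic) loop without ordered
   takes the default arm, so fn_index = fd.sched_kind and start_ix/next_ix
   land on the dynamic variants relative to BUILT_IN_GOMP_LOOP_STATIC_START
   and _NEXT; the nonmonotonic modifier adds 3, a stand-alone ordered clause
   adds 6 via have_ordered, and when fd.ordered is set the
   BUILT_IN_GOMP_LOOP_DOACROSS_* entry points are used instead. */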
5713 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5714 (enum built_in_function) next_ix, inner_stmt);
5717 if (gimple_in_ssa_p (cfun))
5718 update_ssa (TODO_update_ssa_only_virtuals);
5721 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5723 v = GOMP_sections_start (n);
5725 switch (v)
5727 case 0:
5728 goto L2;
5729 case 1:
5730 section 1;
5731 goto L1;
5732 case 2:
5734 case n:
5736 default:
5737 abort ();
5740 v = GOMP_sections_next ();
5741 goto L0;
5743 reduction;
5745 If this is a combined parallel sections, replace the call to
5746 GOMP_sections_start with a call to GOMP_sections_next. */
5748 static void
5749 expand_omp_sections (struct omp_region *region)
5751 tree t, u, vin = NULL, vmain, vnext, l2;
5752 unsigned len;
5753 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5754 gimple_stmt_iterator si, switch_si;
5755 gomp_sections *sections_stmt;
5756 gimple *stmt;
5757 gomp_continue *cont;
5758 edge_iterator ei;
5759 edge e;
5760 struct omp_region *inner;
5761 unsigned i, casei;
5762 bool exit_reachable = region->cont != NULL;
5764 gcc_assert (region->exit != NULL);
5765 entry_bb = region->entry;
5766 l0_bb = single_succ (entry_bb);
5767 l1_bb = region->cont;
5768 l2_bb = region->exit;
5769 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5770 l2 = gimple_block_label (l2_bb);
5771 else
5773 /* This can happen if there are reductions. */
5774 len = EDGE_COUNT (l0_bb->succs);
5775 gcc_assert (len > 0);
5776 e = EDGE_SUCC (l0_bb, len - 1);
5777 si = gsi_last_nondebug_bb (e->dest);
5778 l2 = NULL_TREE;
5779 if (gsi_end_p (si)
5780 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5781 l2 = gimple_block_label (e->dest);
5782 else
5783 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5785 si = gsi_last_nondebug_bb (e->dest);
5786 if (gsi_end_p (si)
5787 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5789 l2 = gimple_block_label (e->dest);
5790 break;
5794 if (exit_reachable)
5795 default_bb = create_empty_bb (l1_bb->prev_bb);
5796 else
5797 default_bb = create_empty_bb (l0_bb);
5799 /* We will build a switch() with enough cases for all the
5800 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5801 and a default case to abort if something goes wrong. */
5802 len = EDGE_COUNT (l0_bb->succs);
5804 /* Use vec::quick_push on label_vec throughout, since we know the size
5805 in advance. */
5806 auto_vec<tree> label_vec (len);
5808 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5809 GIMPLE_OMP_SECTIONS statement. */
5810 si = gsi_last_nondebug_bb (entry_bb);
5811 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5812 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5813 vin = gimple_omp_sections_control (sections_stmt);
5814 if (!is_combined_parallel (region))
5816 /* If we are not inside a combined parallel+sections region,
5817 call GOMP_sections_start. */
5818 t = build_int_cst (unsigned_type_node, len - 1);
5819 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5820 stmt = gimple_build_call (u, 1, t);
5822 else
5824 /* Otherwise, call GOMP_sections_next. */
5825 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5826 stmt = gimple_build_call (u, 0);
5828 gimple_call_set_lhs (stmt, vin);
5829 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5830 gsi_remove (&si, true);
5832 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5833 L0_BB. */
5834 switch_si = gsi_last_nondebug_bb (l0_bb);
5835 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5836 if (exit_reachable)
5838 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5839 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5840 vmain = gimple_omp_continue_control_use (cont);
5841 vnext = gimple_omp_continue_control_def (cont);
5843 else
5845 vmain = vin;
5846 vnext = NULL_TREE;
5849 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5850 label_vec.quick_push (t);
5851 i = 1;
5853 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5854 for (inner = region->inner, casei = 1;
5855 inner;
5856 inner = inner->next, i++, casei++)
5858 basic_block s_entry_bb, s_exit_bb;
5860 /* Skip optional reduction region. */
5861 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5863 --i;
5864 --casei;
5865 continue;
5868 s_entry_bb = inner->entry;
5869 s_exit_bb = inner->exit;
5871 t = gimple_block_label (s_entry_bb);
5872 u = build_int_cst (unsigned_type_node, casei);
5873 u = build_case_label (u, NULL, t);
5874 label_vec.quick_push (u);
5876 si = gsi_last_nondebug_bb (s_entry_bb);
5877 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5878 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5879 gsi_remove (&si, true);
5880 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5882 if (s_exit_bb == NULL)
5883 continue;
5885 si = gsi_last_nondebug_bb (s_exit_bb);
5886 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5887 gsi_remove (&si, true);
5889 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5892 /* Error handling code goes in DEFAULT_BB. */
5893 t = gimple_block_label (default_bb);
5894 u = build_case_label (NULL, NULL, t);
5895 make_edge (l0_bb, default_bb, 0);
5896 add_bb_to_loop (default_bb, current_loops->tree_root);
5898 stmt = gimple_build_switch (vmain, u, label_vec);
5899 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5900 gsi_remove (&switch_si, true);
5902 si = gsi_start_bb (default_bb);
5903 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5904 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5906 if (exit_reachable)
5908 tree bfn_decl;
5910 /* Code to get the next section goes in L1_BB. */
5911 si = gsi_last_nondebug_bb (l1_bb);
5912 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5914 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5915 stmt = gimple_build_call (bfn_decl, 0);
5916 gimple_call_set_lhs (stmt, vnext);
5917 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5918 gsi_remove (&si, true);
5920 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5923 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5924 si = gsi_last_nondebug_bb (l2_bb);
5925 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5926 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5927 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5928 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5929 else
5930 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5931 stmt = gimple_build_call (t, 0);
5932 if (gimple_omp_return_lhs (gsi_stmt (si)))
5933 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5934 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5935 gsi_remove (&si, true);
5937 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5940 /* Expand code for an OpenMP single directive. We've already expanded
5941 much of the code; here we simply place the GOMP_barrier call. */
5943 static void
5944 expand_omp_single (struct omp_region *region)
5946 basic_block entry_bb, exit_bb;
5947 gimple_stmt_iterator si;
5949 entry_bb = region->entry;
5950 exit_bb = region->exit;
5952 si = gsi_last_nondebug_bb (entry_bb);
5953 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5954 gsi_remove (&si, true);
5955 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5957 si = gsi_last_nondebug_bb (exit_bb);
5958 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5960 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5961 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5963 gsi_remove (&si, true);
5964 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5967 /* Generic expansion for OpenMP synchronization directives: master,
5968 ordered and critical. All we need to do here is remove the entry
5969 and exit markers for REGION. */
5971 static void
5972 expand_omp_synch (struct omp_region *region)
5974 basic_block entry_bb, exit_bb;
5975 gimple_stmt_iterator si;
5977 entry_bb = region->entry;
5978 exit_bb = region->exit;
5980 si = gsi_last_nondebug_bb (entry_bb);
5981 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5982 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5983 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5984 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5985 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5986 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5987 gsi_remove (&si, true);
5988 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5990 if (exit_bb)
5992 si = gsi_last_nondebug_bb (exit_bb);
5993 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5994 gsi_remove (&si, true);
5995 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5999 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6000 operation as a normal volatile load. */
6002 static bool
6003 expand_omp_atomic_load (basic_block load_bb, tree addr,
6004 tree loaded_val, int index)
6006 enum built_in_function tmpbase;
6007 gimple_stmt_iterator gsi;
6008 basic_block store_bb;
6009 location_t loc;
6010 gimple *stmt;
6011 tree decl, call, type, itype;
6013 gsi = gsi_last_nondebug_bb (load_bb);
6014 stmt = gsi_stmt (gsi);
6015 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6016 loc = gimple_location (stmt);
6018 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6019 is smaller than word size, then expand_atomic_load assumes that the load
6020 is atomic. We could avoid the builtin entirely in this case. */
6022 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6023 decl = builtin_decl_explicit (tmpbase);
6024 if (decl == NULL_TREE)
6025 return false;
6027 type = TREE_TYPE (loaded_val);
6028 itype = TREE_TYPE (TREE_TYPE (decl));
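/* Added sketch: for a 4-byte type (INDEX == 2) the code below amounts to
     loaded_val = VIEW_CONVERT_EXPR<type>(__atomic_load_4 (addr, memmodel));
   where memmodel is MEMMODEL_SEQ_CST for seq_cst directives and
   MEMMODEL_RELAXED otherwise, and the VIEW_CONVERT_EXPR is only added when
   the types differ. */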
6030 call = build_call_expr_loc (loc, decl, 2, addr,
6031 build_int_cst (NULL,
6032 gimple_omp_atomic_seq_cst_p (stmt)
6033 ? MEMMODEL_SEQ_CST
6034 : MEMMODEL_RELAXED));
6035 if (!useless_type_conversion_p (type, itype))
6036 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6037 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6039 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6040 gsi_remove (&gsi, true);
6042 store_bb = single_succ (load_bb);
6043 gsi = gsi_last_nondebug_bb (store_bb);
6044 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6045 gsi_remove (&gsi, true);
6047 if (gimple_in_ssa_p (cfun))
6048 update_ssa (TODO_update_ssa_no_phi);
6050 return true;
6053 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6054 operation as a normal volatile store. */
6056 static bool
6057 expand_omp_atomic_store (basic_block load_bb, tree addr,
6058 tree loaded_val, tree stored_val, int index)
6060 enum built_in_function tmpbase;
6061 gimple_stmt_iterator gsi;
6062 basic_block store_bb = single_succ (load_bb);
6063 location_t loc;
6064 gimple *stmt;
6065 tree decl, call, type, itype;
6066 machine_mode imode;
6067 bool exchange;
6069 gsi = gsi_last_nondebug_bb (load_bb);
6070 stmt = gsi_stmt (gsi);
6071 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6073 /* If the load value is needed, then this isn't a store but an exchange. */
6074 exchange = gimple_omp_atomic_need_value_p (stmt);
6076 gsi = gsi_last_nondebug_bb (store_bb);
6077 stmt = gsi_stmt (gsi);
6078 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6079 loc = gimple_location (stmt);
6081 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6082 is smaller than word size, then expand_atomic_store assumes that the store
6083 is atomic. We could avoid the builtin entirely in this case. */
6085 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6086 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6087 decl = builtin_decl_explicit (tmpbase);
6088 if (decl == NULL_TREE)
6089 return false;
6091 type = TREE_TYPE (stored_val);
6093 /* Dig out the type of the function's second argument. */
6094 itype = TREE_TYPE (decl);
6095 itype = TYPE_ARG_TYPES (itype);
6096 itype = TREE_CHAIN (itype);
6097 itype = TREE_VALUE (itype);
6098 imode = TYPE_MODE (itype);
6100 if (exchange && !can_atomic_exchange_p (imode, true))
6101 return false;
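/* Added sketch: for a 4-byte type the call built below is either
     __atomic_store_4 (addr, stored_val, memmodel);
   or, when the loaded value is needed (an exchange),
     loaded_val = __atomic_exchange_4 (addr, stored_val, memmodel);
   with VIEW_CONVERT_EXPRs inserted when the value type differs from the
   builtin's argument type. */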
6103 if (!useless_type_conversion_p (itype, type))
6104 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6105 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6106 build_int_cst (NULL,
6107 gimple_omp_atomic_seq_cst_p (stmt)
6108 ? MEMMODEL_SEQ_CST
6109 : MEMMODEL_RELAXED));
6110 if (exchange)
6112 if (!useless_type_conversion_p (type, itype))
6113 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6114 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6117 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6118 gsi_remove (&gsi, true);
6120 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6121 gsi = gsi_last_nondebug_bb (load_bb);
6122 gsi_remove (&gsi, true);
6124 if (gimple_in_ssa_p (cfun))
6125 update_ssa (TODO_update_ssa_no_phi);
6127 return true;
6130 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6131 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6132 size of the data type, and thus usable to find the index of the builtin
6133 decl. Returns false if the expression is not of the proper form. */
6135 static bool
6136 expand_omp_atomic_fetch_op (basic_block load_bb,
6137 tree addr, tree loaded_val,
6138 tree stored_val, int index)
6140 enum built_in_function oldbase, newbase, tmpbase;
6141 tree decl, itype, call;
6142 tree lhs, rhs;
6143 basic_block store_bb = single_succ (load_bb);
6144 gimple_stmt_iterator gsi;
6145 gimple *stmt;
6146 location_t loc;
6147 enum tree_code code;
6148 bool need_old, need_new;
6149 machine_mode imode;
6150 bool seq_cst;
6152 /* We expect to find the following sequences:
6154 load_bb:
6155 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6157 store_bb:
6158 val = tmp OP something; (or: something OP tmp)
6159 GIMPLE_OMP_STORE (val)
6161 ???FIXME: Allow a more flexible sequence.
6162 Perhaps use data flow to pick the statements.
6166 gsi = gsi_after_labels (store_bb);
6167 stmt = gsi_stmt (gsi);
6168 if (is_gimple_debug (stmt))
6170 gsi_next_nondebug (&gsi);
6171 if (gsi_end_p (gsi))
6172 return false;
6173 stmt = gsi_stmt (gsi);
6175 loc = gimple_location (stmt);
6176 if (!is_gimple_assign (stmt))
6177 return false;
6178 gsi_next_nondebug (&gsi);
6179 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6180 return false;
6181 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6182 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6183 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6184 gcc_checking_assert (!need_old || !need_new);
6186 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6187 return false;
6189 /* Check for one of the supported fetch-op operations. */
6190 code = gimple_assign_rhs_code (stmt);
6191 switch (code)
6193 case PLUS_EXPR:
6194 case POINTER_PLUS_EXPR:
6195 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6196 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6197 break;
6198 case MINUS_EXPR:
6199 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6200 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6201 break;
6202 case BIT_AND_EXPR:
6203 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6204 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6205 break;
6206 case BIT_IOR_EXPR:
6207 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6208 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6209 break;
6210 case BIT_XOR_EXPR:
6211 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6212 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6213 break;
6214 default:
6215 return false;
6218 /* Make sure the expression is of the proper form. */
6219 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6220 rhs = gimple_assign_rhs2 (stmt);
6221 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6222 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6223 rhs = gimple_assign_rhs1 (stmt);
6224 else
6225 return false;
6227 tmpbase = ((enum built_in_function)
6228 ((need_new ? newbase : oldbase) + index + 1));
6229 decl = builtin_decl_explicit (tmpbase);
6230 if (decl == NULL_TREE)
6231 return false;
6232 itype = TREE_TYPE (TREE_TYPE (decl));
6233 imode = TYPE_MODE (itype);
6235 /* We could test all of the various optabs involved, but the fact of the
6236 matter is that (with the exception of i486 vs i586 and xadd) all targets
6237 that support any atomic operation optab also implement compare-and-swap.
6238 Let optabs.c take care of expanding any compare-and-swap loop. */
6239 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6240 return false;
6242 gsi = gsi_last_nondebug_bb (load_bb);
6243 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6245 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6246 It only requires that the operation happen atomically. Thus we can
6247 use the RELAXED memory model. */
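/* Added sketch: for "x = x + expr" on a 4-byte int this becomes
     __atomic_fetch_add_4 (addr, expr, memmodel);
   or, if the updated value is needed (need_new),
     stored_val = __atomic_add_fetch_4 (addr, expr, memmodel);  */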
6248 call = build_call_expr_loc (loc, decl, 3, addr,
6249 fold_convert_loc (loc, itype, rhs),
6250 build_int_cst (NULL,
6251 seq_cst ? MEMMODEL_SEQ_CST
6252 : MEMMODEL_RELAXED));
6254 if (need_old || need_new)
6256 lhs = need_old ? loaded_val : stored_val;
6257 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6258 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6260 else
6261 call = fold_convert_loc (loc, void_type_node, call);
6262 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6263 gsi_remove (&gsi, true);
6265 gsi = gsi_last_nondebug_bb (store_bb);
6266 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6267 gsi_remove (&gsi, true);
6268 gsi = gsi_last_nondebug_bb (store_bb);
6269 stmt = gsi_stmt (gsi);
6270 gsi_remove (&gsi, true);
6272 if (gimple_in_ssa_p (cfun))
6274 release_defs (stmt);
6275 update_ssa (TODO_update_ssa_no_phi);
6278 return true;
6281 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6283 oldval = *addr;
6284 repeat:
6285 newval = rhs; // with oldval replacing *addr in rhs
6286 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6287 if (oldval != newval)
6288 goto repeat;
6290 INDEX is log2 of the size of the data type, and thus usable to find the
6291 index of the builtin decl. */
6293 static bool
6294 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6295 tree addr, tree loaded_val, tree stored_val,
6296 int index)
6298 tree loadedi, storedi, initial, new_storedi, old_vali;
6299 tree type, itype, cmpxchg, iaddr, atype;
6300 gimple_stmt_iterator si;
6301 basic_block loop_header = single_succ (load_bb);
6302 gimple *phi, *stmt;
6303 edge e;
6304 enum built_in_function fncode;
6306 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6307 order to use the RELAXED memory model effectively. */
6308 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6309 + index + 1);
6310 cmpxchg = builtin_decl_explicit (fncode);
6311 if (cmpxchg == NULL_TREE)
6312 return false;
6313 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6314 atype = type;
6315 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6317 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6318 || !can_atomic_load_p (TYPE_MODE (itype)))
6319 return false;
6321 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6322 si = gsi_last_nondebug_bb (load_bb);
6323 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6325 /* For floating-point values, we'll need to view-convert them to integers
6326 so that we can perform the atomic compare and swap. Simplify the
6327 following code by always setting up the "i"ntegral variables. */
6328 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6330 tree iaddr_val;
6332 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6333 true));
6334 atype = itype;
6335 iaddr_val
6336 = force_gimple_operand_gsi (&si,
6337 fold_convert (TREE_TYPE (iaddr), addr),
6338 false, NULL_TREE, true, GSI_SAME_STMT);
6339 stmt = gimple_build_assign (iaddr, iaddr_val);
6340 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6341 loadedi = create_tmp_var (itype);
6342 if (gimple_in_ssa_p (cfun))
6343 loadedi = make_ssa_name (loadedi);
6345 else
6347 iaddr = addr;
6348 loadedi = loaded_val;
6351 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6352 tree loaddecl = builtin_decl_explicit (fncode);
6353 if (loaddecl)
6354 initial
6355 = fold_convert (atype,
6356 build_call_expr (loaddecl, 2, iaddr,
6357 build_int_cst (NULL_TREE,
6358 MEMMODEL_RELAXED)));
6359 else
6361 tree off
6362 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6363 true), 0);
6364 initial = build2 (MEM_REF, atype, iaddr, off);
6367 initial
6368 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6369 GSI_SAME_STMT);
6371 /* Move the value to the LOADEDI temporary. */
6372 if (gimple_in_ssa_p (cfun))
6374 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6375 phi = create_phi_node (loadedi, loop_header);
6376 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6377 initial);
6379 else
6380 gsi_insert_before (&si,
6381 gimple_build_assign (loadedi, initial),
6382 GSI_SAME_STMT);
6383 if (loadedi != loaded_val)
6385 gimple_stmt_iterator gsi2;
6386 tree x;
6388 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6389 gsi2 = gsi_start_bb (loop_header);
6390 if (gimple_in_ssa_p (cfun))
6392 gassign *stmt;
6393 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6394 true, GSI_SAME_STMT);
6395 stmt = gimple_build_assign (loaded_val, x);
6396 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6398 else
6400 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6401 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6402 true, GSI_SAME_STMT);
6405 gsi_remove (&si, true);
6407 si = gsi_last_nondebug_bb (store_bb);
6408 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6410 if (iaddr == addr)
6411 storedi = stored_val;
6412 else
6413 storedi
6414 = force_gimple_operand_gsi (&si,
6415 build1 (VIEW_CONVERT_EXPR, itype,
6416 stored_val), true, NULL_TREE, true,
6417 GSI_SAME_STMT);
6419 /* Build the compare&swap statement. */
6420 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6421 new_storedi = force_gimple_operand_gsi (&si,
6422 fold_convert (TREE_TYPE (loadedi),
6423 new_storedi),
6424 true, NULL_TREE,
6425 true, GSI_SAME_STMT);
6427 if (gimple_in_ssa_p (cfun))
6428 old_vali = loadedi;
6429 else
6431 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6432 stmt = gimple_build_assign (old_vali, loadedi);
6433 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6435 stmt = gimple_build_assign (loadedi, new_storedi);
6436 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6439 /* Note that we always perform the comparison as an integer, even for
6440 floating point. This allows the atomic operation to properly
6441 succeed even with NaNs and -0.0. */
6442 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6443 stmt = gimple_build_cond_empty (ne);
6444 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6446 /* Update cfg. */
6447 e = single_succ_edge (store_bb);
6448 e->flags &= ~EDGE_FALLTHRU;
6449 e->flags |= EDGE_FALSE_VALUE;
6450 /* Expect no looping. */
6451 e->probability = profile_probability::guessed_always ();
6453 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6454 e->probability = profile_probability::guessed_never ();
6456 /* Copy the new value to loadedi (we already did that before the condition
6457 if we are not in SSA). */
6458 if (gimple_in_ssa_p (cfun))
6460 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6461 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6464 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6465 gsi_remove (&si, true);
6467 struct loop *loop = alloc_loop ();
6468 loop->header = loop_header;
6469 loop->latch = store_bb;
6470 add_loop (loop, loop_header->loop_father);
6472 if (gimple_in_ssa_p (cfun))
6473 update_ssa (TODO_update_ssa_no_phi);
6475 return true;
6478 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6480 GOMP_atomic_start ();
6481 *addr = rhs;
6482 GOMP_atomic_end ();
6484 The result is not globally atomic, but works so long as all parallel
6485 references are within #pragma omp atomic directives. According to
6486 responses received from omp@openmp.org, this appears to be within spec.
6487 Which makes sense, since that's how several other compilers handle
6488 this situation as well.
6489 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6490 expanding. STORED_VAL is the operand of the matching
6491 GIMPLE_OMP_ATOMIC_STORE.
6493 We replace
6494 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6495 loaded_val = *addr;
6497 and replace
6498 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6499 *addr = stored_val;
6502 static bool
6503 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6504 tree addr, tree loaded_val, tree stored_val)
6506 gimple_stmt_iterator si;
6507 gassign *stmt;
6508 tree t;
6510 si = gsi_last_nondebug_bb (load_bb);
6511 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6513 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6514 t = build_call_expr (t, 0);
6515 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6517 tree mem = build_simple_mem_ref (addr);
6518 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6519 TREE_OPERAND (mem, 1)
6520 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6521 true),
6522 TREE_OPERAND (mem, 1));
6523 stmt = gimple_build_assign (loaded_val, mem);
6524 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6525 gsi_remove (&si, true);
6527 si = gsi_last_nondebug_bb (store_bb);
6528 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6530 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6531 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6533 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6534 t = build_call_expr (t, 0);
6535 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6536 gsi_remove (&si, true);
6538 if (gimple_in_ssa_p (cfun))
6539 update_ssa (TODO_update_ssa_no_phi);
6540 return true;
6543 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
6544 using expand_omp_atomic_fetch_op. If that fails, we try to
6545 call expand_omp_atomic_pipeline, and if it fails too, the
6546 ultimate fallback is wrapping the operation in a mutex
6547 (expand_omp_atomic_mutex). REGION is the atomic region built
6548 by build_omp_regions_1(). */
6550 static void
6551 expand_omp_atomic (struct omp_region *region)
6553 basic_block load_bb = region->entry, store_bb = region->exit;
6554 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6555 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6556 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6557 tree addr = gimple_omp_atomic_load_rhs (load);
6558 tree stored_val = gimple_omp_atomic_store_val (store);
6559 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6560 HOST_WIDE_INT index;
6562 /* Make sure the type is one of the supported sizes. */
6563 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6564 index = exact_log2 (index);
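/* Added note: e.g. a 4-byte int gives index == 2; indices 0 through 4
   cover the 1, 2, 4, 8 and 16 byte types the atomic builtins handle. */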
6565 if (index >= 0 && index <= 4)
6567 unsigned int align = TYPE_ALIGN_UNIT (type);
6569 /* __sync builtins require strict data alignment. */
6570 if (exact_log2 (align) >= index)
6572 /* Atomic load. */
6573 scalar_mode smode;
6574 if (loaded_val == stored_val
6575 && (is_int_mode (TYPE_MODE (type), &smode)
6576 || is_float_mode (TYPE_MODE (type), &smode))
6577 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6578 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6579 return;
6581 /* Atomic store. */
6582 if ((is_int_mode (TYPE_MODE (type), &smode)
6583 || is_float_mode (TYPE_MODE (type), &smode))
6584 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6585 && store_bb == single_succ (load_bb)
6586 && first_stmt (store_bb) == store
6587 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6588 stored_val, index))
6589 return;
6591 /* When possible, use specialized atomic update functions. */
6592 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6593 && store_bb == single_succ (load_bb)
6594 && expand_omp_atomic_fetch_op (load_bb, addr,
6595 loaded_val, stored_val, index))
6596 return;
6598 /* If we don't have specialized __sync builtins, try to implement
6599 as a compare and swap loop. */
6600 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6601 loaded_val, stored_val, index))
6602 return;
6606 /* The ultimate fallback is wrapping the operation in a mutex. */
6607 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6610 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6611 at REGION_EXIT. */
6613 static void
6614 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6615 basic_block region_exit)
6617 struct loop *outer = region_entry->loop_father;
6618 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6620 /* Don't parallelize the kernels region if it contains more than one outer
6621 loop. */
6622 unsigned int nr_outer_loops = 0;
6623 struct loop *single_outer = NULL;
6624 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6626 gcc_assert (loop_outer (loop) == outer);
6628 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6629 continue;
6631 if (region_exit != NULL
6632 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6633 continue;
6635 nr_outer_loops++;
6636 single_outer = loop;
6638 if (nr_outer_loops != 1)
6639 return;
6641 for (struct loop *loop = single_outer->inner;
6642 loop != NULL;
6643 loop = loop->inner)
6644 if (loop->next)
6645 return;
6647 /* Mark the loops in the region. */
6648 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6649 loop->in_oacc_kernels_region = true;
6652 /* Types used to pass grid and work-group sizes to kernel invocation. */
6654 struct GTY(()) grid_launch_attributes_trees
6656 tree kernel_dim_array_type;
6657 tree kernel_lattrs_dimnum_decl;
6658 tree kernel_lattrs_grid_decl;
6659 tree kernel_lattrs_group_decl;
6660 tree kernel_launch_attributes_type;
6663 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6665 /* Create types used to pass kernel launch attributes to target. */
6667 static void
6668 grid_create_kernel_launch_attr_types (void)
6670 if (grid_attr_trees)
6671 return;
6672 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6674 tree dim_arr_index_type
6675 = build_index_type (build_int_cst (integer_type_node, 2));
6676 grid_attr_trees->kernel_dim_array_type
6677 = build_array_type (uint32_type_node, dim_arr_index_type);
6679 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6680 grid_attr_trees->kernel_lattrs_dimnum_decl
6681 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6682 uint32_type_node);
6683 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6685 grid_attr_trees->kernel_lattrs_grid_decl
6686 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6687 grid_attr_trees->kernel_dim_array_type);
6688 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6689 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6690 grid_attr_trees->kernel_lattrs_group_decl
6691 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6692 grid_attr_trees->kernel_dim_array_type);
6693 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6694 = grid_attr_trees->kernel_lattrs_grid_decl;
6695 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6696 "__gomp_kernel_launch_attributes",
6697 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
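/* Added sketch: the record built above is meant to correspond to the layout

     struct __gomp_kernel_launch_attributes {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   i.e. a dimension count plus three grid and three work-group extents. */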
6700 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6701 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6702 of type uint32_type_node. */
6704 static void
6705 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6706 tree fld_decl, int index, tree value)
6708 tree ref = build4 (ARRAY_REF, uint32_type_node,
6709 build3 (COMPONENT_REF,
6710 grid_attr_trees->kernel_dim_array_type,
6711 range_var, fld_decl, NULL_TREE),
6712 build_int_cst (integer_type_node, index),
6713 NULL_TREE, NULL_TREE);
6714 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6717 /* Return a tree representation of a pointer to a structure with grid and
6718 work-group size information. Statements filling that information will be
6719 inserted before GSI; TGT_STMT is the target statement which has the
6720 necessary information in it. */
6722 static tree
6723 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6724 gomp_target *tgt_stmt)
6726 grid_create_kernel_launch_attr_types ();
6727 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6728 "__kernel_launch_attrs");
6730 unsigned max_dim = 0;
6731 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6732 clause;
6733 clause = OMP_CLAUSE_CHAIN (clause))
6735 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6736 continue;
6738 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6739 max_dim = MAX (dim, max_dim);
6741 grid_insert_store_range_dim (gsi, lattrs,
6742 grid_attr_trees->kernel_lattrs_grid_decl,
6743 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6744 grid_insert_store_range_dim (gsi, lattrs,
6745 grid_attr_trees->kernel_lattrs_group_decl,
6746 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6749 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6750 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6751 gcc_checking_assert (max_dim <= 2);
6752 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6753 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6754 GSI_SAME_STMT);
6755 TREE_ADDRESSABLE (lattrs) = 1;
6756 return build_fold_addr_expr (lattrs);
6759 /* Build target argument identifier from the DEVICE identifier, value
6760 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6762 static tree
6763 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6765 tree t = build_int_cst (integer_type_node, device);
6766 if (subseqent_param)
6767 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6768 build_int_cst (integer_type_node,
6769 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6770 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6771 build_int_cst (integer_type_node, id));
6772 return t;
6775 /* Like above, but return it in a type that can be directly stored as an element
6776 of the argument array. */
6778 static tree
6779 get_target_argument_identifier (int device, bool subseqent_param, int id)
6781 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6782 return fold_convert (ptr_type_node, t);
6785 /* Return a target argument consisting of DEVICE identifier, value identifier
6786 ID, and the actual VALUE. */
6788 static tree
6789 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6790 tree value)
6792 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6793 fold_convert (integer_type_node, value),
6794 build_int_cst (unsigned_type_node,
6795 GOMP_TARGET_ARG_VALUE_SHIFT));
6796 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6797 get_target_argument_identifier_1 (device, false, id));
6798 t = fold_convert (ptr_type_node, t);
6799 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6802 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6803 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6804 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6805 arguments. */
6807 static void
6808 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6809 int id, tree value, vec <tree> *args)
6811 if (tree_fits_shwi_p (value)
6812 && tree_to_shwi (value) > -(1 << 15)
6813 && tree_to_shwi (value) < (1 << 15))
6814 args->quick_push (get_target_argument_value (gsi, device, id, value));
6815 else
6817 args->quick_push (get_target_argument_identifier (device, true, id));
6818 value = fold_convert (ptr_type_node, value);
6819 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6820 GSI_SAME_STMT);
6821 args->quick_push (value);
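/* Added worked example: a constant such as num_teams(4) fits the signed
   16-bit window, so a single element encoding
   (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_DEVICE_ALL
   | GOMP_TARGET_ARG_NUM_TEAMS is pushed; a value only known at run time
   instead pushes two elements: an identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value converted
   to pointer type. */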
6825 /* Create an array of arguments that is then passed to GOMP_target. */
6827 static tree
6828 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6830 auto_vec <tree, 6> args;
6831 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6832 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6833 if (c)
6834 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6835 else
6836 t = integer_minus_one_node;
6837 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6838 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6840 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6841 if (c)
6842 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6843 else
6844 t = integer_minus_one_node;
6845 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6846 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6847 &args);
6849 /* Add HSA-specific grid sizes, if available. */
6850 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6851 OMP_CLAUSE__GRIDDIM_))
6853 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6854 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6855 args.quick_push (t);
6856 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6859 /* Produce more, perhaps device specific, arguments here. */
6861 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6862 args.length () + 1),
6863 ".omp_target_args");
6864 for (unsigned i = 0; i < args.length (); i++)
6866 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6867 build_int_cst (integer_type_node, i),
6868 NULL_TREE, NULL_TREE);
6869 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6870 GSI_SAME_STMT);
6872 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6873 build_int_cst (integer_type_node, args.length ()),
6874 NULL_TREE, NULL_TREE);
6875 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6876 GSI_SAME_STMT);
6877 TREE_ADDRESSABLE (argarray) = 1;
6878 return build_fold_addr_expr (argarray);
6881 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6883 static void
6884 expand_omp_target (struct omp_region *region)
6886 basic_block entry_bb, exit_bb, new_bb;
6887 struct function *child_cfun;
6888 tree child_fn, block, t;
6889 gimple_stmt_iterator gsi;
6890 gomp_target *entry_stmt;
6891 gimple *stmt;
6892 edge e;
6893 bool offloaded, data_region;
6895 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6896 new_bb = region->entry;
6898 offloaded = is_gimple_omp_offloaded (entry_stmt);
6899 switch (gimple_omp_target_kind (entry_stmt))
6901 case GF_OMP_TARGET_KIND_REGION:
6902 case GF_OMP_TARGET_KIND_UPDATE:
6903 case GF_OMP_TARGET_KIND_ENTER_DATA:
6904 case GF_OMP_TARGET_KIND_EXIT_DATA:
6905 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6906 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6907 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6908 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6909 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6910 data_region = false;
6911 break;
6912 case GF_OMP_TARGET_KIND_DATA:
6913 case GF_OMP_TARGET_KIND_OACC_DATA:
6914 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6915 data_region = true;
6916 break;
6917 default:
6918 gcc_unreachable ();
6921 child_fn = NULL_TREE;
6922 child_cfun = NULL;
6923 if (offloaded)
6925 child_fn = gimple_omp_target_child_fn (entry_stmt);
6926 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6929 /* Supported by expand_omp_taskreg, but not here. */
6930 if (child_cfun != NULL)
6931 gcc_checking_assert (!child_cfun->cfg);
6932 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6934 entry_bb = region->entry;
6935 exit_bb = region->exit;
6937 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6939 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6941 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6942 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6943 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6944 DECL_ATTRIBUTES (child_fn)
6945 = tree_cons (get_identifier ("oacc kernels"),
6946 NULL_TREE, DECL_ATTRIBUTES (child_fn));
6949 if (offloaded)
6951 unsigned srcidx, dstidx, num;
6953 /* If the offloading region needs data sent from the parent
6954 function, then the very first statement (except possible
6955 tree profile counter updates) of the offloading body
6956 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6957 &.OMP_DATA_O is passed as an argument to the child function,
6958 we need to replace it with the argument as seen by the child
6959 function.
6961 In most cases, this will end up being the identity assignment
6962 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6963 a function call that has been inlined, the original PARM_DECL
6964 .OMP_DATA_I may have been converted into a different local
6965 variable, in which case we need to keep the assignment. */
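/* For illustration only: the statement searched for below has the form

     .OMP_DATA_I = &.OMP_DATA_O;

   in the parent.  Seen from the child function, whose single argument is
   that address, it is the identity copy .OMP_DATA_I = .OMP_DATA_I, which
   the code below asserts and then simply removes.  */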
6966 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6967 if (data_arg)
6969 basic_block entry_succ_bb = single_succ (entry_bb);
6970 gimple_stmt_iterator gsi;
6971 tree arg;
6972 gimple *tgtcopy_stmt = NULL;
6973 tree sender = TREE_VEC_ELT (data_arg, 0);
6975 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6977 gcc_assert (!gsi_end_p (gsi));
6978 stmt = gsi_stmt (gsi);
6979 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6980 continue;
6982 if (gimple_num_ops (stmt) == 2)
6984 tree arg = gimple_assign_rhs1 (stmt);
6986 /* We're ignoring the subcode because we're
6987 effectively doing a STRIP_NOPS. */
6989 if (TREE_CODE (arg) == ADDR_EXPR
6990 && TREE_OPERAND (arg, 0) == sender)
6992 tgtcopy_stmt = stmt;
6993 break;
6998 gcc_assert (tgtcopy_stmt != NULL);
6999 arg = DECL_ARGUMENTS (child_fn);
7001 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7002 gsi_remove (&gsi, true);
7005 /* Declare local variables needed in CHILD_CFUN. */
7006 block = DECL_INITIAL (child_fn);
7007 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7008 /* The gimplifier could record temporaries in the offloading block
7009 rather than in the containing function's local_decls chain,
7010 which would mean cgraph missed finalizing them. Do it now. */
7011 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7012 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7013 varpool_node::finalize_decl (t);
7014 DECL_SAVED_TREE (child_fn) = NULL;
7015 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7016 gimple_set_body (child_fn, NULL);
7017 TREE_USED (block) = 1;
7019 /* Reset DECL_CONTEXT on function arguments. */
7020 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7021 DECL_CONTEXT (t) = child_fn;
7023 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
 7024 so that it can be moved to the child function. */
7025 gsi = gsi_last_nondebug_bb (entry_bb);
7026 stmt = gsi_stmt (gsi);
7027 gcc_assert (stmt
7028 && gimple_code (stmt) == gimple_code (entry_stmt));
7029 e = split_block (entry_bb, stmt);
7030 gsi_remove (&gsi, true);
7031 entry_bb = e->dest;
7032 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7034 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7035 if (exit_bb)
7037 gsi = gsi_last_nondebug_bb (exit_bb);
7038 gcc_assert (!gsi_end_p (gsi)
7039 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7040 stmt = gimple_build_return (NULL);
7041 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7042 gsi_remove (&gsi, true);
7045 /* Make sure to generate early debug for the function before
7046 outlining anything. */
7047 if (! gimple_in_ssa_p (cfun))
7048 (*debug_hooks->early_global_decl) (cfun->decl);
7050 /* Move the offloading region into CHILD_CFUN. */
7052 block = gimple_block (entry_stmt);
7054 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7055 if (exit_bb)
7056 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7057 /* When the OMP expansion process cannot guarantee an up-to-date
 7058 loop tree, arrange for the child function to fix up loops. */
7059 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7060 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7062 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7063 num = vec_safe_length (child_cfun->local_decls);
7064 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7066 t = (*child_cfun->local_decls)[srcidx];
7067 if (DECL_CONTEXT (t) == cfun->decl)
7068 continue;
7069 if (srcidx != dstidx)
7070 (*child_cfun->local_decls)[dstidx] = t;
7071 dstidx++;
7073 if (dstidx != num)
7074 vec_safe_truncate (child_cfun->local_decls, dstidx);
7076 /* Inform the callgraph about the new function. */
7077 child_cfun->curr_properties = cfun->curr_properties;
7078 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7079 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7080 cgraph_node *node = cgraph_node::get_create (child_fn);
7081 node->parallelized_function = 1;
7082 cgraph_node::add_new_function (child_fn, true);
7084 /* Add the new function to the offload table. */
7085 if (ENABLE_OFFLOADING)
7087 if (in_lto_p)
7088 DECL_PRESERVE_P (child_fn) = 1;
7089 vec_safe_push (offload_funcs, child_fn);
7092 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7093 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7095 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7096 fixed in a following pass. */
7097 push_cfun (child_cfun);
7098 if (need_asm)
7099 assign_assembler_name_if_needed (child_fn);
7100 cgraph_edge::rebuild_edges ();
7102 /* Some EH regions might become dead, see PR34608. If
7103 pass_cleanup_cfg isn't the first pass to happen with the
7104 new child, these dead EH edges might cause problems.
7105 Clean them up now. */
7106 if (flag_exceptions)
7108 basic_block bb;
7109 bool changed = false;
7111 FOR_EACH_BB_FN (bb, cfun)
7112 changed |= gimple_purge_dead_eh_edges (bb);
7113 if (changed)
7114 cleanup_tree_cfg ();
7116 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7117 verify_loop_structure ();
7118 pop_cfun ();
7120 if (dump_file && !gimple_in_ssa_p (cfun))
7122 omp_any_child_fn_dumped = true;
7123 dump_function_header (dump_file, child_fn, dump_flags);
7124 dump_function_to_file (child_fn, dump_file, dump_flags);
7128 /* Emit a library call to launch the offloading region, or do data
7129 transfers. */
7130 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7131 enum built_in_function start_ix;
7132 location_t clause_loc;
7133 unsigned int flags_i = 0;
7135 switch (gimple_omp_target_kind (entry_stmt))
7137 case GF_OMP_TARGET_KIND_REGION:
7138 start_ix = BUILT_IN_GOMP_TARGET;
7139 break;
7140 case GF_OMP_TARGET_KIND_DATA:
7141 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7142 break;
7143 case GF_OMP_TARGET_KIND_UPDATE:
7144 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7145 break;
7146 case GF_OMP_TARGET_KIND_ENTER_DATA:
7147 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7148 break;
7149 case GF_OMP_TARGET_KIND_EXIT_DATA:
7150 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7151 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7152 break;
7153 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7154 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7155 start_ix = BUILT_IN_GOACC_PARALLEL;
7156 break;
7157 case GF_OMP_TARGET_KIND_OACC_DATA:
7158 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7159 start_ix = BUILT_IN_GOACC_DATA_START;
7160 break;
7161 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7162 start_ix = BUILT_IN_GOACC_UPDATE;
7163 break;
7164 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7165 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7166 break;
7167 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7168 start_ix = BUILT_IN_GOACC_DECLARE;
7169 break;
7170 default:
7171 gcc_unreachable ();
7174 clauses = gimple_omp_target_clauses (entry_stmt);
7176 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
 7177 library choose) and there is no conditional. */
7178 cond = NULL_TREE;
7179 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7181 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7182 if (c)
7183 cond = OMP_CLAUSE_IF_EXPR (c);
7185 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7186 if (c)
7188 /* Even if we pass it to all library function calls, it is currently only
7189 defined/used for the OpenMP target ones. */
7190 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7191 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7192 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7193 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7195 device = OMP_CLAUSE_DEVICE_ID (c);
7196 clause_loc = OMP_CLAUSE_LOCATION (c);
7198 else
7199 clause_loc = gimple_location (entry_stmt);
7201 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7202 if (c)
7203 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7205 /* Ensure 'device' is of the correct type. */
7206 device = fold_convert_loc (clause_loc, integer_type_node, device);
7208 /* If we found the clause 'if (cond)', build
7209 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
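/* Sketch of the control flow built below (block names illustrative only):

     cond_bb:  if (cond) goto then_bb; else goto else_bb;
     then_bb:  tmp_var = device;
     else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
     new_bb:   ... tmp_var used as the device argument ...

   so that e.g. "#pragma omp target if (use_dev) device (2)" launches on
   device 2 only when use_dev evaluates to true and otherwise falls back
   to host execution.  */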
7210 if (cond)
7212 cond = gimple_boolify (cond);
7214 basic_block cond_bb, then_bb, else_bb;
7215 edge e;
7216 tree tmp_var;
7218 tmp_var = create_tmp_var (TREE_TYPE (device));
7219 if (offloaded)
7220 e = split_block_after_labels (new_bb);
7221 else
7223 gsi = gsi_last_nondebug_bb (new_bb);
7224 gsi_prev (&gsi);
7225 e = split_block (new_bb, gsi_stmt (gsi));
7227 cond_bb = e->src;
7228 new_bb = e->dest;
7229 remove_edge (e);
7231 then_bb = create_empty_bb (cond_bb);
7232 else_bb = create_empty_bb (then_bb);
7233 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7234 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7236 stmt = gimple_build_cond_empty (cond);
7237 gsi = gsi_last_bb (cond_bb);
7238 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7240 gsi = gsi_start_bb (then_bb);
7241 stmt = gimple_build_assign (tmp_var, device);
7242 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7244 gsi = gsi_start_bb (else_bb);
7245 stmt = gimple_build_assign (tmp_var,
7246 build_int_cst (integer_type_node,
7247 GOMP_DEVICE_HOST_FALLBACK));
7248 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7250 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7251 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7252 add_bb_to_loop (then_bb, cond_bb->loop_father);
7253 add_bb_to_loop (else_bb, cond_bb->loop_father);
7254 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7255 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7257 device = tmp_var;
7258 gsi = gsi_last_nondebug_bb (new_bb);
7260 else
7262 gsi = gsi_last_nondebug_bb (new_bb);
7263 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7264 true, GSI_SAME_STMT);
7267 t = gimple_omp_target_data_arg (entry_stmt);
7268 if (t == NULL)
7270 t1 = size_zero_node;
7271 t2 = build_zero_cst (ptr_type_node);
7272 t3 = t2;
7273 t4 = t2;
7275 else
7277 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7278 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7279 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7280 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7281 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
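/* For reference, derived from the construction above: T1 is the number of
   mapped entries (one past the maximum index of the second array in the
   data-argument TREE_VEC), while T2, T3 and T4 are the addresses of the
   TREE_VEC's three arrays -- presumably the host addresses, sizes and map
   kinds expected by the libgomp routine selected via START_IX below.  */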
7284 gimple *g;
7285 bool tagging = false;
7286 /* The maximum number used by any start_ix, without varargs. */
7287 auto_vec<tree, 11> args;
7288 args.quick_push (device);
7289 if (offloaded)
7290 args.quick_push (build_fold_addr_expr (child_fn));
7291 args.quick_push (t1);
7292 args.quick_push (t2);
7293 args.quick_push (t3);
7294 args.quick_push (t4);
7295 switch (start_ix)
7297 case BUILT_IN_GOACC_DATA_START:
7298 case BUILT_IN_GOACC_DECLARE:
7299 case BUILT_IN_GOMP_TARGET_DATA:
7300 break;
7301 case BUILT_IN_GOMP_TARGET:
7302 case BUILT_IN_GOMP_TARGET_UPDATE:
7303 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7304 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7305 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7306 if (c)
7307 depend = OMP_CLAUSE_DECL (c);
7308 else
7309 depend = build_int_cst (ptr_type_node, 0);
7310 args.quick_push (depend);
7311 if (start_ix == BUILT_IN_GOMP_TARGET)
7312 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7313 break;
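/* Illustrative shape of the OpenMP target launch call assembled here
   (the exact libgomp prototype is assumed, not defined in this file):

     GOMP_target_ext (device, fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);

   where fn is the outlined child function, mapnum/hostaddrs/sizes/kinds
   come from T1..T4 above, and args is the array built by
   get_target_arguments.  */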
7314 case BUILT_IN_GOACC_PARALLEL:
7315 oacc_set_fn_attrib (child_fn, clauses, &args);
7316 tagging = true;
7317 /* FALLTHRU */
7318 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7319 case BUILT_IN_GOACC_UPDATE:
7321 tree t_async = NULL_TREE;
7323 /* If present, use the value specified by the respective
7324 clause, making sure that it is of the correct type. */
7325 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7326 if (c)
7327 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7328 integer_type_node,
7329 OMP_CLAUSE_ASYNC_EXPR (c));
7330 else if (!tagging)
7331 /* Default values for t_async. */
7332 t_async = fold_convert_loc (gimple_location (entry_stmt),
7333 integer_type_node,
7334 build_int_cst (integer_type_node,
7335 GOMP_ASYNC_SYNC));
7336 if (tagging && t_async)
7338 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7340 if (TREE_CODE (t_async) == INTEGER_CST)
7342 /* See if we can pack the async arg into the tag's
7343 operand. */
7344 i_async = TREE_INT_CST_LOW (t_async);
7345 if (i_async < GOMP_LAUNCH_OP_MAX)
7346 t_async = NULL_TREE;
7347 else
7348 i_async = GOMP_LAUNCH_OP_MAX;
7350 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7351 i_async));
7353 if (t_async)
7354 args.safe_push (t_async);
7356 /* Save the argument index, and ... */
7357 unsigned t_wait_idx = args.length ();
7358 unsigned num_waits = 0;
7359 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7360 if (!tagging || c)
7361 /* ... push a placeholder. */
7362 args.safe_push (integer_zero_node);
7364 for (; c; c = OMP_CLAUSE_CHAIN (c))
7365 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7367 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7368 integer_type_node,
7369 OMP_CLAUSE_WAIT_EXPR (c)));
7370 num_waits++;
7373 if (!tagging || num_waits)
7375 tree len;
7377 /* Now that we know the number, update the placeholder. */
7378 if (tagging)
7379 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7380 else
7381 len = build_int_cst (integer_type_node, num_waits);
7382 len = fold_convert_loc (gimple_location (entry_stmt),
7383 unsigned_type_node, len);
7384 args[t_wait_idx] = len;
7387 break;
7388 default:
7389 gcc_unreachable ();
7391 if (tagging)
7392 /* Push terminal marker - zero. */
7393 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
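/* Illustrative summary of the tagged (OpenACC) argument tail built above:
   a sequence of oacc_launch_pack (code, ...) tags -- GOMP_LAUNCH_ASYNC and
   GOMP_LAUNCH_WAIT here, each optionally followed by its operand values --
   terminated by the zero tag just pushed.  The runtime is assumed to decode
   the launch arguments in exactly this order.  */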
7395 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7396 gimple_set_location (g, gimple_location (entry_stmt));
7397 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7398 if (!offloaded)
7400 g = gsi_stmt (gsi);
7401 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7402 gsi_remove (&gsi, true);
7404 if (data_region && region->exit)
7406 gsi = gsi_last_nondebug_bb (region->exit);
7407 g = gsi_stmt (gsi);
7408 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7409 gsi_remove (&gsi, true);
7413 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
 7414 the iteration variable derived from the thread number. INTRA_GROUP means this
7415 is an expansion of a loop iterating over work-items within a separate
7416 iteration over groups. */
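/* Illustrative sketch (identifiers made up): for each collapsed dimension
   the original loop

     for (V = N1; V cond N2; V += STEP) BODY;

   is reduced below to a straight-line assignment deriving V from the
   work-item (or work-group) id of that dimension,

     V = N1 + <BUILT_IN_HSA_WORKITEMABSID (dim)> * STEP;

   after which the GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN
   markers and the loop-back edge are removed.  */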
7418 static void
7419 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7421 gimple_stmt_iterator gsi;
7422 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7423 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7424 == GF_OMP_FOR_KIND_GRID_LOOP);
7425 size_t collapse = gimple_omp_for_collapse (for_stmt);
7426 struct omp_for_data_loop *loops
7427 = XALLOCAVEC (struct omp_for_data_loop,
7428 gimple_omp_for_collapse (for_stmt));
7429 struct omp_for_data fd;
7431 remove_edge (BRANCH_EDGE (kfor->entry));
7432 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7434 gcc_assert (kfor->cont);
7435 omp_extract_for_data (for_stmt, &fd, loops);
7437 gsi = gsi_start_bb (body_bb);
7439 for (size_t dim = 0; dim < collapse; dim++)
7441 tree type, itype;
7442 itype = type = TREE_TYPE (fd.loops[dim].v);
7443 if (POINTER_TYPE_P (type))
7444 itype = signed_type_for (type);
7446 tree n1 = fd.loops[dim].n1;
7447 tree step = fd.loops[dim].step;
7448 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7449 true, NULL_TREE, true, GSI_SAME_STMT);
7450 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7451 true, NULL_TREE, true, GSI_SAME_STMT);
7452 tree threadid;
7453 if (gimple_omp_for_grid_group_iter (for_stmt))
7455 gcc_checking_assert (!intra_group);
7456 threadid = build_call_expr (builtin_decl_explicit
7457 (BUILT_IN_HSA_WORKGROUPID), 1,
7458 build_int_cstu (unsigned_type_node, dim));
7460 else if (intra_group)
7461 threadid = build_call_expr (builtin_decl_explicit
7462 (BUILT_IN_HSA_WORKITEMID), 1,
7463 build_int_cstu (unsigned_type_node, dim));
7464 else
7465 threadid = build_call_expr (builtin_decl_explicit
7466 (BUILT_IN_HSA_WORKITEMABSID), 1,
7467 build_int_cstu (unsigned_type_node, dim));
7468 threadid = fold_convert (itype, threadid);
7469 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7470 true, GSI_SAME_STMT);
7472 tree startvar = fd.loops[dim].v;
7473 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7474 if (POINTER_TYPE_P (type))
7475 t = fold_build_pointer_plus (n1, t);
7476 else
7477 t = fold_build2 (PLUS_EXPR, type, t, n1);
7478 t = fold_convert (type, t);
7479 t = force_gimple_operand_gsi (&gsi, t,
7480 DECL_P (startvar)
7481 && TREE_ADDRESSABLE (startvar),
7482 NULL_TREE, true, GSI_SAME_STMT);
7483 gassign *assign_stmt = gimple_build_assign (startvar, t);
7484 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7486 /* Remove the omp for statement. */
7487 gsi = gsi_last_nondebug_bb (kfor->entry);
7488 gsi_remove (&gsi, true);
7490 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7491 gsi = gsi_last_nondebug_bb (kfor->cont);
7492 gcc_assert (!gsi_end_p (gsi)
7493 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7494 gsi_remove (&gsi, true);
7496 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7497 gsi = gsi_last_nondebug_bb (kfor->exit);
7498 gcc_assert (!gsi_end_p (gsi)
7499 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7500 if (intra_group)
7501 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7502 gsi_remove (&gsi, true);
7504 /* Fix up the much simpler CFG. */
7505 remove_edge (find_edge (kfor->cont, body_bb));
7507 if (kfor->cont != body_bb)
7508 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7509 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7512 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
 7513 argument decls. */
7515 struct grid_arg_decl_map
7517 tree old_arg;
7518 tree new_arg;
7521 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7522 pertaining to the kernel function. */
7524 static tree
7525 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7527 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7528 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7529 tree t = *tp;
7531 if (t == adm->old_arg)
7532 *tp = adm->new_arg;
7533 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7534 return NULL_TREE;
7537 /* If the TARGET region contains a kernel body for-loop, remove that region from
 7538 the TARGET and expand it in HSA gridified kernel fashion. */
7540 static void
7541 grid_expand_target_grid_body (struct omp_region *target)
7543 if (!hsa_gen_requested_p ())
7544 return;
7546 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7547 struct omp_region **pp;
7549 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7550 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7551 break;
7553 struct omp_region *gpukernel = *pp;
7555 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7556 if (!gpukernel)
7558 /* HSA cannot handle OACC stuff. */
7559 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7560 return;
7561 gcc_checking_assert (orig_child_fndecl);
7562 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7563 OMP_CLAUSE__GRIDDIM_));
7564 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7566 hsa_register_kernel (n);
7567 return;
7570 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7571 OMP_CLAUSE__GRIDDIM_));
7572 tree inside_block
7573 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7574 *pp = gpukernel->next;
7575 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7576 if ((*pp)->type == GIMPLE_OMP_FOR)
7577 break;
7579 struct omp_region *kfor = *pp;
7580 gcc_assert (kfor);
7581 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7582 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7583 *pp = kfor->next;
7584 if (kfor->inner)
7586 if (gimple_omp_for_grid_group_iter (for_stmt))
7588 struct omp_region **next_pp;
7589 for (pp = &kfor->inner; *pp; pp = next_pp)
7591 next_pp = &(*pp)->next;
7592 if ((*pp)->type != GIMPLE_OMP_FOR)
7593 continue;
7594 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7595 gcc_assert (gimple_omp_for_kind (inner)
7596 == GF_OMP_FOR_KIND_GRID_LOOP);
7597 grid_expand_omp_for_loop (*pp, true);
7598 *pp = (*pp)->next;
7599 next_pp = pp;
7602 expand_omp (kfor->inner);
7604 if (gpukernel->inner)
7605 expand_omp (gpukernel->inner);
7607 tree kern_fndecl = copy_node (orig_child_fndecl);
7608 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7609 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7610 tree tgtblock = gimple_block (tgt_stmt);
7611 tree fniniblock = make_node (BLOCK);
7612 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7613 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7614 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7615 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7616 DECL_INITIAL (kern_fndecl) = fniniblock;
7617 push_struct_function (kern_fndecl);
7618 cfun->function_end_locus = gimple_location (tgt_stmt);
7619 init_tree_ssa (cfun);
7620 pop_cfun ();
7622 /* Make sure to generate early debug for the function before
7623 outlining anything. */
7624 if (! gimple_in_ssa_p (cfun))
7625 (*debug_hooks->early_global_decl) (cfun->decl);
7627 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7628 gcc_assert (!DECL_CHAIN (old_parm_decl));
7629 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7630 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7631 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7632 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7633 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7634 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7635 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7636 kern_cfun->curr_properties = cfun->curr_properties;
7638 grid_expand_omp_for_loop (kfor, false);
7640 /* Remove the omp for statement. */
7641 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7642 gsi_remove (&gsi, true);
7643 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7644 return. */
7645 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7646 gcc_assert (!gsi_end_p (gsi)
7647 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7648 gimple *ret_stmt = gimple_build_return (NULL);
7649 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7650 gsi_remove (&gsi, true);
7652 /* Statements in the first BB in the target construct have been produced by
7653 target lowering and must be copied inside the GPUKERNEL, with the two
7654 exceptions of the first OMP statement and the OMP_DATA assignment
7655 statement. */
7656 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7657 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7658 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7659 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7660 !gsi_end_p (tsi); gsi_next (&tsi))
7662 gimple *stmt = gsi_stmt (tsi);
7663 if (is_gimple_omp (stmt))
7664 break;
7665 if (sender
7666 && is_gimple_assign (stmt)
7667 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7668 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7669 continue;
7670 gimple *copy = gimple_copy (stmt);
7671 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7672 gimple_set_block (copy, fniniblock);
7675 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7676 gpukernel->exit, inside_block);
7678 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7679 kcn->mark_force_output ();
7680 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7682 hsa_register_kernel (kcn, orig_child);
7684 cgraph_node::add_new_function (kern_fndecl, true);
7685 push_cfun (kern_cfun);
7686 cgraph_edge::rebuild_edges ();
7688 /* Re-map any mention of the PARM_DECL of the original function to the
7689 PARM_DECL of the new one.
7691 TODO: It would be great if lowering produced references into the GPU
7692 kernel decl straight away and we did not have to do this. */
7693 struct grid_arg_decl_map adm;
7694 adm.old_arg = old_parm_decl;
7695 adm.new_arg = new_parm_decl;
7696 basic_block bb;
7697 FOR_EACH_BB_FN (bb, kern_cfun)
7699 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7701 gimple *stmt = gsi_stmt (gsi);
7702 struct walk_stmt_info wi;
7703 memset (&wi, 0, sizeof (wi));
7704 wi.info = &adm;
7705 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7708 pop_cfun ();
7710 return;
7713 /* Expand the parallel region tree rooted at REGION. Expansion
7714 proceeds in depth-first order. Innermost regions are expanded
7715 first. This way, parallel regions that require a new function to
7716 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7717 internal dependencies in their body. */
7719 static void
7720 expand_omp (struct omp_region *region)
7722 omp_any_child_fn_dumped = false;
7723 while (region)
7725 location_t saved_location;
7726 gimple *inner_stmt = NULL;
7728 /* First, determine whether this is a combined parallel+workshare
7729 region. */
7730 if (region->type == GIMPLE_OMP_PARALLEL)
7731 determine_parallel_type (region);
7732 else if (region->type == GIMPLE_OMP_TARGET)
7733 grid_expand_target_grid_body (region);
7735 if (region->type == GIMPLE_OMP_FOR
7736 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7737 inner_stmt = last_stmt (region->inner->entry);
7739 if (region->inner)
7740 expand_omp (region->inner);
7742 saved_location = input_location;
7743 if (gimple_has_location (last_stmt (region->entry)))
7744 input_location = gimple_location (last_stmt (region->entry));
7746 switch (region->type)
7748 case GIMPLE_OMP_PARALLEL:
7749 case GIMPLE_OMP_TASK:
7750 expand_omp_taskreg (region);
7751 break;
7753 case GIMPLE_OMP_FOR:
7754 expand_omp_for (region, inner_stmt);
7755 break;
7757 case GIMPLE_OMP_SECTIONS:
7758 expand_omp_sections (region);
7759 break;
7761 case GIMPLE_OMP_SECTION:
7762 /* Individual omp sections are handled together with their
7763 parent GIMPLE_OMP_SECTIONS region. */
7764 break;
7766 case GIMPLE_OMP_SINGLE:
7767 expand_omp_single (region);
7768 break;
7770 case GIMPLE_OMP_ORDERED:
7772 gomp_ordered *ord_stmt
7773 = as_a <gomp_ordered *> (last_stmt (region->entry));
7774 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7775 OMP_CLAUSE_DEPEND))
7777 /* We'll expand these when expanding corresponding
7778 worksharing region with ordered(n) clause. */
7779 gcc_assert (region->outer
7780 && region->outer->type == GIMPLE_OMP_FOR);
7781 region->ord_stmt = ord_stmt;
7782 break;
7785 /* FALLTHRU */
7786 case GIMPLE_OMP_MASTER:
7787 case GIMPLE_OMP_TASKGROUP:
7788 case GIMPLE_OMP_CRITICAL:
7789 case GIMPLE_OMP_TEAMS:
7790 expand_omp_synch (region);
7791 break;
7793 case GIMPLE_OMP_ATOMIC_LOAD:
7794 expand_omp_atomic (region);
7795 break;
7797 case GIMPLE_OMP_TARGET:
7798 expand_omp_target (region);
7799 break;
7801 default:
7802 gcc_unreachable ();
7805 input_location = saved_location;
7806 region = region->next;
7808 if (omp_any_child_fn_dumped)
7810 if (dump_file)
7811 dump_function_header (dump_file, current_function_decl, dump_flags);
7812 omp_any_child_fn_dumped = false;
7816 /* Helper for build_omp_regions. Scan the dominator tree starting at
7817 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7818 true, the function ends once a single tree is built (otherwise, a whole
7819 forest of OMP constructs may be built). */
7821 static void
7822 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7823 bool single_tree)
7825 gimple_stmt_iterator gsi;
7826 gimple *stmt;
7827 basic_block son;
7829 gsi = gsi_last_nondebug_bb (bb);
7830 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7832 struct omp_region *region;
7833 enum gimple_code code;
7835 stmt = gsi_stmt (gsi);
7836 code = gimple_code (stmt);
7837 if (code == GIMPLE_OMP_RETURN)
7839 /* STMT is the return point out of region PARENT. Mark it
7840 as the exit point and make PARENT the immediately
7841 enclosing region. */
7842 gcc_assert (parent);
7843 region = parent;
7844 region->exit = bb;
7845 parent = parent->outer;
7847 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7849 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7850 GIMPLE_OMP_RETURN, but matches with
7851 GIMPLE_OMP_ATOMIC_LOAD. */
7852 gcc_assert (parent);
7853 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7854 region = parent;
7855 region->exit = bb;
7856 parent = parent->outer;
7858 else if (code == GIMPLE_OMP_CONTINUE)
7860 gcc_assert (parent);
7861 parent->cont = bb;
7863 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7865 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7866 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7868 else
7870 region = new_omp_region (bb, code, parent);
7871 /* Otherwise... */
7872 if (code == GIMPLE_OMP_TARGET)
7874 switch (gimple_omp_target_kind (stmt))
7876 case GF_OMP_TARGET_KIND_REGION:
7877 case GF_OMP_TARGET_KIND_DATA:
7878 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7879 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7880 case GF_OMP_TARGET_KIND_OACC_DATA:
7881 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7882 break;
7883 case GF_OMP_TARGET_KIND_UPDATE:
7884 case GF_OMP_TARGET_KIND_ENTER_DATA:
7885 case GF_OMP_TARGET_KIND_EXIT_DATA:
7886 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7887 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7888 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7889 /* ..., other than for those stand-alone directives... */
7890 region = NULL;
7891 break;
7892 default:
7893 gcc_unreachable ();
7896 else if (code == GIMPLE_OMP_ORDERED
7897 && omp_find_clause (gimple_omp_ordered_clauses
7898 (as_a <gomp_ordered *> (stmt)),
7899 OMP_CLAUSE_DEPEND))
7900 /* #pragma omp ordered depend is also just a stand-alone
7901 directive. */
7902 region = NULL;
7903 /* ..., this directive becomes the parent for a new region. */
7904 if (region)
7905 parent = region;
7909 if (single_tree && !parent)
7910 return;
7912 for (son = first_dom_son (CDI_DOMINATORS, bb);
7913 son;
7914 son = next_dom_son (CDI_DOMINATORS, son))
7915 build_omp_regions_1 (son, parent, single_tree);
7918 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7919 root_omp_region. */
7921 static void
7922 build_omp_regions_root (basic_block root)
7924 gcc_assert (root_omp_region == NULL);
7925 build_omp_regions_1 (root, NULL, true);
7926 gcc_assert (root_omp_region != NULL);
7929 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7931 void
7932 omp_expand_local (basic_block head)
7934 build_omp_regions_root (head);
7935 if (dump_file && (dump_flags & TDF_DETAILS))
7937 fprintf (dump_file, "\nOMP region tree\n\n");
7938 dump_omp_region (dump_file, root_omp_region, 0);
7939 fprintf (dump_file, "\n");
7942 remove_exit_barriers (root_omp_region);
7943 expand_omp (root_omp_region);
7945 omp_free_regions ();
7948 /* Scan the CFG and build a tree of OMP regions, storing its root in
 7949 root_omp_region. */
7951 static void
7952 build_omp_regions (void)
7954 gcc_assert (root_omp_region == NULL);
7955 calculate_dominance_info (CDI_DOMINATORS);
7956 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7959 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7961 static unsigned int
7962 execute_expand_omp (void)
7964 build_omp_regions ();
7966 if (!root_omp_region)
7967 return 0;
7969 if (dump_file)
7971 fprintf (dump_file, "\nOMP region tree\n\n");
7972 dump_omp_region (dump_file, root_omp_region, 0);
7973 fprintf (dump_file, "\n");
7976 remove_exit_barriers (root_omp_region);
7978 expand_omp (root_omp_region);
7980 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7981 verify_loop_structure ();
7982 cleanup_tree_cfg ();
7984 omp_free_regions ();
7986 return 0;
7989 /* OMP expansion -- the default pass, run before creation of SSA form. */
7991 namespace {
7993 const pass_data pass_data_expand_omp =
7995 GIMPLE_PASS, /* type */
7996 "ompexp", /* name */
7997 OPTGROUP_OMP, /* optinfo_flags */
7998 TV_NONE, /* tv_id */
7999 PROP_gimple_any, /* properties_required */
8000 PROP_gimple_eomp, /* properties_provided */
8001 0, /* properties_destroyed */
8002 0, /* todo_flags_start */
8003 0, /* todo_flags_finish */
8006 class pass_expand_omp : public gimple_opt_pass
8008 public:
8009 pass_expand_omp (gcc::context *ctxt)
8010 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8013 /* opt_pass methods: */
8014 virtual unsigned int execute (function *)
8016 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8017 || flag_openmp_simd != 0)
8018 && !seen_error ());
8020 /* This pass always runs, to provide PROP_gimple_eomp.
8021 But often, there is nothing to do. */
8022 if (!gate)
8023 return 0;
8025 return execute_expand_omp ();
8028 }; // class pass_expand_omp
8030 } // anon namespace
8032 gimple_opt_pass *
8033 make_pass_expand_omp (gcc::context *ctxt)
8035 return new pass_expand_omp (ctxt);
8038 namespace {
8040 const pass_data pass_data_expand_omp_ssa =
8042 GIMPLE_PASS, /* type */
8043 "ompexpssa", /* name */
8044 OPTGROUP_OMP, /* optinfo_flags */
8045 TV_NONE, /* tv_id */
8046 PROP_cfg | PROP_ssa, /* properties_required */
8047 PROP_gimple_eomp, /* properties_provided */
8048 0, /* properties_destroyed */
8049 0, /* todo_flags_start */
8050 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8053 class pass_expand_omp_ssa : public gimple_opt_pass
8055 public:
8056 pass_expand_omp_ssa (gcc::context *ctxt)
8057 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8060 /* opt_pass methods: */
8061 virtual bool gate (function *fun)
8063 return !(fun->curr_properties & PROP_gimple_eomp);
8065 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8066 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8068 }; // class pass_expand_omp_ssa
8070 } // anon namespace
8072 gimple_opt_pass *
8073 make_pass_expand_omp_ssa (gcc::context *ctxt)
8075 return new pass_expand_omp_ssa (ctxt);
8078 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8079 GIMPLE_* codes. */
8081 bool
8082 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8083 int *region_idx)
8085 gimple *last = last_stmt (bb);
8086 enum gimple_code code = gimple_code (last);
8087 struct omp_region *cur_region = *region;
8088 bool fallthru = false;
8090 switch (code)
8092 case GIMPLE_OMP_PARALLEL:
8093 case GIMPLE_OMP_TASK:
8094 case GIMPLE_OMP_FOR:
8095 case GIMPLE_OMP_SINGLE:
8096 case GIMPLE_OMP_TEAMS:
8097 case GIMPLE_OMP_MASTER:
8098 case GIMPLE_OMP_TASKGROUP:
8099 case GIMPLE_OMP_CRITICAL:
8100 case GIMPLE_OMP_SECTION:
8101 case GIMPLE_OMP_GRID_BODY:
8102 cur_region = new_omp_region (bb, code, cur_region);
8103 fallthru = true;
8104 break;
8106 case GIMPLE_OMP_ORDERED:
8107 cur_region = new_omp_region (bb, code, cur_region);
8108 fallthru = true;
8109 if (omp_find_clause (gimple_omp_ordered_clauses
8110 (as_a <gomp_ordered *> (last)),
8111 OMP_CLAUSE_DEPEND))
8112 cur_region = cur_region->outer;
8113 break;
8115 case GIMPLE_OMP_TARGET:
8116 cur_region = new_omp_region (bb, code, cur_region);
8117 fallthru = true;
8118 switch (gimple_omp_target_kind (last))
8120 case GF_OMP_TARGET_KIND_REGION:
8121 case GF_OMP_TARGET_KIND_DATA:
8122 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8123 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8124 case GF_OMP_TARGET_KIND_OACC_DATA:
8125 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8126 break;
8127 case GF_OMP_TARGET_KIND_UPDATE:
8128 case GF_OMP_TARGET_KIND_ENTER_DATA:
8129 case GF_OMP_TARGET_KIND_EXIT_DATA:
8130 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8131 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8132 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8133 cur_region = cur_region->outer;
8134 break;
8135 default:
8136 gcc_unreachable ();
8138 break;
8140 case GIMPLE_OMP_SECTIONS:
8141 cur_region = new_omp_region (bb, code, cur_region);
8142 fallthru = true;
8143 break;
8145 case GIMPLE_OMP_SECTIONS_SWITCH:
8146 fallthru = false;
8147 break;
8149 case GIMPLE_OMP_ATOMIC_LOAD:
8150 case GIMPLE_OMP_ATOMIC_STORE:
8151 fallthru = true;
8152 break;
8154 case GIMPLE_OMP_RETURN:
8155 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8156 somewhere other than the next block. This will be
8157 created later. */
8158 cur_region->exit = bb;
8159 if (cur_region->type == GIMPLE_OMP_TASK)
8160 /* Add an edge corresponding to not scheduling the task
8161 immediately. */
8162 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8163 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8164 cur_region = cur_region->outer;
8165 break;
8167 case GIMPLE_OMP_CONTINUE:
8168 cur_region->cont = bb;
8169 switch (cur_region->type)
8171 case GIMPLE_OMP_FOR:
8172 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8173 succs edges as abnormal to prevent splitting
8174 them. */
8175 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8176 /* Make the loopback edge. */
8177 make_edge (bb, single_succ (cur_region->entry),
8178 EDGE_ABNORMAL);
8180 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8181 corresponds to the case that the body of the loop
8182 is not executed at all. */
8183 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8184 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8185 fallthru = false;
8186 break;
8188 case GIMPLE_OMP_SECTIONS:
8189 /* Wire up the edges into and out of the nested sections. */
8191 basic_block switch_bb = single_succ (cur_region->entry);
8193 struct omp_region *i;
8194 for (i = cur_region->inner; i ; i = i->next)
8196 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8197 make_edge (switch_bb, i->entry, 0);
8198 make_edge (i->exit, bb, EDGE_FALLTHRU);
8201 /* Make the loopback edge to the block with
8202 GIMPLE_OMP_SECTIONS_SWITCH. */
8203 make_edge (bb, switch_bb, 0);
8205 /* Make the edge from the switch to exit. */
8206 make_edge (switch_bb, bb->next_bb, 0);
8207 fallthru = false;
8209 break;
8211 case GIMPLE_OMP_TASK:
8212 fallthru = true;
8213 break;
8215 default:
8216 gcc_unreachable ();
8218 break;
8220 default:
8221 gcc_unreachable ();
8224 if (*region != cur_region)
8226 *region = cur_region;
8227 if (cur_region)
8228 *region_idx = cur_region->entry->index;
8229 else
8230 *region_idx = 0;
8233 return fallthru;
8236 #include "gt-omp-expand.h"