* gcc-interface/trans.c (process_freeze_entity): Be prepared for a
[official-gcc.git] / gcc / omp-expand.c
blob02488339b401eb8145081d63ab819d5eadbf5afe
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
117 /* Return true if REGION is a combined parallel+workshare region. */
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
122 return region->is_combined_parallel;
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
139 Is lowered into:
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
196 return true;
199 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 if (!simd_schedule)
206 return chunk_size;
208 int vf = omp_max_vf ();
209 if (vf == 1)
210 return chunk_size;
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 struct omp_for_data fd;
233 tree n1, n2;
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
239 if (gimple_omp_for_combined_into_p (for_stmt))
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
263 if (fd.chunk_size)
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
270 return ws_args;
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
284 gcc_unreachable ();
287 /* Discover whether REGION is a combined parallel+workshare region. */
289 static void
290 determine_parallel_type (struct omp_region *region)
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
323 if (region->inner->type == GIMPLE_OMP_FOR)
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
358 /* Dump the parallel region tree rooted at REGION. */
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
369 if (region->cont)
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
388 dump_omp_region (stderr, region, 0);
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
394 dump_omp_region (stderr, root_omp_region, 0);
397 /* Create a new parallel region starting at STMT inside region PARENT. */
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
403 struct omp_region *region = XCNEW (struct omp_region);
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
409 if (parent)
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
416 else
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
424 return region;
427 /* Release the memory associated with the region tree rooted at REGION. */
429 static void
430 free_omp_region_1 (struct omp_region *region)
432 struct omp_region *i, *n;
434 for (i = region->inner; i ; i = n)
436 n = i->next;
437 free_omp_region_1 (i);
440 free (region);
443 /* Release the memory for the entire omp region tree. */
445 void
446 omp_free_regions (void)
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
451 n = r->next;
452 free_omp_region_1 (r);
454 root_omp_region = NULL;
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
460 static gcond *
461 gimple_build_cond_empty (tree cond)
463 enum tree_code pred_code;
464 tree lhs, rhs;
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
498 return false;
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502 Add CHILD_FNDECL to decl chain of the supercontext of the block
503 ENTRY_BLOCK - this is the block which originally contained the
504 code from which CHILD_FNDECL was created.
506 Together, these actions ensure that the debug info for the outlined
507 function will be emitted with the correct lexical scope. */
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
512 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
514 tree b = BLOCK_SUPERCONTEXT (entry_block);
516 if (TREE_CODE (b) == BLOCK)
518 tree parent_fndecl;
520 /* Follow supercontext chain until the parent fndecl
521 is found. */
522 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 TREE_CODE (parent_fndecl) == BLOCK;
524 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
527 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
529 DECL_CONTEXT (child_fndecl) = parent_fndecl;
531 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 BLOCK_VARS (b) = child_fndecl;
537 /* Build the function calls to GOMP_parallel_start etc to actually
538 generate the parallel operation. REGION is the parallel region
539 being expanded. BB is the block where to insert the code. WS_ARGS
540 will be set if this is a call to a combined parallel+workshare
541 construct, it contains the list of additional arguments needed by
542 the workshare construct. */
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 gomp_parallel *entry_stmt,
547 vec<tree, va_gc> *ws_args)
549 tree t, t1, t2, val, cond, c, clauses, flags;
550 gimple_stmt_iterator gsi;
551 gimple *stmt;
552 enum built_in_function start_ix;
553 int start_ix2;
554 location_t clause_loc;
555 vec<tree, va_gc> *args;
557 clauses = gimple_omp_parallel_clauses (entry_stmt);
559 /* Determine what flavor of GOMP_parallel we will be
560 emitting. */
561 start_ix = BUILT_IN_GOMP_PARALLEL;
562 if (is_combined_parallel (region))
564 switch (region->inner->type)
566 case GIMPLE_OMP_FOR:
567 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 switch (region->inner->sched_kind)
570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 start_ix2 = 3;
572 break;
573 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 case OMP_CLAUSE_SCHEDULE_GUIDED:
575 if (region->inner->sched_modifiers
576 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
578 start_ix2 = 3 + region->inner->sched_kind;
579 break;
581 /* FALLTHRU */
582 default:
583 start_ix2 = region->inner->sched_kind;
584 break;
586 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 start_ix = (enum built_in_function) start_ix2;
588 break;
589 case GIMPLE_OMP_SECTIONS:
590 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 break;
592 default:
593 gcc_unreachable ();
597 /* By default, the value of NUM_THREADS is zero (selected at run time)
598 and there is no conditional. */
599 cond = NULL_TREE;
600 val = build_int_cst (unsigned_type_node, 0);
601 flags = build_int_cst (unsigned_type_node, 0);
603 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604 if (c)
605 cond = OMP_CLAUSE_IF_EXPR (c);
607 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608 if (c)
610 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611 clause_loc = OMP_CLAUSE_LOCATION (c);
613 else
614 clause_loc = gimple_location (entry_stmt);
616 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617 if (c)
618 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
620 /* Ensure 'val' is of the correct type. */
621 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
623 /* If we found the clause 'if (cond)', build either
624 (cond != 0) or (cond ? val : 1u). */
625 if (cond)
627 cond = gimple_boolify (cond);
629 if (integer_zerop (val))
630 val = fold_build2_loc (clause_loc,
631 EQ_EXPR, unsigned_type_node, cond,
632 build_int_cst (TREE_TYPE (cond), 0));
633 else
635 basic_block cond_bb, then_bb, else_bb;
636 edge e, e_then, e_else;
637 tree tmp_then, tmp_else, tmp_join, tmp_var;
639 tmp_var = create_tmp_var (TREE_TYPE (val));
640 if (gimple_in_ssa_p (cfun))
642 tmp_then = make_ssa_name (tmp_var);
643 tmp_else = make_ssa_name (tmp_var);
644 tmp_join = make_ssa_name (tmp_var);
646 else
648 tmp_then = tmp_var;
649 tmp_else = tmp_var;
650 tmp_join = tmp_var;
653 e = split_block_after_labels (bb);
654 cond_bb = e->src;
655 bb = e->dest;
656 remove_edge (e);
658 then_bb = create_empty_bb (cond_bb);
659 else_bb = create_empty_bb (then_bb);
660 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
663 stmt = gimple_build_cond_empty (cond);
664 gsi = gsi_start_bb (cond_bb);
665 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
667 gsi = gsi_start_bb (then_bb);
668 expand_omp_build_assign (&gsi, tmp_then, val, true);
670 gsi = gsi_start_bb (else_bb);
671 expand_omp_build_assign (&gsi, tmp_else,
672 build_int_cst (unsigned_type_node, 1),
673 true);
675 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 add_bb_to_loop (then_bb, cond_bb->loop_father);
678 add_bb_to_loop (else_bb, cond_bb->loop_father);
679 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
682 if (gimple_in_ssa_p (cfun))
684 gphi *phi = create_phi_node (tmp_join, bb);
685 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
689 val = tmp_join;
692 gsi = gsi_start_bb (bb);
693 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 false, GSI_CONTINUE_LINKING);
697 gsi = gsi_last_nondebug_bb (bb);
698 t = gimple_omp_parallel_data_arg (entry_stmt);
699 if (t == NULL)
700 t1 = null_pointer_node;
701 else
702 t1 = build_fold_addr_expr (t);
703 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704 t2 = build_fold_addr_expr (child_fndecl);
706 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
708 vec_alloc (args, 4 + vec_safe_length (ws_args));
709 args->quick_push (t2);
710 args->quick_push (t1);
711 args->quick_push (val);
712 if (ws_args)
713 args->splice (*ws_args);
714 args->quick_push (flags);
716 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 builtin_decl_explicit (start_ix), args);
719 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 false, GSI_CONTINUE_LINKING);
722 if (hsa_gen_requested_p ()
723 && parallel_needs_hsa_kernel_p (region))
725 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726 hsa_register_kernel (child_cnode);
730 /* Build the function call to GOMP_task to actually
731 generate the task operation. BB is the block where to insert the code. */
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 else
787 num_tasks = integer_zero_node;
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
799 tree flags = build_int_cst (unsigned_type_node, iflags);
801 tree cond = boolean_true_node;
802 if (ifc)
804 if (taskloop_p)
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 if (finalc)
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
837 gsi = gsi_last_nondebug_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
874 tree chain = NULL_TREE, t;
875 unsigned ix;
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 DECL_CHAIN (t) = chain;
880 chain = t;
883 return chain;
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
892 static void
893 remove_exit_barrier (struct omp_region *region)
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
902 exit_bb = region->exit;
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_nondebug_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev_nondebug (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 gsi = gsi_last_nondebug_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
949 any_addressable_vars = 1;
950 break;
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
963 any_addressable_vars = 1;
964 break;
966 if (block == gimple_block (parallel_stmt))
967 break;
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
976 static void
977 remove_exit_barriers (struct omp_region *region)
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
982 if (region->inner)
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
988 region = region->next;
989 remove_exit_barriers (region);
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for task body, except
999 that in untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1027 tree built_in;
1029 if (DECL_NAME (decl) == thr_num_id)
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1054 gimple_call_set_fndecl (call, built_in);
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 tree t = *tp;
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1101 /* Expand the OpenMP parallel or task directive starting at REGION.
   The directive is the last statement of REGION->entry.  If the child
   function already has a CFG, the region's sub-graph is only made
   unreachable; otherwise the region is moved into the child function
   with move_sese_region_to_fn and the child is registered with the
   callgraph.  In both cases the runtime call is then emitted through
   expand_parallel_call or expand_task_call.  */
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 entry_bb = region->entry;
/* For a task the outlined part ends at REGION->cont rather than at
   REGION->exit.  */
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
/* Workshare arguments are only passed on for combined parallel regions.  */
1124 if (is_combined_parallel (region))
1125 ws_args = region->ws_args;
1126 else
1127 ws_args = NULL;
1129 if (child_cfun->cfg)
1131 /* Due to inlining, it may happen that we have already outlined
1132 the region, in which case all we need to do is make the
1133 sub-graph unreachable and emit the parallel call. */
1134 edge entry_succ_e, exit_succ_e;
1136 entry_succ_e = single_succ_edge (entry_bb);
1138 gsi = gsi_last_nondebug_bb (entry_bb);
1139 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141 gsi_remove (&gsi, true);
1143 new_bb = entry_bb;
1144 if (exit_bb)
1146 exit_succ_e = single_succ_edge (exit_bb);
1147 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1149 remove_edge_and_dominated_blocks (entry_succ_e);
1151 else
1153 unsigned srcidx, dstidx, num;
1155 /* If the parallel region needs data sent from the parent
1156 function, then the very first statement (except possible
1157 tree profile counter updates) of the parallel body
1158 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1159 &.OMP_DATA_O is passed as an argument to the child function,
1160 we need to replace it with the argument as seen by the child
1161 function.
1163 In most cases, this will end up being the identity assignment
1164 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1165 a function call that has been inlined, the original PARM_DECL
1166 .OMP_DATA_I may have been converted into a different local
1167 variable. In which case, we need to keep the assignment. */
1168 if (gimple_omp_taskreg_data_arg (entry_stmt))
1170 basic_block entry_succ_bb
1171 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 : FALLTHRU_EDGE (entry_bb)->dest;
1173 tree arg;
1174 gimple *parcopy_stmt = NULL;
/* Scan forward for the assignment whose right-hand side is the address
   of the data argument.  The assert inside the loop fires if the block
   ends before such a statement is found.  */
1176 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1178 gimple *stmt;
1180 gcc_assert (!gsi_end_p (gsi));
1181 stmt = gsi_stmt (gsi);
1182 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 continue;
1185 if (gimple_num_ops (stmt) == 2)
1187 tree arg = gimple_assign_rhs1 (stmt);
1189 /* We ignore the subcode because we're
1190 effectively doing a STRIP_NOPS. */
1192 if (TREE_CODE (arg) == ADDR_EXPR
1193 && TREE_OPERAND (arg, 0)
1194 == gimple_omp_taskreg_data_arg (entry_stmt))
1196 parcopy_stmt = stmt;
1197 break;
1202 gcc_assert (parcopy_stmt != NULL);
1203 arg = DECL_ARGUMENTS (child_fn);
1205 if (!gimple_in_ssa_p (cfun))
/* A copy of the child's parameter onto itself is simply dropped;
   otherwise the right-hand side is redirected to the parameter.  */
1207 if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 gsi_remove (&gsi, true);
1209 else
1211 /* ?? Is setting the subcode really necessary ?? */
1212 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1216 else
1218 tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 /* We'd like to set the rhs to the default def in the child_fn,
1221 but it's too early to create ssa names in the child_fn.
1222 Instead, we set the rhs to the parm. In
1223 move_sese_region_to_fn, we introduce a default def for the
1224 parm, map the parm to its default def, and once we encounter
1225 this stmt, replace the parm with the default def. */
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 update_stmt (parcopy_stmt);
1231 /* Declare local variables needed in CHILD_CFUN. */
1232 block = DECL_INITIAL (child_fn);
1233 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234 /* The gimplifier could record temporaries in parallel/task block
1235 rather than in containing function's local_decls chain,
1236 which would mean cgraph missed finalizing them. Do it now. */
1237 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 varpool_node::finalize_decl (t);
1240 DECL_SAVED_TREE (child_fn) = NULL;
1241 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1242 gimple_set_body (child_fn, NULL);
1243 TREE_USED (block) = 1;
1245 /* Reset DECL_CONTEXT on function arguments. */
1246 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 DECL_CONTEXT (t) = child_fn;
1249 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 so that it can be moved to the child function. */
1251 gsi = gsi_last_nondebug_bb (entry_bb);
1252 stmt = gsi_stmt (gsi);
1253 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255 e = split_block (entry_bb, stmt);
1256 gsi_remove (&gsi, true);
1257 entry_bb = e->dest;
/* E2 stays NULL for a parallel.  For a task it is a temporary abnormal
   edge from the split-off block to REGION->exit, which is removed again
   once the region has been moved.  */
1258 edge e2 = NULL;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261 else
1263 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 gcc_assert (e2->dest == region->exit);
1265 remove_edge (BRANCH_EDGE (entry_bb));
1266 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 gsi = gsi_last_nondebug_bb (region->exit);
1268 gcc_assert (!gsi_end_p (gsi)
1269 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 gsi_remove (&gsi, true);
1273 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1274 if (exit_bb)
1276 gsi = gsi_last_nondebug_bb (exit_bb);
1277 gcc_assert (!gsi_end_p (gsi)
1278 && (gimple_code (gsi_stmt (gsi))
1279 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 stmt = gimple_build_return (NULL);
1281 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 gsi_remove (&gsi, true);
1285 /* Move the parallel region into CHILD_CFUN. */
1287 if (gimple_in_ssa_p (cfun))
1289 init_tree_ssa (child_cfun);
1290 init_ssa_operands (child_cfun);
1291 child_cfun->gimple_df->in_ssa_p = true;
1292 block = NULL_TREE;
1294 else
1295 block = gimple_block (entry_stmt);
1297 /* Make sure to generate early debug for the function before
1298 outlining anything. */
1299 if (! gimple_in_ssa_p (cfun))
1300 (*debug_hooks->early_global_decl) (cfun->decl);
1302 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303 if (exit_bb)
1304 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305 if (e2)
1307 basic_block dest_bb = e2->dest;
1308 if (!exit_bb)
1309 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 remove_edge (e2);
1311 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1313 /* When the OMP expansion process cannot guarantee an up-to-date
1314 loop tree arrange for the child function to fixup loops. */
1315 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1318 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
/* The vector is compacted in place: entries whose DECL_CONTEXT is the
   parent function are skipped, the rest are shifted down, and the tail
   is truncated if anything was dropped.  */
1319 num = vec_safe_length (child_cfun->local_decls);
1320 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1322 t = (*child_cfun->local_decls)[srcidx];
1323 if (DECL_CONTEXT (t) == cfun->decl)
1324 continue;
1325 if (srcidx != dstidx)
1326 (*child_cfun->local_decls)[dstidx] = t;
1327 dstidx++;
1329 if (dstidx != num)
1330 vec_safe_truncate (child_cfun->local_decls, dstidx);
1332 /* Inform the callgraph about the new function. */
1333 child_cfun->curr_properties = cfun->curr_properties;
1334 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336 cgraph_node *node = cgraph_node::get_create (child_fn);
1337 node->parallelized_function = 1;
1338 cgraph_node::add_new_function (child_fn, true);
/* The child only gets an assembler name here if the parent already has
   one and the child does not.  */
1340 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1343 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1344 fixed in a following pass. */
1345 push_cfun (child_cfun);
1346 if (need_asm)
1347 assign_assembler_name_if_needed (child_fn);
1349 if (optimize)
1350 optimize_omp_library_calls (entry_stmt);
1351 update_max_bb_count ();
1352 cgraph_edge::rebuild_edges ();
1354 /* Some EH regions might become dead, see PR34608. If
1355 pass_cleanup_cfg isn't the first pass to happen with the
1356 new child, these dead EH edges might cause problems.
1357 Clean them up now. */
1358 if (flag_exceptions)
1360 basic_block bb;
1361 bool changed = false;
1363 FOR_EACH_BB_FN (bb, cfun)
1364 changed |= gimple_purge_dead_eh_edges (bb);
1365 if (changed)
1366 cleanup_tree_cfg ();
1368 if (gimple_in_ssa_p (cfun))
1369 update_ssa (TODO_update_ssa);
1370 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 verify_loop_structure ();
1372 pop_cfun ();
/* The outlined child is dumped here only when not in SSA form.  */
1374 if (dump_file && !gimple_in_ssa_p (cfun))
1376 omp_any_child_fn_dumped = true;
1377 dump_function_header (dump_file, child_fn, dump_flags);
1378 dump_function_to_file (child_fn, dump_file, dump_flags);
/* Emit the call that launches the region, placed relative to NEW_BB.  */
1382 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383 expand_parallel_call (region, new_bb,
1384 as_a <gomp_parallel *> (entry_stmt), ws_args);
1385 else
1386 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387 if (gimple_in_ssa_p (cfun))
1388 update_ssa (TODO_update_ssa_only_virtuals);
1391 /* Information about members of an OpenACC collapsed loop nest. */
1393 struct oacc_collapse
1395 tree base; /* Base value. */
1396 tree iters; /* Number of steps. */
1397 tree step; /* Step size. */
1398 tree tile; /* Tile increment (if tiled). */
1399 tree outer; /* Tile iterator var. */
1402 /* Helper for expand_oacc_for. Determine collapsed loop information.
1403 Fill in COUNTS array. Emit any initialization code before GSI.
1404 Return the calculated outer loop bound of BOUND_TYPE.
   FD describes the loop nest; COUNTS must have room for FD->collapse
   entries.  LOC is attached to the IFN_GOACC_TILE calls emitted when
   FD->tiling is set.  The returned bound is the product of the
   per-loop iteration counts, built as a tree of BOUND_TYPE.  */
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 gimple_stmt_iterator *gsi,
1409 oacc_collapse *counts, tree bound_type,
1410 location_t loc)
1412 tree tiling = fd->tiling;
1413 tree total = build_int_cst (bound_type, 1);
1414 int ix;
/* The collapsed loop itself is expected to run from 0 with step 1.  */
1416 gcc_assert (integer_onep (fd->loop.step));
1417 gcc_assert (integer_zerop (fd->loop.n1));
1419 /* When tiling, the first operand of the tile clause applies to the
1420 innermost loop, and we work outwards from there. Seems
1421 backwards, but whatever. */
1422 for (ix = fd->collapse; ix--;)
1424 const omp_for_data_loop *loop = &fd->loops[ix];
1426 tree iter_type = TREE_TYPE (loop->v);
1427 tree diff_type = iter_type;
1428 tree plus_type = iter_type;
1430 gcc_assert (loop->cond_code == fd->loop.cond_code);
/* Pointer iterators do their range arithmetic in sizetype; differences
   are always held in a signed type.  */
1432 if (POINTER_TYPE_P (iter_type))
1433 plus_type = sizetype;
1434 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 diff_type = signed_type_for (diff_type);
1437 if (tiling)
/* Emit an IFN_GOACC_TILE call for this loop; its result becomes the
   loop's tile increment, and a fresh ".outer" temporary serves as the
   tile iterator.  */
1439 tree num = build_int_cst (integer_type_node, fd->collapse);
1440 tree loop_no = build_int_cst (integer_type_node, ix);
1441 tree tile = TREE_VALUE (tiling);
1442 gcall *call
1443 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1444 /* gwv-outer=*/integer_zero_node,
1445 /* gwv-inner=*/integer_zero_node);
1447 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1448 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1449 gimple_call_set_lhs (call, counts[ix].tile);
1450 gimple_set_location (call, loc);
1451 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1453 tiling = TREE_CHAIN (tiling);
1455 else
1457 counts[ix].tile = NULL;
1458 counts[ix].outer = loop->v;
1461 tree b = loop->n1;
1462 tree e = loop->n2;
1463 tree s = loop->step;
1464 bool up = loop->cond_code == LT_EXPR;
1465 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1466 bool negating;
1467 tree expr;
1469 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1470 true, GSI_SAME_STMT);
1471 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1472 true, GSI_SAME_STMT);
1474 /* Convert the step, avoiding possible unsigned->signed overflow. */
1475 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1476 if (negating)
1477 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1478 s = fold_convert (diff_type, s);
1479 if (negating)
1480 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1481 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1482 true, GSI_SAME_STMT);
1484 /* Determine the range, avoiding possible unsigned->signed overflow. */
1485 negating = !up && TYPE_UNSIGNED (iter_type);
1486 expr = fold_build2 (MINUS_EXPR, plus_type,
1487 fold_convert (plus_type, negating ? b : e),
1488 fold_convert (plus_type, negating ? e : b));
1489 expr = fold_convert (diff_type, expr);
1490 if (negating)
1491 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1492 tree range = force_gimple_operand_gsi
1493 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1495 /* Determine number of iterations. */
/* That is, iters = (range - dir + step) / step, with truncating division.  */
1496 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1497 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1498 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1500 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1501 true, GSI_SAME_STMT);
1503 counts[ix].base = b;
1504 counts[ix].iters = iters;
1505 counts[ix].step = s;
/* Fold this loop's iteration count into the overall bound.  */
1507 total = fold_build2 (MULT_EXPR, bound_type, total,
1508 fold_convert (bound_type, iters));
1511 return total;
1514 /* Emit initializers for collapsed loop members. INNER is true if
1515 this is for the element loop of a TILE. IVAR is the outer
1516 loop iteration variable, from which collapsed loop iteration values
1517 are calculated. COUNTS array has been initialized by
1518 expand_oacc_collapse_init.  The generated statements are inserted
   before GSI.  When INNER is false the values are stored to each
   loop's COUNTS[].outer variable and computed from COUNTS[].base;
   when INNER is true they are stored to the loop variable itself and
   computed from COUNTS[].outer.  */
1520 static void
1521 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1522 gimple_stmt_iterator *gsi,
1523 const oacc_collapse *counts, tree ivar)
1525 tree ivar_type = TREE_TYPE (ivar);
1527 /* The most rapidly changing iteration variable is the innermost
1528 one. */
1529 for (int ix = fd->collapse; ix--;)
1531 const omp_for_data_loop *loop = &fd->loops[ix];
1532 const oacc_collapse *collapse = &counts[ix];
1533 tree v = inner ? loop->v : collapse->outer;
1534 tree iter_type = TREE_TYPE (v);
1535 tree diff_type = TREE_TYPE (collapse->step);
1536 tree plus_type = iter_type;
1537 enum tree_code plus_code = PLUS_EXPR;
1538 tree expr;
/* Pointer iteration variables are advanced with POINTER_PLUS_EXPR and a
   sizetype offset.  */
1540 if (POINTER_TYPE_P (iter_type))
1542 plus_code = POINTER_PLUS_EXPR;
1543 plus_type = sizetype;
1546 expr = ivar;
/* For every loop except index 0, this loop's index is IVAR modulo its
   iteration count, and the quotient is carried on as IVAR for the next
   loop processed.  Loop 0 takes whatever remains of IVAR.  */
1547 if (ix)
1549 tree mod = fold_convert (ivar_type, collapse->iters);
1550 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1551 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1552 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1553 true, GSI_SAME_STMT);
/* V = (INNER ? outer : base) + index * step.  */
1556 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1557 collapse->step);
1558 expr = fold_build2 (plus_code, iter_type,
1559 inner ? collapse->outer : collapse->base,
1560 fold_convert (plus_type, expr));
1561 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1562 true, GSI_SAME_STMT);
1563 gassign *ass = gimple_build_assign (v, expr);
1564 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1568 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1569 of the combined collapse > 1 loop constructs, generate code like:
1570 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1571 if (cond3 is <)
1572 adj = STEP3 - 1;
1573 else
1574 adj = STEP3 + 1;
1575 count3 = (adj + N32 - N31) / STEP3;
1576 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1577 if (cond2 is <)
1578 adj = STEP2 - 1;
1579 else
1580 adj = STEP2 + 1;
1581 count2 = (adj + N22 - N21) / STEP2;
1582 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1583 if (cond1 is <)
1584 adj = STEP1 - 1;
1585 else
1586 adj = STEP1 + 1;
1587 count1 = (adj + N12 - N11) / STEP1;
1588 count = count1 * count2 * count3;
1589 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1590 count = 0;
1591 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1592 of the combined loop constructs, just initialize COUNTS array
1593 from the _looptemp_ clauses. */
1595 /* NOTE: It *could* be better to moosh all of the BBs together,
1596 creating one larger BB with all the computation and the unexpected
1597 jump at the end. I.e.
1599 bool zero3, zero2, zero1, zero;
1601 zero3 = N32 c3 N31;
1602 count3 = (N32 - N31) /[cl] STEP3;
1603 zero2 = N22 c2 N21;
1604 count2 = (N22 - N21) /[cl] STEP2;
1605 zero1 = N12 c1 N11;
1606 count1 = (N12 - N11) /[cl] STEP1;
1607 zero = zero3 || zero2 || zero1;
1608 count = count1 * count2 * count3;
1609 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1611 After all, we expect the zero=false, and thus we expect to have to
1612 evaluate all of the comparison expressions, so short-circuiting
1613 oughtn't be a win. Since the condition isn't protecting a
1614 denominator, we're not concerned about divide-by-zero, so we can
1615 fully evaluate count even if a numerator turned out to be wrong.
1617 It seems like putting this all together would create much better
1618 scheduling opportunities, and less pressure on the chip's branch
1619 predictor. */
/* Parameters: FD describes the loop nest and COUNTS receives one count
   per loop.  Code is emitted before *GSI; both *GSI and ENTRY_BB are
   updated whenever a zero-trip check splits the entry block.
   ZERO_ITER1_BB / FIRST_ZERO_ITER1 track the zero-iteration block used
   by the collapsed loops (index < FD->collapse); ZERO_ITER2_BB /
   FIRST_ZERO_ITER2 do the same for the remaining ordered loops.
   L2_DOM_BB, if still NULL, is set to the first block that is given a
   branch to a zero-iteration block.  */
1621 static void
1622 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1623 basic_block &entry_bb, tree *counts,
1624 basic_block &zero_iter1_bb, int &first_zero_iter1,
1625 basic_block &zero_iter2_bb, int &first_zero_iter2,
1626 basic_block &l2_dom_bb)
1628 tree t, type = TREE_TYPE (fd->loop.v);
1629 edge e, ne;
1630 int i;
1632 /* Collapsed loops need work for expansion into SSA form. */
1633 gcc_assert (!gimple_in_ssa_p (cfun));
1635 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1636 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1638 gcc_assert (fd->ordered == 0);
1639 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1640 isn't supposed to be handled, as the inner loop doesn't
1641 use it. */
1642 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1643 OMP_CLAUSE__LOOPTEMP_);
1644 gcc_assert (innerc);
1645 for (i = 0; i < fd->collapse; i++)
1647 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1648 OMP_CLAUSE__LOOPTEMP_);
1649 gcc_assert (innerc);
1650 if (i)
1651 counts[i] = OMP_CLAUSE_DECL (innerc);
1652 else
1653 counts[0] = NULL_TREE;
1655 return;
/* For the ordered loops beyond the collapsed ones: clear their counts,
   and if any loop's condition folds to false at compile time, set all
   of those counts to zero and stop looking.  */
1658 for (i = fd->collapse; i < fd->ordered; i++)
1660 tree itype = TREE_TYPE (fd->loops[i].v);
1661 counts[i] = NULL_TREE;
1662 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1663 fold_convert (itype, fd->loops[i].n1),
1664 fold_convert (itype, fd->loops[i].n2));
1665 if (t && integer_zerop (t))
1667 for (i = fd->collapse; i < fd->ordered; i++)
1668 counts[i] = build_int_cst (type, 0);
1669 break;
1672 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1674 tree itype = TREE_TYPE (fd->loops[i].v);
/* Ordered-only loops whose count was already fixed above are skipped.  */
1676 if (i >= fd->collapse && counts[i])
1677 continue;
/* Emit a runtime zero-trip check unless the loop condition folds to true
   at compile time.  This is only done when the total count lives in a
   variable or for the ordered-only loops.  */
1678 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1679 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1680 fold_convert (itype, fd->loops[i].n1),
1681 fold_convert (itype, fd->loops[i].n2)))
1682 == NULL_TREE || !integer_onep (t)))
1684 gcond *cond_stmt;
1685 tree n1, n2;
1686 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1687 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1688 true, GSI_SAME_STMT);
1689 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1690 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1691 true, GSI_SAME_STMT);
1692 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1693 NULL_TREE, NULL_TREE);
1694 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1695 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1696 expand_omp_regimplify_p, NULL, NULL)
1697 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1698 expand_omp_regimplify_p, NULL, NULL))
1700 *gsi = gsi_for_stmt (cond_stmt);
1701 gimple_regimplify_operands (cond_stmt, gsi);
1703 e = split_block (entry_bb, cond_stmt);
/* Pick the zero-iteration block and its first-index tracker that match
   this loop's class (collapsed vs. ordered-only).  */
1704 basic_block &zero_iter_bb
1705 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1706 int &first_zero_iter
1707 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1708 if (zero_iter_bb == NULL)
/* First use: create the block and have it store zero into the total
   count (collapsed loops) or into a fresh counts[i] temporary
   (ordered-only loops).  */
1710 gassign *assign_stmt;
1711 first_zero_iter = i;
1712 zero_iter_bb = create_empty_bb (entry_bb);
1713 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1714 *gsi = gsi_after_labels (zero_iter_bb);
1715 if (i < fd->collapse)
1716 assign_stmt = gimple_build_assign (fd->loop.n2,
1717 build_zero_cst (type));
1718 else
1720 counts[i] = create_tmp_reg (type, ".count");
1721 assign_stmt
1722 = gimple_build_assign (counts[i], build_zero_cst (type));
1724 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1725 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1726 entry_bb);
/* The false edge goes to the zero-iteration block and is marked very
   unlikely; the true edge continues into the split-off block, which
   becomes the new ENTRY_BB.  */
1728 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1729 ne->probability = profile_probability::very_unlikely ();
1730 e->flags = EDGE_TRUE_VALUE;
1731 e->probability = ne->probability.invert ();
1732 if (l2_dom_bb == NULL)
1733 l2_dom_bb = entry_bb;
1734 entry_bb = e->dest;
1735 *gsi = gsi_last_nondebug_bb (entry_bb);
/* count = (step + adj + n2 - n1) / step, where adj is -1 for a "<" loop
   and +1 otherwise; pointer iterators are computed in a signed integer
   type.  */
1738 if (POINTER_TYPE_P (itype))
1739 itype = signed_type_for (itype);
1740 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1741 ? -1 : 1));
1742 t = fold_build2 (PLUS_EXPR, itype,
1743 fold_convert (itype, fd->loops[i].step), t);
1744 t = fold_build2 (PLUS_EXPR, itype, t,
1745 fold_convert (itype, fd->loops[i].n2));
1746 t = fold_build2 (MINUS_EXPR, itype, t,
1747 fold_convert (itype, fd->loops[i].n1));
1748 /* ?? We could probably use CEIL_DIV_EXPR instead of
1749 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1750 generate the same code in the end because generically we
1751 don't know that the values involved must be negative for
1752 GT?? */
/* For an unsigned type with a ">" loop, both numerator and step are
   negated before dividing.  */
1753 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1754 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1755 fold_build1 (NEGATE_EXPR, itype, t),
1756 fold_build1 (NEGATE_EXPR, itype,
1757 fold_convert (itype,
1758 fd->loops[i].step)));
1759 else
1760 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1761 fold_convert (itype, fd->loops[i].step));
1762 t = fold_convert (type, t);
1763 if (TREE_CODE (t) == INTEGER_CST)
1764 counts[i] = t;
1765 else
/* Reuse the counts[i] temporary created for the ordered-only
   zero-iteration block when this is that loop; otherwise make a new
   one.  */
1767 if (i < fd->collapse || i != first_zero_iter2)
1768 counts[i] = create_tmp_reg (type, ".count");
1769 expand_omp_build_assign (gsi, counts[i], t);
/* Accumulate the running product of the collapsed loops' counts in
   fd->loop.n2 when that is a variable.  */
1771 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1773 if (i == 0)
1774 t = counts[0];
1775 else
1776 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1777 expand_omp_build_assign (gsi, fd->loop.n2, t);
1782 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1783 T = V;
1784 V3 = N31 + (T % count3) * STEP3;
1785 T = T / count3;
1786 V2 = N21 + (T % count2) * STEP2;
1787 T = T / count2;
1788 V1 = N11 + T * STEP1;
1789 if this loop doesn't have an inner loop construct combined with it.
1790 If it does have an inner loop construct combined with it and the
1791 iteration count isn't known constant, store values from counts array
1792 into its _looptemp_ temporaries instead. */
/* Parameters: FD describes the loop nest and COUNTS holds the per-loop
   iteration counts.  INNER_STMT is the construct whose _looptemp_
   clauses receive the counts in the combined case.  STARTVAR is the
   value the temporary T is initialized from.  All statements are
   inserted after *GSI, which is advanced as they are added.  */
1794 static void
1795 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796 tree *counts, gimple *inner_stmt, tree startvar)
1798 int i;
1799 if (gimple_omp_for_combined_p (fd->for_stmt))
1801 /* If fd->loop.n2 is constant, then no propagation of the counts
1802 is needed, they are constant. */
1803 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1804 return;
/* The clause list comes from the inner statement, which is either an
   OMP for or a taskreg construct.  */
1806 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1807 ? gimple_omp_taskreg_clauses (inner_stmt)
1808 : gimple_omp_for_clauses (inner_stmt);
1809 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1810 isn't supposed to be handled, as the inner loop doesn't
1811 use it. */
1812 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1813 gcc_assert (innerc);
1814 for (i = 0; i < fd->collapse; i++)
1816 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1817 OMP_CLAUSE__LOOPTEMP_);
1818 gcc_assert (innerc);
1819 if (i)
/* Store counts[i], converted to the temporary's type, into the
   matching _looptemp_ decl.  */
1821 tree tem = OMP_CLAUSE_DECL (innerc);
1822 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1823 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1824 false, GSI_CONTINUE_LINKING);
1825 gassign *stmt = gimple_build_assign (tem, t);
1826 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1829 return;
/* Not combined: seed the temporary from STARTVAR, then peel off one
   loop index per iteration, innermost loop first.  */
1832 tree type = TREE_TYPE (fd->loop.v);
1833 tree tem = create_tmp_reg (type, ".tem");
1834 gassign *stmt = gimple_build_assign (tem, startvar);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1837 for (i = fd->collapse - 1; i >= 0; i--)
1839 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1840 itype = vtype;
1841 if (POINTER_TYPE_P (vtype))
1842 itype = signed_type_for (vtype);
/* Loop 0 takes whatever is left in the temporary; every other loop
   takes the remainder modulo its own count.  */
1843 if (i != 0)
1844 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1845 else
1846 t = tem;
1847 t = fold_convert (itype, t);
1848 t = fold_build2 (MULT_EXPR, itype, t,
1849 fold_convert (itype, fd->loops[i].step));
1850 if (POINTER_TYPE_P (vtype))
1851 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1852 else
1853 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1854 t = force_gimple_operand_gsi (gsi, t,
1855 DECL_P (fd->loops[i].v)
1856 && TREE_ADDRESSABLE (fd->loops[i].v),
1857 NULL_TREE, false,
1858 GSI_CONTINUE_LINKING);
1859 stmt = gimple_build_assign (fd->loops[i].v, t);
1860 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
/* Carry the quotient on to the next outer loop.  */
1861 if (i != 0)
1863 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1864 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1865 false, GSI_CONTINUE_LINKING);
1866 stmt = gimple_build_assign (tem, t);
1867 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1872 /* Helper function for expand_omp_for_*. Generate code like:
1873 L10:
1874 V3 += STEP3;
1875 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1876 L11:
1877 V3 = N31;
1878 V2 += STEP2;
1879 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1880 L12:
1881 V2 = N21;
1882 V1 += STEP1;
1883 goto BODY_BB; */
/* One new basic block is created per collapsed loop, starting after
   CONT_BB and working from the innermost loop outwards.  Each block
   gets an edge to BODY_BB.  Returns the block created for the innermost
   loop (index FD->collapse - 1), or NULL if no block was created.  */
1885 static basic_block
1886 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1887 basic_block body_bb)
1889 basic_block last_bb, bb, collapse_bb = NULL;
1890 int i;
1891 gimple_stmt_iterator gsi;
1892 edge e;
1893 tree t;
1894 gimple *stmt;
1896 last_bb = cont_bb;
1897 for (i = fd->collapse - 1; i >= 0; i--)
1899 tree vtype = TREE_TYPE (fd->loops[i].v);
1901 bb = create_empty_bb (last_bb);
1902 add_bb_to_loop (bb, last_bb->loop_father);
1903 gsi = gsi_start_bb (bb);
1905 if (i < fd->collapse - 1)
/* Reached when the next inner loop's condition failed: link from the
   previous block on its false edge (probability 1/8) and reset that
   inner loop's variable to its lower bound.  */
1907 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1908 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1910 t = fd->loops[i + 1].n1;
1911 t = force_gimple_operand_gsi (&gsi, t,
1912 DECL_P (fd->loops[i + 1].v)
1913 && TREE_ADDRESSABLE (fd->loops[i
1914 + 1].v),
1915 NULL_TREE, false,
1916 GSI_CONTINUE_LINKING);
1917 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1918 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1920 else
1921 collapse_bb = bb;
1923 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
/* Advance this loop's variable by its step.  */
1925 if (POINTER_TYPE_P (vtype))
1926 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1927 else
1928 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1929 t = force_gimple_operand_gsi (&gsi, t,
1930 DECL_P (fd->loops[i].v)
1931 && TREE_ADDRESSABLE (fd->loops[i].v),
1932 NULL_TREE, false, GSI_CONTINUE_LINKING);
1933 stmt = gimple_build_assign (fd->loops[i].v, t);
1934 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Every loop but index 0 re-tests its condition and branches back to
   the body on the true edge (probability 7/8); loop 0 falls through to
   the body unconditionally.  */
1936 if (i > 0)
1938 t = fd->loops[i].n2;
1939 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1940 false, GSI_CONTINUE_LINKING);
1941 tree v = fd->loops[i].v;
1942 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1943 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1944 false, GSI_CONTINUE_LINKING);
1945 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1946 stmt = gimple_build_cond_empty (t);
1947 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1948 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1949 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1951 else
1952 make_edge (bb, body_bb, EDGE_FALLTHRU);
1953 last_bb = bb;
1956 return collapse_bb;
1959 /* Expand #pragma omp ordered depend(source). */
1961 static void
1962 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1963 tree *counts, location_t loc)
1965 enum built_in_function source_ix
1966 = fd->iter_type == long_integer_type_node
1967 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1968 gimple *g
1969 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1970 build_fold_addr_expr (counts[fd->ordered]));
1971 gimple_set_location (g, loc);
1972 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1975 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1977 static void
1978 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1979 tree *counts, tree c, location_t loc)
1981 auto_vec<tree, 10> args;
1982 enum built_in_function sink_ix
1983 = fd->iter_type == long_integer_type_node
1984 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1985 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1986 int i;
1987 gimple_stmt_iterator gsi2 = *gsi;
1988 bool warned_step = false;
1990 for (i = 0; i < fd->ordered; i++)
1992 tree step = NULL_TREE;
1993 off = TREE_PURPOSE (deps);
1994 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1996 step = TREE_OPERAND (off, 1);
1997 off = TREE_OPERAND (off, 0);
1999 if (!integer_zerop (off))
2001 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2002 || fd->loops[i].cond_code == GT_EXPR);
2003 bool forward = fd->loops[i].cond_code == LT_EXPR;
2004 if (step)
2006 /* Non-simple Fortran DO loops. If step is variable,
2007 we don't know even the direction at compile time, so can't
2008 warn. */
2009 if (TREE_CODE (step) != INTEGER_CST)
2010 break;
2011 forward = tree_int_cst_sgn (step) != -1;
2013 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2014 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2015 "lexically later iteration");
2016 break;
2018 deps = TREE_CHAIN (deps);
2020 /* If all offsets corresponding to the collapsed loops are zero,
2021 this depend clause can be ignored. FIXME: but there is still a
2022 flush needed. We need to emit one __sync_synchronize () for it
2023 though (perhaps conditionally)? Solve this together with the
2024 conservative dependence folding optimization.
2025 if (i >= fd->collapse)
2026 return; */
2028 deps = OMP_CLAUSE_DECL (c);
2029 gsi_prev (&gsi2);
2030 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2031 edge e2 = split_block_after_labels (e1->dest);
2033 gsi2 = gsi_after_labels (e1->dest);
2034 *gsi = gsi_last_bb (e1->src);
2035 for (i = 0; i < fd->ordered; i++)
2037 tree itype = TREE_TYPE (fd->loops[i].v);
2038 tree step = NULL_TREE;
2039 tree orig_off = NULL_TREE;
2040 if (POINTER_TYPE_P (itype))
2041 itype = sizetype;
2042 if (i)
2043 deps = TREE_CHAIN (deps);
2044 off = TREE_PURPOSE (deps);
2045 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2047 step = TREE_OPERAND (off, 1);
2048 off = TREE_OPERAND (off, 0);
2049 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2050 && integer_onep (fd->loops[i].step)
2051 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2053 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2054 if (step)
2056 off = fold_convert_loc (loc, itype, off);
2057 orig_off = off;
2058 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2061 if (integer_zerop (off))
2062 t = boolean_true_node;
2063 else
2065 tree a;
2066 tree co = fold_convert_loc (loc, itype, off);
2067 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2069 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2070 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2071 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2072 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2073 co);
2075 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2076 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2077 fd->loops[i].v, co);
2078 else
2079 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2080 fd->loops[i].v, co);
2081 if (step)
2083 tree t1, t2;
2084 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2085 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2086 fd->loops[i].n1);
2087 else
2088 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2089 fd->loops[i].n2);
2090 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2091 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2093 else
2094 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2095 fd->loops[i].n1);
2096 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2097 step, build_int_cst (TREE_TYPE (step), 0));
2098 if (TREE_CODE (step) != INTEGER_CST)
2100 t1 = unshare_expr (t1);
2101 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2102 false, GSI_CONTINUE_LINKING);
2103 t2 = unshare_expr (t2);
2104 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2105 false, GSI_CONTINUE_LINKING);
2107 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2108 t, t2, t1);
2110 else if (fd->loops[i].cond_code == LT_EXPR)
2112 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2113 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2114 fd->loops[i].n1);
2115 else
2116 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2117 fd->loops[i].n2);
2119 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2120 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2121 fd->loops[i].n2);
2122 else
2123 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2124 fd->loops[i].n1);
2126 if (cond)
2127 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2128 else
2129 cond = t;
2131 off = fold_convert_loc (loc, itype, off);
2133 if (step
2134 || (fd->loops[i].cond_code == LT_EXPR
2135 ? !integer_onep (fd->loops[i].step)
2136 : !integer_minus_onep (fd->loops[i].step)))
2138 if (step == NULL_TREE
2139 && TYPE_UNSIGNED (itype)
2140 && fd->loops[i].cond_code == GT_EXPR)
2141 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2142 fold_build1_loc (loc, NEGATE_EXPR, itype,
2143 s));
2144 else
2145 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2146 orig_off ? orig_off : off, s);
2147 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2148 build_int_cst (itype, 0));
2149 if (integer_zerop (t) && !warned_step)
2151 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2152 "in the iteration space");
2153 warned_step = true;
2155 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2156 cond, t);
2159 if (i <= fd->collapse - 1 && fd->collapse > 1)
2160 t = fd->loop.v;
2161 else if (counts[i])
2162 t = counts[i];
2163 else
2165 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2166 fd->loops[i].v, fd->loops[i].n1);
2167 t = fold_convert_loc (loc, fd->iter_type, t);
2169 if (step)
2170 /* We have divided off by step already earlier. */;
2171 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2172 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2173 fold_build1_loc (loc, NEGATE_EXPR, itype,
2174 s));
2175 else
2176 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2177 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2178 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2179 off = fold_convert_loc (loc, fd->iter_type, off);
2180 if (i <= fd->collapse - 1 && fd->collapse > 1)
2182 if (i)
2183 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2184 off);
2185 if (i < fd->collapse - 1)
2187 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2188 counts[i]);
2189 continue;
2192 off = unshare_expr (off);
2193 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2194 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2195 true, GSI_SAME_STMT);
2196 args.safe_push (t);
2198 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2199 gimple_set_location (g, loc);
2200 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2202 cond = unshare_expr (cond);
2203 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2204 GSI_CONTINUE_LINKING);
2205 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2206 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2207 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2208 e1->probability = e3->probability.invert ();
2209 e1->flags = EDGE_TRUE_VALUE;
2210 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2212 *gsi = gsi_after_labels (e2->dest);
2215 /* Expand all #pragma omp ordered depend(source) and
2216 #pragma omp ordered depend(sink:...) constructs in the current
2217 #pragma omp for ordered(n) region. */
2219 static void
2220 expand_omp_ordered_source_sink (struct omp_region *region,
2221 struct omp_for_data *fd, tree *counts,
2222 basic_block cont_bb)
2224 struct omp_region *inner;
2225 int i;
2226 for (i = fd->collapse - 1; i < fd->ordered; i++)
2227 if (i == fd->collapse - 1 && fd->collapse > 1)
2228 counts[i] = NULL_TREE;
2229 else if (i >= fd->collapse && !cont_bb)
2230 counts[i] = build_zero_cst (fd->iter_type);
2231 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2232 && integer_onep (fd->loops[i].step))
2233 counts[i] = NULL_TREE;
2234 else
2235 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2236 tree atype
2237 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2238 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2239 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2241 for (inner = region->inner; inner; inner = inner->next)
2242 if (inner->type == GIMPLE_OMP_ORDERED)
2244 gomp_ordered *ord_stmt = inner->ord_stmt;
2245 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2246 location_t loc = gimple_location (ord_stmt);
2247 tree c;
2248 for (c = gimple_omp_ordered_clauses (ord_stmt);
2249 c; c = OMP_CLAUSE_CHAIN (c))
2250 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2251 break;
2252 if (c)
2253 expand_omp_ordered_source (&gsi, fd, counts, loc);
2254 for (c = gimple_omp_ordered_clauses (ord_stmt);
2255 c; c = OMP_CLAUSE_CHAIN (c))
2256 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2257 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2258 gsi_remove (&gsi, true);
2262 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2263 collapsed. */
2265 static basic_block
2266 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2267 basic_block cont_bb, basic_block body_bb,
2268 bool ordered_lastprivate)
2270 if (fd->ordered == fd->collapse)
2271 return cont_bb;
2273 if (!cont_bb)
2275 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2276 for (int i = fd->collapse; i < fd->ordered; i++)
2278 tree type = TREE_TYPE (fd->loops[i].v);
2279 tree n1 = fold_convert (type, fd->loops[i].n1);
2280 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2281 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2282 size_int (i - fd->collapse + 1),
2283 NULL_TREE, NULL_TREE);
2284 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2286 return NULL;
2289 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2291 tree t, type = TREE_TYPE (fd->loops[i].v);
2292 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293 expand_omp_build_assign (&gsi, fd->loops[i].v,
2294 fold_convert (type, fd->loops[i].n1));
2295 if (counts[i])
2296 expand_omp_build_assign (&gsi, counts[i],
2297 build_zero_cst (fd->iter_type));
2298 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299 size_int (i - fd->collapse + 1),
2300 NULL_TREE, NULL_TREE);
2301 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302 if (!gsi_end_p (gsi))
2303 gsi_prev (&gsi);
2304 else
2305 gsi = gsi_last_bb (body_bb);
2306 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2307 basic_block new_body = e1->dest;
2308 if (body_bb == cont_bb)
2309 cont_bb = new_body;
2310 edge e2 = NULL;
2311 basic_block new_header;
2312 if (EDGE_COUNT (cont_bb->preds) > 0)
2314 gsi = gsi_last_bb (cont_bb);
2315 if (POINTER_TYPE_P (type))
2316 t = fold_build_pointer_plus (fd->loops[i].v,
2317 fold_convert (sizetype,
2318 fd->loops[i].step));
2319 else
2320 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2321 fold_convert (type, fd->loops[i].step));
2322 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2323 if (counts[i])
2325 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2326 build_int_cst (fd->iter_type, 1));
2327 expand_omp_build_assign (&gsi, counts[i], t);
2328 t = counts[i];
2330 else
2332 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2333 fd->loops[i].v, fd->loops[i].n1);
2334 t = fold_convert (fd->iter_type, t);
2335 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2336 true, GSI_SAME_STMT);
2338 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2339 size_int (i - fd->collapse + 1),
2340 NULL_TREE, NULL_TREE);
2341 expand_omp_build_assign (&gsi, aref, t);
2342 gsi_prev (&gsi);
2343 e2 = split_block (cont_bb, gsi_stmt (gsi));
2344 new_header = e2->dest;
2346 else
2347 new_header = cont_bb;
2348 gsi = gsi_after_labels (new_header);
2349 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2350 true, GSI_SAME_STMT);
2351 tree n2
2352 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2353 true, NULL_TREE, true, GSI_SAME_STMT);
2354 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2355 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2356 edge e3 = split_block (new_header, gsi_stmt (gsi));
2357 cont_bb = e3->dest;
2358 remove_edge (e1);
2359 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2360 e3->flags = EDGE_FALSE_VALUE;
2361 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2362 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2363 e1->probability = e3->probability.invert ();
2365 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2366 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2368 if (e2)
2370 struct loop *loop = alloc_loop ();
2371 loop->header = new_header;
2372 loop->latch = e2->src;
2373 add_loop (loop, body_bb->loop_father);
2377 /* If there are any lastprivate clauses and it is possible some loops
2378 might have zero iterations, ensure all the decls are initialized,
2379 otherwise we could crash evaluating C++ class iterators with lastprivate
2380 clauses. */
2381 bool need_inits = false;
2382 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2383 if (need_inits)
2385 tree type = TREE_TYPE (fd->loops[i].v);
2386 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2387 expand_omp_build_assign (&gsi, fd->loops[i].v,
2388 fold_convert (type, fd->loops[i].n1));
2390 else
2392 tree type = TREE_TYPE (fd->loops[i].v);
2393 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2394 boolean_type_node,
2395 fold_convert (type, fd->loops[i].n1),
2396 fold_convert (type, fd->loops[i].n2));
2397 if (!integer_onep (this_cond))
2398 need_inits = true;
2401 return cont_bb;
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	iend = iend0;
    L1:
	BODY;
	V += STEP;
	if (V cond iend) goto L1; else goto L2;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	goto Z1;
    Z0:
	count = 0;
    Z1:
	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
	iend = iend0;
    L1:
	BODY;
	V += 1;
	if (V < iend) goto L10; else goto L2;
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto L1; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto L1; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto L1;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

      */
2495 static void
2496 expand_omp_for_generic (struct omp_region *region,
2497 struct omp_for_data *fd,
2498 enum built_in_function start_fn,
2499 enum built_in_function next_fn,
2500 gimple *inner_stmt)
2502 tree type, istart0, iend0, iend;
2503 tree t, vmain, vback, bias = NULL_TREE;
2504 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2505 basic_block l2_bb = NULL, l3_bb = NULL;
2506 gimple_stmt_iterator gsi;
2507 gassign *assign_stmt;
2508 bool in_combined_parallel = is_combined_parallel (region);
2509 bool broken_loop = region->cont == NULL;
2510 edge e, ne;
2511 tree *counts = NULL;
2512 int i;
2513 bool ordered_lastprivate = false;
2515 gcc_assert (!broken_loop || !in_combined_parallel);
2516 gcc_assert (fd->iter_type == long_integer_type_node
2517 || !in_combined_parallel);
2519 entry_bb = region->entry;
2520 cont_bb = region->cont;
2521 collapse_bb = NULL;
2522 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2523 gcc_assert (broken_loop
2524 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2525 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2526 l1_bb = single_succ (l0_bb);
2527 if (!broken_loop)
2529 l2_bb = create_empty_bb (cont_bb);
2530 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2531 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2532 == l1_bb));
2533 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2535 else
2536 l2_bb = NULL;
2537 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2538 exit_bb = region->exit;
2540 gsi = gsi_last_nondebug_bb (entry_bb);
2542 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2543 if (fd->ordered
2544 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2545 OMP_CLAUSE_LASTPRIVATE))
2546 ordered_lastprivate = false;
2547 if (fd->collapse > 1 || fd->ordered)
2549 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2550 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2552 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2553 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2554 zero_iter1_bb, first_zero_iter1,
2555 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2557 if (zero_iter1_bb)
2559 /* Some counts[i] vars might be uninitialized if
2560 some loop has zero iterations. But the body shouldn't
2561 be executed in that case, so just avoid uninit warnings. */
2562 for (i = first_zero_iter1;
2563 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2564 if (SSA_VAR_P (counts[i]))
2565 TREE_NO_WARNING (counts[i]) = 1;
2566 gsi_prev (&gsi);
2567 e = split_block (entry_bb, gsi_stmt (gsi));
2568 entry_bb = e->dest;
2569 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2570 gsi = gsi_last_nondebug_bb (entry_bb);
2571 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2572 get_immediate_dominator (CDI_DOMINATORS,
2573 zero_iter1_bb));
2575 if (zero_iter2_bb)
2577 /* Some counts[i] vars might be uninitialized if
2578 some loop has zero iterations. But the body shouldn't
2579 be executed in that case, so just avoid uninit warnings. */
2580 for (i = first_zero_iter2; i < fd->ordered; i++)
2581 if (SSA_VAR_P (counts[i]))
2582 TREE_NO_WARNING (counts[i]) = 1;
2583 if (zero_iter1_bb)
2584 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2585 else
2587 gsi_prev (&gsi);
2588 e = split_block (entry_bb, gsi_stmt (gsi));
2589 entry_bb = e->dest;
2590 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2591 gsi = gsi_last_nondebug_bb (entry_bb);
2592 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2593 get_immediate_dominator
2594 (CDI_DOMINATORS, zero_iter2_bb));
2597 if (fd->collapse == 1)
2599 counts[0] = fd->loop.n2;
2600 fd->loop = fd->loops[0];
2604 type = TREE_TYPE (fd->loop.v);
2605 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2606 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2607 TREE_ADDRESSABLE (istart0) = 1;
2608 TREE_ADDRESSABLE (iend0) = 1;
2610 /* See if we need to bias by LLONG_MIN. */
2611 if (fd->iter_type == long_long_unsigned_type_node
2612 && TREE_CODE (type) == INTEGER_TYPE
2613 && !TYPE_UNSIGNED (type)
2614 && fd->ordered == 0)
2616 tree n1, n2;
2618 if (fd->loop.cond_code == LT_EXPR)
2620 n1 = fd->loop.n1;
2621 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2623 else
2625 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2626 n2 = fd->loop.n1;
2628 if (TREE_CODE (n1) != INTEGER_CST
2629 || TREE_CODE (n2) != INTEGER_CST
2630 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2631 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2634 gimple_stmt_iterator gsif = gsi;
2635 gsi_prev (&gsif);
2637 tree arr = NULL_TREE;
2638 if (in_combined_parallel)
2640 gcc_assert (fd->ordered == 0);
2641 /* In a combined parallel loop, emit a call to
2642 GOMP_loop_foo_next. */
2643 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2644 build_fold_addr_expr (istart0),
2645 build_fold_addr_expr (iend0));
2647 else
2649 tree t0, t1, t2, t3, t4;
2650 /* If this is not a combined parallel loop, emit a call to
2651 GOMP_loop_foo_start in ENTRY_BB. */
2652 t4 = build_fold_addr_expr (iend0);
2653 t3 = build_fold_addr_expr (istart0);
2654 if (fd->ordered)
2656 t0 = build_int_cst (unsigned_type_node,
2657 fd->ordered - fd->collapse + 1);
2658 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2659 fd->ordered
2660 - fd->collapse + 1),
2661 ".omp_counts");
2662 DECL_NAMELESS (arr) = 1;
2663 TREE_ADDRESSABLE (arr) = 1;
2664 TREE_STATIC (arr) = 1;
2665 vec<constructor_elt, va_gc> *v;
2666 vec_alloc (v, fd->ordered - fd->collapse + 1);
2667 int idx;
2669 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2671 tree c;
2672 if (idx == 0 && fd->collapse > 1)
2673 c = fd->loop.n2;
2674 else
2675 c = counts[idx + fd->collapse - 1];
2676 tree purpose = size_int (idx);
2677 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2678 if (TREE_CODE (c) != INTEGER_CST)
2679 TREE_STATIC (arr) = 0;
2682 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2683 if (!TREE_STATIC (arr))
2684 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2685 void_type_node, arr),
2686 true, NULL_TREE, true, GSI_SAME_STMT);
2687 t1 = build_fold_addr_expr (arr);
2688 t2 = NULL_TREE;
2690 else
2692 t2 = fold_convert (fd->iter_type, fd->loop.step);
2693 t1 = fd->loop.n2;
2694 t0 = fd->loop.n1;
2695 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2697 tree innerc
2698 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2699 OMP_CLAUSE__LOOPTEMP_);
2700 gcc_assert (innerc);
2701 t0 = OMP_CLAUSE_DECL (innerc);
2702 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2703 OMP_CLAUSE__LOOPTEMP_);
2704 gcc_assert (innerc);
2705 t1 = OMP_CLAUSE_DECL (innerc);
2707 if (POINTER_TYPE_P (TREE_TYPE (t0))
2708 && TYPE_PRECISION (TREE_TYPE (t0))
2709 != TYPE_PRECISION (fd->iter_type))
2711 /* Avoid casting pointers to integer of a different size. */
2712 tree itype = signed_type_for (type);
2713 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2714 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2716 else
2718 t1 = fold_convert (fd->iter_type, t1);
2719 t0 = fold_convert (fd->iter_type, t0);
2721 if (bias)
2723 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2724 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2727 if (fd->iter_type == long_integer_type_node || fd->ordered)
2729 if (fd->chunk_size)
2731 t = fold_convert (fd->iter_type, fd->chunk_size);
2732 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2733 if (fd->ordered)
2734 t = build_call_expr (builtin_decl_explicit (start_fn),
2735 5, t0, t1, t, t3, t4);
2736 else
2737 t = build_call_expr (builtin_decl_explicit (start_fn),
2738 6, t0, t1, t2, t, t3, t4);
2740 else if (fd->ordered)
2741 t = build_call_expr (builtin_decl_explicit (start_fn),
2742 4, t0, t1, t3, t4);
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 5, t0, t1, t2, t3, t4);
2747 else
2749 tree t5;
2750 tree c_bool_type;
2751 tree bfn_decl;
2753 /* The GOMP_loop_ull_*start functions have additional boolean
2754 argument, true for < loops and false for > loops.
2755 In Fortran, the C bool type can be different from
2756 boolean_type_node. */
2757 bfn_decl = builtin_decl_explicit (start_fn);
2758 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2759 t5 = build_int_cst (c_bool_type,
2760 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2761 if (fd->chunk_size)
2763 tree bfn_decl = builtin_decl_explicit (start_fn);
2764 t = fold_convert (fd->iter_type, fd->chunk_size);
2765 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2766 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2768 else
2769 t = build_call_expr (builtin_decl_explicit (start_fn),
2770 6, t5, t0, t1, t2, t3, t4);
2773 if (TREE_TYPE (t) != boolean_type_node)
2774 t = fold_build2 (NE_EXPR, boolean_type_node,
2775 t, build_int_cst (TREE_TYPE (t), 0));
2776 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2777 true, GSI_SAME_STMT);
2778 if (arr && !TREE_STATIC (arr))
2780 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2781 TREE_THIS_VOLATILE (clobber) = 1;
2782 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2783 GSI_SAME_STMT);
2785 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2787 /* Remove the GIMPLE_OMP_FOR statement. */
2788 gsi_remove (&gsi, true);
2790 if (gsi_end_p (gsif))
2791 gsif = gsi_after_labels (gsi_bb (gsif));
2792 gsi_next (&gsif);
2794 /* Iteration setup for sequential loop goes in L0_BB. */
2795 tree startvar = fd->loop.v;
2796 tree endvar = NULL_TREE;
2798 if (gimple_omp_for_combined_p (fd->for_stmt))
2800 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2801 && gimple_omp_for_kind (inner_stmt)
2802 == GF_OMP_FOR_KIND_SIMD);
2803 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2804 OMP_CLAUSE__LOOPTEMP_);
2805 gcc_assert (innerc);
2806 startvar = OMP_CLAUSE_DECL (innerc);
2807 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2808 OMP_CLAUSE__LOOPTEMP_);
2809 gcc_assert (innerc);
2810 endvar = OMP_CLAUSE_DECL (innerc);
2813 gsi = gsi_start_bb (l0_bb);
2814 t = istart0;
2815 if (fd->ordered && fd->collapse == 1)
2816 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2817 fold_convert (fd->iter_type, fd->loop.step));
2818 else if (bias)
2819 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2820 if (fd->ordered && fd->collapse == 1)
2822 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2823 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2824 fd->loop.n1, fold_convert (sizetype, t));
2825 else
2827 t = fold_convert (TREE_TYPE (startvar), t);
2828 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2829 fd->loop.n1, t);
2832 else
2834 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2835 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2836 t = fold_convert (TREE_TYPE (startvar), t);
2838 t = force_gimple_operand_gsi (&gsi, t,
2839 DECL_P (startvar)
2840 && TREE_ADDRESSABLE (startvar),
2841 NULL_TREE, false, GSI_CONTINUE_LINKING);
2842 assign_stmt = gimple_build_assign (startvar, t);
2843 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2845 t = iend0;
2846 if (fd->ordered && fd->collapse == 1)
2847 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2848 fold_convert (fd->iter_type, fd->loop.step));
2849 else if (bias)
2850 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2851 if (fd->ordered && fd->collapse == 1)
2853 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2854 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2855 fd->loop.n1, fold_convert (sizetype, t));
2856 else
2858 t = fold_convert (TREE_TYPE (startvar), t);
2859 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2860 fd->loop.n1, t);
2863 else
2865 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2866 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2867 t = fold_convert (TREE_TYPE (startvar), t);
2869 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2870 false, GSI_CONTINUE_LINKING);
2871 if (endvar)
2873 assign_stmt = gimple_build_assign (endvar, iend);
2874 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2875 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2876 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2877 else
2878 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2879 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2881 /* Handle linear clause adjustments. */
2882 tree itercnt = NULL_TREE;
2883 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2884 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2885 c; c = OMP_CLAUSE_CHAIN (c))
2886 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2887 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2889 tree d = OMP_CLAUSE_DECL (c);
2890 bool is_ref = omp_is_reference (d);
2891 tree t = d, a, dest;
2892 if (is_ref)
2893 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2894 tree type = TREE_TYPE (t);
2895 if (POINTER_TYPE_P (type))
2896 type = sizetype;
2897 dest = unshare_expr (t);
2898 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2899 expand_omp_build_assign (&gsif, v, t);
2900 if (itercnt == NULL_TREE)
2902 itercnt = startvar;
2903 tree n1 = fd->loop.n1;
2904 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2906 itercnt
2907 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2908 itercnt);
2909 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2911 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2912 itercnt, n1);
2913 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2914 itercnt, fd->loop.step);
2915 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2916 NULL_TREE, false,
2917 GSI_CONTINUE_LINKING);
2919 a = fold_build2 (MULT_EXPR, type,
2920 fold_convert (type, itercnt),
2921 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2922 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2923 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2924 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2925 false, GSI_CONTINUE_LINKING);
2926 assign_stmt = gimple_build_assign (dest, t);
2927 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2929 if (fd->collapse > 1)
2930 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2932 if (fd->ordered)
2934 /* Until now, counts array contained number of iterations or
2935 variable containing it for ith loop. From now on, we need
2936 those counts only for collapsed loops, and only for the 2nd
2937 till the last collapsed one. Move those one element earlier,
2938 we'll use counts[fd->collapse - 1] for the first source/sink
2939 iteration counter and so on and counts[fd->ordered]
2940 as the array holding the current counter values for
2941 depend(source). */
2942 if (fd->collapse > 1)
2943 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2944 if (broken_loop)
2946 int i;
2947 for (i = fd->collapse; i < fd->ordered; i++)
2949 tree type = TREE_TYPE (fd->loops[i].v);
2950 tree this_cond
2951 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2952 fold_convert (type, fd->loops[i].n1),
2953 fold_convert (type, fd->loops[i].n2));
2954 if (!integer_onep (this_cond))
2955 break;
2957 if (i < fd->ordered)
2959 cont_bb
2960 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2961 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2962 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2963 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2964 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2965 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2966 make_edge (cont_bb, l1_bb, 0);
2967 l2_bb = create_empty_bb (cont_bb);
2968 broken_loop = false;
2971 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2972 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2973 ordered_lastprivate);
2974 if (counts[fd->collapse - 1])
2976 gcc_assert (fd->collapse == 1);
2977 gsi = gsi_last_bb (l0_bb);
2978 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2979 istart0, true);
2980 gsi = gsi_last_bb (cont_bb);
2981 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2982 build_int_cst (fd->iter_type, 1));
2983 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2984 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2985 size_zero_node, NULL_TREE, NULL_TREE);
2986 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2987 t = counts[fd->collapse - 1];
2989 else if (fd->collapse > 1)
2990 t = fd->loop.v;
2991 else
2993 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2994 fd->loops[0].v, fd->loops[0].n1);
2995 t = fold_convert (fd->iter_type, t);
2997 gsi = gsi_last_bb (l0_bb);
2998 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2999 size_zero_node, NULL_TREE, NULL_TREE);
3000 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3001 false, GSI_CONTINUE_LINKING);
3002 expand_omp_build_assign (&gsi, aref, t, true);
3005 if (!broken_loop)
3007 /* Code to control the increment and predicate for the sequential
3008 loop goes in the CONT_BB. */
3009 gsi = gsi_last_nondebug_bb (cont_bb);
3010 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3011 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3012 vmain = gimple_omp_continue_control_use (cont_stmt);
3013 vback = gimple_omp_continue_control_def (cont_stmt);
3015 if (!gimple_omp_for_combined_p (fd->for_stmt))
3017 if (POINTER_TYPE_P (type))
3018 t = fold_build_pointer_plus (vmain, fd->loop.step);
3019 else
3020 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3021 t = force_gimple_operand_gsi (&gsi, t,
3022 DECL_P (vback)
3023 && TREE_ADDRESSABLE (vback),
3024 NULL_TREE, true, GSI_SAME_STMT);
3025 assign_stmt = gimple_build_assign (vback, t);
3026 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3028 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3030 if (fd->collapse > 1)
3031 t = fd->loop.v;
3032 else
3034 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3035 fd->loops[0].v, fd->loops[0].n1);
3036 t = fold_convert (fd->iter_type, t);
3038 tree aref = build4 (ARRAY_REF, fd->iter_type,
3039 counts[fd->ordered], size_zero_node,
3040 NULL_TREE, NULL_TREE);
3041 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3042 true, GSI_SAME_STMT);
3043 expand_omp_build_assign (&gsi, aref, t);
3046 t = build2 (fd->loop.cond_code, boolean_type_node,
3047 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3048 iend);
3049 gcond *cond_stmt = gimple_build_cond_empty (t);
3050 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3053 /* Remove GIMPLE_OMP_CONTINUE. */
3054 gsi_remove (&gsi, true);
3056 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3057 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3059 /* Emit code to get the next parallel iteration in L2_BB. */
3060 gsi = gsi_start_bb (l2_bb);
3062 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3063 build_fold_addr_expr (istart0),
3064 build_fold_addr_expr (iend0));
3065 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3066 false, GSI_CONTINUE_LINKING);
3067 if (TREE_TYPE (t) != boolean_type_node)
3068 t = fold_build2 (NE_EXPR, boolean_type_node,
3069 t, build_int_cst (TREE_TYPE (t), 0));
3070 gcond *cond_stmt = gimple_build_cond_empty (t);
3071 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3074 /* Add the loop cleanup function. */
3075 gsi = gsi_last_nondebug_bb (exit_bb);
3076 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3077 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3078 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3079 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3080 else
3081 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3082 gcall *call_stmt = gimple_build_call (t, 0);
3083 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3084 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3085 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3086 if (fd->ordered)
3088 tree arr = counts[fd->ordered];
3089 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3090 TREE_THIS_VOLATILE (clobber) = 1;
3091 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3092 GSI_SAME_STMT);
3094 gsi_remove (&gsi, true);
3096 /* Connect the new blocks. */
3097 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3098 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3100 if (!broken_loop)
3102 gimple_seq phis;
3104 e = find_edge (cont_bb, l3_bb);
3105 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3107 phis = phi_nodes (l3_bb);
3108 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3110 gimple *phi = gsi_stmt (gsi);
3111 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3112 PHI_ARG_DEF_FROM_EDGE (phi, e));
3114 remove_edge (e);
3116 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3117 e = find_edge (cont_bb, l1_bb);
3118 if (e == NULL)
3120 e = BRANCH_EDGE (cont_bb);
3121 gcc_assert (single_succ (e->dest) == l1_bb);
3123 if (gimple_omp_for_combined_p (fd->for_stmt))
3125 remove_edge (e);
3126 e = NULL;
3128 else if (fd->collapse > 1)
3130 remove_edge (e);
3131 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3133 else
3134 e->flags = EDGE_TRUE_VALUE;
3135 if (e)
3137 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3138 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3140 else
3142 e = find_edge (cont_bb, l2_bb);
3143 e->flags = EDGE_FALLTHRU;
3145 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3147 if (gimple_in_ssa_p (cfun))
3149 /* Add phis to the outer loop that connect to the phis in the inner,
3150 original loop, and move the loop entry value of the inner phi to
3151 the loop entry value of the outer phi. */
3152 gphi_iterator psi;
3153 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3155 source_location locus;
3156 gphi *nphi;
3157 gphi *exit_phi = psi.phi ();
3159 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3160 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3162 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3163 edge latch_to_l1 = find_edge (latch, l1_bb);
3164 gphi *inner_phi
3165 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3167 tree t = gimple_phi_result (exit_phi);
3168 tree new_res = copy_ssa_name (t, NULL);
3169 nphi = create_phi_node (new_res, l0_bb);
3171 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3172 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3173 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3174 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3175 add_phi_arg (nphi, t, entry_to_l0, locus);
3177 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3178 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3180 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3184 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3185 recompute_dominator (CDI_DOMINATORS, l2_bb));
3186 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3187 recompute_dominator (CDI_DOMINATORS, l3_bb));
3188 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3189 recompute_dominator (CDI_DOMINATORS, l0_bb));
3190 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3191 recompute_dominator (CDI_DOMINATORS, l1_bb));
3193 /* We enter expand_omp_for_generic with a loop. This original loop may
3194 have its own loop struct, or it may be part of an outer loop struct
3195 (which may be the fake loop). */
3196 struct loop *outer_loop = entry_bb->loop_father;
3197 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3199 add_bb_to_loop (l2_bb, outer_loop);
3201 /* We've added a new loop around the original loop. Allocate the
3202 corresponding loop struct. */
3203 struct loop *new_loop = alloc_loop ();
3204 new_loop->header = l0_bb;
3205 new_loop->latch = l2_bb;
3206 add_loop (new_loop, outer_loop);
3208 /* Allocate a loop structure for the original loop unless we already
3209 had one. */
3210 if (!orig_loop_has_loop_struct
3211 && !gimple_omp_for_combined_p (fd->for_stmt))
3213 struct loop *orig_loop = alloc_loop ();
3214 orig_loop->header = l1_bb;
3215 /* The loop may have multiple latches. */
3216 add_loop (orig_loop, new_loop);
3221 /* A subroutine of expand_omp_for. Generate code for a parallel
3222 loop with static schedule and no specified chunk size. Given
3223 parameters:
3225 for (V = N1; V cond N2; V += STEP) BODY;
3227 where COND is "<" or ">", we generate pseudocode
3229 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3230 if (cond is <)
3231 adj = STEP - 1;
3232 else
3233 adj = STEP + 1;
3234 if ((__typeof (V)) -1 > 0 && cond is >)
3235 n = -(adj + N2 - N1) / -STEP;
3236 else
3237 n = (adj + N2 - N1) / STEP;
3238 q = n / nthreads;
3239 tt = n % nthreads;
3240 if (threadid < tt) goto L3; else goto L4;
3241 L3:
3242 tt = 0;
3243 q = q + 1;
3244 L4:
3245 s0 = q * threadid + tt;
3246 e0 = s0 + q;
3247 V = s0 * STEP + N1;
3248 if (s0 >= e0) goto L2; else goto L0;
3249 L0:
3250 e = e0 * STEP + N1;
3251 L1:
3252 BODY;
3253 V += STEP;
3254 if (V cond e) goto L1;
3255 L2:
3256 */
/* Expand a GIMPLE_OMP_FOR that uses a static schedule with no chunk size.
   REGION supplies the entry, continue and exit blocks; a NULL REGION->cont
   is treated as a broken loop, in which case the continue block is not
   rewritten.
   FD describes the loop: iteration variable, bounds, step, condition code,
   collapse depth and the for statement itself.
   INNER_STMT is the statement whose _looptemp_ clauses receive the computed
   start and end values when the for statement is combined; it is also
   passed on to expand_omp_for_init_vars when collapse > 1.
   Note: in this listing every line starts with its source line number and
   the brace-only lines of the original file are not present.  */
3258 static void
3259 expand_omp_for_static_nochunk (struct omp_region *region,
3260 struct omp_for_data *fd,
3261 gimple *inner_stmt)
3263 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3264 tree type, itype, vmain, vback;
3265 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3266 basic_block body_bb, cont_bb, collapse_bb = NULL;
3267 basic_block fin_bb;
3268 gimple_stmt_iterator gsi;
3269 edge ep;
3270 bool broken_loop = region->cont == NULL;
3271 tree *counts = NULL;
3272 tree n1, n2, step;
/* TYPE is the type of the iteration variable.  ITYPE is the type used for
   the iteration arithmetic below: signed_type_for (type) for pointer
   iterators, TYPE itself otherwise.  */
3274 itype = type = TREE_TYPE (fd->loop.v);
3275 if (POINTER_TYPE_P (type))
3276 itype = signed_type_for (type);
/* Pick out the blocks of the region.  FIN_BB is the destination of the
   entry block's branch edge; SEQ_START_BB is a new block split onto the
   entry block's fallthru edge, whose single successor is BODY_BB.  */
3278 entry_bb = region->entry;
3279 cont_bb = region->cont;
3280 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3281 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3282 gcc_assert (broken_loop
3283 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3284 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3285 body_bb = single_succ (seq_start_bb);
3286 if (!broken_loop)
3288 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3289 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3290 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3292 exit_bb = region->exit;
3294 /* Iteration space partitioning goes in ENTRY_BB. */
3295 gsi = gsi_last_nondebug_bb (entry_bb);
3296 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
/* For collapsed loops the per-loop counts are set up by
   expand_omp_for_init_counts and T is NULL_TREE.  Otherwise T is constant
   one when combined into an enclosing construct, or the result of folding
   the N1 cond N2 test.  */
3298 if (fd->collapse > 1)
3300 int first_zero_iter = -1, dummy = -1;
3301 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3303 counts = XALLOCAVEC (tree, fd->collapse);
3304 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3305 fin_bb, first_zero_iter,
3306 dummy_bb, dummy, l2_dom_bb);
3307 t = NULL_TREE;
3309 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3310 t = integer_one_node;
3311 else
3312 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3313 fold_convert (type, fd->loop.n1),
3314 fold_convert (type, fd->loop.n2));
/* Single loop with an unsigned iterator whose test is not known to be
   true: emit the N1 cond N2 test explicitly and add a false edge to
   FIN_BB.  */
3315 if (fd->collapse == 1
3316 && TYPE_UNSIGNED (type)
3317 && (t == NULL_TREE || !integer_onep (t)))
3319 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3320 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3321 true, GSI_SAME_STMT);
3322 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3323 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3324 true, GSI_SAME_STMT);
3325 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3326 NULL_TREE, NULL_TREE);
3327 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3328 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3329 expand_omp_regimplify_p, NULL, NULL)
3330 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3331 expand_omp_regimplify_p, NULL, NULL))
3333 gsi = gsi_for_stmt (cond_stmt);
3334 gimple_regimplify_operands (cond_stmt, &gsi);
3336 ep = split_block (entry_bb, cond_stmt);
3337 ep->flags = EDGE_TRUE_VALUE;
3338 entry_bb = ep->dest;
3339 ep->probability = profile_probability::very_likely ();
3340 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3341 ep->probability = profile_probability::very_unlikely ();
/* In SSA form, give every PHI in FIN_BB an argument for the new edge,
   copied from its argument on the existing ENTRY_BB -> FIN_BB edge.  */
3342 if (gimple_in_ssa_p (cfun))
3344 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3345 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3346 !gsi_end_p (gpi); gsi_next (&gpi))
3348 gphi *phi = gpi.phi ();
3349 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3350 ep, UNKNOWN_LOCATION);
3353 gsi = gsi_last_bb (entry_bb);
/* Select the builtins that supply NTHREADS and THREADID: the thread
   queries for a plain for, the team queries for distribute.  Any other
   kind is unreachable here.  */
3356 switch (gimple_omp_for_kind (fd->for_stmt))
3358 case GF_OMP_FOR_KIND_FOR:
3359 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3360 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3361 break;
3362 case GF_OMP_FOR_KIND_DISTRIBUTE:
3363 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3364 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3365 break;
3366 default:
3367 gcc_unreachable ();
3369 nthreads = build_call_expr (nthreads, 0);
3370 nthreads = fold_convert (itype, nthreads);
3371 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3372 true, GSI_SAME_STMT);
3373 threadid = build_call_expr (threadid, 0);
3374 threadid = fold_convert (itype, threadid);
3375 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3376 true, GSI_SAME_STMT);
/* Loop bounds and step.  When combined into an enclosing construct the
   bounds are read from the first two _looptemp_ clauses of the for
   statement instead of from FD.  */
3378 n1 = fd->loop.n1;
3379 n2 = fd->loop.n2;
3380 step = fd->loop.step;
3381 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3383 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3384 OMP_CLAUSE__LOOPTEMP_);
3385 gcc_assert (innerc);
3386 n1 = OMP_CLAUSE_DECL (innerc);
3387 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3388 OMP_CLAUSE__LOOPTEMP_);
3389 gcc_assert (innerc);
3390 n2 = OMP_CLAUSE_DECL (innerc);
3392 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3393 true, NULL_TREE, true, GSI_SAME_STMT);
3394 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3395 true, NULL_TREE, true, GSI_SAME_STMT);
3396 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3397 true, NULL_TREE, true, GSI_SAME_STMT);
/* n = (adj + N2 - N1) / STEP, where adj is STEP - 1 for LT_EXPR and
   STEP + 1 otherwise.  For an unsigned ITYPE with a GT_EXPR condition both
   operands of the division are negated.  */
3399 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3400 t = fold_build2 (PLUS_EXPR, itype, step, t);
3401 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3402 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3403 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3404 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3405 fold_build1 (NEGATE_EXPR, itype, t),
3406 fold_build1 (NEGATE_EXPR, itype, step));
3407 else
3408 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3409 t = fold_convert (itype, t);
3410 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
/* q = n / nthreads and tt = n % nthreads, followed by the
   threadid < tt test.  */
3412 q = create_tmp_reg (itype, "q");
3413 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3414 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3415 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3417 tt = create_tmp_reg (itype, "tt");
3418 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3419 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3420 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3422 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3423 gcond *cond_stmt = gimple_build_cond_empty (t);
3424 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
/* Split after the condition.  The new SECOND_BB receives tt = 0 and
   q = q + 1 in front of the GIMPLE_OMP_FOR.  */
3426 second_bb = split_block (entry_bb, cond_stmt)->dest;
3427 gsi = gsi_last_nondebug_bb (second_bb);
3428 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3430 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3431 GSI_SAME_STMT);
3432 gassign *assign_stmt
3433 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3434 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
/* Split again.  THIRD_BB computes s0 = q * threadid + tt and
   e0 = s0 + q, then tests s0 >= e0.  */
3436 third_bb = split_block (second_bb, assign_stmt)->dest;
3437 gsi = gsi_last_nondebug_bb (third_bb);
3438 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3440 t = build2 (MULT_EXPR, itype, q, threadid);
3441 t = build2 (PLUS_EXPR, itype, t, tt);
3442 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3444 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3445 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3447 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3448 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3450 /* Remove the GIMPLE_OMP_FOR statement. */
3451 gsi_remove (&gsi, true);
3453 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3454 gsi = gsi_start_bb (seq_start_bb);
3456 tree startvar = fd->loop.v;
3457 tree endvar = NULL_TREE;
/* For a combined construct the start and end values go to the first two
   _looptemp_ decls found on INNER_STMT rather than to the iteration
   variable.  */
3459 if (gimple_omp_for_combined_p (fd->for_stmt))
3461 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3462 ? gimple_omp_parallel_clauses (inner_stmt)
3463 : gimple_omp_for_clauses (inner_stmt);
3464 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3465 gcc_assert (innerc);
3466 startvar = OMP_CLAUSE_DECL (innerc);
3467 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3468 OMP_CLAUSE__LOOPTEMP_);
3469 gcc_assert (innerc);
3470 endvar = OMP_CLAUSE_DECL (innerc);
3471 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3472 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3474 int i;
3475 for (i = 1; i < fd->collapse; i++)
3477 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3478 OMP_CLAUSE__LOOPTEMP_);
3479 gcc_assert (innerc);
3481 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3482 OMP_CLAUSE__LOOPTEMP_);
3483 if (innerc)
3485 /* If needed (distribute parallel for with lastprivate),
3486 propagate down the total number of iterations. */
3487 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3488 fd->loop.n2);
3489 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3490 GSI_CONTINUE_LINKING);
3491 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3492 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* startvar = s0 * STEP + N1, built with pointer-plus for pointer
   iterators.  */
3496 t = fold_convert (itype, s0);
3497 t = fold_build2 (MULT_EXPR, itype, t, step);
3498 if (POINTER_TYPE_P (type))
3499 t = fold_build_pointer_plus (n1, t);
3500 else
3501 t = fold_build2 (PLUS_EXPR, type, t, n1);
3502 t = fold_convert (TREE_TYPE (startvar), t);
3503 t = force_gimple_operand_gsi (&gsi, t,
3504 DECL_P (startvar)
3505 && TREE_ADDRESSABLE (startvar),
3506 NULL_TREE, false, GSI_CONTINUE_LINKING);
3507 assign_stmt = gimple_build_assign (startvar, t);
3508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
/* e = e0 * STEP + N1, the end value compared against in the continue
   block.  */
3510 t = fold_convert (itype, e0);
3511 t = fold_build2 (MULT_EXPR, itype, t, step);
3512 if (POINTER_TYPE_P (type))
3513 t = fold_build_pointer_plus (n1, t);
3514 else
3515 t = fold_build2 (PLUS_EXPR, type, t, n1);
3516 t = fold_convert (TREE_TYPE (startvar), t);
3517 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3518 false, GSI_CONTINUE_LINKING);
/* Store E into ENDVAR when there is one.  The assignment of E to the
   iteration variable that follows uses NOP_EXPR when the conversion is not
   a useless one.  */
3519 if (endvar)
3521 assign_stmt = gimple_build_assign (endvar, e);
3522 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3523 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3524 assign_stmt = gimple_build_assign (fd->loop.v, e);
3525 else
3526 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3527 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3529 /* Handle linear clause adjustments. */
3530 tree itercnt = NULL_TREE;
3531 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3532 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3533 c; c = OMP_CLAUSE_CHAIN (c))
3534 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3535 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3537 tree d = OMP_CLAUSE_DECL (c);
3538 bool is_ref = omp_is_reference (d);
3539 tree t = d, a, dest;
3540 if (is_ref)
3541 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
/* ITERCNT is computed on the first qualifying clause only and then
   reused: s0, plus (n1 - fd->loop.n1) / step when combined into an
   enclosing construct.  */
3542 if (itercnt == NULL_TREE)
3544 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3546 itercnt = fold_build2 (MINUS_EXPR, itype,
3547 fold_convert (itype, n1),
3548 fold_convert (itype, fd->loop.n1));
3549 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3550 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3551 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3552 NULL_TREE, false,
3553 GSI_CONTINUE_LINKING);
3555 else
3556 itercnt = s0;
/* Advance the linear variable by itercnt * linear-step.  Pointer
   variables do the multiplication in sizetype and add with
   POINTER_PLUS_EXPR.  */
3558 tree type = TREE_TYPE (t);
3559 if (POINTER_TYPE_P (type))
3560 type = sizetype;
3561 a = fold_build2 (MULT_EXPR, type,
3562 fold_convert (type, itercnt),
3563 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3564 dest = unshare_expr (t);
3565 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3566 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3567 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3568 false, GSI_CONTINUE_LINKING);
3569 assign_stmt = gimple_build_assign (dest, t);
3570 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3572 if (fd->collapse > 1)
3573 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3575 if (!broken_loop)
3577 /* The code controlling the sequential loop replaces the
3578 GIMPLE_OMP_CONTINUE. */
3579 gsi = gsi_last_nondebug_bb (cont_bb);
3580 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3581 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3582 vmain = gimple_omp_continue_control_use (cont_stmt);
3583 vback = gimple_omp_continue_control_def (cont_stmt);
/* Unless combined, emit vback = vmain + STEP and the comparison of the
   new value against E.  */
3585 if (!gimple_omp_for_combined_p (fd->for_stmt))
3587 if (POINTER_TYPE_P (type))
3588 t = fold_build_pointer_plus (vmain, step);
3589 else
3590 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3591 t = force_gimple_operand_gsi (&gsi, t,
3592 DECL_P (vback)
3593 && TREE_ADDRESSABLE (vback),
3594 NULL_TREE, true, GSI_SAME_STMT);
3595 assign_stmt = gimple_build_assign (vback, t);
3596 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3598 t = build2 (fd->loop.cond_code, boolean_type_node,
3599 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3600 ? t : vback, e);
3601 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3604 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3605 gsi_remove (&gsi, true);
3607 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3608 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3611 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3612 gsi = gsi_last_nondebug_bb (exit_bb);
3613 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3615 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3616 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3618 gsi_remove (&gsi, true);
3620 /* Connect all the blocks. */
3621 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3622 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3623 ep = find_edge (entry_bb, second_bb);
3624 ep->flags = EDGE_TRUE_VALUE;
3625 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3626 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3627 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
/* Rewire the back edge out of CONT_BB: removed when combined, redirected
   to COLLAPSE_BB when collapse > 1, otherwise marked as the true edge.
   The CONT_BB -> FIN_BB edge becomes the false edge, or a fallthru when no
   back edge remains.  */
3629 if (!broken_loop)
3631 ep = find_edge (cont_bb, body_bb);
3632 if (ep == NULL)
3634 ep = BRANCH_EDGE (cont_bb);
3635 gcc_assert (single_succ (ep->dest) == body_bb);
3637 if (gimple_omp_for_combined_p (fd->for_stmt))
3639 remove_edge (ep);
3640 ep = NULL;
3642 else if (fd->collapse > 1)
3644 remove_edge (ep);
3645 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3647 else
3648 ep->flags = EDGE_TRUE_VALUE;
3649 find_edge (cont_bb, fin_bb)->flags
3650 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
/* Update immediate dominators of the new blocks directly and recompute
   them for BODY_BB and FIN_BB.  */
3653 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3654 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3655 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3657 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3658 recompute_dominator (CDI_DOMINATORS, body_bb));
3659 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3660 recompute_dominator (CDI_DOMINATORS, fin_bb));
/* If BODY_BB already belongs to a loop different from ENTRY_BB's loop,
   only sanity-check that loop's header and latch, then return.  */
3662 struct loop *loop = body_bb->loop_father;
3663 if (loop != entry_bb->loop_father)
3665 gcc_assert (broken_loop || loop->header == body_bb);
3666 gcc_assert (broken_loop
3667 || loop->latch == region->cont
3668 || single_pred (loop->latch) == region->cont);
3669 return;
/* Otherwise allocate a loop structure for an unbroken, non-combined loop.
   The latch is recorded only when no COLLAPSE_BB was created.  */
3672 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3674 loop = alloc_loop ();
3675 loop->header = body_bb;
3676 if (collapse_bb == NULL)
3677 loop->latch = cont_bb;
3678 add_loop (loop, body_bb->loop_father);
3682 /* Return phi in E->DEST with ARG on edge E. */
3684 static gphi *
3685 find_phi_with_arg_on_edge (tree arg, edge e)
3687 basic_block bb = e->dest;
3689 for (gphi_iterator gpi = gsi_start_phis (bb);
3690 !gsi_end_p (gpi);
3691 gsi_next (&gpi))
3693 gphi *phi = gpi.phi ();
3694 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3695 return phi;
3698 return NULL;
3701 /* A subroutine of expand_omp_for. Generate code for a parallel
3702 loop with static schedule and a specified chunk size. Given
3703 parameters:
3705 for (V = N1; V cond N2; V += STEP) BODY;
3707 where COND is "<" or ">", we generate pseudocode
3709 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
3710 if (cond is <)
3711 adj = STEP - 1;
3712 else
3713 adj = STEP + 1;
3714 if ((__typeof (V)) -1 > 0 && cond is >)
3715 n = -(adj + N2 - N1) / -STEP;
3716 else
3717 n = (adj + N2 - N1) / STEP;
3718 trip = 0;
3719 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3720 here so that V is defined
3721 if the loop is not entered
3722 L0:
3723 s0 = (trip * nthreads + threadid) * CHUNK;
3724 e0 = min (s0 + CHUNK, n);
3725 if (s0 < n) goto L1; else goto L4;
3726 L1:
3727 V = s0 * STEP + N1;
3728 e = e0 * STEP + N1;
3729 L2:
3730 BODY;
3731 V += STEP;
3732 if (V cond e) goto L2; else goto L3;
3733 L3:
3734 trip += 1;
3735 goto L0;
3736 L4:
3737 */
3739 static void
3740 expand_omp_for_static_chunk (struct omp_region *region,
3741 struct omp_for_data *fd, gimple *inner_stmt)
3743 tree n, s0, e0, e, t;
3744 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3745 tree type, itype, vmain, vback, vextra;
3746 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3747 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3748 gimple_stmt_iterator gsi;
3749 edge se;
3750 bool broken_loop = region->cont == NULL;
3751 tree *counts = NULL;
3752 tree n1, n2, step;
3754 itype = type = TREE_TYPE (fd->loop.v);
3755 if (POINTER_TYPE_P (type))
3756 itype = signed_type_for (type);
3758 entry_bb = region->entry;
3759 se = split_block (entry_bb, last_stmt (entry_bb));
3760 entry_bb = se->src;
3761 iter_part_bb = se->dest;
3762 cont_bb = region->cont;
3763 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3764 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3765 gcc_assert (broken_loop
3766 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3767 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3768 body_bb = single_succ (seq_start_bb);
3769 if (!broken_loop)
3771 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3772 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3773 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3774 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3776 exit_bb = region->exit;
3778 /* Trip and adjustment setup goes in ENTRY_BB. */
3779 gsi = gsi_last_nondebug_bb (entry_bb);
3780 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3782 if (fd->collapse > 1)
3784 int first_zero_iter = -1, dummy = -1;
3785 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3787 counts = XALLOCAVEC (tree, fd->collapse);
3788 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3789 fin_bb, first_zero_iter,
3790 dummy_bb, dummy, l2_dom_bb);
3791 t = NULL_TREE;
3793 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3794 t = integer_one_node;
3795 else
3796 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3797 fold_convert (type, fd->loop.n1),
3798 fold_convert (type, fd->loop.n2));
3799 if (fd->collapse == 1
3800 && TYPE_UNSIGNED (type)
3801 && (t == NULL_TREE || !integer_onep (t)))
3803 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3804 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3805 true, GSI_SAME_STMT);
3806 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3807 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3808 true, GSI_SAME_STMT);
3809 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3810 NULL_TREE, NULL_TREE);
3811 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3812 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3813 expand_omp_regimplify_p, NULL, NULL)
3814 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3815 expand_omp_regimplify_p, NULL, NULL))
3817 gsi = gsi_for_stmt (cond_stmt);
3818 gimple_regimplify_operands (cond_stmt, &gsi);
3820 se = split_block (entry_bb, cond_stmt);
3821 se->flags = EDGE_TRUE_VALUE;
3822 entry_bb = se->dest;
3823 se->probability = profile_probability::very_likely ();
3824 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3825 se->probability = profile_probability::very_unlikely ();
3826 if (gimple_in_ssa_p (cfun))
3828 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3829 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3830 !gsi_end_p (gpi); gsi_next (&gpi))
3832 gphi *phi = gpi.phi ();
3833 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3834 se, UNKNOWN_LOCATION);
3837 gsi = gsi_last_bb (entry_bb);
3840 switch (gimple_omp_for_kind (fd->for_stmt))
3842 case GF_OMP_FOR_KIND_FOR:
3843 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3844 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3845 break;
3846 case GF_OMP_FOR_KIND_DISTRIBUTE:
3847 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3848 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3849 break;
3850 default:
3851 gcc_unreachable ();
3853 nthreads = build_call_expr (nthreads, 0);
3854 nthreads = fold_convert (itype, nthreads);
3855 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3856 true, GSI_SAME_STMT);
3857 threadid = build_call_expr (threadid, 0);
3858 threadid = fold_convert (itype, threadid);
3859 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3860 true, GSI_SAME_STMT);
3862 n1 = fd->loop.n1;
3863 n2 = fd->loop.n2;
3864 step = fd->loop.step;
3865 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3867 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3868 OMP_CLAUSE__LOOPTEMP_);
3869 gcc_assert (innerc);
3870 n1 = OMP_CLAUSE_DECL (innerc);
3871 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3872 OMP_CLAUSE__LOOPTEMP_);
3873 gcc_assert (innerc);
3874 n2 = OMP_CLAUSE_DECL (innerc);
3876 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3877 true, NULL_TREE, true, GSI_SAME_STMT);
3878 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3879 true, NULL_TREE, true, GSI_SAME_STMT);
3880 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3881 true, NULL_TREE, true, GSI_SAME_STMT);
3882 tree chunk_size = fold_convert (itype, fd->chunk_size);
3883 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3884 chunk_size
3885 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3886 GSI_SAME_STMT);
3888 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3889 t = fold_build2 (PLUS_EXPR, itype, step, t);
3890 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3891 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3892 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3893 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3894 fold_build1 (NEGATE_EXPR, itype, t),
3895 fold_build1 (NEGATE_EXPR, itype, step));
3896 else
3897 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3898 t = fold_convert (itype, t);
3899 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3900 true, GSI_SAME_STMT);
3902 trip_var = create_tmp_reg (itype, ".trip");
3903 if (gimple_in_ssa_p (cfun))
3905 trip_init = make_ssa_name (trip_var);
3906 trip_main = make_ssa_name (trip_var);
3907 trip_back = make_ssa_name (trip_var);
3909 else
3911 trip_init = trip_var;
3912 trip_main = trip_var;
3913 trip_back = trip_var;
3916 gassign *assign_stmt
3917 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3918 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3920 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3921 t = fold_build2 (MULT_EXPR, itype, t, step);
3922 if (POINTER_TYPE_P (type))
3923 t = fold_build_pointer_plus (n1, t);
3924 else
3925 t = fold_build2 (PLUS_EXPR, type, t, n1);
3926 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3927 true, GSI_SAME_STMT);
3929 /* Remove the GIMPLE_OMP_FOR. */
3930 gsi_remove (&gsi, true);
3932 gimple_stmt_iterator gsif = gsi;
3934 /* Iteration space partitioning goes in ITER_PART_BB. */
3935 gsi = gsi_last_bb (iter_part_bb);
3937 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3938 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3939 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3940 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3941 false, GSI_CONTINUE_LINKING);
3943 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3944 t = fold_build2 (MIN_EXPR, itype, t, n);
3945 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3946 false, GSI_CONTINUE_LINKING);
3948 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3949 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3951 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3952 gsi = gsi_start_bb (seq_start_bb);
3954 tree startvar = fd->loop.v;
3955 tree endvar = NULL_TREE;
3957 if (gimple_omp_for_combined_p (fd->for_stmt))
3959 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3960 ? gimple_omp_parallel_clauses (inner_stmt)
3961 : gimple_omp_for_clauses (inner_stmt);
3962 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3963 gcc_assert (innerc);
3964 startvar = OMP_CLAUSE_DECL (innerc);
3965 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3966 OMP_CLAUSE__LOOPTEMP_);
3967 gcc_assert (innerc);
3968 endvar = OMP_CLAUSE_DECL (innerc);
3969 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3970 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3972 int i;
3973 for (i = 1; i < fd->collapse; i++)
3975 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3976 OMP_CLAUSE__LOOPTEMP_);
3977 gcc_assert (innerc);
3979 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3980 OMP_CLAUSE__LOOPTEMP_);
3981 if (innerc)
3983 /* If needed (distribute parallel for with lastprivate),
3984 propagate down the total number of iterations. */
3985 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3986 fd->loop.n2);
3987 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3988 GSI_CONTINUE_LINKING);
3989 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3990 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3995 t = fold_convert (itype, s0);
3996 t = fold_build2 (MULT_EXPR, itype, t, step);
3997 if (POINTER_TYPE_P (type))
3998 t = fold_build_pointer_plus (n1, t);
3999 else
4000 t = fold_build2 (PLUS_EXPR, type, t, n1);
4001 t = fold_convert (TREE_TYPE (startvar), t);
4002 t = force_gimple_operand_gsi (&gsi, t,
4003 DECL_P (startvar)
4004 && TREE_ADDRESSABLE (startvar),
4005 NULL_TREE, false, GSI_CONTINUE_LINKING);
4006 assign_stmt = gimple_build_assign (startvar, t);
4007 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4009 t = fold_convert (itype, e0);
4010 t = fold_build2 (MULT_EXPR, itype, t, step);
4011 if (POINTER_TYPE_P (type))
4012 t = fold_build_pointer_plus (n1, t);
4013 else
4014 t = fold_build2 (PLUS_EXPR, type, t, n1);
4015 t = fold_convert (TREE_TYPE (startvar), t);
4016 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4017 false, GSI_CONTINUE_LINKING);
4018 if (endvar)
4020 assign_stmt = gimple_build_assign (endvar, e);
4021 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4022 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4023 assign_stmt = gimple_build_assign (fd->loop.v, e);
4024 else
4025 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4026 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4028 /* Handle linear clause adjustments. */
4029 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4030 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4031 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4032 c; c = OMP_CLAUSE_CHAIN (c))
4033 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4034 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4036 tree d = OMP_CLAUSE_DECL (c);
4037 bool is_ref = omp_is_reference (d);
4038 tree t = d, a, dest;
4039 if (is_ref)
4040 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4041 tree type = TREE_TYPE (t);
4042 if (POINTER_TYPE_P (type))
4043 type = sizetype;
4044 dest = unshare_expr (t);
4045 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4046 expand_omp_build_assign (&gsif, v, t);
4047 if (itercnt == NULL_TREE)
4049 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4051 itercntbias
4052 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4053 fold_convert (itype, fd->loop.n1));
4054 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4055 itercntbias, step);
4056 itercntbias
4057 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4058 NULL_TREE, true,
4059 GSI_SAME_STMT);
4060 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4061 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4062 NULL_TREE, false,
4063 GSI_CONTINUE_LINKING);
4065 else
4066 itercnt = s0;
4068 a = fold_build2 (MULT_EXPR, type,
4069 fold_convert (type, itercnt),
4070 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4071 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4072 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4073 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4074 false, GSI_CONTINUE_LINKING);
4075 assign_stmt = gimple_build_assign (dest, t);
4076 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4078 if (fd->collapse > 1)
4079 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4081 if (!broken_loop)
4083 /* The code controlling the sequential loop goes in CONT_BB,
4084 replacing the GIMPLE_OMP_CONTINUE. */
4085 gsi = gsi_last_nondebug_bb (cont_bb);
4086 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4087 vmain = gimple_omp_continue_control_use (cont_stmt);
4088 vback = gimple_omp_continue_control_def (cont_stmt);
4090 if (!gimple_omp_for_combined_p (fd->for_stmt))
4092 if (POINTER_TYPE_P (type))
4093 t = fold_build_pointer_plus (vmain, step);
4094 else
4095 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4096 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4097 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4098 true, GSI_SAME_STMT);
4099 assign_stmt = gimple_build_assign (vback, t);
4100 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4102 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4103 t = build2 (EQ_EXPR, boolean_type_node,
4104 build_int_cst (itype, 0),
4105 build_int_cst (itype, 1));
4106 else
4107 t = build2 (fd->loop.cond_code, boolean_type_node,
4108 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4109 ? t : vback, e);
4110 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4113 /* Remove GIMPLE_OMP_CONTINUE. */
4114 gsi_remove (&gsi, true);
4116 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4117 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4119 /* Trip update code goes into TRIP_UPDATE_BB. */
4120 gsi = gsi_start_bb (trip_update_bb);
4122 t = build_int_cst (itype, 1);
4123 t = build2 (PLUS_EXPR, itype, trip_main, t);
4124 assign_stmt = gimple_build_assign (trip_back, t);
4125 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4128 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4129 gsi = gsi_last_nondebug_bb (exit_bb);
4130 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4132 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4133 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4135 gsi_remove (&gsi, true);
4137 /* Connect the new blocks. */
4138 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4139 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4141 if (!broken_loop)
4143 se = find_edge (cont_bb, body_bb);
4144 if (se == NULL)
4146 se = BRANCH_EDGE (cont_bb);
4147 gcc_assert (single_succ (se->dest) == body_bb);
4149 if (gimple_omp_for_combined_p (fd->for_stmt))
4151 remove_edge (se);
4152 se = NULL;
4154 else if (fd->collapse > 1)
4156 remove_edge (se);
4157 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4159 else
4160 se->flags = EDGE_TRUE_VALUE;
4161 find_edge (cont_bb, trip_update_bb)->flags
4162 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4164 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4165 iter_part_bb);
4168 if (gimple_in_ssa_p (cfun))
4170 gphi_iterator psi;
4171 gphi *phi;
4172 edge re, ene;
4173 edge_var_map *vm;
4174 size_t i;
4176 gcc_assert (fd->collapse == 1 && !broken_loop);
4178 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4179 remove arguments of the phi nodes in fin_bb. We need to create
4180 appropriate phi nodes in iter_part_bb instead. */
4181 se = find_edge (iter_part_bb, fin_bb);
4182 re = single_succ_edge (trip_update_bb);
4183 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4184 ene = single_succ_edge (entry_bb);
4186 psi = gsi_start_phis (fin_bb);
4187 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4188 gsi_next (&psi), ++i)
4190 gphi *nphi;
4191 source_location locus;
4193 phi = psi.phi ();
4194 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4195 redirect_edge_var_map_def (vm), 0))
4196 continue;
4198 t = gimple_phi_result (phi);
4199 gcc_assert (t == redirect_edge_var_map_result (vm));
4201 if (!single_pred_p (fin_bb))
4202 t = copy_ssa_name (t, phi);
4204 nphi = create_phi_node (t, iter_part_bb);
4206 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4207 locus = gimple_phi_arg_location_from_edge (phi, se);
4209 /* A special case -- fd->loop.v is not yet computed in
4210 iter_part_bb, we need to use vextra instead. */
4211 if (t == fd->loop.v)
4212 t = vextra;
4213 add_phi_arg (nphi, t, ene, locus);
4214 locus = redirect_edge_var_map_location (vm);
4215 tree back_arg = redirect_edge_var_map_def (vm);
4216 add_phi_arg (nphi, back_arg, re, locus);
4217 edge ce = find_edge (cont_bb, body_bb);
4218 if (ce == NULL)
4220 ce = BRANCH_EDGE (cont_bb);
4221 gcc_assert (single_succ (ce->dest) == body_bb);
4222 ce = single_succ_edge (ce->dest);
4224 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4225 gcc_assert (inner_loop_phi != NULL);
4226 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4227 find_edge (seq_start_bb, body_bb), locus);
4229 if (!single_pred_p (fin_bb))
4230 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4232 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4233 redirect_edge_var_map_clear (re);
4234 if (single_pred_p (fin_bb))
4235 while (1)
4237 psi = gsi_start_phis (fin_bb);
4238 if (gsi_end_p (psi))
4239 break;
4240 remove_phi_node (&psi, false);
4243 /* Make phi node for trip. */
4244 phi = create_phi_node (trip_main, iter_part_bb);
4245 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4246 UNKNOWN_LOCATION);
4247 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4248 UNKNOWN_LOCATION);
4251 if (!broken_loop)
4252 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4253 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4254 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4255 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4256 recompute_dominator (CDI_DOMINATORS, fin_bb));
4257 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4258 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4259 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4260 recompute_dominator (CDI_DOMINATORS, body_bb));
4262 if (!broken_loop)
4264 struct loop *loop = body_bb->loop_father;
4265 struct loop *trip_loop = alloc_loop ();
4266 trip_loop->header = iter_part_bb;
4267 trip_loop->latch = trip_update_bb;
4268 add_loop (trip_loop, iter_part_bb->loop_father);
4270 if (loop != entry_bb->loop_father)
4272 gcc_assert (loop->header == body_bb);
4273 gcc_assert (loop->latch == region->cont
4274 || single_pred (loop->latch) == region->cont);
4275 trip_loop->inner = loop;
4276 return;
4279 if (!gimple_omp_for_combined_p (fd->for_stmt))
4281 loop = alloc_loop ();
4282 loop->header = body_bb;
4283 if (collapse_bb == NULL)
4284 loop->latch = cont_bb;
4285 add_loop (loop, trip_loop);
4290 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4291 loop. Given parameters:
4293 for (V = N1; V cond N2; V += STEP) BODY;
4295 where COND is "<" or ">", we generate pseudocode
4297 V = N1;
4298 goto L1;
4300 BODY;
4301 V += STEP;
4303 if (V cond N2) goto L0; else goto L2;
4306 For collapsed loops, given parameters:
4307 collapse(3)
4308 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4309 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4310 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4311 BODY;
4313 we generate pseudocode
4315 if (cond3 is <)
4316 adj = STEP3 - 1;
4317 else
4318 adj = STEP3 + 1;
4319 count3 = (adj + N32 - N31) / STEP3;
4320 if (cond2 is <)
4321 adj = STEP2 - 1;
4322 else
4323 adj = STEP2 + 1;
4324 count2 = (adj + N22 - N21) / STEP2;
4325 if (cond1 is <)
4326 adj = STEP1 - 1;
4327 else
4328 adj = STEP1 + 1;
4329 count1 = (adj + N12 - N11) / STEP1;
4330 count = count1 * count2 * count3;
4331 V = 0;
4332 V1 = N11;
4333 V2 = N21;
4334 V3 = N31;
4335 goto L1;
4337 BODY;
4338 V += 1;
4339 V3 += STEP3;
4340 V2 += (V3 cond3 N32) ? 0 : STEP2;
4341 V3 = (V3 cond3 N32) ? V3 : N31;
4342 V1 += (V2 cond2 N22) ? 0 : STEP1;
4343 V2 = (V2 cond2 N22) ? V2 : N21;
4345 if (V < count) goto L0; else goto L2;
4350 static void
4351 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4353 tree type, t;
4354 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4355 gimple_stmt_iterator gsi;
4356 gimple *stmt;
4357 gcond *cond_stmt;
4358 bool broken_loop = region->cont == NULL;
4359 edge e, ne;
4360 tree *counts = NULL;
4361 int i;
4362 int safelen_int = INT_MAX;
4363 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4364 OMP_CLAUSE_SAFELEN);
4365 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4366 OMP_CLAUSE__SIMDUID_);
4367 tree n1, n2;
4369 if (safelen)
4371 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4372 if (TREE_CODE (safelen) != INTEGER_CST)
4373 safelen_int = 0;
4374 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4375 safelen_int = tree_to_uhwi (safelen);
4376 if (safelen_int == 1)
4377 safelen_int = 0;
4379 type = TREE_TYPE (fd->loop.v);
4380 entry_bb = region->entry;
4381 cont_bb = region->cont;
4382 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4383 gcc_assert (broken_loop
4384 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4385 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4386 if (!broken_loop)
4388 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4389 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4390 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4391 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4393 else
4395 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4396 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4397 l2_bb = single_succ (l1_bb);
4399 exit_bb = region->exit;
4400 l2_dom_bb = NULL;
4402 gsi = gsi_last_nondebug_bb (entry_bb);
4404 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4405 /* Not needed in SSA form right now. */
4406 gcc_assert (!gimple_in_ssa_p (cfun));
4407 if (fd->collapse > 1)
4409 int first_zero_iter = -1, dummy = -1;
4410 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4412 counts = XALLOCAVEC (tree, fd->collapse);
4413 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4414 zero_iter_bb, first_zero_iter,
4415 dummy_bb, dummy, l2_dom_bb);
4417 if (l2_dom_bb == NULL)
4418 l2_dom_bb = l1_bb;
4420 n1 = fd->loop.n1;
4421 n2 = fd->loop.n2;
4422 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4424 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4425 OMP_CLAUSE__LOOPTEMP_);
4426 gcc_assert (innerc);
4427 n1 = OMP_CLAUSE_DECL (innerc);
4428 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4429 OMP_CLAUSE__LOOPTEMP_);
4430 gcc_assert (innerc);
4431 n2 = OMP_CLAUSE_DECL (innerc);
4433 tree step = fd->loop.step;
4435 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4436 OMP_CLAUSE__SIMT_);
4437 if (is_simt)
4439 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4440 is_simt = safelen_int > 1;
4442 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4443 if (is_simt)
4445 simt_lane = create_tmp_var (unsigned_type_node);
4446 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4447 gimple_call_set_lhs (g, simt_lane);
4448 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4449 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4450 fold_convert (TREE_TYPE (step), simt_lane));
4451 n1 = fold_convert (type, n1);
4452 if (POINTER_TYPE_P (type))
4453 n1 = fold_build_pointer_plus (n1, offset);
4454 else
4455 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4457 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4458 if (fd->collapse > 1)
4459 simt_maxlane = build_one_cst (unsigned_type_node);
4460 else if (safelen_int < omp_max_simt_vf ())
4461 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4462 tree vf
4463 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4464 unsigned_type_node, 0);
4465 if (simt_maxlane)
4466 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4467 vf = fold_convert (TREE_TYPE (step), vf);
4468 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4471 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4472 if (fd->collapse > 1)
4474 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4476 gsi_prev (&gsi);
4477 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4478 gsi_next (&gsi);
4480 else
4481 for (i = 0; i < fd->collapse; i++)
4483 tree itype = TREE_TYPE (fd->loops[i].v);
4484 if (POINTER_TYPE_P (itype))
4485 itype = signed_type_for (itype);
4486 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4487 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4491 /* Remove the GIMPLE_OMP_FOR statement. */
4492 gsi_remove (&gsi, true);
4494 if (!broken_loop)
4496 /* Code to control the increment goes in the CONT_BB. */
4497 gsi = gsi_last_nondebug_bb (cont_bb);
4498 stmt = gsi_stmt (gsi);
4499 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4501 if (POINTER_TYPE_P (type))
4502 t = fold_build_pointer_plus (fd->loop.v, step);
4503 else
4504 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4505 expand_omp_build_assign (&gsi, fd->loop.v, t);
4507 if (fd->collapse > 1)
4509 i = fd->collapse - 1;
4510 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4512 t = fold_convert (sizetype, fd->loops[i].step);
4513 t = fold_build_pointer_plus (fd->loops[i].v, t);
4515 else
4517 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4518 fd->loops[i].step);
4519 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4520 fd->loops[i].v, t);
4522 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4524 for (i = fd->collapse - 1; i > 0; i--)
4526 tree itype = TREE_TYPE (fd->loops[i].v);
4527 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4528 if (POINTER_TYPE_P (itype2))
4529 itype2 = signed_type_for (itype2);
4530 t = fold_convert (itype2, fd->loops[i - 1].step);
4531 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4532 GSI_SAME_STMT);
4533 t = build3 (COND_EXPR, itype2,
4534 build2 (fd->loops[i].cond_code, boolean_type_node,
4535 fd->loops[i].v,
4536 fold_convert (itype, fd->loops[i].n2)),
4537 build_int_cst (itype2, 0), t);
4538 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4539 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4540 else
4541 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4542 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4544 t = fold_convert (itype, fd->loops[i].n1);
4545 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4546 GSI_SAME_STMT);
4547 t = build3 (COND_EXPR, itype,
4548 build2 (fd->loops[i].cond_code, boolean_type_node,
4549 fd->loops[i].v,
4550 fold_convert (itype, fd->loops[i].n2)),
4551 fd->loops[i].v, t);
4552 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4556 /* Remove GIMPLE_OMP_CONTINUE. */
4557 gsi_remove (&gsi, true);
4560 /* Emit the condition in L1_BB. */
4561 gsi = gsi_start_bb (l1_bb);
4563 t = fold_convert (type, n2);
4564 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4565 false, GSI_CONTINUE_LINKING);
4566 tree v = fd->loop.v;
4567 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4568 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4569 false, GSI_CONTINUE_LINKING);
4570 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4571 cond_stmt = gimple_build_cond_empty (t);
4572 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4573 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4574 NULL, NULL)
4575 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4576 NULL, NULL))
4578 gsi = gsi_for_stmt (cond_stmt);
4579 gimple_regimplify_operands (cond_stmt, &gsi);
4582 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4583 if (is_simt)
4585 gsi = gsi_start_bb (l2_bb);
4586 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4587 if (POINTER_TYPE_P (type))
4588 t = fold_build_pointer_plus (fd->loop.v, step);
4589 else
4590 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4591 expand_omp_build_assign (&gsi, fd->loop.v, t);
4594 /* Remove GIMPLE_OMP_RETURN. */
4595 gsi = gsi_last_nondebug_bb (exit_bb);
4596 gsi_remove (&gsi, true);
4598 /* Connect the new blocks. */
4599 remove_edge (FALLTHRU_EDGE (entry_bb));
4601 if (!broken_loop)
4603 remove_edge (BRANCH_EDGE (entry_bb));
4604 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4606 e = BRANCH_EDGE (l1_bb);
4607 ne = FALLTHRU_EDGE (l1_bb);
4608 e->flags = EDGE_TRUE_VALUE;
4610 else
4612 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4614 ne = single_succ_edge (l1_bb);
4615 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4618 ne->flags = EDGE_FALSE_VALUE;
4619 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4620 ne->probability = e->probability.invert ();
4622 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4623 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4625 if (simt_maxlane)
4627 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4628 NULL_TREE, NULL_TREE);
4629 gsi = gsi_last_bb (entry_bb);
4630 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4631 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4632 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4633 FALLTHRU_EDGE (entry_bb)->probability
4634 = profile_probability::guessed_always ().apply_scale (7, 8);
4635 BRANCH_EDGE (entry_bb)->probability
4636 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4637 l2_dom_bb = entry_bb;
4639 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4641 if (!broken_loop)
4643 struct loop *loop = alloc_loop ();
4644 loop->header = l1_bb;
4645 loop->latch = cont_bb;
4646 add_loop (loop, l1_bb->loop_father);
4647 loop->safelen = safelen_int;
4648 if (simduid)
4650 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4651 cfun->has_simduid_loops = true;
4653 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4654 the loop. */
4655 if ((flag_tree_loop_vectorize
4656 || !global_options_set.x_flag_tree_loop_vectorize)
4657 && flag_tree_loop_optimize
4658 && loop->safelen > 1)
4660 loop->force_vectorize = true;
4661 cfun->has_force_vectorize_loops = true;
4664 else if (simduid)
4665 cfun->has_simduid_loops = true;
4668 /* Taskloop construct is represented after gimplification with
4669 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4670 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4671 which should just compute all the needed loop temporaries
4672 for GIMPLE_OMP_TASK. */
4674 static void
4675 expand_omp_taskloop_for_outer (struct omp_region *region,
4676 struct omp_for_data *fd,
4677 gimple *inner_stmt)
4679 tree type, bias = NULL_TREE;
4680 basic_block entry_bb, cont_bb, exit_bb;
4681 gimple_stmt_iterator gsi;
4682 gassign *assign_stmt;
4683 tree *counts = NULL;
4684 int i;
4686 gcc_assert (inner_stmt);
4687 gcc_assert (region->cont);
4688 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4689 && gimple_omp_task_taskloop_p (inner_stmt));
4690 type = TREE_TYPE (fd->loop.v);
4692 /* See if we need to bias by LLONG_MIN. */
4693 if (fd->iter_type == long_long_unsigned_type_node
4694 && TREE_CODE (type) == INTEGER_TYPE
4695 && !TYPE_UNSIGNED (type))
4697 tree n1, n2;
4699 if (fd->loop.cond_code == LT_EXPR)
4701 n1 = fd->loop.n1;
4702 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4704 else
4706 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4707 n2 = fd->loop.n1;
4709 if (TREE_CODE (n1) != INTEGER_CST
4710 || TREE_CODE (n2) != INTEGER_CST
4711 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4712 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4715 entry_bb = region->entry;
4716 cont_bb = region->cont;
4717 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4718 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4719 exit_bb = region->exit;
4721 gsi = gsi_last_nondebug_bb (entry_bb);
4722 gimple *for_stmt = gsi_stmt (gsi);
4723 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4724 if (fd->collapse > 1)
4726 int first_zero_iter = -1, dummy = -1;
4727 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4729 counts = XALLOCAVEC (tree, fd->collapse);
4730 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4731 zero_iter_bb, first_zero_iter,
4732 dummy_bb, dummy, l2_dom_bb);
4734 if (zero_iter_bb)
4736 /* Some counts[i] vars might be uninitialized if
4737 some loop has zero iterations. But the body shouldn't
4738 be executed in that case, so just avoid uninit warnings. */
4739 for (i = first_zero_iter; i < fd->collapse; i++)
4740 if (SSA_VAR_P (counts[i]))
4741 TREE_NO_WARNING (counts[i]) = 1;
4742 gsi_prev (&gsi);
4743 edge e = split_block (entry_bb, gsi_stmt (gsi));
4744 entry_bb = e->dest;
4745 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4746 gsi = gsi_last_bb (entry_bb);
4747 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4748 get_immediate_dominator (CDI_DOMINATORS,
4749 zero_iter_bb));
4753 tree t0, t1;
4754 t1 = fd->loop.n2;
4755 t0 = fd->loop.n1;
4756 if (POINTER_TYPE_P (TREE_TYPE (t0))
4757 && TYPE_PRECISION (TREE_TYPE (t0))
4758 != TYPE_PRECISION (fd->iter_type))
4760 /* Avoid casting pointers to integer of a different size. */
4761 tree itype = signed_type_for (type);
4762 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4763 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4765 else
4767 t1 = fold_convert (fd->iter_type, t1);
4768 t0 = fold_convert (fd->iter_type, t0);
4770 if (bias)
4772 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4773 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4776 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4777 OMP_CLAUSE__LOOPTEMP_);
4778 gcc_assert (innerc);
4779 tree startvar = OMP_CLAUSE_DECL (innerc);
4780 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4781 gcc_assert (innerc);
4782 tree endvar = OMP_CLAUSE_DECL (innerc);
4783 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4785 gcc_assert (innerc);
4786 for (i = 1; i < fd->collapse; i++)
4788 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4789 OMP_CLAUSE__LOOPTEMP_);
4790 gcc_assert (innerc);
4792 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4793 OMP_CLAUSE__LOOPTEMP_);
4794 if (innerc)
4796 /* If needed (inner taskloop has lastprivate clause), propagate
4797 down the total number of iterations. */
4798 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4799 NULL_TREE, false,
4800 GSI_CONTINUE_LINKING);
4801 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4802 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4806 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4807 GSI_CONTINUE_LINKING);
4808 assign_stmt = gimple_build_assign (startvar, t0);
4809 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4811 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4812 GSI_CONTINUE_LINKING);
4813 assign_stmt = gimple_build_assign (endvar, t1);
4814 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4815 if (fd->collapse > 1)
4816 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4818 /* Remove the GIMPLE_OMP_FOR statement. */
4819 gsi = gsi_for_stmt (for_stmt);
4820 gsi_remove (&gsi, true);
4822 gsi = gsi_last_nondebug_bb (cont_bb);
4823 gsi_remove (&gsi, true);
4825 gsi = gsi_last_nondebug_bb (exit_bb);
4826 gsi_remove (&gsi, true);
4828 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4829 remove_edge (BRANCH_EDGE (entry_bb));
4830 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4831 remove_edge (BRANCH_EDGE (cont_bb));
4832 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4833 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4834 recompute_dominator (CDI_DOMINATORS, region->entry));
4837 /* Taskloop construct is represented after gimplification with
4838 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4839 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4840 GOMP_taskloop{,_ull} function arranges for each task to be given just
4841 a single range of iterations. */
4843 static void
4844 expand_omp_taskloop_for_inner (struct omp_region *region,
4845 struct omp_for_data *fd,
4846 gimple *inner_stmt)
4848 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4849 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4850 basic_block fin_bb;
4851 gimple_stmt_iterator gsi;
4852 edge ep;
4853 bool broken_loop = region->cont == NULL;
4854 tree *counts = NULL;
4855 tree n1, n2, step;
4857 itype = type = TREE_TYPE (fd->loop.v);
4858 if (POINTER_TYPE_P (type))
4859 itype = signed_type_for (type);
4861 /* See if we need to bias by LLONG_MIN. */
4862 if (fd->iter_type == long_long_unsigned_type_node
4863 && TREE_CODE (type) == INTEGER_TYPE
4864 && !TYPE_UNSIGNED (type))
4866 tree n1, n2;
4868 if (fd->loop.cond_code == LT_EXPR)
4870 n1 = fd->loop.n1;
4871 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4873 else
4875 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4876 n2 = fd->loop.n1;
4878 if (TREE_CODE (n1) != INTEGER_CST
4879 || TREE_CODE (n2) != INTEGER_CST
4880 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4881 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4884 entry_bb = region->entry;
4885 cont_bb = region->cont;
4886 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4887 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4888 gcc_assert (broken_loop
4889 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4890 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4891 if (!broken_loop)
4893 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4894 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4896 exit_bb = region->exit;
4898 /* Iteration space partitioning goes in ENTRY_BB. */
4899 gsi = gsi_last_nondebug_bb (entry_bb);
4900 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4902 if (fd->collapse > 1)
4904 int first_zero_iter = -1, dummy = -1;
4905 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4907 counts = XALLOCAVEC (tree, fd->collapse);
4908 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4909 fin_bb, first_zero_iter,
4910 dummy_bb, dummy, l2_dom_bb);
4911 t = NULL_TREE;
4913 else
4914 t = integer_one_node;
4916 step = fd->loop.step;
4917 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4918 OMP_CLAUSE__LOOPTEMP_);
4919 gcc_assert (innerc);
4920 n1 = OMP_CLAUSE_DECL (innerc);
4921 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4922 gcc_assert (innerc);
4923 n2 = OMP_CLAUSE_DECL (innerc);
4924 if (bias)
4926 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4927 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4929 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4930 true, NULL_TREE, true, GSI_SAME_STMT);
4931 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4932 true, NULL_TREE, true, GSI_SAME_STMT);
4933 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4934 true, NULL_TREE, true, GSI_SAME_STMT);
4936 tree startvar = fd->loop.v;
4937 tree endvar = NULL_TREE;
4939 if (gimple_omp_for_combined_p (fd->for_stmt))
4941 tree clauses = gimple_omp_for_clauses (inner_stmt);
4942 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4943 gcc_assert (innerc);
4944 startvar = OMP_CLAUSE_DECL (innerc);
4945 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4946 OMP_CLAUSE__LOOPTEMP_);
4947 gcc_assert (innerc);
4948 endvar = OMP_CLAUSE_DECL (innerc);
4950 t = fold_convert (TREE_TYPE (startvar), n1);
4951 t = force_gimple_operand_gsi (&gsi, t,
4952 DECL_P (startvar)
4953 && TREE_ADDRESSABLE (startvar),
4954 NULL_TREE, false, GSI_CONTINUE_LINKING);
4955 gimple *assign_stmt = gimple_build_assign (startvar, t);
4956 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4958 t = fold_convert (TREE_TYPE (startvar), n2);
4959 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4960 false, GSI_CONTINUE_LINKING);
4961 if (endvar)
4963 assign_stmt = gimple_build_assign (endvar, e);
4964 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4965 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4966 assign_stmt = gimple_build_assign (fd->loop.v, e);
4967 else
4968 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4969 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4971 if (fd->collapse > 1)
4972 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4974 if (!broken_loop)
4976 /* The code controlling the sequential loop replaces the
4977 GIMPLE_OMP_CONTINUE. */
4978 gsi = gsi_last_nondebug_bb (cont_bb);
4979 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4980 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4981 vmain = gimple_omp_continue_control_use (cont_stmt);
4982 vback = gimple_omp_continue_control_def (cont_stmt);
4984 if (!gimple_omp_for_combined_p (fd->for_stmt))
4986 if (POINTER_TYPE_P (type))
4987 t = fold_build_pointer_plus (vmain, step);
4988 else
4989 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4990 t = force_gimple_operand_gsi (&gsi, t,
4991 DECL_P (vback)
4992 && TREE_ADDRESSABLE (vback),
4993 NULL_TREE, true, GSI_SAME_STMT);
4994 assign_stmt = gimple_build_assign (vback, t);
4995 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4997 t = build2 (fd->loop.cond_code, boolean_type_node,
4998 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4999 ? t : vback, e);
5000 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5003 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5004 gsi_remove (&gsi, true);
5006 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5007 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5010 /* Remove the GIMPLE_OMP_FOR statement. */
5011 gsi = gsi_for_stmt (fd->for_stmt);
5012 gsi_remove (&gsi, true);
5014 /* Remove the GIMPLE_OMP_RETURN statement. */
5015 gsi = gsi_last_nondebug_bb (exit_bb);
5016 gsi_remove (&gsi, true);
5018 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5019 if (!broken_loop)
5020 remove_edge (BRANCH_EDGE (entry_bb));
5021 else
5023 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5024 region->outer->cont = NULL;
5027 /* Connect all the blocks. */
5028 if (!broken_loop)
5030 ep = find_edge (cont_bb, body_bb);
5031 if (gimple_omp_for_combined_p (fd->for_stmt))
5033 remove_edge (ep);
5034 ep = NULL;
5036 else if (fd->collapse > 1)
5038 remove_edge (ep);
5039 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5041 else
5042 ep->flags = EDGE_TRUE_VALUE;
5043 find_edge (cont_bb, fin_bb)->flags
5044 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5047 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5048 recompute_dominator (CDI_DOMINATORS, body_bb));
5049 if (!broken_loop)
5050 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5051 recompute_dominator (CDI_DOMINATORS, fin_bb));
5053 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5055 struct loop *loop = alloc_loop ();
5056 loop->header = body_bb;
5057 if (collapse_bb == NULL)
5058 loop->latch = cont_bb;
5059 add_loop (loop, body_bb->loop_father);
5063 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5064 partitioned loop. The lowering here is abstracted, in that the
5065 loop parameters are passed through internal functions, which are
5066 further lowered by oacc_device_lower, once we get to the target
5067 compiler. The loop is of the form:
5069 for (V = B; V LTGT E; V += S) {BODY}
5071 where LTGT is < or >. We may have a specified chunk size, CHUNK_SIZE
5072 (constant 0 for no chunking) and we will have a GWV partitioning
5073 mask, specifying dimensions over which the loop is to be
5074 partitioned (see note below). We generate code that looks like
5075 (this ignores tiling):
5077 <entry_bb> [incoming FALL->body, BRANCH->exit]
5078 typedef signedintify (typeof (V)) T; // underlying signed integral type
5079 T range = E - B;
5080 T chunk_no = 0;
5081 T dir = LTGT == '<' ? +1 : -1;
5082 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5083 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5085 <head_bb> [created by splitting end of entry_bb]
5086 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5087 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5088 if (!(offset LTGT bound)) goto bottom_bb;
5090 <body_bb> [incoming]
5091 V = B + offset;
5092 {BODY}
5094 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5095 offset += step;
5096 if (offset LTGT bound) goto body_bb; [*]
5098 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5099 chunk_no++;
5100 if (chunk_no < chunk_max) goto head_bb;
5102 <exit_bb> [incoming]
5103 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5105 [*] Needed if V live at end of loop. */
5107 static void
5108 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
/* REGION supplies the entry, exit and continue blocks; FD supplies the
   loop variable, bounds, step, condition code, collapse count and tiling
   flag.  Nothing is returned: the statements in REGION's blocks, the CFG
   edges and the loop tree are updated in place.  */
5110 tree v = fd->loop.v;
5111 enum tree_code cond_code = fd->loop.cond_code;
5112 enum tree_code plus_code = PLUS_EXPR;
5114 tree chunk_size = integer_minus_one_node;
5115 tree gwv = integer_zero_node;
5116 tree iter_type = TREE_TYPE (v);
5117 tree diff_type = iter_type;
5118 tree plus_type = iter_type;
5119 struct oacc_collapse *counts = NULL;
5121 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5122 == GF_OMP_FOR_KIND_OACC_LOOP);
5123 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5124 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
/* Pointer iteration variables are advanced with POINTER_PLUS_EXPR and a
   sizetype offset.  */
5126 if (POINTER_TYPE_P (iter_type))
5128 plus_code = POINTER_PLUS_EXPR;
5129 plus_type = sizetype;
/* DIFF_TYPE is passed through signed_type_for when the iteration type is
   a pointer or unsigned, and is widened to int when narrower; range, step
   and offset arithmetic below is done in it.  */
5131 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5132 diff_type = signed_type_for (diff_type);
5133 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5134 diff_type = integer_type_node;
5136 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5137 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5138 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5139 basic_block bottom_bb = NULL;
5141 /* entry_bb has two successors; the branch edge is to the exit
5142 block, fallthrough edge to body. */
5143 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5144 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5146 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5147 body_bb, or to a block whose only successor is the body_bb. Its
5148 fallthrough successor is the final block (same as the branch
5149 successor of the entry_bb). */
5150 if (cont_bb)
5152 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5153 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5155 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5156 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5158 else
5159 gcc_assert (!gimple_in_ssa_p (cfun));
5161 /* The exit block only has entry_bb and cont_bb as predecessors. */
5162 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5164 tree chunk_no;
5165 tree chunk_max = NULL_TREE;
5166 tree bound, offset;
5167 tree step = create_tmp_var (diff_type, ".step");
5168 bool up = cond_code == LT_EXPR;
5169 tree dir = build_int_cst (diff_type, up ? +1 : -1);
/* Chunking is enabled exactly when the function is not in SSA form.  */
5170 bool chunking = !gimple_in_ssa_p (cfun);
5171 bool negating;
5173 /* Tiling vars. */
5174 tree tile_size = NULL_TREE;
5175 tree element_s = NULL_TREE;
5176 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5177 basic_block elem_body_bb = NULL;
5178 basic_block elem_cont_bb = NULL;
5180 /* SSA instances. */
5181 tree offset_incr = NULL_TREE;
5182 tree offset_init = NULL_TREE;
5184 gimple_stmt_iterator gsi;
5185 gassign *ass;
5186 gcall *call;
5187 gimple *stmt;
5188 tree expr;
5189 location_t loc;
5190 edge split, be, fte;
5192 /* Split the end of entry_bb to create head_bb. */
5193 split = split_block (entry_bb, last_stmt (entry_bb));
5194 basic_block head_bb = split->dest;
5195 entry_bb = split->src;
5197 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5198 gsi = gsi_last_nondebug_bb (entry_bb);
5199 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5200 loc = gimple_location (for_stmt);
5202 if (gimple_in_ssa_p (cfun))
5204 offset_init = gimple_omp_for_index (for_stmt, 0);
5205 gcc_assert (integer_zerop (fd->loop.n1));
5206 /* The SSA parallelizer does gang parallelism. */
5207 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
/* Collapsed or tiled loops are only handled outside SSA form and for
   upward-counting loops (asserted below); the total computed by
   expand_oacc_collapse_init is stored into fd->loop.n2 when that is a
   variable.  */
5210 if (fd->collapse > 1 || fd->tiling)
5212 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5213 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5214 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5215 TREE_TYPE (fd->loop.n2), loc);
5217 if (SSA_VAR_P (fd->loop.n2))
5219 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5220 true, GSI_SAME_STMT);
5221 ass = gimple_build_assign (fd->loop.n2, total);
5222 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
/* B, E and S are the loop's begin, end and step operands.  */
5226 tree b = fd->loop.n1;
5227 tree e = fd->loop.n2;
5228 tree s = fd->loop.step;
5230 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5231 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5233 /* Convert the step, avoiding possible unsigned->signed overflow. */
5234 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5235 if (negating)
5236 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5237 s = fold_convert (diff_type, s);
5238 if (negating)
5239 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5240 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
/* CHUNK_SIZE starts out as -1 (see its initializer) and becomes 0 when
   not chunking.  */
5242 if (!chunking)
5243 chunk_size = integer_zero_node;
5244 expr = fold_convert (diff_type, chunk_size);
5245 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5246 NULL_TREE, true, GSI_SAME_STMT);
5248 if (fd->tiling)
5250 /* Determine the tile size and element step,
5251 modify the outer loop step size. */
5252 tile_size = create_tmp_var (diff_type, ".tile_size");
5253 expr = build_int_cst (diff_type, 1);
5254 for (int ix = 0; ix < fd->collapse; ix++)
5255 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5256 expr = force_gimple_operand_gsi (&gsi, expr, true,
5257 NULL_TREE, true, GSI_SAME_STMT);
5258 ass = gimple_build_assign (tile_size, expr);
5259 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5261 element_s = create_tmp_var (diff_type, ".element_s");
5262 ass = gimple_build_assign (element_s, s);
5263 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5265 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5266 s = force_gimple_operand_gsi (&gsi, expr, true,
5267 NULL_TREE, true, GSI_SAME_STMT);
5270 /* Determine the range, avoiding possible unsigned->signed overflow. */
5271 negating = !up && TYPE_UNSIGNED (iter_type);
5272 expr = fold_build2 (MINUS_EXPR, plus_type,
5273 fold_convert (plus_type, negating ? b : e),
5274 fold_convert (plus_type, negating ? e : b));
5275 expr = fold_convert (diff_type, expr);
5276 if (negating)
5277 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5278 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5279 NULL_TREE, true, GSI_SAME_STMT);
/* CHUNK_NO is the constant zero unless chunking, in which case it becomes
   a temporary initialized to that zero and CHUNK_MAX receives the result
   of the IFN_GOACC_LOOP_CHUNKS call.  */
5281 chunk_no = build_int_cst (diff_type, 0);
5282 if (chunking)
5284 gcc_assert (!gimple_in_ssa_p (cfun));
5286 expr = chunk_no;
5287 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5288 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5290 ass = gimple_build_assign (chunk_no, expr);
5291 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5293 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5294 build_int_cst (integer_type_node,
5295 IFN_GOACC_LOOP_CHUNKS),
5296 dir, range, s, chunk_size, gwv);
5297 gimple_call_set_lhs (call, chunk_max);
5298 gimple_set_location (call, loc);
5299 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5301 else
/* Not chunking: reuse the zero constant in CHUNK_NO as the chunk size
   operand of the calls below.  */
5302 chunk_size = chunk_no;
/* The per-iteration step comes from the IFN_GOACC_LOOP_STEP call.  */
5304 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5305 build_int_cst (integer_type_node,
5306 IFN_GOACC_LOOP_STEP),
5307 dir, range, s, chunk_size, gwv);
5308 gimple_call_set_lhs (call, step);
5309 gimple_set_location (call, loc);
5310 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5312 /* Remove the GIMPLE_OMP_FOR. */
5313 gsi_remove (&gsi, true);
5315 /* Fixup edges from head_bb. */
5316 be = BRANCH_EDGE (head_bb);
5317 fte = FALLTHRU_EDGE (head_bb);
5318 be->flags |= EDGE_FALSE_VALUE;
5319 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5321 basic_block body_bb = fte->dest;
/* In SSA form the offset operands are taken from the GIMPLE_OMP_CONTINUE
   statement; otherwise a single temporary serves as initial, current and
   incremented offset.  */
5323 if (gimple_in_ssa_p (cfun))
5325 gsi = gsi_last_nondebug_bb (cont_bb);
5326 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5328 offset = gimple_omp_continue_control_use (cont_stmt);
5329 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5331 else
5333 offset = create_tmp_var (diff_type, ".offset");
5334 offset_init = offset_incr = offset;
5336 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5338 /* Loop offset & bound go into head_bb. */
5339 gsi = gsi_start_bb (head_bb);
5341 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5342 build_int_cst (integer_type_node,
5343 IFN_GOACC_LOOP_OFFSET),
5344 dir, range, s,
5345 chunk_size, gwv, chunk_no);
5346 gimple_call_set_lhs (call, offset_init);
5347 gimple_set_location (call, loc);
5348 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5350 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5351 build_int_cst (integer_type_node,
5352 IFN_GOACC_LOOP_BOUND),
5353 dir, range, s,
5354 chunk_size, gwv, offset_init);
5355 gimple_call_set_lhs (call, bound);
5356 gimple_set_location (call, loc);
5357 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5359 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5360 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5361 GSI_CONTINUE_LINKING);
5363 /* V assignment goes into body_bb. */
5364 if (!gimple_in_ssa_p (cfun))
5366 gsi = gsi_start_bb (body_bb);
5368 expr = build2 (plus_code, iter_type, b,
5369 fold_convert (plus_type, offset));
5370 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5371 true, GSI_SAME_STMT);
5372 ass = gimple_build_assign (v, expr);
5373 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5375 if (fd->collapse > 1 || fd->tiling)
5376 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5378 if (fd->tiling)
5380 /* Determine the range of the element loop -- usually simply
5381 the tile_size, but could be smaller if the final
5382 iteration of the outer loop is a partial tile. */
5383 tree e_range = create_tmp_var (diff_type, ".e_range");
5385 expr = build2 (MIN_EXPR, diff_type,
5386 build2 (MINUS_EXPR, diff_type, bound, offset),
5387 build2 (MULT_EXPR, diff_type, tile_size,
5388 element_s));
5389 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5390 true, GSI_SAME_STMT);
5391 ass = gimple_build_assign (e_range, expr);
5392 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5394 /* Determine bound, offset & step of inner loop. */
5395 e_bound = create_tmp_var (diff_type, ".e_bound");
5396 e_offset = create_tmp_var (diff_type, ".e_offset");
5397 e_step = create_tmp_var (diff_type, ".e_step");
5399 /* Mark these as element loops. */
5400 tree t, e_gwv = integer_minus_one_node;
5401 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5403 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5404 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5405 element_s, chunk, e_gwv, chunk);
5406 gimple_call_set_lhs (call, e_offset);
5407 gimple_set_location (call, loc);
5408 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5410 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5411 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5412 element_s, chunk, e_gwv, e_offset);
5413 gimple_call_set_lhs (call, e_bound);
5414 gimple_set_location (call, loc);
5415 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5417 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5418 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5419 element_s, chunk, e_gwv);
5420 gimple_call_set_lhs (call, e_step);
5421 gimple_set_location (call, loc);
5422 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5424 /* Add test and split block. */
5425 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5426 stmt = gimple_build_cond_empty (expr);
5427 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5428 split = split_block (body_bb, stmt);
5429 elem_body_bb = split->dest;
/* If the continue block was the block just split, track the second half
   from now on (presumably because the GIMPLE_OMP_CONTINUE ends up after
   the split point -- confirm).  */
5430 if (cont_bb == body_bb)
5431 cont_bb = elem_body_bb;
5432 body_bb = split->src;
5434 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5436 /* Initialize the user's loop vars. */
5437 gsi = gsi_start_bb (elem_body_bb);
5438 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5442 /* Loop increment goes into cont_bb. If this is not a loop, we
5443 will have spawned threads as if it was, and each one will
5444 execute one iteration. The specification is not explicit about
5445 whether such constructs are ill-formed or not, and they can
5446 occur, especially when noreturn routines are involved. */
5447 if (cont_bb)
5449 gsi = gsi_last_nondebug_bb (cont_bb);
5450 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5451 loc = gimple_location (cont_stmt);
5453 if (fd->tiling)
5455 /* Insert element loop increment and test. */
5456 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5457 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5458 true, GSI_SAME_STMT);
5459 ass = gimple_build_assign (e_offset, expr);
5460 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5461 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5463 stmt = gimple_build_cond_empty (expr);
5464 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5465 split = split_block (cont_bb, stmt);
5466 elem_cont_bb = split->src;
5467 cont_bb = split->dest;
5469 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5470 split->probability = profile_probability::unlikely ().guessed ();
5471 edge latch_edge
5472 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5473 latch_edge->probability = profile_probability::likely ().guessed ();
5475 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5476 skip_edge->probability = profile_probability::unlikely ().guessed ();
/* NOTE(review): skip_edge->dest_idx is presumably the edge's position in
   cont_bb's predecessor vector, yet it is used here to pick the other
   successor of body_bb -- confirm the two indices always coincide.  */
5477 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5478 loop_entry_edge->probability
5479 = profile_probability::likely ().guessed ();
5481 gsi = gsi_for_stmt (cont_stmt);
5484 /* Increment offset. */
5485 if (gimple_in_ssa_p (cfun))
5486 expr = build2 (plus_code, iter_type, offset,
5487 fold_convert (plus_type, step));
5488 else
5489 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5490 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5491 true, GSI_SAME_STMT);
5492 ass = gimple_build_assign (offset_incr, expr);
5493 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5494 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5495 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5497 /* Remove the GIMPLE_OMP_CONTINUE. */
5498 gsi_remove (&gsi, true);
5500 /* Fixup edges from cont_bb. */
5501 be = BRANCH_EDGE (cont_bb);
5502 fte = FALLTHRU_EDGE (cont_bb);
5503 be->flags |= EDGE_TRUE_VALUE;
5504 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5506 if (chunking)
5508 /* Split the beginning of exit_bb to make bottom_bb. We
5509 need to insert a nop at the start, because splitting is
5510 after a stmt, not before. */
5511 gsi = gsi_start_bb (exit_bb);
5512 stmt = gimple_build_nop ();
5513 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5514 split = split_block (exit_bb, stmt);
5515 bottom_bb = split->src;
5516 exit_bb = split->dest;
5517 gsi = gsi_last_bb (bottom_bb);
5519 /* Chunk increment and test goes into bottom_bb. */
5520 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5521 build_int_cst (diff_type, 1));
5522 ass = gimple_build_assign (chunk_no, expr);
5523 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5525 /* Chunk test at end of bottom_bb. */
5526 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5527 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5528 GSI_CONTINUE_LINKING);
5530 /* Fixup edges from bottom_bb. */
5531 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5532 split->probability = profile_probability::unlikely ().guessed ();
5533 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5534 latch_edge->probability = profile_probability::likely ().guessed ();
5538 gsi = gsi_last_nondebug_bb (exit_bb);
5539 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5540 loc = gimple_location (gsi_stmt (gsi));
5542 if (!gimple_in_ssa_p (cfun))
5544 /* Insert the final value of V, in case it is live. This is the
5545 value for the only thread that survives past the join. */
5546 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5547 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5548 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5549 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5550 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5551 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5552 true, GSI_SAME_STMT);
5553 ass = gimple_build_assign (v, expr);
5554 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5557 /* Remove the OMP_RETURN. */
5558 gsi_remove (&gsi, true);
5560 if (cont_bb)
5562 /* We now have one, two or three nested loops. Update the loop
5563 structures. */
5564 struct loop *parent = entry_bb->loop_father;
5565 struct loop *body = body_bb->loop_father;
5567 if (chunking)
5569 struct loop *chunk_loop = alloc_loop ();
5570 chunk_loop->header = head_bb;
5571 chunk_loop->latch = bottom_bb;
5572 add_loop (chunk_loop, parent);
5573 parent = chunk_loop;
5575 else if (parent != body)
/* The body already belongs to a loop of its own: only check that it has
   the expected header and latch, and clear PARENT so no new body loop is
   registered below.  */
5577 gcc_assert (body->header == body_bb);
5578 gcc_assert (body->latch == cont_bb
5579 || single_pred (body->latch) == cont_bb);
5580 parent = NULL;
5583 if (parent)
5585 struct loop *body_loop = alloc_loop ();
5586 body_loop->header = body_bb;
5587 body_loop->latch = cont_bb;
5588 add_loop (body_loop, parent);
5590 if (fd->tiling)
5592 /* Insert tiling's element loop. */
5593 struct loop *inner_loop = alloc_loop ();
5594 inner_loop->header = elem_body_bb;
5595 inner_loop->latch = elem_cont_bb;
5596 add_loop (inner_loop, body_loop);
5602 /* Expand the OMP loop defined by REGION. */
5604 static void
5605 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5607 struct omp_for_data fd;
5608 struct omp_for_data_loop *loops;
5610 loops
5611 = (struct omp_for_data_loop *)
5612 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5613 * sizeof (struct omp_for_data_loop));
5614 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5615 &fd, loops);
5616 region->sched_kind = fd.sched_kind;
5617 region->sched_modifiers = fd.sched_modifiers;
5619 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5620 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5621 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5622 if (region->cont)
5624 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5625 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5626 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5628 else
5629 /* If there isn't a continue then this is a degerate case where
5630 the introduction of abnormal edges during lowering will prevent
5631 original loops from being detected. Fix that up. */
5632 loops_state_set (LOOPS_NEED_FIXUP);
5634 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5635 expand_omp_simd (region, &fd);
5636 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5638 gcc_assert (!inner_stmt);
5639 expand_oacc_for (region, &fd);
5641 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5643 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5644 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5645 else
5646 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5648 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5649 && !fd.have_ordered)
5651 if (fd.chunk_size == NULL)
5652 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5653 else
5654 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5656 else
5658 int fn_index, start_ix, next_ix;
5660 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5661 == GF_OMP_FOR_KIND_FOR);
5662 if (fd.chunk_size == NULL
5663 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5664 fd.chunk_size = integer_zero_node;
5665 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5666 switch (fd.sched_kind)
5668 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5669 fn_index = 3;
5670 break;
5671 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5672 case OMP_CLAUSE_SCHEDULE_GUIDED:
5673 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5674 && !fd.ordered
5675 && !fd.have_ordered)
5677 fn_index = 3 + fd.sched_kind;
5678 break;
5680 /* FALLTHRU */
5681 default:
5682 fn_index = fd.sched_kind;
5683 break;
5685 if (!fd.ordered)
5686 fn_index += fd.have_ordered * 6;
5687 if (fd.ordered)
5688 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5689 else
5690 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5691 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5692 if (fd.iter_type == long_long_unsigned_type_node)
5694 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5695 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5696 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5697 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5699 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5700 (enum built_in_function) next_ix, inner_stmt);
5703 if (gimple_in_ssa_p (cfun))
5704 update_ssa (TODO_update_ssa_only_virtuals);
5707 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5709 v = GOMP_sections_start (n);
5710 L0:
5711 switch (v)
5712 {
5713 case 0:
5714 goto L2;
5715 case 1:
5716 section 1;
5717 goto L1;
5718 case 2:
5719 ...
5720 case n:
5721 ...
5722 default:
5723 abort ();
5724 }
5725 L1:
5726 v = GOMP_sections_next ();
5727 goto L0;
5728 L2:
5729 reduction;
5731 If this is a combined parallel sections, replace the call to
5732 GOMP_sections_start with call to GOMP_sections_next. */
5734 static void
5735 expand_omp_sections (struct omp_region *region)
/* REGION is the sections region: its entry block ends in
   GIMPLE_OMP_SECTIONS, its exit block in GIMPLE_OMP_RETURN and its
   continue block, when present, in GIMPLE_OMP_CONTINUE.  The markers are
   replaced in place by calls to the GOMP_sections_* builtins and a switch
   over the section number; nothing is returned.  */
5737 tree t, u, vin = NULL, vmain, vnext, l2;
5738 unsigned len;
5739 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5740 gimple_stmt_iterator si, switch_si;
5741 gomp_sections *sections_stmt;
5742 gimple *stmt;
5743 gomp_continue *cont;
5744 edge_iterator ei;
5745 edge e;
5746 struct omp_region *inner;
5747 unsigned i, casei;
/* The exit is treated as reachable only when the region has a continue
   block.  */
5748 bool exit_reachable = region->cont != NULL;
5750 gcc_assert (region->exit != NULL);
5751 entry_bb = region->entry;
5752 l0_bb = single_succ (entry_bb);
5753 l1_bb = region->cont;
5754 l2_bb = region->exit;
/* Find the label that case 0 jumps to: the exit block itself when L0_BB
   is its only predecessor, otherwise a successor of L0_BB that does not
   end in GIMPLE_OMP_SECTION.  */
5755 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5756 l2 = gimple_block_label (l2_bb);
5757 else
5759 /* This can happen if there are reductions. */
5760 len = EDGE_COUNT (l0_bb->succs);
5761 gcc_assert (len > 0);
5762 e = EDGE_SUCC (l0_bb, len - 1);
5763 si = gsi_last_nondebug_bb (e->dest);
5764 l2 = NULL_TREE;
5765 if (gsi_end_p (si)
5766 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5767 l2 = gimple_block_label (e->dest);
5768 else
5769 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5771 si = gsi_last_nondebug_bb (e->dest);
5772 if (gsi_end_p (si)
5773 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5775 l2 = gimple_block_label (e->dest);
5776 break;
/* Create the block that will hold the error-handling default case.  */
5780 if (exit_reachable)
5781 default_bb = create_empty_bb (l1_bb->prev_bb);
5782 else
5783 default_bb = create_empty_bb (l0_bb);
5785 /* We will build a switch() with enough cases for all the
5786 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5787 and a default case to abort if something goes wrong. */
5788 len = EDGE_COUNT (l0_bb->succs);
5790 /* Use vec::quick_push on label_vec throughout, since we know the size
5791 in advance. */
5792 auto_vec<tree> label_vec (len);
5794 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5795 GIMPLE_OMP_SECTIONS statement. */
5796 si = gsi_last_nondebug_bb (entry_bb);
5797 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5798 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5799 vin = gimple_omp_sections_control (sections_stmt);
5800 if (!is_combined_parallel (region))
5802 /* If we are not inside a combined parallel+sections region,
5803 call GOMP_sections_start. */
/* LEN counts the successors of L0_BB; one less than that is passed as
   the argument.  */
5804 t = build_int_cst (unsigned_type_node, len - 1);
5805 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5806 stmt = gimple_build_call (u, 1, t);
5808 else
5810 /* Otherwise, call GOMP_sections_next. */
5811 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5812 stmt = gimple_build_call (u, 0);
5814 gimple_call_set_lhs (stmt, vin);
5815 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5816 gsi_remove (&si, true);
5818 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5819 L0_BB. */
5820 switch_si = gsi_last_nondebug_bb (l0_bb);
5821 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
/* With a continue block the switch operand and the variable receiving
   the next section number come from the GIMPLE_OMP_CONTINUE; without one
   the switch reads VIN directly and there is no next-section variable.  */
5822 if (exit_reachable)
5824 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5825 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5826 vmain = gimple_omp_continue_control_use (cont);
5827 vnext = gimple_omp_continue_control_def (cont);
5829 else
5831 vmain = vin;
5832 vnext = NULL_TREE;
/* Case 0 jumps to the L2 label computed above.  */
5835 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5836 label_vec.quick_push (t);
5837 i = 1;
5839 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5840 for (inner = region->inner, casei = 1;
5841 inner;
5842 inner = inner->next, i++, casei++)
5844 basic_block s_entry_bb, s_exit_bb;
5846 /* Skip optional reduction region. */
5847 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
/* Undo the increments the loop header is about to apply so that skipped
   regions do not consume a case number.  */
5849 --i;
5850 --casei;
5851 continue;
5854 s_entry_bb = inner->entry;
5855 s_exit_bb = inner->exit;
5857 t = gimple_block_label (s_entry_bb);
5858 u = build_int_cst (unsigned_type_node, casei);
5859 u = build_case_label (u, NULL, t);
5860 label_vec.quick_push (u);
/* Drop the GIMPLE_OMP_SECTION marker and turn the section's entry edge
   into a plain fallthrough.  */
5862 si = gsi_last_nondebug_bb (s_entry_bb);
5863 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5864 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5865 gsi_remove (&si, true);
5866 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
/* A section without an exit block has no GIMPLE_OMP_RETURN to remove.  */
5868 if (s_exit_bb == NULL)
5869 continue;
5871 si = gsi_last_nondebug_bb (s_exit_bb);
5872 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5873 gsi_remove (&si, true);
5875 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5878 /* Error handling code goes in DEFAULT_BB. */
5879 t = gimple_block_label (default_bb);
5880 u = build_case_label (NULL, NULL, t);
5881 make_edge (l0_bb, default_bb, 0);
5882 add_bb_to_loop (default_bb, current_loops->tree_root);
5884 stmt = gimple_build_switch (vmain, u, label_vec);
5885 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5886 gsi_remove (&switch_si, true);
/* The default block consists of a single call to the trap builtin.  */
5888 si = gsi_start_bb (default_bb);
5889 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5890 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5892 if (exit_reachable)
5894 tree bfn_decl;
5896 /* Code to get the next section goes in L1_BB. */
5897 si = gsi_last_nondebug_bb (l1_bb);
5898 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5900 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5901 stmt = gimple_build_call (bfn_decl, 0);
5902 gimple_call_set_lhs (stmt, vnext);
5903 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5904 gsi_remove (&si, true);
5906 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5909 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
/* The END_NOWAIT builtin is used for a nowait return, END_CANCEL when the
   return statement has an lhs, and plain END otherwise; an lhs, if any,
   is carried over to the new call.  */
5910 si = gsi_last_nondebug_bb (l2_bb);
5911 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5912 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5913 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5914 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5915 else
5916 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5917 stmt = gimple_build_call (t, 0);
5918 if (gimple_omp_return_lhs (gsi_stmt (si)))
5919 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5920 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5921 gsi_remove (&si, true);
5923 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5926 /* Expand code for an OpenMP single directive. We've already expanded
5927 much of the code, here we simply place the GOMP_barrier call. */
5929 static void
5930 expand_omp_single (struct omp_region *region)
5932 basic_block entry_bb, exit_bb;
5933 gimple_stmt_iterator si;
5935 entry_bb = region->entry;
5936 exit_bb = region->exit;
5938 si = gsi_last_nondebug_bb (entry_bb);
5939 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5940 gsi_remove (&si, true);
5941 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5943 si = gsi_last_nondebug_bb (exit_bb);
5944 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5946 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5947 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5949 gsi_remove (&si, true);
5950 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5953 /* Generic expansion for OpenMP synchronization directives: master,
5954 ordered and critical. All we need to do here is remove the entry
5955 and exit markers for REGION. */
5957 static void
5958 expand_omp_synch (struct omp_region *region)
5960 basic_block entry_bb, exit_bb;
5961 gimple_stmt_iterator si;
5963 entry_bb = region->entry;
5964 exit_bb = region->exit;
5966 si = gsi_last_nondebug_bb (entry_bb);
5967 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5968 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5969 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5970 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5971 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5972 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5973 gsi_remove (&si, true);
5974 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5976 if (exit_bb)
5978 si = gsi_last_nondebug_bb (exit_bb);
5979 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5980 gsi_remove (&si, true);
5981 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5985 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
5986 operation as a normal volatile load. */
5988 static bool
5989 expand_omp_atomic_load (basic_block load_bb, tree addr,
5990 tree loaded_val, int index)
5992 enum built_in_function tmpbase;
5993 gimple_stmt_iterator gsi;
5994 basic_block store_bb;
5995 location_t loc;
5996 gimple *stmt;
5997 tree decl, call, type, itype;
5999 gsi = gsi_last_nondebug_bb (load_bb);
6000 stmt = gsi_stmt (gsi);
6001 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6002 loc = gimple_location (stmt);
6004 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6005 is smaller than word size, then expand_atomic_load assumes that the load
6006 is atomic. We could avoid the builtin entirely in this case. */
6008 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6009 decl = builtin_decl_explicit (tmpbase);
6010 if (decl == NULL_TREE)
6011 return false;
6013 type = TREE_TYPE (loaded_val);
6014 itype = TREE_TYPE (TREE_TYPE (decl));
6016 call = build_call_expr_loc (loc, decl, 2, addr,
6017 build_int_cst (NULL,
6018 gimple_omp_atomic_seq_cst_p (stmt)
6019 ? MEMMODEL_SEQ_CST
6020 : MEMMODEL_RELAXED));
6021 if (!useless_type_conversion_p (type, itype))
6022 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6023 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6025 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6026 gsi_remove (&gsi, true);
6028 store_bb = single_succ (load_bb);
6029 gsi = gsi_last_nondebug_bb (store_bb);
6030 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6031 gsi_remove (&gsi, true);
6033 if (gimple_in_ssa_p (cfun))
6034 update_ssa (TODO_update_ssa_no_phi);
6036 return true;
6039 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6040 operation as a normal volatile store. */
6042 static bool
6043 expand_omp_atomic_store (basic_block load_bb, tree addr,
6044 tree loaded_val, tree stored_val, int index)
6046 enum built_in_function tmpbase;
6047 gimple_stmt_iterator gsi;
6048 basic_block store_bb = single_succ (load_bb);
6049 location_t loc;
6050 gimple *stmt;
6051 tree decl, call, type, itype;
6052 machine_mode imode;
6053 bool exchange;
6055 gsi = gsi_last_nondebug_bb (load_bb);
6056 stmt = gsi_stmt (gsi);
6057 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6059 /* If the load value is needed, then this isn't a store but an exchange. */
6060 exchange = gimple_omp_atomic_need_value_p (stmt);
6062 gsi = gsi_last_nondebug_bb (store_bb);
6063 stmt = gsi_stmt (gsi);
6064 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6065 loc = gimple_location (stmt);
6067 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6068 is smaller than word size, then expand_atomic_store assumes that the store
6069 is atomic. We could avoid the builtin entirely in this case. */
6071 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6072 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6073 decl = builtin_decl_explicit (tmpbase);
6074 if (decl == NULL_TREE)
6075 return false;
6077 type = TREE_TYPE (stored_val);
6079 /* Dig out the type of the function's second argument. */
6080 itype = TREE_TYPE (decl);
6081 itype = TYPE_ARG_TYPES (itype);
6082 itype = TREE_CHAIN (itype);
6083 itype = TREE_VALUE (itype);
6084 imode = TYPE_MODE (itype);
6086 if (exchange && !can_atomic_exchange_p (imode, true))
6087 return false;
6089 if (!useless_type_conversion_p (itype, type))
6090 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6091 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6092 build_int_cst (NULL,
6093 gimple_omp_atomic_seq_cst_p (stmt)
6094 ? MEMMODEL_SEQ_CST
6095 : MEMMODEL_RELAXED));
6096 if (exchange)
6098 if (!useless_type_conversion_p (type, itype))
6099 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6100 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6103 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6104 gsi_remove (&gsi, true);
6106 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6107 gsi = gsi_last_nondebug_bb (load_bb);
6108 gsi_remove (&gsi, true);
6110 if (gimple_in_ssa_p (cfun))
6111 update_ssa (TODO_update_ssa_no_phi);
6113 return true;
6116 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6117 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6118 size of the data type, and thus usable to find the index of the builtin
6119 decl. Returns false if the expression is not of the proper form. */
6121 static bool
6122 expand_omp_atomic_fetch_op (basic_block load_bb,
6123 tree addr, tree loaded_val,
6124 tree stored_val, int index)
6126 enum built_in_function oldbase, newbase, tmpbase;
6127 tree decl, itype, call;
6128 tree lhs, rhs;
6129 basic_block store_bb = single_succ (load_bb);
6130 gimple_stmt_iterator gsi;
6131 gimple *stmt;
6132 location_t loc;
6133 enum tree_code code;
6134 bool need_old, need_new;
6135 machine_mode imode;
6136 bool seq_cst;
6138 /* We expect to find the following sequences:
6140 load_bb:
6141 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6143 store_bb:
6144 val = tmp OP something; (or: something OP tmp)
6145 GIMPLE_OMP_STORE (val)
6147 ???FIXME: Allow a more flexible sequence.
6148 Perhaps use data flow to pick the statements.
6152 gsi = gsi_after_labels (store_bb);
6153 stmt = gsi_stmt (gsi);
6154 if (is_gimple_debug (stmt))
6156 gsi_next_nondebug (&gsi);
6157 if (gsi_end_p (gsi))
6158 return false;
6159 stmt = gsi_stmt (gsi);
6161 loc = gimple_location (stmt);
6162 if (!is_gimple_assign (stmt))
6163 return false;
6164 gsi_next_nondebug (&gsi);
6165 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6166 return false;
6167 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6168 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6169 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6170 gcc_checking_assert (!need_old || !need_new);
6172 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6173 return false;
6175 /* Check for one of the supported fetch-op operations. */
6176 code = gimple_assign_rhs_code (stmt);
6177 switch (code)
6179 case PLUS_EXPR:
6180 case POINTER_PLUS_EXPR:
6181 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6182 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6183 break;
6184 case MINUS_EXPR:
6185 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6186 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6187 break;
6188 case BIT_AND_EXPR:
6189 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6190 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6191 break;
6192 case BIT_IOR_EXPR:
6193 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6194 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6195 break;
6196 case BIT_XOR_EXPR:
6197 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6198 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6199 break;
6200 default:
6201 return false;
6204 /* Make sure the expression is of the proper form. */
6205 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6206 rhs = gimple_assign_rhs2 (stmt);
6207 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6208 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6209 rhs = gimple_assign_rhs1 (stmt);
6210 else
6211 return false;
6213 tmpbase = ((enum built_in_function)
6214 ((need_new ? newbase : oldbase) + index + 1));
6215 decl = builtin_decl_explicit (tmpbase);
6216 if (decl == NULL_TREE)
6217 return false;
6218 itype = TREE_TYPE (TREE_TYPE (decl));
6219 imode = TYPE_MODE (itype);
6221 /* We could test all of the various optabs involved, but the fact of the
6222 matter is that (with the exception of i486 vs i586 and xadd) all targets
6223 that support any atomic operaton optab also implements compare-and-swap.
6224 Let optabs.c take care of expanding any compare-and-swap loop. */
6225 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6226 return false;
6228 gsi = gsi_last_nondebug_bb (load_bb);
6229 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6231 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6232 It only requires that the operation happen atomically. Thus we can
6233 use the RELAXED memory model. */
6234 call = build_call_expr_loc (loc, decl, 3, addr,
6235 fold_convert_loc (loc, itype, rhs),
6236 build_int_cst (NULL,
6237 seq_cst ? MEMMODEL_SEQ_CST
6238 : MEMMODEL_RELAXED));
6240 if (need_old || need_new)
6242 lhs = need_old ? loaded_val : stored_val;
6243 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6244 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6246 else
6247 call = fold_convert_loc (loc, void_type_node, call);
6248 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6249 gsi_remove (&gsi, true);
6251 gsi = gsi_last_nondebug_bb (store_bb);
6252 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6253 gsi_remove (&gsi, true);
6254 gsi = gsi_last_nondebug_bb (store_bb);
6255 stmt = gsi_stmt (gsi);
6256 gsi_remove (&gsi, true);
6258 if (gimple_in_ssa_p (cfun))
6260 release_defs (stmt);
6261 update_ssa (TODO_update_ssa_no_phi);
6264 return true;
6267 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6269 oldval = *addr;
6270 repeat:
6271 newval = rhs; // with oldval replacing *addr in rhs
6272 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6273 if (oldval != newval)
6274 goto repeat;
6276 INDEX is log2 of the size of the data type, and thus usable to find the
6277 index of the builtin decl. */
6279 static bool
6280 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6281 tree addr, tree loaded_val, tree stored_val,
6282 int index)
6284 tree loadedi, storedi, initial, new_storedi, old_vali;
6285 tree type, itype, cmpxchg, iaddr;
6286 gimple_stmt_iterator si;
6287 basic_block loop_header = single_succ (load_bb);
6288 gimple *phi, *stmt;
6289 edge e;
6290 enum built_in_function fncode;
6292 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6293 order to use the RELAXED memory model effectively. */
6294 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6295 + index + 1);
6296 cmpxchg = builtin_decl_explicit (fncode);
6297 if (cmpxchg == NULL_TREE)
6298 return false;
6299 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6300 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6302 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6303 || !can_atomic_load_p (TYPE_MODE (itype)))
6304 return false;
6306 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6307 si = gsi_last_nondebug_bb (load_bb);
6308 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6310 /* For floating-point values, we'll need to view-convert them to integers
6311 so that we can perform the atomic compare and swap. Simplify the
6312 following code by always setting up the "i"ntegral variables. */
6313 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6315 tree iaddr_val;
6317 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6318 true));
6319 iaddr_val
6320 = force_gimple_operand_gsi (&si,
6321 fold_convert (TREE_TYPE (iaddr), addr),
6322 false, NULL_TREE, true, GSI_SAME_STMT);
6323 stmt = gimple_build_assign (iaddr, iaddr_val);
6324 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6325 loadedi = create_tmp_var (itype);
6326 if (gimple_in_ssa_p (cfun))
6327 loadedi = make_ssa_name (loadedi);
6329 else
6331 iaddr = addr;
6332 loadedi = loaded_val;
6335 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6336 tree loaddecl = builtin_decl_explicit (fncode);
6337 if (loaddecl)
6338 initial
6339 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6340 build_call_expr (loaddecl, 2, iaddr,
6341 build_int_cst (NULL_TREE,
6342 MEMMODEL_RELAXED)));
6343 else
6344 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6345 build_int_cst (TREE_TYPE (iaddr), 0));
6347 initial
6348 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6349 GSI_SAME_STMT);
6351 /* Move the value to the LOADEDI temporary. */
6352 if (gimple_in_ssa_p (cfun))
6354 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6355 phi = create_phi_node (loadedi, loop_header);
6356 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6357 initial);
6359 else
6360 gsi_insert_before (&si,
6361 gimple_build_assign (loadedi, initial),
6362 GSI_SAME_STMT);
6363 if (loadedi != loaded_val)
6365 gimple_stmt_iterator gsi2;
6366 tree x;
6368 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6369 gsi2 = gsi_start_bb (loop_header);
6370 if (gimple_in_ssa_p (cfun))
6372 gassign *stmt;
6373 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6374 true, GSI_SAME_STMT);
6375 stmt = gimple_build_assign (loaded_val, x);
6376 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6378 else
6380 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6381 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6382 true, GSI_SAME_STMT);
6385 gsi_remove (&si, true);
6387 si = gsi_last_nondebug_bb (store_bb);
6388 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6390 if (iaddr == addr)
6391 storedi = stored_val;
6392 else
6393 storedi
6394 = force_gimple_operand_gsi (&si,
6395 build1 (VIEW_CONVERT_EXPR, itype,
6396 stored_val), true, NULL_TREE, true,
6397 GSI_SAME_STMT);
6399 /* Build the compare&swap statement. */
6400 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6401 new_storedi = force_gimple_operand_gsi (&si,
6402 fold_convert (TREE_TYPE (loadedi),
6403 new_storedi),
6404 true, NULL_TREE,
6405 true, GSI_SAME_STMT);
6407 if (gimple_in_ssa_p (cfun))
6408 old_vali = loadedi;
6409 else
6411 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6412 stmt = gimple_build_assign (old_vali, loadedi);
6413 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6415 stmt = gimple_build_assign (loadedi, new_storedi);
6416 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6419 /* Note that we always perform the comparison as an integer, even for
6420 floating point. This allows the atomic operation to properly
6421 succeed even with NaNs and -0.0. */
6422 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6423 stmt = gimple_build_cond_empty (ne);
6424 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6426 /* Update cfg. */
6427 e = single_succ_edge (store_bb);
6428 e->flags &= ~EDGE_FALLTHRU;
6429 e->flags |= EDGE_FALSE_VALUE;
6430 /* Expect no looping. */
6431 e->probability = profile_probability::guessed_always ();
6433 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6434 e->probability = profile_probability::guessed_never ();
6436 /* Copy the new value to loadedi (we already did that before the condition
6437 if we are not in SSA). */
6438 if (gimple_in_ssa_p (cfun))
6440 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6441 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6444 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6445 gsi_remove (&si, true);
6447 struct loop *loop = alloc_loop ();
6448 loop->header = loop_header;
6449 loop->latch = store_bb;
6450 add_loop (loop, loop_header->loop_father);
6452 if (gimple_in_ssa_p (cfun))
6453 update_ssa (TODO_update_ssa_no_phi);
6455 return true;
6458 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6460 GOMP_atomic_start ();
6461 *addr = rhs;
6462 GOMP_atomic_end ();
6464 The result is not globally atomic, but works so long as all parallel
6465 references are within #pragma omp atomic directives. According to
6466 responses received from omp@openmp.org, appears to be within spec.
6467 Which makes sense, since that's how several other compilers handle
6468 this situation as well.
6469 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6470 expanding. STORED_VAL is the operand of the matching
6471 GIMPLE_OMP_ATOMIC_STORE.
6473 We replace
6474 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6475 loaded_val = *addr;
6477 and replace
6478 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6479 *addr = stored_val;
6482 static bool
6483 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6484 tree addr, tree loaded_val, tree stored_val)
6486 gimple_stmt_iterator si;
6487 gassign *stmt;
6488 tree t;
6490 si = gsi_last_nondebug_bb (load_bb);
6491 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6493 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6494 t = build_call_expr (t, 0);
6495 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6497 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6498 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6499 gsi_remove (&si, true);
6501 si = gsi_last_nondebug_bb (store_bb);
6502 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6504 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6505 stored_val);
6506 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6508 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6509 t = build_call_expr (t, 0);
6510 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6511 gsi_remove (&si, true);
6513 if (gimple_in_ssa_p (cfun))
6514 update_ssa (TODO_update_ssa_no_phi);
6515 return true;
6518 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6519 using expand_omp_atomic_fetch_op. If it failed, we try to
6520 call expand_omp_atomic_pipeline, and if it fails too, the
6521 ultimate fallback is wrapping the operation in a mutex
6522 (expand_omp_atomic_mutex). REGION is the atomic region built
6523 by build_omp_regions_1(). */
6525 static void
6526 expand_omp_atomic (struct omp_region *region)
6528 basic_block load_bb = region->entry, store_bb = region->exit;
6529 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6530 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6531 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6532 tree addr = gimple_omp_atomic_load_rhs (load);
6533 tree stored_val = gimple_omp_atomic_store_val (store);
6534 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6535 HOST_WIDE_INT index;
6537 /* Make sure the type is one of the supported sizes. */
6538 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6539 index = exact_log2 (index);
6540 if (index >= 0 && index <= 4)
6542 unsigned int align = TYPE_ALIGN_UNIT (type);
6544 /* __sync builtins require strict data alignment. */
6545 if (exact_log2 (align) >= index)
6547 /* Atomic load. */
6548 scalar_mode smode;
6549 if (loaded_val == stored_val
6550 && (is_int_mode (TYPE_MODE (type), &smode)
6551 || is_float_mode (TYPE_MODE (type), &smode))
6552 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6553 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6554 return;
6556 /* Atomic store. */
6557 if ((is_int_mode (TYPE_MODE (type), &smode)
6558 || is_float_mode (TYPE_MODE (type), &smode))
6559 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6560 && store_bb == single_succ (load_bb)
6561 && first_stmt (store_bb) == store
6562 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6563 stored_val, index))
6564 return;
6566 /* When possible, use specialized atomic update functions. */
6567 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6568 && store_bb == single_succ (load_bb)
6569 && expand_omp_atomic_fetch_op (load_bb, addr,
6570 loaded_val, stored_val, index))
6571 return;
6573 /* If we don't have specialized __sync builtins, try and implement
6574 as a compare and swap loop. */
6575 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6576 loaded_val, stored_val, index))
6577 return;
6581 /* The ultimate fallback is wrapping the operation in a mutex. */
6582 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6585 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6586 at REGION_EXIT. */
6588 static void
6589 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6590 basic_block region_exit)
6592 struct loop *outer = region_entry->loop_father;
6593 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6595 /* Don't parallelize the kernels region if it contains more than one outer
6596 loop. */
6597 unsigned int nr_outer_loops = 0;
6598 struct loop *single_outer = NULL;
6599 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6601 gcc_assert (loop_outer (loop) == outer);
6603 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6604 continue;
6606 if (region_exit != NULL
6607 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6608 continue;
6610 nr_outer_loops++;
6611 single_outer = loop;
6613 if (nr_outer_loops != 1)
6614 return;
6616 for (struct loop *loop = single_outer->inner;
6617 loop != NULL;
6618 loop = loop->inner)
6619 if (loop->next)
6620 return;
6622 /* Mark the loops in the region. */
6623 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6624 loop->in_oacc_kernels_region = true;
6627 /* Types used to pass grid and wortkgroup sizes to kernel invocation. */
6629 struct GTY(()) grid_launch_attributes_trees
6631 tree kernel_dim_array_type;
6632 tree kernel_lattrs_dimnum_decl;
6633 tree kernel_lattrs_grid_decl;
6634 tree kernel_lattrs_group_decl;
6635 tree kernel_launch_attributes_type;
6638 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6640 /* Create types used to pass kernel launch attributes to target. */
6642 static void
6643 grid_create_kernel_launch_attr_types (void)
6645 if (grid_attr_trees)
6646 return;
6647 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6649 tree dim_arr_index_type
6650 = build_index_type (build_int_cst (integer_type_node, 2));
6651 grid_attr_trees->kernel_dim_array_type
6652 = build_array_type (uint32_type_node, dim_arr_index_type);
6654 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6655 grid_attr_trees->kernel_lattrs_dimnum_decl
6656 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6657 uint32_type_node);
6658 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6660 grid_attr_trees->kernel_lattrs_grid_decl
6661 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6662 grid_attr_trees->kernel_dim_array_type);
6663 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6664 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6665 grid_attr_trees->kernel_lattrs_group_decl
6666 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6667 grid_attr_trees->kernel_dim_array_type);
6668 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6669 = grid_attr_trees->kernel_lattrs_grid_decl;
6670 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6671 "__gomp_kernel_launch_attributes",
6672 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6675 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6676 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6677 of type uint32_type_node. */
6679 static void
6680 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6681 tree fld_decl, int index, tree value)
6683 tree ref = build4 (ARRAY_REF, uint32_type_node,
6684 build3 (COMPONENT_REF,
6685 grid_attr_trees->kernel_dim_array_type,
6686 range_var, fld_decl, NULL_TREE),
6687 build_int_cst (integer_type_node, index),
6688 NULL_TREE, NULL_TREE);
6689 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6692 /* Return a tree representation of a pointer to a structure with grid and
6693 work-group size information. Statements filling that information will be
6694 inserted before GSI, TGT_STMT is the target statement which has the
6695 necessary information in it. */
6697 static tree
6698 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6699 gomp_target *tgt_stmt)
6701 grid_create_kernel_launch_attr_types ();
6702 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6703 "__kernel_launch_attrs");
6705 unsigned max_dim = 0;
6706 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6707 clause;
6708 clause = OMP_CLAUSE_CHAIN (clause))
6710 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6711 continue;
6713 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6714 max_dim = MAX (dim, max_dim);
6716 grid_insert_store_range_dim (gsi, lattrs,
6717 grid_attr_trees->kernel_lattrs_grid_decl,
6718 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6719 grid_insert_store_range_dim (gsi, lattrs,
6720 grid_attr_trees->kernel_lattrs_group_decl,
6721 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6724 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6725 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6726 gcc_checking_assert (max_dim <= 2);
6727 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6728 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6729 GSI_SAME_STMT);
6730 TREE_ADDRESSABLE (lattrs) = 1;
6731 return build_fold_addr_expr (lattrs);
6734 /* Build target argument identifier from the DEVICE identifier, value
6735 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6737 static tree
6738 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6740 tree t = build_int_cst (integer_type_node, device);
6741 if (subseqent_param)
6742 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6743 build_int_cst (integer_type_node,
6744 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6745 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6746 build_int_cst (integer_type_node, id));
6747 return t;
6750 /* Like above but return it in type that can be directly stored as an element
6751 of the argument array. */
6753 static tree
6754 get_target_argument_identifier (int device, bool subseqent_param, int id)
6756 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6757 return fold_convert (ptr_type_node, t);
6760 /* Return a target argument consisting of DEVICE identifier, value identifier
6761 ID, and the actual VALUE. */
6763 static tree
6764 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6765 tree value)
6767 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6768 fold_convert (integer_type_node, value),
6769 build_int_cst (unsigned_type_node,
6770 GOMP_TARGET_ARG_VALUE_SHIFT));
6771 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6772 get_target_argument_identifier_1 (device, false, id));
6773 t = fold_convert (ptr_type_node, t);
6774 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6777 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6778 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
6779 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6780 arguments. */
6782 static void
6783 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6784 int id, tree value, vec <tree> *args)
6786 if (tree_fits_shwi_p (value)
6787 && tree_to_shwi (value) > -(1 << 15)
6788 && tree_to_shwi (value) < (1 << 15))
6789 args->quick_push (get_target_argument_value (gsi, device, id, value));
6790 else
6792 args->quick_push (get_target_argument_identifier (device, true, id));
6793 value = fold_convert (ptr_type_node, value);
6794 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6795 GSI_SAME_STMT);
6796 args->quick_push (value);
6800 /* Create an array of arguments that is then passed to GOMP_target. */
6802 static tree
6803 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6805 auto_vec <tree, 6> args;
6806 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6807 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6808 if (c)
6809 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6810 else
6811 t = integer_minus_one_node;
6812 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6813 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6815 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6816 if (c)
6817 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6818 else
6819 t = integer_minus_one_node;
6820 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6821 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6822 &args);
6824 /* Add HSA-specific grid sizes, if available. */
6825 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6826 OMP_CLAUSE__GRIDDIM_))
6828 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6829 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6830 args.quick_push (t);
6831 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6834 /* Produce more, perhaps device specific, arguments here. */
6836 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6837 args.length () + 1),
6838 ".omp_target_args");
6839 for (unsigned i = 0; i < args.length (); i++)
6841 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6842 build_int_cst (integer_type_node, i),
6843 NULL_TREE, NULL_TREE);
6844 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6845 GSI_SAME_STMT);
6847 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6848 build_int_cst (integer_type_node, args.length ()),
6849 NULL_TREE, NULL_TREE);
6850 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6851 GSI_SAME_STMT);
6852 TREE_ADDRESSABLE (argarray) = 1;
6853 return build_fold_addr_expr (argarray);
6856 /* Expand the GIMPLE_OMP_TARGET starting at REGION.

   The kind of the target statement selects the runtime builtin that is
   called (one of the GOMP_target* or GOACC_* builtins, see the switch on
   the kind below).  For offloaded kinds the region body is first moved
   into the child function recorded on the statement.  Then the call to the
   builtin is built and inserted, and the remaining OMP marker statements
   (the target statement itself and, for data regions, the matching
   GIMPLE_OMP_RETURN) are removed.  */
6858 static void
6859 expand_omp_target (struct omp_region *region)
6861 basic_block entry_bb, exit_bb, new_bb;
6862 struct function *child_cfun;
6863 tree child_fn, block, t;
6864 gimple_stmt_iterator gsi;
6865 gomp_target *entry_stmt;
6866 gimple *stmt;
6867 edge e;
6868 bool offloaded, data_region;
6870 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6871 new_bb = region->entry;
6873 offloaded = is_gimple_omp_offloaded (entry_stmt);
/* Classify the construct: DATA_REGION is true only for the DATA, OACC_DATA
   and OACC_HOST_DATA kinds.  */
6874 switch (gimple_omp_target_kind (entry_stmt))
6876 case GF_OMP_TARGET_KIND_REGION:
6877 case GF_OMP_TARGET_KIND_UPDATE:
6878 case GF_OMP_TARGET_KIND_ENTER_DATA:
6879 case GF_OMP_TARGET_KIND_EXIT_DATA:
6880 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6881 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6882 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6883 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6884 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6885 data_region = false;
6886 break;
6887 case GF_OMP_TARGET_KIND_DATA:
6888 case GF_OMP_TARGET_KIND_OACC_DATA:
6889 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6890 data_region = true;
6891 break;
6892 default:
6893 gcc_unreachable ();
/* CHILD_FN and CHILD_CFUN are only set for offloaded regions.  */
6896 child_fn = NULL_TREE;
6897 child_cfun = NULL;
6898 if (offloaded)
6900 child_fn = gimple_omp_target_child_fn (entry_stmt);
6901 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6904 /* Supported by expand_omp_taskreg, but not here. */
6905 if (child_cfun != NULL)
6906 gcc_checking_assert (!child_cfun->cfg);
6907 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6909 entry_bb = region->entry;
6910 exit_bb = region->exit;
6912 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6914 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6916 /* Further down, both OpenACC kernels and OpenACC parallel constructs
6917 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6918 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
6919 DECL_ATTRIBUTES (child_fn)
6920 = tree_cons (get_identifier ("oacc kernels"),
6921 NULL_TREE, DECL_ATTRIBUTES (child_fn));
/* Outline the region body into CHILD_FN.  */
6924 if (offloaded)
6926 unsigned srcidx, dstidx, num;
6928 /* If the offloading region needs data sent from the parent
6929 function, then the very first statement (except possible
6930 tree profile counter updates) of the offloading body
6931 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6932 &.OMP_DATA_O is passed as an argument to the child function,
6933 we need to replace it with the argument as seen by the child
6934 function.
6936 In most cases, this will end up being the identity assignment
6937 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6938 a function call that has been inlined, the original PARM_DECL
6939 .OMP_DATA_I may have been converted into a different local
6940 variable. In which case, we need to keep the assignment. */
6941 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6942 if (data_arg)
6944 basic_block entry_succ_bb = single_succ (entry_bb);
6945 gimple_stmt_iterator gsi;
6946 tree arg;
6947 gimple *tgtcopy_stmt = NULL;
6948 tree sender = TREE_VEC_ELT (data_arg, 0);
/* Scan for the assignment whose right-hand side is &SENDER.  The loop has
   no exit condition of its own; it asserts that the statement is found
   before the end of the block.  */
6950 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6952 gcc_assert (!gsi_end_p (gsi));
6953 stmt = gsi_stmt (gsi);
6954 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6955 continue;
6957 if (gimple_num_ops (stmt) == 2)
6959 tree arg = gimple_assign_rhs1 (stmt);
6961 /* We're ignoring the subcode because we're
6962 effectively doing a STRIP_NOPS. */
6964 if (TREE_CODE (arg) == ADDR_EXPR
6965 && TREE_OPERAND (arg, 0) == sender)
6967 tgtcopy_stmt = stmt;
6968 break;
6973 gcc_assert (tgtcopy_stmt != NULL);
6974 arg = DECL_ARGUMENTS (child_fn);
/* The copy must assign to the child's first parameter; it is then dropped
   (GSI still points at it).  */
6976 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6977 gsi_remove (&gsi, true);
6980 /* Declare local variables needed in CHILD_CFUN. */
6981 block = DECL_INITIAL (child_fn);
6982 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6983 /* The gimplifier could record temporaries in the offloading block
6984 rather than in containing function's local_decls chain,
6985 which would mean cgraph missed finalizing them. Do it now. */
6986 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6987 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6988 varpool_node::finalize_decl (t);
6989 DECL_SAVED_TREE (child_fn) = NULL;
6990 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6991 gimple_set_body (child_fn, NULL);
6992 TREE_USED (block) = 1;
6994 /* Reset DECL_CONTEXT on function arguments. */
6995 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
6996 DECL_CONTEXT (t) = child_fn;
6998 /* Split ENTRY_BB at GIMPLE_*,
6999 so that it can be moved to the child function. */
7000 gsi = gsi_last_nondebug_bb (entry_bb);
7001 stmt = gsi_stmt (gsi);
7002 gcc_assert (stmt
7003 && gimple_code (stmt) == gimple_code (entry_stmt));
7004 e = split_block (entry_bb, stmt);
7005 gsi_remove (&gsi, true);
7006 entry_bb = e->dest;
7007 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7009 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7010 if (exit_bb)
7012 gsi = gsi_last_nondebug_bb (exit_bb);
7013 gcc_assert (!gsi_end_p (gsi)
7014 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7015 stmt = gimple_build_return (NULL);
7016 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7017 gsi_remove (&gsi, true);
7020 /* Make sure to generate early debug for the function before
7021 outlining anything. */
7022 if (! gimple_in_ssa_p (cfun))
7023 (*debug_hooks->early_global_decl) (cfun->decl);
7025 /* Move the offloading region into CHILD_CFUN. */
7027 block = gimple_block (entry_stmt);
7029 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7030 if (exit_bb)
7031 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7032 /* When the OMP expansion process cannot guarantee an up-to-date
7033 loop tree arrange for the child function to fixup loops. */
7034 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7035 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7037 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
/* The vector is compacted in place: entries whose context is the current
   function are skipped, the others are shifted down to DSTIDX.  */
7038 num = vec_safe_length (child_cfun->local_decls);
7039 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7041 t = (*child_cfun->local_decls)[srcidx];
7042 if (DECL_CONTEXT (t) == cfun->decl)
7043 continue;
7044 if (srcidx != dstidx)
7045 (*child_cfun->local_decls)[dstidx] = t;
7046 dstidx++;
7048 if (dstidx != num)
7049 vec_safe_truncate (child_cfun->local_decls, dstidx);
7051 /* Inform the callgraph about the new function. */
7052 child_cfun->curr_properties = cfun->curr_properties;
7053 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7054 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7055 cgraph_node *node = cgraph_node::get_create (child_fn);
7056 node->parallelized_function = 1;
7057 cgraph_node::add_new_function (child_fn, true);
7059 /* Add the new function to the offload table. */
7060 if (ENABLE_OFFLOADING)
7061 vec_safe_push (offload_funcs, child_fn);
/* An assembler name is assigned to CHILD_FN below only if the current
   function already has one and the child does not.  */
7063 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7064 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7066 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7067 fixed in a following pass. */
7068 push_cfun (child_cfun);
7069 if (need_asm)
7070 assign_assembler_name_if_needed (child_fn);
7071 cgraph_edge::rebuild_edges ();
7073 /* Some EH regions might become dead, see PR34608. If
7074 pass_cleanup_cfg isn't the first pass to happen with the
7075 new child, these dead EH edges might cause problems.
7076 Clean them up now. */
7077 if (flag_exceptions)
7079 basic_block bb;
7080 bool changed = false;
7082 FOR_EACH_BB_FN (bb, cfun)
7083 changed |= gimple_purge_dead_eh_edges (bb);
7084 if (changed)
7085 cleanup_tree_cfg ();
7087 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7088 verify_loop_structure ();
7089 pop_cfun ();
7091 if (dump_file && !gimple_in_ssa_p (cfun))
7093 omp_any_child_fn_dumped = true;
7094 dump_function_header (dump_file, child_fn, dump_flags);
7095 dump_function_to_file (child_fn, dump_file, dump_flags);
7099 /* Emit a library call to launch the offloading region, or do data
7100 transfers. */
7101 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7102 enum built_in_function start_ix;
7103 location_t clause_loc;
7104 unsigned int flags_i = 0;
/* Map the target kind to the builtin to call.  EXIT_DATA shares the builtin
   of ENTER_DATA and is told apart by a bit in FLAGS_I.  */
7106 switch (gimple_omp_target_kind (entry_stmt))
7108 case GF_OMP_TARGET_KIND_REGION:
7109 start_ix = BUILT_IN_GOMP_TARGET;
7110 break;
7111 case GF_OMP_TARGET_KIND_DATA:
7112 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7113 break;
7114 case GF_OMP_TARGET_KIND_UPDATE:
7115 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7116 break;
7117 case GF_OMP_TARGET_KIND_ENTER_DATA:
7118 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7119 break;
7120 case GF_OMP_TARGET_KIND_EXIT_DATA:
7121 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7122 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7123 break;
7124 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7125 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7126 start_ix = BUILT_IN_GOACC_PARALLEL;
7127 break;
7128 case GF_OMP_TARGET_KIND_OACC_DATA:
7129 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7130 start_ix = BUILT_IN_GOACC_DATA_START;
7131 break;
7132 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7133 start_ix = BUILT_IN_GOACC_UPDATE;
7134 break;
7135 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7136 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7137 break;
7138 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7139 start_ix = BUILT_IN_GOACC_DECLARE;
7140 break;
7141 default:
7142 gcc_unreachable ();
7145 clauses = gimple_omp_target_clauses (entry_stmt);
7147 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7148 library choose) and there is no conditional. */
7149 cond = NULL_TREE;
7150 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7152 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7153 if (c)
7154 cond = OMP_CLAUSE_IF_EXPR (c);
7156 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7157 if (c)
7159 /* Even if we pass it to all library function calls, it is currently only
7160 defined/used for the OpenMP target ones. */
7161 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7162 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7163 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7164 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7166 device = OMP_CLAUSE_DEVICE_ID (c);
7167 clause_loc = OMP_CLAUSE_LOCATION (c);
7169 else
7170 clause_loc = gimple_location (entry_stmt);
7172 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7173 if (c)
7174 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7176 /* Ensure 'device' is of the correct type. */
7177 device = fold_convert_loc (clause_loc, integer_type_node, device);
7179 /* If we found the clause 'if (cond)', build
7180 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7181 if (cond)
7183 cond = gimple_boolify (cond);
7185 basic_block cond_bb, then_bb, else_bb;
7186 edge e;
7187 tree tmp_var;
7189 tmp_var = create_tmp_var (TREE_TYPE (device));
/* Choose where to split NEW_BB: right after its labels for offloaded
   regions, otherwise after the statement that precedes the last non-debug
   statement of the block.  */
7190 if (offloaded)
7191 e = split_block_after_labels (new_bb);
7192 else
7194 gsi = gsi_last_nondebug_bb (new_bb);
7195 gsi_prev (&gsi);
7196 e = split_block (new_bb, gsi_stmt (gsi));
7198 cond_bb = e->src;
7199 new_bb = e->dest;
7200 remove_edge (e);
/* Build the diamond COND_BB -> {THEN_BB, ELSE_BB} -> NEW_BB; each arm
   assigns TMP_VAR, which then replaces DEVICE.  */
7202 then_bb = create_empty_bb (cond_bb);
7203 else_bb = create_empty_bb (then_bb);
7204 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7205 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7207 stmt = gimple_build_cond_empty (cond);
7208 gsi = gsi_last_bb (cond_bb);
7209 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7211 gsi = gsi_start_bb (then_bb);
7212 stmt = gimple_build_assign (tmp_var, device);
7213 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7215 gsi = gsi_start_bb (else_bb);
7216 stmt = gimple_build_assign (tmp_var,
7217 build_int_cst (integer_type_node,
7218 GOMP_DEVICE_HOST_FALLBACK));
7219 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7221 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7222 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7223 add_bb_to_loop (then_bb, cond_bb->loop_father);
7224 add_bb_to_loop (else_bb, cond_bb->loop_father);
7225 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7226 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7228 device = tmp_var;
7229 gsi = gsi_last_nondebug_bb (new_bb);
7231 else
7233 gsi = gsi_last_nondebug_bb (new_bb);
7234 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7235 true, GSI_SAME_STMT);
/* T1..T4 describe the data argument.  Without one they are a zero size and
   null pointers.  Otherwise T1 is the maximum index of the array domain of
   the second TREE_VEC element plus one, and T2..T4 are the addresses of the
   three TREE_VEC elements.  */
7238 t = gimple_omp_target_data_arg (entry_stmt);
7239 if (t == NULL)
7241 t1 = size_zero_node;
7242 t2 = build_zero_cst (ptr_type_node);
7243 t3 = t2;
7244 t4 = t2;
7246 else
7248 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7249 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7250 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7251 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7252 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
/* TAGGING is set only for BUILT_IN_GOACC_PARALLEL, whose trailing arguments
   are values packed by oacc_launch_pack and ended by a zero marker.  */
7255 gimple *g;
7256 bool tagging = false;
7257 /* The maximum number used by any start_ix, without varargs. */
7258 auto_vec<tree, 11> args;
7259 args.quick_push (device);
7260 if (offloaded)
7261 args.quick_push (build_fold_addr_expr (child_fn));
7262 args.quick_push (t1);
7263 args.quick_push (t2);
7264 args.quick_push (t3);
7265 args.quick_push (t4);
/* Append the builtin-specific trailing arguments.  */
7266 switch (start_ix)
7268 case BUILT_IN_GOACC_DATA_START:
7269 case BUILT_IN_GOACC_DECLARE:
7270 case BUILT_IN_GOMP_TARGET_DATA:
7271 break;
7272 case BUILT_IN_GOMP_TARGET:
7273 case BUILT_IN_GOMP_TARGET_UPDATE:
7274 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7275 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7276 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7277 if (c)
7278 depend = OMP_CLAUSE_DECL (c);
7279 else
7280 depend = build_int_cst (ptr_type_node, 0);
7281 args.quick_push (depend);
7282 if (start_ix == BUILT_IN_GOMP_TARGET)
7283 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7284 break;
7285 case BUILT_IN_GOACC_PARALLEL:
7286 oacc_set_fn_attrib (child_fn, clauses, &args);
7287 tagging = true;
7288 /* FALLTHRU */
7289 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7290 case BUILT_IN_GOACC_UPDATE:
7292 tree t_async = NULL_TREE;
7294 /* If present, use the value specified by the respective
7295 clause, making sure that is of the correct type. */
7296 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7297 if (c)
7298 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7299 integer_type_node,
7300 OMP_CLAUSE_ASYNC_EXPR (c));
7301 else if (!tagging)
7302 /* Default values for t_async. */
7303 t_async = fold_convert_loc (gimple_location (entry_stmt),
7304 integer_type_node,
7305 build_int_cst (integer_type_node,
7306 GOMP_ASYNC_SYNC));
7307 if (tagging && t_async)
7309 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7311 if (TREE_CODE (t_async) == INTEGER_CST)
7313 /* See if we can pack the async arg in to the tag's
7314 operand. */
7315 i_async = TREE_INT_CST_LOW (t_async);
7316 if (i_async < GOMP_LAUNCH_OP_MAX)
7317 t_async = NULL_TREE;
7318 else
7319 i_async = GOMP_LAUNCH_OP_MAX;
7321 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7322 i_async));
7324 if (t_async)
7325 args.safe_push (t_async);
7327 /* Save the argument index, and ... */
7328 unsigned t_wait_idx = args.length ();
7329 unsigned num_waits = 0;
7330 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7331 if (!tagging || c)
7332 /* ... push a placeholder. */
7333 args.safe_push (integer_zero_node);
/* Push one argument per wait clause, starting at the first one found.  */
7335 for (; c; c = OMP_CLAUSE_CHAIN (c))
7336 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7338 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7339 integer_type_node,
7340 OMP_CLAUSE_WAIT_EXPR (c)));
7341 num_waits++;
7344 if (!tagging || num_waits)
7346 tree len;
7348 /* Now that we know the number, update the placeholder. */
7349 if (tagging)
7350 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7351 else
7352 len = build_int_cst (integer_type_node, num_waits);
7353 len = fold_convert_loc (gimple_location (entry_stmt),
7354 unsigned_type_node, len);
7355 args[t_wait_idx] = len;
7358 break;
7359 default:
7360 gcc_unreachable ();
7362 if (tagging)
7363 /* Push terminal marker - zero. */
7364 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7366 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7367 gimple_set_location (g, gimple_location (entry_stmt));
7368 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* For offloaded regions the target statement was already removed when
   ENTRY_BB was split above; otherwise GSI still points at it.  */
7369 if (!offloaded)
7371 g = gsi_stmt (gsi);
7372 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7373 gsi_remove (&gsi, true);
/* Data regions keep their body in place, so only the closing
   GIMPLE_OMP_RETURN marker has to go.  */
7375 if (data_region && region->exit)
7377 gsi = gsi_last_nondebug_bb (region->exit);
7378 g = gsi_stmt (gsi);
7379 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7380 gsi_remove (&gsi, true);
7384 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
7385 iteration variable derived from the thread number. INTRA_GROUP means this
7386 is an expansion of a loop iterating over work-items within a separate
7387 iteration over groups. */
7389 static void
7390 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7392 gimple_stmt_iterator gsi;
7393 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7394 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7395 == GF_OMP_FOR_KIND_GRID_LOOP);
7396 size_t collapse = gimple_omp_for_collapse (for_stmt);
7397 struct omp_for_data_loop *loops
7398 = XALLOCAVEC (struct omp_for_data_loop,
7399 gimple_omp_for_collapse (for_stmt));
7400 struct omp_for_data fd;
/* Drop the branch edge leaving the entry block; the body is the destination
   of the remaining fall-through edge.  */
7402 remove_edge (BRANCH_EDGE (kfor->entry));
7403 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7405 gcc_assert (kfor->cont);
7406 omp_extract_for_data (for_stmt, &fd, loops);
7408 gsi = gsi_start_bb (body_bb);
/* For every collapsed dimension, emit at the start of the body the
   assignment V = N1 + ID * STEP, where ID is the value returned by an HSA
   builtin called with the dimension number.  */
7410 for (size_t dim = 0; dim < collapse; dim++)
7412 tree type, itype;
7413 itype = type = TREE_TYPE (fd.loops[dim].v);
/* Pointer iteration variables do their arithmetic in the corresponding
   signed integer type.  */
7414 if (POINTER_TYPE_P (type))
7415 itype = signed_type_for (type);
7417 tree n1 = fd.loops[dim].n1;
7418 tree step = fd.loops[dim].step;
7419 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7420 true, NULL_TREE, true, GSI_SAME_STMT);
7421 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7422 true, NULL_TREE, true, GSI_SAME_STMT);
/* Select the builtin providing ID: BUILT_IN_HSA_WORKGROUPID when the loop
   iterates over groups, BUILT_IN_HSA_WORKITEMID for an intra-group loop and
   BUILT_IN_HSA_WORKITEMABSID otherwise.  */
7423 tree threadid;
7424 if (gimple_omp_for_grid_group_iter (for_stmt))
7426 gcc_checking_assert (!intra_group);
7427 threadid = build_call_expr (builtin_decl_explicit
7428 (BUILT_IN_HSA_WORKGROUPID), 1,
7429 build_int_cstu (unsigned_type_node, dim));
7431 else if (intra_group)
7432 threadid = build_call_expr (builtin_decl_explicit
7433 (BUILT_IN_HSA_WORKITEMID), 1,
7434 build_int_cstu (unsigned_type_node, dim));
7435 else
7436 threadid = build_call_expr (builtin_decl_explicit
7437 (BUILT_IN_HSA_WORKITEMABSID), 1,
7438 build_int_cstu (unsigned_type_node, dim));
7439 threadid = fold_convert (itype, threadid);
7440 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7441 true, GSI_SAME_STMT);
7443 tree startvar = fd.loops[dim].v;
7444 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7445 if (POINTER_TYPE_P (type))
7446 t = fold_build_pointer_plus (n1, t);
7447 else
7448 t = fold_build2 (PLUS_EXPR, type, t, n1);
7449 t = fold_convert (type, t);
7450 t = force_gimple_operand_gsi (&gsi, t,
7451 DECL_P (startvar)
7452 && TREE_ADDRESSABLE (startvar),
7453 NULL_TREE, true, GSI_SAME_STMT);
7454 gassign *assign_stmt = gimple_build_assign (startvar, t);
7455 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7457 /* Remove the omp for statement. */
7458 gsi = gsi_last_nondebug_bb (kfor->entry);
7459 gsi_remove (&gsi, true);
7461 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7462 gsi = gsi_last_nondebug_bb (kfor->cont);
7463 gcc_assert (!gsi_end_p (gsi)
7464 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7465 gsi_remove (&gsi, true);
7467 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
/* A barrier is only emitted for intra-group loops.  */
7468 gsi = gsi_last_nondebug_bb (kfor->exit);
7469 gcc_assert (!gsi_end_p (gsi)
7470 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7471 if (intra_group)
7472 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7473 gsi_remove (&gsi, true);
7475 /* Fixup the much simpler CFG. */
/* The back edge from the continue block to the body goes away, and the
   dominator information is updated for the now straight-line chain
   body -> cont -> exit.  */
7476 remove_edge (find_edge (kfor->cont, body_bb));
7478 if (kfor->cont != body_bb)
7479 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7480 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7483 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7484 argument_decls. */
7486 struct grid_arg_decl_map
7488 tree old_arg;
7489 tree new_arg;
7492 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7493 pertaining to kernel function. */
7495 static tree
7496 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7498 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7499 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7500 tree t = *tp;
7502 if (t == adm->old_arg)
7503 *tp = adm->new_arg;
7504 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7505 return NULL_TREE;
7508 /* If TARGET region contains a kernel body for loop, remove its region from the
7509 TARGET and expand it in HSA gridified kernel fashion.

   Nothing is done unless hsa_gen_requested_p ().  When TARGET has no
   GIMPLE_OMP_GRID_BODY sub-region, the child function of an OpenMP target
   region is simply registered as an HSA kernel.  Otherwise the grid body is
   moved into a new function whose declaration is a copy of the child
   function's, and that function is registered as a kernel together with
   the original child function.  */
7511 static void
7512 grid_expand_target_grid_body (struct omp_region *target)
7514 if (!hsa_gen_requested_p ())
7515 return;
7517 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7518 struct omp_region **pp;
/* Look for a grid body among the regions directly inside TARGET.  PP ends
   up pointing at the link that references it, or at the terminating null
   link.  */
7520 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7521 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7522 break;
7524 struct omp_region *gpukernel = *pp;
7526 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7527 if (!gpukernel)
7529 /* HSA cannot handle OACC stuff. */
7530 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7531 return;
7532 gcc_checking_assert (orig_child_fndecl);
7533 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7534 OMP_CLAUSE__GRIDDIM_));
7535 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7537 hsa_register_kernel (n);
7538 return;
7541 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7542 OMP_CLAUSE__GRIDDIM_));
7543 tree inside_block
7544 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
/* Unlink the grid body region from TARGET's list of inner regions.  */
7545 *pp = gpukernel->next;
/* Find the kernel loop among the grid body's inner regions; it is unlinked
   as well once it has been checked.  */
7546 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7547 if ((*pp)->type == GIMPLE_OMP_FOR)
7548 break;
7550 struct omp_region *kfor = *pp;
7551 gcc_assert (kfor);
7552 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7553 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7554 *pp = kfor->next;
7555 if (kfor->inner)
/* With group iteration, GIMPLE_OMP_FOR regions directly inside the kernel
   loop are expanded as intra-group grid loops and unlinked; whatever
   remains is handed to expand_omp.  */
7557 if (gimple_omp_for_grid_group_iter (for_stmt))
7559 struct omp_region **next_pp;
7560 for (pp = &kfor->inner; *pp; pp = next_pp)
7562 next_pp = &(*pp)->next;
7563 if ((*pp)->type != GIMPLE_OMP_FOR)
7564 continue;
7565 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7566 gcc_assert (gimple_omp_for_kind (inner)
7567 == GF_OMP_FOR_KIND_GRID_LOOP);
7568 grid_expand_omp_for_loop (*pp, true);
7569 *pp = (*pp)->next;
7570 next_pp = pp;
7573 expand_omp (kfor->inner);
7575 if (gpukernel->inner)
7576 expand_omp (gpukernel->inner);
/* Create the kernel function declaration as a copy of the original child
   function's, with a cloned name and its own outermost BLOCK.  */
7578 tree kern_fndecl = copy_node (orig_child_fndecl);
7579 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7580 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7581 tree tgtblock = gimple_block (tgt_stmt);
7582 tree fniniblock = make_node (BLOCK);
7583 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7584 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7585 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7586 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7587 DECL_INITIAL (kern_fndecl) = fniniblock;
7588 push_struct_function (kern_fndecl);
7589 cfun->function_end_locus = gimple_location (tgt_stmt);
7590 init_tree_ssa (cfun);
7591 pop_cfun ();
7593 /* Make sure to generate early debug for the function before
7594 outlining anything. */
7595 if (! gimple_in_ssa_p (cfun))
7596 (*debug_hooks->early_global_decl) (cfun->decl);
/* Give the kernel its own copies of the single parameter and of the void
   result declaration; the old parameter is remembered for the remapping
   done at the end.  */
7598 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7599 gcc_assert (!DECL_CHAIN (old_parm_decl));
7600 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7601 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7602 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7603 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7604 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7605 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7606 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7607 kern_cfun->curr_properties = cfun->curr_properties;
7609 grid_expand_omp_for_loop (kfor, false);
7611 /* Remove the statement that ends the grid body region's entry block (the
   kernel loop statement itself was already removed by
   grid_expand_omp_for_loop above). */
7612 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7613 gsi_remove (&gsi, true);
7614 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7615 return. */
7616 gsi = gsi_last_nondebug_bb (gpukernel->exit);
7617 gcc_assert (!gsi_end_p (gsi)
7618 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7619 gimple *ret_stmt = gimple_build_return (NULL);
7620 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7621 gsi_remove (&gsi, true);
7623 /* Statements in the first BB in the target construct have been produced by
7624 target lowering and must be copied inside the GPUKERNEL, with the two
7625 exceptions of the first OMP statement and the OMP_DATA assignment
7626 statement. */
7627 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7628 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7629 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7630 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7631 !gsi_end_p (tsi); gsi_next (&tsi))
7633 gimple *stmt = gsi_stmt (tsi);
/* Copying stops at the first OMP statement.  */
7634 if (is_gimple_omp (stmt))
7635 break;
/* Skip the assignment of &SENDER.  */
7636 if (sender
7637 && is_gimple_assign (stmt)
7638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7639 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7640 continue;
7641 gimple *copy = gimple_copy (stmt);
7642 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7643 gimple_set_block (copy, fniniblock);
7646 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7647 gpukernel->exit, inside_block);
/* Register the new function with the callgraph and with HSA, paired with
   the original child function.  */
7649 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7650 kcn->mark_force_output ();
7651 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7653 hsa_register_kernel (kcn, orig_child);
7655 cgraph_node::add_new_function (kern_fndecl, true);
7656 push_cfun (kern_cfun);
7657 cgraph_edge::rebuild_edges ();
7659 /* Re-map any mention of the PARM_DECL of the original function to the
7660 PARM_DECL of the new one.
7662 TODO: It would be great if lowering produced references into the GPU
7663 kernel decl straight away and we did not have to do this. */
7664 struct grid_arg_decl_map adm;
7665 adm.old_arg = old_parm_decl;
7666 adm.new_arg = new_parm_decl;
7667 basic_block bb;
7668 FOR_EACH_BB_FN (bb, kern_cfun)
7670 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7672 gimple *stmt = gsi_stmt (gsi);
7673 struct walk_stmt_info wi;
7674 memset (&wi, 0, sizeof (wi));
7675 wi.info = &adm;
7676 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7679 pop_cfun ();
7681 return;
7684 /* Expand the parallel region tree rooted at REGION. Expansion
7685 proceeds in depth-first order. Innermost regions are expanded
7686 first. This way, parallel regions that require a new function to
7687 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7688 internal dependencies in their body. */
7690 static void
7691 expand_omp (struct omp_region *region)
7693 omp_any_child_fn_dumped = false;
/* Walk REGION and its siblings; each iteration first recurses into the
   inner regions and then expands the region itself.  */
7694 while (region)
7696 location_t saved_location;
7697 gimple *inner_stmt = NULL;
7699 /* First, determine whether this is a combined parallel+workshare
7700 region. */
7701 if (region->type == GIMPLE_OMP_PARALLEL)
7702 determine_parallel_type (region);
7703 else if (region->type == GIMPLE_OMP_TARGET)
7704 grid_expand_target_grid_body (region);
/* For a combined loop, fetch the inner construct's statement now, before
   the inner regions are expanded (presumably because expansion removes
   it), so it can be passed to expand_omp_for below.  */
7706 if (region->type == GIMPLE_OMP_FOR
7707 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7708 inner_stmt = last_stmt (region->inner->entry);
7710 if (region->inner)
7711 expand_omp (region->inner);
/* While the region is expanded, input_location is set to the location of
   its directive (if it has one); it is restored after the switch.  */
7713 saved_location = input_location;
7714 if (gimple_has_location (last_stmt (region->entry)))
7715 input_location = gimple_location (last_stmt (region->entry));
7717 switch (region->type)
7719 case GIMPLE_OMP_PARALLEL:
7720 case GIMPLE_OMP_TASK:
7721 expand_omp_taskreg (region);
7722 break;
7724 case GIMPLE_OMP_FOR:
7725 expand_omp_for (region, inner_stmt);
7726 break;
7728 case GIMPLE_OMP_SECTIONS:
7729 expand_omp_sections (region);
7730 break;
7732 case GIMPLE_OMP_SECTION:
7733 /* Individual omp sections are handled together with their
7734 parent GIMPLE_OMP_SECTIONS region. */
7735 break;
7737 case GIMPLE_OMP_SINGLE:
7738 expand_omp_single (region);
7739 break;
7741 case GIMPLE_OMP_ORDERED:
7743 gomp_ordered *ord_stmt
7744 = as_a <gomp_ordered *> (last_stmt (region->entry));
7745 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7746 OMP_CLAUSE_DEPEND))
7748 /* We'll expand these when expanding corresponding
7749 worksharing region with ordered(n) clause. */
7750 gcc_assert (region->outer
7751 && region->outer->type == GIMPLE_OMP_FOR);
7752 region->ord_stmt = ord_stmt;
7753 break;
/* An ordered construct without a depend clause is expanded like the
   synchronization constructs below.  */
7756 /* FALLTHRU */
7757 case GIMPLE_OMP_MASTER:
7758 case GIMPLE_OMP_TASKGROUP:
7759 case GIMPLE_OMP_CRITICAL:
7760 case GIMPLE_OMP_TEAMS:
7761 expand_omp_synch (region);
7762 break;
7764 case GIMPLE_OMP_ATOMIC_LOAD:
7765 expand_omp_atomic (region);
7766 break;
7768 case GIMPLE_OMP_TARGET:
7769 expand_omp_target (region);
7770 break;
7772 default:
7773 gcc_unreachable ();
7776 input_location = saved_location;
7777 region = region->next;
/* If a child function was written to the dump file, print the header of
   the current function again and reset the flag.  */
7779 if (omp_any_child_fn_dumped)
7781 if (dump_file)
7782 dump_function_header (dump_file, current_function_decl, dump_flags);
7783 omp_any_child_fn_dumped = false;
7787 /* Helper for build_omp_regions. Scan the dominator tree starting at
7788 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7789 true, the function ends once a single tree is built (otherwise, whole
7790 forest of OMP constructs may be built). */
7792 static void
7793 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7794 bool single_tree)
7796 gimple_stmt_iterator gsi;
7797 gimple *stmt;
7798 basic_block son;
/* Only the last non-debug statement of BB is examined for being an OMP
   statement.  */
7800 gsi = gsi_last_nondebug_bb (bb);
7801 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7803 struct omp_region *region;
7804 enum gimple_code code;
7806 stmt = gsi_stmt (gsi);
7807 code = gimple_code (stmt);
7808 if (code == GIMPLE_OMP_RETURN)
7810 /* STMT is the return point out of region PARENT. Mark it
7811 as the exit point and make PARENT the immediately
7812 enclosing region. */
7813 gcc_assert (parent);
7814 region = parent;
7815 region->exit = bb;
7816 parent = parent->outer;
7818 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7820 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7821 GIMPLE_OMP_RETURN, but matches with
7822 GIMPLE_OMP_ATOMIC_LOAD. */
7823 gcc_assert (parent);
7824 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7825 region = parent;
7826 region->exit = bb;
7827 parent = parent->outer;
7829 else if (code == GIMPLE_OMP_CONTINUE)
/* Record BB as the continue block of the enclosing region; PARENT stays
   the same.  */
7831 gcc_assert (parent);
7832 parent->cont = bb;
7834 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7836 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7837 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7839 else
/* A region is created for every other OMP statement, including the
   stand-alone directives; for those REGION is then cleared so that they do
   not become the parent of the blocks that follow.  */
7841 region = new_omp_region (bb, code, parent);
7842 /* Otherwise... */
7843 if (code == GIMPLE_OMP_TARGET)
7845 switch (gimple_omp_target_kind (stmt))
7847 case GF_OMP_TARGET_KIND_REGION:
7848 case GF_OMP_TARGET_KIND_DATA:
7849 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7850 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7851 case GF_OMP_TARGET_KIND_OACC_DATA:
7852 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7853 break;
7854 case GF_OMP_TARGET_KIND_UPDATE:
7855 case GF_OMP_TARGET_KIND_ENTER_DATA:
7856 case GF_OMP_TARGET_KIND_EXIT_DATA:
7857 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7858 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7859 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7860 /* ..., other than for those stand-alone directives... */
7861 region = NULL;
7862 break;
7863 default:
7864 gcc_unreachable ();
7867 else if (code == GIMPLE_OMP_ORDERED
7868 && omp_find_clause (gimple_omp_ordered_clauses
7869 (as_a <gomp_ordered *> (stmt)),
7870 OMP_CLAUSE_DEPEND))
7871 /* #pragma omp ordered depend is also just a stand-alone
7872 directive. */
7873 region = NULL;
7874 /* ..., this directive becomes the parent for a new region. */
7875 if (region)
7876 parent = region;
/* In single-tree mode, do not descend any further once there is no
   enclosing region.  */
7880 if (single_tree && !parent)
7881 return;
/* Recurse into the blocks immediately dominated by BB, passing down the
   possibly updated PARENT.  */
7883 for (son = first_dom_son (CDI_DOMINATORS, bb);
7884 son;
7885 son = next_dom_son (CDI_DOMINATORS, son))
7886 build_omp_regions_1 (son, parent, single_tree);
7889 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7890 root_omp_region. */
7892 static void
7893 build_omp_regions_root (basic_block root)
7895 gcc_assert (root_omp_region == NULL);
7896 build_omp_regions_1 (root, NULL, true);
7897 gcc_assert (root_omp_region != NULL);
7900 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7902 void
7903 omp_expand_local (basic_block head)
7905 build_omp_regions_root (head);
7906 if (dump_file && (dump_flags & TDF_DETAILS))
7908 fprintf (dump_file, "\nOMP region tree\n\n");
7909 dump_omp_region (dump_file, root_omp_region, 0);
7910 fprintf (dump_file, "\n");
7913 remove_exit_barriers (root_omp_region);
7914 expand_omp (root_omp_region);
7916 omp_free_regions ();
7919 /* Scan the CFG and build a tree of OMP regions. Return the root of
7920 the OMP region tree. */
7922 static void
7923 build_omp_regions (void)
7925 gcc_assert (root_omp_region == NULL);
7926 calculate_dominance_info (CDI_DOMINATORS);
7927 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7930 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7932 static unsigned int
7933 execute_expand_omp (void)
7935 build_omp_regions ();
7937 if (!root_omp_region)
7938 return 0;
7940 if (dump_file)
7942 fprintf (dump_file, "\nOMP region tree\n\n");
7943 dump_omp_region (dump_file, root_omp_region, 0);
7944 fprintf (dump_file, "\n");
7947 remove_exit_barriers (root_omp_region);
7949 expand_omp (root_omp_region);
7951 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7952 verify_loop_structure ();
7953 cleanup_tree_cfg ();
7955 omp_free_regions ();
7957 return 0;
7960 /* OMP expansion -- the default pass, run before creation of SSA form. */
7962 namespace {
7964 const pass_data pass_data_expand_omp =
7966 GIMPLE_PASS, /* type */
7967 "ompexp", /* name */
7968 OPTGROUP_OMP, /* optinfo_flags */
7969 TV_NONE, /* tv_id */
7970 PROP_gimple_any, /* properties_required */
7971 PROP_gimple_eomp, /* properties_provided */
7972 0, /* properties_destroyed */
7973 0, /* todo_flags_start */
7974 0, /* todo_flags_finish */
7977 class pass_expand_omp : public gimple_opt_pass
7979 public:
7980 pass_expand_omp (gcc::context *ctxt)
7981 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7984 /* opt_pass methods: */
7985 virtual unsigned int execute (function *)
7987 bool gate = ((flag_openacc != 0 || flag_openmp != 0
7988 || flag_openmp_simd != 0)
7989 && !seen_error ());
7991 /* This pass always runs, to provide PROP_gimple_eomp.
7992 But often, there is nothing to do. */
7993 if (!gate)
7994 return 0;
7996 return execute_expand_omp ();
7999 }; // class pass_expand_omp
8001 } // anon namespace
8003 gimple_opt_pass *
8004 make_pass_expand_omp (gcc::context *ctxt)
8006 return new pass_expand_omp (ctxt);
8009 namespace {
8011 const pass_data pass_data_expand_omp_ssa =
8013 GIMPLE_PASS, /* type */
8014 "ompexpssa", /* name */
8015 OPTGROUP_OMP, /* optinfo_flags */
8016 TV_NONE, /* tv_id */
8017 PROP_cfg | PROP_ssa, /* properties_required */
8018 PROP_gimple_eomp, /* properties_provided */
8019 0, /* properties_destroyed */
8020 0, /* todo_flags_start */
8021 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8024 class pass_expand_omp_ssa : public gimple_opt_pass
8026 public:
8027 pass_expand_omp_ssa (gcc::context *ctxt)
8028 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8031 /* opt_pass methods: */
8032 virtual bool gate (function *fun)
8034 return !(fun->curr_properties & PROP_gimple_eomp);
8036 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8037 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8039 }; // class pass_expand_omp_ssa
8041 } // anon namespace
8043 gimple_opt_pass *
8044 make_pass_expand_omp_ssa (gcc::context *ctxt)
8046 return new pass_expand_omp_ssa (ctxt);
8049 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8050 GIMPLE_* codes. */
8052 bool
8053 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8054 int *region_idx)
8056 gimple *last = last_stmt (bb);
8057 enum gimple_code code = gimple_code (last);
8058 struct omp_region *cur_region = *region;
8059 bool fallthru = false;
8061 switch (code)
8063 case GIMPLE_OMP_PARALLEL:
8064 case GIMPLE_OMP_TASK:
8065 case GIMPLE_OMP_FOR:
8066 case GIMPLE_OMP_SINGLE:
8067 case GIMPLE_OMP_TEAMS:
8068 case GIMPLE_OMP_MASTER:
8069 case GIMPLE_OMP_TASKGROUP:
8070 case GIMPLE_OMP_CRITICAL:
8071 case GIMPLE_OMP_SECTION:
8072 case GIMPLE_OMP_GRID_BODY:
8073 cur_region = new_omp_region (bb, code, cur_region);
8074 fallthru = true;
8075 break;
8077 case GIMPLE_OMP_ORDERED:
8078 cur_region = new_omp_region (bb, code, cur_region);
8079 fallthru = true;
8080 if (omp_find_clause (gimple_omp_ordered_clauses
8081 (as_a <gomp_ordered *> (last)),
8082 OMP_CLAUSE_DEPEND))
8083 cur_region = cur_region->outer;
8084 break;
8086 case GIMPLE_OMP_TARGET:
8087 cur_region = new_omp_region (bb, code, cur_region);
8088 fallthru = true;
8089 switch (gimple_omp_target_kind (last))
8091 case GF_OMP_TARGET_KIND_REGION:
8092 case GF_OMP_TARGET_KIND_DATA:
8093 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8094 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8095 case GF_OMP_TARGET_KIND_OACC_DATA:
8096 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8097 break;
8098 case GF_OMP_TARGET_KIND_UPDATE:
8099 case GF_OMP_TARGET_KIND_ENTER_DATA:
8100 case GF_OMP_TARGET_KIND_EXIT_DATA:
8101 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8102 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8103 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8104 cur_region = cur_region->outer;
8105 break;
8106 default:
8107 gcc_unreachable ();
8109 break;
8111 case GIMPLE_OMP_SECTIONS:
8112 cur_region = new_omp_region (bb, code, cur_region);
8113 fallthru = true;
8114 break;
8116 case GIMPLE_OMP_SECTIONS_SWITCH:
8117 fallthru = false;
8118 break;
8120 case GIMPLE_OMP_ATOMIC_LOAD:
8121 case GIMPLE_OMP_ATOMIC_STORE:
8122 fallthru = true;
8123 break;
8125 case GIMPLE_OMP_RETURN:
8126 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8127 somewhere other than the next block. This will be
8128 created later. */
8129 cur_region->exit = bb;
8130 if (cur_region->type == GIMPLE_OMP_TASK)
8131 /* Add an edge corresponding to not scheduling the task
8132 immediately. */
8133 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8134 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8135 cur_region = cur_region->outer;
8136 break;
8138 case GIMPLE_OMP_CONTINUE:
8139 cur_region->cont = bb;
8140 switch (cur_region->type)
8142 case GIMPLE_OMP_FOR:
8143 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8144 succs edges as abnormal to prevent splitting
8145 them. */
8146 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8147 /* Make the loopback edge. */
8148 make_edge (bb, single_succ (cur_region->entry),
8149 EDGE_ABNORMAL);
8151 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8152 corresponds to the case that the body of the loop
8153 is not executed at all. */
8154 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8155 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8156 fallthru = false;
8157 break;
8159 case GIMPLE_OMP_SECTIONS:
8160 /* Wire up the edges into and out of the nested sections. */
8162 basic_block switch_bb = single_succ (cur_region->entry);
8164 struct omp_region *i;
8165 for (i = cur_region->inner; i ; i = i->next)
8167 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8168 make_edge (switch_bb, i->entry, 0);
8169 make_edge (i->exit, bb, EDGE_FALLTHRU);
8172 /* Make the loopback edge to the block with
8173 GIMPLE_OMP_SECTIONS_SWITCH. */
8174 make_edge (bb, switch_bb, 0);
8176 /* Make the edge from the switch to exit. */
8177 make_edge (switch_bb, bb->next_bb, 0);
8178 fallthru = false;
8180 break;
8182 case GIMPLE_OMP_TASK:
8183 fallthru = true;
8184 break;
8186 default:
8187 gcc_unreachable ();
8189 break;
8191 default:
8192 gcc_unreachable ();
8195 if (*region != cur_region)
8197 *region = cur_region;
8198 if (cur_region)
8199 *region_idx = cur_region->entry->index;
8200 else
8201 *region_idx = 0;
8204 return fallthru;
8207 #include "gt-omp-expand.h"