[official-gcc.git] / gcc / omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "debug.h"
61 #include "stringpool.h"
62 #include "attribs.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
106 a depend clause. */
107 gomp_ordered *ord_stmt;
110 static struct omp_region *root_omp_region;
111 static bool omp_any_child_fn_dumped;
113 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
114 bool = false);
115 static gphi *find_phi_with_arg_on_edge (tree, edge);
116 static void expand_omp (struct omp_region *region);
118 /* Return true if REGION is a combined parallel+workshare region. */
120 static inline bool
121 is_combined_parallel (struct omp_region *region)
123 return region->is_combined_parallel;
126 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
127 is the immediate dominator of PAR_ENTRY_BB, return true if there
128 are no data dependencies that would prevent expanding the parallel
129 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
131 When expanding a combined parallel+workshare region, the call to
132 the child function may need additional arguments in the case of
133 GIMPLE_OMP_FOR regions. In some cases, these arguments are
134 computed out of variables passed in from the parent to the child
135 via 'struct .omp_data_s'. For instance:
137 #pragma omp parallel for schedule (guided, i * 4)
138 for (j ...)
140 Is lowered into:
142 # BLOCK 2 (PAR_ENTRY_BB)
143 .omp_data_o.i = i;
144 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
146 # BLOCK 3 (WS_ENTRY_BB)
147 .omp_data_i = &.omp_data_o;
148 D.1667 = .omp_data_i->i;
149 D.1598 = D.1667 * 4;
150 #pragma omp for schedule (guided, D.1598)
152 When we outline the parallel region, the call to the child function
153 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
154 that value is computed *after* the call site. So, in principle we
155 cannot do the transformation.
157 To see whether the code in WS_ENTRY_BB blocks the combined
158 parallel+workshare call, we collect all the variables used in the
159 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
160 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 call.
163 FIXME. If we had the SSA form built at this point, we could merely
164 hoist the code in block 3 into block 2 and be done with it. But at
165 this point we don't have dataflow information and though we could
166 hack something up here, it is really not worth the aggravation. */
168 static bool
169 workshare_safe_to_combine_p (basic_block ws_entry_bb)
171 struct omp_for_data fd;
172 gimple *ws_stmt = last_stmt (ws_entry_bb);
174 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
175 return true;
177 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
197 return true;
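/* Illustrative user-level sketch (not from this file) of what the test
   above accepts and rejects.  Assuming the schedule kind itself allows
   combining (see determine_parallel_type below), the first loop can
   become a single combined parallel+workshare call because every loop
   parameter is invariant, while the second cannot: its chunk
   expression would only be computed inside the outlined region, after
   the combined call site.  Per the FIXME above, non-constant bounds
   are likewise rejected.  */
void
example_combinable (double *a, int i)
{
  #pragma omp parallel for schedule (guided, 4)
  for (int j = 0; j < 1024; j++)
    a[j] = 0.0;
  #pragma omp parallel for schedule (guided, i * 4)
  for (int j = 0; j < 1024; j++)
    a[j] = 1.0;
}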
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
206 if (!simd_schedule)
207 return chunk_size;
209 int vf = omp_max_vf ();
210 if (vf == 1)
211 return chunk_size;
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
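/* Worked example of the rounding above: with a vectorization factor
   vf = 8 and a requested chunk_size of 10, the simd schedule gets
   (10 + 7) & -8 = 16, i.e. the chunk is rounded up to the next
   multiple of the vectorization factor so that only full SIMD-width
   chunks are handed out.  A chunk that is already a multiple is left
   unchanged: (16 + 7) & -8 = 16.  */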
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
233 struct omp_for_data fd;
234 tree n1, n2;
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
240 if (gimple_omp_for_combined_into_p (for_stmt))
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
264 if (fd.chunk_size)
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
271 return ws_args;
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
285 gcc_unreachable ();
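/* Sketch of where these arguments end up (assuming the usual libgomp
   entry points; the exact prototypes live in libgomp, not here): for a
   combined parallel loop the outlined body and the extra ws_args are
   passed to a single call along the lines of
     GOMP_parallel_loop_guided (bar._omp_fn.0, &.omp_data_o,
                                num_threads, start, end, step,
                                chunk_size, flags);
   while a combined parallel sections region passes the section count
   collected above to GOMP_parallel_sections.  */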
288 /* Discover whether REGION is a combined parallel+workshare region. */
290 static void
291 determine_parallel_type (struct omp_region *region)
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
314 if (single_succ (par_entry_bb) == ws_entry_bb
315 && single_succ (ws_exit_bb) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
318 || (last_and_only_stmt (ws_entry_bb)
319 && last_and_only_stmt (par_exit_bb))))
321 gimple *par_stmt = last_stmt (par_entry_bb);
322 gimple *ws_stmt = last_stmt (ws_entry_bb);
324 if (region->inner->type == GIMPLE_OMP_FOR)
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses = gimple_omp_for_clauses (ws_stmt);
336 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
337 if (c == NULL
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
339 == OMP_CLAUSE_SCHEDULE_STATIC)
340 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
342 region->is_combined_parallel = false;
343 region->inner->is_combined_parallel = false;
344 return;
348 region->is_combined_parallel = true;
349 region->inner->is_combined_parallel = true;
350 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
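/* Consequence of the checks above (user-level sketch, not from this
   file): a region such as
     #pragma omp parallel for schedule (dynamic)
     for (i = 0; i < 1024; i++) ...
   is marked as a combined parallel+workshare region, whereas the same
   loop with schedule (static), with an ordered clause, or with no
   schedule clause at all keeps the separate expansion: static loops
   are already open coded, and ordered loops would need the extra
   synchronization anyway, so the combined call would buy nothing.  */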
354 /* Debugging dumps for parallel regions. */
355 void dump_omp_region (FILE *, struct omp_region *, int);
356 void debug_omp_region (struct omp_region *);
357 void debug_all_omp_regions (void);
359 /* Dump the parallel region tree rooted at REGION. */
361 void
362 dump_omp_region (FILE *file, struct omp_region *region, int indent)
364 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
365 gimple_code_name[region->type]);
367 if (region->inner)
368 dump_omp_region (file, region->inner, indent + 4);
370 if (region->cont)
372 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
373 region->cont->index);
376 if (region->exit)
377 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
378 region->exit->index);
379 else
380 fprintf (file, "%*s[no exit marker]\n", indent, "");
382 if (region->next)
383 dump_omp_region (file, region->next, indent);
386 DEBUG_FUNCTION void
387 debug_omp_region (struct omp_region *region)
389 dump_omp_region (stderr, region, 0);
392 DEBUG_FUNCTION void
393 debug_all_omp_regions (void)
395 dump_omp_region (stderr, root_omp_region, 0);
398 /* Create a new parallel region starting at STMT inside region PARENT. */
400 static struct omp_region *
401 new_omp_region (basic_block bb, enum gimple_code type,
402 struct omp_region *parent)
404 struct omp_region *region = XCNEW (struct omp_region);
406 region->outer = parent;
407 region->entry = bb;
408 region->type = type;
410 if (parent)
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region->next = parent->inner;
415 parent->inner = region;
417 else
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region->next = root_omp_region;
422 root_omp_region = region;
425 return region;
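/* Illustrative shape of the region tree built here (hypothetical
   source, not from this file): for
     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
       #pragma omp single
       ...
     }
   the GIMPLE_OMP_PARALLEL region is (in the non-nested case) a
   toplevel entry on root_omp_region, and the GIMPLE_OMP_FOR and
   GIMPLE_OMP_SINGLE regions are its children, chained through "next"
   with "inner" pointing at the most recently added one, which is the
   order dump_omp_region above prints them in.  */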
428 /* Release the memory associated with the region tree rooted at REGION. */
430 static void
431 free_omp_region_1 (struct omp_region *region)
433 struct omp_region *i, *n;
435 for (i = region->inner; i ; i = n)
437 n = i->next;
438 free_omp_region_1 (i);
441 free (region);
444 /* Release the memory for the entire omp region tree. */
446 void
447 omp_free_regions (void)
449 struct omp_region *r, *n;
450 for (r = root_omp_region; r ; r = n)
452 n = r->next;
453 free_omp_region_1 (r);
455 root_omp_region = NULL;
458 /* A convenience function to build an empty GIMPLE_COND with just the
459 condition. */
461 static gcond *
462 gimple_build_cond_empty (tree cond)
464 enum tree_code pred_code;
465 tree lhs, rhs;
467 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
468 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
471 /* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
474 static bool
475 parallel_needs_hsa_kernel_p (struct omp_region *region)
477 bool indirect = false;
478 for (region = region->outer; region; region = region->outer)
480 if (region->type == GIMPLE_OMP_PARALLEL)
481 indirect = true;
482 else if (region->type == GIMPLE_OMP_TARGET)
484 gomp_target *tgt_stmt
485 = as_a <gomp_target *> (last_stmt (region->entry));
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
488 OMP_CLAUSE__GRIDDIM_))
489 return indirect;
490 else
491 return true;
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl)))
497 return true;
499 return false;
502 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
503 Add CHILD_FNDECL to decl chain of the supercontext of the block
504 ENTRY_BLOCK - this is the block which originally contained the
505 code from which CHILD_FNDECL was created.
507 Together, these actions ensure that the debug info for the outlined
508 function will be emitted with the correct lexical scope. */
510 static void
511 adjust_context_and_scope (tree entry_block, tree child_fndecl)
513 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
515 tree b = BLOCK_SUPERCONTEXT (entry_block);
517 if (TREE_CODE (b) == BLOCK)
519 tree parent_fndecl;
521 /* Follow supercontext chain until the parent fndecl
522 is found. */
523 for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
524 TREE_CODE (parent_fndecl) == BLOCK;
525 parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
528 gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
530 DECL_CONTEXT (child_fndecl) = parent_fndecl;
532 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
533 BLOCK_VARS (b) = child_fndecl;
538 /* Build the function calls to GOMP_parallel_start etc to actually
539 generate the parallel operation. REGION is the parallel region
540 being expanded. BB is the block where to insert the code. WS_ARGS
541 will be set if this is a call to a combined parallel+workshare
542 construct, it contains the list of additional arguments needed by
543 the workshare construct. */
545 static void
546 expand_parallel_call (struct omp_region *region, basic_block bb,
547 gomp_parallel *entry_stmt,
548 vec<tree, va_gc> *ws_args)
550 tree t, t1, t2, val, cond, c, clauses, flags;
551 gimple_stmt_iterator gsi;
552 gimple *stmt;
553 enum built_in_function start_ix;
554 int start_ix2;
555 location_t clause_loc;
556 vec<tree, va_gc> *args;
558 clauses = gimple_omp_parallel_clauses (entry_stmt);
560 /* Determine what flavor of GOMP_parallel we will be
561 emitting. */
562 start_ix = BUILT_IN_GOMP_PARALLEL;
563 if (is_combined_parallel (region))
565 switch (region->inner->type)
567 case GIMPLE_OMP_FOR:
568 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
569 switch (region->inner->sched_kind)
571 case OMP_CLAUSE_SCHEDULE_RUNTIME:
572 start_ix2 = 3;
573 break;
574 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
575 case OMP_CLAUSE_SCHEDULE_GUIDED:
576 if (region->inner->sched_modifiers
577 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
579 start_ix2 = 3 + region->inner->sched_kind;
580 break;
582 /* FALLTHRU */
583 default:
584 start_ix2 = region->inner->sched_kind;
585 break;
587 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
588 start_ix = (enum built_in_function) start_ix2;
589 break;
590 case GIMPLE_OMP_SECTIONS:
591 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
592 break;
593 default:
594 gcc_unreachable ();
598 /* By default, the value of NUM_THREADS is zero (selected at run time)
599 and there is no conditional. */
600 cond = NULL_TREE;
601 val = build_int_cst (unsigned_type_node, 0);
602 flags = build_int_cst (unsigned_type_node, 0);
604 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
605 if (c)
606 cond = OMP_CLAUSE_IF_EXPR (c);
608 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
609 if (c)
611 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
612 clause_loc = OMP_CLAUSE_LOCATION (c);
614 else
615 clause_loc = gimple_location (entry_stmt);
617 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
618 if (c)
619 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
621 /* Ensure 'val' is of the correct type. */
622 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
624 /* If we found the clause 'if (cond)', build either
625 (cond != 0) or (cond ? val : 1u). */
626 if (cond)
628 cond = gimple_boolify (cond);
630 if (integer_zerop (val))
631 val = fold_build2_loc (clause_loc,
632 EQ_EXPR, unsigned_type_node, cond,
633 build_int_cst (TREE_TYPE (cond), 0));
634 else
636 basic_block cond_bb, then_bb, else_bb;
637 edge e, e_then, e_else;
638 tree tmp_then, tmp_else, tmp_join, tmp_var;
640 tmp_var = create_tmp_var (TREE_TYPE (val));
641 if (gimple_in_ssa_p (cfun))
643 tmp_then = make_ssa_name (tmp_var);
644 tmp_else = make_ssa_name (tmp_var);
645 tmp_join = make_ssa_name (tmp_var);
647 else
649 tmp_then = tmp_var;
650 tmp_else = tmp_var;
651 tmp_join = tmp_var;
654 e = split_block_after_labels (bb);
655 cond_bb = e->src;
656 bb = e->dest;
657 remove_edge (e);
659 then_bb = create_empty_bb (cond_bb);
660 else_bb = create_empty_bb (then_bb);
661 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
662 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
664 stmt = gimple_build_cond_empty (cond);
665 gsi = gsi_start_bb (cond_bb);
666 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
668 gsi = gsi_start_bb (then_bb);
669 expand_omp_build_assign (&gsi, tmp_then, val, true);
671 gsi = gsi_start_bb (else_bb);
672 expand_omp_build_assign (&gsi, tmp_else,
673 build_int_cst (unsigned_type_node, 1),
674 true);
676 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
677 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
678 add_bb_to_loop (then_bb, cond_bb->loop_father);
679 add_bb_to_loop (else_bb, cond_bb->loop_father);
680 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
681 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
683 if (gimple_in_ssa_p (cfun))
685 gphi *phi = create_phi_node (tmp_join, bb);
686 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
687 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
690 val = tmp_join;
693 gsi = gsi_start_bb (bb);
694 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
695 false, GSI_CONTINUE_LINKING);
698 gsi = gsi_last_bb (bb);
699 t = gimple_omp_parallel_data_arg (entry_stmt);
700 if (t == NULL)
701 t1 = null_pointer_node;
702 else
703 t1 = build_fold_addr_expr (t);
704 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
705 t2 = build_fold_addr_expr (child_fndecl);
707 adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
709 vec_alloc (args, 4 + vec_safe_length (ws_args));
710 args->quick_push (t2);
711 args->quick_push (t1);
712 args->quick_push (val);
713 if (ws_args)
714 args->splice (*ws_args);
715 args->quick_push (flags);
717 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
718 builtin_decl_explicit (start_ix), args);
720 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
721 false, GSI_CONTINUE_LINKING);
723 if (hsa_gen_requested_p ()
724 && parallel_needs_hsa_kernel_p (region))
726 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
727 hsa_register_kernel (child_cnode);
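/* User-level sketch of the clause handling above (hypothetical source,
   not from this file):
     #pragma omp parallel if (use_threads) num_threads (n)
     { ... }
   expands into roughly
     tmp = use_threads ? (unsigned) n : 1u;
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, tmp, 0);
   i.e. the IF and NUM_THREADS clauses are folded into the single
   unsigned num_threads argument, where 0 means "let the runtime
   choose" and 1 forces serial execution; with an IF clause alone the
   argument becomes (cond ? 0 : 1), and a PROC_BIND clause is passed
   through the final flags argument.  */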
731 /* Insert a function call whose name is FUNC_NAME with the information from
732 ENTRY_STMT into the basic_block BB. */
734 static void
735 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
736 vec <tree, va_gc> *ws_args)
738 tree t, t1, t2;
739 gimple_stmt_iterator gsi;
740 vec <tree, va_gc> *args;
742 gcc_assert (vec_safe_length (ws_args) == 2);
743 tree func_name = (*ws_args)[0];
744 tree grain = (*ws_args)[1];
746 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
747 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
748 gcc_assert (count != NULL_TREE);
749 count = OMP_CLAUSE_OPERAND (count, 0);
751 gsi = gsi_last_bb (bb);
752 t = gimple_omp_parallel_data_arg (entry_stmt);
753 if (t == NULL)
754 t1 = null_pointer_node;
755 else
756 t1 = build_fold_addr_expr (t);
757 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
759 vec_alloc (args, 4);
760 args->quick_push (t2);
761 args->quick_push (t1);
762 args->quick_push (count);
763 args->quick_push (grain);
764 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
766 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
767 GSI_CONTINUE_LINKING);
770 /* Build the function call to GOMP_task to actually
771 generate the task operation. BB is the block where to insert the code. */
773 static void
774 expand_task_call (struct omp_region *region, basic_block bb,
775 gomp_task *entry_stmt)
777 tree t1, t2, t3;
778 gimple_stmt_iterator gsi;
779 location_t loc = gimple_location (entry_stmt);
781 tree clauses = gimple_omp_task_clauses (entry_stmt);
783 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
784 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
785 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
786 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
787 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
788 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
790 unsigned int iflags
791 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
792 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
793 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
795 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
796 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
797 tree num_tasks = NULL_TREE;
798 bool ull = false;
799 if (taskloop_p)
801 gimple *g = last_stmt (region->outer->entry);
802 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
803 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
804 struct omp_for_data fd;
805 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
806 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
807 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
808 OMP_CLAUSE__LOOPTEMP_);
809 startvar = OMP_CLAUSE_DECL (startvar);
810 endvar = OMP_CLAUSE_DECL (endvar);
811 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
812 if (fd.loop.cond_code == LT_EXPR)
813 iflags |= GOMP_TASK_FLAG_UP;
814 tree tclauses = gimple_omp_for_clauses (g);
815 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
816 if (num_tasks)
817 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
818 else
820 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
821 if (num_tasks)
823 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
824 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
826 else
827 num_tasks = integer_zero_node;
829 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
830 if (ifc == NULL_TREE)
831 iflags |= GOMP_TASK_FLAG_IF;
832 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
833 iflags |= GOMP_TASK_FLAG_NOGROUP;
834 ull = fd.iter_type == long_long_unsigned_type_node;
836 else if (priority)
837 iflags |= GOMP_TASK_FLAG_PRIORITY;
839 tree flags = build_int_cst (unsigned_type_node, iflags);
841 tree cond = boolean_true_node;
842 if (ifc)
844 if (taskloop_p)
846 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
848 build_int_cst (unsigned_type_node,
849 GOMP_TASK_FLAG_IF),
850 build_int_cst (unsigned_type_node, 0));
851 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
852 flags, t);
854 else
855 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
858 if (finalc)
860 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
861 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
862 build_int_cst (unsigned_type_node,
863 GOMP_TASK_FLAG_FINAL),
864 build_int_cst (unsigned_type_node, 0));
865 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
867 if (depend)
868 depend = OMP_CLAUSE_DECL (depend);
869 else
870 depend = build_int_cst (ptr_type_node, 0);
871 if (priority)
872 priority = fold_convert (integer_type_node,
873 OMP_CLAUSE_PRIORITY_EXPR (priority));
874 else
875 priority = integer_zero_node;
877 gsi = gsi_last_bb (bb);
878 tree t = gimple_omp_task_data_arg (entry_stmt);
879 if (t == NULL)
880 t2 = null_pointer_node;
881 else
882 t2 = build_fold_addr_expr_loc (loc, t);
883 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
884 t = gimple_omp_task_copy_fn (entry_stmt);
885 if (t == NULL)
886 t3 = null_pointer_node;
887 else
888 t3 = build_fold_addr_expr_loc (loc, t);
890 if (taskloop_p)
891 t = build_call_expr (ull
892 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
893 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
894 11, t1, t2, t3,
895 gimple_omp_task_arg_size (entry_stmt),
896 gimple_omp_task_arg_align (entry_stmt), flags,
897 num_tasks, priority, startvar, endvar, step);
898 else
899 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
900 9, t1, t2, t3,
901 gimple_omp_task_arg_size (entry_stmt),
902 gimple_omp_task_arg_align (entry_stmt), cond, flags,
903 depend, priority);
905 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
906 false, GSI_CONTINUE_LINKING);
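/* Worked example of the flag word built above (hypothetical source,
   not from this file): for
     #pragma omp task untied mergeable final (depth > 8) priority (2)
   the compile-time part of the flags is GOMP_TASK_FLAG_UNTIED
   | GOMP_TASK_FLAG_MERGEABLE | GOMP_TASK_FLAG_PRIORITY, the FINAL
   clause adds the runtime term "(depth > 8) ? GOMP_TASK_FLAG_FINAL : 0",
   and the resulting call is roughly
     GOMP_task (body_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                true, flags, NULL, 2);
   where the sixth argument is the IF-clause condition (true here, as
   there is no IF clause) and NULL stands for the absent depend
   array.  */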
909 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
911 static tree
912 vec2chain (vec<tree, va_gc> *v)
914 tree chain = NULL_TREE, t;
915 unsigned ix;
917 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
919 DECL_CHAIN (t) = chain;
920 chain = t;
923 return chain;
926 /* Remove barriers in REGION->EXIT's block. Note that this is only
927 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
928 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
929 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
930 removed. */
932 static void
933 remove_exit_barrier (struct omp_region *region)
935 gimple_stmt_iterator gsi;
936 basic_block exit_bb;
937 edge_iterator ei;
938 edge e;
939 gimple *stmt;
940 int any_addressable_vars = -1;
942 exit_bb = region->exit;
944 /* If the parallel region doesn't return, we don't have REGION->EXIT
945 block at all. */
946 if (! exit_bb)
947 return;
949 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
950 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
951 statements that can appear in between are extremely limited -- no
952 memory operations at all. Here, we allow nothing at all, so the
953 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
954 gsi = gsi_last_bb (exit_bb);
955 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
956 gsi_prev (&gsi);
957 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
958 return;
960 FOR_EACH_EDGE (e, ei, exit_bb->preds)
962 gsi = gsi_last_bb (e->src);
963 if (gsi_end_p (gsi))
964 continue;
965 stmt = gsi_stmt (gsi);
966 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
967 && !gimple_omp_return_nowait_p (stmt))
969 /* OpenMP 3.0 tasks unfortunately prevent this optimization
970 in many cases. If there could be tasks queued, the barrier
971 might be needed to let the tasks run before some local
972 variable of the parallel that the task uses as shared
973 runs out of scope. The task can be spawned either
974 from within current function (this would be easy to check)
975 or from some function it calls and gets passed an address
976 of such a variable. */
977 if (any_addressable_vars < 0)
979 gomp_parallel *parallel_stmt
980 = as_a <gomp_parallel *> (last_stmt (region->entry));
981 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
982 tree local_decls, block, decl;
983 unsigned ix;
985 any_addressable_vars = 0;
986 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
987 if (TREE_ADDRESSABLE (decl))
989 any_addressable_vars = 1;
990 break;
992 for (block = gimple_block (stmt);
993 !any_addressable_vars
994 && block
995 && TREE_CODE (block) == BLOCK;
996 block = BLOCK_SUPERCONTEXT (block))
998 for (local_decls = BLOCK_VARS (block);
999 local_decls;
1000 local_decls = DECL_CHAIN (local_decls))
1001 if (TREE_ADDRESSABLE (local_decls))
1003 any_addressable_vars = 1;
1004 break;
1006 if (block == gimple_block (parallel_stmt))
1007 break;
1010 if (!any_addressable_vars)
1011 gimple_omp_return_set_nowait (stmt);
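/* Illustrative consequence of the check above (hypothetical source):
     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         a[i] = f (i);
     }
   The workshare's implicit barrier is immediately followed by the
   barrier that ends the parallel region, so it is redundant and its
   GIMPLE_OMP_RETURN is marked nowait -- unless some local variable of
   the parallel is addressable, in which case a task queued inside the
   region could still be reading it through a pointer and the inner
   barrier has to stay until the tasks have run.  */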
1016 static void
1017 remove_exit_barriers (struct omp_region *region)
1019 if (region->type == GIMPLE_OMP_PARALLEL)
1020 remove_exit_barrier (region);
1022 if (region->inner)
1024 region = region->inner;
1025 remove_exit_barriers (region);
1026 while (region->next)
1028 region = region->next;
1029 remove_exit_barriers (region);
1034 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1035 calls. These can't be declared as const functions, but
1036 within one parallel body they are constant, so they can be
1037 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1038 which are declared const. Similarly for task body, except
1039 that in untied task omp_get_thread_num () can change at any task
1040 scheduling point. */
1042 static void
1043 optimize_omp_library_calls (gimple *entry_stmt)
1045 basic_block bb;
1046 gimple_stmt_iterator gsi;
1047 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1048 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1049 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1050 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1051 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1052 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1053 OMP_CLAUSE_UNTIED) != NULL);
1055 FOR_EACH_BB_FN (bb, cfun)
1056 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1058 gimple *call = gsi_stmt (gsi);
1059 tree decl;
1061 if (is_gimple_call (call)
1062 && (decl = gimple_call_fndecl (call))
1063 && DECL_EXTERNAL (decl)
1064 && TREE_PUBLIC (decl)
1065 && DECL_INITIAL (decl) == NULL)
1067 tree built_in;
1069 if (DECL_NAME (decl) == thr_num_id)
1071 /* In #pragma omp task untied omp_get_thread_num () can change
1072 during the execution of the task region. */
1073 if (untied_task)
1074 continue;
1075 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1077 else if (DECL_NAME (decl) == num_thr_id)
1078 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1079 else
1080 continue;
1082 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1083 || gimple_call_num_args (call) != 0)
1084 continue;
1086 if (flag_exceptions && !TREE_NOTHROW (decl))
1087 continue;
1089 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1090 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1091 TREE_TYPE (TREE_TYPE (built_in))))
1092 continue;
1094 gimple_call_set_fndecl (call, built_in);
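/* Effect of the substitution above: inside an outlined parallel body a
   call such as
     tid = omp_get_thread_num ();
   is redirected to the __builtin_omp_get_thread_num () declaration,
   which is const, so repeated queries within one parallel body can be
   CSEd into a single call.  In an untied task region the thread
   executing the task may change at any task scheduling point, so there
   omp_get_thread_num () is deliberately left untouched.  */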
1099 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1100 regimplified. */
1102 static tree
1103 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1105 tree t = *tp;
1107 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1108 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1109 return t;
1111 if (TREE_CODE (t) == ADDR_EXPR)
1112 recompute_tree_invariant_for_addr_expr (t);
1114 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1115 return NULL_TREE;
1118 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1120 static void
1121 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1122 bool after)
1124 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1125 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1126 !after, after ? GSI_CONTINUE_LINKING
1127 : GSI_SAME_STMT);
1128 gimple *stmt = gimple_build_assign (to, from);
1129 if (after)
1130 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1131 else
1132 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1133 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1134 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1136 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1137 gimple_regimplify_operands (stmt, &gsi);
1141 /* Expand the OpenMP parallel or task directive starting at REGION. */
1143 static void
1144 expand_omp_taskreg (struct omp_region *region)
1146 basic_block entry_bb, exit_bb, new_bb;
1147 struct function *child_cfun;
1148 tree child_fn, block, t;
1149 gimple_stmt_iterator gsi;
1150 gimple *entry_stmt, *stmt;
1151 edge e;
1152 vec<tree, va_gc> *ws_args;
1154 entry_stmt = last_stmt (region->entry);
1155 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1156 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1158 entry_bb = region->entry;
1159 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1160 exit_bb = region->cont;
1161 else
1162 exit_bb = region->exit;
1164 bool is_cilk_for
1165 = (flag_cilkplus
1166 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1167 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1168 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1170 if (is_cilk_for)
1171 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1172 and the inner statement contains the name of the built-in function
1173 and grain. */
1174 ws_args = region->inner->ws_args;
1175 else if (is_combined_parallel (region))
1176 ws_args = region->ws_args;
1177 else
1178 ws_args = NULL;
1180 if (child_cfun->cfg)
1182 /* Due to inlining, it may happen that we have already outlined
1183 the region, in which case all we need to do is make the
1184 sub-graph unreachable and emit the parallel call. */
1185 edge entry_succ_e, exit_succ_e;
1187 entry_succ_e = single_succ_edge (entry_bb);
1189 gsi = gsi_last_bb (entry_bb);
1190 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1191 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1192 gsi_remove (&gsi, true);
1194 new_bb = entry_bb;
1195 if (exit_bb)
1197 exit_succ_e = single_succ_edge (exit_bb);
1198 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1200 remove_edge_and_dominated_blocks (entry_succ_e);
1202 else
1204 unsigned srcidx, dstidx, num;
1206 /* If the parallel region needs data sent from the parent
1207 function, then the very first statement (except possible
1208 tree profile counter updates) of the parallel body
1209 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1210 &.OMP_DATA_O is passed as an argument to the child function,
1211 we need to replace it with the argument as seen by the child
1212 function.
1214 In most cases, this will end up being the identity assignment
1215 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1216 a function call that has been inlined, the original PARM_DECL
1217 .OMP_DATA_I may have been converted into a different local
1218 variable, in which case we need to keep the assignment. */
1219 if (gimple_omp_taskreg_data_arg (entry_stmt))
1221 basic_block entry_succ_bb
1222 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1223 : FALLTHRU_EDGE (entry_bb)->dest;
1224 tree arg;
1225 gimple *parcopy_stmt = NULL;
1227 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1229 gimple *stmt;
1231 gcc_assert (!gsi_end_p (gsi));
1232 stmt = gsi_stmt (gsi);
1233 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1234 continue;
1236 if (gimple_num_ops (stmt) == 2)
1238 tree arg = gimple_assign_rhs1 (stmt);
1240 /* We're ignoring the subcode because we're
1241 effectively doing a STRIP_NOPS. */
1243 if (TREE_CODE (arg) == ADDR_EXPR
1244 && TREE_OPERAND (arg, 0)
1245 == gimple_omp_taskreg_data_arg (entry_stmt))
1247 parcopy_stmt = stmt;
1248 break;
1253 gcc_assert (parcopy_stmt != NULL);
1254 arg = DECL_ARGUMENTS (child_fn);
1256 if (!gimple_in_ssa_p (cfun))
1258 if (gimple_assign_lhs (parcopy_stmt) == arg)
1259 gsi_remove (&gsi, true);
1260 else
1262 /* ?? Is setting the subcode really necessary ?? */
1263 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1264 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1267 else
1269 tree lhs = gimple_assign_lhs (parcopy_stmt);
1270 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1271 /* We'd like to set the rhs to the default def in the child_fn,
1272 but it's too early to create ssa names in the child_fn.
1273 Instead, we set the rhs to the parm. In
1274 move_sese_region_to_fn, we introduce a default def for the
1275 parm, map the parm to its default def, and once we encounter
1276 this stmt, replace the parm with the default def. */
1277 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1278 update_stmt (parcopy_stmt);
1282 /* Declare local variables needed in CHILD_CFUN. */
1283 block = DECL_INITIAL (child_fn);
1284 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1285 /* The gimplifier could record temporaries in parallel/task block
1286 rather than in containing function's local_decls chain,
1287 which would mean cgraph missed finalizing them. Do it now. */
1288 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1289 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1290 varpool_node::finalize_decl (t);
1291 DECL_SAVED_TREE (child_fn) = NULL;
1292 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1293 gimple_set_body (child_fn, NULL);
1294 TREE_USED (block) = 1;
1296 /* Reset DECL_CONTEXT on function arguments. */
1297 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1298 DECL_CONTEXT (t) = child_fn;
1300 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1301 so that it can be moved to the child function. */
1302 gsi = gsi_last_bb (entry_bb);
1303 stmt = gsi_stmt (gsi);
1304 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1305 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1306 e = split_block (entry_bb, stmt);
1307 gsi_remove (&gsi, true);
1308 entry_bb = e->dest;
1309 edge e2 = NULL;
1310 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1311 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1312 else
1314 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1315 gcc_assert (e2->dest == region->exit);
1316 remove_edge (BRANCH_EDGE (entry_bb));
1317 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1318 gsi = gsi_last_bb (region->exit);
1319 gcc_assert (!gsi_end_p (gsi)
1320 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1321 gsi_remove (&gsi, true);
1324 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1325 if (exit_bb)
1327 gsi = gsi_last_bb (exit_bb);
1328 gcc_assert (!gsi_end_p (gsi)
1329 && (gimple_code (gsi_stmt (gsi))
1330 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1331 stmt = gimple_build_return (NULL);
1332 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1333 gsi_remove (&gsi, true);
1336 /* Move the parallel region into CHILD_CFUN. */
1338 if (gimple_in_ssa_p (cfun))
1340 init_tree_ssa (child_cfun);
1341 init_ssa_operands (child_cfun);
1342 child_cfun->gimple_df->in_ssa_p = true;
1343 block = NULL_TREE;
1345 else
1346 block = gimple_block (entry_stmt);
1348 /* Make sure to generate early debug for the function before
1349 outlining anything. */
1350 if (! gimple_in_ssa_p (cfun))
1351 (*debug_hooks->early_global_decl) (cfun->decl);
1353 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1354 if (exit_bb)
1355 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1356 if (e2)
1358 basic_block dest_bb = e2->dest;
1359 if (!exit_bb)
1360 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1361 remove_edge (e2);
1362 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1364 /* When the OMP expansion process cannot guarantee an up-to-date
1365 loop tree arrange for the child function to fixup loops. */
1366 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1367 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1369 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1370 num = vec_safe_length (child_cfun->local_decls);
1371 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1373 t = (*child_cfun->local_decls)[srcidx];
1374 if (DECL_CONTEXT (t) == cfun->decl)
1375 continue;
1376 if (srcidx != dstidx)
1377 (*child_cfun->local_decls)[dstidx] = t;
1378 dstidx++;
1380 if (dstidx != num)
1381 vec_safe_truncate (child_cfun->local_decls, dstidx);
1383 /* Inform the callgraph about the new function. */
1384 child_cfun->curr_properties = cfun->curr_properties;
1385 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1386 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1387 cgraph_node *node = cgraph_node::get_create (child_fn);
1388 node->parallelized_function = 1;
1389 cgraph_node::add_new_function (child_fn, true);
1391 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1392 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1394 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1395 fixed in a following pass. */
1396 push_cfun (child_cfun);
1397 if (need_asm)
1398 assign_assembler_name_if_needed (child_fn);
1400 if (optimize)
1401 optimize_omp_library_calls (entry_stmt);
1402 cgraph_edge::rebuild_edges ();
1404 /* Some EH regions might become dead, see PR34608. If
1405 pass_cleanup_cfg isn't the first pass to happen with the
1406 new child, these dead EH edges might cause problems.
1407 Clean them up now. */
1408 if (flag_exceptions)
1410 basic_block bb;
1411 bool changed = false;
1413 FOR_EACH_BB_FN (bb, cfun)
1414 changed |= gimple_purge_dead_eh_edges (bb);
1415 if (changed)
1416 cleanup_tree_cfg ();
1418 if (gimple_in_ssa_p (cfun))
1419 update_ssa (TODO_update_ssa);
1420 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1421 verify_loop_structure ();
1422 pop_cfun ();
1424 if (dump_file && !gimple_in_ssa_p (cfun))
1426 omp_any_child_fn_dumped = true;
1427 dump_function_header (dump_file, child_fn, dump_flags);
1428 dump_function_to_file (child_fn, dump_file, dump_flags);
1432 /* Emit a library call to launch the children threads. */
1433 if (is_cilk_for)
1434 expand_cilk_for_call (new_bb,
1435 as_a <gomp_parallel *> (entry_stmt), ws_args);
1436 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1437 expand_parallel_call (region, new_bb,
1438 as_a <gomp_parallel *> (entry_stmt), ws_args);
1439 else
1440 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1441 if (gimple_in_ssa_p (cfun))
1442 update_ssa (TODO_update_ssa_only_virtuals);
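/* End-to-end sketch of the outlining done by this function
   (hypothetical source, not from this file): for
     #pragma omp parallel shared (x)
     use (x);
   the statements of the region are moved into a new child function,
   roughly
     foo._omp_fn.0 (struct .omp_data_s * .omp_data_i)
       { use (.omp_data_i->x); }
   the child is added to the callgraph, and in the parent the region is
   replaced by the store of x into .omp_data_o followed by the library
   call emitted by expand_parallel_call above, e.g.
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);  */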
1445 /* Information about members of an OpenACC collapsed loop nest. */
1447 struct oacc_collapse
1449 tree base; /* Base value. */
1450 tree iters; /* Number of steps. */
1451 tree step; /* Step size. */
1452 tree tile; /* Tile increment (if tiled). */
1453 tree outer; /* Tile iterator var. */
1456 /* Helper for expand_oacc_for. Determine collapsed loop information.
1457 Fill in COUNTS array. Emit any initialization code before GSI.
1458 Return the calculated outer loop bound of BOUND_TYPE. */
1460 static tree
1461 expand_oacc_collapse_init (const struct omp_for_data *fd,
1462 gimple_stmt_iterator *gsi,
1463 oacc_collapse *counts, tree bound_type,
1464 location_t loc)
1466 tree tiling = fd->tiling;
1467 tree total = build_int_cst (bound_type, 1);
1468 int ix;
1470 gcc_assert (integer_onep (fd->loop.step));
1471 gcc_assert (integer_zerop (fd->loop.n1));
1473 /* When tiling, the first operand of the tile clause applies to the
1474 innermost loop, and we work outwards from there. Seems
1475 backwards, but whatever. */
1476 for (ix = fd->collapse; ix--;)
1478 const omp_for_data_loop *loop = &fd->loops[ix];
1480 tree iter_type = TREE_TYPE (loop->v);
1481 tree diff_type = iter_type;
1482 tree plus_type = iter_type;
1484 gcc_assert (loop->cond_code == fd->loop.cond_code);
1486 if (POINTER_TYPE_P (iter_type))
1487 plus_type = sizetype;
1488 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1489 diff_type = signed_type_for (diff_type);
1491 if (tiling)
1493 tree num = build_int_cst (integer_type_node, fd->collapse);
1494 tree loop_no = build_int_cst (integer_type_node, ix);
1495 tree tile = TREE_VALUE (tiling);
1496 gcall *call
1497 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1498 /* gwv-outer=*/integer_zero_node,
1499 /* gwv-inner=*/integer_zero_node);
1501 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1502 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1503 gimple_call_set_lhs (call, counts[ix].tile);
1504 gimple_set_location (call, loc);
1505 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1507 tiling = TREE_CHAIN (tiling);
1509 else
1511 counts[ix].tile = NULL;
1512 counts[ix].outer = loop->v;
1515 tree b = loop->n1;
1516 tree e = loop->n2;
1517 tree s = loop->step;
1518 bool up = loop->cond_code == LT_EXPR;
1519 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1520 bool negating;
1521 tree expr;
1523 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1524 true, GSI_SAME_STMT);
1525 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1526 true, GSI_SAME_STMT);
1528 /* Convert the step, avoiding possible unsigned->signed overflow. */
1529 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1530 if (negating)
1531 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1532 s = fold_convert (diff_type, s);
1533 if (negating)
1534 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1535 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1536 true, GSI_SAME_STMT);
1538 /* Determine the range, avoiding possible unsigned->signed overflow. */
1539 negating = !up && TYPE_UNSIGNED (iter_type);
1540 expr = fold_build2 (MINUS_EXPR, plus_type,
1541 fold_convert (plus_type, negating ? b : e),
1542 fold_convert (plus_type, negating ? e : b));
1543 expr = fold_convert (diff_type, expr);
1544 if (negating)
1545 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1546 tree range = force_gimple_operand_gsi
1547 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1549 /* Determine number of iterations. */
1550 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1551 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1552 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1554 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1555 true, GSI_SAME_STMT);
1557 counts[ix].base = b;
1558 counts[ix].iters = iters;
1559 counts[ix].step = s;
1561 total = fold_build2 (MULT_EXPR, bound_type, total,
1562 fold_convert (bound_type, iters));
1565 return total;
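/* Worked example of the iteration count computed above: for an OpenACC
   loop "for (i = 0; i < 10; i += 3)" we have b = 0, e = 10, s = 3,
   up = true, dir = +1, so range = 10 - 0 = 10 and
   iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4, matching the
   iterations i = 0, 3, 6, 9.  With collapse, "total" is simply the
   product of the per-loop iteration counts.  */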
1568 /* Emit initializers for collapsed loop members. INNER is true if
1569 this is for the element loop of a TILE. IVAR is the outer
1570 loop iteration variable, from which collapsed loop iteration values
1571 are calculated. COUNTS array has been initialized by
1572 expand_oacc_collapse_init. */
1574 static void
1575 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1576 gimple_stmt_iterator *gsi,
1577 const oacc_collapse *counts, tree ivar)
1579 tree ivar_type = TREE_TYPE (ivar);
1581 /* The most rapidly changing iteration variable is the innermost
1582 one. */
1583 for (int ix = fd->collapse; ix--;)
1585 const omp_for_data_loop *loop = &fd->loops[ix];
1586 const oacc_collapse *collapse = &counts[ix];
1587 tree v = inner ? loop->v : collapse->outer;
1588 tree iter_type = TREE_TYPE (v);
1589 tree diff_type = TREE_TYPE (collapse->step);
1590 tree plus_type = iter_type;
1591 enum tree_code plus_code = PLUS_EXPR;
1592 tree expr;
1594 if (POINTER_TYPE_P (iter_type))
1596 plus_code = POINTER_PLUS_EXPR;
1597 plus_type = sizetype;
1600 expr = ivar;
1601 if (ix)
1603 tree mod = fold_convert (ivar_type, collapse->iters);
1604 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1605 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1606 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1610 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1611 collapse->step);
1612 expr = fold_build2 (plus_code, iter_type,
1613 inner ? collapse->outer : collapse->base,
1614 fold_convert (plus_type, expr));
1615 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1616 true, GSI_SAME_STMT);
1617 gassign *ass = gimple_build_assign (v, expr);
1618 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
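/* Worked example of the recovery above: with two collapsed loops whose
   iteration counts are iters1 = 4 (outer) and iters2 = 5 (inner), the
   flattened index ivar = 13 yields, starting from the innermost loop,
   13 % 5 = 3 so v2 = base2 + 3 * step2, then ivar = 13 / 5 = 2 so
   v1 = base1 + 2 * step1, i.e. the fourth inner iteration within the
   third outer iteration (both counted from zero).  */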
1622 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1623 of the combined collapse > 1 loop constructs, generate code like:
1624 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1625 if (cond3 is <)
1626 adj = STEP3 - 1;
1627 else
1628 adj = STEP3 + 1;
1629 count3 = (adj + N32 - N31) / STEP3;
1630 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1631 if (cond2 is <)
1632 adj = STEP2 - 1;
1633 else
1634 adj = STEP2 + 1;
1635 count2 = (adj + N22 - N21) / STEP2;
1636 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1637 if (cond1 is <)
1638 adj = STEP1 - 1;
1639 else
1640 adj = STEP1 + 1;
1641 count1 = (adj + N12 - N11) / STEP1;
1642 count = count1 * count2 * count3;
1643 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1644 count = 0;
1645 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1646 of the combined loop constructs, just initialize COUNTS array
1647 from the _looptemp_ clauses. */
1649 /* NOTE: It *could* be better to moosh all of the BBs together,
1650 creating one larger BB with all the computation and the unexpected
1651 jump at the end. I.e.
1653 bool zero3, zero2, zero1, zero;
1655 zero3 = N32 c3 N31;
1656 count3 = (N32 - N31) /[cl] STEP3;
1657 zero2 = N22 c2 N21;
1658 count2 = (N22 - N21) /[cl] STEP2;
1659 zero1 = N12 c1 N11;
1660 count1 = (N12 - N11) /[cl] STEP1;
1661 zero = zero3 || zero2 || zero1;
1662 count = count1 * count2 * count3;
1663 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1665 After all, we expect the zero=false, and thus we expect to have to
1666 evaluate all of the comparison expressions, so short-circuiting
1667 oughtn't be a win. Since the condition isn't protecting a
1668 denominator, we're not concerned about divide-by-zero, so we can
1669 fully evaluate count even if a numerator turned out to be wrong.
1671 It seems like putting this all together would create much better
1672 scheduling opportunities, and less pressure on the chip's branch
1673 predictor. */
1675 static void
1676 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1677 basic_block &entry_bb, tree *counts,
1678 basic_block &zero_iter1_bb, int &first_zero_iter1,
1679 basic_block &zero_iter2_bb, int &first_zero_iter2,
1680 basic_block &l2_dom_bb)
1682 tree t, type = TREE_TYPE (fd->loop.v);
1683 edge e, ne;
1684 int i;
1686 /* Collapsed loops need work for expansion into SSA form. */
1687 gcc_assert (!gimple_in_ssa_p (cfun));
1689 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1690 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1692 gcc_assert (fd->ordered == 0);
1693 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1694 isn't supposed to be handled, as the inner loop doesn't
1695 use it. */
1696 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1697 OMP_CLAUSE__LOOPTEMP_);
1698 gcc_assert (innerc);
1699 for (i = 0; i < fd->collapse; i++)
1701 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1702 OMP_CLAUSE__LOOPTEMP_);
1703 gcc_assert (innerc);
1704 if (i)
1705 counts[i] = OMP_CLAUSE_DECL (innerc);
1706 else
1707 counts[0] = NULL_TREE;
1709 return;
1712 for (i = fd->collapse; i < fd->ordered; i++)
1714 tree itype = TREE_TYPE (fd->loops[i].v);
1715 counts[i] = NULL_TREE;
1716 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1717 fold_convert (itype, fd->loops[i].n1),
1718 fold_convert (itype, fd->loops[i].n2));
1719 if (t && integer_zerop (t))
1721 for (i = fd->collapse; i < fd->ordered; i++)
1722 counts[i] = build_int_cst (type, 0);
1723 break;
1726 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1728 tree itype = TREE_TYPE (fd->loops[i].v);
1730 if (i >= fd->collapse && counts[i])
1731 continue;
1732 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1733 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1734 fold_convert (itype, fd->loops[i].n1),
1735 fold_convert (itype, fd->loops[i].n2)))
1736 == NULL_TREE || !integer_onep (t)))
1738 gcond *cond_stmt;
1739 tree n1, n2;
1740 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1741 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1742 true, GSI_SAME_STMT);
1743 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1744 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1745 true, GSI_SAME_STMT);
1746 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1747 NULL_TREE, NULL_TREE);
1748 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1749 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1750 expand_omp_regimplify_p, NULL, NULL)
1751 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1752 expand_omp_regimplify_p, NULL, NULL))
1754 *gsi = gsi_for_stmt (cond_stmt);
1755 gimple_regimplify_operands (cond_stmt, gsi);
1757 e = split_block (entry_bb, cond_stmt);
1758 basic_block &zero_iter_bb
1759 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1760 int &first_zero_iter
1761 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1762 if (zero_iter_bb == NULL)
1764 gassign *assign_stmt;
1765 first_zero_iter = i;
1766 zero_iter_bb = create_empty_bb (entry_bb);
1767 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1768 *gsi = gsi_after_labels (zero_iter_bb);
1769 if (i < fd->collapse)
1770 assign_stmt = gimple_build_assign (fd->loop.n2,
1771 build_zero_cst (type));
1772 else
1774 counts[i] = create_tmp_reg (type, ".count");
1775 assign_stmt
1776 = gimple_build_assign (counts[i], build_zero_cst (type));
1778 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1779 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1780 entry_bb);
1782 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1783 ne->probability = profile_probability::very_unlikely ();
1784 e->flags = EDGE_TRUE_VALUE;
1785 e->probability = ne->probability.invert ();
1786 if (l2_dom_bb == NULL)
1787 l2_dom_bb = entry_bb;
1788 entry_bb = e->dest;
1789 *gsi = gsi_last_bb (entry_bb);
1792 if (POINTER_TYPE_P (itype))
1793 itype = signed_type_for (itype);
1794 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1795 ? -1 : 1));
1796 t = fold_build2 (PLUS_EXPR, itype,
1797 fold_convert (itype, fd->loops[i].step), t);
1798 t = fold_build2 (PLUS_EXPR, itype, t,
1799 fold_convert (itype, fd->loops[i].n2));
1800 t = fold_build2 (MINUS_EXPR, itype, t,
1801 fold_convert (itype, fd->loops[i].n1));
1802 /* ?? We could probably use CEIL_DIV_EXPR instead of
1803 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1804 generate the same code in the end because generically we
1805 don't know that the values involved must be negative for
1806 GT?? */
1807 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1808 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1809 fold_build1 (NEGATE_EXPR, itype, t),
1810 fold_build1 (NEGATE_EXPR, itype,
1811 fold_convert (itype,
1812 fd->loops[i].step)));
1813 else
1814 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1815 fold_convert (itype, fd->loops[i].step));
1816 t = fold_convert (type, t);
1817 if (TREE_CODE (t) == INTEGER_CST)
1818 counts[i] = t;
1819 else
1821 if (i < fd->collapse || i != first_zero_iter2)
1822 counts[i] = create_tmp_reg (type, ".count");
1823 expand_omp_build_assign (gsi, counts[i], t);
1825 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1827 if (i == 0)
1828 t = counts[0];
1829 else
1830 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1831 expand_omp_build_assign (gsi, fd->loop.n2, t);
1836 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1837 T = V;
1838 V3 = N31 + (T % count3) * STEP3;
1839 T = T / count3;
1840 V2 = N21 + (T % count2) * STEP2;
1841 T = T / count2;
1842 V1 = N11 + T * STEP1;
1843 if this loop doesn't have an inner loop construct combined with it.
1844 If it does have an inner loop construct combined with it and the
1845 iteration count isn't a known constant, store the values from the counts
1846 array into its _looptemp_ temporaries instead. */
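/* Editor's sketch (illustrative only, not used by the compiler): how the
   division/modulo chain above recovers the individual iteration indices from
   the linearized counter.  With count3 = 4 and count2 = 5, the flattened
   iteration T = 37 decomposes as

     i3 = 37 % 4 = 1;  T = 37 / 4 = 9;
     i2 = 9 % 5 = 4;   T = 9 / 5 = 1;
     i1 = T = 1;

   and indeed 1 * (5 * 4) + 4 * 4 + 1 == 37, so the generated assignments are
   V3 = N31 + 1 * STEP3, V2 = N21 + 4 * STEP2, V1 = N11 + 1 * STEP1.  */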
1848 static void
1849 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1850 tree *counts, gimple *inner_stmt, tree startvar)
1852 int i;
1853 if (gimple_omp_for_combined_p (fd->for_stmt))
1855 /* If fd->loop.n2 is constant, then no propagation of the counts
1856 is needed; they are constant. */
1857 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1858 return;
1860 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1861 ? gimple_omp_taskreg_clauses (inner_stmt)
1862 : gimple_omp_for_clauses (inner_stmt);
1863 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1864 isn't supposed to be handled, as the inner loop doesn't
1865 use it. */
1866 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1867 gcc_assert (innerc);
1868 for (i = 0; i < fd->collapse; i++)
1870 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1871 OMP_CLAUSE__LOOPTEMP_);
1872 gcc_assert (innerc);
1873 if (i)
1875 tree tem = OMP_CLAUSE_DECL (innerc);
1876 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1877 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1878 false, GSI_CONTINUE_LINKING);
1879 gassign *stmt = gimple_build_assign (tem, t);
1880 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1883 return;
1886 tree type = TREE_TYPE (fd->loop.v);
1887 tree tem = create_tmp_reg (type, ".tem");
1888 gassign *stmt = gimple_build_assign (tem, startvar);
1889 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1891 for (i = fd->collapse - 1; i >= 0; i--)
1893 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1894 itype = vtype;
1895 if (POINTER_TYPE_P (vtype))
1896 itype = signed_type_for (vtype);
1897 if (i != 0)
1898 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1899 else
1900 t = tem;
1901 t = fold_convert (itype, t);
1902 t = fold_build2 (MULT_EXPR, itype, t,
1903 fold_convert (itype, fd->loops[i].step));
1904 if (POINTER_TYPE_P (vtype))
1905 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1906 else
1907 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1908 t = force_gimple_operand_gsi (gsi, t,
1909 DECL_P (fd->loops[i].v)
1910 && TREE_ADDRESSABLE (fd->loops[i].v),
1911 NULL_TREE, false,
1912 GSI_CONTINUE_LINKING);
1913 stmt = gimple_build_assign (fd->loops[i].v, t);
1914 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1915 if (i != 0)
1917 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1918 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1919 false, GSI_CONTINUE_LINKING);
1920 stmt = gimple_build_assign (tem, t);
1921 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1926 /* Helper function for expand_omp_for_*. Generate code like:
1927 L10:
1928 V3 += STEP3;
1929 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1930 L11:
1931 V3 = N31;
1932 V2 += STEP2;
1933 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1934 L12:
1935 V2 = N21;
1936 V1 += STEP1;
1937 goto BODY_BB; */
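/* Editor's sketch (illustrative only): the blocks generated below implement an
   "odometer" style increment of the collapsed iteration variables.  For
   collapse(2) the update is equivalent to

     V2 += STEP2;
     if (!(V2 cond2 N22))
       {
	 V2 = N21;
	 V1 += STEP1;
       }

   i.e. the innermost variable advances every time and carries into the next
   outer variable when it runs off its bound, mirroring the L10/L11/L12
   pseudocode above.  */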
1939 static basic_block
1940 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1941 basic_block body_bb)
1943 basic_block last_bb, bb, collapse_bb = NULL;
1944 int i;
1945 gimple_stmt_iterator gsi;
1946 edge e;
1947 tree t;
1948 gimple *stmt;
1950 last_bb = cont_bb;
1951 for (i = fd->collapse - 1; i >= 0; i--)
1953 tree vtype = TREE_TYPE (fd->loops[i].v);
1955 bb = create_empty_bb (last_bb);
1956 add_bb_to_loop (bb, last_bb->loop_father);
1957 gsi = gsi_start_bb (bb);
1959 if (i < fd->collapse - 1)
1961 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1962 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1964 t = fd->loops[i + 1].n1;
1965 t = force_gimple_operand_gsi (&gsi, t,
1966 DECL_P (fd->loops[i + 1].v)
1967 && TREE_ADDRESSABLE (fd->loops[i
1968 + 1].v),
1969 NULL_TREE, false,
1970 GSI_CONTINUE_LINKING);
1971 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1972 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1974 else
1975 collapse_bb = bb;
1977 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1979 if (POINTER_TYPE_P (vtype))
1980 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1981 else
1982 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1983 t = force_gimple_operand_gsi (&gsi, t,
1984 DECL_P (fd->loops[i].v)
1985 && TREE_ADDRESSABLE (fd->loops[i].v),
1986 NULL_TREE, false, GSI_CONTINUE_LINKING);
1987 stmt = gimple_build_assign (fd->loops[i].v, t);
1988 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1990 if (i > 0)
1992 t = fd->loops[i].n2;
1993 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1994 false, GSI_CONTINUE_LINKING);
1995 tree v = fd->loops[i].v;
1996 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1997 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1998 false, GSI_CONTINUE_LINKING);
1999 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2000 stmt = gimple_build_cond_empty (t);
2001 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2002 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2003 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2005 else
2006 make_edge (bb, body_bb, EDGE_FALLTHRU);
2007 last_bb = bb;
2010 return collapse_bb;
2013 /* Expand #pragma omp ordered depend(source). */
2015 static void
2016 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2017 tree *counts, location_t loc)
2019 enum built_in_function source_ix
2020 = fd->iter_type == long_integer_type_node
2021 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2022 gimple *g
2023 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2024 build_fold_addr_expr (counts[fd->ordered]));
2025 gimple_set_location (g, loc);
2026 gsi_insert_before (gsi, g, GSI_SAME_STMT);
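/* Editor's note (illustrative, not part of the expansion itself): for a
   doacross loop such as

     #pragma omp for ordered(2)
     for (i = ...)
       for (j = ...)
	 {
	   #pragma omp ordered depend(source)
	   ...
	 }

   the depend(source) directive becomes, roughly, a call
   GOMP_doacross_post (counts) (or the _ull_ variant), where counts[] is the
   .orditera array holding the current iteration vector that the code built
   by expand_omp_for_ordered_loops keeps up to date.  */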
2029 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2031 static void
2032 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2033 tree *counts, tree c, location_t loc)
2035 auto_vec<tree, 10> args;
2036 enum built_in_function sink_ix
2037 = fd->iter_type == long_integer_type_node
2038 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2039 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2040 int i;
2041 gimple_stmt_iterator gsi2 = *gsi;
2042 bool warned_step = false;
2044 for (i = 0; i < fd->ordered; i++)
2046 tree step = NULL_TREE;
2047 off = TREE_PURPOSE (deps);
2048 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2050 step = TREE_OPERAND (off, 1);
2051 off = TREE_OPERAND (off, 0);
2053 if (!integer_zerop (off))
2055 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2056 || fd->loops[i].cond_code == GT_EXPR);
2057 bool forward = fd->loops[i].cond_code == LT_EXPR;
2058 if (step)
2060 /* Non-simple Fortran DO loops. If the step is variable,
2061 we don't know even the direction at compile time, so we
2062 can't warn. */
2063 if (TREE_CODE (step) != INTEGER_CST)
2064 break;
2065 forward = tree_int_cst_sgn (step) != -1;
2067 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2068 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2069 "lexically later iteration");
2070 break;
2072 deps = TREE_CHAIN (deps);
2074 /* If all offsets corresponding to the collapsed loops are zero,
2075 this depend clause can be ignored. FIXME: but a flush is still
2076 needed; we should emit one __sync_synchronize () for it
2077 (perhaps conditionally). Solve this together with the
2078 conservative dependence folding optimization.
2079 if (i >= fd->collapse)
2080 return; */
2082 deps = OMP_CLAUSE_DECL (c);
2083 gsi_prev (&gsi2);
2084 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2085 edge e2 = split_block_after_labels (e1->dest);
2087 gsi2 = gsi_after_labels (e1->dest);
2088 *gsi = gsi_last_bb (e1->src);
2089 for (i = 0; i < fd->ordered; i++)
2091 tree itype = TREE_TYPE (fd->loops[i].v);
2092 tree step = NULL_TREE;
2093 tree orig_off = NULL_TREE;
2094 if (POINTER_TYPE_P (itype))
2095 itype = sizetype;
2096 if (i)
2097 deps = TREE_CHAIN (deps);
2098 off = TREE_PURPOSE (deps);
2099 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2101 step = TREE_OPERAND (off, 1);
2102 off = TREE_OPERAND (off, 0);
2103 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2104 && integer_onep (fd->loops[i].step)
2105 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2107 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2108 if (step)
2110 off = fold_convert_loc (loc, itype, off);
2111 orig_off = off;
2112 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2115 if (integer_zerop (off))
2116 t = boolean_true_node;
2117 else
2119 tree a;
2120 tree co = fold_convert_loc (loc, itype, off);
2121 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2123 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2124 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2125 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2126 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2127 co);
2129 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2130 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2131 fd->loops[i].v, co);
2132 else
2133 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2134 fd->loops[i].v, co);
2135 if (step)
2137 tree t1, t2;
2138 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2139 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2140 fd->loops[i].n1);
2141 else
2142 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2143 fd->loops[i].n2);
2144 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2145 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2146 fd->loops[i].n2);
2147 else
2148 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2149 fd->loops[i].n1);
2150 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2151 step, build_int_cst (TREE_TYPE (step), 0));
2152 if (TREE_CODE (step) != INTEGER_CST)
2154 t1 = unshare_expr (t1);
2155 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2156 false, GSI_CONTINUE_LINKING);
2157 t2 = unshare_expr (t2);
2158 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2159 false, GSI_CONTINUE_LINKING);
2161 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2162 t, t2, t1);
2164 else if (fd->loops[i].cond_code == LT_EXPR)
2166 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2167 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2168 fd->loops[i].n1);
2169 else
2170 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2171 fd->loops[i].n2);
2173 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2174 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2175 fd->loops[i].n2);
2176 else
2177 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2178 fd->loops[i].n1);
2180 if (cond)
2181 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2182 else
2183 cond = t;
2185 off = fold_convert_loc (loc, itype, off);
2187 if (step
2188 || (fd->loops[i].cond_code == LT_EXPR
2189 ? !integer_onep (fd->loops[i].step)
2190 : !integer_minus_onep (fd->loops[i].step)))
2192 if (step == NULL_TREE
2193 && TYPE_UNSIGNED (itype)
2194 && fd->loops[i].cond_code == GT_EXPR)
2195 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2196 fold_build1_loc (loc, NEGATE_EXPR, itype,
2197 s));
2198 else
2199 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2200 orig_off ? orig_off : off, s);
2201 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2202 build_int_cst (itype, 0));
2203 if (integer_zerop (t) && !warned_step)
2205 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2206 "in the iteration space");
2207 warned_step = true;
2209 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2210 cond, t);
2213 if (i <= fd->collapse - 1 && fd->collapse > 1)
2214 t = fd->loop.v;
2215 else if (counts[i])
2216 t = counts[i];
2217 else
2219 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2220 fd->loops[i].v, fd->loops[i].n1);
2221 t = fold_convert_loc (loc, fd->iter_type, t);
2223 if (step)
2224 /* We have already divided off by step earlier. */;
2225 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2226 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2227 fold_build1_loc (loc, NEGATE_EXPR, itype,
2228 s));
2229 else
2230 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2231 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2232 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2233 off = fold_convert_loc (loc, fd->iter_type, off);
2234 if (i <= fd->collapse - 1 && fd->collapse > 1)
2236 if (i)
2237 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2238 off);
2239 if (i < fd->collapse - 1)
2241 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2242 counts[i]);
2243 continue;
2246 off = unshare_expr (off);
2247 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2248 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2249 true, GSI_SAME_STMT);
2250 args.safe_push (t);
2252 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2253 gimple_set_location (g, loc);
2254 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2256 cond = unshare_expr (cond);
2257 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2258 GSI_CONTINUE_LINKING);
2259 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2260 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2261 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2262 e1->probability = e3->probability.invert ();
2263 e1->flags = EDGE_TRUE_VALUE;
2264 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2266 *gsi = gsi_after_labels (e2->dest);
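/* Editor's note (illustrative, not part of the expansion itself): for the same
   doacross loop, a directive like

     #pragma omp ordered depend(sink: i - 1, j)

   becomes, roughly, a call GOMP_doacross_wait (iter0, iter1) (or the _ull_
   variant) whose arguments are the normalized iteration numbers
   ((V + offset - N1) / STEP), guarded by the condition built above so the
   wait is skipped whenever the referenced iteration lies outside the
   iteration space.  */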
2269 /* Expand all #pragma omp ordered depend(source) and
2270 #pragma omp ordered depend(sink:...) constructs in the current
2271 #pragma omp for ordered(n) region. */
2273 static void
2274 expand_omp_ordered_source_sink (struct omp_region *region,
2275 struct omp_for_data *fd, tree *counts,
2276 basic_block cont_bb)
2278 struct omp_region *inner;
2279 int i;
2280 for (i = fd->collapse - 1; i < fd->ordered; i++)
2281 if (i == fd->collapse - 1 && fd->collapse > 1)
2282 counts[i] = NULL_TREE;
2283 else if (i >= fd->collapse && !cont_bb)
2284 counts[i] = build_zero_cst (fd->iter_type);
2285 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2286 && integer_onep (fd->loops[i].step))
2287 counts[i] = NULL_TREE;
2288 else
2289 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2290 tree atype
2291 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2292 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2293 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2295 for (inner = region->inner; inner; inner = inner->next)
2296 if (inner->type == GIMPLE_OMP_ORDERED)
2298 gomp_ordered *ord_stmt = inner->ord_stmt;
2299 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2300 location_t loc = gimple_location (ord_stmt);
2301 tree c;
2302 for (c = gimple_omp_ordered_clauses (ord_stmt);
2303 c; c = OMP_CLAUSE_CHAIN (c))
2304 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2305 break;
2306 if (c)
2307 expand_omp_ordered_source (&gsi, fd, counts, loc);
2308 for (c = gimple_omp_ordered_clauses (ord_stmt);
2309 c; c = OMP_CLAUSE_CHAIN (c))
2310 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2311 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2312 gsi_remove (&gsi, true);
2316 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2317 collapsed. */
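/* Editor's note (an assumed typical case, for illustration): given

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 body (i, j);

   only the i loop is workshared (fd->collapse == 1), so the j loop has to be
   rebuilt here around the body, together with code that keeps the .orditer
   and .orditera counters current for the depend(source)/depend(sink) calls.  */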
2319 static basic_block
2320 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2321 basic_block cont_bb, basic_block body_bb,
2322 bool ordered_lastprivate)
2324 if (fd->ordered == fd->collapse)
2325 return cont_bb;
2327 if (!cont_bb)
2329 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2330 for (int i = fd->collapse; i < fd->ordered; i++)
2332 tree type = TREE_TYPE (fd->loops[i].v);
2333 tree n1 = fold_convert (type, fd->loops[i].n1);
2334 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2335 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2336 size_int (i - fd->collapse + 1),
2337 NULL_TREE, NULL_TREE);
2338 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2340 return NULL;
2343 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2345 tree t, type = TREE_TYPE (fd->loops[i].v);
2346 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2347 expand_omp_build_assign (&gsi, fd->loops[i].v,
2348 fold_convert (type, fd->loops[i].n1));
2349 if (counts[i])
2350 expand_omp_build_assign (&gsi, counts[i],
2351 build_zero_cst (fd->iter_type));
2352 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2353 size_int (i - fd->collapse + 1),
2354 NULL_TREE, NULL_TREE);
2355 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2356 if (!gsi_end_p (gsi))
2357 gsi_prev (&gsi);
2358 else
2359 gsi = gsi_last_bb (body_bb);
2360 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2361 basic_block new_body = e1->dest;
2362 if (body_bb == cont_bb)
2363 cont_bb = new_body;
2364 edge e2 = NULL;
2365 basic_block new_header;
2366 if (EDGE_COUNT (cont_bb->preds) > 0)
2368 gsi = gsi_last_bb (cont_bb);
2369 if (POINTER_TYPE_P (type))
2370 t = fold_build_pointer_plus (fd->loops[i].v,
2371 fold_convert (sizetype,
2372 fd->loops[i].step));
2373 else
2374 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2375 fold_convert (type, fd->loops[i].step));
2376 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2377 if (counts[i])
2379 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2380 build_int_cst (fd->iter_type, 1));
2381 expand_omp_build_assign (&gsi, counts[i], t);
2382 t = counts[i];
2384 else
2386 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2387 fd->loops[i].v, fd->loops[i].n1);
2388 t = fold_convert (fd->iter_type, t);
2389 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2390 true, GSI_SAME_STMT);
2392 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2393 size_int (i - fd->collapse + 1),
2394 NULL_TREE, NULL_TREE);
2395 expand_omp_build_assign (&gsi, aref, t);
2396 gsi_prev (&gsi);
2397 e2 = split_block (cont_bb, gsi_stmt (gsi));
2398 new_header = e2->dest;
2400 else
2401 new_header = cont_bb;
2402 gsi = gsi_after_labels (new_header);
2403 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2404 true, GSI_SAME_STMT);
2405 tree n2
2406 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2407 true, NULL_TREE, true, GSI_SAME_STMT);
2408 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2409 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2410 edge e3 = split_block (new_header, gsi_stmt (gsi));
2411 cont_bb = e3->dest;
2412 remove_edge (e1);
2413 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2414 e3->flags = EDGE_FALSE_VALUE;
2415 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2416 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2417 e1->probability = e3->probability.invert ();
2419 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2420 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2422 if (e2)
2424 struct loop *loop = alloc_loop ();
2425 loop->header = new_header;
2426 loop->latch = e2->src;
2427 add_loop (loop, body_bb->loop_father);
2431 /* If there are any lastprivate clauses and it is possible some loops
2432 might have zero iterations, ensure all the decls are initialized;
2433 otherwise we could crash evaluating C++ class iterators with lastprivate
2434 clauses. */
2435 bool need_inits = false;
2436 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2437 if (need_inits)
2439 tree type = TREE_TYPE (fd->loops[i].v);
2440 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2441 expand_omp_build_assign (&gsi, fd->loops[i].v,
2442 fold_convert (type, fd->loops[i].n1));
2444 else
2446 tree type = TREE_TYPE (fd->loops[i].v);
2447 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2448 boolean_type_node,
2449 fold_convert (type, fd->loops[i].n1),
2450 fold_convert (type, fd->loops[i].n2));
2451 if (!integer_onep (this_cond))
2452 need_inits = true;
2455 return cont_bb;
2458 /* A subroutine of expand_omp_for. Generate code for a parallel
2459 loop with any schedule. Given parameters:
2461 for (V = N1; V cond N2; V += STEP) BODY;
2463 where COND is "<" or ">", we generate pseudocode
2465 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2466 if (more) goto L0; else goto L3;
2468 V = istart0;
2469 iend = iend0;
2471 BODY;
2472 V += STEP;
2473 if (V cond iend) goto L1; else goto L2;
2475 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2478 If this is a combined omp parallel loop, instead of the call to
2479 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2480 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2481 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2482 inner GIMPLE_OMP_FOR and V += STEP; and
2483 if (V cond iend) goto L1; else goto L2; are removed.
2485 For collapsed loops, given parameters:
2486 collapse(3)
2487 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2488 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2489 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2490 BODY;
2492 we generate pseudocode
2494 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2495 if (cond3 is <)
2496 adj = STEP3 - 1;
2497 else
2498 adj = STEP3 + 1;
2499 count3 = (adj + N32 - N31) / STEP3;
2500 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2501 if (cond2 is <)
2502 adj = STEP2 - 1;
2503 else
2504 adj = STEP2 + 1;
2505 count2 = (adj + N22 - N21) / STEP2;
2506 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2507 if (cond1 is <)
2508 adj = STEP1 - 1;
2509 else
2510 adj = STEP1 + 1;
2511 count1 = (adj + N12 - N11) / STEP1;
2512 count = count1 * count2 * count3;
2513 goto Z1;
2515 count = 0;
2517 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2518 if (more) goto L0; else goto L3;
2520 V = istart0;
2521 T = V;
2522 V3 = N31 + (T % count3) * STEP3;
2523 T = T / count3;
2524 V2 = N21 + (T % count2) * STEP2;
2525 T = T / count2;
2526 V1 = N11 + T * STEP1;
2527 iend = iend0;
2529 BODY;
2530 V += 1;
2531 if (V < iend) goto L10; else goto L2;
2532 L10:
2533 V3 += STEP3;
2534 if (V3 cond3 N32) goto L1; else goto L11;
2535 L11:
2536 V3 = N31;
2537 V2 += STEP2;
2538 if (V2 cond2 N22) goto L1; else goto L12;
2539 L12:
2540 V2 = N21;
2541 V1 += STEP1;
2542 goto L1;
2544 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
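/* Editor's sketch (illustrative only, not the compiler's own code): under the
   assumption that START_FN/NEXT_FN resolve to the dynamic-schedule libgomp
   entry points, a non-combined loop such as

     #pragma omp for schedule(dynamic, 4)
     for (long i = 0; i < n; i++)
       body (i);

   is expanded roughly along the lines of the pseudocode above, i.e.

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 for (long i = istart0; i < iend0; i++)
	   body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   with the exact GOMP_loop_* functions picked according to the schedule
   kind.  */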
2549 static void
2550 expand_omp_for_generic (struct omp_region *region,
2551 struct omp_for_data *fd,
2552 enum built_in_function start_fn,
2553 enum built_in_function next_fn,
2554 gimple *inner_stmt)
2556 tree type, istart0, iend0, iend;
2557 tree t, vmain, vback, bias = NULL_TREE;
2558 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2559 basic_block l2_bb = NULL, l3_bb = NULL;
2560 gimple_stmt_iterator gsi;
2561 gassign *assign_stmt;
2562 bool in_combined_parallel = is_combined_parallel (region);
2563 bool broken_loop = region->cont == NULL;
2564 edge e, ne;
2565 tree *counts = NULL;
2566 int i;
2567 bool ordered_lastprivate = false;
2569 gcc_assert (!broken_loop || !in_combined_parallel);
2570 gcc_assert (fd->iter_type == long_integer_type_node
2571 || !in_combined_parallel);
2573 entry_bb = region->entry;
2574 cont_bb = region->cont;
2575 collapse_bb = NULL;
2576 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2577 gcc_assert (broken_loop
2578 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2579 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2580 l1_bb = single_succ (l0_bb);
2581 if (!broken_loop)
2583 l2_bb = create_empty_bb (cont_bb);
2584 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2585 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2586 == l1_bb));
2587 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2589 else
2590 l2_bb = NULL;
2591 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2592 exit_bb = region->exit;
2594 gsi = gsi_last_bb (entry_bb);
2596 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2597 if (fd->ordered
2598 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2599 OMP_CLAUSE_LASTPRIVATE))
2600 ordered_lastprivate = true;
2601 if (fd->collapse > 1 || fd->ordered)
2603 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2604 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2606 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2607 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2608 zero_iter1_bb, first_zero_iter1,
2609 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2611 if (zero_iter1_bb)
2613 /* Some counts[i] vars might be uninitialized if
2614 some loop has zero iterations. But the body shouldn't
2615 be executed in that case, so just avoid uninit warnings. */
2616 for (i = first_zero_iter1;
2617 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2618 if (SSA_VAR_P (counts[i]))
2619 TREE_NO_WARNING (counts[i]) = 1;
2620 gsi_prev (&gsi);
2621 e = split_block (entry_bb, gsi_stmt (gsi));
2622 entry_bb = e->dest;
2623 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2624 gsi = gsi_last_bb (entry_bb);
2625 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2626 get_immediate_dominator (CDI_DOMINATORS,
2627 zero_iter1_bb));
2629 if (zero_iter2_bb)
2631 /* Some counts[i] vars might be uninitialized if
2632 some loop has zero iterations. But the body shouldn't
2633 be executed in that case, so just avoid uninit warnings. */
2634 for (i = first_zero_iter2; i < fd->ordered; i++)
2635 if (SSA_VAR_P (counts[i]))
2636 TREE_NO_WARNING (counts[i]) = 1;
2637 if (zero_iter1_bb)
2638 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2639 else
2641 gsi_prev (&gsi);
2642 e = split_block (entry_bb, gsi_stmt (gsi));
2643 entry_bb = e->dest;
2644 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2645 gsi = gsi_last_bb (entry_bb);
2646 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2647 get_immediate_dominator
2648 (CDI_DOMINATORS, zero_iter2_bb));
2651 if (fd->collapse == 1)
2653 counts[0] = fd->loop.n2;
2654 fd->loop = fd->loops[0];
2658 type = TREE_TYPE (fd->loop.v);
2659 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2660 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2661 TREE_ADDRESSABLE (istart0) = 1;
2662 TREE_ADDRESSABLE (iend0) = 1;
2664 /* See if we need to bias by LLONG_MIN. */
2665 if (fd->iter_type == long_long_unsigned_type_node
2666 && TREE_CODE (type) == INTEGER_TYPE
2667 && !TYPE_UNSIGNED (type)
2668 && fd->ordered == 0)
2670 tree n1, n2;
2672 if (fd->loop.cond_code == LT_EXPR)
2674 n1 = fd->loop.n1;
2675 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2677 else
2679 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2680 n2 = fd->loop.n1;
2682 if (TREE_CODE (n1) != INTEGER_CST
2683 || TREE_CODE (n2) != INTEGER_CST
2684 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2685 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
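  /* Editor's note (worked example of the bias above): adding
     (unsigned long long) LLONG_MIN flips the sign bit, mapping the signed
     range [LLONG_MIN, LLONG_MAX] monotonically onto [0, ULLONG_MAX];
     e.g. -3 becomes 0x7ffffffffffffffd and 3 becomes 0x8000000000000003,
     so the unsigned GOMP_loop_ull_* functions can still compare the biased
     bounds correctly.  The bias is subtracted again from istart0/iend0
     below.  */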
2688 gimple_stmt_iterator gsif = gsi;
2689 gsi_prev (&gsif);
2691 tree arr = NULL_TREE;
2692 if (in_combined_parallel)
2694 gcc_assert (fd->ordered == 0);
2695 /* In a combined parallel loop, emit a call to
2696 GOMP_loop_foo_next. */
2697 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2698 build_fold_addr_expr (istart0),
2699 build_fold_addr_expr (iend0));
2701 else
2703 tree t0, t1, t2, t3, t4;
2704 /* If this is not a combined parallel loop, emit a call to
2705 GOMP_loop_foo_start in ENTRY_BB. */
2706 t4 = build_fold_addr_expr (iend0);
2707 t3 = build_fold_addr_expr (istart0);
2708 if (fd->ordered)
2710 t0 = build_int_cst (unsigned_type_node,
2711 fd->ordered - fd->collapse + 1);
2712 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2713 fd->ordered
2714 - fd->collapse + 1),
2715 ".omp_counts");
2716 DECL_NAMELESS (arr) = 1;
2717 TREE_ADDRESSABLE (arr) = 1;
2718 TREE_STATIC (arr) = 1;
2719 vec<constructor_elt, va_gc> *v;
2720 vec_alloc (v, fd->ordered - fd->collapse + 1);
2721 int idx;
2723 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2725 tree c;
2726 if (idx == 0 && fd->collapse > 1)
2727 c = fd->loop.n2;
2728 else
2729 c = counts[idx + fd->collapse - 1];
2730 tree purpose = size_int (idx);
2731 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2732 if (TREE_CODE (c) != INTEGER_CST)
2733 TREE_STATIC (arr) = 0;
2736 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2737 if (!TREE_STATIC (arr))
2738 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2739 void_type_node, arr),
2740 true, NULL_TREE, true, GSI_SAME_STMT);
2741 t1 = build_fold_addr_expr (arr);
2742 t2 = NULL_TREE;
2744 else
2746 t2 = fold_convert (fd->iter_type, fd->loop.step);
2747 t1 = fd->loop.n2;
2748 t0 = fd->loop.n1;
2749 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2751 tree innerc
2752 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2753 OMP_CLAUSE__LOOPTEMP_);
2754 gcc_assert (innerc);
2755 t0 = OMP_CLAUSE_DECL (innerc);
2756 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2757 OMP_CLAUSE__LOOPTEMP_);
2758 gcc_assert (innerc);
2759 t1 = OMP_CLAUSE_DECL (innerc);
2761 if (POINTER_TYPE_P (TREE_TYPE (t0))
2762 && TYPE_PRECISION (TREE_TYPE (t0))
2763 != TYPE_PRECISION (fd->iter_type))
2765 /* Avoid casting pointers to an integer of a different size. */
2766 tree itype = signed_type_for (type);
2767 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2768 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2770 else
2772 t1 = fold_convert (fd->iter_type, t1);
2773 t0 = fold_convert (fd->iter_type, t0);
2775 if (bias)
2777 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2778 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2781 if (fd->iter_type == long_integer_type_node || fd->ordered)
2783 if (fd->chunk_size)
2785 t = fold_convert (fd->iter_type, fd->chunk_size);
2786 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2787 if (fd->ordered)
2788 t = build_call_expr (builtin_decl_explicit (start_fn),
2789 5, t0, t1, t, t3, t4);
2790 else
2791 t = build_call_expr (builtin_decl_explicit (start_fn),
2792 6, t0, t1, t2, t, t3, t4);
2794 else if (fd->ordered)
2795 t = build_call_expr (builtin_decl_explicit (start_fn),
2796 4, t0, t1, t3, t4);
2797 else
2798 t = build_call_expr (builtin_decl_explicit (start_fn),
2799 5, t0, t1, t2, t3, t4);
2801 else
2803 tree t5;
2804 tree c_bool_type;
2805 tree bfn_decl;
2807 /* The GOMP_loop_ull_*start functions have an additional boolean
2808 argument, true for < loops and false for > loops.
2809 In Fortran, the C bool type can be different from
2810 boolean_type_node. */
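   /* Editor's note: e.g. (assuming the libgomp prototype and a dynamic
      schedule) the call built below corresponds to
      GOMP_loop_ull_dynamic_start (up, n1, n2, step, chunk, &istart0, &iend0),
      where UP is this C-bool flag: nonzero for < loops, zero for > loops.  */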
2811 bfn_decl = builtin_decl_explicit (start_fn);
2812 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2813 t5 = build_int_cst (c_bool_type,
2814 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2815 if (fd->chunk_size)
2817 tree bfn_decl = builtin_decl_explicit (start_fn);
2818 t = fold_convert (fd->iter_type, fd->chunk_size);
2819 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2820 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2822 else
2823 t = build_call_expr (builtin_decl_explicit (start_fn),
2824 6, t5, t0, t1, t2, t3, t4);
2827 if (TREE_TYPE (t) != boolean_type_node)
2828 t = fold_build2 (NE_EXPR, boolean_type_node,
2829 t, build_int_cst (TREE_TYPE (t), 0));
2830 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2831 true, GSI_SAME_STMT);
2832 if (arr && !TREE_STATIC (arr))
2834 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2835 TREE_THIS_VOLATILE (clobber) = 1;
2836 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2837 GSI_SAME_STMT);
2839 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2841 /* Remove the GIMPLE_OMP_FOR statement. */
2842 gsi_remove (&gsi, true);
2844 if (gsi_end_p (gsif))
2845 gsif = gsi_after_labels (gsi_bb (gsif));
2846 gsi_next (&gsif);
2848 /* Iteration setup for sequential loop goes in L0_BB. */
2849 tree startvar = fd->loop.v;
2850 tree endvar = NULL_TREE;
2852 if (gimple_omp_for_combined_p (fd->for_stmt))
2854 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2855 && gimple_omp_for_kind (inner_stmt)
2856 == GF_OMP_FOR_KIND_SIMD);
2857 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2858 OMP_CLAUSE__LOOPTEMP_);
2859 gcc_assert (innerc);
2860 startvar = OMP_CLAUSE_DECL (innerc);
2861 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2862 OMP_CLAUSE__LOOPTEMP_);
2863 gcc_assert (innerc);
2864 endvar = OMP_CLAUSE_DECL (innerc);
2867 gsi = gsi_start_bb (l0_bb);
2868 t = istart0;
2869 if (fd->ordered && fd->collapse == 1)
2870 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2871 fold_convert (fd->iter_type, fd->loop.step));
2872 else if (bias)
2873 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2874 if (fd->ordered && fd->collapse == 1)
2876 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2877 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2878 fd->loop.n1, fold_convert (sizetype, t));
2879 else
2881 t = fold_convert (TREE_TYPE (startvar), t);
2882 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2883 fd->loop.n1, t);
2886 else
2888 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2889 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2890 t = fold_convert (TREE_TYPE (startvar), t);
2892 t = force_gimple_operand_gsi (&gsi, t,
2893 DECL_P (startvar)
2894 && TREE_ADDRESSABLE (startvar),
2895 NULL_TREE, false, GSI_CONTINUE_LINKING);
2896 assign_stmt = gimple_build_assign (startvar, t);
2897 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2899 t = iend0;
2900 if (fd->ordered && fd->collapse == 1)
2901 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2902 fold_convert (fd->iter_type, fd->loop.step));
2903 else if (bias)
2904 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2905 if (fd->ordered && fd->collapse == 1)
2907 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2908 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2909 fd->loop.n1, fold_convert (sizetype, t));
2910 else
2912 t = fold_convert (TREE_TYPE (startvar), t);
2913 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2914 fd->loop.n1, t);
2917 else
2919 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2920 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2921 t = fold_convert (TREE_TYPE (startvar), t);
2923 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2924 false, GSI_CONTINUE_LINKING);
2925 if (endvar)
2927 assign_stmt = gimple_build_assign (endvar, iend);
2928 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2929 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2930 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2931 else
2932 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2935 /* Handle linear clause adjustments. */
2936 tree itercnt = NULL_TREE;
2937 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2938 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2939 c; c = OMP_CLAUSE_CHAIN (c))
2940 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2941 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2943 tree d = OMP_CLAUSE_DECL (c);
2944 bool is_ref = omp_is_reference (d);
2945 tree t = d, a, dest;
2946 if (is_ref)
2947 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2948 tree type = TREE_TYPE (t);
2949 if (POINTER_TYPE_P (type))
2950 type = sizetype;
2951 dest = unshare_expr (t);
2952 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2953 expand_omp_build_assign (&gsif, v, t);
2954 if (itercnt == NULL_TREE)
2956 itercnt = startvar;
2957 tree n1 = fd->loop.n1;
2958 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2960 itercnt
2961 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2962 itercnt);
2963 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2965 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2966 itercnt, n1);
2967 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2968 itercnt, fd->loop.step);
2969 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2970 NULL_TREE, false,
2971 GSI_CONTINUE_LINKING);
2973 a = fold_build2 (MULT_EXPR, type,
2974 fold_convert (type, itercnt),
2975 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2976 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2977 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2978 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2979 false, GSI_CONTINUE_LINKING);
2980 assign_stmt = gimple_build_assign (dest, t);
2981 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2983 if (fd->collapse > 1)
2984 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2986 if (fd->ordered)
2988 /* Until now, the counts array contained the number of iterations
2989 (or the variable holding it) for the ith loop. From now on, we
2990 need those counts only for the collapsed loops, and only for the
2991 2nd through the last collapsed one. Move them one element
2992 earlier; we'll use counts[fd->collapse - 1] for the first
2993 source/sink iteration counter and so on, and counts[fd->ordered]
2994 as the array holding the current counter values for
2995 depend(source). */
2996 if (fd->collapse > 1)
2997 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2998 if (broken_loop)
3000 int i;
3001 for (i = fd->collapse; i < fd->ordered; i++)
3003 tree type = TREE_TYPE (fd->loops[i].v);
3004 tree this_cond
3005 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3006 fold_convert (type, fd->loops[i].n1),
3007 fold_convert (type, fd->loops[i].n2));
3008 if (!integer_onep (this_cond))
3009 break;
3011 if (i < fd->ordered)
3013 cont_bb
3014 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3015 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3016 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3017 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3018 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3019 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3020 make_edge (cont_bb, l1_bb, 0);
3021 l2_bb = create_empty_bb (cont_bb);
3022 broken_loop = false;
3025 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3026 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3027 ordered_lastprivate);
3028 if (counts[fd->collapse - 1])
3030 gcc_assert (fd->collapse == 1);
3031 gsi = gsi_last_bb (l0_bb);
3032 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3033 istart0, true);
3034 gsi = gsi_last_bb (cont_bb);
3035 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3036 build_int_cst (fd->iter_type, 1));
3037 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3038 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3039 size_zero_node, NULL_TREE, NULL_TREE);
3040 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3041 t = counts[fd->collapse - 1];
3043 else if (fd->collapse > 1)
3044 t = fd->loop.v;
3045 else
3047 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3048 fd->loops[0].v, fd->loops[0].n1);
3049 t = fold_convert (fd->iter_type, t);
3051 gsi = gsi_last_bb (l0_bb);
3052 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3053 size_zero_node, NULL_TREE, NULL_TREE);
3054 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3055 false, GSI_CONTINUE_LINKING);
3056 expand_omp_build_assign (&gsi, aref, t, true);
3059 if (!broken_loop)
3061 /* Code to control the increment and predicate for the sequential
3062 loop goes in the CONT_BB. */
3063 gsi = gsi_last_bb (cont_bb);
3064 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3065 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3066 vmain = gimple_omp_continue_control_use (cont_stmt);
3067 vback = gimple_omp_continue_control_def (cont_stmt);
3069 if (!gimple_omp_for_combined_p (fd->for_stmt))
3071 if (POINTER_TYPE_P (type))
3072 t = fold_build_pointer_plus (vmain, fd->loop.step);
3073 else
3074 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3075 t = force_gimple_operand_gsi (&gsi, t,
3076 DECL_P (vback)
3077 && TREE_ADDRESSABLE (vback),
3078 NULL_TREE, true, GSI_SAME_STMT);
3079 assign_stmt = gimple_build_assign (vback, t);
3080 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3082 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3084 if (fd->collapse > 1)
3085 t = fd->loop.v;
3086 else
3088 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3089 fd->loops[0].v, fd->loops[0].n1);
3090 t = fold_convert (fd->iter_type, t);
3092 tree aref = build4 (ARRAY_REF, fd->iter_type,
3093 counts[fd->ordered], size_zero_node,
3094 NULL_TREE, NULL_TREE);
3095 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3096 true, GSI_SAME_STMT);
3097 expand_omp_build_assign (&gsi, aref, t);
3100 t = build2 (fd->loop.cond_code, boolean_type_node,
3101 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3102 iend);
3103 gcond *cond_stmt = gimple_build_cond_empty (t);
3104 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3107 /* Remove GIMPLE_OMP_CONTINUE. */
3108 gsi_remove (&gsi, true);
3110 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3111 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3113 /* Emit code to get the next parallel iteration in L2_BB. */
3114 gsi = gsi_start_bb (l2_bb);
3116 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3117 build_fold_addr_expr (istart0),
3118 build_fold_addr_expr (iend0));
3119 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3120 false, GSI_CONTINUE_LINKING);
3121 if (TREE_TYPE (t) != boolean_type_node)
3122 t = fold_build2 (NE_EXPR, boolean_type_node,
3123 t, build_int_cst (TREE_TYPE (t), 0));
3124 gcond *cond_stmt = gimple_build_cond_empty (t);
3125 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3128 /* Add the loop cleanup function. */
3129 gsi = gsi_last_bb (exit_bb);
3130 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3131 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3132 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3133 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3134 else
3135 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3136 gcall *call_stmt = gimple_build_call (t, 0);
3137 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3138 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3139 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3140 if (fd->ordered)
3142 tree arr = counts[fd->ordered];
3143 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3144 TREE_THIS_VOLATILE (clobber) = 1;
3145 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3146 GSI_SAME_STMT);
3148 gsi_remove (&gsi, true);
3150 /* Connect the new blocks. */
3151 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3152 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3154 if (!broken_loop)
3156 gimple_seq phis;
3158 e = find_edge (cont_bb, l3_bb);
3159 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3161 phis = phi_nodes (l3_bb);
3162 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3164 gimple *phi = gsi_stmt (gsi);
3165 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3166 PHI_ARG_DEF_FROM_EDGE (phi, e));
3168 remove_edge (e);
3170 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3171 e = find_edge (cont_bb, l1_bb);
3172 if (e == NULL)
3174 e = BRANCH_EDGE (cont_bb);
3175 gcc_assert (single_succ (e->dest) == l1_bb);
3177 if (gimple_omp_for_combined_p (fd->for_stmt))
3179 remove_edge (e);
3180 e = NULL;
3182 else if (fd->collapse > 1)
3184 remove_edge (e);
3185 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3187 else
3188 e->flags = EDGE_TRUE_VALUE;
3189 if (e)
3191 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3192 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3194 else
3196 e = find_edge (cont_bb, l2_bb);
3197 e->flags = EDGE_FALLTHRU;
3199 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3201 if (gimple_in_ssa_p (cfun))
3203 /* Add phis to the outer loop that connect to the phis in the inner,
3204 original loop, and move the loop entry value of the inner phi to
3205 the loop entry value of the outer phi. */
3206 gphi_iterator psi;
3207 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3209 source_location locus;
3210 gphi *nphi;
3211 gphi *exit_phi = psi.phi ();
3213 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3214 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3216 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3217 edge latch_to_l1 = find_edge (latch, l1_bb);
3218 gphi *inner_phi
3219 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3221 tree t = gimple_phi_result (exit_phi);
3222 tree new_res = copy_ssa_name (t, NULL);
3223 nphi = create_phi_node (new_res, l0_bb);
3225 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3226 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3227 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3228 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3229 add_phi_arg (nphi, t, entry_to_l0, locus);
3231 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3232 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3234 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3238 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3239 recompute_dominator (CDI_DOMINATORS, l2_bb));
3240 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3241 recompute_dominator (CDI_DOMINATORS, l3_bb));
3242 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3243 recompute_dominator (CDI_DOMINATORS, l0_bb));
3244 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3245 recompute_dominator (CDI_DOMINATORS, l1_bb));
3247 /* We enter expand_omp_for_generic with a loop. This original loop may
3248 have its own loop struct, or it may be part of an outer loop struct
3249 (which may be the fake loop). */
3250 struct loop *outer_loop = entry_bb->loop_father;
3251 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3253 add_bb_to_loop (l2_bb, outer_loop);
3255 /* We've added a new loop around the original loop. Allocate the
3256 corresponding loop struct. */
3257 struct loop *new_loop = alloc_loop ();
3258 new_loop->header = l0_bb;
3259 new_loop->latch = l2_bb;
3260 add_loop (new_loop, outer_loop);
3262 /* Allocate a loop structure for the original loop unless we already
3263 had one. */
3264 if (!orig_loop_has_loop_struct
3265 && !gimple_omp_for_combined_p (fd->for_stmt))
3267 struct loop *orig_loop = alloc_loop ();
3268 orig_loop->header = l1_bb;
3269 /* The loop may have multiple latches. */
3270 add_loop (orig_loop, new_loop);
3275 /* A subroutine of expand_omp_for. Generate code for a parallel
3276 loop with static schedule and no specified chunk size. Given
3277 parameters:
3279 for (V = N1; V cond N2; V += STEP) BODY;
3281 where COND is "<" or ">", we generate pseudocode
3283 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3284 if (cond is <)
3285 adj = STEP - 1;
3286 else
3287 adj = STEP + 1;
3288 if ((__typeof (V)) -1 > 0 && cond is >)
3289 n = -(adj + N2 - N1) / -STEP;
3290 else
3291 n = (adj + N2 - N1) / STEP;
3292 q = n / nthreads;
3293 tt = n % nthreads;
3294 if (threadid < tt) goto L3; else goto L4;
3296 tt = 0;
3297 q = q + 1;
3299 s0 = q * threadid + tt;
3300 e0 = s0 + q;
3301 V = s0 * STEP + N1;
3302 if (s0 >= e0) goto L2; else goto L0;
3304 e = e0 * STEP + N1;
3306 BODY;
3307 V += STEP;
3308 if (V cond e) goto L1;
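/* Editor's sketch (worked example of the partitioning above, illustrative
   only): with n = 10 iterations and nthreads = 4 we get q = 2 and tt = 2, so
   threads 0 and 1 (threadid < tt) take q + 1 = 3 iterations each and threads
   2 and 3 take 2:

     thread 0: s0 = 0, e0 = 3
     thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8
     thread 3: s0 = 8, e0 = 10

   which covers [0, 10) exactly once, with the remainder spread over the
   first tt threads.  */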
3312 static void
3313 expand_omp_for_static_nochunk (struct omp_region *region,
3314 struct omp_for_data *fd,
3315 gimple *inner_stmt)
3317 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3318 tree type, itype, vmain, vback;
3319 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3320 basic_block body_bb, cont_bb, collapse_bb = NULL;
3321 basic_block fin_bb;
3322 gimple_stmt_iterator gsi;
3323 edge ep;
3324 bool broken_loop = region->cont == NULL;
3325 tree *counts = NULL;
3326 tree n1, n2, step;
3328 itype = type = TREE_TYPE (fd->loop.v);
3329 if (POINTER_TYPE_P (type))
3330 itype = signed_type_for (type);
3332 entry_bb = region->entry;
3333 cont_bb = region->cont;
3334 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3335 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3336 gcc_assert (broken_loop
3337 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3338 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3339 body_bb = single_succ (seq_start_bb);
3340 if (!broken_loop)
3342 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3343 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3344 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3346 exit_bb = region->exit;
3348 /* Iteration space partitioning goes in ENTRY_BB. */
3349 gsi = gsi_last_bb (entry_bb);
3350 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3352 if (fd->collapse > 1)
3354 int first_zero_iter = -1, dummy = -1;
3355 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3357 counts = XALLOCAVEC (tree, fd->collapse);
3358 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3359 fin_bb, first_zero_iter,
3360 dummy_bb, dummy, l2_dom_bb);
3361 t = NULL_TREE;
3363 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3364 t = integer_one_node;
3365 else
3366 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3367 fold_convert (type, fd->loop.n1),
3368 fold_convert (type, fd->loop.n2));
3369 if (fd->collapse == 1
3370 && TYPE_UNSIGNED (type)
3371 && (t == NULL_TREE || !integer_onep (t)))
3373 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3374 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3375 true, GSI_SAME_STMT);
3376 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3377 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3378 true, GSI_SAME_STMT);
3379 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3380 NULL_TREE, NULL_TREE);
3381 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3382 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3383 expand_omp_regimplify_p, NULL, NULL)
3384 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3385 expand_omp_regimplify_p, NULL, NULL))
3387 gsi = gsi_for_stmt (cond_stmt);
3388 gimple_regimplify_operands (cond_stmt, &gsi);
3390 ep = split_block (entry_bb, cond_stmt);
3391 ep->flags = EDGE_TRUE_VALUE;
3392 entry_bb = ep->dest;
3393 ep->probability = profile_probability::very_likely ();
3394 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3395 ep->probability = profile_probability::very_unlikely ();
3396 if (gimple_in_ssa_p (cfun))
3398 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3399 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3400 !gsi_end_p (gpi); gsi_next (&gpi))
3402 gphi *phi = gpi.phi ();
3403 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3404 ep, UNKNOWN_LOCATION);
3407 gsi = gsi_last_bb (entry_bb);
3410 switch (gimple_omp_for_kind (fd->for_stmt))
3412 case GF_OMP_FOR_KIND_FOR:
3413 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3414 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3415 break;
3416 case GF_OMP_FOR_KIND_DISTRIBUTE:
3417 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3418 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3419 break;
3420 default:
3421 gcc_unreachable ();
3423 nthreads = build_call_expr (nthreads, 0);
3424 nthreads = fold_convert (itype, nthreads);
3425 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3426 true, GSI_SAME_STMT);
3427 threadid = build_call_expr (threadid, 0);
3428 threadid = fold_convert (itype, threadid);
3429 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3430 true, GSI_SAME_STMT);
3432 n1 = fd->loop.n1;
3433 n2 = fd->loop.n2;
3434 step = fd->loop.step;
3435 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3437 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3438 OMP_CLAUSE__LOOPTEMP_);
3439 gcc_assert (innerc);
3440 n1 = OMP_CLAUSE_DECL (innerc);
3441 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3442 OMP_CLAUSE__LOOPTEMP_);
3443 gcc_assert (innerc);
3444 n2 = OMP_CLAUSE_DECL (innerc);
3446 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3447 true, NULL_TREE, true, GSI_SAME_STMT);
3448 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3449 true, NULL_TREE, true, GSI_SAME_STMT);
3450 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3451 true, NULL_TREE, true, GSI_SAME_STMT);
3453 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3454 t = fold_build2 (PLUS_EXPR, itype, step, t);
3455 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3456 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3457 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3458 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3459 fold_build1 (NEGATE_EXPR, itype, t),
3460 fold_build1 (NEGATE_EXPR, itype, step));
3461 else
3462 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3463 t = fold_convert (itype, t);
3464 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3466 q = create_tmp_reg (itype, "q");
3467 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3468 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3469 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3471 tt = create_tmp_reg (itype, "tt");
3472 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3473 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3474 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3476 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3477 gcond *cond_stmt = gimple_build_cond_empty (t);
3478 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3480 second_bb = split_block (entry_bb, cond_stmt)->dest;
3481 gsi = gsi_last_bb (second_bb);
3482 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3484 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3485 GSI_SAME_STMT);
3486 gassign *assign_stmt
3487 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3488 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3490 third_bb = split_block (second_bb, assign_stmt)->dest;
3491 gsi = gsi_last_bb (third_bb);
3492 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3494 t = build2 (MULT_EXPR, itype, q, threadid);
3495 t = build2 (PLUS_EXPR, itype, t, tt);
3496 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3498 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3499 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3501 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3502 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3504 /* Remove the GIMPLE_OMP_FOR statement. */
3505 gsi_remove (&gsi, true);
3507 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3508 gsi = gsi_start_bb (seq_start_bb);
3510 tree startvar = fd->loop.v;
3511 tree endvar = NULL_TREE;
3513 if (gimple_omp_for_combined_p (fd->for_stmt))
3515 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3516 ? gimple_omp_parallel_clauses (inner_stmt)
3517 : gimple_omp_for_clauses (inner_stmt);
3518 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3519 gcc_assert (innerc);
3520 startvar = OMP_CLAUSE_DECL (innerc);
3521 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3522 OMP_CLAUSE__LOOPTEMP_);
3523 gcc_assert (innerc);
3524 endvar = OMP_CLAUSE_DECL (innerc);
3525 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3526 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3528 int i;
3529 for (i = 1; i < fd->collapse; i++)
3531 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3532 OMP_CLAUSE__LOOPTEMP_);
3533 gcc_assert (innerc);
3535 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3536 OMP_CLAUSE__LOOPTEMP_);
3537 if (innerc)
3539 /* If needed (distribute parallel for with lastprivate),
3540 propagate down the total number of iterations. */
3541 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3542 fd->loop.n2);
3543 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3544 GSI_CONTINUE_LINKING);
3545 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3546 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3550 t = fold_convert (itype, s0);
3551 t = fold_build2 (MULT_EXPR, itype, t, step);
3552 if (POINTER_TYPE_P (type))
3553 t = fold_build_pointer_plus (n1, t);
3554 else
3555 t = fold_build2 (PLUS_EXPR, type, t, n1);
3556 t = fold_convert (TREE_TYPE (startvar), t);
3557 t = force_gimple_operand_gsi (&gsi, t,
3558 DECL_P (startvar)
3559 && TREE_ADDRESSABLE (startvar),
3560 NULL_TREE, false, GSI_CONTINUE_LINKING);
3561 assign_stmt = gimple_build_assign (startvar, t);
3562 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3564 t = fold_convert (itype, e0);
3565 t = fold_build2 (MULT_EXPR, itype, t, step);
3566 if (POINTER_TYPE_P (type))
3567 t = fold_build_pointer_plus (n1, t);
3568 else
3569 t = fold_build2 (PLUS_EXPR, type, t, n1);
3570 t = fold_convert (TREE_TYPE (startvar), t);
3571 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3572 false, GSI_CONTINUE_LINKING);
3573 if (endvar)
3575 assign_stmt = gimple_build_assign (endvar, e);
3576 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3577 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3578 assign_stmt = gimple_build_assign (fd->loop.v, e);
3579 else
3580 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3581 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3583 /* Handle linear clause adjustments. */
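/* For illustration only (a hypothetical clause, not taken from any testcase):
   for linear(l:2) on this worksharing loop, the code below adjusts each
   thread's copy of l by s0 * 2, i.e. the linear step multiplied by the
   number of logical iterations that precede the thread's first one.  */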
3584 tree itercnt = NULL_TREE;
3585 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3586 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3587 c; c = OMP_CLAUSE_CHAIN (c))
3588 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3589 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3591 tree d = OMP_CLAUSE_DECL (c);
3592 bool is_ref = omp_is_reference (d);
3593 tree t = d, a, dest;
3594 if (is_ref)
3595 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3596 if (itercnt == NULL_TREE)
3598 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3600 itercnt = fold_build2 (MINUS_EXPR, itype,
3601 fold_convert (itype, n1),
3602 fold_convert (itype, fd->loop.n1));
3603 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3604 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3605 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3606 NULL_TREE, false,
3607 GSI_CONTINUE_LINKING);
3609 else
3610 itercnt = s0;
3612 tree type = TREE_TYPE (t);
3613 if (POINTER_TYPE_P (type))
3614 type = sizetype;
3615 a = fold_build2 (MULT_EXPR, type,
3616 fold_convert (type, itercnt),
3617 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3618 dest = unshare_expr (t);
3619 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3620 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3621 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3622 false, GSI_CONTINUE_LINKING);
3623 assign_stmt = gimple_build_assign (dest, t);
3624 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3626 if (fd->collapse > 1)
3627 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3629 if (!broken_loop)
3631 /* The code controlling the sequential loop replaces the
3632 GIMPLE_OMP_CONTINUE. */
3633 gsi = gsi_last_bb (cont_bb);
3634 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3635 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3636 vmain = gimple_omp_continue_control_use (cont_stmt);
3637 vback = gimple_omp_continue_control_def (cont_stmt);
3639 if (!gimple_omp_for_combined_p (fd->for_stmt))
3641 if (POINTER_TYPE_P (type))
3642 t = fold_build_pointer_plus (vmain, step);
3643 else
3644 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3645 t = force_gimple_operand_gsi (&gsi, t,
3646 DECL_P (vback)
3647 && TREE_ADDRESSABLE (vback),
3648 NULL_TREE, true, GSI_SAME_STMT);
3649 assign_stmt = gimple_build_assign (vback, t);
3650 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3652 t = build2 (fd->loop.cond_code, boolean_type_node,
3653 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3654 ? t : vback, e);
3655 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3658 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3659 gsi_remove (&gsi, true);
3661 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3662 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3665 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3666 gsi = gsi_last_bb (exit_bb);
3667 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3669 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3670 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3672 gsi_remove (&gsi, true);
3674 /* Connect all the blocks. */
3675 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3676 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3677 ep = find_edge (entry_bb, second_bb);
3678 ep->flags = EDGE_TRUE_VALUE;
3679 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3680 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3681 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3683 if (!broken_loop)
3685 ep = find_edge (cont_bb, body_bb);
3686 if (ep == NULL)
3688 ep = BRANCH_EDGE (cont_bb);
3689 gcc_assert (single_succ (ep->dest) == body_bb);
3691 if (gimple_omp_for_combined_p (fd->for_stmt))
3693 remove_edge (ep);
3694 ep = NULL;
3696 else if (fd->collapse > 1)
3698 remove_edge (ep);
3699 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3701 else
3702 ep->flags = EDGE_TRUE_VALUE;
3703 find_edge (cont_bb, fin_bb)->flags
3704 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3707 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3708 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3709 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3711 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3712 recompute_dominator (CDI_DOMINATORS, body_bb));
3713 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3714 recompute_dominator (CDI_DOMINATORS, fin_bb));
3716 struct loop *loop = body_bb->loop_father;
3717 if (loop != entry_bb->loop_father)
3719 gcc_assert (broken_loop || loop->header == body_bb);
3720 gcc_assert (broken_loop
3721 || loop->latch == region->cont
3722 || single_pred (loop->latch) == region->cont);
3723 return;
3726 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3728 loop = alloc_loop ();
3729 loop->header = body_bb;
3730 if (collapse_bb == NULL)
3731 loop->latch = cont_bb;
3732 add_loop (loop, body_bb->loop_father);
3736 /* Return phi in E->DEST with ARG on edge E. */
3738 static gphi *
3739 find_phi_with_arg_on_edge (tree arg, edge e)
3741 basic_block bb = e->dest;
3743 for (gphi_iterator gpi = gsi_start_phis (bb);
3744 !gsi_end_p (gpi);
3745 gsi_next (&gpi))
3747 gphi *phi = gpi.phi ();
3748 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3749 return phi;
3752 return NULL;
3755 /* A subroutine of expand_omp_for. Generate code for a parallel
3756 loop with static schedule and a specified chunk size. Given
3757 parameters:
3759 for (V = N1; V cond N2; V += STEP) BODY;
3761 where COND is "<" or ">", we generate pseudocode
3763 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3764 if (cond is <)
3765 adj = STEP - 1;
3766 else
3767 adj = STEP + 1;
3768 if ((__typeof (V)) -1 > 0 && cond is >)
3769 n = -(adj + N2 - N1) / -STEP;
3770 else
3771 n = (adj + N2 - N1) / STEP;
3772 trip = 0;
3773 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3774 here so that V is defined
3775 if the loop is not entered
3776 L0:
3777 s0 = (trip * nthreads + threadid) * CHUNK;
3778 e0 = min (s0 + CHUNK, n);
3779 if (s0 < n) goto L1; else goto L4;
3780 L1:
3781 V = s0 * STEP + N1;
3782 e = e0 * STEP + N1;
3783 L2:
3784 BODY;
3785 V += STEP;
3786 if (V cond e) goto L2; else goto L3;
3787 L3:
3788 trip += 1;
3789 goto L0;
3790 L4:
3791 */
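/* For illustration only, a hypothetical user-level loop of the shape this
   expansion handles (the array names are made up):

	#pragma omp for schedule(static, 4)
	for (i = 0; i < n; i++)
	  a[i] = b[i] + c[i];

   Here V is i, N1 is 0, N2 is n, STEP is 1 and CHUNK is 4, so each thread
   repeatedly claims a chunk of four consecutive iterations, advancing by
   nthreads * CHUNK between trips.  */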
3793 static void
3794 expand_omp_for_static_chunk (struct omp_region *region,
3795 struct omp_for_data *fd, gimple *inner_stmt)
3797 tree n, s0, e0, e, t;
3798 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3799 tree type, itype, vmain, vback, vextra;
3800 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3801 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3802 gimple_stmt_iterator gsi;
3803 edge se;
3804 bool broken_loop = region->cont == NULL;
3805 tree *counts = NULL;
3806 tree n1, n2, step;
3808 itype = type = TREE_TYPE (fd->loop.v);
3809 if (POINTER_TYPE_P (type))
3810 itype = signed_type_for (type);
3812 entry_bb = region->entry;
3813 se = split_block (entry_bb, last_stmt (entry_bb));
3814 entry_bb = se->src;
3815 iter_part_bb = se->dest;
3816 cont_bb = region->cont;
3817 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3818 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3819 gcc_assert (broken_loop
3820 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3821 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3822 body_bb = single_succ (seq_start_bb);
3823 if (!broken_loop)
3825 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3826 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3827 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3828 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3830 exit_bb = region->exit;
3832 /* Trip and adjustment setup goes in ENTRY_BB. */
3833 gsi = gsi_last_bb (entry_bb);
3834 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3836 if (fd->collapse > 1)
3838 int first_zero_iter = -1, dummy = -1;
3839 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3841 counts = XALLOCAVEC (tree, fd->collapse);
3842 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3843 fin_bb, first_zero_iter,
3844 dummy_bb, dummy, l2_dom_bb);
3845 t = NULL_TREE;
3847 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3848 t = integer_one_node;
3849 else
3850 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3851 fold_convert (type, fd->loop.n1),
3852 fold_convert (type, fd->loop.n2));
3853 if (fd->collapse == 1
3854 && TYPE_UNSIGNED (type)
3855 && (t == NULL_TREE || !integer_onep (t)))
3857 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3858 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3859 true, GSI_SAME_STMT);
3860 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3861 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3862 true, GSI_SAME_STMT);
3863 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3864 NULL_TREE, NULL_TREE);
3865 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3866 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3867 expand_omp_regimplify_p, NULL, NULL)
3868 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3869 expand_omp_regimplify_p, NULL, NULL))
3871 gsi = gsi_for_stmt (cond_stmt);
3872 gimple_regimplify_operands (cond_stmt, &gsi);
3874 se = split_block (entry_bb, cond_stmt);
3875 se->flags = EDGE_TRUE_VALUE;
3876 entry_bb = se->dest;
3877 se->probability = profile_probability::very_likely ();
3878 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3879 se->probability = profile_probability::very_unlikely ();
3880 if (gimple_in_ssa_p (cfun))
3882 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3883 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3884 !gsi_end_p (gpi); gsi_next (&gpi))
3886 gphi *phi = gpi.phi ();
3887 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3888 se, UNKNOWN_LOCATION);
3891 gsi = gsi_last_bb (entry_bb);
3894 switch (gimple_omp_for_kind (fd->for_stmt))
3896 case GF_OMP_FOR_KIND_FOR:
3897 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3898 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3899 break;
3900 case GF_OMP_FOR_KIND_DISTRIBUTE:
3901 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3902 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3903 break;
3904 default:
3905 gcc_unreachable ();
3907 nthreads = build_call_expr (nthreads, 0);
3908 nthreads = fold_convert (itype, nthreads);
3909 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3910 true, GSI_SAME_STMT);
3911 threadid = build_call_expr (threadid, 0);
3912 threadid = fold_convert (itype, threadid);
3913 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3914 true, GSI_SAME_STMT);
3916 n1 = fd->loop.n1;
3917 n2 = fd->loop.n2;
3918 step = fd->loop.step;
3919 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3921 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3922 OMP_CLAUSE__LOOPTEMP_);
3923 gcc_assert (innerc);
3924 n1 = OMP_CLAUSE_DECL (innerc);
3925 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3926 OMP_CLAUSE__LOOPTEMP_);
3927 gcc_assert (innerc);
3928 n2 = OMP_CLAUSE_DECL (innerc);
3930 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3931 true, NULL_TREE, true, GSI_SAME_STMT);
3932 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3933 true, NULL_TREE, true, GSI_SAME_STMT);
3934 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3935 true, NULL_TREE, true, GSI_SAME_STMT);
3936 tree chunk_size = fold_convert (itype, fd->chunk_size);
3937 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3938 chunk_size
3939 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3940 GSI_SAME_STMT);
3942 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3943 t = fold_build2 (PLUS_EXPR, itype, step, t);
3944 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3945 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3946 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3947 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3948 fold_build1 (NEGATE_EXPR, itype, t),
3949 fold_build1 (NEGATE_EXPR, itype, step));
3950 else
3951 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3952 t = fold_convert (itype, t);
3953 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3954 true, GSI_SAME_STMT);
3956 trip_var = create_tmp_reg (itype, ".trip");
3957 if (gimple_in_ssa_p (cfun))
3959 trip_init = make_ssa_name (trip_var);
3960 trip_main = make_ssa_name (trip_var);
3961 trip_back = make_ssa_name (trip_var);
3963 else
3965 trip_init = trip_var;
3966 trip_main = trip_var;
3967 trip_back = trip_var;
3970 gassign *assign_stmt
3971 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3972 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3974 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3975 t = fold_build2 (MULT_EXPR, itype, t, step);
3976 if (POINTER_TYPE_P (type))
3977 t = fold_build_pointer_plus (n1, t);
3978 else
3979 t = fold_build2 (PLUS_EXPR, type, t, n1);
3980 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3981 true, GSI_SAME_STMT);
3983 /* Remove the GIMPLE_OMP_FOR. */
3984 gsi_remove (&gsi, true);
3986 gimple_stmt_iterator gsif = gsi;
3988 /* Iteration space partitioning goes in ITER_PART_BB. */
3989 gsi = gsi_last_bb (iter_part_bb);
3991 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3992 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3993 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3994 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3995 false, GSI_CONTINUE_LINKING);
3997 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3998 t = fold_build2 (MIN_EXPR, itype, t, n);
3999 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4000 false, GSI_CONTINUE_LINKING);
4002 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4003 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4005 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4006 gsi = gsi_start_bb (seq_start_bb);
4008 tree startvar = fd->loop.v;
4009 tree endvar = NULL_TREE;
4011 if (gimple_omp_for_combined_p (fd->for_stmt))
4013 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4014 ? gimple_omp_parallel_clauses (inner_stmt)
4015 : gimple_omp_for_clauses (inner_stmt);
4016 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4017 gcc_assert (innerc);
4018 startvar = OMP_CLAUSE_DECL (innerc);
4019 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4020 OMP_CLAUSE__LOOPTEMP_);
4021 gcc_assert (innerc);
4022 endvar = OMP_CLAUSE_DECL (innerc);
4023 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4024 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4026 int i;
4027 for (i = 1; i < fd->collapse; i++)
4029 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4030 OMP_CLAUSE__LOOPTEMP_);
4031 gcc_assert (innerc);
4033 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4034 OMP_CLAUSE__LOOPTEMP_);
4035 if (innerc)
4037 /* If needed (distribute parallel for with lastprivate),
4038 propagate down the total number of iterations. */
4039 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4040 fd->loop.n2);
4041 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4042 GSI_CONTINUE_LINKING);
4043 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4044 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4049 t = fold_convert (itype, s0);
4050 t = fold_build2 (MULT_EXPR, itype, t, step);
4051 if (POINTER_TYPE_P (type))
4052 t = fold_build_pointer_plus (n1, t);
4053 else
4054 t = fold_build2 (PLUS_EXPR, type, t, n1);
4055 t = fold_convert (TREE_TYPE (startvar), t);
4056 t = force_gimple_operand_gsi (&gsi, t,
4057 DECL_P (startvar)
4058 && TREE_ADDRESSABLE (startvar),
4059 NULL_TREE, false, GSI_CONTINUE_LINKING);
4060 assign_stmt = gimple_build_assign (startvar, t);
4061 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4063 t = fold_convert (itype, e0);
4064 t = fold_build2 (MULT_EXPR, itype, t, step);
4065 if (POINTER_TYPE_P (type))
4066 t = fold_build_pointer_plus (n1, t);
4067 else
4068 t = fold_build2 (PLUS_EXPR, type, t, n1);
4069 t = fold_convert (TREE_TYPE (startvar), t);
4070 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4071 false, GSI_CONTINUE_LINKING);
4072 if (endvar)
4074 assign_stmt = gimple_build_assign (endvar, e);
4075 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4076 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4077 assign_stmt = gimple_build_assign (fd->loop.v, e);
4078 else
4079 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4080 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4082 /* Handle linear clause adjustments. */
4083 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4084 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4085 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4086 c; c = OMP_CLAUSE_CHAIN (c))
4087 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4088 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4090 tree d = OMP_CLAUSE_DECL (c);
4091 bool is_ref = omp_is_reference (d);
4092 tree t = d, a, dest;
4093 if (is_ref)
4094 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4095 tree type = TREE_TYPE (t);
4096 if (POINTER_TYPE_P (type))
4097 type = sizetype;
4098 dest = unshare_expr (t);
4099 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4100 expand_omp_build_assign (&gsif, v, t);
4101 if (itercnt == NULL_TREE)
4103 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4105 itercntbias
4106 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4107 fold_convert (itype, fd->loop.n1));
4108 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4109 itercntbias, step);
4110 itercntbias
4111 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4112 NULL_TREE, true,
4113 GSI_SAME_STMT);
4114 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4115 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4116 NULL_TREE, false,
4117 GSI_CONTINUE_LINKING);
4119 else
4120 itercnt = s0;
4122 a = fold_build2 (MULT_EXPR, type,
4123 fold_convert (type, itercnt),
4124 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4125 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4126 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4127 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4128 false, GSI_CONTINUE_LINKING);
4129 assign_stmt = gimple_build_assign (dest, t);
4130 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4132 if (fd->collapse > 1)
4133 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4135 if (!broken_loop)
4137 /* The code controlling the sequential loop goes in CONT_BB,
4138 replacing the GIMPLE_OMP_CONTINUE. */
4139 gsi = gsi_last_bb (cont_bb);
4140 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4141 vmain = gimple_omp_continue_control_use (cont_stmt);
4142 vback = gimple_omp_continue_control_def (cont_stmt);
4144 if (!gimple_omp_for_combined_p (fd->for_stmt))
4146 if (POINTER_TYPE_P (type))
4147 t = fold_build_pointer_plus (vmain, step);
4148 else
4149 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4150 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4151 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4152 true, GSI_SAME_STMT);
4153 assign_stmt = gimple_build_assign (vback, t);
4154 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4156 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4157 t = build2 (EQ_EXPR, boolean_type_node,
4158 build_int_cst (itype, 0),
4159 build_int_cst (itype, 1));
4160 else
4161 t = build2 (fd->loop.cond_code, boolean_type_node,
4162 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4163 ? t : vback, e);
4164 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4167 /* Remove GIMPLE_OMP_CONTINUE. */
4168 gsi_remove (&gsi, true);
4170 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4171 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4173 /* Trip update code goes into TRIP_UPDATE_BB. */
4174 gsi = gsi_start_bb (trip_update_bb);
4176 t = build_int_cst (itype, 1);
4177 t = build2 (PLUS_EXPR, itype, trip_main, t);
4178 assign_stmt = gimple_build_assign (trip_back, t);
4179 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4182 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4183 gsi = gsi_last_bb (exit_bb);
4184 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4186 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4187 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4189 gsi_remove (&gsi, true);
4191 /* Connect the new blocks. */
4192 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4193 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4195 if (!broken_loop)
4197 se = find_edge (cont_bb, body_bb);
4198 if (se == NULL)
4200 se = BRANCH_EDGE (cont_bb);
4201 gcc_assert (single_succ (se->dest) == body_bb);
4203 if (gimple_omp_for_combined_p (fd->for_stmt))
4205 remove_edge (se);
4206 se = NULL;
4208 else if (fd->collapse > 1)
4210 remove_edge (se);
4211 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4213 else
4214 se->flags = EDGE_TRUE_VALUE;
4215 find_edge (cont_bb, trip_update_bb)->flags
4216 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4218 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4219 iter_part_bb);
4222 if (gimple_in_ssa_p (cfun))
4224 gphi_iterator psi;
4225 gphi *phi;
4226 edge re, ene;
4227 edge_var_map *vm;
4228 size_t i;
4230 gcc_assert (fd->collapse == 1 && !broken_loop);
4232 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4233 remove arguments of the phi nodes in fin_bb. We need to create
4234 appropriate phi nodes in iter_part_bb instead. */
4235 se = find_edge (iter_part_bb, fin_bb);
4236 re = single_succ_edge (trip_update_bb);
4237 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4238 ene = single_succ_edge (entry_bb);
4240 psi = gsi_start_phis (fin_bb);
4241 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4242 gsi_next (&psi), ++i)
4244 gphi *nphi;
4245 source_location locus;
4247 phi = psi.phi ();
4248 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4249 redirect_edge_var_map_def (vm), 0))
4250 continue;
4252 t = gimple_phi_result (phi);
4253 gcc_assert (t == redirect_edge_var_map_result (vm));
4255 if (!single_pred_p (fin_bb))
4256 t = copy_ssa_name (t, phi);
4258 nphi = create_phi_node (t, iter_part_bb);
4260 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4261 locus = gimple_phi_arg_location_from_edge (phi, se);
4263 /* A special case -- fd->loop.v is not yet computed in
4264 iter_part_bb, so we need to use vextra instead. */
4265 if (t == fd->loop.v)
4266 t = vextra;
4267 add_phi_arg (nphi, t, ene, locus);
4268 locus = redirect_edge_var_map_location (vm);
4269 tree back_arg = redirect_edge_var_map_def (vm);
4270 add_phi_arg (nphi, back_arg, re, locus);
4271 edge ce = find_edge (cont_bb, body_bb);
4272 if (ce == NULL)
4274 ce = BRANCH_EDGE (cont_bb);
4275 gcc_assert (single_succ (ce->dest) == body_bb);
4276 ce = single_succ_edge (ce->dest);
4278 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4279 gcc_assert (inner_loop_phi != NULL);
4280 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4281 find_edge (seq_start_bb, body_bb), locus);
4283 if (!single_pred_p (fin_bb))
4284 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4286 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4287 redirect_edge_var_map_clear (re);
4288 if (single_pred_p (fin_bb))
4289 while (1)
4291 psi = gsi_start_phis (fin_bb);
4292 if (gsi_end_p (psi))
4293 break;
4294 remove_phi_node (&psi, false);
4297 /* Make phi node for trip. */
4298 phi = create_phi_node (trip_main, iter_part_bb);
4299 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4300 UNKNOWN_LOCATION);
4301 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4302 UNKNOWN_LOCATION);
4305 if (!broken_loop)
4306 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4307 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4308 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4309 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4310 recompute_dominator (CDI_DOMINATORS, fin_bb));
4311 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4312 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4313 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4314 recompute_dominator (CDI_DOMINATORS, body_bb));
4316 if (!broken_loop)
4318 struct loop *loop = body_bb->loop_father;
4319 struct loop *trip_loop = alloc_loop ();
4320 trip_loop->header = iter_part_bb;
4321 trip_loop->latch = trip_update_bb;
4322 add_loop (trip_loop, iter_part_bb->loop_father);
4324 if (loop != entry_bb->loop_father)
4326 gcc_assert (loop->header == body_bb);
4327 gcc_assert (loop->latch == region->cont
4328 || single_pred (loop->latch) == region->cont);
4329 trip_loop->inner = loop;
4330 return;
4333 if (!gimple_omp_for_combined_p (fd->for_stmt))
4335 loop = alloc_loop ();
4336 loop->header = body_bb;
4337 if (collapse_bb == NULL)
4338 loop->latch = cont_bb;
4339 add_loop (loop, trip_loop);
4344 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4345 Given parameters:
4346 for (V = N1; V cond N2; V += STEP) BODY;
4348 where COND is "<" or ">" or "!=", we generate pseudocode
4350 for (ind_var = low; ind_var < high; ind_var++)
4351 {
4352 V = n1 + (ind_var * STEP)
4354 <BODY>
4355 }
4357 In the above pseudocode, low and high are function parameters of the
4358 child function. In the function below, we insert a temporary
4359 variable that makes calls to two OMP functions that will not be
4360 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4361 with _Cilk_for). These functions are replaced with low and high
4362 by the function that handles taskreg. */
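/* For illustration only, a hypothetical _Cilk_for loop of the shape handled
   here (the body is made up):

	_Cilk_for (int i = 0; i < n; i++)
	  a[i] = f (i);

   The Cilk runtime repeatedly invokes the outlined child function with a
   [__low, __high) subrange, and the ind_var loop built below walks just
   that subrange.  */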
4365 static void
4366 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4368 bool broken_loop = region->cont == NULL;
4369 basic_block entry_bb = region->entry;
4370 basic_block cont_bb = region->cont;
4372 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4373 gcc_assert (broken_loop
4374 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4375 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4376 basic_block l1_bb, l2_bb;
4378 if (!broken_loop)
4380 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4381 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4382 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4383 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4385 else
4387 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4388 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4389 l2_bb = single_succ (l1_bb);
4391 basic_block exit_bb = region->exit;
4392 basic_block l2_dom_bb = NULL;
4394 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4396 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4397 statements used to pass information to expand_omp_taskreg.
4398 low_val and high_val will be replaced by the __low and __high
4399 parameters from the child function.
4401 The call_exprs part is a place-holder, it is mainly used
4402 to distinctly identify to the top-level part that this is
4403 where we should put low and high (reasoning given in header
4404 comment). */
4406 gomp_parallel *par_stmt
4407 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4408 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4409 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4410 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4412 if (id_equal (DECL_NAME (t), "__high"))
4413 high_val = t;
4414 else if (id_equal (DECL_NAME (t), "__low"))
4415 low_val = t;
4417 gcc_assert (low_val && high_val);
4419 tree type = TREE_TYPE (low_val);
4420 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4421 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4423 /* Not needed in SSA form right now. */
4424 gcc_assert (!gimple_in_ssa_p (cfun));
4425 if (l2_dom_bb == NULL)
4426 l2_dom_bb = l1_bb;
4428 tree n1 = low_val;
4429 tree n2 = high_val;
4431 gimple *stmt = gimple_build_assign (ind_var, n1);
4433 /* Replace the GIMPLE_OMP_FOR statement. */
4434 gsi_replace (&gsi, stmt, true);
4436 if (!broken_loop)
4438 /* Code to control the increment goes in the CONT_BB. */
4439 gsi = gsi_last_bb (cont_bb);
4440 stmt = gsi_stmt (gsi);
4441 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4442 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4443 build_one_cst (type));
4445 /* Replace GIMPLE_OMP_CONTINUE. */
4446 gsi_replace (&gsi, stmt, true);
4449 /* Emit the condition in L1_BB. */
4450 gsi = gsi_after_labels (l1_bb);
4451 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4452 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4453 fd->loop.step);
4454 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4455 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4456 fd->loop.n1, fold_convert (sizetype, t));
4457 else
4458 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4459 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4460 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4461 expand_omp_build_assign (&gsi, fd->loop.v, t);
4463 /* The condition is always '<' since the runtime will fill in the low
4464 and high values. */
4465 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4466 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4468 /* Remove GIMPLE_OMP_RETURN. */
4469 gsi = gsi_last_bb (exit_bb);
4470 gsi_remove (&gsi, true);
4472 /* Connect the new blocks. */
4473 remove_edge (FALLTHRU_EDGE (entry_bb));
4475 edge e, ne;
4476 if (!broken_loop)
4478 remove_edge (BRANCH_EDGE (entry_bb));
4479 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4481 e = BRANCH_EDGE (l1_bb);
4482 ne = FALLTHRU_EDGE (l1_bb);
4483 e->flags = EDGE_TRUE_VALUE;
4485 else
4487 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4489 ne = single_succ_edge (l1_bb);
4490 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4493 ne->flags = EDGE_FALSE_VALUE;
4494 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4495 ne->probability = e->probability.invert ();
4497 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4498 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4499 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4501 if (!broken_loop)
4503 struct loop *loop = alloc_loop ();
4504 loop->header = l1_bb;
4505 loop->latch = cont_bb;
4506 add_loop (loop, l1_bb->loop_father);
4507 loop->safelen = INT_MAX;
4510 /* Pick the correct library function based on the precision of the
4511 induction variable type. */
4512 tree lib_fun = NULL_TREE;
4513 if (TYPE_PRECISION (type) == 32)
4514 lib_fun = cilk_for_32_fndecl;
4515 else if (TYPE_PRECISION (type) == 64)
4516 lib_fun = cilk_for_64_fndecl;
4517 else
4518 gcc_unreachable ();
4520 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4522 /* WS_ARGS contains the library function flavor to call:
4523 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4524 user-defined grain value. If the user does not define one, then zero
4525 is passed in by the parser. */
4526 vec_alloc (region->ws_args, 2);
4527 region->ws_args->quick_push (lib_fun);
4528 region->ws_args->quick_push (fd->chunk_size);
4531 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4532 loop. Given parameters:
4534 for (V = N1; V cond N2; V += STEP) BODY;
4536 where COND is "<" or ">", we generate pseudocode
4538 V = N1;
4539 goto L1;
4540 L0:
4541 BODY;
4542 V += STEP;
4543 L1:
4544 if (V cond N2) goto L0; else goto L2;
4545 L2:
4547 For collapsed loops, given parameters:
4548 collapse(3)
4549 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4550 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4551 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4552 BODY;
4554 we generate pseudocode
4556 if (cond3 is <)
4557 adj = STEP3 - 1;
4558 else
4559 adj = STEP3 + 1;
4560 count3 = (adj + N32 - N31) / STEP3;
4561 if (cond2 is <)
4562 adj = STEP2 - 1;
4563 else
4564 adj = STEP2 + 1;
4565 count2 = (adj + N22 - N21) / STEP2;
4566 if (cond1 is <)
4567 adj = STEP1 - 1;
4568 else
4569 adj = STEP1 + 1;
4570 count1 = (adj + N12 - N11) / STEP1;
4571 count = count1 * count2 * count3;
4572 V = 0;
4573 V1 = N11;
4574 V2 = N21;
4575 V3 = N31;
4576 goto L1;
4577 L0:
4578 BODY;
4579 V += 1;
4580 V3 += STEP3;
4581 V2 += (V3 cond3 N32) ? 0 : STEP2;
4582 V3 = (V3 cond3 N32) ? V3 : N31;
4583 V1 += (V2 cond2 N22) ? 0 : STEP1;
4584 V2 = (V2 cond2 N22) ? V2 : N21;
4585 L1:
4586 if (V < count) goto L0; else goto L2;
4587 L2:
4589 */
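/* For illustration only, a hypothetical loop this expansion could apply to
   (the array names are made up):

	#pragma omp simd safelen(8)
	for (i = 0; i < n; i++)
	  a[i] = b[i] + c[i];

   No libgomp calls are emitted; the loop stays sequential here and is only
   annotated (safelen, simduid, force_vectorize) so the vectorizer can turn
   it into SIMD code later.  */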
4591 static void
4592 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4594 tree type, t;
4595 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4596 gimple_stmt_iterator gsi;
4597 gimple *stmt;
4598 gcond *cond_stmt;
4599 bool broken_loop = region->cont == NULL;
4600 edge e, ne;
4601 tree *counts = NULL;
4602 int i;
4603 int safelen_int = INT_MAX;
4604 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4605 OMP_CLAUSE_SAFELEN);
4606 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4607 OMP_CLAUSE__SIMDUID_);
4608 tree n1, n2;
4610 if (safelen)
4612 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4613 if (TREE_CODE (safelen) != INTEGER_CST)
4614 safelen_int = 0;
4615 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4616 safelen_int = tree_to_uhwi (safelen);
4617 if (safelen_int == 1)
4618 safelen_int = 0;
4620 type = TREE_TYPE (fd->loop.v);
4621 entry_bb = region->entry;
4622 cont_bb = region->cont;
4623 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4624 gcc_assert (broken_loop
4625 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4626 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4627 if (!broken_loop)
4629 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4630 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4631 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4632 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4634 else
4636 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4637 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4638 l2_bb = single_succ (l1_bb);
4640 exit_bb = region->exit;
4641 l2_dom_bb = NULL;
4643 gsi = gsi_last_bb (entry_bb);
4645 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4646 /* Not needed in SSA form right now. */
4647 gcc_assert (!gimple_in_ssa_p (cfun));
4648 if (fd->collapse > 1)
4650 int first_zero_iter = -1, dummy = -1;
4651 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4653 counts = XALLOCAVEC (tree, fd->collapse);
4654 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4655 zero_iter_bb, first_zero_iter,
4656 dummy_bb, dummy, l2_dom_bb);
4658 if (l2_dom_bb == NULL)
4659 l2_dom_bb = l1_bb;
4661 n1 = fd->loop.n1;
4662 n2 = fd->loop.n2;
4663 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4665 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4666 OMP_CLAUSE__LOOPTEMP_);
4667 gcc_assert (innerc);
4668 n1 = OMP_CLAUSE_DECL (innerc);
4669 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4670 OMP_CLAUSE__LOOPTEMP_);
4671 gcc_assert (innerc);
4672 n2 = OMP_CLAUSE_DECL (innerc);
4674 tree step = fd->loop.step;
4676 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4677 OMP_CLAUSE__SIMT_);
4678 if (is_simt)
4680 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4681 is_simt = safelen_int > 1;
4683 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4684 if (is_simt)
4686 simt_lane = create_tmp_var (unsigned_type_node);
4687 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4688 gimple_call_set_lhs (g, simt_lane);
4689 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4690 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4691 fold_convert (TREE_TYPE (step), simt_lane));
4692 n1 = fold_convert (type, n1);
4693 if (POINTER_TYPE_P (type))
4694 n1 = fold_build_pointer_plus (n1, offset);
4695 else
4696 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4698 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4699 if (fd->collapse > 1)
4700 simt_maxlane = build_one_cst (unsigned_type_node);
4701 else if (safelen_int < omp_max_simt_vf ())
4702 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4703 tree vf
4704 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4705 unsigned_type_node, 0);
4706 if (simt_maxlane)
4707 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4708 vf = fold_convert (TREE_TYPE (step), vf);
4709 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4712 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4713 if (fd->collapse > 1)
4715 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4717 gsi_prev (&gsi);
4718 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4719 gsi_next (&gsi);
4721 else
4722 for (i = 0; i < fd->collapse; i++)
4724 tree itype = TREE_TYPE (fd->loops[i].v);
4725 if (POINTER_TYPE_P (itype))
4726 itype = signed_type_for (itype);
4727 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4728 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4732 /* Remove the GIMPLE_OMP_FOR statement. */
4733 gsi_remove (&gsi, true);
4735 if (!broken_loop)
4737 /* Code to control the increment goes in the CONT_BB. */
4738 gsi = gsi_last_bb (cont_bb);
4739 stmt = gsi_stmt (gsi);
4740 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4742 if (POINTER_TYPE_P (type))
4743 t = fold_build_pointer_plus (fd->loop.v, step);
4744 else
4745 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4746 expand_omp_build_assign (&gsi, fd->loop.v, t);
4748 if (fd->collapse > 1)
4750 i = fd->collapse - 1;
4751 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4753 t = fold_convert (sizetype, fd->loops[i].step);
4754 t = fold_build_pointer_plus (fd->loops[i].v, t);
4756 else
4758 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4759 fd->loops[i].step);
4760 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4761 fd->loops[i].v, t);
4763 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4765 for (i = fd->collapse - 1; i > 0; i--)
4767 tree itype = TREE_TYPE (fd->loops[i].v);
4768 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4769 if (POINTER_TYPE_P (itype2))
4770 itype2 = signed_type_for (itype2);
4771 t = fold_convert (itype2, fd->loops[i - 1].step);
4772 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4773 GSI_SAME_STMT);
4774 t = build3 (COND_EXPR, itype2,
4775 build2 (fd->loops[i].cond_code, boolean_type_node,
4776 fd->loops[i].v,
4777 fold_convert (itype, fd->loops[i].n2)),
4778 build_int_cst (itype2, 0), t);
4779 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4780 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4781 else
4782 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4783 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4785 t = fold_convert (itype, fd->loops[i].n1);
4786 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4787 GSI_SAME_STMT);
4788 t = build3 (COND_EXPR, itype,
4789 build2 (fd->loops[i].cond_code, boolean_type_node,
4790 fd->loops[i].v,
4791 fold_convert (itype, fd->loops[i].n2)),
4792 fd->loops[i].v, t);
4793 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4797 /* Remove GIMPLE_OMP_CONTINUE. */
4798 gsi_remove (&gsi, true);
4801 /* Emit the condition in L1_BB. */
4802 gsi = gsi_start_bb (l1_bb);
4804 t = fold_convert (type, n2);
4805 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4806 false, GSI_CONTINUE_LINKING);
4807 tree v = fd->loop.v;
4808 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4809 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4810 false, GSI_CONTINUE_LINKING);
4811 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4812 cond_stmt = gimple_build_cond_empty (t);
4813 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4814 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4815 NULL, NULL)
4816 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4817 NULL, NULL))
4819 gsi = gsi_for_stmt (cond_stmt);
4820 gimple_regimplify_operands (cond_stmt, &gsi);
4823 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4824 if (is_simt)
4826 gsi = gsi_start_bb (l2_bb);
4827 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4828 if (POINTER_TYPE_P (type))
4829 t = fold_build_pointer_plus (fd->loop.v, step);
4830 else
4831 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4832 expand_omp_build_assign (&gsi, fd->loop.v, t);
4835 /* Remove GIMPLE_OMP_RETURN. */
4836 gsi = gsi_last_bb (exit_bb);
4837 gsi_remove (&gsi, true);
4839 /* Connect the new blocks. */
4840 remove_edge (FALLTHRU_EDGE (entry_bb));
4842 if (!broken_loop)
4844 remove_edge (BRANCH_EDGE (entry_bb));
4845 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4847 e = BRANCH_EDGE (l1_bb);
4848 ne = FALLTHRU_EDGE (l1_bb);
4849 e->flags = EDGE_TRUE_VALUE;
4851 else
4853 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4855 ne = single_succ_edge (l1_bb);
4856 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4859 ne->flags = EDGE_FALSE_VALUE;
4860 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4861 ne->probability = e->probability.invert ();
4863 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4864 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4866 if (simt_maxlane)
4868 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4869 NULL_TREE, NULL_TREE);
4870 gsi = gsi_last_bb (entry_bb);
4871 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4872 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4873 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4874 FALLTHRU_EDGE (entry_bb)->probability
4875 = profile_probability::guessed_always ().apply_scale (7, 8);
4876 BRANCH_EDGE (entry_bb)->probability
4877 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4878 l2_dom_bb = entry_bb;
4880 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4882 if (!broken_loop)
4884 struct loop *loop = alloc_loop ();
4885 loop->header = l1_bb;
4886 loop->latch = cont_bb;
4887 add_loop (loop, l1_bb->loop_father);
4888 loop->safelen = safelen_int;
4889 if (simduid)
4891 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4892 cfun->has_simduid_loops = true;
4894 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4895 the loop. */
4896 if ((flag_tree_loop_vectorize
4897 || !global_options_set.x_flag_tree_loop_vectorize)
4898 && flag_tree_loop_optimize
4899 && loop->safelen > 1)
4901 loop->force_vectorize = true;
4902 cfun->has_force_vectorize_loops = true;
4905 else if (simduid)
4906 cfun->has_simduid_loops = true;
4909 /* Taskloop construct is represented after gimplification with
4910 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4911 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4912 which should just compute all the needed loop temporaries
4913 for GIMPLE_OMP_TASK. */
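/* For illustration only, a hypothetical taskloop of the shape involved
   (the body is made up):

	#pragma omp taskloop grainsize(100)
	for (i = 0; i < n; i++)
	  a[i] = f (i);

   The expansion below only computes the loop temporaries (start, end and,
   for collapsed loops, the iteration count) that the GOMP_taskloop{,_ull}
   call emitted for the enclosed GIMPLE_OMP_TASK will use to hand each task
   its iteration range.  */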
4915 static void
4916 expand_omp_taskloop_for_outer (struct omp_region *region,
4917 struct omp_for_data *fd,
4918 gimple *inner_stmt)
4920 tree type, bias = NULL_TREE;
4921 basic_block entry_bb, cont_bb, exit_bb;
4922 gimple_stmt_iterator gsi;
4923 gassign *assign_stmt;
4924 tree *counts = NULL;
4925 int i;
4927 gcc_assert (inner_stmt);
4928 gcc_assert (region->cont);
4929 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4930 && gimple_omp_task_taskloop_p (inner_stmt));
4931 type = TREE_TYPE (fd->loop.v);
4933 /* See if we need to bias by LLONG_MIN. */
4934 if (fd->iter_type == long_long_unsigned_type_node
4935 && TREE_CODE (type) == INTEGER_TYPE
4936 && !TYPE_UNSIGNED (type))
4938 tree n1, n2;
4940 if (fd->loop.cond_code == LT_EXPR)
4942 n1 = fd->loop.n1;
4943 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4945 else
4947 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4948 n2 = fd->loop.n1;
4950 if (TREE_CODE (n1) != INTEGER_CST
4951 || TREE_CODE (n2) != INTEGER_CST
4952 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4953 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
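/* Purely as a worked example (not from the sources): for a signed 64-bit V
   running from -5 to 5, adding the LLONG_MIN bias maps the bounds to
   0x7ffffffffffffffb and 0x8000000000000005, so their relative order is
   preserved when the runtime compares them as unsigned values.  */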
4956 entry_bb = region->entry;
4957 cont_bb = region->cont;
4958 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4959 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4960 exit_bb = region->exit;
4962 gsi = gsi_last_bb (entry_bb);
4963 gimple *for_stmt = gsi_stmt (gsi);
4964 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4965 if (fd->collapse > 1)
4967 int first_zero_iter = -1, dummy = -1;
4968 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4970 counts = XALLOCAVEC (tree, fd->collapse);
4971 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4972 zero_iter_bb, first_zero_iter,
4973 dummy_bb, dummy, l2_dom_bb);
4975 if (zero_iter_bb)
4977 /* Some counts[i] vars might be uninitialized if
4978 some loop has zero iterations. But the body shouldn't
4979 be executed in that case, so just avoid uninit warnings. */
4980 for (i = first_zero_iter; i < fd->collapse; i++)
4981 if (SSA_VAR_P (counts[i]))
4982 TREE_NO_WARNING (counts[i]) = 1;
4983 gsi_prev (&gsi);
4984 edge e = split_block (entry_bb, gsi_stmt (gsi));
4985 entry_bb = e->dest;
4986 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4987 gsi = gsi_last_bb (entry_bb);
4988 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4989 get_immediate_dominator (CDI_DOMINATORS,
4990 zero_iter_bb));
4994 tree t0, t1;
4995 t1 = fd->loop.n2;
4996 t0 = fd->loop.n1;
4997 if (POINTER_TYPE_P (TREE_TYPE (t0))
4998 && TYPE_PRECISION (TREE_TYPE (t0))
4999 != TYPE_PRECISION (fd->iter_type))
5001 /* Avoid casting pointers to an integer type of a different size. */
5002 tree itype = signed_type_for (type);
5003 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5004 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5006 else
5008 t1 = fold_convert (fd->iter_type, t1);
5009 t0 = fold_convert (fd->iter_type, t0);
5011 if (bias)
5013 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5014 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5017 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5018 OMP_CLAUSE__LOOPTEMP_);
5019 gcc_assert (innerc);
5020 tree startvar = OMP_CLAUSE_DECL (innerc);
5021 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5022 gcc_assert (innerc);
5023 tree endvar = OMP_CLAUSE_DECL (innerc);
5024 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5026 gcc_assert (innerc);
5027 for (i = 1; i < fd->collapse; i++)
5029 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5030 OMP_CLAUSE__LOOPTEMP_);
5031 gcc_assert (innerc);
5033 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5034 OMP_CLAUSE__LOOPTEMP_);
5035 if (innerc)
5037 /* If needed (inner taskloop has lastprivate clause), propagate
5038 down the total number of iterations. */
5039 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5040 NULL_TREE, false,
5041 GSI_CONTINUE_LINKING);
5042 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5043 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5047 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5048 GSI_CONTINUE_LINKING);
5049 assign_stmt = gimple_build_assign (startvar, t0);
5050 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5052 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5053 GSI_CONTINUE_LINKING);
5054 assign_stmt = gimple_build_assign (endvar, t1);
5055 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5056 if (fd->collapse > 1)
5057 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5059 /* Remove the GIMPLE_OMP_FOR statement. */
5060 gsi = gsi_for_stmt (for_stmt);
5061 gsi_remove (&gsi, true);
5063 gsi = gsi_last_bb (cont_bb);
5064 gsi_remove (&gsi, true);
5066 gsi = gsi_last_bb (exit_bb);
5067 gsi_remove (&gsi, true);
5069 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5070 remove_edge (BRANCH_EDGE (entry_bb));
5071 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5072 remove_edge (BRANCH_EDGE (cont_bb));
5073 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5074 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5075 recompute_dominator (CDI_DOMINATORS, region->entry));
5078 /* Taskloop construct is represented after gimplification with
5079 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5080 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5081 GOMP_taskloop{,_ull} function arranges for each task to be given just
5082 a single range of iterations. */
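/* For illustration only: with a hypothetical grainsize(100) taskloop over
   1000 iterations, the runtime creates a handful of tasks and passes each
   one a single contiguous subrange through the two OMP_CLAUSE__LOOPTEMP_
   clauses read below as N1 and N2; the code here then builds an ordinary
   sequential loop over just that subrange.  */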
5084 static void
5085 expand_omp_taskloop_for_inner (struct omp_region *region,
5086 struct omp_for_data *fd,
5087 gimple *inner_stmt)
5089 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5090 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5091 basic_block fin_bb;
5092 gimple_stmt_iterator gsi;
5093 edge ep;
5094 bool broken_loop = region->cont == NULL;
5095 tree *counts = NULL;
5096 tree n1, n2, step;
5098 itype = type = TREE_TYPE (fd->loop.v);
5099 if (POINTER_TYPE_P (type))
5100 itype = signed_type_for (type);
5102 /* See if we need to bias by LLONG_MIN. */
5103 if (fd->iter_type == long_long_unsigned_type_node
5104 && TREE_CODE (type) == INTEGER_TYPE
5105 && !TYPE_UNSIGNED (type))
5107 tree n1, n2;
5109 if (fd->loop.cond_code == LT_EXPR)
5111 n1 = fd->loop.n1;
5112 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5114 else
5116 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5117 n2 = fd->loop.n1;
5119 if (TREE_CODE (n1) != INTEGER_CST
5120 || TREE_CODE (n2) != INTEGER_CST
5121 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5122 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5125 entry_bb = region->entry;
5126 cont_bb = region->cont;
5127 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5128 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5129 gcc_assert (broken_loop
5130 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5131 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5132 if (!broken_loop)
5134 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5135 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5137 exit_bb = region->exit;
5139 /* Iteration space partitioning goes in ENTRY_BB. */
5140 gsi = gsi_last_bb (entry_bb);
5141 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5143 if (fd->collapse > 1)
5145 int first_zero_iter = -1, dummy = -1;
5146 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5148 counts = XALLOCAVEC (tree, fd->collapse);
5149 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5150 fin_bb, first_zero_iter,
5151 dummy_bb, dummy, l2_dom_bb);
5152 t = NULL_TREE;
5154 else
5155 t = integer_one_node;
5157 step = fd->loop.step;
5158 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5159 OMP_CLAUSE__LOOPTEMP_);
5160 gcc_assert (innerc);
5161 n1 = OMP_CLAUSE_DECL (innerc);
5162 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5163 gcc_assert (innerc);
5164 n2 = OMP_CLAUSE_DECL (innerc);
5165 if (bias)
5167 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5168 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5170 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5171 true, NULL_TREE, true, GSI_SAME_STMT);
5172 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5173 true, NULL_TREE, true, GSI_SAME_STMT);
5174 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5175 true, NULL_TREE, true, GSI_SAME_STMT);
5177 tree startvar = fd->loop.v;
5178 tree endvar = NULL_TREE;
5180 if (gimple_omp_for_combined_p (fd->for_stmt))
5182 tree clauses = gimple_omp_for_clauses (inner_stmt);
5183 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5184 gcc_assert (innerc);
5185 startvar = OMP_CLAUSE_DECL (innerc);
5186 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5187 OMP_CLAUSE__LOOPTEMP_);
5188 gcc_assert (innerc);
5189 endvar = OMP_CLAUSE_DECL (innerc);
5191 t = fold_convert (TREE_TYPE (startvar), n1);
5192 t = force_gimple_operand_gsi (&gsi, t,
5193 DECL_P (startvar)
5194 && TREE_ADDRESSABLE (startvar),
5195 NULL_TREE, false, GSI_CONTINUE_LINKING);
5196 gimple *assign_stmt = gimple_build_assign (startvar, t);
5197 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5199 t = fold_convert (TREE_TYPE (startvar), n2);
5200 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5201 false, GSI_CONTINUE_LINKING);
5202 if (endvar)
5204 assign_stmt = gimple_build_assign (endvar, e);
5205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5206 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5207 assign_stmt = gimple_build_assign (fd->loop.v, e);
5208 else
5209 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5210 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5212 if (fd->collapse > 1)
5213 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5215 if (!broken_loop)
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi = gsi_last_bb (cont_bb);
5220 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5221 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5222 vmain = gimple_omp_continue_control_use (cont_stmt);
5223 vback = gimple_omp_continue_control_def (cont_stmt);
5225 if (!gimple_omp_for_combined_p (fd->for_stmt))
5227 if (POINTER_TYPE_P (type))
5228 t = fold_build_pointer_plus (vmain, step);
5229 else
5230 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5231 t = force_gimple_operand_gsi (&gsi, t,
5232 DECL_P (vback)
5233 && TREE_ADDRESSABLE (vback),
5234 NULL_TREE, true, GSI_SAME_STMT);
5235 assign_stmt = gimple_build_assign (vback, t);
5236 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5238 t = build2 (fd->loop.cond_code, boolean_type_node,
5239 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5240 ? t : vback, e);
5241 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5244 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5245 gsi_remove (&gsi, true);
5247 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5248 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5251 /* Remove the GIMPLE_OMP_FOR statement. */
5252 gsi = gsi_for_stmt (fd->for_stmt);
5253 gsi_remove (&gsi, true);
5255 /* Remove the GIMPLE_OMP_RETURN statement. */
5256 gsi = gsi_last_bb (exit_bb);
5257 gsi_remove (&gsi, true);
5259 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5260 if (!broken_loop)
5261 remove_edge (BRANCH_EDGE (entry_bb));
5262 else
5264 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5265 region->outer->cont = NULL;
5268 /* Connect all the blocks. */
5269 if (!broken_loop)
5271 ep = find_edge (cont_bb, body_bb);
5272 if (gimple_omp_for_combined_p (fd->for_stmt))
5274 remove_edge (ep);
5275 ep = NULL;
5277 else if (fd->collapse > 1)
5279 remove_edge (ep);
5280 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5282 else
5283 ep->flags = EDGE_TRUE_VALUE;
5284 find_edge (cont_bb, fin_bb)->flags
5285 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5288 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5289 recompute_dominator (CDI_DOMINATORS, body_bb));
5290 if (!broken_loop)
5291 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5292 recompute_dominator (CDI_DOMINATORS, fin_bb));
5294 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5296 struct loop *loop = alloc_loop ();
5297 loop->header = body_bb;
5298 if (collapse_bb == NULL)
5299 loop->latch = cont_bb;
5300 add_loop (loop, body_bb->loop_father);
5304 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5305 partitioned loop. The lowering here is abstracted, in that the
5306 loop parameters are passed through internal functions, which are
5307 further lowered by oacc_device_lower, once we get to the target
5308 compiler. The loop is of the form:
5310 for (V = B; V LTGT E; V += S) {BODY}
5312 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5313 (constant 0 for no chunking) and we will have a GWV partitioning
5314 mask, specifying dimensions over which the loop is to be
5315 partitioned (see note below). We generate code that looks like
5316 (this ignores tiling):
5318 <entry_bb> [incoming FALL->body, BRANCH->exit]
5319 typedef signedintify (typeof (V)) T; // underlying signed integral type
5320 T range = E - B;
5321 T chunk_no = 0;
5322 T DIR = LTGT == '<' ? +1 : -1;
5323 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5324 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5326 <head_bb> [created by splitting end of entry_bb]
5327 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5328 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5329 if (!(offset LTGT bound)) goto bottom_bb;
5331 <body_bb> [incoming]
5332 V = B + offset;
5333 {BODY}
5335 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5336 offset += step;
5337 if (offset LTGT bound) goto body_bb; [*]
5339 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5340 chunk_no++;
5341 if (chunk_no < chunk_max) goto head_bb;
5343 <exit_bb> [incoming]
5344 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5346 [*] Needed if V live at end of loop. */
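/* As a concrete illustration (a sketch, not code generated verbatim), a
   simple OpenACC loop such as

     #pragma acc parallel loop
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   maps onto the skeleton above with B = 0, E = n, S = 1 and LTGT = '<';
   the IFN_GOACC_LOOP internal calls carry the chunking and GWV
   partitioning parameters until oacc_device_lower resolves them for the
   selected offload target.  */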
5348 static void
5349 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5351 tree v = fd->loop.v;
5352 enum tree_code cond_code = fd->loop.cond_code;
5353 enum tree_code plus_code = PLUS_EXPR;
5355 tree chunk_size = integer_minus_one_node;
5356 tree gwv = integer_zero_node;
5357 tree iter_type = TREE_TYPE (v);
5358 tree diff_type = iter_type;
5359 tree plus_type = iter_type;
5360 struct oacc_collapse *counts = NULL;
5362 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5363 == GF_OMP_FOR_KIND_OACC_LOOP);
5364 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5365 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5367 if (POINTER_TYPE_P (iter_type))
5369 plus_code = POINTER_PLUS_EXPR;
5370 plus_type = sizetype;
5372 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5373 diff_type = signed_type_for (diff_type);
5374 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5375 diff_type = integer_type_node;
5377 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5378 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5379 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5380 basic_block bottom_bb = NULL;
5382 /* entry_bb has two successors; the branch edge is to the exit
5383 block, fallthrough edge to body. */
5384 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5385 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5387 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5388 body_bb, or to a block whose only successor is the body_bb. Its
5389 fallthrough successor is the final block (same as the branch
5390 successor of the entry_bb). */
5391 if (cont_bb)
5393 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5394 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5396 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5397 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5399 else
5400 gcc_assert (!gimple_in_ssa_p (cfun));
5402 /* The exit block only has entry_bb and cont_bb as predecessors. */
5403 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5405 tree chunk_no;
5406 tree chunk_max = NULL_TREE;
5407 tree bound, offset;
5408 tree step = create_tmp_var (diff_type, ".step");
5409 bool up = cond_code == LT_EXPR;
5410 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5411 bool chunking = !gimple_in_ssa_p (cfun);
5412 bool negating;
5414 /* Tiling vars. */
5415 tree tile_size = NULL_TREE;
5416 tree element_s = NULL_TREE;
5417 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5418 basic_block elem_body_bb = NULL;
5419 basic_block elem_cont_bb = NULL;
5421 /* SSA instances. */
5422 tree offset_incr = NULL_TREE;
5423 tree offset_init = NULL_TREE;
5425 gimple_stmt_iterator gsi;
5426 gassign *ass;
5427 gcall *call;
5428 gimple *stmt;
5429 tree expr;
5430 location_t loc;
5431 edge split, be, fte;
5433 /* Split the end of entry_bb to create head_bb. */
5434 split = split_block (entry_bb, last_stmt (entry_bb));
5435 basic_block head_bb = split->dest;
5436 entry_bb = split->src;
5438 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5439 gsi = gsi_last_bb (entry_bb);
5440 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5441 loc = gimple_location (for_stmt);
5443 if (gimple_in_ssa_p (cfun))
5445 offset_init = gimple_omp_for_index (for_stmt, 0);
5446 gcc_assert (integer_zerop (fd->loop.n1));
5447 /* The SSA parallelizer does gang parallelism. */
5448 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5451 if (fd->collapse > 1 || fd->tiling)
5453 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5454 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5455 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5456 TREE_TYPE (fd->loop.n2), loc);
5458 if (SSA_VAR_P (fd->loop.n2))
5460 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5461 true, GSI_SAME_STMT);
5462 ass = gimple_build_assign (fd->loop.n2, total);
5463 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5467 tree b = fd->loop.n1;
5468 tree e = fd->loop.n2;
5469 tree s = fd->loop.step;
5471 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5472 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5474 /* Convert the step, avoiding possible unsigned->signed overflow. */
5475 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5476 if (negating)
5477 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5478 s = fold_convert (diff_type, s);
5479 if (negating)
5480 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5481 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5483 if (!chunking)
5484 chunk_size = integer_zero_node;
5485 expr = fold_convert (diff_type, chunk_size);
5486 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5487 NULL_TREE, true, GSI_SAME_STMT);
5489 if (fd->tiling)
5491 /* Determine the tile size and element step,
5492 modify the outer loop step size. */
5493 tile_size = create_tmp_var (diff_type, ".tile_size");
5494 expr = build_int_cst (diff_type, 1);
5495 for (int ix = 0; ix < fd->collapse; ix++)
5496 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5497 expr = force_gimple_operand_gsi (&gsi, expr, true,
5498 NULL_TREE, true, GSI_SAME_STMT);
5499 ass = gimple_build_assign (tile_size, expr);
5500 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5502 element_s = create_tmp_var (diff_type, ".element_s");
5503 ass = gimple_build_assign (element_s, s);
5504 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5506 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5507 s = force_gimple_operand_gsi (&gsi, expr, true,
5508 NULL_TREE, true, GSI_SAME_STMT);
5511 /* Determine the range, avoiding possible unsigned->signed overflow. */
5512 negating = !up && TYPE_UNSIGNED (iter_type);
5513 expr = fold_build2 (MINUS_EXPR, plus_type,
5514 fold_convert (plus_type, negating ? b : e),
5515 fold_convert (plus_type, negating ? e : b));
5516 expr = fold_convert (diff_type, expr);
5517 if (negating)
5518 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5519 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5520 NULL_TREE, true, GSI_SAME_STMT);
5522 chunk_no = build_int_cst (diff_type, 0);
5523 if (chunking)
5525 gcc_assert (!gimple_in_ssa_p (cfun));
5527 expr = chunk_no;
5528 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5529 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5531 ass = gimple_build_assign (chunk_no, expr);
5532 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5534 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5535 build_int_cst (integer_type_node,
5536 IFN_GOACC_LOOP_CHUNKS),
5537 dir, range, s, chunk_size, gwv);
5538 gimple_call_set_lhs (call, chunk_max);
5539 gimple_set_location (call, loc);
5540 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5542 else
5543 chunk_size = chunk_no;
5545 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5546 build_int_cst (integer_type_node,
5547 IFN_GOACC_LOOP_STEP),
5548 dir, range, s, chunk_size, gwv);
5549 gimple_call_set_lhs (call, step);
5550 gimple_set_location (call, loc);
5551 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5553 /* Remove the GIMPLE_OMP_FOR. */
5554 gsi_remove (&gsi, true);
5556 /* Fixup edges from head_bb. */
5557 be = BRANCH_EDGE (head_bb);
5558 fte = FALLTHRU_EDGE (head_bb);
5559 be->flags |= EDGE_FALSE_VALUE;
5560 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5562 basic_block body_bb = fte->dest;
5564 if (gimple_in_ssa_p (cfun))
5566 gsi = gsi_last_bb (cont_bb);
5567 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5569 offset = gimple_omp_continue_control_use (cont_stmt);
5570 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5572 else
5574 offset = create_tmp_var (diff_type, ".offset");
5575 offset_init = offset_incr = offset;
5577 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5579 /* Loop offset & bound go into head_bb. */
5580 gsi = gsi_start_bb (head_bb);
5582 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5583 build_int_cst (integer_type_node,
5584 IFN_GOACC_LOOP_OFFSET),
5585 dir, range, s,
5586 chunk_size, gwv, chunk_no);
5587 gimple_call_set_lhs (call, offset_init);
5588 gimple_set_location (call, loc);
5589 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5591 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5592 build_int_cst (integer_type_node,
5593 IFN_GOACC_LOOP_BOUND),
5594 dir, range, s,
5595 chunk_size, gwv, offset_init);
5596 gimple_call_set_lhs (call, bound);
5597 gimple_set_location (call, loc);
5598 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5600 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5601 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5602 GSI_CONTINUE_LINKING);
5604 /* V assignment goes into body_bb. */
5605 if (!gimple_in_ssa_p (cfun))
5607 gsi = gsi_start_bb (body_bb);
5609 expr = build2 (plus_code, iter_type, b,
5610 fold_convert (plus_type, offset));
5611 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5612 true, GSI_SAME_STMT);
5613 ass = gimple_build_assign (v, expr);
5614 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5616 if (fd->collapse > 1 || fd->tiling)
5617 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5619 if (fd->tiling)
5621 /* Determine the range of the element loop -- usually simply
5622 the tile_size, but could be smaller if the final
5623 iteration of the outer loop is a partial tile. */
5624 tree e_range = create_tmp_var (diff_type, ".e_range");
5626 expr = build2 (MIN_EXPR, diff_type,
5627 build2 (MINUS_EXPR, diff_type, bound, offset),
5628 build2 (MULT_EXPR, diff_type, tile_size,
5629 element_s));
5630 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5631 true, GSI_SAME_STMT);
5632 ass = gimple_build_assign (e_range, expr);
5633 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5635 /* Determine bound, offset & step of inner loop. */
5636 e_bound = create_tmp_var (diff_type, ".e_bound");
5637 e_offset = create_tmp_var (diff_type, ".e_offset");
5638 e_step = create_tmp_var (diff_type, ".e_step");
5640 /* Mark these as element loops. */
5641 tree t, e_gwv = integer_minus_one_node;
5642 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5644 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5645 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5646 element_s, chunk, e_gwv, chunk);
5647 gimple_call_set_lhs (call, e_offset);
5648 gimple_set_location (call, loc);
5649 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5651 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5652 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5653 element_s, chunk, e_gwv, e_offset);
5654 gimple_call_set_lhs (call, e_bound);
5655 gimple_set_location (call, loc);
5656 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5658 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5659 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5660 element_s, chunk, e_gwv);
5661 gimple_call_set_lhs (call, e_step);
5662 gimple_set_location (call, loc);
5663 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5665 /* Add test and split block. */
5666 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5667 stmt = gimple_build_cond_empty (expr);
5668 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5669 split = split_block (body_bb, stmt);
5670 elem_body_bb = split->dest;
5671 if (cont_bb == body_bb)
5672 cont_bb = elem_body_bb;
5673 body_bb = split->src;
5675 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5677 /* Initialize the user's loop vars. */
5678 gsi = gsi_start_bb (elem_body_bb);
5679 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5683 /* Loop increment goes into cont_bb. If this is not a loop, we
5684 will have spawned threads as if it was, and each one will
5685 execute one iteration. The specification is not explicit about
5686 whether such constructs are ill-formed or not, and they can
5687 occur, especially when noreturn routines are involved. */
5688 if (cont_bb)
5690 gsi = gsi_last_bb (cont_bb);
5691 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5692 loc = gimple_location (cont_stmt);
5694 if (fd->tiling)
5696 /* Insert element loop increment and test. */
5697 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5698 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5699 true, GSI_SAME_STMT);
5700 ass = gimple_build_assign (e_offset, expr);
5701 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5702 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5704 stmt = gimple_build_cond_empty (expr);
5705 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5706 split = split_block (cont_bb, stmt);
5707 elem_cont_bb = split->src;
5708 cont_bb = split->dest;
5710 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5711 split->probability = profile_probability::unlikely ().guessed ();
5712 edge latch_edge
5713 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5714 latch_edge->probability = profile_probability::likely ().guessed ();
5716 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5717 skip_edge->probability = profile_probability::unlikely ().guessed ();
5718 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5719 loop_entry_edge->probability
5720 = profile_probability::likely ().guessed ();
5722 gsi = gsi_for_stmt (cont_stmt);
5725 /* Increment offset. */
5726 if (gimple_in_ssa_p (cfun))
5727 expr = build2 (plus_code, iter_type, offset,
5728 fold_convert (plus_type, step));
5729 else
5730 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5731 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5732 true, GSI_SAME_STMT);
5733 ass = gimple_build_assign (offset_incr, expr);
5734 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5735 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5736 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5738 /* Remove the GIMPLE_OMP_CONTINUE. */
5739 gsi_remove (&gsi, true);
5741 /* Fixup edges from cont_bb. */
5742 be = BRANCH_EDGE (cont_bb);
5743 fte = FALLTHRU_EDGE (cont_bb);
5744 be->flags |= EDGE_TRUE_VALUE;
5745 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5747 if (chunking)
5749 /* Split the beginning of exit_bb to make bottom_bb. We
5750 need to insert a nop at the start, because splitting is
5751 after a stmt, not before. */
5752 gsi = gsi_start_bb (exit_bb);
5753 stmt = gimple_build_nop ();
5754 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5755 split = split_block (exit_bb, stmt);
5756 bottom_bb = split->src;
5757 exit_bb = split->dest;
5758 gsi = gsi_last_bb (bottom_bb);
5760 /* Chunk increment and test goes into bottom_bb. */
5761 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5762 build_int_cst (diff_type, 1));
5763 ass = gimple_build_assign (chunk_no, expr);
5764 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5766 /* Chunk test at end of bottom_bb. */
5767 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5768 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5769 GSI_CONTINUE_LINKING);
5771 /* Fixup edges from bottom_bb. */
5772 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5773 split->probability = profile_probability::unlikely ().guessed ();
5774 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5775 latch_edge->probability = profile_probability::likely ().guessed ();
5779 gsi = gsi_last_bb (exit_bb);
5780 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5781 loc = gimple_location (gsi_stmt (gsi));
5783 if (!gimple_in_ssa_p (cfun))
5785 /* Insert the final value of V, in case it is live. This is the
5786 value for the only thread that survives past the join. */
5787 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5788 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5789 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5790 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5791 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5792 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5793 true, GSI_SAME_STMT);
5794 ass = gimple_build_assign (v, expr);
5795 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5798 /* Remove the OMP_RETURN. */
5799 gsi_remove (&gsi, true);
5801 if (cont_bb)
5803 /* We now have one, two or three nested loops. Update the loop
5804 structures. */
5805 struct loop *parent = entry_bb->loop_father;
5806 struct loop *body = body_bb->loop_father;
5808 if (chunking)
5810 struct loop *chunk_loop = alloc_loop ();
5811 chunk_loop->header = head_bb;
5812 chunk_loop->latch = bottom_bb;
5813 add_loop (chunk_loop, parent);
5814 parent = chunk_loop;
5816 else if (parent != body)
5818 gcc_assert (body->header == body_bb);
5819 gcc_assert (body->latch == cont_bb
5820 || single_pred (body->latch) == cont_bb);
5821 parent = NULL;
5824 if (parent)
5826 struct loop *body_loop = alloc_loop ();
5827 body_loop->header = body_bb;
5828 body_loop->latch = cont_bb;
5829 add_loop (body_loop, parent);
5831 if (fd->tiling)
5833 /* Insert tiling's element loop. */
5834 struct loop *inner_loop = alloc_loop ();
5835 inner_loop->header = elem_body_bb;
5836 inner_loop->latch = elem_cont_bb;
5837 add_loop (inner_loop, body_loop);
5843 /* Expand the OMP loop defined by REGION. */
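/* As a sketch of the dispatch below (not an exhaustive list): SIMD loops
   go through expand_omp_simd, OpenACC loops through expand_oacc_for,
   taskloops through the taskloop expanders, static schedules without an
   ordered clause through expand_omp_for_static_nochunk/_chunk, and the
   remaining schedule kinds become calls to the matching
   GOMP_loop_*_start/GOMP_loop_*_next libgomp entry points selected via
   start_ix and next_ix.  */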
5845 static void
5846 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5848 struct omp_for_data fd;
5849 struct omp_for_data_loop *loops;
5851 loops
5852 = (struct omp_for_data_loop *)
5853 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5854 * sizeof (struct omp_for_data_loop));
5855 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5856 &fd, loops);
5857 region->sched_kind = fd.sched_kind;
5858 region->sched_modifiers = fd.sched_modifiers;
5860 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5861 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5862 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5863 if (region->cont)
5865 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5866 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5867 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5869 else
5870 /* If there isn't a continue then this is a degenerate case where
5871 the introduction of abnormal edges during lowering will prevent
5872 original loops from being detected. Fix that up. */
5873 loops_state_set (LOOPS_NEED_FIXUP);
5875 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5876 expand_omp_simd (region, &fd);
5877 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5878 expand_cilk_for (region, &fd);
5879 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5881 gcc_assert (!inner_stmt);
5882 expand_oacc_for (region, &fd);
5884 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5886 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5887 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5888 else
5889 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5891 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5892 && !fd.have_ordered)
5894 if (fd.chunk_size == NULL)
5895 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5896 else
5897 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5899 else
5901 int fn_index, start_ix, next_ix;
5903 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5904 == GF_OMP_FOR_KIND_FOR);
5905 if (fd.chunk_size == NULL
5906 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5907 fd.chunk_size = integer_zero_node;
5908 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5909 switch (fd.sched_kind)
5911 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5912 fn_index = 3;
5913 break;
5914 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5915 case OMP_CLAUSE_SCHEDULE_GUIDED:
5916 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5917 && !fd.ordered
5918 && !fd.have_ordered)
5920 fn_index = 3 + fd.sched_kind;
5921 break;
5923 /* FALLTHRU */
5924 default:
5925 fn_index = fd.sched_kind;
5926 break;
5928 if (!fd.ordered)
5929 fn_index += fd.have_ordered * 6;
5930 if (fd.ordered)
5931 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5932 else
5933 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5934 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5935 if (fd.iter_type == long_long_unsigned_type_node)
5937 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5938 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5939 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5940 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5942 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5943 (enum built_in_function) next_ix, inner_stmt);
5946 if (gimple_in_ssa_p (cfun))
5947 update_ssa (TODO_update_ssa_only_virtuals);
5950 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5952 v = GOMP_sections_start (n);
5954 switch (v)
5956 case 0:
5957 goto L2;
5958 case 1:
5959 section 1;
5960 goto L1;
5961 case 2:
5963 case n:
5965 default:
5966 abort ();
5969 v = GOMP_sections_next ();
5970 goto L0;
5972 reduction;
5974 If this is a combined parallel sections, replace the call to
5975 GOMP_sections_start with call to GOMP_sections_next. */
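/* For example (a sketch, assuming a two-section construct):

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   becomes the switch above with case 1 running foo (), case 2 running
   bar (), case 0 branching to L2 once GOMP_sections_next () reports no
   more work, and the default case calling __builtin_trap ().  */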
5977 static void
5978 expand_omp_sections (struct omp_region *region)
5980 tree t, u, vin = NULL, vmain, vnext, l2;
5981 unsigned len;
5982 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5983 gimple_stmt_iterator si, switch_si;
5984 gomp_sections *sections_stmt;
5985 gimple *stmt;
5986 gomp_continue *cont;
5987 edge_iterator ei;
5988 edge e;
5989 struct omp_region *inner;
5990 unsigned i, casei;
5991 bool exit_reachable = region->cont != NULL;
5993 gcc_assert (region->exit != NULL);
5994 entry_bb = region->entry;
5995 l0_bb = single_succ (entry_bb);
5996 l1_bb = region->cont;
5997 l2_bb = region->exit;
5998 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5999 l2 = gimple_block_label (l2_bb);
6000 else
6002 /* This can happen if there are reductions. */
6003 len = EDGE_COUNT (l0_bb->succs);
6004 gcc_assert (len > 0);
6005 e = EDGE_SUCC (l0_bb, len - 1);
6006 si = gsi_last_bb (e->dest);
6007 l2 = NULL_TREE;
6008 if (gsi_end_p (si)
6009 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6010 l2 = gimple_block_label (e->dest);
6011 else
6012 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6014 si = gsi_last_bb (e->dest);
6015 if (gsi_end_p (si)
6016 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6018 l2 = gimple_block_label (e->dest);
6019 break;
6023 if (exit_reachable)
6024 default_bb = create_empty_bb (l1_bb->prev_bb);
6025 else
6026 default_bb = create_empty_bb (l0_bb);
6028 /* We will build a switch() with enough cases for all the
6029 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
6030 and a default case to abort if something goes wrong. */
6031 len = EDGE_COUNT (l0_bb->succs);
6033 /* Use vec::quick_push on label_vec throughout, since we know the size
6034 in advance. */
6035 auto_vec<tree> label_vec (len);
6037 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6038 GIMPLE_OMP_SECTIONS statement. */
6039 si = gsi_last_bb (entry_bb);
6040 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6041 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6042 vin = gimple_omp_sections_control (sections_stmt);
6043 if (!is_combined_parallel (region))
6045 /* If we are not inside a combined parallel+sections region,
6046 call GOMP_sections_start. */
6047 t = build_int_cst (unsigned_type_node, len - 1);
6048 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6049 stmt = gimple_build_call (u, 1, t);
6051 else
6053 /* Otherwise, call GOMP_sections_next. */
6054 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6055 stmt = gimple_build_call (u, 0);
6057 gimple_call_set_lhs (stmt, vin);
6058 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6059 gsi_remove (&si, true);
6061 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6062 L0_BB. */
6063 switch_si = gsi_last_bb (l0_bb);
6064 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6065 if (exit_reachable)
6067 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6068 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6069 vmain = gimple_omp_continue_control_use (cont);
6070 vnext = gimple_omp_continue_control_def (cont);
6072 else
6074 vmain = vin;
6075 vnext = NULL_TREE;
6078 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6079 label_vec.quick_push (t);
6080 i = 1;
6082 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6083 for (inner = region->inner, casei = 1;
6084 inner;
6085 inner = inner->next, i++, casei++)
6087 basic_block s_entry_bb, s_exit_bb;
6089 /* Skip optional reduction region. */
6090 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6092 --i;
6093 --casei;
6094 continue;
6097 s_entry_bb = inner->entry;
6098 s_exit_bb = inner->exit;
6100 t = gimple_block_label (s_entry_bb);
6101 u = build_int_cst (unsigned_type_node, casei);
6102 u = build_case_label (u, NULL, t);
6103 label_vec.quick_push (u);
6105 si = gsi_last_bb (s_entry_bb);
6106 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6107 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6108 gsi_remove (&si, true);
6109 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6111 if (s_exit_bb == NULL)
6112 continue;
6114 si = gsi_last_bb (s_exit_bb);
6115 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6116 gsi_remove (&si, true);
6118 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6121 /* Error handling code goes in DEFAULT_BB. */
6122 t = gimple_block_label (default_bb);
6123 u = build_case_label (NULL, NULL, t);
6124 make_edge (l0_bb, default_bb, 0);
6125 add_bb_to_loop (default_bb, current_loops->tree_root);
6127 stmt = gimple_build_switch (vmain, u, label_vec);
6128 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6129 gsi_remove (&switch_si, true);
6131 si = gsi_start_bb (default_bb);
6132 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6133 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6135 if (exit_reachable)
6137 tree bfn_decl;
6139 /* Code to get the next section goes in L1_BB. */
6140 si = gsi_last_bb (l1_bb);
6141 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6143 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6144 stmt = gimple_build_call (bfn_decl, 0);
6145 gimple_call_set_lhs (stmt, vnext);
6146 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6147 gsi_remove (&si, true);
6149 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6152 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6153 si = gsi_last_bb (l2_bb);
6154 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6155 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6156 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6157 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6158 else
6159 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6160 stmt = gimple_build_call (t, 0);
6161 if (gimple_omp_return_lhs (gsi_stmt (si)))
6162 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6163 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6164 gsi_remove (&si, true);
6166 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6169 /* Expand code for an OpenMP single directive. We've already expanded
6170 much of the code; here we simply place the GOMP_barrier call. */
6172 static void
6173 expand_omp_single (struct omp_region *region)
6175 basic_block entry_bb, exit_bb;
6176 gimple_stmt_iterator si;
6178 entry_bb = region->entry;
6179 exit_bb = region->exit;
6181 si = gsi_last_bb (entry_bb);
6182 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6183 gsi_remove (&si, true);
6184 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6186 si = gsi_last_bb (exit_bb);
6187 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6189 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6190 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6192 gsi_remove (&si, true);
6193 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6196 /* Generic expansion for OpenMP synchronization directives: master,
6197 ordered and critical. All we need to do here is remove the entry
6198 and exit markers for REGION. */
6200 static void
6201 expand_omp_synch (struct omp_region *region)
6203 basic_block entry_bb, exit_bb;
6204 gimple_stmt_iterator si;
6206 entry_bb = region->entry;
6207 exit_bb = region->exit;
6209 si = gsi_last_bb (entry_bb);
6210 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6211 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6212 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6213 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6214 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6215 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6216 gsi_remove (&si, true);
6217 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6219 if (exit_bb)
6221 si = gsi_last_bb (exit_bb);
6222 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6223 gsi_remove (&si, true);
6224 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6228 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6229 operation as a normal volatile load. */
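/* For example (a sketch, assuming a 4-byte int x):

     #pragma omp atomic read
       v = x;

   is expanded here into roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST instead when the seq_cst clause is present).  */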
6231 static bool
6232 expand_omp_atomic_load (basic_block load_bb, tree addr,
6233 tree loaded_val, int index)
6235 enum built_in_function tmpbase;
6236 gimple_stmt_iterator gsi;
6237 basic_block store_bb;
6238 location_t loc;
6239 gimple *stmt;
6240 tree decl, call, type, itype;
6242 gsi = gsi_last_bb (load_bb);
6243 stmt = gsi_stmt (gsi);
6244 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6245 loc = gimple_location (stmt);
6247 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6248 is smaller than word size, then expand_atomic_load assumes that the load
6249 is atomic. We could avoid the builtin entirely in this case. */
6251 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6252 decl = builtin_decl_explicit (tmpbase);
6253 if (decl == NULL_TREE)
6254 return false;
6256 type = TREE_TYPE (loaded_val);
6257 itype = TREE_TYPE (TREE_TYPE (decl));
6259 call = build_call_expr_loc (loc, decl, 2, addr,
6260 build_int_cst (NULL,
6261 gimple_omp_atomic_seq_cst_p (stmt)
6262 ? MEMMODEL_SEQ_CST
6263 : MEMMODEL_RELAXED));
6264 if (!useless_type_conversion_p (type, itype))
6265 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6266 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6268 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6269 gsi_remove (&gsi, true);
6271 store_bb = single_succ (load_bb);
6272 gsi = gsi_last_bb (store_bb);
6273 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6274 gsi_remove (&gsi, true);
6276 if (gimple_in_ssa_p (cfun))
6277 update_ssa (TODO_update_ssa_no_phi);
6279 return true;
6282 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6283 operation as a normal volatile store. */
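/* For example (a sketch, assuming a 4-byte int x):

     #pragma omp atomic write
       x = expr;

   becomes roughly __atomic_store_4 (&x, expr, MEMMODEL_RELAXED); when
   the old value is also needed (a capture), detected via
   gimple_omp_atomic_need_value_p below, __atomic_exchange_4 is used
   instead.  */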
6285 static bool
6286 expand_omp_atomic_store (basic_block load_bb, tree addr,
6287 tree loaded_val, tree stored_val, int index)
6289 enum built_in_function tmpbase;
6290 gimple_stmt_iterator gsi;
6291 basic_block store_bb = single_succ (load_bb);
6292 location_t loc;
6293 gimple *stmt;
6294 tree decl, call, type, itype;
6295 machine_mode imode;
6296 bool exchange;
6298 gsi = gsi_last_bb (load_bb);
6299 stmt = gsi_stmt (gsi);
6300 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6302 /* If the load value is needed, then this isn't a store but an exchange. */
6303 exchange = gimple_omp_atomic_need_value_p (stmt);
6305 gsi = gsi_last_bb (store_bb);
6306 stmt = gsi_stmt (gsi);
6307 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6308 loc = gimple_location (stmt);
6310 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6311 is smaller than word size, then expand_atomic_store assumes that the store
6312 is atomic. We could avoid the builtin entirely in this case. */
6314 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6315 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6316 decl = builtin_decl_explicit (tmpbase);
6317 if (decl == NULL_TREE)
6318 return false;
6320 type = TREE_TYPE (stored_val);
6322 /* Dig out the type of the function's second argument. */
6323 itype = TREE_TYPE (decl);
6324 itype = TYPE_ARG_TYPES (itype);
6325 itype = TREE_CHAIN (itype);
6326 itype = TREE_VALUE (itype);
6327 imode = TYPE_MODE (itype);
6329 if (exchange && !can_atomic_exchange_p (imode, true))
6330 return false;
6332 if (!useless_type_conversion_p (itype, type))
6333 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6334 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6335 build_int_cst (NULL,
6336 gimple_omp_atomic_seq_cst_p (stmt)
6337 ? MEMMODEL_SEQ_CST
6338 : MEMMODEL_RELAXED));
6339 if (exchange)
6341 if (!useless_type_conversion_p (type, itype))
6342 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6343 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6346 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6347 gsi_remove (&gsi, true);
6349 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6350 gsi = gsi_last_bb (load_bb);
6351 gsi_remove (&gsi, true);
6353 if (gimple_in_ssa_p (cfun))
6354 update_ssa (TODO_update_ssa_no_phi);
6356 return true;
6359 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6360 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6361 size of the data type, and thus usable to find the index of the builtin
6362 decl. Returns false if the expression is not of the proper form. */
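/* For example (a sketch, assuming a 4-byte int x):

     #pragma omp atomic
       x += n;

   matches the PLUS_EXPR case below and becomes roughly
   __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED) (MEMMODEL_SEQ_CST with
   the seq_cst clause); if the updated value is captured, the
   __atomic_add_fetch_4 form is used instead.  */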
6364 static bool
6365 expand_omp_atomic_fetch_op (basic_block load_bb,
6366 tree addr, tree loaded_val,
6367 tree stored_val, int index)
6369 enum built_in_function oldbase, newbase, tmpbase;
6370 tree decl, itype, call;
6371 tree lhs, rhs;
6372 basic_block store_bb = single_succ (load_bb);
6373 gimple_stmt_iterator gsi;
6374 gimple *stmt;
6375 location_t loc;
6376 enum tree_code code;
6377 bool need_old, need_new;
6378 machine_mode imode;
6379 bool seq_cst;
6381 /* We expect to find the following sequences:
6383 load_bb:
6384 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6386 store_bb:
6387 val = tmp OP something; (or: something OP tmp)
6388 GIMPLE_OMP_STORE (val)
6390 ??? FIXME: Allow a more flexible sequence.
6391 Perhaps use data flow to pick the statements.
6395 gsi = gsi_after_labels (store_bb);
6396 stmt = gsi_stmt (gsi);
6397 loc = gimple_location (stmt);
6398 if (!is_gimple_assign (stmt))
6399 return false;
6400 gsi_next (&gsi);
6401 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6402 return false;
6403 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6404 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6405 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6406 gcc_checking_assert (!need_old || !need_new);
6408 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6409 return false;
6411 /* Check for one of the supported fetch-op operations. */
6412 code = gimple_assign_rhs_code (stmt);
6413 switch (code)
6415 case PLUS_EXPR:
6416 case POINTER_PLUS_EXPR:
6417 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6418 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6419 break;
6420 case MINUS_EXPR:
6421 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6422 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6423 break;
6424 case BIT_AND_EXPR:
6425 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6426 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6427 break;
6428 case BIT_IOR_EXPR:
6429 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6430 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6431 break;
6432 case BIT_XOR_EXPR:
6433 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6434 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6435 break;
6436 default:
6437 return false;
6440 /* Make sure the expression is of the proper form. */
6441 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6442 rhs = gimple_assign_rhs2 (stmt);
6443 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6444 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6445 rhs = gimple_assign_rhs1 (stmt);
6446 else
6447 return false;
6449 tmpbase = ((enum built_in_function)
6450 ((need_new ? newbase : oldbase) + index + 1));
6451 decl = builtin_decl_explicit (tmpbase);
6452 if (decl == NULL_TREE)
6453 return false;
6454 itype = TREE_TYPE (TREE_TYPE (decl));
6455 imode = TYPE_MODE (itype);
6457 /* We could test all of the various optabs involved, but the fact of the
6458 matter is that (with the exception of i486 vs i586 and xadd) all targets
6459 that support any atomic operation optab also implement compare-and-swap.
6460 Let optabs.c take care of expanding any compare-and-swap loop. */
6461 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6462 return false;
6464 gsi = gsi_last_bb (load_bb);
6465 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6467 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6468 It only requires that the operation happen atomically. Thus we can
6469 use the RELAXED memory model. */
6470 call = build_call_expr_loc (loc, decl, 3, addr,
6471 fold_convert_loc (loc, itype, rhs),
6472 build_int_cst (NULL,
6473 seq_cst ? MEMMODEL_SEQ_CST
6474 : MEMMODEL_RELAXED));
6476 if (need_old || need_new)
6478 lhs = need_old ? loaded_val : stored_val;
6479 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6480 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6482 else
6483 call = fold_convert_loc (loc, void_type_node, call);
6484 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6485 gsi_remove (&gsi, true);
6487 gsi = gsi_last_bb (store_bb);
6488 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6489 gsi_remove (&gsi, true);
6490 gsi = gsi_last_bb (store_bb);
6491 stmt = gsi_stmt (gsi);
6492 gsi_remove (&gsi, true);
6494 if (gimple_in_ssa_p (cfun))
6496 release_defs (stmt);
6497 update_ssa (TODO_update_ssa_no_phi);
6500 return true;
6503 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6505 oldval = *addr;
6506 repeat:
6507 newval = rhs; // with oldval replacing *addr in rhs
6508 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6509 if (oldval != newval)
6510 goto repeat;
6512 INDEX is log2 of the size of the data type, and thus usable to find the
6513 index of the builtin decl. */
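/* For example (a sketch, assuming a 4-byte float x, for which no
   fetch-op builtin applies):

     #pragma omp atomic
       x *= 2.0f;

   is expanded into the loop above, with the float view-converted to a
   same-sized integer so that __sync_val_compare_and_swap_4 can perform
   the update.  */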
6515 static bool
6516 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6517 tree addr, tree loaded_val, tree stored_val,
6518 int index)
6520 tree loadedi, storedi, initial, new_storedi, old_vali;
6521 tree type, itype, cmpxchg, iaddr;
6522 gimple_stmt_iterator si;
6523 basic_block loop_header = single_succ (load_bb);
6524 gimple *phi, *stmt;
6525 edge e;
6526 enum built_in_function fncode;
6528 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6529 order to use the RELAXED memory model effectively. */
6530 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6531 + index + 1);
6532 cmpxchg = builtin_decl_explicit (fncode);
6533 if (cmpxchg == NULL_TREE)
6534 return false;
6535 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6536 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6538 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6539 || !can_atomic_load_p (TYPE_MODE (itype)))
6540 return false;
6542 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6543 si = gsi_last_bb (load_bb);
6544 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6546 /* For floating-point values, we'll need to view-convert them to integers
6547 so that we can perform the atomic compare and swap. Simplify the
6548 following code by always setting up the "i"ntegral variables. */
6549 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6551 tree iaddr_val;
6553 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6554 true));
6555 iaddr_val
6556 = force_gimple_operand_gsi (&si,
6557 fold_convert (TREE_TYPE (iaddr), addr),
6558 false, NULL_TREE, true, GSI_SAME_STMT);
6559 stmt = gimple_build_assign (iaddr, iaddr_val);
6560 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6561 loadedi = create_tmp_var (itype);
6562 if (gimple_in_ssa_p (cfun))
6563 loadedi = make_ssa_name (loadedi);
6565 else
6567 iaddr = addr;
6568 loadedi = loaded_val;
6571 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6572 tree loaddecl = builtin_decl_explicit (fncode);
6573 if (loaddecl)
6574 initial
6575 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6576 build_call_expr (loaddecl, 2, iaddr,
6577 build_int_cst (NULL_TREE,
6578 MEMMODEL_RELAXED)));
6579 else
6580 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6581 build_int_cst (TREE_TYPE (iaddr), 0));
6583 initial
6584 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6585 GSI_SAME_STMT);
6587 /* Move the value to the LOADEDI temporary. */
6588 if (gimple_in_ssa_p (cfun))
6590 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6591 phi = create_phi_node (loadedi, loop_header);
6592 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6593 initial);
6595 else
6596 gsi_insert_before (&si,
6597 gimple_build_assign (loadedi, initial),
6598 GSI_SAME_STMT);
6599 if (loadedi != loaded_val)
6601 gimple_stmt_iterator gsi2;
6602 tree x;
6604 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6605 gsi2 = gsi_start_bb (loop_header);
6606 if (gimple_in_ssa_p (cfun))
6608 gassign *stmt;
6609 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6610 true, GSI_SAME_STMT);
6611 stmt = gimple_build_assign (loaded_val, x);
6612 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6614 else
6616 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6617 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6618 true, GSI_SAME_STMT);
6621 gsi_remove (&si, true);
6623 si = gsi_last_bb (store_bb);
6624 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6626 if (iaddr == addr)
6627 storedi = stored_val;
6628 else
6629 storedi
6630 = force_gimple_operand_gsi (&si,
6631 build1 (VIEW_CONVERT_EXPR, itype,
6632 stored_val), true, NULL_TREE, true,
6633 GSI_SAME_STMT);
6635 /* Build the compare&swap statement. */
6636 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6637 new_storedi = force_gimple_operand_gsi (&si,
6638 fold_convert (TREE_TYPE (loadedi),
6639 new_storedi),
6640 true, NULL_TREE,
6641 true, GSI_SAME_STMT);
6643 if (gimple_in_ssa_p (cfun))
6644 old_vali = loadedi;
6645 else
6647 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6648 stmt = gimple_build_assign (old_vali, loadedi);
6649 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6651 stmt = gimple_build_assign (loadedi, new_storedi);
6652 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6655 /* Note that we always perform the comparison as an integer, even for
6656 floating point. This allows the atomic operation to properly
6657 succeed even with NaNs and -0.0. */
6658 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6659 stmt = gimple_build_cond_empty (ne);
6660 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6662 /* Update cfg. */
6663 e = single_succ_edge (store_bb);
6664 e->flags &= ~EDGE_FALLTHRU;
6665 e->flags |= EDGE_FALSE_VALUE;
6666 /* Expect no looping. */
6667 e->probability = profile_probability::guessed_always ();
6669 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6670 e->probability = profile_probability::guessed_never ();
6672 /* Copy the new value to loadedi (we already did that before the condition
6673 if we are not in SSA). */
6674 if (gimple_in_ssa_p (cfun))
6676 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6677 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6680 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6681 gsi_remove (&si, true);
6683 struct loop *loop = alloc_loop ();
6684 loop->header = loop_header;
6685 loop->latch = store_bb;
6686 add_loop (loop, loop_header->loop_father);
6688 if (gimple_in_ssa_p (cfun))
6689 update_ssa (TODO_update_ssa_no_phi);
6691 return true;
6694 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6696 GOMP_atomic_start ();
6697 *addr = rhs;
6698 GOMP_atomic_end ();
6700 The result is not globally atomic, but works so long as all parallel
6701 references are within #pragma omp atomic directives. According to
6702 responses received from omp@openmp.org, this appears to be within spec.
6703 Which makes sense, since that's how several other compilers handle
6704 this situation as well.
6705 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6706 expanding. STORED_VAL is the operand of the matching
6707 GIMPLE_OMP_ATOMIC_STORE.
6709 We replace
6710 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6711 loaded_val = *addr;
6713 and replace
6714 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6715 *addr = stored_val; */
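/* For example (a sketch, assuming a type such as long double for which
   neither the fetch-op builtins nor a compare-and-swap loop are
   available):

     #pragma omp atomic
       x += 1.0L;

   falls back to

     GOMP_atomic_start ();
     x = x + 1.0L;
     GOMP_atomic_end ();  */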
6718 static bool
6719 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6720 tree addr, tree loaded_val, tree stored_val)
6722 gimple_stmt_iterator si;
6723 gassign *stmt;
6724 tree t;
6726 si = gsi_last_bb (load_bb);
6727 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6729 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6730 t = build_call_expr (t, 0);
6731 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6733 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6734 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6735 gsi_remove (&si, true);
6737 si = gsi_last_bb (store_bb);
6738 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6740 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6741 stored_val);
6742 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6744 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6745 t = build_call_expr (t, 0);
6746 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6747 gsi_remove (&si, true);
6749 if (gimple_in_ssa_p (cfun))
6750 update_ssa (TODO_update_ssa_no_phi);
6751 return true;
6754 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6755 using expand_omp_atomic_fetch_op. If it failed, we try to
6756 call expand_omp_atomic_pipeline, and if it fails too, the
6757 ultimate fallback is wrapping the operation in a mutex
6758 (expand_omp_atomic_mutex). REGION is the atomic region built
6759 by build_omp_regions_1(). */
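/* As a worked example of the size check below (a sketch): for a 4-byte
   type, TYPE_SIZE_UNIT is 4, so index = exact_log2 (4) = 2, which falls
   in the supported 1..16 byte range (index 0..4) and selects the "_4"
   variants of the __atomic/__sync builtins in the helpers above.  */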
6761 static void
6762 expand_omp_atomic (struct omp_region *region)
6764 basic_block load_bb = region->entry, store_bb = region->exit;
6765 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6766 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6767 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6768 tree addr = gimple_omp_atomic_load_rhs (load);
6769 tree stored_val = gimple_omp_atomic_store_val (store);
6770 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6771 HOST_WIDE_INT index;
6773 /* Make sure the type is one of the supported sizes. */
6774 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6775 index = exact_log2 (index);
6776 if (index >= 0 && index <= 4)
6778 unsigned int align = TYPE_ALIGN_UNIT (type);
6780 /* __sync builtins require strict data alignment. */
6781 if (exact_log2 (align) >= index)
6783 /* Atomic load. */
6784 scalar_mode smode;
6785 if (loaded_val == stored_val
6786 && (is_int_mode (TYPE_MODE (type), &smode)
6787 || is_float_mode (TYPE_MODE (type), &smode))
6788 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6789 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6790 return;
6792 /* Atomic store. */
6793 if ((is_int_mode (TYPE_MODE (type), &smode)
6794 || is_float_mode (TYPE_MODE (type), &smode))
6795 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6796 && store_bb == single_succ (load_bb)
6797 && first_stmt (store_bb) == store
6798 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6799 stored_val, index))
6800 return;
6802 /* When possible, use specialized atomic update functions. */
6803 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6804 && store_bb == single_succ (load_bb)
6805 && expand_omp_atomic_fetch_op (load_bb, addr,
6806 loaded_val, stored_val, index))
6807 return;
6809 /* If we don't have specialized __sync builtins, try and implement
6810 as a compare and swap loop. */
6811 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6812 loaded_val, stored_val, index))
6813 return;
6817 /* The ultimate fallback is wrapping the operation in a mutex. */
6818 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6821 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6822 at REGION_EXIT. */
6824 static void
6825 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6826 basic_block region_exit)
6828 struct loop *outer = region_entry->loop_father;
6829 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6831 /* Don't parallelize the kernels region if it contains more than one outer
6832 loop. */
6833 unsigned int nr_outer_loops = 0;
6834 struct loop *single_outer = NULL;
6835 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6837 gcc_assert (loop_outer (loop) == outer);
6839 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6840 continue;
6842 if (region_exit != NULL
6843 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6844 continue;
6846 nr_outer_loops++;
6847 single_outer = loop;
6849 if (nr_outer_loops != 1)
6850 return;
6852 for (struct loop *loop = single_outer->inner;
6853 loop != NULL;
6854 loop = loop->inner)
6855 if (loop->next)
6856 return;
6858 /* Mark the loops in the region. */
6859 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6860 loop->in_oacc_kernels_region = true;
6863 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6865 struct GTY(()) grid_launch_attributes_trees
6867 tree kernel_dim_array_type;
6868 tree kernel_lattrs_dimnum_decl;
6869 tree kernel_lattrs_grid_decl;
6870 tree kernel_lattrs_group_decl;
6871 tree kernel_launch_attributes_type;
6874 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6876 /* Create types used to pass kernel launch attributes to target. */
6878 static void
6879 grid_create_kernel_launch_attr_types (void)
6881 if (grid_attr_trees)
6882 return;
6883 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6885 tree dim_arr_index_type
6886 = build_index_type (build_int_cst (integer_type_node, 2));
6887 grid_attr_trees->kernel_dim_array_type
6888 = build_array_type (uint32_type_node, dim_arr_index_type);
6890 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6891 grid_attr_trees->kernel_lattrs_dimnum_decl
6892 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6893 uint32_type_node);
6894 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6896 grid_attr_trees->kernel_lattrs_grid_decl
6897 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6898 grid_attr_trees->kernel_dim_array_type);
6899 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6900 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6901 grid_attr_trees->kernel_lattrs_group_decl
6902 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6903 grid_attr_trees->kernel_dim_array_type);
6904 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6905 = grid_attr_trees->kernel_lattrs_grid_decl;
6906 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6907 "__gomp_kernel_launch_attributes",
6908 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6911 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6912 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6913 of type uint32_type_node. */
6915 static void
6916 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6917 tree fld_decl, int index, tree value)
6919 tree ref = build4 (ARRAY_REF, uint32_type_node,
6920 build3 (COMPONENT_REF,
6921 grid_attr_trees->kernel_dim_array_type,
6922 range_var, fld_decl, NULL_TREE),
6923 build_int_cst (integer_type_node, index),
6924 NULL_TREE, NULL_TREE);
6925 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6928 /* Return a tree representation of a pointer to a structure with grid and
6929 work-group size information. Statements filling that information will be
6930 inserted before GSI. TGT_STMT is the target statement which has the
6931 necessary information in it. */
6933 static tree
6934 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6935 gomp_target *tgt_stmt)
6937 grid_create_kernel_launch_attr_types ();
6938 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6939 "__kernel_launch_attrs");
6941 unsigned max_dim = 0;
6942 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6943 clause;
6944 clause = OMP_CLAUSE_CHAIN (clause))
6946 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6947 continue;
6949 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6950 max_dim = MAX (dim, max_dim);
6952 grid_insert_store_range_dim (gsi, lattrs,
6953 grid_attr_trees->kernel_lattrs_grid_decl,
6954 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6955 grid_insert_store_range_dim (gsi, lattrs,
6956 grid_attr_trees->kernel_lattrs_group_decl,
6957 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6960 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6961 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6962 gcc_checking_assert (max_dim <= 2);
6963 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6964 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6965 GSI_SAME_STMT);
6966 TREE_ADDRESSABLE (lattrs) = 1;
6967 return build_fold_addr_expr (lattrs);
6970 /* Build target argument identifier from the DEVICE identifier, value
6971 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6973 static tree
6974 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6976 tree t = build_int_cst (integer_type_node, device);
6977 if (subsequent_param)
6978 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6979 build_int_cst (integer_type_node,
6980 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6981 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6982 build_int_cst (integer_type_node, id));
6983 return t;
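/* Sketch of the encoding produced above (the exact masks and bit widths are
   given by the GOMP_TARGET_ARG_* macros in gomp-constants.h):

     identifier = DEVICE
                  | (SUBSEQUENT_PARAM ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0)
                  | ID;  */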
6986 /* Like above but return it in a type that can be directly stored as an element
6987 of the argument array. */
6989 static tree
6990 get_target_argument_identifier (int device, bool subsequent_param, int id)
6992 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6993 return fold_convert (ptr_type_node, t);
6996 /* Return a target argument consisting of DEVICE identifier, value identifier
6997 ID, and the actual VALUE. */
6999 static tree
7000 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7001 tree value)
7003 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7004 fold_convert (integer_type_node, value),
7005 build_int_cst (unsigned_type_node,
7006 GOMP_TARGET_ARG_VALUE_SHIFT));
7007 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7008 get_target_argument_identifier_1 (device, false, id));
7009 t = fold_convert (ptr_type_node, t);
7010 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7013 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7014 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
7015 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7016 arguments. */
7018 static void
7019 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7020 int id, tree value, vec <tree> *args)
7022 if (tree_fits_shwi_p (value)
7023 && tree_to_shwi (value) > -(1 << 15)
7024 && tree_to_shwi (value) < (1 << 15))
7025 args->quick_push (get_target_argument_value (gsi, device, id, value));
7026 else
7028 args->quick_push (get_target_argument_identifier (device, true, id));
7029 value = fold_convert (ptr_type_node, value);
7030 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7031 GSI_SAME_STMT);
7032 args->quick_push (value);
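/* Example (editorial sketch): a constant small enough to fit into the value
   field, say thread_limit(4), becomes a single slot holding the device, the
   id and the value 4 shifted left by GOMP_TARGET_ARG_VALUE_SHIFT; a value
   only known at run time becomes two slots: an identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself
   converted to a pointer.  */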
7036 /* Create an array of arguments that is then passed to GOMP_target. */
7038 static tree
7039 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7041 auto_vec <tree, 6> args;
7042 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7043 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7044 if (c)
7045 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7046 else
7047 t = integer_minus_one_node;
7048 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7049 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7051 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7052 if (c)
7053 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7054 else
7055 t = integer_minus_one_node;
7056 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7057 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7058 &args);
7060 /* Add HSA-specific grid sizes, if available. */
7061 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7062 OMP_CLAUSE__GRIDDIM_))
7064 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7065 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7066 args.quick_push (t);
7067 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7070 /* Produce more, perhaps device specific, arguments here. */
7072 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7073 args.length () + 1),
7074 ".omp_target_args");
7075 for (unsigned i = 0; i < args.length (); i++)
7077 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7078 build_int_cst (integer_type_node, i),
7079 NULL_TREE, NULL_TREE);
7080 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7081 GSI_SAME_STMT);
7083 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7084 build_int_cst (integer_type_node, args.length ()),
7085 NULL_TREE, NULL_TREE);
7086 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7087 GSI_SAME_STMT);
7088 TREE_ADDRESSABLE (argarray) = 1;
7089 return build_fold_addr_expr (argarray);
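/* The resulting array is NULL-terminated; e.g. for a plain OpenMP target
   region without grid sizes it looks roughly like
     { <num_teams slot(s)>, <thread_limit slot(s)>, NULL }
   where each entry is one or two slots as described above (an editorial
   illustration).  */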
7092 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7094 static void
7095 expand_omp_target (struct omp_region *region)
7097 basic_block entry_bb, exit_bb, new_bb;
7098 struct function *child_cfun;
7099 tree child_fn, block, t;
7100 gimple_stmt_iterator gsi;
7101 gomp_target *entry_stmt;
7102 gimple *stmt;
7103 edge e;
7104 bool offloaded, data_region;
7106 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7107 new_bb = region->entry;
7109 offloaded = is_gimple_omp_offloaded (entry_stmt);
7110 switch (gimple_omp_target_kind (entry_stmt))
7112 case GF_OMP_TARGET_KIND_REGION:
7113 case GF_OMP_TARGET_KIND_UPDATE:
7114 case GF_OMP_TARGET_KIND_ENTER_DATA:
7115 case GF_OMP_TARGET_KIND_EXIT_DATA:
7116 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7117 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7118 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7119 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7120 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7121 data_region = false;
7122 break;
7123 case GF_OMP_TARGET_KIND_DATA:
7124 case GF_OMP_TARGET_KIND_OACC_DATA:
7125 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7126 data_region = true;
7127 break;
7128 default:
7129 gcc_unreachable ();
7132 child_fn = NULL_TREE;
7133 child_cfun = NULL;
7134 if (offloaded)
7136 child_fn = gimple_omp_target_child_fn (entry_stmt);
7137 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7140 /* Supported by expand_omp_taskreg, but not here. */
7141 if (child_cfun != NULL)
7142 gcc_checking_assert (!child_cfun->cfg);
7143 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7145 entry_bb = region->entry;
7146 exit_bb = region->exit;
7148 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7150 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7152 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7153 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7154 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7155 DECL_ATTRIBUTES (child_fn)
7156 = tree_cons (get_identifier ("oacc kernels"),
7157 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7160 if (offloaded)
7162 unsigned srcidx, dstidx, num;
7164 /* If the offloading region needs data sent from the parent
7165 function, then the very first statement (except possible
7166 tree profile counter updates) of the offloading body
7167 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7168 &.OMP_DATA_O is passed as an argument to the child function,
7169 we need to replace it with the argument as seen by the child
7170 function.
7172 In most cases, this will end up being the identity assignment
7173 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7174 a function call that has been inlined, the original PARM_DECL
7175 .OMP_DATA_I may have been converted into a different local
7176 variable. In which case, we need to keep the assignment. */
7177 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7178 if (data_arg)
7180 basic_block entry_succ_bb = single_succ (entry_bb);
7181 gimple_stmt_iterator gsi;
7182 tree arg;
7183 gimple *tgtcopy_stmt = NULL;
7184 tree sender = TREE_VEC_ELT (data_arg, 0);
7186 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7188 gcc_assert (!gsi_end_p (gsi));
7189 stmt = gsi_stmt (gsi);
7190 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7191 continue;
7193 if (gimple_num_ops (stmt) == 2)
7195 tree arg = gimple_assign_rhs1 (stmt);
7197 /* We're ignoring the subcode because we're
7198 effectively doing a STRIP_NOPS. */
7200 if (TREE_CODE (arg) == ADDR_EXPR
7201 && TREE_OPERAND (arg, 0) == sender)
7203 tgtcopy_stmt = stmt;
7204 break;
7209 gcc_assert (tgtcopy_stmt != NULL);
7210 arg = DECL_ARGUMENTS (child_fn);
7212 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7213 gsi_remove (&gsi, true);
7216 /* Declare local variables needed in CHILD_CFUN. */
7217 block = DECL_INITIAL (child_fn);
7218 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7219 /* The gimplifier could record temporaries in the offloading block
7220 rather than in the containing function's local_decls chain,
7221 which would mean cgraph missed finalizing them. Do it now. */
7222 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7223 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7224 varpool_node::finalize_decl (t);
7225 DECL_SAVED_TREE (child_fn) = NULL;
7226 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7227 gimple_set_body (child_fn, NULL);
7228 TREE_USED (block) = 1;
7230 /* Reset DECL_CONTEXT on function arguments. */
7231 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7232 DECL_CONTEXT (t) = child_fn;
7234 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7235 so that it can be moved to the child function. */
7236 gsi = gsi_last_bb (entry_bb);
7237 stmt = gsi_stmt (gsi);
7238 gcc_assert (stmt
7239 && gimple_code (stmt) == gimple_code (entry_stmt));
7240 e = split_block (entry_bb, stmt);
7241 gsi_remove (&gsi, true);
7242 entry_bb = e->dest;
7243 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7245 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7246 if (exit_bb)
7248 gsi = gsi_last_bb (exit_bb);
7249 gcc_assert (!gsi_end_p (gsi)
7250 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7251 stmt = gimple_build_return (NULL);
7252 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7253 gsi_remove (&gsi, true);
7256 /* Make sure to generate early debug for the function before
7257 outlining anything. */
7258 if (! gimple_in_ssa_p (cfun))
7259 (*debug_hooks->early_global_decl) (cfun->decl);
7261 /* Move the offloading region into CHILD_CFUN. */
7263 block = gimple_block (entry_stmt);
7265 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7266 if (exit_bb)
7267 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7268 /* When the OMP expansion process cannot guarantee an up-to-date
7269 loop tree, arrange for the child function to fix up loops. */
7270 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7271 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7273 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7274 num = vec_safe_length (child_cfun->local_decls);
7275 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7277 t = (*child_cfun->local_decls)[srcidx];
7278 if (DECL_CONTEXT (t) == cfun->decl)
7279 continue;
7280 if (srcidx != dstidx)
7281 (*child_cfun->local_decls)[dstidx] = t;
7282 dstidx++;
7284 if (dstidx != num)
7285 vec_safe_truncate (child_cfun->local_decls, dstidx);
7287 /* Inform the callgraph about the new function. */
7288 child_cfun->curr_properties = cfun->curr_properties;
7289 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7290 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7291 cgraph_node *node = cgraph_node::get_create (child_fn);
7292 node->parallelized_function = 1;
7293 cgraph_node::add_new_function (child_fn, true);
7295 /* Add the new function to the offload table. */
7296 if (ENABLE_OFFLOADING)
7297 vec_safe_push (offload_funcs, child_fn);
7299 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7300 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7302 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7303 fixed in a following pass. */
7304 push_cfun (child_cfun);
7305 if (need_asm)
7306 assign_assembler_name_if_needed (child_fn);
7307 cgraph_edge::rebuild_edges ();
7309 /* Some EH regions might become dead, see PR34608. If
7310 pass_cleanup_cfg isn't the first pass to happen with the
7311 new child, these dead EH edges might cause problems.
7312 Clean them up now. */
7313 if (flag_exceptions)
7315 basic_block bb;
7316 bool changed = false;
7318 FOR_EACH_BB_FN (bb, cfun)
7319 changed |= gimple_purge_dead_eh_edges (bb);
7320 if (changed)
7321 cleanup_tree_cfg ();
7323 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7324 verify_loop_structure ();
7325 pop_cfun ();
7327 if (dump_file && !gimple_in_ssa_p (cfun))
7329 omp_any_child_fn_dumped = true;
7330 dump_function_header (dump_file, child_fn, dump_flags);
7331 dump_function_to_file (child_fn, dump_file, dump_flags);
7335 /* Emit a library call to launch the offloading region, or do data
7336 transfers. */
7337 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7338 enum built_in_function start_ix;
7339 location_t clause_loc;
7340 unsigned int flags_i = 0;
7342 switch (gimple_omp_target_kind (entry_stmt))
7344 case GF_OMP_TARGET_KIND_REGION:
7345 start_ix = BUILT_IN_GOMP_TARGET;
7346 break;
7347 case GF_OMP_TARGET_KIND_DATA:
7348 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7349 break;
7350 case GF_OMP_TARGET_KIND_UPDATE:
7351 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7352 break;
7353 case GF_OMP_TARGET_KIND_ENTER_DATA:
7354 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7355 break;
7356 case GF_OMP_TARGET_KIND_EXIT_DATA:
7357 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7358 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7359 break;
7360 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7361 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7362 start_ix = BUILT_IN_GOACC_PARALLEL;
7363 break;
7364 case GF_OMP_TARGET_KIND_OACC_DATA:
7365 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7366 start_ix = BUILT_IN_GOACC_DATA_START;
7367 break;
7368 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7369 start_ix = BUILT_IN_GOACC_UPDATE;
7370 break;
7371 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7372 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7373 break;
7374 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7375 start_ix = BUILT_IN_GOACC_DECLARE;
7376 break;
7377 default:
7378 gcc_unreachable ();
7381 clauses = gimple_omp_target_clauses (entry_stmt);
7383 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7384 library choose) and there is no conditional. */
7385 cond = NULL_TREE;
7386 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7388 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7389 if (c)
7390 cond = OMP_CLAUSE_IF_EXPR (c);
7392 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7393 if (c)
7395 /* Even if we pass it to all library function calls, it is currently only
7396 defined/used for the OpenMP target ones. */
7397 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7398 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7399 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7400 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7402 device = OMP_CLAUSE_DEVICE_ID (c);
7403 clause_loc = OMP_CLAUSE_LOCATION (c);
7405 else
7406 clause_loc = gimple_location (entry_stmt);
7408 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7409 if (c)
7410 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7412 /* Ensure 'device' is of the correct type. */
7413 device = fold_convert_loc (clause_loc, integer_type_node, device);
7415 /* If we found the clause 'if (cond)', build
7416 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7417 if (cond)
7419 cond = gimple_boolify (cond);
7421 basic_block cond_bb, then_bb, else_bb;
7422 edge e;
7423 tree tmp_var;
7425 tmp_var = create_tmp_var (TREE_TYPE (device));
7426 if (offloaded)
7427 e = split_block_after_labels (new_bb);
7428 else
7430 gsi = gsi_last_bb (new_bb);
7431 gsi_prev (&gsi);
7432 e = split_block (new_bb, gsi_stmt (gsi));
7434 cond_bb = e->src;
7435 new_bb = e->dest;
7436 remove_edge (e);
7438 then_bb = create_empty_bb (cond_bb);
7439 else_bb = create_empty_bb (then_bb);
7440 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7441 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7443 stmt = gimple_build_cond_empty (cond);
7444 gsi = gsi_last_bb (cond_bb);
7445 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7447 gsi = gsi_start_bb (then_bb);
7448 stmt = gimple_build_assign (tmp_var, device);
7449 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7451 gsi = gsi_start_bb (else_bb);
7452 stmt = gimple_build_assign (tmp_var,
7453 build_int_cst (integer_type_node,
7454 GOMP_DEVICE_HOST_FALLBACK));
7455 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7457 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7458 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7459 add_bb_to_loop (then_bb, cond_bb->loop_father);
7460 add_bb_to_loop (else_bb, cond_bb->loop_father);
7461 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7462 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7464 device = tmp_var;
7465 gsi = gsi_last_bb (new_bb);
7467 else
7469 gsi = gsi_last_bb (new_bb);
7470 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7471 true, GSI_SAME_STMT);
7474 t = gimple_omp_target_data_arg (entry_stmt);
7475 if (t == NULL)
7477 t1 = size_zero_node;
7478 t2 = build_zero_cst (ptr_type_node);
7479 t3 = t2;
7480 t4 = t2;
7482 else
7484 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7485 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7486 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7487 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7488 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
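/* Editorial note (a reading of the code above, not verified against
   lowering): T1 is the number of mapped entries (the domain of the second
   vector plus one), while T2, T3 and T4 are the addresses of the three
   vectors prepared by OMP lowering, presumably the host addresses, sizes
   and map kinds passed to the libgomp entry point.  */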
7491 gimple *g;
7492 bool tagging = false;
7493 /* The maximum number of arguments used by any start_ix, without varargs. */
7494 auto_vec<tree, 11> args;
7495 args.quick_push (device);
7496 if (offloaded)
7497 args.quick_push (build_fold_addr_expr (child_fn));
7498 args.quick_push (t1);
7499 args.quick_push (t2);
7500 args.quick_push (t3);
7501 args.quick_push (t4);
7502 switch (start_ix)
7504 case BUILT_IN_GOACC_DATA_START:
7505 case BUILT_IN_GOACC_DECLARE:
7506 case BUILT_IN_GOMP_TARGET_DATA:
7507 break;
7508 case BUILT_IN_GOMP_TARGET:
7509 case BUILT_IN_GOMP_TARGET_UPDATE:
7510 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7511 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7512 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7513 if (c)
7514 depend = OMP_CLAUSE_DECL (c);
7515 else
7516 depend = build_int_cst (ptr_type_node, 0);
7517 args.quick_push (depend);
7518 if (start_ix == BUILT_IN_GOMP_TARGET)
7519 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7520 break;
7521 case BUILT_IN_GOACC_PARALLEL:
7522 oacc_set_fn_attrib (child_fn, clauses, &args);
7523 tagging = true;
7524 /* FALLTHRU */
7525 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7526 case BUILT_IN_GOACC_UPDATE:
7528 tree t_async = NULL_TREE;
7530 /* If present, use the value specified by the respective
7531 clause, making sure that it is of the correct type. */
7532 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7533 if (c)
7534 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7535 integer_type_node,
7536 OMP_CLAUSE_ASYNC_EXPR (c));
7537 else if (!tagging)
7538 /* Default values for t_async. */
7539 t_async = fold_convert_loc (gimple_location (entry_stmt),
7540 integer_type_node,
7541 build_int_cst (integer_type_node,
7542 GOMP_ASYNC_SYNC));
7543 if (tagging && t_async)
7545 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7547 if (TREE_CODE (t_async) == INTEGER_CST)
7549 /* See if we can pack the async arg into the tag's
7550 operand. */
7551 i_async = TREE_INT_CST_LOW (t_async);
7552 if (i_async < GOMP_LAUNCH_OP_MAX)
7553 t_async = NULL_TREE;
7554 else
7555 i_async = GOMP_LAUNCH_OP_MAX;
7557 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7558 i_async));
7560 if (t_async)
7561 args.safe_push (t_async);
7563 /* Save the argument index, and ... */
7564 unsigned t_wait_idx = args.length ();
7565 unsigned num_waits = 0;
7566 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7567 if (!tagging || c)
7568 /* ... push a placeholder. */
7569 args.safe_push (integer_zero_node);
7571 for (; c; c = OMP_CLAUSE_CHAIN (c))
7572 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7574 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7575 integer_type_node,
7576 OMP_CLAUSE_WAIT_EXPR (c)));
7577 num_waits++;
7580 if (!tagging || num_waits)
7582 tree len;
7584 /* Now that we know the number, update the placeholder. */
7585 if (tagging)
7586 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7587 else
7588 len = build_int_cst (integer_type_node, num_waits);
7589 len = fold_convert_loc (gimple_location (entry_stmt),
7590 unsigned_type_node, len);
7591 args[t_wait_idx] = len;
7594 break;
7595 default:
7596 gcc_unreachable ();
7598 if (tagging)
7599 /* Push terminal marker - zero. */
7600 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7602 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7603 gimple_set_location (g, gimple_location (entry_stmt));
7604 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7605 if (!offloaded)
7607 g = gsi_stmt (gsi);
7608 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7609 gsi_remove (&gsi, true);
7611 if (data_region && region->exit)
7613 gsi = gsi_last_bb (region->exit);
7614 g = gsi_stmt (gsi);
7615 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7616 gsi_remove (&gsi, true);
7620 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7621 the iteration variable derived from the thread number. INTRA_GROUP means this
7622 is an expansion of a loop iterating over work-items within a separate
7623 iteration over groups. */
7625 static void
7626 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7628 gimple_stmt_iterator gsi;
7629 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7630 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7631 == GF_OMP_FOR_KIND_GRID_LOOP);
7632 size_t collapse = gimple_omp_for_collapse (for_stmt);
7633 struct omp_for_data_loop *loops
7634 = XALLOCAVEC (struct omp_for_data_loop,
7635 gimple_omp_for_collapse (for_stmt));
7636 struct omp_for_data fd;
7638 remove_edge (BRANCH_EDGE (kfor->entry));
7639 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7641 gcc_assert (kfor->cont);
7642 omp_extract_for_data (for_stmt, &fd, loops);
7644 gsi = gsi_start_bb (body_bb);
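/* For each collapsed dimension, replace the loop control with a direct
   computation of the iteration variable from the HSA work-group or
   work-item id: in effect V = N1 + ID * STEP, so that each work-item
   executes the body exactly once for its own id (editorial summary of the
   loop below).  */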
7646 for (size_t dim = 0; dim < collapse; dim++)
7648 tree type, itype;
7649 itype = type = TREE_TYPE (fd.loops[dim].v);
7650 if (POINTER_TYPE_P (type))
7651 itype = signed_type_for (type);
7653 tree n1 = fd.loops[dim].n1;
7654 tree step = fd.loops[dim].step;
7655 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7656 true, NULL_TREE, true, GSI_SAME_STMT);
7657 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7658 true, NULL_TREE, true, GSI_SAME_STMT);
7659 tree threadid;
7660 if (gimple_omp_for_grid_group_iter (for_stmt))
7662 gcc_checking_assert (!intra_group);
7663 threadid = build_call_expr (builtin_decl_explicit
7664 (BUILT_IN_HSA_WORKGROUPID), 1,
7665 build_int_cstu (unsigned_type_node, dim));
7667 else if (intra_group)
7668 threadid = build_call_expr (builtin_decl_explicit
7669 (BUILT_IN_HSA_WORKITEMID), 1,
7670 build_int_cstu (unsigned_type_node, dim));
7671 else
7672 threadid = build_call_expr (builtin_decl_explicit
7673 (BUILT_IN_HSA_WORKITEMABSID), 1,
7674 build_int_cstu (unsigned_type_node, dim));
7675 threadid = fold_convert (itype, threadid);
7676 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7677 true, GSI_SAME_STMT);
7679 tree startvar = fd.loops[dim].v;
7680 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7681 if (POINTER_TYPE_P (type))
7682 t = fold_build_pointer_plus (n1, t);
7683 else
7684 t = fold_build2 (PLUS_EXPR, type, t, n1);
7685 t = fold_convert (type, t);
7686 t = force_gimple_operand_gsi (&gsi, t,
7687 DECL_P (startvar)
7688 && TREE_ADDRESSABLE (startvar),
7689 NULL_TREE, true, GSI_SAME_STMT);
7690 gassign *assign_stmt = gimple_build_assign (startvar, t);
7691 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7693 /* Remove the omp for statement. */
7694 gsi = gsi_last_bb (kfor->entry);
7695 gsi_remove (&gsi, true);
7697 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7698 gsi = gsi_last_bb (kfor->cont);
7699 gcc_assert (!gsi_end_p (gsi)
7700 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7701 gsi_remove (&gsi, true);
7703 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7704 gsi = gsi_last_bb (kfor->exit);
7705 gcc_assert (!gsi_end_p (gsi)
7706 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7707 if (intra_group)
7708 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7709 gsi_remove (&gsi, true);
7711 /* Fixup the much simpler CFG. */
7712 remove_edge (find_edge (kfor->cont, body_bb));
7714 if (kfor->cont != body_bb)
7715 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7716 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7719 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7720 argument_decls. */
7722 struct grid_arg_decl_map
7724 tree old_arg;
7725 tree new_arg;
7728 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7729 pertaining to the kernel function. */
7731 static tree
7732 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7734 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7735 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7736 tree t = *tp;
7738 if (t == adm->old_arg)
7739 *tp = adm->new_arg;
7740 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7741 return NULL_TREE;
7744 /* If the TARGET region contains a gridified kernel body, remove that region
7745 from the TARGET and expand it as an HSA gridified kernel. */
7747 static void
7748 grid_expand_target_grid_body (struct omp_region *target)
7750 if (!hsa_gen_requested_p ())
7751 return;
7753 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7754 struct omp_region **pp;
7756 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7757 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7758 break;
7760 struct omp_region *gpukernel = *pp;
7762 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7763 if (!gpukernel)
7765 /* HSA cannot handle OACC stuff. */
7766 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7767 return;
7768 gcc_checking_assert (orig_child_fndecl);
7769 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7770 OMP_CLAUSE__GRIDDIM_));
7771 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7773 hsa_register_kernel (n);
7774 return;
7777 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7778 OMP_CLAUSE__GRIDDIM_));
7779 tree inside_block
7780 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7781 *pp = gpukernel->next;
7782 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7783 if ((*pp)->type == GIMPLE_OMP_FOR)
7784 break;
7786 struct omp_region *kfor = *pp;
7787 gcc_assert (kfor);
7788 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7789 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7790 *pp = kfor->next;
7791 if (kfor->inner)
7793 if (gimple_omp_for_grid_group_iter (for_stmt))
7795 struct omp_region **next_pp;
7796 for (pp = &kfor->inner; *pp; pp = next_pp)
7798 next_pp = &(*pp)->next;
7799 if ((*pp)->type != GIMPLE_OMP_FOR)
7800 continue;
7801 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7802 gcc_assert (gimple_omp_for_kind (inner)
7803 == GF_OMP_FOR_KIND_GRID_LOOP);
7804 grid_expand_omp_for_loop (*pp, true);
7805 *pp = (*pp)->next;
7806 next_pp = pp;
7809 expand_omp (kfor->inner);
7811 if (gpukernel->inner)
7812 expand_omp (gpukernel->inner);
7814 tree kern_fndecl = copy_node (orig_child_fndecl);
7815 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7816 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7817 tree tgtblock = gimple_block (tgt_stmt);
7818 tree fniniblock = make_node (BLOCK);
7819 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7820 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7821 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7822 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7823 DECL_INITIAL (kern_fndecl) = fniniblock;
7824 push_struct_function (kern_fndecl);
7825 cfun->function_end_locus = gimple_location (tgt_stmt);
7826 init_tree_ssa (cfun);
7827 pop_cfun ();
7829 /* Make sure to generate early debug for the function before
7830 outlining anything. */
7831 if (! gimple_in_ssa_p (cfun))
7832 (*debug_hooks->early_global_decl) (cfun->decl);
7834 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7835 gcc_assert (!DECL_CHAIN (old_parm_decl));
7836 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7837 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7838 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7839 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7840 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7841 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7842 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7843 kern_cfun->curr_properties = cfun->curr_properties;
7845 grid_expand_omp_for_loop (kfor, false);
7847 /* Remove the omp for statement. */
7848 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7849 gsi_remove (&gsi, true);
7850 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7851 return. */
7852 gsi = gsi_last_bb (gpukernel->exit);
7853 gcc_assert (!gsi_end_p (gsi)
7854 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7855 gimple *ret_stmt = gimple_build_return (NULL);
7856 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7857 gsi_remove (&gsi, true);
7859 /* Statements in the first BB in the target construct have been produced by
7860 target lowering and must be copied inside the GPUKERNEL, with the two
7861 exceptions of the first OMP statement and the OMP_DATA assignment
7862 statement. */
7863 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7864 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7865 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7866 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7867 !gsi_end_p (tsi); gsi_next (&tsi))
7869 gimple *stmt = gsi_stmt (tsi);
7870 if (is_gimple_omp (stmt))
7871 break;
7872 if (sender
7873 && is_gimple_assign (stmt)
7874 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7875 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7876 continue;
7877 gimple *copy = gimple_copy (stmt);
7878 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7879 gimple_set_block (copy, fniniblock);
7882 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7883 gpukernel->exit, inside_block);
7885 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7886 kcn->mark_force_output ();
7887 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7889 hsa_register_kernel (kcn, orig_child);
7891 cgraph_node::add_new_function (kern_fndecl, true);
7892 push_cfun (kern_cfun);
7893 cgraph_edge::rebuild_edges ();
7895 /* Re-map any mention of the PARM_DECL of the original function to the
7896 PARM_DECL of the new one.
7898 TODO: It would be great if lowering produced references into the GPU
7899 kernel decl straight away and we did not have to do this. */
7900 struct grid_arg_decl_map adm;
7901 adm.old_arg = old_parm_decl;
7902 adm.new_arg = new_parm_decl;
7903 basic_block bb;
7904 FOR_EACH_BB_FN (bb, kern_cfun)
7906 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7908 gimple *stmt = gsi_stmt (gsi);
7909 struct walk_stmt_info wi;
7910 memset (&wi, 0, sizeof (wi));
7911 wi.info = &adm;
7912 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7915 pop_cfun ();
7917 return;
7920 /* Expand the parallel region tree rooted at REGION. Expansion
7921 proceeds in depth-first order. Innermost regions are expanded
7922 first. This way, parallel regions that require a new function to
7923 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7924 internal dependencies in their body. */
7926 static void
7927 expand_omp (struct omp_region *region)
7929 omp_any_child_fn_dumped = false;
7930 while (region)
7932 location_t saved_location;
7933 gimple *inner_stmt = NULL;
7935 /* First, determine whether this is a combined parallel+workshare
7936 region. */
7937 if (region->type == GIMPLE_OMP_PARALLEL)
7938 determine_parallel_type (region);
7939 else if (region->type == GIMPLE_OMP_TARGET)
7940 grid_expand_target_grid_body (region);
7942 if (region->type == GIMPLE_OMP_FOR
7943 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7944 inner_stmt = last_stmt (region->inner->entry);
7946 if (region->inner)
7947 expand_omp (region->inner);
7949 saved_location = input_location;
7950 if (gimple_has_location (last_stmt (region->entry)))
7951 input_location = gimple_location (last_stmt (region->entry));
7953 switch (region->type)
7955 case GIMPLE_OMP_PARALLEL:
7956 case GIMPLE_OMP_TASK:
7957 expand_omp_taskreg (region);
7958 break;
7960 case GIMPLE_OMP_FOR:
7961 expand_omp_for (region, inner_stmt);
7962 break;
7964 case GIMPLE_OMP_SECTIONS:
7965 expand_omp_sections (region);
7966 break;
7968 case GIMPLE_OMP_SECTION:
7969 /* Individual omp sections are handled together with their
7970 parent GIMPLE_OMP_SECTIONS region. */
7971 break;
7973 case GIMPLE_OMP_SINGLE:
7974 expand_omp_single (region);
7975 break;
7977 case GIMPLE_OMP_ORDERED:
7979 gomp_ordered *ord_stmt
7980 = as_a <gomp_ordered *> (last_stmt (region->entry));
7981 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7982 OMP_CLAUSE_DEPEND))
7984 /* We'll expand these when expanding the corresponding
7985 worksharing region with ordered(n) clause. */
7986 gcc_assert (region->outer
7987 && region->outer->type == GIMPLE_OMP_FOR);
7988 region->ord_stmt = ord_stmt;
7989 break;
7992 /* FALLTHRU */
7993 case GIMPLE_OMP_MASTER:
7994 case GIMPLE_OMP_TASKGROUP:
7995 case GIMPLE_OMP_CRITICAL:
7996 case GIMPLE_OMP_TEAMS:
7997 expand_omp_synch (region);
7998 break;
8000 case GIMPLE_OMP_ATOMIC_LOAD:
8001 expand_omp_atomic (region);
8002 break;
8004 case GIMPLE_OMP_TARGET:
8005 expand_omp_target (region);
8006 break;
8008 default:
8009 gcc_unreachable ();
8012 input_location = saved_location;
8013 region = region->next;
8015 if (omp_any_child_fn_dumped)
8017 if (dump_file)
8018 dump_function_header (dump_file, current_function_decl, dump_flags);
8019 omp_any_child_fn_dumped = false;
8023 /* Helper for build_omp_regions. Scan the dominator tree starting at
8024 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8025 true, the function ends once a single tree is built (otherwise, a whole
8026 forest of OMP constructs may be built). */
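/* For instance (sketch), a parallel construct containing a worksharing
   loop:
     #pragma omp parallel
       #pragma omp for
   yields a GIMPLE_OMP_PARALLEL region whose 'inner' member is the
   GIMPLE_OMP_FOR region; the blocks ending in GIMPLE_OMP_RETURN become the
   respective 'exit' members.  */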
8028 static void
8029 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8030 bool single_tree)
8032 gimple_stmt_iterator gsi;
8033 gimple *stmt;
8034 basic_block son;
8036 gsi = gsi_last_bb (bb);
8037 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8039 struct omp_region *region;
8040 enum gimple_code code;
8042 stmt = gsi_stmt (gsi);
8043 code = gimple_code (stmt);
8044 if (code == GIMPLE_OMP_RETURN)
8046 /* STMT is the return point out of region PARENT. Mark it
8047 as the exit point and make PARENT the immediately
8048 enclosing region. */
8049 gcc_assert (parent);
8050 region = parent;
8051 region->exit = bb;
8052 parent = parent->outer;
8054 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8056 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8057 GIMPLE_OMP_RETURN, but matches with
8058 GIMPLE_OMP_ATOMIC_LOAD. */
8059 gcc_assert (parent);
8060 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8061 region = parent;
8062 region->exit = bb;
8063 parent = parent->outer;
8065 else if (code == GIMPLE_OMP_CONTINUE)
8067 gcc_assert (parent);
8068 parent->cont = bb;
8070 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8072 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8073 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8075 else
8077 region = new_omp_region (bb, code, parent);
8078 /* Otherwise... */
8079 if (code == GIMPLE_OMP_TARGET)
8081 switch (gimple_omp_target_kind (stmt))
8083 case GF_OMP_TARGET_KIND_REGION:
8084 case GF_OMP_TARGET_KIND_DATA:
8085 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8086 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8087 case GF_OMP_TARGET_KIND_OACC_DATA:
8088 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8089 break;
8090 case GF_OMP_TARGET_KIND_UPDATE:
8091 case GF_OMP_TARGET_KIND_ENTER_DATA:
8092 case GF_OMP_TARGET_KIND_EXIT_DATA:
8093 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8094 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8095 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8096 /* ..., other than for those stand-alone directives... */
8097 region = NULL;
8098 break;
8099 default:
8100 gcc_unreachable ();
8103 else if (code == GIMPLE_OMP_ORDERED
8104 && omp_find_clause (gimple_omp_ordered_clauses
8105 (as_a <gomp_ordered *> (stmt)),
8106 OMP_CLAUSE_DEPEND))
8107 /* #pragma omp ordered depend is also just a stand-alone
8108 directive. */
8109 region = NULL;
8110 /* ..., this directive becomes the parent for a new region. */
8111 if (region)
8112 parent = region;
8116 if (single_tree && !parent)
8117 return;
8119 for (son = first_dom_son (CDI_DOMINATORS, bb);
8120 son;
8121 son = next_dom_son (CDI_DOMINATORS, son))
8122 build_omp_regions_1 (son, parent, single_tree);
8125 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8126 root_omp_region. */
8128 static void
8129 build_omp_regions_root (basic_block root)
8131 gcc_assert (root_omp_region == NULL);
8132 build_omp_regions_1 (root, NULL, true);
8133 gcc_assert (root_omp_region != NULL);
8136 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8138 void
8139 omp_expand_local (basic_block head)
8141 build_omp_regions_root (head);
8142 if (dump_file && (dump_flags & TDF_DETAILS))
8144 fprintf (dump_file, "\nOMP region tree\n\n");
8145 dump_omp_region (dump_file, root_omp_region, 0);
8146 fprintf (dump_file, "\n");
8149 remove_exit_barriers (root_omp_region);
8150 expand_omp (root_omp_region);
8152 omp_free_regions ();
8155 /* Scan the CFG and build a tree of OMP regions, rooted at
8156 root_omp_region. */
8158 static void
8159 build_omp_regions (void)
8161 gcc_assert (root_omp_region == NULL);
8162 calculate_dominance_info (CDI_DOMINATORS);
8163 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8166 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8168 static unsigned int
8169 execute_expand_omp (void)
8171 build_omp_regions ();
8173 if (!root_omp_region)
8174 return 0;
8176 if (dump_file)
8178 fprintf (dump_file, "\nOMP region tree\n\n");
8179 dump_omp_region (dump_file, root_omp_region, 0);
8180 fprintf (dump_file, "\n");
8183 remove_exit_barriers (root_omp_region);
8185 expand_omp (root_omp_region);
8187 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8188 verify_loop_structure ();
8189 cleanup_tree_cfg ();
8191 omp_free_regions ();
8193 return 0;
8196 /* OMP expansion -- the default pass, run before creation of SSA form. */
8198 namespace {
8200 const pass_data pass_data_expand_omp =
8202 GIMPLE_PASS, /* type */
8203 "ompexp", /* name */
8204 OPTGROUP_OMP, /* optinfo_flags */
8205 TV_NONE, /* tv_id */
8206 PROP_gimple_any, /* properties_required */
8207 PROP_gimple_eomp, /* properties_provided */
8208 0, /* properties_destroyed */
8209 0, /* todo_flags_start */
8210 0, /* todo_flags_finish */
8213 class pass_expand_omp : public gimple_opt_pass
8215 public:
8216 pass_expand_omp (gcc::context *ctxt)
8217 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8220 /* opt_pass methods: */
8221 virtual unsigned int execute (function *)
8223 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8224 || flag_openmp_simd != 0)
8225 && !seen_error ());
8227 /* This pass always runs, to provide PROP_gimple_eomp.
8228 But often, there is nothing to do. */
8229 if (!gate)
8230 return 0;
8232 return execute_expand_omp ();
8235 }; // class pass_expand_omp
8237 } // anon namespace
8239 gimple_opt_pass *
8240 make_pass_expand_omp (gcc::context *ctxt)
8242 return new pass_expand_omp (ctxt);
8245 namespace {
8247 const pass_data pass_data_expand_omp_ssa =
8249 GIMPLE_PASS, /* type */
8250 "ompexpssa", /* name */
8251 OPTGROUP_OMP, /* optinfo_flags */
8252 TV_NONE, /* tv_id */
8253 PROP_cfg | PROP_ssa, /* properties_required */
8254 PROP_gimple_eomp, /* properties_provided */
8255 0, /* properties_destroyed */
8256 0, /* todo_flags_start */
8257 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8260 class pass_expand_omp_ssa : public gimple_opt_pass
8262 public:
8263 pass_expand_omp_ssa (gcc::context *ctxt)
8264 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8267 /* opt_pass methods: */
8268 virtual bool gate (function *fun)
8270 return !(fun->curr_properties & PROP_gimple_eomp);
8272 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8273 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8275 }; // class pass_expand_omp_ssa
8277 } // anon namespace
8279 gimple_opt_pass *
8280 make_pass_expand_omp_ssa (gcc::context *ctxt)
8282 return new pass_expand_omp_ssa (ctxt);
8285 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8286 GIMPLE_* codes. */
8288 bool
8289 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8290 int *region_idx)
8292 gimple *last = last_stmt (bb);
8293 enum gimple_code code = gimple_code (last);
8294 struct omp_region *cur_region = *region;
8295 bool fallthru = false;
8297 switch (code)
8299 case GIMPLE_OMP_PARALLEL:
8300 case GIMPLE_OMP_TASK:
8301 case GIMPLE_OMP_FOR:
8302 case GIMPLE_OMP_SINGLE:
8303 case GIMPLE_OMP_TEAMS:
8304 case GIMPLE_OMP_MASTER:
8305 case GIMPLE_OMP_TASKGROUP:
8306 case GIMPLE_OMP_CRITICAL:
8307 case GIMPLE_OMP_SECTION:
8308 case GIMPLE_OMP_GRID_BODY:
8309 cur_region = new_omp_region (bb, code, cur_region);
8310 fallthru = true;
8311 break;
8313 case GIMPLE_OMP_ORDERED:
8314 cur_region = new_omp_region (bb, code, cur_region);
8315 fallthru = true;
8316 if (omp_find_clause (gimple_omp_ordered_clauses
8317 (as_a <gomp_ordered *> (last)),
8318 OMP_CLAUSE_DEPEND))
8319 cur_region = cur_region->outer;
8320 break;
8322 case GIMPLE_OMP_TARGET:
8323 cur_region = new_omp_region (bb, code, cur_region);
8324 fallthru = true;
8325 switch (gimple_omp_target_kind (last))
8327 case GF_OMP_TARGET_KIND_REGION:
8328 case GF_OMP_TARGET_KIND_DATA:
8329 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8330 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8331 case GF_OMP_TARGET_KIND_OACC_DATA:
8332 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8333 break;
8334 case GF_OMP_TARGET_KIND_UPDATE:
8335 case GF_OMP_TARGET_KIND_ENTER_DATA:
8336 case GF_OMP_TARGET_KIND_EXIT_DATA:
8337 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8338 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8339 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8340 cur_region = cur_region->outer;
8341 break;
8342 default:
8343 gcc_unreachable ();
8345 break;
8347 case GIMPLE_OMP_SECTIONS:
8348 cur_region = new_omp_region (bb, code, cur_region);
8349 fallthru = true;
8350 break;
8352 case GIMPLE_OMP_SECTIONS_SWITCH:
8353 fallthru = false;
8354 break;
8356 case GIMPLE_OMP_ATOMIC_LOAD:
8357 case GIMPLE_OMP_ATOMIC_STORE:
8358 fallthru = true;
8359 break;
8361 case GIMPLE_OMP_RETURN:
8362 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8363 somewhere other than the next block. This will be
8364 created later. */
8365 cur_region->exit = bb;
8366 if (cur_region->type == GIMPLE_OMP_TASK)
8367 /* Add an edge corresponding to not scheduling the task
8368 immediately. */
8369 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8370 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8371 cur_region = cur_region->outer;
8372 break;
8374 case GIMPLE_OMP_CONTINUE:
8375 cur_region->cont = bb;
8376 switch (cur_region->type)
8378 case GIMPLE_OMP_FOR:
8379 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8380 successor edges as abnormal to prevent splitting
8381 them. */
8382 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8383 /* Make the loopback edge. */
8384 make_edge (bb, single_succ (cur_region->entry),
8385 EDGE_ABNORMAL);
8387 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8388 corresponds to the case that the body of the loop
8389 is not executed at all. */
8390 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8391 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8392 fallthru = false;
8393 break;
8395 case GIMPLE_OMP_SECTIONS:
8396 /* Wire up the edges into and out of the nested sections. */
8398 basic_block switch_bb = single_succ (cur_region->entry);
8400 struct omp_region *i;
8401 for (i = cur_region->inner; i ; i = i->next)
8403 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8404 make_edge (switch_bb, i->entry, 0);
8405 make_edge (i->exit, bb, EDGE_FALLTHRU);
8408 /* Make the loopback edge to the block with
8409 GIMPLE_OMP_SECTIONS_SWITCH. */
8410 make_edge (bb, switch_bb, 0);
8412 /* Make the edge from the switch to exit. */
8413 make_edge (switch_bb, bb->next_bb, 0);
8414 fallthru = false;
8416 break;
8418 case GIMPLE_OMP_TASK:
8419 fallthru = true;
8420 break;
8422 default:
8423 gcc_unreachable ();
8425 break;
8427 default:
8428 gcc_unreachable ();
8431 if (*region != cur_region)
8433 *region = cur_region;
8434 if (cur_region)
8435 *region_idx = cur_region->entry->index;
8436 else
8437 *region_idx = 0;
8440 return fallthru;
8443 #include "gt-omp-expand.h"