gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180 return false;
181 if (fd.iter_type != long_integer_type_node)
182 return false;
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
188 see through this. */
189 if (!is_gimple_min_invariant (fd.loop.n1)
190 || !is_gimple_min_invariant (fd.loop.n2)
191 || !is_gimple_min_invariant (fd.loop.step)
192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193 return false;
195 return true;
198 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199 presence (SIMD_SCHEDULE). */
201 static tree
202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 if (!simd_schedule)
205 return chunk_size;
207 int vf = omp_max_vf ();
208 if (vf == 1)
209 return chunk_size;
211 tree type = TREE_TYPE (chunk_size);
212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 build_int_cst (type, vf - 1));
214 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 build_int_cst (type, -vf));
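/* For illustration (made-up numbers, not from this file): with a
   maximum vectorization factor of 8, a chunk size of 5 becomes
   (5 + 7) & -8 == 8, i.e. the chunk size is rounded up to the next
   multiple of the vectorization factor so each chunk covers whole
   SIMD lanes.  */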
218 /* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
220 expanded. */
222 static vec<tree, va_gc> *
223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 tree t;
226 location_t loc = gimple_location (ws_stmt);
227 vec<tree, va_gc> *ws_args;
229 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 struct omp_for_data fd;
232 tree n1, n2;
234 omp_extract_for_data (for_stmt, &fd, NULL);
235 n1 = fd.loop.n1;
236 n2 = fd.loop.n2;
238 if (gimple_omp_for_combined_into_p (for_stmt))
240 tree innerc
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 OMP_CLAUSE__LOOPTEMP_);
243 gcc_assert (innerc);
244 n1 = OMP_CLAUSE_DECL (innerc);
245 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 OMP_CLAUSE__LOOPTEMP_);
247 gcc_assert (innerc);
248 n2 = OMP_CLAUSE_DECL (innerc);
251 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253 t = fold_convert_loc (loc, long_integer_type_node, n1);
254 ws_args->quick_push (t);
256 t = fold_convert_loc (loc, long_integer_type_node, n2);
257 ws_args->quick_push (t);
259 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260 ws_args->quick_push (t);
262 if (fd.chunk_size)
264 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 ws_args->quick_push (t);
269 return ws_args;
271 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 /* Number of sections is equal to the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 the exit of the sections region. */
276 basic_block bb = single_succ (gimple_bb (ws_stmt));
277 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278 vec_alloc (ws_args, 1);
279 ws_args->quick_push (t);
280 return ws_args;
283 gcc_unreachable ();
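/* Illustrative example (hypothetical source): for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 3; i < 300; i += 5)

   the vector built above is roughly { 3L, 300L, 5L, 4L }, i.e. lower
   bound, upper bound, step and chunk size converted to long, ready to
   be spliced into the combined GOMP_parallel_loop_* call.  For a
   sections region it is a single element holding the number of
   sections.  */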
286 /* Discover whether REGION is a combined parallel+workshare region. */
288 static void
289 determine_parallel_type (struct omp_region *region)
291 basic_block par_entry_bb, par_exit_bb;
292 basic_block ws_entry_bb, ws_exit_bb;
294 if (region == NULL || region->inner == NULL
295 || region->exit == NULL || region->inner->exit == NULL
296 || region->inner->cont == NULL)
297 return;
299 /* We only support parallel+for and parallel+sections. */
300 if (region->type != GIMPLE_OMP_PARALLEL
301 || (region->inner->type != GIMPLE_OMP_FOR
302 && region->inner->type != GIMPLE_OMP_SECTIONS))
303 return;
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb = region->entry;
308 par_exit_bb = region->exit;
309 ws_entry_bb = region->inner->entry;
310 ws_exit_bb = region->inner->exit;
312 if (single_succ (par_entry_bb) == ws_entry_bb
313 && single_succ (ws_exit_bb) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 || (last_and_only_stmt (ws_entry_bb)
317 && last_and_only_stmt (par_exit_bb))))
319 gimple *par_stmt = last_stmt (par_entry_bb);
320 gimple *ws_stmt = last_stmt (ws_entry_bb);
322 if (region->inner->type == GIMPLE_OMP_FOR)
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses = gimple_omp_for_clauses (ws_stmt);
334 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 if (c == NULL
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 == OMP_CLAUSE_SCHEDULE_STATIC)
338 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 region->is_combined_parallel = false;
341 region->inner->is_combined_parallel = false;
342 return;
346 region->is_combined_parallel = true;
347 region->inner->is_combined_parallel = true;
348 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
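/* Illustrative examples (hypothetical source):

     #pragma omp parallel for schedule (dynamic)
     for (i = 0; i < 100; i++) ...

   passes the checks above, so both regions are marked
   is_combined_parallel and a single combined library call is emitted
   later, whereas

     #pragma omp parallel for schedule (static)
     for (i = 0; i < 100; i++) ...

   and any ordered loop keep the separate parallel and workshare
   expansions, as explained in the comment above.  */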
352 /* Debugging dumps for parallel regions. */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
357 /* Dump the parallel region tree rooted at REGION. */
359 void
360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 gimple_code_name[region->type]);
365 if (region->inner)
366 dump_omp_region (file, region->inner, indent + 4);
368 if (region->cont)
370 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 region->cont->index);
374 if (region->exit)
375 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 region->exit->index);
377 else
378 fprintf (file, "%*s[no exit marker]\n", indent, "");
380 if (region->next)
381 dump_omp_region (file, region->next, indent);
384 DEBUG_FUNCTION void
385 debug_omp_region (struct omp_region *region)
387 dump_omp_region (stderr, region, 0);
390 DEBUG_FUNCTION void
391 debug_all_omp_regions (void)
393 dump_omp_region (stderr, root_omp_region, 0);
396 /* Create a new parallel region starting at STMT inside region PARENT. */
398 static struct omp_region *
399 new_omp_region (basic_block bb, enum gimple_code type,
400 struct omp_region *parent)
402 struct omp_region *region = XCNEW (struct omp_region);
404 region->outer = parent;
405 region->entry = bb;
406 region->type = type;
408 if (parent)
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region->next = parent->inner;
413 parent->inner = region;
415 else
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region->next = root_omp_region;
420 root_omp_region = region;
423 return region;
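/* For example, for

     #pragma omp parallel
     #pragma omp for
     for (...) ...

   the region tree ends up as a GIMPLE_OMP_PARALLEL region whose
   `inner' field points to a GIMPLE_OMP_FOR region; constructs at the
   same nesting level are chained through `next'.  (Illustrative
   sketch of the data structure only.)  */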
426 /* Release the memory associated with the region tree rooted at REGION. */
428 static void
429 free_omp_region_1 (struct omp_region *region)
431 struct omp_region *i, *n;
433 for (i = region->inner; i ; i = n)
435 n = i->next;
436 free_omp_region_1 (i);
439 free (region);
442 /* Release the memory for the entire omp region tree. */
444 void
445 omp_free_regions (void)
447 struct omp_region *r, *n;
448 for (r = root_omp_region; r ; r = n)
450 n = r->next;
451 free_omp_region_1 (r);
453 root_omp_region = NULL;
456 /* A convenience function to build an empty GIMPLE_COND with just the
457 condition. */
459 static gcond *
460 gimple_build_cond_empty (tree cond)
462 enum tree_code pred_code;
463 tree lhs, rhs;
465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
469 /* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
472 static bool
473 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 bool indirect = false;
476 for (region = region->outer; region; region = region->outer)
478 if (region->type == GIMPLE_OMP_PARALLEL)
479 indirect = true;
480 else if (region->type == GIMPLE_OMP_TARGET)
482 gomp_target *tgt_stmt
483 = as_a <gomp_target *> (last_stmt (region->entry));
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 OMP_CLAUSE__GRIDDIM_))
487 return indirect;
488 else
489 return true;
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl)))
495 return true;
497 return false;
500 /* Build the function calls to GOMP_parallel_start etc to actually
501 generate the parallel operation. REGION is the parallel region
502 being expanded. BB is the block where to insert the code. WS_ARGS
503 will be set if this is a call to a combined parallel+workshare
504 construct, it contains the list of additional arguments needed by
505 the workshare construct. */
507 static void
508 expand_parallel_call (struct omp_region *region, basic_block bb,
509 gomp_parallel *entry_stmt,
510 vec<tree, va_gc> *ws_args)
512 tree t, t1, t2, val, cond, c, clauses, flags;
513 gimple_stmt_iterator gsi;
514 gimple *stmt;
515 enum built_in_function start_ix;
516 int start_ix2;
517 location_t clause_loc;
518 vec<tree, va_gc> *args;
520 clauses = gimple_omp_parallel_clauses (entry_stmt);
522 /* Determine what flavor of GOMP_parallel we will be
523 emitting. */
524 start_ix = BUILT_IN_GOMP_PARALLEL;
525 if (is_combined_parallel (region))
527 switch (region->inner->type)
529 case GIMPLE_OMP_FOR:
530 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
531 switch (region->inner->sched_kind)
533 case OMP_CLAUSE_SCHEDULE_RUNTIME:
534 start_ix2 = 3;
535 break;
536 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
537 case OMP_CLAUSE_SCHEDULE_GUIDED:
538 if (region->inner->sched_modifiers
539 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541 start_ix2 = 3 + region->inner->sched_kind;
542 break;
544 /* FALLTHRU */
545 default:
546 start_ix2 = region->inner->sched_kind;
547 break;
549 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
550 start_ix = (enum built_in_function) start_ix2;
551 break;
552 case GIMPLE_OMP_SECTIONS:
553 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
554 break;
555 default:
556 gcc_unreachable ();
560 /* By default, the value of NUM_THREADS is zero (selected at run time)
561 and there is no conditional. */
562 cond = NULL_TREE;
563 val = build_int_cst (unsigned_type_node, 0);
564 flags = build_int_cst (unsigned_type_node, 0);
566 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
567 if (c)
568 cond = OMP_CLAUSE_IF_EXPR (c);
570 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
571 if (c)
573 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
574 clause_loc = OMP_CLAUSE_LOCATION (c);
576 else
577 clause_loc = gimple_location (entry_stmt);
579 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
580 if (c)
581 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583 /* Ensure 'val' is of the correct type. */
584 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586 /* If we found the clause 'if (cond)', build either
587 (cond != 0) or (cond ? val : 1u). */
588 if (cond)
590 cond = gimple_boolify (cond);
592 if (integer_zerop (val))
593 val = fold_build2_loc (clause_loc,
594 EQ_EXPR, unsigned_type_node, cond,
595 build_int_cst (TREE_TYPE (cond), 0));
596 else
598 basic_block cond_bb, then_bb, else_bb;
599 edge e, e_then, e_else;
600 tree tmp_then, tmp_else, tmp_join, tmp_var;
602 tmp_var = create_tmp_var (TREE_TYPE (val));
603 if (gimple_in_ssa_p (cfun))
605 tmp_then = make_ssa_name (tmp_var);
606 tmp_else = make_ssa_name (tmp_var);
607 tmp_join = make_ssa_name (tmp_var);
609 else
611 tmp_then = tmp_var;
612 tmp_else = tmp_var;
613 tmp_join = tmp_var;
616 e = split_block_after_labels (bb);
617 cond_bb = e->src;
618 bb = e->dest;
619 remove_edge (e);
621 then_bb = create_empty_bb (cond_bb);
622 else_bb = create_empty_bb (then_bb);
623 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
624 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626 stmt = gimple_build_cond_empty (cond);
627 gsi = gsi_start_bb (cond_bb);
628 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630 gsi = gsi_start_bb (then_bb);
631 expand_omp_build_assign (&gsi, tmp_then, val, true);
633 gsi = gsi_start_bb (else_bb);
634 expand_omp_build_assign (&gsi, tmp_else,
635 build_int_cst (unsigned_type_node, 1),
636 true);
638 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
639 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
640 add_bb_to_loop (then_bb, cond_bb->loop_father);
641 add_bb_to_loop (else_bb, cond_bb->loop_father);
642 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
643 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645 if (gimple_in_ssa_p (cfun))
647 gphi *phi = create_phi_node (tmp_join, bb);
648 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
649 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
652 val = tmp_join;
655 gsi = gsi_start_bb (bb);
656 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
657 false, GSI_CONTINUE_LINKING);
660 gsi = gsi_last_bb (bb);
661 t = gimple_omp_parallel_data_arg (entry_stmt);
662 if (t == NULL)
663 t1 = null_pointer_node;
664 else
665 t1 = build_fold_addr_expr (t);
666 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
667 t2 = build_fold_addr_expr (child_fndecl);
669 vec_alloc (args, 4 + vec_safe_length (ws_args));
670 args->quick_push (t2);
671 args->quick_push (t1);
672 args->quick_push (val);
673 if (ws_args)
674 args->splice (*ws_args);
675 args->quick_push (flags);
677 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
678 builtin_decl_explicit (start_ix), args);
680 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
681 false, GSI_CONTINUE_LINKING);
683 if (hsa_gen_requested_p ()
684 && parallel_needs_hsa_kernel_p (region))
686 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
687 hsa_register_kernel (child_cnode);
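/* Sketch of the emitted call (operand names follow the example
   comment near the top of this file, otherwise illustrative): for a
   plain parallel region

     GOMP_parallel (bar.omp_fn.0, &.omp_data_o, num_threads, flags);

   and for a combined parallel loop with schedule (dynamic) the
   ws_args collected earlier are spliced in between:

     GOMP_parallel_loop_dynamic (bar.omp_fn.0, &.omp_data_o,
                                 num_threads, n1, n2, step, chunk,
                                 flags);  */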
691 /* Insert a function call whose name is FUNC_NAME with the information from
692 ENTRY_STMT into the basic_block BB. */
694 static void
695 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
696 vec <tree, va_gc> *ws_args)
698 tree t, t1, t2;
699 gimple_stmt_iterator gsi;
700 vec <tree, va_gc> *args;
702 gcc_assert (vec_safe_length (ws_args) == 2);
703 tree func_name = (*ws_args)[0];
704 tree grain = (*ws_args)[1];
706 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
707 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
708 gcc_assert (count != NULL_TREE);
709 count = OMP_CLAUSE_OPERAND (count, 0);
711 gsi = gsi_last_bb (bb);
712 t = gimple_omp_parallel_data_arg (entry_stmt);
713 if (t == NULL)
714 t1 = null_pointer_node;
715 else
716 t1 = build_fold_addr_expr (t);
717 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719 vec_alloc (args, 4);
720 args->quick_push (t2);
721 args->quick_push (t1);
722 args->quick_push (count);
723 args->quick_push (grain);
724 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
727 GSI_CONTINUE_LINKING);
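/* The resulting call has the shape

     FUNC_NAME (bar.omp_fn.0, &.omp_data_o, count, grain);

   where FUNC_NAME and GRAIN come from the ws_args vector prepared by
   the Cilk lowering (illustrative sketch, operand names as above).  */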
730 /* Build the function call to GOMP_task to actually
731 generate the task operation. BB is the block where to insert the code. */
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 else
787 num_tasks = integer_zero_node;
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
799 tree flags = build_int_cst (unsigned_type_node, iflags);
801 tree cond = boolean_true_node;
802 if (ifc)
804 if (taskloop_p)
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 if (finalc)
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
837 gsi = gsi_last_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
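/* Sketch of the emitted calls (operand names illustrative): for a
   plain task

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                if_cond, flags, depend, priority);

   and for a taskloop

     GOMP_taskloop (child_fn, &.omp_data_o, copy_fn, arg_size,
                    arg_align, flags, num_tasks, priority,
                    startvar, endvar, step);  */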
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
874 tree chain = NULL_TREE, t;
875 unsigned ix;
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 DECL_CHAIN (t) = chain;
880 chain = t;
883 return chain;
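/* E.g. for a vector { a, b, c } the result is the chain a -> b -> c,
   i.e. DECL_CHAIN (a) == b, DECL_CHAIN (b) == c and DECL_CHAIN (c) ==
   NULL_TREE, preserving the original order of the vector.  */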
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
892 static void
893 remove_exit_barrier (struct omp_region *region)
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
902 exit_bb = region->exit;
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 gsi = gsi_last_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
949 any_addressable_vars = 1;
950 break;
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
963 any_addressable_vars = 1;
964 break;
966 if (block == gimple_block (parallel_stmt))
967 break;
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
976 static void
977 remove_exit_barriers (struct omp_region *region)
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
982 if (region->inner)
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
988 region = region->next;
989 remove_exit_barriers (region);
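/* For illustration:

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   Here the workshare's implicit barrier is immediately followed by
   the implicit barrier of the enclosing parallel, so the loop's
   GIMPLE_OMP_RETURN can usually be marked nowait, unless, as
   explained above, queued tasks might still need addressable locals
   of the parallel body to stay in scope.  */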
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for a task body, except
999 that in an untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1027 tree built_in;
1029 if (DECL_NAME (decl) == thr_num_id)
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1054 gimple_call_set_fndecl (call, built_in);
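/* For illustration: inside

     #pragma omp parallel
     {
       a[omp_get_thread_num ()] = omp_get_num_threads ();
     }

   both calls get their fndecl replaced by the const
   __builtin_omp_get_* variants, so repeated uses within the parallel
   body can be CSEd.  In an untied task only omp_get_num_threads is
   rewritten, since the thread executing the task may change at task
   scheduling points.  */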
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 tree t = *tp;
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1124 bool is_cilk_for
1125 = (flag_cilkplus
1126 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1127 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1128 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130 if (is_cilk_for)
1131 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1132 and the inner statement contains the name of the built-in function
1133 and grain. */
1134 ws_args = region->inner->ws_args;
1135 else if (is_combined_parallel (region))
1136 ws_args = region->ws_args;
1137 else
1138 ws_args = NULL;
1140 if (child_cfun->cfg)
1142 /* Due to inlining, it may happen that we have already outlined
1143 the region, in which case all we need to do is make the
1144 sub-graph unreachable and emit the parallel call. */
1145 edge entry_succ_e, exit_succ_e;
1147 entry_succ_e = single_succ_edge (entry_bb);
1149 gsi = gsi_last_bb (entry_bb);
1150 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1151 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1152 gsi_remove (&gsi, true);
1154 new_bb = entry_bb;
1155 if (exit_bb)
1157 exit_succ_e = single_succ_edge (exit_bb);
1158 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160 remove_edge_and_dominated_blocks (entry_succ_e);
1162 else
1164 unsigned srcidx, dstidx, num;
1166 /* If the parallel region needs data sent from the parent
1167 function, then the very first statement (except possible
1168 tree profile counter updates) of the parallel body
1169 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1170 &.OMP_DATA_O is passed as an argument to the child function,
1171 we need to replace it with the argument as seen by the child
1172 function.
1174 In most cases, this will end up being the identity assignment
1175 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1176 a function call that has been inlined, the original PARM_DECL
1177 .OMP_DATA_I may have been converted into a different local
1178 variable, in which case we need to keep the assignment. */
1179 if (gimple_omp_taskreg_data_arg (entry_stmt))
1181 basic_block entry_succ_bb
1182 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1183 : FALLTHRU_EDGE (entry_bb)->dest;
1184 tree arg;
1185 gimple *parcopy_stmt = NULL;
1187 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189 gimple *stmt;
1191 gcc_assert (!gsi_end_p (gsi));
1192 stmt = gsi_stmt (gsi);
1193 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1194 continue;
1196 if (gimple_num_ops (stmt) == 2)
1198 tree arg = gimple_assign_rhs1 (stmt);
1200 /* We're ignoring the subcode because we're
1201 effectively doing a STRIP_NOPS. */
1203 if (TREE_CODE (arg) == ADDR_EXPR
1204 && TREE_OPERAND (arg, 0)
1205 == gimple_omp_taskreg_data_arg (entry_stmt))
1207 parcopy_stmt = stmt;
1208 break;
1213 gcc_assert (parcopy_stmt != NULL);
1214 arg = DECL_ARGUMENTS (child_fn);
1216 if (!gimple_in_ssa_p (cfun))
1218 if (gimple_assign_lhs (parcopy_stmt) == arg)
1219 gsi_remove (&gsi, true);
1220 else
1222 /* ?? Is setting the subcode really necessary ?? */
1223 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1224 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 else
1229 tree lhs = gimple_assign_lhs (parcopy_stmt);
1230 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1231 /* We'd like to set the rhs to the default def in the child_fn,
1232 but it's too early to create ssa names in the child_fn.
1233 Instead, we set the rhs to the parm. In
1234 move_sese_region_to_fn, we introduce a default def for the
1235 parm, map the parm to its default def, and once we encounter
1236 this stmt, replace the parm with the default def. */
1237 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1238 update_stmt (parcopy_stmt);
1242 /* Declare local variables needed in CHILD_CFUN. */
1243 block = DECL_INITIAL (child_fn);
1244 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1245 /* The gimplifier could record temporaries in parallel/task block
1246 rather than in containing function's local_decls chain,
1247 which would mean cgraph missed finalizing them. Do it now. */
1248 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1249 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1250 varpool_node::finalize_decl (t);
1251 DECL_SAVED_TREE (child_fn) = NULL;
1252 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1253 gimple_set_body (child_fn, NULL);
1254 TREE_USED (block) = 1;
1256 /* Reset DECL_CONTEXT on function arguments. */
1257 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1258 DECL_CONTEXT (t) = child_fn;
1260 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1261 so that it can be moved to the child function. */
1262 gsi = gsi_last_bb (entry_bb);
1263 stmt = gsi_stmt (gsi);
1264 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1265 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1266 e = split_block (entry_bb, stmt);
1267 gsi_remove (&gsi, true);
1268 entry_bb = e->dest;
1269 edge e2 = NULL;
1270 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1271 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1272 else
1274 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1275 gcc_assert (e2->dest == region->exit);
1276 remove_edge (BRANCH_EDGE (entry_bb));
1277 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1278 gsi = gsi_last_bb (region->exit);
1279 gcc_assert (!gsi_end_p (gsi)
1280 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1281 gsi_remove (&gsi, true);
1284 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1285 if (exit_bb)
1287 gsi = gsi_last_bb (exit_bb);
1288 gcc_assert (!gsi_end_p (gsi)
1289 && (gimple_code (gsi_stmt (gsi))
1290 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1291 stmt = gimple_build_return (NULL);
1292 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1293 gsi_remove (&gsi, true);
1296 /* Move the parallel region into CHILD_CFUN. */
1298 if (gimple_in_ssa_p (cfun))
1300 init_tree_ssa (child_cfun);
1301 init_ssa_operands (child_cfun);
1302 child_cfun->gimple_df->in_ssa_p = true;
1303 block = NULL_TREE;
1305 else
1306 block = gimple_block (entry_stmt);
1308 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1309 if (exit_bb)
1310 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1311 if (e2)
1313 basic_block dest_bb = e2->dest;
1314 if (!exit_bb)
1315 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1316 remove_edge (e2);
1317 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1319 /* When the OMP expansion process cannot guarantee an up-to-date
1320 loop tree, arrange for the child function to fix up loops. */
1321 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1322 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1324 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1325 num = vec_safe_length (child_cfun->local_decls);
1326 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1328 t = (*child_cfun->local_decls)[srcidx];
1329 if (DECL_CONTEXT (t) == cfun->decl)
1330 continue;
1331 if (srcidx != dstidx)
1332 (*child_cfun->local_decls)[dstidx] = t;
1333 dstidx++;
1335 if (dstidx != num)
1336 vec_safe_truncate (child_cfun->local_decls, dstidx);
1338 /* Inform the callgraph about the new function. */
1339 child_cfun->curr_properties = cfun->curr_properties;
1340 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1341 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1342 cgraph_node *node = cgraph_node::get_create (child_fn);
1343 node->parallelized_function = 1;
1344 cgraph_node::add_new_function (child_fn, true);
1346 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1347 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1349 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1350 fixed in a following pass. */
1351 push_cfun (child_cfun);
1352 if (need_asm)
1353 assign_assembler_name_if_neeeded (child_fn);
1355 if (optimize)
1356 optimize_omp_library_calls (entry_stmt);
1357 cgraph_edge::rebuild_edges ();
1359 /* Some EH regions might become dead, see PR34608. If
1360 pass_cleanup_cfg isn't the first pass to happen with the
1361 new child, these dead EH edges might cause problems.
1362 Clean them up now. */
1363 if (flag_exceptions)
1365 basic_block bb;
1366 bool changed = false;
1368 FOR_EACH_BB_FN (bb, cfun)
1369 changed |= gimple_purge_dead_eh_edges (bb);
1370 if (changed)
1371 cleanup_tree_cfg ();
1373 if (gimple_in_ssa_p (cfun))
1374 update_ssa (TODO_update_ssa);
1375 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1376 verify_loop_structure ();
1377 pop_cfun ();
1379 if (dump_file && !gimple_in_ssa_p (cfun))
1381 omp_any_child_fn_dumped = true;
1382 dump_function_header (dump_file, child_fn, dump_flags);
1383 dump_function_to_file (child_fn, dump_file, dump_flags);
1387 /* Emit a library call to launch the children threads. */
1388 if (is_cilk_for)
1389 expand_cilk_for_call (new_bb,
1390 as_a <gomp_parallel *> (entry_stmt), ws_args);
1391 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1392 expand_parallel_call (region, new_bb,
1393 as_a <gomp_parallel *> (entry_stmt), ws_args);
1394 else
1395 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1396 if (gimple_in_ssa_p (cfun))
1397 update_ssa (TODO_update_ssa_only_virtuals);
1400 /* Information about members of an OpenACC collapsed loop nest. */
1402 struct oacc_collapse
1404 tree base; /* Base value. */
1405 tree iters; /* Number of steps. */
1406 tree step; /* Step size. */
1409 /* Helper for expand_oacc_for. Determine collapsed loop information.
1410 Fill in COUNTS array. Emit any initialization code before GSI.
1411 Return the calculated outer loop bound of BOUND_TYPE. */
1413 static tree
1414 expand_oacc_collapse_init (const struct omp_for_data *fd,
1415 gimple_stmt_iterator *gsi,
1416 oacc_collapse *counts, tree bound_type)
1418 tree total = build_int_cst (bound_type, 1);
1419 int ix;
1421 gcc_assert (integer_onep (fd->loop.step));
1422 gcc_assert (integer_zerop (fd->loop.n1));
1424 for (ix = 0; ix != fd->collapse; ix++)
1426 const omp_for_data_loop *loop = &fd->loops[ix];
1428 tree iter_type = TREE_TYPE (loop->v);
1429 tree diff_type = iter_type;
1430 tree plus_type = iter_type;
1432 gcc_assert (loop->cond_code == fd->loop.cond_code);
1434 if (POINTER_TYPE_P (iter_type))
1435 plus_type = sizetype;
1436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1437 diff_type = signed_type_for (diff_type);
1439 tree b = loop->n1;
1440 tree e = loop->n2;
1441 tree s = loop->step;
1442 bool up = loop->cond_code == LT_EXPR;
1443 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1444 bool negating;
1445 tree expr;
1447 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1448 true, GSI_SAME_STMT);
1449 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1450 true, GSI_SAME_STMT);
1452 /* Convert the step, avoiding possible unsigned->signed overflow. */
1453 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1454 if (negating)
1455 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1456 s = fold_convert (diff_type, s);
1457 if (negating)
1458 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1459 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1460 true, GSI_SAME_STMT);
1462 /* Determine the range, avoiding possible unsigned->signed overflow. */
1463 negating = !up && TYPE_UNSIGNED (iter_type);
1464 expr = fold_build2 (MINUS_EXPR, plus_type,
1465 fold_convert (plus_type, negating ? b : e),
1466 fold_convert (plus_type, negating ? e : b));
1467 expr = fold_convert (diff_type, expr);
1468 if (negating)
1469 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1470 tree range = force_gimple_operand_gsi
1471 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1473 /* Determine number of iterations. */
1474 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1475 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1476 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1478 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1479 true, GSI_SAME_STMT);
1481 counts[ix].base = b;
1482 counts[ix].iters = iters;
1483 counts[ix].step = s;
1485 total = fold_build2 (MULT_EXPR, bound_type, total,
1486 fold_convert (bound_type, iters));
1489 return total;
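/* Worked example (made-up loop): for

     for (i = 0; i < 10; i += 3)

   we get b == 0, e == 10, s == 3, dir == 1, range == 10 and
   iters == (10 - 1 + 3) / 3 == 4, matching the four iterations
   i = 0, 3, 6, 9.  With collapse > 1 the per-loop counts are
   multiplied together into TOTAL.  */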
1492 /* Emit initializers for collapsed loop members. IVAR is the outer
1493 loop iteration variable, from which collapsed loop iteration values
1494 are calculated. COUNTS array has been initialized by
1495 expand_oacc_collapse_init. */
1497 static void
1498 expand_oacc_collapse_vars (const struct omp_for_data *fd,
1499 gimple_stmt_iterator *gsi,
1500 const oacc_collapse *counts, tree ivar)
1502 tree ivar_type = TREE_TYPE (ivar);
1504 /* The most rapidly changing iteration variable is the innermost
1505 one. */
1506 for (int ix = fd->collapse; ix--;)
1508 const omp_for_data_loop *loop = &fd->loops[ix];
1509 const oacc_collapse *collapse = &counts[ix];
1510 tree iter_type = TREE_TYPE (loop->v);
1511 tree diff_type = TREE_TYPE (collapse->step);
1512 tree plus_type = iter_type;
1513 enum tree_code plus_code = PLUS_EXPR;
1514 tree expr;
1516 if (POINTER_TYPE_P (iter_type))
1518 plus_code = POINTER_PLUS_EXPR;
1519 plus_type = sizetype;
1522 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
1523 fold_convert (ivar_type, collapse->iters));
1524 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1525 collapse->step);
1526 expr = fold_build2 (plus_code, iter_type, collapse->base,
1527 fold_convert (plus_type, expr));
1528 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1529 true, GSI_SAME_STMT);
1530 gassign *ass = gimple_build_assign (loop->v, expr);
1531 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1533 if (ix)
1535 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
1536 fold_convert (ivar_type, collapse->iters));
1537 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1538 true, GSI_SAME_STMT);
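/* Worked example (made-up nest): for two collapsed loops with 4 and 3
   iterations, counts[1].iters == 3 and IVAR runs over 0 .. 11.  For
   ivar == 7 the inner variable gets index 7 % 3 == 1, then ivar
   becomes 7 / 3 == 2, which is the outer index, i.e. the linear
   iteration number is decomposed innermost first.  */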
1543 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1544 of the combined collapse > 1 loop constructs, generate code like:
1545 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1546 if (cond3 is <)
1547 adj = STEP3 - 1;
1548 else
1549 adj = STEP3 + 1;
1550 count3 = (adj + N32 - N31) / STEP3;
1551 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1552 if (cond2 is <)
1553 adj = STEP2 - 1;
1554 else
1555 adj = STEP2 + 1;
1556 count2 = (adj + N22 - N21) / STEP2;
1557 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1558 if (cond1 is <)
1559 adj = STEP1 - 1;
1560 else
1561 adj = STEP1 + 1;
1562 count1 = (adj + N12 - N11) / STEP1;
1563 count = count1 * count2 * count3;
1564 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1565 count = 0;
1566 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1567 of the combined loop constructs, just initialize COUNTS array
1568 from the _looptemp_ clauses. */
1570 /* NOTE: It *could* be better to moosh all of the BBs together,
1571 creating one larger BB with all the computation and the unexpected
1572 jump at the end. I.e.
1574 bool zero3, zero2, zero1, zero;
1576 zero3 = N32 c3 N31;
1577 count3 = (N32 - N31) /[cl] STEP3;
1578 zero2 = N22 c2 N21;
1579 count2 = (N22 - N21) /[cl] STEP2;
1580 zero1 = N12 c1 N11;
1581 count1 = (N12 - N11) /[cl] STEP1;
1582 zero = zero3 || zero2 || zero1;
1583 count = count1 * count2 * count3;
1584 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1586 After all, we expect the zero=false, and thus we expect to have to
1587 evaluate all of the comparison expressions, so short-circuiting
1588 oughtn't be a win. Since the condition isn't protecting a
1589 denominator, we're not concerned about divide-by-zero, so we can
1590 fully evaluate count even if a numerator turned out to be wrong.
1592 It seems like putting this all together would create much better
1593 scheduling opportunities, and less pressure on the chip's branch
1594 predictor. */
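/* Concrete instance of the scheme above (illustrative): for

     #pragma omp for collapse (2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++) ...

   both conditions are <, so adj == STEP - 1 == 0 and count1 == n,
   count2 == m and count == n * m, with the __builtin_expect guards
   branching to ZERO_ITER_BB when an upper bound is already below its
   lower bound.  */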
1596 static void
1597 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1598 basic_block &entry_bb, tree *counts,
1599 basic_block &zero_iter1_bb, int &first_zero_iter1,
1600 basic_block &zero_iter2_bb, int &first_zero_iter2,
1601 basic_block &l2_dom_bb)
1603 tree t, type = TREE_TYPE (fd->loop.v);
1604 edge e, ne;
1605 int i;
1607 /* Collapsed loops need work for expansion into SSA form. */
1608 gcc_assert (!gimple_in_ssa_p (cfun));
1610 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1611 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1613 gcc_assert (fd->ordered == 0);
1614 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1615 isn't supposed to be handled, as the inner loop doesn't
1616 use it. */
1617 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1618 OMP_CLAUSE__LOOPTEMP_);
1619 gcc_assert (innerc);
1620 for (i = 0; i < fd->collapse; i++)
1622 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1623 OMP_CLAUSE__LOOPTEMP_);
1624 gcc_assert (innerc);
1625 if (i)
1626 counts[i] = OMP_CLAUSE_DECL (innerc);
1627 else
1628 counts[0] = NULL_TREE;
1630 return;
1633 for (i = fd->collapse; i < fd->ordered; i++)
1635 tree itype = TREE_TYPE (fd->loops[i].v);
1636 counts[i] = NULL_TREE;
1637 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1638 fold_convert (itype, fd->loops[i].n1),
1639 fold_convert (itype, fd->loops[i].n2));
1640 if (t && integer_zerop (t))
1642 for (i = fd->collapse; i < fd->ordered; i++)
1643 counts[i] = build_int_cst (type, 0);
1644 break;
1647 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1649 tree itype = TREE_TYPE (fd->loops[i].v);
1651 if (i >= fd->collapse && counts[i])
1652 continue;
1653 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1654 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1655 fold_convert (itype, fd->loops[i].n1),
1656 fold_convert (itype, fd->loops[i].n2)))
1657 == NULL_TREE || !integer_onep (t)))
1659 gcond *cond_stmt;
1660 tree n1, n2;
1661 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1662 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1663 true, GSI_SAME_STMT);
1664 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1665 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1666 true, GSI_SAME_STMT);
1667 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1668 NULL_TREE, NULL_TREE);
1669 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1670 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1671 expand_omp_regimplify_p, NULL, NULL)
1672 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1673 expand_omp_regimplify_p, NULL, NULL))
1675 *gsi = gsi_for_stmt (cond_stmt);
1676 gimple_regimplify_operands (cond_stmt, gsi);
1678 e = split_block (entry_bb, cond_stmt);
1679 basic_block &zero_iter_bb
1680 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1681 int &first_zero_iter
1682 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1683 if (zero_iter_bb == NULL)
1685 gassign *assign_stmt;
1686 first_zero_iter = i;
1687 zero_iter_bb = create_empty_bb (entry_bb);
1688 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1689 *gsi = gsi_after_labels (zero_iter_bb);
1690 if (i < fd->collapse)
1691 assign_stmt = gimple_build_assign (fd->loop.n2,
1692 build_zero_cst (type));
1693 else
1695 counts[i] = create_tmp_reg (type, ".count");
1696 assign_stmt
1697 = gimple_build_assign (counts[i], build_zero_cst (type));
1699 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1700 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1701 entry_bb);
1703 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1704 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1705 e->flags = EDGE_TRUE_VALUE;
1706 e->probability = REG_BR_PROB_BASE - ne->probability;
1707 if (l2_dom_bb == NULL)
1708 l2_dom_bb = entry_bb;
1709 entry_bb = e->dest;
1710 *gsi = gsi_last_bb (entry_bb);
1713 if (POINTER_TYPE_P (itype))
1714 itype = signed_type_for (itype);
1715 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1716 ? -1 : 1));
1717 t = fold_build2 (PLUS_EXPR, itype,
1718 fold_convert (itype, fd->loops[i].step), t);
1719 t = fold_build2 (PLUS_EXPR, itype, t,
1720 fold_convert (itype, fd->loops[i].n2));
1721 t = fold_build2 (MINUS_EXPR, itype, t,
1722 fold_convert (itype, fd->loops[i].n1));
1723 /* ?? We could probably use CEIL_DIV_EXPR instead of
1724 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1725 generate the same code in the end because generically we
1726 don't know that the values involved must be negative for
1727 GT?? */
1728 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1729 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1730 fold_build1 (NEGATE_EXPR, itype, t),
1731 fold_build1 (NEGATE_EXPR, itype,
1732 fold_convert (itype,
1733 fd->loops[i].step)));
1734 else
1735 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1736 fold_convert (itype, fd->loops[i].step));
1737 t = fold_convert (type, t);
1738 if (TREE_CODE (t) == INTEGER_CST)
1739 counts[i] = t;
1740 else
1742 if (i < fd->collapse || i != first_zero_iter2)
1743 counts[i] = create_tmp_reg (type, ".count");
1744 expand_omp_build_assign (gsi, counts[i], t);
1746 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1748 if (i == 0)
1749 t = counts[0];
1750 else
1751 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1752 expand_omp_build_assign (gsi, fd->loop.n2, t);
1757 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1758 T = V;
1759 V3 = N31 + (T % count3) * STEP3;
1760 T = T / count3;
1761 V2 = N21 + (T % count2) * STEP2;
1762 T = T / count2;
1763 V1 = N11 + T * STEP1;
1764 if this loop doesn't have an inner loop construct combined with it.
1765 If it does have an inner loop construct combined with it and the
1766 iteration count isn't known constant, store values from counts array
1767 into its _looptemp_ temporaries instead. */
1769 static void
1770 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1771 tree *counts, gimple *inner_stmt, tree startvar)
1773 int i;
1774 if (gimple_omp_for_combined_p (fd->for_stmt))
1776 /* If fd->loop.n2 is constant, then no propagation of the counts
1777 is needed, they are constant. */
1778 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1779 return;
1781 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1782 ? gimple_omp_taskreg_clauses (inner_stmt)
1783 : gimple_omp_for_clauses (inner_stmt);
1784 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1785 isn't supposed to be handled, as the inner loop doesn't
1786 use it. */
1787 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1796 tree tem = OMP_CLAUSE_DECL (innerc);
1797 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1798 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1799 false, GSI_CONTINUE_LINKING);
1800 gassign *stmt = gimple_build_assign (tem, t);
1801 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1804 return;
1807 tree type = TREE_TYPE (fd->loop.v);
1808 tree tem = create_tmp_reg (type, ".tem");
1809 gassign *stmt = gimple_build_assign (tem, startvar);
1810 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1812 for (i = fd->collapse - 1; i >= 0; i--)
1814 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1815 itype = vtype;
1816 if (POINTER_TYPE_P (vtype))
1817 itype = signed_type_for (vtype);
1818 if (i != 0)
1819 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1820 else
1821 t = tem;
1822 t = fold_convert (itype, t);
1823 t = fold_build2 (MULT_EXPR, itype, t,
1824 fold_convert (itype, fd->loops[i].step));
1825 if (POINTER_TYPE_P (vtype))
1826 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1827 else
1828 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1829 t = force_gimple_operand_gsi (gsi, t,
1830 DECL_P (fd->loops[i].v)
1831 && TREE_ADDRESSABLE (fd->loops[i].v),
1832 NULL_TREE, false,
1833 GSI_CONTINUE_LINKING);
1834 stmt = gimple_build_assign (fd->loops[i].v, t);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1836 if (i != 0)
1838 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1847 /* Helper function for expand_omp_for_*. Generate code like:
1848 L10:
1849 V3 += STEP3;
1850 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1851 L11:
1852 V3 = N31;
1853 V2 += STEP2;
1854 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1855 L12:
1856 V2 = N21;
1857 V1 += STEP1;
1858 goto BODY_BB; */
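/* Illustrative example (editor's note, not part of the original source):
   for collapse(2) the generated chain degenerates to
     L10:
       V2 += STEP2;
       if (V2 cond2 N22) goto BODY_BB; else goto L11;
     L11:
       V2 = N21;
       V1 += STEP1;
       goto BODY_BB;
   i.e. only the inner variables are tested here; the outermost one is
   advanced unconditionally, its bound being enforced by the caller's
   overall iteration count check.  */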
1860 static basic_block
1861 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1862 basic_block body_bb)
1864 basic_block last_bb, bb, collapse_bb = NULL;
1865 int i;
1866 gimple_stmt_iterator gsi;
1867 edge e;
1868 tree t;
1869 gimple *stmt;
1871 last_bb = cont_bb;
1872 for (i = fd->collapse - 1; i >= 0; i--)
1874 tree vtype = TREE_TYPE (fd->loops[i].v);
1876 bb = create_empty_bb (last_bb);
1877 add_bb_to_loop (bb, last_bb->loop_father);
1878 gsi = gsi_start_bb (bb);
1880 if (i < fd->collapse - 1)
1882 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1883 e->probability = REG_BR_PROB_BASE / 8;
1885 t = fd->loops[i + 1].n1;
1886 t = force_gimple_operand_gsi (&gsi, t,
1887 DECL_P (fd->loops[i + 1].v)
1888 && TREE_ADDRESSABLE (fd->loops[i
1889 + 1].v),
1890 NULL_TREE, false,
1891 GSI_CONTINUE_LINKING);
1892 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1893 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1895 else
1896 collapse_bb = bb;
1898 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1900 if (POINTER_TYPE_P (vtype))
1901 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1902 else
1903 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1904 t = force_gimple_operand_gsi (&gsi, t,
1905 DECL_P (fd->loops[i].v)
1906 && TREE_ADDRESSABLE (fd->loops[i].v),
1907 NULL_TREE, false, GSI_CONTINUE_LINKING);
1908 stmt = gimple_build_assign (fd->loops[i].v, t);
1909 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1911 if (i > 0)
1913 t = fd->loops[i].n2;
1914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1915 false, GSI_CONTINUE_LINKING);
1916 tree v = fd->loops[i].v;
1917 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1918 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1919 false, GSI_CONTINUE_LINKING);
1920 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1921 stmt = gimple_build_cond_empty (t);
1922 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1923 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1924 e->probability = REG_BR_PROB_BASE * 7 / 8;
1926 else
1927 make_edge (bb, body_bb, EDGE_FALLTHRU);
1928 last_bb = bb;
1931 return collapse_bb;
1934 /* Expand #pragma omp ordered depend(source). */
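/* Illustrative example (editor's note, not part of the original source,
   assuming the usual libgomp entry points): a "#pragma omp ordered
   depend(source)" inside an ordered(N) loop expands to a single call
     GOMP_doacross_post (&.orditera[0]);
   (GOMP_doacross_ull_post for unsigned long long iterators), passing the
   address of the array that holds the current iteration's counter values.  */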
1936 static void
1937 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1938 tree *counts, location_t loc)
1940 enum built_in_function source_ix
1941 = fd->iter_type == long_integer_type_node
1942 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1943 gimple *g
1944 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1945 build_fold_addr_expr (counts[fd->ordered]));
1946 gimple_set_location (g, loc);
1947 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1950 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
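/* Illustrative example (editor's note, not part of the original source,
   assuming the usual libgomp entry points): a clause such as
   "depend(sink: i - 1, j + 1)" expands to a call like
     GOMP_doacross_wait (i_cnt - 1, j_cnt + 1);
   where i_cnt/j_cnt are the zero-based iteration counters, guarded by a
   runtime check that the referenced iteration actually lies within the
   iteration space (hence the cond built below).  */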
1952 static void
1953 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1954 tree *counts, tree c, location_t loc)
1956 auto_vec<tree, 10> args;
1957 enum built_in_function sink_ix
1958 = fd->iter_type == long_integer_type_node
1959 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1960 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1961 int i;
1962 gimple_stmt_iterator gsi2 = *gsi;
1963 bool warned_step = false;
1965 for (i = 0; i < fd->ordered; i++)
1967 tree step = NULL_TREE;
1968 off = TREE_PURPOSE (deps);
1969 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1971 step = TREE_OPERAND (off, 1);
1972 off = TREE_OPERAND (off, 0);
1974 if (!integer_zerop (off))
1976 gcc_assert (fd->loops[i].cond_code == LT_EXPR
1977 || fd->loops[i].cond_code == GT_EXPR);
1978 bool forward = fd->loops[i].cond_code == LT_EXPR;
1979 if (step)
1981 /* Non-simple Fortran DO loops. If step is variable,
1982 we don't even know the direction at compile time, so
1983 we can't warn. */
1984 if (TREE_CODE (step) != INTEGER_CST)
1985 break;
1986 forward = tree_int_cst_sgn (step) != -1;
1988 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
1989 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
1990 "lexically later iteration");
1991 break;
1993 deps = TREE_CHAIN (deps);
1995 /* If all offsets corresponding to the collapsed loops are zero,
1996 this depend clause can be ignored. FIXME: but a flush is still
1997 needed; we'd need to emit one __sync_synchronize () for it
1998 (perhaps conditionally). Solve this together with the
1999 conservative dependence folding optimization.
2000 if (i >= fd->collapse)
2001 return; */
2003 deps = OMP_CLAUSE_DECL (c);
2004 gsi_prev (&gsi2);
2005 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2006 edge e2 = split_block_after_labels (e1->dest);
2008 gsi2 = gsi_after_labels (e1->dest);
2009 *gsi = gsi_last_bb (e1->src);
2010 for (i = 0; i < fd->ordered; i++)
2012 tree itype = TREE_TYPE (fd->loops[i].v);
2013 tree step = NULL_TREE;
2014 tree orig_off = NULL_TREE;
2015 if (POINTER_TYPE_P (itype))
2016 itype = sizetype;
2017 if (i)
2018 deps = TREE_CHAIN (deps);
2019 off = TREE_PURPOSE (deps);
2020 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2022 step = TREE_OPERAND (off, 1);
2023 off = TREE_OPERAND (off, 0);
2024 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2025 && integer_onep (fd->loops[i].step)
2026 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2028 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2029 if (step)
2031 off = fold_convert_loc (loc, itype, off);
2032 orig_off = off;
2033 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2036 if (integer_zerop (off))
2037 t = boolean_true_node;
2038 else
2040 tree a;
2041 tree co = fold_convert_loc (loc, itype, off);
2042 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2045 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2046 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2047 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2048 co);
2050 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2051 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2052 fd->loops[i].v, co);
2053 else
2054 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2055 fd->loops[i].v, co);
2056 if (step)
2058 tree t1, t2;
2059 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2060 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2061 fd->loops[i].n1);
2062 else
2063 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2064 fd->loops[i].n2);
2065 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2066 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2067 fd->loops[i].n2);
2068 else
2069 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2070 fd->loops[i].n1);
2071 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2072 step, build_int_cst (TREE_TYPE (step), 0));
2073 if (TREE_CODE (step) != INTEGER_CST)
2075 t1 = unshare_expr (t1);
2076 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2077 false, GSI_CONTINUE_LINKING);
2078 t2 = unshare_expr (t2);
2079 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2080 false, GSI_CONTINUE_LINKING);
2082 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2083 t, t2, t1);
2085 else if (fd->loops[i].cond_code == LT_EXPR)
2087 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2088 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2089 fd->loops[i].n1);
2090 else
2091 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2094 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2095 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2096 fd->loops[i].n2);
2097 else
2098 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2099 fd->loops[i].n1);
2101 if (cond)
2102 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2103 else
2104 cond = t;
2106 off = fold_convert_loc (loc, itype, off);
2108 if (step
2109 || (fd->loops[i].cond_code == LT_EXPR
2110 ? !integer_onep (fd->loops[i].step)
2111 : !integer_minus_onep (fd->loops[i].step)))
2113 if (step == NULL_TREE
2114 && TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2117 fold_build1_loc (loc, NEGATE_EXPR, itype,
2118 s));
2119 else
2120 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2121 orig_off ? orig_off : off, s);
2122 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2123 build_int_cst (itype, 0));
2124 if (integer_zerop (t) && !warned_step)
2126 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2127 "in the iteration space");
2128 warned_step = true;
2130 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2131 cond, t);
2134 if (i <= fd->collapse - 1 && fd->collapse > 1)
2135 t = fd->loop.v;
2136 else if (counts[i])
2137 t = counts[i];
2138 else
2140 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2141 fd->loops[i].v, fd->loops[i].n1);
2142 t = fold_convert_loc (loc, fd->iter_type, t);
2144 if (step)
2145 /* We have divided off by step already earlier. */;
2146 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2147 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2148 fold_build1_loc (loc, NEGATE_EXPR, itype,
2149 s));
2150 else
2151 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2152 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2153 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2154 off = fold_convert_loc (loc, fd->iter_type, off);
2155 if (i <= fd->collapse - 1 && fd->collapse > 1)
2157 if (i)
2158 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2159 off);
2160 if (i < fd->collapse - 1)
2162 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2163 counts[i]);
2164 continue;
2167 off = unshare_expr (off);
2168 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2169 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2170 true, GSI_SAME_STMT);
2171 args.safe_push (t);
2173 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2174 gimple_set_location (g, loc);
2175 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2177 cond = unshare_expr (cond);
2178 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2179 GSI_CONTINUE_LINKING);
2180 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2181 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2182 e3->probability = REG_BR_PROB_BASE / 8;
2183 e1->probability = REG_BR_PROB_BASE - e3->probability;
2184 e1->flags = EDGE_TRUE_VALUE;
2185 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2187 *gsi = gsi_after_labels (e2->dest);
2190 /* Expand all #pragma omp ordered depend(source) and
2191 #pragma omp ordered depend(sink:...) constructs in the current
2192 #pragma omp for ordered(n) region. */
2194 static void
2195 expand_omp_ordered_source_sink (struct omp_region *region,
2196 struct omp_for_data *fd, tree *counts,
2197 basic_block cont_bb)
2199 struct omp_region *inner;
2200 int i;
2201 for (i = fd->collapse - 1; i < fd->ordered; i++)
2202 if (i == fd->collapse - 1 && fd->collapse > 1)
2203 counts[i] = NULL_TREE;
2204 else if (i >= fd->collapse && !cont_bb)
2205 counts[i] = build_zero_cst (fd->iter_type);
2206 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2207 && integer_onep (fd->loops[i].step))
2208 counts[i] = NULL_TREE;
2209 else
2210 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2211 tree atype
2212 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2213 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2214 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2216 for (inner = region->inner; inner; inner = inner->next)
2217 if (inner->type == GIMPLE_OMP_ORDERED)
2219 gomp_ordered *ord_stmt = inner->ord_stmt;
2220 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2221 location_t loc = gimple_location (ord_stmt);
2222 tree c;
2223 for (c = gimple_omp_ordered_clauses (ord_stmt);
2224 c; c = OMP_CLAUSE_CHAIN (c))
2225 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2226 break;
2227 if (c)
2228 expand_omp_ordered_source (&gsi, fd, counts, loc);
2229 for (c = gimple_omp_ordered_clauses (ord_stmt);
2230 c; c = OMP_CLAUSE_CHAIN (c))
2231 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2232 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2233 gsi_remove (&gsi, true);
2237 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2238 collapsed. */
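/* Illustrative example (editor's note, not part of the original source):
   for "#pragma omp for ordered(2)" without collapse, the worksharing
   machinery handles fd->loops[0] and this function wraps the body in one
   additional sequential loop over fd->loops[1], keeping the matching
   element of the .orditera array up to date so that depend(source) and
   depend(sink: ...) see the full iteration vector.  */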
2240 static basic_block
2241 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2242 basic_block cont_bb, basic_block body_bb,
2243 bool ordered_lastprivate)
2245 if (fd->ordered == fd->collapse)
2246 return cont_bb;
2248 if (!cont_bb)
2250 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2251 for (int i = fd->collapse; i < fd->ordered; i++)
2253 tree type = TREE_TYPE (fd->loops[i].v);
2254 tree n1 = fold_convert (type, fd->loops[i].n1);
2255 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2256 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2257 size_int (i - fd->collapse + 1),
2258 NULL_TREE, NULL_TREE);
2259 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2261 return NULL;
2264 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2266 tree t, type = TREE_TYPE (fd->loops[i].v);
2267 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2268 expand_omp_build_assign (&gsi, fd->loops[i].v,
2269 fold_convert (type, fd->loops[i].n1));
2270 if (counts[i])
2271 expand_omp_build_assign (&gsi, counts[i],
2272 build_zero_cst (fd->iter_type));
2273 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2274 size_int (i - fd->collapse + 1),
2275 NULL_TREE, NULL_TREE);
2276 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2277 if (!gsi_end_p (gsi))
2278 gsi_prev (&gsi);
2279 else
2280 gsi = gsi_last_bb (body_bb);
2281 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2282 basic_block new_body = e1->dest;
2283 if (body_bb == cont_bb)
2284 cont_bb = new_body;
2285 edge e2 = NULL;
2286 basic_block new_header;
2287 if (EDGE_COUNT (cont_bb->preds) > 0)
2289 gsi = gsi_last_bb (cont_bb);
2290 if (POINTER_TYPE_P (type))
2291 t = fold_build_pointer_plus (fd->loops[i].v,
2292 fold_convert (sizetype,
2293 fd->loops[i].step));
2294 else
2295 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].step));
2297 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2298 if (counts[i])
2300 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2301 build_int_cst (fd->iter_type, 1));
2302 expand_omp_build_assign (&gsi, counts[i], t);
2303 t = counts[i];
2305 else
2307 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2308 fd->loops[i].v, fd->loops[i].n1);
2309 t = fold_convert (fd->iter_type, t);
2310 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2313 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, t);
2317 gsi_prev (&gsi);
2318 e2 = split_block (cont_bb, gsi_stmt (gsi));
2319 new_header = e2->dest;
2321 else
2322 new_header = cont_bb;
2323 gsi = gsi_after_labels (new_header);
2324 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2325 true, GSI_SAME_STMT);
2326 tree n2
2327 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2328 true, NULL_TREE, true, GSI_SAME_STMT);
2329 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2330 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2331 edge e3 = split_block (new_header, gsi_stmt (gsi));
2332 cont_bb = e3->dest;
2333 remove_edge (e1);
2334 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2335 e3->flags = EDGE_FALSE_VALUE;
2336 e3->probability = REG_BR_PROB_BASE / 8;
2337 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2338 e1->probability = REG_BR_PROB_BASE - e3->probability;
2340 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2341 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2343 if (e2)
2345 struct loop *loop = alloc_loop ();
2346 loop->header = new_header;
2347 loop->latch = e2->src;
2348 add_loop (loop, body_bb->loop_father);
2352 /* If there are any lastprivate clauses and it is possible some loops
2353 might have zero iterations, ensure all the decls are initialized,
2354 otherwise we could crash evaluating C++ class iterators with lastprivate
2355 clauses. */
2356 bool need_inits = false;
2357 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2358 if (need_inits)
2360 tree type = TREE_TYPE (fd->loops[i].v);
2361 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2362 expand_omp_build_assign (&gsi, fd->loops[i].v,
2363 fold_convert (type, fd->loops[i].n1));
2365 else
2367 tree type = TREE_TYPE (fd->loops[i].v);
2368 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2369 boolean_type_node,
2370 fold_convert (type, fd->loops[i].n1),
2371 fold_convert (type, fd->loops[i].n2));
2372 if (!integer_onep (this_cond))
2373 need_inits = true;
2376 return cont_bb;
2379 /* A subroutine of expand_omp_for. Generate code for a parallel
2380 loop with any schedule. Given parameters:
2382 for (V = N1; V cond N2; V += STEP) BODY;
2384 where COND is "<" or ">", we generate pseudocode
2386 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2387 if (more) goto L0; else goto L3;
2389 V = istart0;
2390 iend = iend0;
2392 BODY;
2393 V += STEP;
2394 if (V cond iend) goto L1; else goto L2;
2396 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2399 If this is a combined omp parallel loop, instead of the call to
2400 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2401 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2402 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2403 inner GIMPLE_OMP_FOR, and the statements V += STEP; and
2404 if (V cond iend) goto L1; else goto L2; are removed.
2406 For collapsed loops, given parameters:
2407 collapse(3)
2408 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2409 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2410 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2411 BODY;
2413 we generate pseudocode
2415 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2416 if (cond3 is <)
2417 adj = STEP3 - 1;
2418 else
2419 adj = STEP3 + 1;
2420 count3 = (adj + N32 - N31) / STEP3;
2421 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2422 if (cond2 is <)
2423 adj = STEP2 - 1;
2424 else
2425 adj = STEP2 + 1;
2426 count2 = (adj + N22 - N21) / STEP2;
2427 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2428 if (cond1 is <)
2429 adj = STEP1 - 1;
2430 else
2431 adj = STEP1 + 1;
2432 count1 = (adj + N12 - N11) / STEP1;
2433 count = count1 * count2 * count3;
2434 goto Z1;
2436 count = 0;
2438 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2439 if (more) goto L0; else goto L3;
2441 V = istart0;
2442 T = V;
2443 V3 = N31 + (T % count3) * STEP3;
2444 T = T / count3;
2445 V2 = N21 + (T % count2) * STEP2;
2446 T = T / count2;
2447 V1 = N11 + T * STEP1;
2448 iend = iend0;
2450 BODY;
2451 V += 1;
2452 if (V < iend) goto L10; else goto L2;
2453 L10:
2454 V3 += STEP3;
2455 if (V3 cond3 N32) goto L1; else goto L11;
2456 L11:
2457 V3 = N31;
2458 V2 += STEP2;
2459 if (V2 cond2 N22) goto L1; else goto L12;
2460 L12:
2461 V2 = N21;
2462 V1 += STEP1;
2463 goto L1;
2465 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
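/* Illustrative example (editor's note, not part of the original source,
   assuming the usual libgomp entry points): for
   "#pragma omp for schedule(dynamic, 2) collapse(2)" over i in [0, 4)
   and j in [0, 3), count = 4 * 3 = 12 and the generated code behaves
   roughly like
     if (GOMP_loop_dynamic_start (0, 12, 1, 2, &istart0, &iend0))
       do
	 for (V = istart0; V < iend0; V++)
	   { recover i and j from V; BODY; }
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */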
2470 static void
2471 expand_omp_for_generic (struct omp_region *region,
2472 struct omp_for_data *fd,
2473 enum built_in_function start_fn,
2474 enum built_in_function next_fn,
2475 gimple *inner_stmt)
2477 tree type, istart0, iend0, iend;
2478 tree t, vmain, vback, bias = NULL_TREE;
2479 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2480 basic_block l2_bb = NULL, l3_bb = NULL;
2481 gimple_stmt_iterator gsi;
2482 gassign *assign_stmt;
2483 bool in_combined_parallel = is_combined_parallel (region);
2484 bool broken_loop = region->cont == NULL;
2485 edge e, ne;
2486 tree *counts = NULL;
2487 int i;
2488 bool ordered_lastprivate = false;
2490 gcc_assert (!broken_loop || !in_combined_parallel);
2491 gcc_assert (fd->iter_type == long_integer_type_node
2492 || !in_combined_parallel);
2494 entry_bb = region->entry;
2495 cont_bb = region->cont;
2496 collapse_bb = NULL;
2497 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2498 gcc_assert (broken_loop
2499 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2500 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2501 l1_bb = single_succ (l0_bb);
2502 if (!broken_loop)
2504 l2_bb = create_empty_bb (cont_bb);
2505 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2506 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2507 == l1_bb));
2508 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2510 else
2511 l2_bb = NULL;
2512 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2513 exit_bb = region->exit;
2515 gsi = gsi_last_bb (entry_bb);
2517 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2518 if (fd->ordered
2519 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2520 OMP_CLAUSE_LASTPRIVATE))
2521 ordered_lastprivate = true;
2522 if (fd->collapse > 1 || fd->ordered)
2524 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2525 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2527 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2528 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2529 zero_iter1_bb, first_zero_iter1,
2530 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2532 if (zero_iter1_bb)
2534 /* Some counts[i] vars might be uninitialized if
2535 some loop has zero iterations. But the body shouldn't
2536 be executed in that case, so just avoid uninit warnings. */
2537 for (i = first_zero_iter1;
2538 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2539 if (SSA_VAR_P (counts[i]))
2540 TREE_NO_WARNING (counts[i]) = 1;
2541 gsi_prev (&gsi);
2542 e = split_block (entry_bb, gsi_stmt (gsi));
2543 entry_bb = e->dest;
2544 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2545 gsi = gsi_last_bb (entry_bb);
2546 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2547 get_immediate_dominator (CDI_DOMINATORS,
2548 zero_iter1_bb));
2550 if (zero_iter2_bb)
2552 /* Some counts[i] vars might be uninitialized if
2553 some loop has zero iterations. But the body shouldn't
2554 be executed in that case, so just avoid uninit warnings. */
2555 for (i = first_zero_iter2; i < fd->ordered; i++)
2556 if (SSA_VAR_P (counts[i]))
2557 TREE_NO_WARNING (counts[i]) = 1;
2558 if (zero_iter1_bb)
2559 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2560 else
2562 gsi_prev (&gsi);
2563 e = split_block (entry_bb, gsi_stmt (gsi));
2564 entry_bb = e->dest;
2565 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2566 gsi = gsi_last_bb (entry_bb);
2567 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2568 get_immediate_dominator
2569 (CDI_DOMINATORS, zero_iter2_bb));
2572 if (fd->collapse == 1)
2574 counts[0] = fd->loop.n2;
2575 fd->loop = fd->loops[0];
2579 type = TREE_TYPE (fd->loop.v);
2580 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2581 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2582 TREE_ADDRESSABLE (istart0) = 1;
2583 TREE_ADDRESSABLE (iend0) = 1;
2585 /* See if we need to bias by LLONG_MIN. */
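/* Illustrative example (editor's note, not part of the original source):
   for "for (long long i = -5; i < 5; i++)" iterated with an unsigned
   long long iteration type, bias becomes 0x8000000000000000; the runtime
   is then handed the shifted range [-5 + bias, 5 + bias), which compares
   correctly as unsigned.  */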
2586 if (fd->iter_type == long_long_unsigned_type_node
2587 && TREE_CODE (type) == INTEGER_TYPE
2588 && !TYPE_UNSIGNED (type)
2589 && fd->ordered == 0)
2591 tree n1, n2;
2593 if (fd->loop.cond_code == LT_EXPR)
2595 n1 = fd->loop.n1;
2596 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2598 else
2600 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2601 n2 = fd->loop.n1;
2603 if (TREE_CODE (n1) != INTEGER_CST
2604 || TREE_CODE (n2) != INTEGER_CST
2605 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2606 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2609 gimple_stmt_iterator gsif = gsi;
2610 gsi_prev (&gsif);
2612 tree arr = NULL_TREE;
2613 if (in_combined_parallel)
2615 gcc_assert (fd->ordered == 0);
2616 /* In a combined parallel loop, emit a call to
2617 GOMP_loop_foo_next. */
2618 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2619 build_fold_addr_expr (istart0),
2620 build_fold_addr_expr (iend0));
2622 else
2624 tree t0, t1, t2, t3, t4;
2625 /* If this is not a combined parallel loop, emit a call to
2626 GOMP_loop_foo_start in ENTRY_BB. */
2627 t4 = build_fold_addr_expr (iend0);
2628 t3 = build_fold_addr_expr (istart0);
2629 if (fd->ordered)
2631 t0 = build_int_cst (unsigned_type_node,
2632 fd->ordered - fd->collapse + 1);
2633 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2634 fd->ordered
2635 - fd->collapse + 1),
2636 ".omp_counts");
2637 DECL_NAMELESS (arr) = 1;
2638 TREE_ADDRESSABLE (arr) = 1;
2639 TREE_STATIC (arr) = 1;
2640 vec<constructor_elt, va_gc> *v;
2641 vec_alloc (v, fd->ordered - fd->collapse + 1);
2642 int idx;
2644 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2646 tree c;
2647 if (idx == 0 && fd->collapse > 1)
2648 c = fd->loop.n2;
2649 else
2650 c = counts[idx + fd->collapse - 1];
2651 tree purpose = size_int (idx);
2652 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2653 if (TREE_CODE (c) != INTEGER_CST)
2654 TREE_STATIC (arr) = 0;
2657 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2658 if (!TREE_STATIC (arr))
2659 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2660 void_type_node, arr),
2661 true, NULL_TREE, true, GSI_SAME_STMT);
2662 t1 = build_fold_addr_expr (arr);
2663 t2 = NULL_TREE;
2665 else
2667 t2 = fold_convert (fd->iter_type, fd->loop.step);
2668 t1 = fd->loop.n2;
2669 t0 = fd->loop.n1;
2670 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2672 tree innerc
2673 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2674 OMP_CLAUSE__LOOPTEMP_);
2675 gcc_assert (innerc);
2676 t0 = OMP_CLAUSE_DECL (innerc);
2677 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2678 OMP_CLAUSE__LOOPTEMP_);
2679 gcc_assert (innerc);
2680 t1 = OMP_CLAUSE_DECL (innerc);
2682 if (POINTER_TYPE_P (TREE_TYPE (t0))
2683 && TYPE_PRECISION (TREE_TYPE (t0))
2684 != TYPE_PRECISION (fd->iter_type))
2686 /* Avoid casting pointers to integer of a different size. */
2687 tree itype = signed_type_for (type);
2688 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2689 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2691 else
2693 t1 = fold_convert (fd->iter_type, t1);
2694 t0 = fold_convert (fd->iter_type, t0);
2696 if (bias)
2698 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2699 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2702 if (fd->iter_type == long_integer_type_node || fd->ordered)
2704 if (fd->chunk_size)
2706 t = fold_convert (fd->iter_type, fd->chunk_size);
2707 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2708 if (fd->ordered)
2709 t = build_call_expr (builtin_decl_explicit (start_fn),
2710 5, t0, t1, t, t3, t4);
2711 else
2712 t = build_call_expr (builtin_decl_explicit (start_fn),
2713 6, t0, t1, t2, t, t3, t4);
2715 else if (fd->ordered)
2716 t = build_call_expr (builtin_decl_explicit (start_fn),
2717 4, t0, t1, t3, t4);
2718 else
2719 t = build_call_expr (builtin_decl_explicit (start_fn),
2720 5, t0, t1, t2, t3, t4);
2722 else
2724 tree t5;
2725 tree c_bool_type;
2726 tree bfn_decl;
2728 /* The GOMP_loop_ull_*start functions have an additional boolean
2729 argument, true for < loops and false for > loops.
2730 In Fortran, the C bool type can be different from
2731 boolean_type_node. */
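/* Illustrative example (editor's note, not part of the original source,
   assuming the usual libgomp entry points): for an unsigned long long
   iteration space with schedule(dynamic, 4) and a < loop, the call built
   below is roughly
     GOMP_loop_ull_dynamic_start (true, t0, t1, t2, 4, &istart0, &iend0);  */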
2732 bfn_decl = builtin_decl_explicit (start_fn);
2733 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2734 t5 = build_int_cst (c_bool_type,
2735 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2736 if (fd->chunk_size)
2738 tree bfn_decl = builtin_decl_explicit (start_fn);
2739 t = fold_convert (fd->iter_type, fd->chunk_size);
2740 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2741 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 6, t5, t0, t1, t2, t3, t4);
2748 if (TREE_TYPE (t) != boolean_type_node)
2749 t = fold_build2 (NE_EXPR, boolean_type_node,
2750 t, build_int_cst (TREE_TYPE (t), 0));
2751 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2752 true, GSI_SAME_STMT);
2753 if (arr && !TREE_STATIC (arr))
2755 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2756 TREE_THIS_VOLATILE (clobber) = 1;
2757 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2758 GSI_SAME_STMT);
2760 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2762 /* Remove the GIMPLE_OMP_FOR statement. */
2763 gsi_remove (&gsi, true);
2765 if (gsi_end_p (gsif))
2766 gsif = gsi_after_labels (gsi_bb (gsif));
2767 gsi_next (&gsif);
2769 /* Iteration setup for sequential loop goes in L0_BB. */
2770 tree startvar = fd->loop.v;
2771 tree endvar = NULL_TREE;
2773 if (gimple_omp_for_combined_p (fd->for_stmt))
2775 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2776 && gimple_omp_for_kind (inner_stmt)
2777 == GF_OMP_FOR_KIND_SIMD);
2778 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2779 OMP_CLAUSE__LOOPTEMP_);
2780 gcc_assert (innerc);
2781 startvar = OMP_CLAUSE_DECL (innerc);
2782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2783 OMP_CLAUSE__LOOPTEMP_);
2784 gcc_assert (innerc);
2785 endvar = OMP_CLAUSE_DECL (innerc);
2788 gsi = gsi_start_bb (l0_bb);
2789 t = istart0;
2790 if (fd->ordered && fd->collapse == 1)
2791 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2792 fold_convert (fd->iter_type, fd->loop.step));
2793 else if (bias)
2794 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2795 if (fd->ordered && fd->collapse == 1)
2797 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2798 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2799 fd->loop.n1, fold_convert (sizetype, t));
2800 else
2802 t = fold_convert (TREE_TYPE (startvar), t);
2803 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2804 fd->loop.n1, t);
2807 else
2809 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2810 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2811 t = fold_convert (TREE_TYPE (startvar), t);
2813 t = force_gimple_operand_gsi (&gsi, t,
2814 DECL_P (startvar)
2815 && TREE_ADDRESSABLE (startvar),
2816 NULL_TREE, false, GSI_CONTINUE_LINKING);
2817 assign_stmt = gimple_build_assign (startvar, t);
2818 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2820 t = iend0;
2821 if (fd->ordered && fd->collapse == 1)
2822 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2823 fold_convert (fd->iter_type, fd->loop.step));
2824 else if (bias)
2825 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2826 if (fd->ordered && fd->collapse == 1)
2828 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2829 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2830 fd->loop.n1, fold_convert (sizetype, t));
2831 else
2833 t = fold_convert (TREE_TYPE (startvar), t);
2834 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2835 fd->loop.n1, t);
2838 else
2840 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2841 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2842 t = fold_convert (TREE_TYPE (startvar), t);
2844 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2845 false, GSI_CONTINUE_LINKING);
2846 if (endvar)
2848 assign_stmt = gimple_build_assign (endvar, iend);
2849 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2850 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2851 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2852 else
2853 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2856 /* Handle linear clause adjustments. */
2857 tree itercnt = NULL_TREE;
2858 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2859 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2860 c; c = OMP_CLAUSE_CHAIN (c))
2861 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2862 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2864 tree d = OMP_CLAUSE_DECL (c);
2865 bool is_ref = omp_is_reference (d);
2866 tree t = d, a, dest;
2867 if (is_ref)
2868 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2869 tree type = TREE_TYPE (t);
2870 if (POINTER_TYPE_P (type))
2871 type = sizetype;
2872 dest = unshare_expr (t);
2873 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2874 expand_omp_build_assign (&gsif, v, t);
2875 if (itercnt == NULL_TREE)
2877 itercnt = startvar;
2878 tree n1 = fd->loop.n1;
2879 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2881 itercnt
2882 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2883 itercnt);
2884 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2886 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2887 itercnt, n1);
2888 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2889 itercnt, fd->loop.step);
2890 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2891 NULL_TREE, false,
2892 GSI_CONTINUE_LINKING);
2894 a = fold_build2 (MULT_EXPR, type,
2895 fold_convert (type, itercnt),
2896 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2897 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2898 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2899 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2900 false, GSI_CONTINUE_LINKING);
2901 assign_stmt = gimple_build_assign (dest, t);
2902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2904 if (fd->collapse > 1)
2905 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2907 if (fd->ordered)
2909 /* Until now, the counts array contained the number of iterations
2910 (or a variable containing it) for the ith loop. From now on we
2911 need those counts only for the collapsed loops, and only from the
2912 2nd to the last collapsed one. Move them one element earlier;
2913 we'll use counts[fd->collapse - 1] for the first source/sink
2914 iteration counter and so on, and counts[fd->ordered]
2915 as the array holding the current counter values for
2916 depend(source). */
2917 if (fd->collapse > 1)
2918 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2919 if (broken_loop)
2921 int i;
2922 for (i = fd->collapse; i < fd->ordered; i++)
2924 tree type = TREE_TYPE (fd->loops[i].v);
2925 tree this_cond
2926 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2927 fold_convert (type, fd->loops[i].n1),
2928 fold_convert (type, fd->loops[i].n2));
2929 if (!integer_onep (this_cond))
2930 break;
2932 if (i < fd->ordered)
2934 cont_bb
2935 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2936 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2937 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2938 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2940 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2941 make_edge (cont_bb, l1_bb, 0);
2942 l2_bb = create_empty_bb (cont_bb);
2943 broken_loop = false;
2946 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2947 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2948 ordered_lastprivate);
2949 if (counts[fd->collapse - 1])
2951 gcc_assert (fd->collapse == 1);
2952 gsi = gsi_last_bb (l0_bb);
2953 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2954 istart0, true);
2955 gsi = gsi_last_bb (cont_bb);
2956 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2957 build_int_cst (fd->iter_type, 1));
2958 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2959 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2960 size_zero_node, NULL_TREE, NULL_TREE);
2961 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2962 t = counts[fd->collapse - 1];
2964 else if (fd->collapse > 1)
2965 t = fd->loop.v;
2966 else
2968 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2969 fd->loops[0].v, fd->loops[0].n1);
2970 t = fold_convert (fd->iter_type, t);
2972 gsi = gsi_last_bb (l0_bb);
2973 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2974 size_zero_node, NULL_TREE, NULL_TREE);
2975 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2976 false, GSI_CONTINUE_LINKING);
2977 expand_omp_build_assign (&gsi, aref, t, true);
2980 if (!broken_loop)
2982 /* Code to control the increment and predicate for the sequential
2983 loop goes in the CONT_BB. */
2984 gsi = gsi_last_bb (cont_bb);
2985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
2986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
2987 vmain = gimple_omp_continue_control_use (cont_stmt);
2988 vback = gimple_omp_continue_control_def (cont_stmt);
2990 if (!gimple_omp_for_combined_p (fd->for_stmt))
2992 if (POINTER_TYPE_P (type))
2993 t = fold_build_pointer_plus (vmain, fd->loop.step);
2994 else
2995 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
2996 t = force_gimple_operand_gsi (&gsi, t,
2997 DECL_P (vback)
2998 && TREE_ADDRESSABLE (vback),
2999 NULL_TREE, true, GSI_SAME_STMT);
3000 assign_stmt = gimple_build_assign (vback, t);
3001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3003 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3005 if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3013 tree aref = build4 (ARRAY_REF, fd->iter_type,
3014 counts[fd->ordered], size_zero_node,
3015 NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 true, GSI_SAME_STMT);
3018 expand_omp_build_assign (&gsi, aref, t);
3021 t = build2 (fd->loop.cond_code, boolean_type_node,
3022 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3023 iend);
3024 gcond *cond_stmt = gimple_build_cond_empty (t);
3025 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3028 /* Remove GIMPLE_OMP_CONTINUE. */
3029 gsi_remove (&gsi, true);
3031 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3032 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3034 /* Emit code to get the next parallel iteration in L2_BB. */
3035 gsi = gsi_start_bb (l2_bb);
3037 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3038 build_fold_addr_expr (istart0),
3039 build_fold_addr_expr (iend0));
3040 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3041 false, GSI_CONTINUE_LINKING);
3042 if (TREE_TYPE (t) != boolean_type_node)
3043 t = fold_build2 (NE_EXPR, boolean_type_node,
3044 t, build_int_cst (TREE_TYPE (t), 0));
3045 gcond *cond_stmt = gimple_build_cond_empty (t);
3046 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3049 /* Add the loop cleanup function. */
3050 gsi = gsi_last_bb (exit_bb);
3051 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3052 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3053 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3054 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3055 else
3056 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3057 gcall *call_stmt = gimple_build_call (t, 0);
3058 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3059 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3060 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3061 if (fd->ordered)
3063 tree arr = counts[fd->ordered];
3064 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3065 TREE_THIS_VOLATILE (clobber) = 1;
3066 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3067 GSI_SAME_STMT);
3069 gsi_remove (&gsi, true);
3071 /* Connect the new blocks. */
3072 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3073 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3075 if (!broken_loop)
3077 gimple_seq phis;
3079 e = find_edge (cont_bb, l3_bb);
3080 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3082 phis = phi_nodes (l3_bb);
3083 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3085 gimple *phi = gsi_stmt (gsi);
3086 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3087 PHI_ARG_DEF_FROM_EDGE (phi, e));
3089 remove_edge (e);
3091 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3092 e = find_edge (cont_bb, l1_bb);
3093 if (e == NULL)
3095 e = BRANCH_EDGE (cont_bb);
3096 gcc_assert (single_succ (e->dest) == l1_bb);
3098 if (gimple_omp_for_combined_p (fd->for_stmt))
3100 remove_edge (e);
3101 e = NULL;
3103 else if (fd->collapse > 1)
3105 remove_edge (e);
3106 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3108 else
3109 e->flags = EDGE_TRUE_VALUE;
3110 if (e)
3112 e->probability = REG_BR_PROB_BASE * 7 / 8;
3113 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3115 else
3117 e = find_edge (cont_bb, l2_bb);
3118 e->flags = EDGE_FALLTHRU;
3120 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3122 if (gimple_in_ssa_p (cfun))
3124 /* Add phis to the outer loop that connect to the phis in the inner,
3125 original loop, and move the loop entry value of the inner phi to
3126 the loop entry value of the outer phi. */
3127 gphi_iterator psi;
3128 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3130 source_location locus;
3131 gphi *nphi;
3132 gphi *exit_phi = psi.phi ();
3134 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3135 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3137 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3138 edge latch_to_l1 = find_edge (latch, l1_bb);
3139 gphi *inner_phi
3140 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3142 tree t = gimple_phi_result (exit_phi);
3143 tree new_res = copy_ssa_name (t, NULL);
3144 nphi = create_phi_node (new_res, l0_bb);
3146 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3147 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3148 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3149 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3150 add_phi_arg (nphi, t, entry_to_l0, locus);
3152 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3153 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3155 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3159 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3160 recompute_dominator (CDI_DOMINATORS, l2_bb));
3161 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3162 recompute_dominator (CDI_DOMINATORS, l3_bb));
3163 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3164 recompute_dominator (CDI_DOMINATORS, l0_bb));
3165 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3166 recompute_dominator (CDI_DOMINATORS, l1_bb));
3168 /* We enter expand_omp_for_generic with a loop. This original loop may
3169 have its own loop struct, or it may be part of an outer loop struct
3170 (which may be the fake loop). */
3171 struct loop *outer_loop = entry_bb->loop_father;
3172 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3174 add_bb_to_loop (l2_bb, outer_loop);
3176 /* We've added a new loop around the original loop. Allocate the
3177 corresponding loop struct. */
3178 struct loop *new_loop = alloc_loop ();
3179 new_loop->header = l0_bb;
3180 new_loop->latch = l2_bb;
3181 add_loop (new_loop, outer_loop);
3183 /* Allocate a loop structure for the original loop unless we already
3184 had one. */
3185 if (!orig_loop_has_loop_struct
3186 && !gimple_omp_for_combined_p (fd->for_stmt))
3188 struct loop *orig_loop = alloc_loop ();
3189 orig_loop->header = l1_bb;
3190 /* The loop may have multiple latches. */
3191 add_loop (orig_loop, new_loop);
3196 /* A subroutine of expand_omp_for. Generate code for a parallel
3197 loop with static schedule and no specified chunk size. Given
3198 parameters:
3200 for (V = N1; V cond N2; V += STEP) BODY;
3202 where COND is "<" or ">", we generate pseudocode
3204 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3205 if (cond is <)
3206 adj = STEP - 1;
3207 else
3208 adj = STEP + 1;
3209 if ((__typeof (V)) -1 > 0 && cond is >)
3210 n = -(adj + N2 - N1) / -STEP;
3211 else
3212 n = (adj + N2 - N1) / STEP;
3213 q = n / nthreads;
3214 tt = n % nthreads;
3215 if (threadid < tt) goto L3; else goto L4;
3217 tt = 0;
3218 q = q + 1;
3220 s0 = q * threadid + tt;
3221 e0 = s0 + q;
3222 V = s0 * STEP + N1;
3223 if (s0 >= e0) goto L2; else goto L0;
3225 e = e0 * STEP + N1;
3227 BODY;
3228 V += STEP;
3229 if (V cond e) goto L1;
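/* Illustrative worked example (editor's note, not part of the original
   source): for n = 23 iterations and nthreads = 4, q = 5 and tt = 3, so
   threads 0-2 each take q + 1 = 6 iterations (s0/e0 = 0/6, 6/12, 12/18)
   and thread 3 takes the remaining 5 (s0/e0 = 18/23).  */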
3233 static void
3234 expand_omp_for_static_nochunk (struct omp_region *region,
3235 struct omp_for_data *fd,
3236 gimple *inner_stmt)
3238 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3239 tree type, itype, vmain, vback;
3240 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3241 basic_block body_bb, cont_bb, collapse_bb = NULL;
3242 basic_block fin_bb;
3243 gimple_stmt_iterator gsi;
3244 edge ep;
3245 bool broken_loop = region->cont == NULL;
3246 tree *counts = NULL;
3247 tree n1, n2, step;
3249 itype = type = TREE_TYPE (fd->loop.v);
3250 if (POINTER_TYPE_P (type))
3251 itype = signed_type_for (type);
3253 entry_bb = region->entry;
3254 cont_bb = region->cont;
3255 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3256 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3257 gcc_assert (broken_loop
3258 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3259 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3260 body_bb = single_succ (seq_start_bb);
3261 if (!broken_loop)
3263 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3264 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3267 exit_bb = region->exit;
3269 /* Iteration space partitioning goes in ENTRY_BB. */
3270 gsi = gsi_last_bb (entry_bb);
3271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3273 if (fd->collapse > 1)
3275 int first_zero_iter = -1, dummy = -1;
3276 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3278 counts = XALLOCAVEC (tree, fd->collapse);
3279 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3280 fin_bb, first_zero_iter,
3281 dummy_bb, dummy, l2_dom_bb);
3282 t = NULL_TREE;
3284 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3285 t = integer_one_node;
3286 else
3287 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3288 fold_convert (type, fd->loop.n1),
3289 fold_convert (type, fd->loop.n2));
3290 if (fd->collapse == 1
3291 && TYPE_UNSIGNED (type)
3292 && (t == NULL_TREE || !integer_onep (t)))
3294 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3295 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3296 true, GSI_SAME_STMT);
3297 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3298 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3299 true, GSI_SAME_STMT);
3300 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3301 NULL_TREE, NULL_TREE);
3302 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3303 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3304 expand_omp_regimplify_p, NULL, NULL)
3305 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3306 expand_omp_regimplify_p, NULL, NULL))
3308 gsi = gsi_for_stmt (cond_stmt);
3309 gimple_regimplify_operands (cond_stmt, &gsi);
3311 ep = split_block (entry_bb, cond_stmt);
3312 ep->flags = EDGE_TRUE_VALUE;
3313 entry_bb = ep->dest;
3314 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3315 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3316 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3317 if (gimple_in_ssa_p (cfun))
3319 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3320 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3321 !gsi_end_p (gpi); gsi_next (&gpi))
3323 gphi *phi = gpi.phi ();
3324 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3325 ep, UNKNOWN_LOCATION);
3328 gsi = gsi_last_bb (entry_bb);
3331 switch (gimple_omp_for_kind (fd->for_stmt))
3333 case GF_OMP_FOR_KIND_FOR:
3334 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3335 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3336 break;
3337 case GF_OMP_FOR_KIND_DISTRIBUTE:
3338 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3339 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3340 break;
3341 default:
3342 gcc_unreachable ();
3344 nthreads = build_call_expr (nthreads, 0);
3345 nthreads = fold_convert (itype, nthreads);
3346 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348 threadid = build_call_expr (threadid, 0);
3349 threadid = fold_convert (itype, threadid);
3350 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3351 true, GSI_SAME_STMT);
3353 n1 = fd->loop.n1;
3354 n2 = fd->loop.n2;
3355 step = fd->loop.step;
3356 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3358 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3359 OMP_CLAUSE__LOOPTEMP_);
3360 gcc_assert (innerc);
3361 n1 = OMP_CLAUSE_DECL (innerc);
3362 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3363 OMP_CLAUSE__LOOPTEMP_);
3364 gcc_assert (innerc);
3365 n2 = OMP_CLAUSE_DECL (innerc);
3367 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3368 true, NULL_TREE, true, GSI_SAME_STMT);
3369 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3370 true, NULL_TREE, true, GSI_SAME_STMT);
3371 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3372 true, NULL_TREE, true, GSI_SAME_STMT);
3374 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3375 t = fold_build2 (PLUS_EXPR, itype, step, t);
3376 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3377 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3378 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3379 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3380 fold_build1 (NEGATE_EXPR, itype, t),
3381 fold_build1 (NEGATE_EXPR, itype, step));
3382 else
3383 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3384 t = fold_convert (itype, t);
3385 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3387 q = create_tmp_reg (itype, "q");
3388 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3389 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3390 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3392 tt = create_tmp_reg (itype, "tt");
3393 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3394 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3395 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3397 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3398 gcond *cond_stmt = gimple_build_cond_empty (t);
3399 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3401 second_bb = split_block (entry_bb, cond_stmt)->dest;
3402 gsi = gsi_last_bb (second_bb);
3403 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3405 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3406 GSI_SAME_STMT);
3407 gassign *assign_stmt
3408 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3409 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3411 third_bb = split_block (second_bb, assign_stmt)->dest;
3412 gsi = gsi_last_bb (third_bb);
3413 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3415 t = build2 (MULT_EXPR, itype, q, threadid);
3416 t = build2 (PLUS_EXPR, itype, t, tt);
3417 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3419 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3420 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3422 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3423 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3425 /* Remove the GIMPLE_OMP_FOR statement. */
3426 gsi_remove (&gsi, true);
3428 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3429 gsi = gsi_start_bb (seq_start_bb);
3431 tree startvar = fd->loop.v;
3432 tree endvar = NULL_TREE;
3434 if (gimple_omp_for_combined_p (fd->for_stmt))
3436 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3437 ? gimple_omp_parallel_clauses (inner_stmt)
3438 : gimple_omp_for_clauses (inner_stmt);
3439 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3440 gcc_assert (innerc);
3441 startvar = OMP_CLAUSE_DECL (innerc);
3442 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3443 OMP_CLAUSE__LOOPTEMP_);
3444 gcc_assert (innerc);
3445 endvar = OMP_CLAUSE_DECL (innerc);
3446 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3447 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3449 int i;
3450 for (i = 1; i < fd->collapse; i++)
3452 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3453 OMP_CLAUSE__LOOPTEMP_);
3454 gcc_assert (innerc);
3456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3457 OMP_CLAUSE__LOOPTEMP_);
3458 if (innerc)
3460 /* If needed (distribute parallel for with lastprivate),
3461 propagate down the total number of iterations. */
3462 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3463 fd->loop.n2);
3464 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3465 GSI_CONTINUE_LINKING);
3466 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3467 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3471 t = fold_convert (itype, s0);
3472 t = fold_build2 (MULT_EXPR, itype, t, step);
3473 if (POINTER_TYPE_P (type))
3474 t = fold_build_pointer_plus (n1, t);
3475 else
3476 t = fold_build2 (PLUS_EXPR, type, t, n1);
3477 t = fold_convert (TREE_TYPE (startvar), t);
3478 t = force_gimple_operand_gsi (&gsi, t,
3479 DECL_P (startvar)
3480 && TREE_ADDRESSABLE (startvar),
3481 NULL_TREE, false, GSI_CONTINUE_LINKING);
3482 assign_stmt = gimple_build_assign (startvar, t);
3483 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3485 t = fold_convert (itype, e0);
3486 t = fold_build2 (MULT_EXPR, itype, t, step);
3487 if (POINTER_TYPE_P (type))
3488 t = fold_build_pointer_plus (n1, t);
3489 else
3490 t = fold_build2 (PLUS_EXPR, type, t, n1);
3491 t = fold_convert (TREE_TYPE (startvar), t);
3492 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3493 false, GSI_CONTINUE_LINKING);
3494 if (endvar)
3496 assign_stmt = gimple_build_assign (endvar, e);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3499 assign_stmt = gimple_build_assign (fd->loop.v, e);
3500 else
3501 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3502 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3504 /* Handle linear clause adjustments. */
3505 tree itercnt = NULL_TREE;
3506 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3507 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3508 c; c = OMP_CLAUSE_CHAIN (c))
3509 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3510 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3512 tree d = OMP_CLAUSE_DECL (c);
3513 bool is_ref = omp_is_reference (d);
3514 tree t = d, a, dest;
3515 if (is_ref)
3516 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3517 if (itercnt == NULL_TREE)
3519 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3521 itercnt = fold_build2 (MINUS_EXPR, itype,
3522 fold_convert (itype, n1),
3523 fold_convert (itype, fd->loop.n1));
3524 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3525 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3526 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3527 NULL_TREE, false,
3528 GSI_CONTINUE_LINKING);
3530 else
3531 itercnt = s0;
3533 tree type = TREE_TYPE (t);
3534 if (POINTER_TYPE_P (type))
3535 type = sizetype;
3536 a = fold_build2 (MULT_EXPR, type,
3537 fold_convert (type, itercnt),
3538 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3539 dest = unshare_expr (t);
3540 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3541 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3542 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3543 false, GSI_CONTINUE_LINKING);
3544 assign_stmt = gimple_build_assign (dest, t);
3545 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3547 if (fd->collapse > 1)
3548 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3550 if (!broken_loop)
3552 /* The code controlling the sequential loop replaces the
3553 GIMPLE_OMP_CONTINUE. */
3554 gsi = gsi_last_bb (cont_bb);
3555 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3556 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3557 vmain = gimple_omp_continue_control_use (cont_stmt);
3558 vback = gimple_omp_continue_control_def (cont_stmt);
3560 if (!gimple_omp_for_combined_p (fd->for_stmt))
3562 if (POINTER_TYPE_P (type))
3563 t = fold_build_pointer_plus (vmain, step);
3564 else
3565 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3566 t = force_gimple_operand_gsi (&gsi, t,
3567 DECL_P (vback)
3568 && TREE_ADDRESSABLE (vback),
3569 NULL_TREE, true, GSI_SAME_STMT);
3570 assign_stmt = gimple_build_assign (vback, t);
3571 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3573 t = build2 (fd->loop.cond_code, boolean_type_node,
3574 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3575 ? t : vback, e);
3576 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3579 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3580 gsi_remove (&gsi, true);
3582 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3583 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3586 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3587 gsi = gsi_last_bb (exit_bb);
3588 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3590 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3591 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3593 gsi_remove (&gsi, true);
3595 /* Connect all the blocks. */
3596 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3597 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3598 ep = find_edge (entry_bb, second_bb);
3599 ep->flags = EDGE_TRUE_VALUE;
3600 ep->probability = REG_BR_PROB_BASE / 4;
3601 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3602 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3604 if (!broken_loop)
3606 ep = find_edge (cont_bb, body_bb);
3607 if (ep == NULL)
3609 ep = BRANCH_EDGE (cont_bb);
3610 gcc_assert (single_succ (ep->dest) == body_bb);
3612 if (gimple_omp_for_combined_p (fd->for_stmt))
3614 remove_edge (ep);
3615 ep = NULL;
3617 else if (fd->collapse > 1)
3619 remove_edge (ep);
3620 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3622 else
3623 ep->flags = EDGE_TRUE_VALUE;
3624 find_edge (cont_bb, fin_bb)->flags
3625 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3628 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3629 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3630 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3632 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3633 recompute_dominator (CDI_DOMINATORS, body_bb));
3634 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3635 recompute_dominator (CDI_DOMINATORS, fin_bb));
3637 struct loop *loop = body_bb->loop_father;
3638 if (loop != entry_bb->loop_father)
3640 gcc_assert (broken_loop || loop->header == body_bb);
3641 gcc_assert (broken_loop
3642 || loop->latch == region->cont
3643 || single_pred (loop->latch) == region->cont);
3644 return;
3647 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3649 loop = alloc_loop ();
3650 loop->header = body_bb;
3651 if (collapse_bb == NULL)
3652 loop->latch = cont_bb;
3653 add_loop (loop, body_bb->loop_father);
3657 /* Return phi in E->DEST with ARG on edge E. */
3659 static gphi *
3660 find_phi_with_arg_on_edge (tree arg, edge e)
3662 basic_block bb = e->dest;
3664 for (gphi_iterator gpi = gsi_start_phis (bb);
3665 !gsi_end_p (gpi);
3666 gsi_next (&gpi))
3668 gphi *phi = gpi.phi ();
3669 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3670 return phi;
3673 return NULL;
3676 /* A subroutine of expand_omp_for. Generate code for a parallel
3677 loop with static schedule and a specified chunk size. Given
3678 parameters:
3680 for (V = N1; V cond N2; V += STEP) BODY;
3682 where COND is "<" or ">", we generate pseudocode
3684 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3685 if (cond is <)
3686 adj = STEP - 1;
3687 else
3688 adj = STEP + 1;
3689 if ((__typeof (V)) -1 > 0 && cond is >)
3690 n = -(adj + N2 - N1) / -STEP;
3691 else
3692 n = (adj + N2 - N1) / STEP;
3693 trip = 0;
3694 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3695 here so that V is defined
3696 if the loop is not entered
3698 s0 = (trip * nthreads + threadid) * CHUNK;
3699 e0 = min (s0 + CHUNK, n);
3700 if (s0 < n) goto L1; else goto L4;
3702 V = s0 * STEP + N1;
3703 e = e0 * STEP + N1;
3705 BODY;
3706 V += STEP;
3707 if (V cond e) goto L2; else goto L3;
3709 trip += 1;
3710 goto L0;
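/* Illustrative sketch only (not part of the expansion): the pseudocode
   above written as plain C for a single thread, assuming COND is '<' and
   STEP > 0.  The function name and the way NTHREADS/THREADID are passed
   are hypothetical; the generated code obtains them from the libgomp
   builtins selected below.  */
#if 0
static void
static_chunk_sketch (long n1, long n2, long step, long chunk,
                     long nthreads, long threadid)
{
  long n = (step - 1 + n2 - n1) / step;   /* number of logical iterations */
  for (long trip = 0; ; trip++)
    {
      long s0 = (trip * nthreads + threadid) * chunk;
      long e0 = s0 + chunk < n ? s0 + chunk : n;
      if (s0 >= n)
        break;
      for (long v = n1 + s0 * step; v < n1 + e0 * step; v += step)
        {
          /* BODY */
        }
    }
}
#endif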
3714 static void
3715 expand_omp_for_static_chunk (struct omp_region *region,
3716 struct omp_for_data *fd, gimple *inner_stmt)
3718 tree n, s0, e0, e, t;
3719 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3720 tree type, itype, vmain, vback, vextra;
3721 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3722 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3723 gimple_stmt_iterator gsi;
3724 edge se;
3725 bool broken_loop = region->cont == NULL;
3726 tree *counts = NULL;
3727 tree n1, n2, step;
3729 itype = type = TREE_TYPE (fd->loop.v);
3730 if (POINTER_TYPE_P (type))
3731 itype = signed_type_for (type);
3733 entry_bb = region->entry;
3734 se = split_block (entry_bb, last_stmt (entry_bb));
3735 entry_bb = se->src;
3736 iter_part_bb = se->dest;
3737 cont_bb = region->cont;
3738 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3739 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3740 gcc_assert (broken_loop
3741 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3742 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3743 body_bb = single_succ (seq_start_bb);
3744 if (!broken_loop)
3746 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3747 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3748 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3749 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3751 exit_bb = region->exit;
3753 /* Trip and adjustment setup goes in ENTRY_BB. */
3754 gsi = gsi_last_bb (entry_bb);
3755 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3757 if (fd->collapse > 1)
3759 int first_zero_iter = -1, dummy = -1;
3760 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3762 counts = XALLOCAVEC (tree, fd->collapse);
3763 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3764 fin_bb, first_zero_iter,
3765 dummy_bb, dummy, l2_dom_bb);
3766 t = NULL_TREE;
3768 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3769 t = integer_one_node;
3770 else
3771 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3772 fold_convert (type, fd->loop.n1),
3773 fold_convert (type, fd->loop.n2));
3774 if (fd->collapse == 1
3775 && TYPE_UNSIGNED (type)
3776 && (t == NULL_TREE || !integer_onep (t)))
3778 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3779 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3780 true, GSI_SAME_STMT);
3781 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3782 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3783 true, GSI_SAME_STMT);
3784 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3785 NULL_TREE, NULL_TREE);
3786 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3788 expand_omp_regimplify_p, NULL, NULL)
3789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3790 expand_omp_regimplify_p, NULL, NULL))
3792 gsi = gsi_for_stmt (cond_stmt);
3793 gimple_regimplify_operands (cond_stmt, &gsi);
3795 se = split_block (entry_bb, cond_stmt);
3796 se->flags = EDGE_TRUE_VALUE;
3797 entry_bb = se->dest;
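/* Entering the loop is overwhelmingly likely; give the edge that skips
   straight to FIN_BB only a tiny fraction of REG_BR_PROB_BASE and the
   fall-through edge the remainder.  */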
3798 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3799 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3800 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3801 if (gimple_in_ssa_p (cfun))
3803 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3804 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3805 !gsi_end_p (gpi); gsi_next (&gpi))
3807 gphi *phi = gpi.phi ();
3808 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3809 se, UNKNOWN_LOCATION);
3812 gsi = gsi_last_bb (entry_bb);
3815 switch (gimple_omp_for_kind (fd->for_stmt))
3817 case GF_OMP_FOR_KIND_FOR:
3818 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3819 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3820 break;
3821 case GF_OMP_FOR_KIND_DISTRIBUTE:
3822 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3823 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3824 break;
3825 default:
3826 gcc_unreachable ();
3828 nthreads = build_call_expr (nthreads, 0);
3829 nthreads = fold_convert (itype, nthreads);
3830 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3831 true, GSI_SAME_STMT);
3832 threadid = build_call_expr (threadid, 0);
3833 threadid = fold_convert (itype, threadid);
3834 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3835 true, GSI_SAME_STMT);
3837 n1 = fd->loop.n1;
3838 n2 = fd->loop.n2;
3839 step = fd->loop.step;
3840 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3842 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3843 OMP_CLAUSE__LOOPTEMP_);
3844 gcc_assert (innerc);
3845 n1 = OMP_CLAUSE_DECL (innerc);
3846 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3847 OMP_CLAUSE__LOOPTEMP_);
3848 gcc_assert (innerc);
3849 n2 = OMP_CLAUSE_DECL (innerc);
3851 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3852 true, NULL_TREE, true, GSI_SAME_STMT);
3853 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3854 true, NULL_TREE, true, GSI_SAME_STMT);
3855 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3856 true, NULL_TREE, true, GSI_SAME_STMT);
3857 tree chunk_size = fold_convert (itype, fd->chunk_size);
3858 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3859 chunk_size
3860 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3861 GSI_SAME_STMT);
3863 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3864 t = fold_build2 (PLUS_EXPR, itype, step, t);
3865 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3866 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3867 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3869 fold_build1 (NEGATE_EXPR, itype, t),
3870 fold_build1 (NEGATE_EXPR, itype, step));
3871 else
3872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3873 t = fold_convert (itype, t);
3874 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3877 trip_var = create_tmp_reg (itype, ".trip");
3878 if (gimple_in_ssa_p (cfun))
3880 trip_init = make_ssa_name (trip_var);
3881 trip_main = make_ssa_name (trip_var);
3882 trip_back = make_ssa_name (trip_var);
3884 else
3886 trip_init = trip_var;
3887 trip_main = trip_var;
3888 trip_back = trip_var;
3891 gassign *assign_stmt
3892 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3893 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3895 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3896 t = fold_build2 (MULT_EXPR, itype, t, step);
3897 if (POINTER_TYPE_P (type))
3898 t = fold_build_pointer_plus (n1, t);
3899 else
3900 t = fold_build2 (PLUS_EXPR, type, t, n1);
3901 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3902 true, GSI_SAME_STMT);
3904 /* Remove the GIMPLE_OMP_FOR. */
3905 gsi_remove (&gsi, true);
3907 gimple_stmt_iterator gsif = gsi;
3909 /* Iteration space partitioning goes in ITER_PART_BB. */
3910 gsi = gsi_last_bb (iter_part_bb);
3912 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3913 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3914 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3915 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 false, GSI_CONTINUE_LINKING);
3918 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3919 t = fold_build2 (MIN_EXPR, itype, t, n);
3920 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 false, GSI_CONTINUE_LINKING);
3923 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3924 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3926 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3927 gsi = gsi_start_bb (seq_start_bb);
3929 tree startvar = fd->loop.v;
3930 tree endvar = NULL_TREE;
3932 if (gimple_omp_for_combined_p (fd->for_stmt))
3934 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3935 ? gimple_omp_parallel_clauses (inner_stmt)
3936 : gimple_omp_for_clauses (inner_stmt);
3937 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3938 gcc_assert (innerc);
3939 startvar = OMP_CLAUSE_DECL (innerc);
3940 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3941 OMP_CLAUSE__LOOPTEMP_);
3942 gcc_assert (innerc);
3943 endvar = OMP_CLAUSE_DECL (innerc);
3944 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3945 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3947 int i;
3948 for (i = 1; i < fd->collapse; i++)
3950 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3951 OMP_CLAUSE__LOOPTEMP_);
3952 gcc_assert (innerc);
3954 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3955 OMP_CLAUSE__LOOPTEMP_);
3956 if (innerc)
3958 /* If needed (distribute parallel for with lastprivate),
3959 propagate down the total number of iterations. */
3960 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3961 fd->loop.n2);
3962 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3963 GSI_CONTINUE_LINKING);
3964 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3965 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3970 t = fold_convert (itype, s0);
3971 t = fold_build2 (MULT_EXPR, itype, t, step);
3972 if (POINTER_TYPE_P (type))
3973 t = fold_build_pointer_plus (n1, t);
3974 else
3975 t = fold_build2 (PLUS_EXPR, type, t, n1);
3976 t = fold_convert (TREE_TYPE (startvar), t);
3977 t = force_gimple_operand_gsi (&gsi, t,
3978 DECL_P (startvar)
3979 && TREE_ADDRESSABLE (startvar),
3980 NULL_TREE, false, GSI_CONTINUE_LINKING);
3981 assign_stmt = gimple_build_assign (startvar, t);
3982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3984 t = fold_convert (itype, e0);
3985 t = fold_build2 (MULT_EXPR, itype, t, step);
3986 if (POINTER_TYPE_P (type))
3987 t = fold_build_pointer_plus (n1, t);
3988 else
3989 t = fold_build2 (PLUS_EXPR, type, t, n1);
3990 t = fold_convert (TREE_TYPE (startvar), t);
3991 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3992 false, GSI_CONTINUE_LINKING);
3993 if (endvar)
3995 assign_stmt = gimple_build_assign (endvar, e);
3996 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3997 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3998 assign_stmt = gimple_build_assign (fd->loop.v, e);
3999 else
4000 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4001 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4003 /* Handle linear clause adjustments. */
4004 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4005 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4006 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4007 c; c = OMP_CLAUSE_CHAIN (c))
4008 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4009 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4011 tree d = OMP_CLAUSE_DECL (c);
4012 bool is_ref = omp_is_reference (d);
4013 tree t = d, a, dest;
4014 if (is_ref)
4015 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4016 tree type = TREE_TYPE (t);
4017 if (POINTER_TYPE_P (type))
4018 type = sizetype;
4019 dest = unshare_expr (t);
4020 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4021 expand_omp_build_assign (&gsif, v, t);
4022 if (itercnt == NULL_TREE)
4024 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4026 itercntbias
4027 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4028 fold_convert (itype, fd->loop.n1));
4029 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4030 itercntbias, step);
4031 itercntbias
4032 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4033 NULL_TREE, true,
4034 GSI_SAME_STMT);
4035 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4036 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4037 NULL_TREE, false,
4038 GSI_CONTINUE_LINKING);
4040 else
4041 itercnt = s0;
4043 a = fold_build2 (MULT_EXPR, type,
4044 fold_convert (type, itercnt),
4045 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4046 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4047 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4048 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4049 false, GSI_CONTINUE_LINKING);
4050 assign_stmt = gimple_build_assign (dest, t);
4051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4053 if (fd->collapse > 1)
4054 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4056 if (!broken_loop)
4058 /* The code controlling the sequential loop goes in CONT_BB,
4059 replacing the GIMPLE_OMP_CONTINUE. */
4060 gsi = gsi_last_bb (cont_bb);
4061 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4062 vmain = gimple_omp_continue_control_use (cont_stmt);
4063 vback = gimple_omp_continue_control_def (cont_stmt);
4065 if (!gimple_omp_for_combined_p (fd->for_stmt))
4067 if (POINTER_TYPE_P (type))
4068 t = fold_build_pointer_plus (vmain, step);
4069 else
4070 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4071 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4072 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4073 true, GSI_SAME_STMT);
4074 assign_stmt = gimple_build_assign (vback, t);
4075 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
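/* With a chunk size of 1 each chunk covers exactly one iteration, so the
   sequential loop can never branch back to BODY_BB; emit a constant-false
   condition instead of comparing V against E.  */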
4077 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4078 t = build2 (EQ_EXPR, boolean_type_node,
4079 build_int_cst (itype, 0),
4080 build_int_cst (itype, 1));
4081 else
4082 t = build2 (fd->loop.cond_code, boolean_type_node,
4083 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4084 ? t : vback, e);
4085 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4088 /* Remove GIMPLE_OMP_CONTINUE. */
4089 gsi_remove (&gsi, true);
4091 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4092 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4094 /* Trip update code goes into TRIP_UPDATE_BB. */
4095 gsi = gsi_start_bb (trip_update_bb);
4097 t = build_int_cst (itype, 1);
4098 t = build2 (PLUS_EXPR, itype, trip_main, t);
4099 assign_stmt = gimple_build_assign (trip_back, t);
4100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4103 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4104 gsi = gsi_last_bb (exit_bb);
4105 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4107 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4108 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4110 gsi_remove (&gsi, true);
4112 /* Connect the new blocks. */
4113 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4114 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4116 if (!broken_loop)
4118 se = find_edge (cont_bb, body_bb);
4119 if (se == NULL)
4121 se = BRANCH_EDGE (cont_bb);
4122 gcc_assert (single_succ (se->dest) == body_bb);
4124 if (gimple_omp_for_combined_p (fd->for_stmt))
4126 remove_edge (se);
4127 se = NULL;
4129 else if (fd->collapse > 1)
4131 remove_edge (se);
4132 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4134 else
4135 se->flags = EDGE_TRUE_VALUE;
4136 find_edge (cont_bb, trip_update_bb)->flags
4137 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4139 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4140 iter_part_bb);
4143 if (gimple_in_ssa_p (cfun))
4145 gphi_iterator psi;
4146 gphi *phi;
4147 edge re, ene;
4148 edge_var_map *vm;
4149 size_t i;
4151 gcc_assert (fd->collapse == 1 && !broken_loop);
4153 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4154 remove arguments of the phi nodes in fin_bb. We need to create
4155 appropriate phi nodes in iter_part_bb instead. */
4156 se = find_edge (iter_part_bb, fin_bb);
4157 re = single_succ_edge (trip_update_bb);
4158 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4159 ene = single_succ_edge (entry_bb);
4161 psi = gsi_start_phis (fin_bb);
4162 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4163 gsi_next (&psi), ++i)
4165 gphi *nphi;
4166 source_location locus;
4168 phi = psi.phi ();
4169 t = gimple_phi_result (phi);
4170 gcc_assert (t == redirect_edge_var_map_result (vm));
4172 if (!single_pred_p (fin_bb))
4173 t = copy_ssa_name (t, phi);
4175 nphi = create_phi_node (t, iter_part_bb);
4177 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4178 locus = gimple_phi_arg_location_from_edge (phi, se);
4180 /* A special case -- fd->loop.v is not yet computed in
4181 iter_part_bb, we need to use vextra instead. */
4182 if (t == fd->loop.v)
4183 t = vextra;
4184 add_phi_arg (nphi, t, ene, locus);
4185 locus = redirect_edge_var_map_location (vm);
4186 tree back_arg = redirect_edge_var_map_def (vm);
4187 add_phi_arg (nphi, back_arg, re, locus);
4188 edge ce = find_edge (cont_bb, body_bb);
4189 if (ce == NULL)
4191 ce = BRANCH_EDGE (cont_bb);
4192 gcc_assert (single_succ (ce->dest) == body_bb);
4193 ce = single_succ_edge (ce->dest);
4195 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4196 gcc_assert (inner_loop_phi != NULL);
4197 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4198 find_edge (seq_start_bb, body_bb), locus);
4200 if (!single_pred_p (fin_bb))
4201 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4203 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4204 redirect_edge_var_map_clear (re);
4205 if (single_pred_p (fin_bb))
4206 while (1)
4208 psi = gsi_start_phis (fin_bb);
4209 if (gsi_end_p (psi))
4210 break;
4211 remove_phi_node (&psi, false);
4214 /* Make phi node for trip. */
4215 phi = create_phi_node (trip_main, iter_part_bb);
4216 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4217 UNKNOWN_LOCATION);
4218 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4219 UNKNOWN_LOCATION);
4222 if (!broken_loop)
4223 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4224 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4225 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4226 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4227 recompute_dominator (CDI_DOMINATORS, fin_bb));
4228 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4229 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4230 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4231 recompute_dominator (CDI_DOMINATORS, body_bb));
4233 if (!broken_loop)
4235 struct loop *loop = body_bb->loop_father;
4236 struct loop *trip_loop = alloc_loop ();
4237 trip_loop->header = iter_part_bb;
4238 trip_loop->latch = trip_update_bb;
4239 add_loop (trip_loop, iter_part_bb->loop_father);
4241 if (loop != entry_bb->loop_father)
4243 gcc_assert (loop->header == body_bb);
4244 gcc_assert (loop->latch == region->cont
4245 || single_pred (loop->latch) == region->cont);
4246 trip_loop->inner = loop;
4247 return;
4250 if (!gimple_omp_for_combined_p (fd->for_stmt))
4252 loop = alloc_loop ();
4253 loop->header = body_bb;
4254 if (collapse_bb == NULL)
4255 loop->latch = cont_bb;
4256 add_loop (loop, trip_loop);
4261 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4262 Given parameters:
4263 for (V = N1; V cond N2; V += STEP) BODY;
4265 where COND is "<" or ">" or "!=", we generate pseudocode
4267 for (ind_var = low; ind_var < high; ind_var++)
4269 V = n1 + (ind_var * STEP)
4271 <BODY>
4274 In the above pseudocode, low and high are function parameters of the
4275 child function. In the function below, we insert a temporary
4276 variable that makes calls to two OMP functions that will not be
4277 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4278 with _Cilk_for). These functions are replaced with low and high
4279 by the function that handles taskreg. */
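/* Illustrative sketch only: the shape of the outlined child function the
   pseudocode above describes.  The name, the context parameter and the
   exact runtime calling convention are hypothetical; the point is just
   that the runtime hands each invocation a [low, high) range of logical
   iterations and the body recomputes V from the induction variable.  */
#if 0
static void
cilk_for_child_sketch (void *context, long low, long high)
{
  long n1 = 0, step = 1;                /* stand-ins for N1 and STEP */
  for (long ind_var = low; ind_var < high; ind_var++)
    {
      long v = n1 + ind_var * step;
      /* BODY, using V and *context */
    }
}
#endif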
4282 static void
4283 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4285 bool broken_loop = region->cont == NULL;
4286 basic_block entry_bb = region->entry;
4287 basic_block cont_bb = region->cont;
4289 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4290 gcc_assert (broken_loop
4291 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4292 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4293 basic_block l1_bb, l2_bb;
4295 if (!broken_loop)
4297 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4298 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4299 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4300 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4302 else
4304 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4305 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4306 l2_bb = single_succ (l1_bb);
4308 basic_block exit_bb = region->exit;
4309 basic_block l2_dom_bb = NULL;
4311 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4313 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4314 statements used to pass information to expand_omp_taskreg.
4315 low_val and high_val will be replaced by the __low and __high
4316 parameter from the child function.
4318 The call_exprs part is a place-holder; it is mainly used
4319 to distinctly identify to the top-level part that this is
4320 where we should put low and high (reasoning given in header
4321 comment). */
4323 gomp_parallel *par_stmt
4324 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4325 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4326 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4327 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4329 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4330 high_val = t;
4331 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4332 low_val = t;
4334 gcc_assert (low_val && high_val);
4336 tree type = TREE_TYPE (low_val);
4337 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4338 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4340 /* Not needed in SSA form right now. */
4341 gcc_assert (!gimple_in_ssa_p (cfun));
4342 if (l2_dom_bb == NULL)
4343 l2_dom_bb = l1_bb;
4345 tree n1 = low_val;
4346 tree n2 = high_val;
4348 gimple *stmt = gimple_build_assign (ind_var, n1);
4350 /* Replace the GIMPLE_OMP_FOR statement. */
4351 gsi_replace (&gsi, stmt, true);
4353 if (!broken_loop)
4355 /* Code to control the increment goes in the CONT_BB. */
4356 gsi = gsi_last_bb (cont_bb);
4357 stmt = gsi_stmt (gsi);
4358 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4359 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4360 build_one_cst (type));
4362 /* Replace GIMPLE_OMP_CONTINUE. */
4363 gsi_replace (&gsi, stmt, true);
4366 /* Emit the condition in L1_BB. */
4367 gsi = gsi_after_labels (l1_bb);
4368 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4369 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4370 fd->loop.step);
4371 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4372 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4373 fd->loop.n1, fold_convert (sizetype, t));
4374 else
4375 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4376 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4377 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4378 expand_omp_build_assign (&gsi, fd->loop.v, t);
4380 /* The condition is always '<' since the runtime will fill in the low
4381 and high values. */
4382 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4383 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4385 /* Remove GIMPLE_OMP_RETURN. */
4386 gsi = gsi_last_bb (exit_bb);
4387 gsi_remove (&gsi, true);
4389 /* Connect the new blocks. */
4390 remove_edge (FALLTHRU_EDGE (entry_bb));
4392 edge e, ne;
4393 if (!broken_loop)
4395 remove_edge (BRANCH_EDGE (entry_bb));
4396 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4398 e = BRANCH_EDGE (l1_bb);
4399 ne = FALLTHRU_EDGE (l1_bb);
4400 e->flags = EDGE_TRUE_VALUE;
4402 else
4404 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4406 ne = single_succ_edge (l1_bb);
4407 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4410 ne->flags = EDGE_FALSE_VALUE;
4411 e->probability = REG_BR_PROB_BASE * 7 / 8;
4412 ne->probability = REG_BR_PROB_BASE / 8;
4414 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4415 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4416 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4418 if (!broken_loop)
4420 struct loop *loop = alloc_loop ();
4421 loop->header = l1_bb;
4422 loop->latch = cont_bb;
4423 add_loop (loop, l1_bb->loop_father);
4424 loop->safelen = INT_MAX;
4427 /* Pick the correct library function based on the precision of the
4428 induction variable type. */
4429 tree lib_fun = NULL_TREE;
4430 if (TYPE_PRECISION (type) == 32)
4431 lib_fun = cilk_for_32_fndecl;
4432 else if (TYPE_PRECISION (type) == 64)
4433 lib_fun = cilk_for_64_fndecl;
4434 else
4435 gcc_unreachable ();
4437 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4439 /* WS_ARGS contains the library function flavor to call:
4440 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4441 user-defined grain value. If the user does not define one, then zero
4442 is passed in by the parser. */
4443 vec_alloc (region->ws_args, 2);
4444 region->ws_args->quick_push (lib_fun);
4445 region->ws_args->quick_push (fd->chunk_size);
4448 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4449 loop. Given parameters:
4451 for (V = N1; V cond N2; V += STEP) BODY;
4453 where COND is "<" or ">", we generate pseudocode
4455 V = N1;
4456 goto L1;
4458 BODY;
4459 V += STEP;
4461 if (V cond N2) goto L0; else goto L2;
4464 For collapsed loops, given parameters:
4465 collapse(3)
4466 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4467 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4468 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4469 BODY;
4471 we generate pseudocode
4473 if (cond3 is <)
4474 adj = STEP3 - 1;
4475 else
4476 adj = STEP3 + 1;
4477 count3 = (adj + N32 - N31) / STEP3;
4478 if (cond2 is <)
4479 adj = STEP2 - 1;
4480 else
4481 adj = STEP2 + 1;
4482 count2 = (adj + N22 - N21) / STEP2;
4483 if (cond1 is <)
4484 adj = STEP1 - 1;
4485 else
4486 adj = STEP1 + 1;
4487 count1 = (adj + N12 - N11) / STEP1;
4488 count = count1 * count2 * count3;
4489 V = 0;
4490 V1 = N11;
4491 V2 = N21;
4492 V3 = N31;
4493 goto L1;
4495 BODY;
4496 V += 1;
4497 V3 += STEP3;
4498 V2 += (V3 cond3 N32) ? 0 : STEP2;
4499 V3 = (V3 cond3 N32) ? V3 : N31;
4500 V1 += (V2 cond2 N22) ? 0 : STEP1;
4501 V2 = (V2 cond2 N22) ? V2 : N21;
4503 if (V < count) goto L0; else goto L2;
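/* Illustrative sketch only: the collapse(2) instance of the pseudocode
   above as plain C, assuming both conditions are '<' and both steps are
   positive.  V counts logical iterations; V2 is stepped every iteration
   and the carry into V1 happens whenever V2 runs past N22.  */
#if 0
static void
collapsed_simd_sketch (long n11, long n12, long step1,
                       long n21, long n22, long step2)
{
  long count1 = (step1 - 1 + n12 - n11) / step1;
  long count2 = (step2 - 1 + n22 - n21) / step2;
  long count = count1 * count2;
  long v1 = n11, v2 = n21;
  for (long v = 0; v < count; v++)
    {
      /* BODY uses v1 and v2 */
      v2 += step2;
      v1 += (v2 < n22) ? 0 : step1;
      v2 = (v2 < n22) ? v2 : n21;
    }
}
#endif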
4508 static void
4509 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4511 tree type, t;
4512 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4513 gimple_stmt_iterator gsi;
4514 gimple *stmt;
4515 gcond *cond_stmt;
4516 bool broken_loop = region->cont == NULL;
4517 edge e, ne;
4518 tree *counts = NULL;
4519 int i;
4520 int safelen_int = INT_MAX;
4521 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4522 OMP_CLAUSE_SAFELEN);
4523 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4524 OMP_CLAUSE__SIMDUID_);
4525 tree n1, n2;
4527 if (safelen)
4529 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4530 if (TREE_CODE (safelen) != INTEGER_CST)
4531 safelen_int = 0;
4532 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4533 safelen_int = tree_to_uhwi (safelen);
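/* A safelen of 1 permits no concurrent iterations at all, so treat it
   the same as having no usable safelen.  */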
4534 if (safelen_int == 1)
4535 safelen_int = 0;
4537 type = TREE_TYPE (fd->loop.v);
4538 entry_bb = region->entry;
4539 cont_bb = region->cont;
4540 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4541 gcc_assert (broken_loop
4542 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4543 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4544 if (!broken_loop)
4546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4547 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4548 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4549 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4551 else
4553 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4554 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4555 l2_bb = single_succ (l1_bb);
4557 exit_bb = region->exit;
4558 l2_dom_bb = NULL;
4560 gsi = gsi_last_bb (entry_bb);
4562 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4563 /* Not needed in SSA form right now. */
4564 gcc_assert (!gimple_in_ssa_p (cfun));
4565 if (fd->collapse > 1)
4567 int first_zero_iter = -1, dummy = -1;
4568 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4570 counts = XALLOCAVEC (tree, fd->collapse);
4571 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4572 zero_iter_bb, first_zero_iter,
4573 dummy_bb, dummy, l2_dom_bb);
4575 if (l2_dom_bb == NULL)
4576 l2_dom_bb = l1_bb;
4578 n1 = fd->loop.n1;
4579 n2 = fd->loop.n2;
4580 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4582 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4583 OMP_CLAUSE__LOOPTEMP_);
4584 gcc_assert (innerc);
4585 n1 = OMP_CLAUSE_DECL (innerc);
4586 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4587 OMP_CLAUSE__LOOPTEMP_);
4588 gcc_assert (innerc);
4589 n2 = OMP_CLAUSE_DECL (innerc);
4591 tree step = fd->loop.step;
4593 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4594 OMP_CLAUSE__SIMT_);
4595 if (is_simt)
4597 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4598 is_simt = safelen_int > 1;
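/* For SIMT execution each lane starts at N1 + SIMT_LANE * STEP and then
   advances by STEP * VF, so the lanes interleave to cover exactly the
   iterations of the original loop.  SIMT_MAXLANE caps the number of
   participating lanes when SAFELEN is smaller than the vector width
   (collapsed loops are limited to a single lane for now).  */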
4600 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4601 if (is_simt)
4603 simt_lane = create_tmp_var (unsigned_type_node);
4604 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4605 gimple_call_set_lhs (g, simt_lane);
4606 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4607 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4608 fold_convert (TREE_TYPE (step), simt_lane));
4609 n1 = fold_convert (type, n1);
4610 if (POINTER_TYPE_P (type))
4611 n1 = fold_build_pointer_plus (n1, offset);
4612 else
4613 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4615 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4616 if (fd->collapse > 1)
4617 simt_maxlane = build_one_cst (unsigned_type_node);
4618 else if (safelen_int < omp_max_simt_vf ())
4619 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4620 tree vf
4621 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4622 unsigned_type_node, 0);
4623 if (simt_maxlane)
4624 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4625 vf = fold_convert (TREE_TYPE (step), vf);
4626 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4629 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4630 if (fd->collapse > 1)
4632 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4634 gsi_prev (&gsi);
4635 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4636 gsi_next (&gsi);
4638 else
4639 for (i = 0; i < fd->collapse; i++)
4641 tree itype = TREE_TYPE (fd->loops[i].v);
4642 if (POINTER_TYPE_P (itype))
4643 itype = signed_type_for (itype);
4644 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4645 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4649 /* Remove the GIMPLE_OMP_FOR statement. */
4650 gsi_remove (&gsi, true);
4652 if (!broken_loop)
4654 /* Code to control the increment goes in the CONT_BB. */
4655 gsi = gsi_last_bb (cont_bb);
4656 stmt = gsi_stmt (gsi);
4657 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4659 if (POINTER_TYPE_P (type))
4660 t = fold_build_pointer_plus (fd->loop.v, step);
4661 else
4662 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4663 expand_omp_build_assign (&gsi, fd->loop.v, t);
4665 if (fd->collapse > 1)
4667 i = fd->collapse - 1;
4668 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4670 t = fold_convert (sizetype, fd->loops[i].step);
4671 t = fold_build_pointer_plus (fd->loops[i].v, t);
4673 else
4675 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4676 fd->loops[i].step);
4677 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4678 fd->loops[i].v, t);
4680 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4682 for (i = fd->collapse - 1; i > 0; i--)
4684 tree itype = TREE_TYPE (fd->loops[i].v);
4685 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4686 if (POINTER_TYPE_P (itype2))
4687 itype2 = signed_type_for (itype2);
4688 t = build3 (COND_EXPR, itype2,
4689 build2 (fd->loops[i].cond_code, boolean_type_node,
4690 fd->loops[i].v,
4691 fold_convert (itype, fd->loops[i].n2)),
4692 build_int_cst (itype2, 0),
4693 fold_convert (itype2, fd->loops[i - 1].step));
4694 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4695 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4696 else
4697 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4698 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4700 t = build3 (COND_EXPR, itype,
4701 build2 (fd->loops[i].cond_code, boolean_type_node,
4702 fd->loops[i].v,
4703 fold_convert (itype, fd->loops[i].n2)),
4704 fd->loops[i].v,
4705 fold_convert (itype, fd->loops[i].n1));
4706 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4710 /* Remove GIMPLE_OMP_CONTINUE. */
4711 gsi_remove (&gsi, true);
4714 /* Emit the condition in L1_BB. */
4715 gsi = gsi_start_bb (l1_bb);
4717 t = fold_convert (type, n2);
4718 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4719 false, GSI_CONTINUE_LINKING);
4720 tree v = fd->loop.v;
4721 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4722 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4723 false, GSI_CONTINUE_LINKING);
4724 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4725 cond_stmt = gimple_build_cond_empty (t);
4726 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4727 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4728 NULL, NULL)
4729 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4730 NULL, NULL))
4732 gsi = gsi_for_stmt (cond_stmt);
4733 gimple_regimplify_operands (cond_stmt, &gsi);
4736 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4737 if (is_simt)
4739 gsi = gsi_start_bb (l2_bb);
4740 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4741 if (POINTER_TYPE_P (type))
4742 t = fold_build_pointer_plus (fd->loop.v, step);
4743 else
4744 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4745 expand_omp_build_assign (&gsi, fd->loop.v, t);
4748 /* Remove GIMPLE_OMP_RETURN. */
4749 gsi = gsi_last_bb (exit_bb);
4750 gsi_remove (&gsi, true);
4752 /* Connect the new blocks. */
4753 remove_edge (FALLTHRU_EDGE (entry_bb));
4755 if (!broken_loop)
4757 remove_edge (BRANCH_EDGE (entry_bb));
4758 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4760 e = BRANCH_EDGE (l1_bb);
4761 ne = FALLTHRU_EDGE (l1_bb);
4762 e->flags = EDGE_TRUE_VALUE;
4764 else
4766 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4768 ne = single_succ_edge (l1_bb);
4769 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4772 ne->flags = EDGE_FALSE_VALUE;
4773 e->probability = REG_BR_PROB_BASE * 7 / 8;
4774 ne->probability = REG_BR_PROB_BASE / 8;
4776 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4777 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4779 if (simt_maxlane)
4781 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4782 NULL_TREE, NULL_TREE);
4783 gsi = gsi_last_bb (entry_bb);
4784 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4785 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4786 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4787 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4788 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4789 l2_dom_bb = entry_bb;
4791 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4793 if (!broken_loop)
4795 struct loop *loop = alloc_loop ();
4796 loop->header = l1_bb;
4797 loop->latch = cont_bb;
4798 add_loop (loop, l1_bb->loop_father);
4799 loop->safelen = safelen_int;
4800 if (simduid)
4802 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4803 cfun->has_simduid_loops = true;
4805 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4806 the loop. */
4807 if ((flag_tree_loop_vectorize
4808 || (!global_options_set.x_flag_tree_loop_vectorize
4809 && !global_options_set.x_flag_tree_vectorize))
4810 && flag_tree_loop_optimize
4811 && loop->safelen > 1)
4813 loop->force_vectorize = true;
4814 cfun->has_force_vectorize_loops = true;
4817 else if (simduid)
4818 cfun->has_simduid_loops = true;
4821 /* Taskloop construct is represented after gimplification with
4822 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4823 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4824 which should just compute all the needed loop temporaries
4825 for GIMPLE_OMP_TASK. */
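/* Schematically (illustrative only):

     GIMPLE_OMP_FOR (taskloop, outer)   <- expanded here: no loop is
       GIMPLE_OMP_TASK                     emitted, we only store the
         GIMPLE_OMP_FOR (inner)            bounds into the _looptemp_
           BODY                            clauses consumed by the task

   GOMP_taskloop{,_ull} later splits [startvar, endvar) into per-task
   ranges that the inner GIMPLE_OMP_FOR iterates over.  */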
4827 static void
4828 expand_omp_taskloop_for_outer (struct omp_region *region,
4829 struct omp_for_data *fd,
4830 gimple *inner_stmt)
4832 tree type, bias = NULL_TREE;
4833 basic_block entry_bb, cont_bb, exit_bb;
4834 gimple_stmt_iterator gsi;
4835 gassign *assign_stmt;
4836 tree *counts = NULL;
4837 int i;
4839 gcc_assert (inner_stmt);
4840 gcc_assert (region->cont);
4841 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4842 && gimple_omp_task_taskloop_p (inner_stmt));
4843 type = TREE_TYPE (fd->loop.v);
4845 /* See if we need to bias by LLONG_MIN. */
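/* The runtime interface uses an unsigned long long iteration space.  If
   the loop variable is signed and its range may straddle zero, add
   TYPE_MIN_VALUE to both bounds: that maps the signed range monotonically
   onto the unsigned type, so the runtime's unsigned comparisons still
   order the iterations correctly.  */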
4846 if (fd->iter_type == long_long_unsigned_type_node
4847 && TREE_CODE (type) == INTEGER_TYPE
4848 && !TYPE_UNSIGNED (type))
4850 tree n1, n2;
4852 if (fd->loop.cond_code == LT_EXPR)
4854 n1 = fd->loop.n1;
4855 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4857 else
4859 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4860 n2 = fd->loop.n1;
4862 if (TREE_CODE (n1) != INTEGER_CST
4863 || TREE_CODE (n2) != INTEGER_CST
4864 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4865 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4868 entry_bb = region->entry;
4869 cont_bb = region->cont;
4870 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4871 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4872 exit_bb = region->exit;
4874 gsi = gsi_last_bb (entry_bb);
4875 gimple *for_stmt = gsi_stmt (gsi);
4876 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4877 if (fd->collapse > 1)
4879 int first_zero_iter = -1, dummy = -1;
4880 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4882 counts = XALLOCAVEC (tree, fd->collapse);
4883 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4884 zero_iter_bb, first_zero_iter,
4885 dummy_bb, dummy, l2_dom_bb);
4887 if (zero_iter_bb)
4889 /* Some counts[i] vars might be uninitialized if
4890 some loop has zero iterations. But the body shouldn't
4891 be executed in that case, so just avoid uninit warnings. */
4892 for (i = first_zero_iter; i < fd->collapse; i++)
4893 if (SSA_VAR_P (counts[i]))
4894 TREE_NO_WARNING (counts[i]) = 1;
4895 gsi_prev (&gsi);
4896 edge e = split_block (entry_bb, gsi_stmt (gsi));
4897 entry_bb = e->dest;
4898 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4899 gsi = gsi_last_bb (entry_bb);
4900 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4901 get_immediate_dominator (CDI_DOMINATORS,
4902 zero_iter_bb));
4906 tree t0, t1;
4907 t1 = fd->loop.n2;
4908 t0 = fd->loop.n1;
4909 if (POINTER_TYPE_P (TREE_TYPE (t0))
4910 && TYPE_PRECISION (TREE_TYPE (t0))
4911 != TYPE_PRECISION (fd->iter_type))
4913 /* Avoid casting pointers to integer of a different size. */
4914 tree itype = signed_type_for (type);
4915 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4916 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4918 else
4920 t1 = fold_convert (fd->iter_type, t1);
4921 t0 = fold_convert (fd->iter_type, t0);
4923 if (bias)
4925 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4926 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4929 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4930 OMP_CLAUSE__LOOPTEMP_);
4931 gcc_assert (innerc);
4932 tree startvar = OMP_CLAUSE_DECL (innerc);
4933 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4934 gcc_assert (innerc);
4935 tree endvar = OMP_CLAUSE_DECL (innerc);
4936 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4938 gcc_assert (innerc);
4939 for (i = 1; i < fd->collapse; i++)
4941 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4942 OMP_CLAUSE__LOOPTEMP_);
4943 gcc_assert (innerc);
4945 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4946 OMP_CLAUSE__LOOPTEMP_);
4947 if (innerc)
4949 /* If needed (inner taskloop has lastprivate clause), propagate
4950 down the total number of iterations. */
4951 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4952 NULL_TREE, false,
4953 GSI_CONTINUE_LINKING);
4954 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4955 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4959 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4960 GSI_CONTINUE_LINKING);
4961 assign_stmt = gimple_build_assign (startvar, t0);
4962 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4964 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4965 GSI_CONTINUE_LINKING);
4966 assign_stmt = gimple_build_assign (endvar, t1);
4967 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4968 if (fd->collapse > 1)
4969 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4971 /* Remove the GIMPLE_OMP_FOR statement. */
4972 gsi = gsi_for_stmt (for_stmt);
4973 gsi_remove (&gsi, true);
4975 gsi = gsi_last_bb (cont_bb);
4976 gsi_remove (&gsi, true);
4978 gsi = gsi_last_bb (exit_bb);
4979 gsi_remove (&gsi, true);
4981 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
4982 remove_edge (BRANCH_EDGE (entry_bb));
4983 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
4984 remove_edge (BRANCH_EDGE (cont_bb));
4985 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4986 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4987 recompute_dominator (CDI_DOMINATORS, region->entry));
4990 /* Taskloop construct is represented after gimplification with
4991 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4992 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4993 GOMP_taskloop{,_ull} function arranges for each task to be given just
4994 a single range of iterations. */
4996 static void
4997 expand_omp_taskloop_for_inner (struct omp_region *region,
4998 struct omp_for_data *fd,
4999 gimple *inner_stmt)
5001 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5002 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5003 basic_block fin_bb;
5004 gimple_stmt_iterator gsi;
5005 edge ep;
5006 bool broken_loop = region->cont == NULL;
5007 tree *counts = NULL;
5008 tree n1, n2, step;
5010 itype = type = TREE_TYPE (fd->loop.v);
5011 if (POINTER_TYPE_P (type))
5012 itype = signed_type_for (type);
5014 /* See if we need to bias by LLONG_MIN. */
5015 if (fd->iter_type == long_long_unsigned_type_node
5016 && TREE_CODE (type) == INTEGER_TYPE
5017 && !TYPE_UNSIGNED (type))
5019 tree n1, n2;
5021 if (fd->loop.cond_code == LT_EXPR)
5023 n1 = fd->loop.n1;
5024 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5026 else
5028 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5029 n2 = fd->loop.n1;
5031 if (TREE_CODE (n1) != INTEGER_CST
5032 || TREE_CODE (n2) != INTEGER_CST
5033 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5034 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5037 entry_bb = region->entry;
5038 cont_bb = region->cont;
5039 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5040 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5041 gcc_assert (broken_loop
5042 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5043 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5044 if (!broken_loop)
5046 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5047 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5049 exit_bb = region->exit;
5051 /* Iteration space partitioning goes in ENTRY_BB. */
5052 gsi = gsi_last_bb (entry_bb);
5053 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5055 if (fd->collapse > 1)
5057 int first_zero_iter = -1, dummy = -1;
5058 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5060 counts = XALLOCAVEC (tree, fd->collapse);
5061 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5062 fin_bb, first_zero_iter,
5063 dummy_bb, dummy, l2_dom_bb);
5064 t = NULL_TREE;
5066 else
5067 t = integer_one_node;
5069 step = fd->loop.step;
5070 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5071 OMP_CLAUSE__LOOPTEMP_);
5072 gcc_assert (innerc);
5073 n1 = OMP_CLAUSE_DECL (innerc);
5074 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5075 gcc_assert (innerc);
5076 n2 = OMP_CLAUSE_DECL (innerc);
5077 if (bias)
5079 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5080 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5082 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5083 true, NULL_TREE, true, GSI_SAME_STMT);
5084 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5085 true, NULL_TREE, true, GSI_SAME_STMT);
5086 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5087 true, NULL_TREE, true, GSI_SAME_STMT);
5089 tree startvar = fd->loop.v;
5090 tree endvar = NULL_TREE;
5092 if (gimple_omp_for_combined_p (fd->for_stmt))
5094 tree clauses = gimple_omp_for_clauses (inner_stmt);
5095 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5096 gcc_assert (innerc);
5097 startvar = OMP_CLAUSE_DECL (innerc);
5098 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5099 OMP_CLAUSE__LOOPTEMP_);
5100 gcc_assert (innerc);
5101 endvar = OMP_CLAUSE_DECL (innerc);
5103 t = fold_convert (TREE_TYPE (startvar), n1);
5104 t = force_gimple_operand_gsi (&gsi, t,
5105 DECL_P (startvar)
5106 && TREE_ADDRESSABLE (startvar),
5107 NULL_TREE, false, GSI_CONTINUE_LINKING);
5108 gimple *assign_stmt = gimple_build_assign (startvar, t);
5109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5111 t = fold_convert (TREE_TYPE (startvar), n2);
5112 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5113 false, GSI_CONTINUE_LINKING);
5114 if (endvar)
5116 assign_stmt = gimple_build_assign (endvar, e);
5117 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5118 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5119 assign_stmt = gimple_build_assign (fd->loop.v, e);
5120 else
5121 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5122 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5124 if (fd->collapse > 1)
5125 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5127 if (!broken_loop)
5129 /* The code controlling the sequential loop replaces the
5130 GIMPLE_OMP_CONTINUE. */
5131 gsi = gsi_last_bb (cont_bb);
5132 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5133 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5134 vmain = gimple_omp_continue_control_use (cont_stmt);
5135 vback = gimple_omp_continue_control_def (cont_stmt);
5137 if (!gimple_omp_for_combined_p (fd->for_stmt))
5139 if (POINTER_TYPE_P (type))
5140 t = fold_build_pointer_plus (vmain, step);
5141 else
5142 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5143 t = force_gimple_operand_gsi (&gsi, t,
5144 DECL_P (vback)
5145 && TREE_ADDRESSABLE (vback),
5146 NULL_TREE, true, GSI_SAME_STMT);
5147 assign_stmt = gimple_build_assign (vback, t);
5148 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5150 t = build2 (fd->loop.cond_code, boolean_type_node,
5151 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5152 ? t : vback, e);
5153 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5156 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5157 gsi_remove (&gsi, true);
5159 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5160 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5163 /* Remove the GIMPLE_OMP_FOR statement. */
5164 gsi = gsi_for_stmt (fd->for_stmt);
5165 gsi_remove (&gsi, true);
5167 /* Remove the GIMPLE_OMP_RETURN statement. */
5168 gsi = gsi_last_bb (exit_bb);
5169 gsi_remove (&gsi, true);
5171 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5172 if (!broken_loop)
5173 remove_edge (BRANCH_EDGE (entry_bb));
5174 else
5176 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5177 region->outer->cont = NULL;
5180 /* Connect all the blocks. */
5181 if (!broken_loop)
5183 ep = find_edge (cont_bb, body_bb);
5184 if (gimple_omp_for_combined_p (fd->for_stmt))
5186 remove_edge (ep);
5187 ep = NULL;
5189 else if (fd->collapse > 1)
5191 remove_edge (ep);
5192 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5194 else
5195 ep->flags = EDGE_TRUE_VALUE;
5196 find_edge (cont_bb, fin_bb)->flags
5197 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5200 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5201 recompute_dominator (CDI_DOMINATORS, body_bb));
5202 if (!broken_loop)
5203 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5204 recompute_dominator (CDI_DOMINATORS, fin_bb));
5206 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5208 struct loop *loop = alloc_loop ();
5209 loop->header = body_bb;
5210 if (collapse_bb == NULL)
5211 loop->latch = cont_bb;
5212 add_loop (loop, body_bb->loop_father);
5216 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5217 partitioned loop. The lowering here is abstracted, in that the
5218 loop parameters are passed through internal functions, which are
5219 further lowered by oacc_device_lower, once we get to the target
5220 compiler. The loop is of the form:
5222 for (V = B; V LTGT E; V += S) {BODY}
5224 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5225 (constant 0 for no chunking) and we will have a GWV partitioning
5226 mask, specifying dimensions over which the loop is to be
5227 partitioned (see note below). We generate code that looks like:
5229 <entry_bb> [incoming FALL->body, BRANCH->exit]
5230 typedef signedintify (typeof (V)) T; // underlying signed integral type
5231 T range = E - B;
5232 T chunk_no = 0;
5233 T DIR = LTGT == '<' ? +1 : -1;
5234 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5235 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5237 <head_bb> [created by splitting end of entry_bb]
5238 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5239 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5240 if (!(offset LTGT bound)) goto bottom_bb;
5242 <body_bb> [incoming]
5243 V = B + offset;
5244 {BODY}
5246 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5247 offset += step;
5248 if (offset LTGT bound) goto body_bb; [*]
5250 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5251 chunk_no++;
5252 if (chunk_no < chunk_max) goto head_bb;
5254 <exit_bb> [incoming]
5255 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5257 [*] Needed if V live at end of loop
5259 Note: CHUNKING & GWV mask are specified explicitly here. This is a
5260 transition, and will be specified by a more general mechanism shortly.
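/* Illustrative sketch only: the control flow described above, written as
   plain C.  The goacc_loop_* helpers are hypothetical stand-ins for the
   GOACC_LOOP_* internal functions, whose real semantics are only fixed
   later by oacc_device_lower; LTGT is assumed to be '<'.  */
#if 0
extern long goacc_loop_chunk (long range, long step);
extern long goacc_loop_step (long range, long step);
extern long goacc_loop_offset (long range, long step, long chunk_no);
extern long goacc_loop_bound (long range, long step, long offset);

static void
oacc_loop_sketch (long b, long e, long s)
{
  long range = e - b;
  long step = goacc_loop_step (range, s);
  long chunk_max = goacc_loop_chunk (range, s);
  for (long chunk_no = 0; chunk_no < chunk_max; chunk_no++)
    {
      long offset = goacc_loop_offset (range, s, chunk_no);
      long bound = goacc_loop_bound (range, s, offset);
      for (; offset < bound; offset += step)
        {
          long v = b + offset;
          /* BODY uses V */
        }
    }
}
#endif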
5263 static void
5264 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5266 tree v = fd->loop.v;
5267 enum tree_code cond_code = fd->loop.cond_code;
5268 enum tree_code plus_code = PLUS_EXPR;
5270 tree chunk_size = integer_minus_one_node;
5271 tree gwv = integer_zero_node;
5272 tree iter_type = TREE_TYPE (v);
5273 tree diff_type = iter_type;
5274 tree plus_type = iter_type;
5275 struct oacc_collapse *counts = NULL;
5277 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5278 == GF_OMP_FOR_KIND_OACC_LOOP);
5279 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5280 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5282 if (POINTER_TYPE_P (iter_type))
5284 plus_code = POINTER_PLUS_EXPR;
5285 plus_type = sizetype;
5287 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5288 diff_type = signed_type_for (diff_type);
5290 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5291 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5292 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5293 basic_block bottom_bb = NULL;
5295 /* entry_bb has two successors; the branch edge is to the exit
5296 block, fallthrough edge to body. */
5297 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5298 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5300 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5301 body_bb, or a block whose only successor is body_bb. Its
5302 fallthrough successor is the final block (same as the branch
5303 successor of the entry_bb). */
5304 if (cont_bb)
5306 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5307 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5309 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5310 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5312 else
5313 gcc_assert (!gimple_in_ssa_p (cfun));
5315 /* The exit block only has entry_bb and cont_bb as predecessors. */
5316 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5318 tree chunk_no;
5319 tree chunk_max = NULL_TREE;
5320 tree bound, offset;
5321 tree step = create_tmp_var (diff_type, ".step");
5322 bool up = cond_code == LT_EXPR;
5323 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5324 bool chunking = !gimple_in_ssa_p (cfun);
5325 bool negating;
5327 /* SSA instances. */
5328 tree offset_incr = NULL_TREE;
5329 tree offset_init = NULL_TREE;
5331 gimple_stmt_iterator gsi;
5332 gassign *ass;
5333 gcall *call;
5334 gimple *stmt;
5335 tree expr;
5336 location_t loc;
5337 edge split, be, fte;
5339 /* Split the end of entry_bb to create head_bb. */
5340 split = split_block (entry_bb, last_stmt (entry_bb));
5341 basic_block head_bb = split->dest;
5342 entry_bb = split->src;
5344 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5345 gsi = gsi_last_bb (entry_bb);
5346 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5347 loc = gimple_location (for_stmt);
5349 if (gimple_in_ssa_p (cfun))
5351 offset_init = gimple_omp_for_index (for_stmt, 0);
5352 gcc_assert (integer_zerop (fd->loop.n1));
5353 /* The SSA parallelizer does gang parallelism. */
5354 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5357 if (fd->collapse > 1)
5359 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5360 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5361 TREE_TYPE (fd->loop.n2));
5363 if (SSA_VAR_P (fd->loop.n2))
5365 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5366 true, GSI_SAME_STMT);
5367 ass = gimple_build_assign (fd->loop.n2, total);
5368 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5373 tree b = fd->loop.n1;
5374 tree e = fd->loop.n2;
5375 tree s = fd->loop.step;
5377 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5378 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5380 /* Convert the step, avoiding possible unsigned->signed overflow. */
5381 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5382 if (negating)
5383 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5384 s = fold_convert (diff_type, s);
5385 if (negating)
5386 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5387 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5389 if (!chunking)
5390 chunk_size = integer_zero_node;
5391 expr = fold_convert (diff_type, chunk_size);
5392 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5393 NULL_TREE, true, GSI_SAME_STMT);
5394 /* Determine the range, avoiding possible unsigned->signed overflow. */
5395 negating = !up && TYPE_UNSIGNED (iter_type);
5396 expr = fold_build2 (MINUS_EXPR, plus_type,
5397 fold_convert (plus_type, negating ? b : e),
5398 fold_convert (plus_type, negating ? e : b));
5399 expr = fold_convert (diff_type, expr);
5400 if (negating)
5401 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5402 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5403 NULL_TREE, true, GSI_SAME_STMT);
5405 chunk_no = build_int_cst (diff_type, 0);
5406 if (chunking)
5408 gcc_assert (!gimple_in_ssa_p (cfun));
5410 expr = chunk_no;
5411 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5412 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5414 ass = gimple_build_assign (chunk_no, expr);
5415 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5417 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5418 build_int_cst (integer_type_node,
5419 IFN_GOACC_LOOP_CHUNKS),
5420 dir, range, s, chunk_size, gwv);
5421 gimple_call_set_lhs (call, chunk_max);
5422 gimple_set_location (call, loc);
5423 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5425 else
5426 chunk_size = chunk_no;
5428 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5429 build_int_cst (integer_type_node,
5430 IFN_GOACC_LOOP_STEP),
5431 dir, range, s, chunk_size, gwv);
5432 gimple_call_set_lhs (call, step);
5433 gimple_set_location (call, loc);
5434 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5436 /* Remove the GIMPLE_OMP_FOR. */
5437 gsi_remove (&gsi, true);
5439 /* Fixup edges from head_bb. */
5440 be = BRANCH_EDGE (head_bb);
5441 fte = FALLTHRU_EDGE (head_bb);
5442 be->flags |= EDGE_FALSE_VALUE;
5443 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5445 basic_block body_bb = fte->dest;
5447 if (gimple_in_ssa_p (cfun))
5449 gsi = gsi_last_bb (cont_bb);
5450 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5452 offset = gimple_omp_continue_control_use (cont_stmt);
5453 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5455 else
5457 offset = create_tmp_var (diff_type, ".offset");
5458 offset_init = offset_incr = offset;
5460 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5462 /* Loop offset & bound go into head_bb. */
5463 gsi = gsi_start_bb (head_bb);
5465 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5466 build_int_cst (integer_type_node,
5467 IFN_GOACC_LOOP_OFFSET),
5468 dir, range, s,
5469 chunk_size, gwv, chunk_no);
5470 gimple_call_set_lhs (call, offset_init);
5471 gimple_set_location (call, loc);
5472 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5474 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5475 build_int_cst (integer_type_node,
5476 IFN_GOACC_LOOP_BOUND),
5477 dir, range, s,
5478 chunk_size, gwv, offset_init);
5479 gimple_call_set_lhs (call, bound);
5480 gimple_set_location (call, loc);
5481 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5483 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5484 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5485 GSI_CONTINUE_LINKING);
5487 /* V assignment goes into body_bb. */
5488 if (!gimple_in_ssa_p (cfun))
5490 gsi = gsi_start_bb (body_bb);
5492 expr = build2 (plus_code, iter_type, b,
5493 fold_convert (plus_type, offset));
5494 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5495 true, GSI_SAME_STMT);
5496 ass = gimple_build_assign (v, expr);
5497 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5498 if (fd->collapse > 1)
5499 expand_oacc_collapse_vars (fd, &gsi, counts, v);
5502 /* Loop increment goes into cont_bb. If this is not a loop, we
5503 will have spawned threads as if it was, and each one will
5504 execute one iteration. The specification is not explicit about
5505 whether such constructs are ill-formed or not, and they can
5506 occur, especially when noreturn routines are involved. */
5507 if (cont_bb)
5509 gsi = gsi_last_bb (cont_bb);
5510 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5511 loc = gimple_location (cont_stmt);
5513 /* Increment offset. */
5514 if (gimple_in_ssa_p (cfun))
5515 expr = build2 (plus_code, iter_type, offset,
5516 fold_convert (plus_type, step));
5517 else
5518 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5519 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5520 true, GSI_SAME_STMT);
5521 ass = gimple_build_assign (offset_incr, expr);
5522 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5523 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5524 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5526 /* Remove the GIMPLE_OMP_CONTINUE. */
5527 gsi_remove (&gsi, true);
5529 /* Fixup edges from cont_bb. */
5530 be = BRANCH_EDGE (cont_bb);
5531 fte = FALLTHRU_EDGE (cont_bb);
5532 be->flags |= EDGE_TRUE_VALUE;
5533 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5535 if (chunking)
5537 /* Split the beginning of exit_bb to make bottom_bb. We
5538 need to insert a nop at the start, because splitting is
5539 after a stmt, not before. */
5540 gsi = gsi_start_bb (exit_bb);
5541 stmt = gimple_build_nop ();
5542 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5543 split = split_block (exit_bb, stmt);
5544 bottom_bb = split->src;
5545 exit_bb = split->dest;
5546 gsi = gsi_last_bb (bottom_bb);
5548 /* Chunk increment and test go into bottom_bb. */
5549 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5550 build_int_cst (diff_type, 1));
5551 ass = gimple_build_assign (chunk_no, expr);
5552 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5554 /* Chunk test at end of bottom_bb. */
5555 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5556 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5557 GSI_CONTINUE_LINKING);
5559 /* Fixup edges from bottom_bb. */
5560 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5561 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5565 gsi = gsi_last_bb (exit_bb);
5566 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5567 loc = gimple_location (gsi_stmt (gsi));
5569 if (!gimple_in_ssa_p (cfun))
5571 /* Insert the final value of V, in case it is live. This is the
5572 value for the only thread that survives past the join. */
5573 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5574 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5575 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5576 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5577 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5578 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5579 true, GSI_SAME_STMT);
5580 ass = gimple_build_assign (v, expr);
5581 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5584 /* Remove the OMP_RETURN. */
5585 gsi_remove (&gsi, true);
5587 if (cont_bb)
5589 /* We now have one or two nested loops. Update the loop
5590 structures. */
5591 struct loop *parent = entry_bb->loop_father;
5592 struct loop *body = body_bb->loop_father;
5594 if (chunking)
5596 struct loop *chunk_loop = alloc_loop ();
5597 chunk_loop->header = head_bb;
5598 chunk_loop->latch = bottom_bb;
5599 add_loop (chunk_loop, parent);
5600 parent = chunk_loop;
5602 else if (parent != body)
5604 gcc_assert (body->header == body_bb);
5605 gcc_assert (body->latch == cont_bb
5606 || single_pred (body->latch) == cont_bb);
5607 parent = NULL;
5610 if (parent)
5612 struct loop *body_loop = alloc_loop ();
5613 body_loop->header = body_bb;
5614 body_loop->latch = cont_bb;
5615 add_loop (body_loop, parent);
5620 /* Expand the OMP loop defined by REGION. */
5622 static void
5623 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5625 struct omp_for_data fd;
5626 struct omp_for_data_loop *loops;
5628 loops
5629 = (struct omp_for_data_loop *)
5630 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5631 * sizeof (struct omp_for_data_loop));
5632 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5633 &fd, loops);
5634 region->sched_kind = fd.sched_kind;
5635 region->sched_modifiers = fd.sched_modifiers;
5637 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5638 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5639 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5640 if (region->cont)
5642 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5643 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5644 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5646 else
5647 /* If there isn't a continue then this is a degenerate case where
5648 the introduction of abnormal edges during lowering will prevent
5649 original loops from being detected. Fix that up. */
5650 loops_state_set (LOOPS_NEED_FIXUP);
5652 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5653 expand_omp_simd (region, &fd);
5654 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5655 expand_cilk_for (region, &fd);
5656 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5658 gcc_assert (!inner_stmt);
5659 expand_oacc_for (region, &fd);
5661 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5663 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5664 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5665 else
5666 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5668 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5669 && !fd.have_ordered)
5671 if (fd.chunk_size == NULL)
5672 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5673 else
5674 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5676 else
5678 int fn_index, start_ix, next_ix;
5680 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5681 == GF_OMP_FOR_KIND_FOR);
5682 if (fd.chunk_size == NULL
5683 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5684 fd.chunk_size = integer_zero_node;
5685 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5686 switch (fd.sched_kind)
5688 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5689 fn_index = 3;
5690 break;
5691 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5692 case OMP_CLAUSE_SCHEDULE_GUIDED:
5693 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5694 && !fd.ordered
5695 && !fd.have_ordered)
5697 fn_index = 3 + fd.sched_kind;
5698 break;
5700 /* FALLTHRU */
5701 default:
5702 fn_index = fd.sched_kind;
5703 break;
5705 if (!fd.ordered)
5706 fn_index += fd.have_ordered * 6;
5707 if (fd.ordered)
5708 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5709 else
5710 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5711 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5712 if (fd.iter_type == long_long_unsigned_type_node)
5714 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5715 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5716 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5717 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5719 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5720 (enum built_in_function) next_ix, inner_stmt);
5723 if (gimple_in_ssa_p (cfun))
5724 update_ssa (TODO_update_ssa_only_virtuals);
5727 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5729 v = GOMP_sections_start (n);
5731 switch (v)
5733 case 0:
5734 goto L2;
5735 case 1:
5736 section 1;
5737 goto L1;
5738 case 2:
5740 case n:
5742 default:
5743 abort ();
5746 v = GOMP_sections_next ();
5747 goto L0;
5749 reduction;
5751 If this is a combined parallel sections, replace the call to
5752 GOMP_sections_start with a call to GOMP_sections_next. */
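/* As a concrete example, the user-level code

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   follows the pseudo code above with n == 2: each section body becomes
   one case of the switch, GOMP_sections_start (2) returns the first
   section this thread should run (or 0 if none are left), and each
   thread keeps calling GOMP_sections_next () until it returns 0.  */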
5754 static void
5755 expand_omp_sections (struct omp_region *region)
5757 tree t, u, vin = NULL, vmain, vnext, l2;
5758 unsigned len;
5759 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5760 gimple_stmt_iterator si, switch_si;
5761 gomp_sections *sections_stmt;
5762 gimple *stmt;
5763 gomp_continue *cont;
5764 edge_iterator ei;
5765 edge e;
5766 struct omp_region *inner;
5767 unsigned i, casei;
5768 bool exit_reachable = region->cont != NULL;
5770 gcc_assert (region->exit != NULL);
5771 entry_bb = region->entry;
5772 l0_bb = single_succ (entry_bb);
5773 l1_bb = region->cont;
5774 l2_bb = region->exit;
5775 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5776 l2 = gimple_block_label (l2_bb);
5777 else
5779 /* This can happen if there are reductions. */
5780 len = EDGE_COUNT (l0_bb->succs);
5781 gcc_assert (len > 0);
5782 e = EDGE_SUCC (l0_bb, len - 1);
5783 si = gsi_last_bb (e->dest);
5784 l2 = NULL_TREE;
5785 if (gsi_end_p (si)
5786 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5787 l2 = gimple_block_label (e->dest);
5788 else
5789 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5791 si = gsi_last_bb (e->dest);
5792 if (gsi_end_p (si)
5793 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5795 l2 = gimple_block_label (e->dest);
5796 break;
5800 if (exit_reachable)
5801 default_bb = create_empty_bb (l1_bb->prev_bb);
5802 else
5803 default_bb = create_empty_bb (l0_bb);
5805 /* We will build a switch() with enough cases for all the
5806 GIMPLE_OMP_SECTION regions, a '0' case to handle there being no more work
5807 and a default case to abort if something goes wrong. */
5808 len = EDGE_COUNT (l0_bb->succs);
5810 /* Use vec::quick_push on label_vec throughout, since we know the size
5811 in advance. */
5812 auto_vec<tree> label_vec (len);
5814 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5815 GIMPLE_OMP_SECTIONS statement. */
5816 si = gsi_last_bb (entry_bb);
5817 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5818 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5819 vin = gimple_omp_sections_control (sections_stmt);
5820 if (!is_combined_parallel (region))
5822 /* If we are not inside a combined parallel+sections region,
5823 call GOMP_sections_start. */
5824 t = build_int_cst (unsigned_type_node, len - 1);
5825 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5826 stmt = gimple_build_call (u, 1, t);
5828 else
5830 /* Otherwise, call GOMP_sections_next. */
5831 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5832 stmt = gimple_build_call (u, 0);
5834 gimple_call_set_lhs (stmt, vin);
5835 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5836 gsi_remove (&si, true);
5838 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5839 L0_BB. */
5840 switch_si = gsi_last_bb (l0_bb);
5841 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5842 if (exit_reachable)
5844 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5845 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5846 vmain = gimple_omp_continue_control_use (cont);
5847 vnext = gimple_omp_continue_control_def (cont);
5849 else
5851 vmain = vin;
5852 vnext = NULL_TREE;
5855 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5856 label_vec.quick_push (t);
5857 i = 1;
5859 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5860 for (inner = region->inner, casei = 1;
5861 inner;
5862 inner = inner->next, i++, casei++)
5864 basic_block s_entry_bb, s_exit_bb;
5866 /* Skip optional reduction region. */
5867 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5869 --i;
5870 --casei;
5871 continue;
5874 s_entry_bb = inner->entry;
5875 s_exit_bb = inner->exit;
5877 t = gimple_block_label (s_entry_bb);
5878 u = build_int_cst (unsigned_type_node, casei);
5879 u = build_case_label (u, NULL, t);
5880 label_vec.quick_push (u);
5882 si = gsi_last_bb (s_entry_bb);
5883 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5884 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5885 gsi_remove (&si, true);
5886 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5888 if (s_exit_bb == NULL)
5889 continue;
5891 si = gsi_last_bb (s_exit_bb);
5892 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5893 gsi_remove (&si, true);
5895 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5898 /* Error handling code goes in DEFAULT_BB. */
5899 t = gimple_block_label (default_bb);
5900 u = build_case_label (NULL, NULL, t);
5901 make_edge (l0_bb, default_bb, 0);
5902 add_bb_to_loop (default_bb, current_loops->tree_root);
5904 stmt = gimple_build_switch (vmain, u, label_vec);
5905 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5906 gsi_remove (&switch_si, true);
5908 si = gsi_start_bb (default_bb);
5909 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5910 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5912 if (exit_reachable)
5914 tree bfn_decl;
5916 /* Code to get the next section goes in L1_BB. */
5917 si = gsi_last_bb (l1_bb);
5918 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5920 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5921 stmt = gimple_build_call (bfn_decl, 0);
5922 gimple_call_set_lhs (stmt, vnext);
5923 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5924 gsi_remove (&si, true);
5926 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5929 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5930 si = gsi_last_bb (l2_bb);
5931 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5932 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5933 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5934 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5935 else
5936 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5937 stmt = gimple_build_call (t, 0);
5938 if (gimple_omp_return_lhs (gsi_stmt (si)))
5939 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5940 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5941 gsi_remove (&si, true);
5943 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5946 /* Expand code for an OpenMP single directive. We've already expanded
5947 much of the code; here we simply place the GOMP_barrier call. */
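/* For example, for

     #pragma omp single
       foo ();

   only the entry and exit markers remain to be removed here; unless a
   'nowait' clause was given, a barrier (built by omp_build_barrier) is
   placed at the exit so the other threads wait for the thread that
   executed the single block.  */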
5949 static void
5950 expand_omp_single (struct omp_region *region)
5952 basic_block entry_bb, exit_bb;
5953 gimple_stmt_iterator si;
5955 entry_bb = region->entry;
5956 exit_bb = region->exit;
5958 si = gsi_last_bb (entry_bb);
5959 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5960 gsi_remove (&si, true);
5961 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5963 si = gsi_last_bb (exit_bb);
5964 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5966 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5967 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5969 gsi_remove (&si, true);
5970 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5973 /* Generic expansion for OpenMP synchronization directives: master,
5974 ordered and critical. All we need to do here is remove the entry
5975 and exit markers for REGION. */
5977 static void
5978 expand_omp_synch (struct omp_region *region)
5980 basic_block entry_bb, exit_bb;
5981 gimple_stmt_iterator si;
5983 entry_bb = region->entry;
5984 exit_bb = region->exit;
5986 si = gsi_last_bb (entry_bb);
5987 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5988 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5989 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5990 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5991 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5992 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5993 gsi_remove (&si, true);
5994 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5996 if (exit_bb)
5998 si = gsi_last_bb (exit_bb);
5999 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6000 gsi_remove (&si, true);
6001 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6005 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6006 operation as a normal volatile load. */
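/* Illustrative sketch: for a 4-byte int (index == 2),

     #pragma omp atomic read
       v = x;

   becomes roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST when the seq_cst clause is present), assuming the
   target provides the __atomic_load_N builtin for that size.  */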
6008 static bool
6009 expand_omp_atomic_load (basic_block load_bb, tree addr,
6010 tree loaded_val, int index)
6012 enum built_in_function tmpbase;
6013 gimple_stmt_iterator gsi;
6014 basic_block store_bb;
6015 location_t loc;
6016 gimple *stmt;
6017 tree decl, call, type, itype;
6019 gsi = gsi_last_bb (load_bb);
6020 stmt = gsi_stmt (gsi);
6021 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6022 loc = gimple_location (stmt);
6024 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6025 is smaller than word size, then expand_atomic_load assumes that the load
6026 is atomic. We could avoid the builtin entirely in this case. */
6028 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6029 decl = builtin_decl_explicit (tmpbase);
6030 if (decl == NULL_TREE)
6031 return false;
6033 type = TREE_TYPE (loaded_val);
6034 itype = TREE_TYPE (TREE_TYPE (decl));
6036 call = build_call_expr_loc (loc, decl, 2, addr,
6037 build_int_cst (NULL,
6038 gimple_omp_atomic_seq_cst_p (stmt)
6039 ? MEMMODEL_SEQ_CST
6040 : MEMMODEL_RELAXED));
6041 if (!useless_type_conversion_p (type, itype))
6042 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6043 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6045 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6046 gsi_remove (&gsi, true);
6048 store_bb = single_succ (load_bb);
6049 gsi = gsi_last_bb (store_bb);
6050 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6051 gsi_remove (&gsi, true);
6053 if (gimple_in_ssa_p (cfun))
6054 update_ssa (TODO_update_ssa_no_phi);
6056 return true;
6059 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6060 operation as a normal volatile store. */
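/* Similarly, a plain

     #pragma omp atomic write
       x = expr;

   maps roughly to __atomic_store_4 (&x, expr, MEMMODEL_RELAXED) for a
   4-byte type, while a capture form that still needs the previous value
   of x (e.g. { v = x; x = expr; }) is expanded as __atomic_exchange_4
   instead, provided the target supports atomic exchange in that
   mode.  */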
6062 static bool
6063 expand_omp_atomic_store (basic_block load_bb, tree addr,
6064 tree loaded_val, tree stored_val, int index)
6066 enum built_in_function tmpbase;
6067 gimple_stmt_iterator gsi;
6068 basic_block store_bb = single_succ (load_bb);
6069 location_t loc;
6070 gimple *stmt;
6071 tree decl, call, type, itype;
6072 machine_mode imode;
6073 bool exchange;
6075 gsi = gsi_last_bb (load_bb);
6076 stmt = gsi_stmt (gsi);
6077 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6079 /* If the load value is needed, then this isn't a store but an exchange. */
6080 exchange = gimple_omp_atomic_need_value_p (stmt);
6082 gsi = gsi_last_bb (store_bb);
6083 stmt = gsi_stmt (gsi);
6084 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6085 loc = gimple_location (stmt);
6087 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6088 is smaller than word size, then expand_atomic_store assumes that the store
6089 is atomic. We could avoid the builtin entirely in this case. */
6091 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6092 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6093 decl = builtin_decl_explicit (tmpbase);
6094 if (decl == NULL_TREE)
6095 return false;
6097 type = TREE_TYPE (stored_val);
6099 /* Dig out the type of the function's second argument. */
6100 itype = TREE_TYPE (decl);
6101 itype = TYPE_ARG_TYPES (itype);
6102 itype = TREE_CHAIN (itype);
6103 itype = TREE_VALUE (itype);
6104 imode = TYPE_MODE (itype);
6106 if (exchange && !can_atomic_exchange_p (imode, true))
6107 return false;
6109 if (!useless_type_conversion_p (itype, type))
6110 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6111 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6112 build_int_cst (NULL,
6113 gimple_omp_atomic_seq_cst_p (stmt)
6114 ? MEMMODEL_SEQ_CST
6115 : MEMMODEL_RELAXED));
6116 if (exchange)
6118 if (!useless_type_conversion_p (type, itype))
6119 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6120 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6123 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6124 gsi_remove (&gsi, true);
6126 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6127 gsi = gsi_last_bb (load_bb);
6128 gsi_remove (&gsi, true);
6130 if (gimple_in_ssa_p (cfun))
6131 update_ssa (TODO_update_ssa_no_phi);
6133 return true;
6136 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6137 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6138 size of the data type, and thus usable to find the index of the builtin
6139 decl. Returns false if the expression is not of the proper form. */
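/* As a concrete example, the update

     #pragma omp atomic
       x = x + 1;

   matches the PLUS_EXPR case below and, for a 4-byte x, is emitted
   roughly as

     __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

   with the result discarded; a capture form that needs the new value
   would select __atomic_add_fetch_4 instead.  */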
6141 static bool
6142 expand_omp_atomic_fetch_op (basic_block load_bb,
6143 tree addr, tree loaded_val,
6144 tree stored_val, int index)
6146 enum built_in_function oldbase, newbase, tmpbase;
6147 tree decl, itype, call;
6148 tree lhs, rhs;
6149 basic_block store_bb = single_succ (load_bb);
6150 gimple_stmt_iterator gsi;
6151 gimple *stmt;
6152 location_t loc;
6153 enum tree_code code;
6154 bool need_old, need_new;
6155 machine_mode imode;
6156 bool seq_cst;
6158 /* We expect to find the following sequences:
6160 load_bb:
6161 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6163 store_bb:
6164 val = tmp OP something; (or: something OP tmp)
6165 GIMPLE_OMP_STORE (val)
6167 ???FIXME: Allow a more flexible sequence.
6168 Perhaps use data flow to pick the statements.
6172 gsi = gsi_after_labels (store_bb);
6173 stmt = gsi_stmt (gsi);
6174 loc = gimple_location (stmt);
6175 if (!is_gimple_assign (stmt))
6176 return false;
6177 gsi_next (&gsi);
6178 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6179 return false;
6180 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6181 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6182 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6183 gcc_checking_assert (!need_old || !need_new);
6185 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6186 return false;
6188 /* Check for one of the supported fetch-op operations. */
6189 code = gimple_assign_rhs_code (stmt);
6190 switch (code)
6192 case PLUS_EXPR:
6193 case POINTER_PLUS_EXPR:
6194 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6195 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6196 break;
6197 case MINUS_EXPR:
6198 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6199 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6200 break;
6201 case BIT_AND_EXPR:
6202 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6203 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6204 break;
6205 case BIT_IOR_EXPR:
6206 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6207 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6208 break;
6209 case BIT_XOR_EXPR:
6210 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6211 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6212 break;
6213 default:
6214 return false;
6217 /* Make sure the expression is of the proper form. */
6218 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6219 rhs = gimple_assign_rhs2 (stmt);
6220 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6221 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6222 rhs = gimple_assign_rhs1 (stmt);
6223 else
6224 return false;
6226 tmpbase = ((enum built_in_function)
6227 ((need_new ? newbase : oldbase) + index + 1));
6228 decl = builtin_decl_explicit (tmpbase);
6229 if (decl == NULL_TREE)
6230 return false;
6231 itype = TREE_TYPE (TREE_TYPE (decl));
6232 imode = TYPE_MODE (itype);
6234 /* We could test all of the various optabs involved, but the fact of the
6235 matter is that (with the exception of i486 vs i586 and xadd) all targets
6236 that support any atomic operation optab also implement compare-and-swap.
6237 Let optabs.c take care of expanding any compare-and-swap loop. */
6238 if (!can_compare_and_swap_p (imode, true))
6239 return false;
6241 gsi = gsi_last_bb (load_bb);
6242 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6244 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6245 It only requires that the operation happen atomically. Thus we can
6246 use the RELAXED memory model. */
6247 call = build_call_expr_loc (loc, decl, 3, addr,
6248 fold_convert_loc (loc, itype, rhs),
6249 build_int_cst (NULL,
6250 seq_cst ? MEMMODEL_SEQ_CST
6251 : MEMMODEL_RELAXED));
6253 if (need_old || need_new)
6255 lhs = need_old ? loaded_val : stored_val;
6256 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6257 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6259 else
6260 call = fold_convert_loc (loc, void_type_node, call);
6261 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6262 gsi_remove (&gsi, true);
6264 gsi = gsi_last_bb (store_bb);
6265 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6266 gsi_remove (&gsi, true);
6267 gsi = gsi_last_bb (store_bb);
6268 stmt = gsi_stmt (gsi);
6269 gsi_remove (&gsi, true);
6271 if (gimple_in_ssa_p (cfun))
6273 release_defs (stmt);
6274 update_ssa (TODO_update_ssa_no_phi);
6277 return true;
6280 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6282 oldval = *addr;
6283 repeat:
6284 newval = rhs; // with oldval replacing *addr in rhs
6285 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6286 if (oldval != newval)
6287 goto repeat;
6289 INDEX is log2 of the size of the data type, and thus usable to find the
6290 index of the builtin decl. */
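/* Illustrative sketch: for a type with no fetch-op or direct load/store
   support -- a 'double', say -- the value is view-converted to a
   same-sized integer so the loop above can retry with
   __sync_val_compare_and_swap_8 on the bit pattern; comparing the
   integer words rather than the floating-point values is what keeps the
   loop correct for NaNs and -0.0 (see the comment further down).  */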
6292 static bool
6293 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6294 tree addr, tree loaded_val, tree stored_val,
6295 int index)
6297 tree loadedi, storedi, initial, new_storedi, old_vali;
6298 tree type, itype, cmpxchg, iaddr;
6299 gimple_stmt_iterator si;
6300 basic_block loop_header = single_succ (load_bb);
6301 gimple *phi, *stmt;
6302 edge e;
6303 enum built_in_function fncode;
6305 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6306 order to use the RELAXED memory model effectively. */
6307 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6308 + index + 1);
6309 cmpxchg = builtin_decl_explicit (fncode);
6310 if (cmpxchg == NULL_TREE)
6311 return false;
6312 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6313 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6315 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
6316 return false;
6318 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6319 si = gsi_last_bb (load_bb);
6320 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6322 /* For floating-point values, we'll need to view-convert them to integers
6323 so that we can perform the atomic compare and swap. Simplify the
6324 following code by always setting up the "i"ntegral variables. */
6325 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6327 tree iaddr_val;
6329 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6330 true));
6331 iaddr_val
6332 = force_gimple_operand_gsi (&si,
6333 fold_convert (TREE_TYPE (iaddr), addr),
6334 false, NULL_TREE, true, GSI_SAME_STMT);
6335 stmt = gimple_build_assign (iaddr, iaddr_val);
6336 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6337 loadedi = create_tmp_var (itype);
6338 if (gimple_in_ssa_p (cfun))
6339 loadedi = make_ssa_name (loadedi);
6341 else
6343 iaddr = addr;
6344 loadedi = loaded_val;
6347 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6348 tree loaddecl = builtin_decl_explicit (fncode);
6349 if (loaddecl)
6350 initial
6351 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6352 build_call_expr (loaddecl, 2, iaddr,
6353 build_int_cst (NULL_TREE,
6354 MEMMODEL_RELAXED)));
6355 else
6356 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6357 build_int_cst (TREE_TYPE (iaddr), 0));
6359 initial
6360 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6361 GSI_SAME_STMT);
6363 /* Move the value to the LOADEDI temporary. */
6364 if (gimple_in_ssa_p (cfun))
6366 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6367 phi = create_phi_node (loadedi, loop_header);
6368 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6369 initial);
6371 else
6372 gsi_insert_before (&si,
6373 gimple_build_assign (loadedi, initial),
6374 GSI_SAME_STMT);
6375 if (loadedi != loaded_val)
6377 gimple_stmt_iterator gsi2;
6378 tree x;
6380 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6381 gsi2 = gsi_start_bb (loop_header);
6382 if (gimple_in_ssa_p (cfun))
6384 gassign *stmt;
6385 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6386 true, GSI_SAME_STMT);
6387 stmt = gimple_build_assign (loaded_val, x);
6388 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6390 else
6392 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6393 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6394 true, GSI_SAME_STMT);
6397 gsi_remove (&si, true);
6399 si = gsi_last_bb (store_bb);
6400 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6402 if (iaddr == addr)
6403 storedi = stored_val;
6404 else
6405 storedi
6406 = force_gimple_operand_gsi (&si,
6407 build1 (VIEW_CONVERT_EXPR, itype,
6408 stored_val), true, NULL_TREE, true,
6409 GSI_SAME_STMT);
6411 /* Build the compare&swap statement. */
6412 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6413 new_storedi = force_gimple_operand_gsi (&si,
6414 fold_convert (TREE_TYPE (loadedi),
6415 new_storedi),
6416 true, NULL_TREE,
6417 true, GSI_SAME_STMT);
6419 if (gimple_in_ssa_p (cfun))
6420 old_vali = loadedi;
6421 else
6423 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6424 stmt = gimple_build_assign (old_vali, loadedi);
6425 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6427 stmt = gimple_build_assign (loadedi, new_storedi);
6428 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6431 /* Note that we always perform the comparison as an integer, even for
6432 floating point. This allows the atomic operation to properly
6433 succeed even with NaNs and -0.0. */
6434 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6435 stmt = gimple_build_cond_empty (ne);
6436 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6438 /* Update cfg. */
6439 e = single_succ_edge (store_bb);
6440 e->flags &= ~EDGE_FALLTHRU;
6441 e->flags |= EDGE_FALSE_VALUE;
6443 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6445 /* Copy the new value to loadedi (we already did that before the condition
6446 if we are not in SSA). */
6447 if (gimple_in_ssa_p (cfun))
6449 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6450 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6453 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6454 gsi_remove (&si, true);
6456 struct loop *loop = alloc_loop ();
6457 loop->header = loop_header;
6458 loop->latch = store_bb;
6459 add_loop (loop, loop_header->loop_father);
6461 if (gimple_in_ssa_p (cfun))
6462 update_ssa (TODO_update_ssa_no_phi);
6464 return true;
6467 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6469 GOMP_atomic_start ();
6470 *addr = rhs;
6471 GOMP_atomic_end ();
6473 The result is not globally atomic, but works so long as all parallel
6474 references are within #pragma omp atomic directives. According to
6475 responses received from omp@openmp.org, this appears to be within spec.
6476 Which makes sense, since that's how several other compilers handle
6477 this situation as well.
6478 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6479 expanding. STORED_VAL is the operand of the matching
6480 GIMPLE_OMP_ATOMIC_STORE.
6482 We replace
6483 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6484 loaded_val = *addr;
6486 and replace
6487 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6488 *addr = stored_val;
6491 static bool
6492 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6493 tree addr, tree loaded_val, tree stored_val)
6495 gimple_stmt_iterator si;
6496 gassign *stmt;
6497 tree t;
6499 si = gsi_last_bb (load_bb);
6500 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6502 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6503 t = build_call_expr (t, 0);
6504 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6506 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6507 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6508 gsi_remove (&si, true);
6510 si = gsi_last_bb (store_bb);
6511 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6513 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6514 stored_val);
6515 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6517 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6518 t = build_call_expr (t, 0);
6519 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6520 gsi_remove (&si, true);
6522 if (gimple_in_ssa_p (cfun))
6523 update_ssa (TODO_update_ssa_no_phi);
6524 return true;
6527 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6528 using expand_omp_atomic_fetch_op. If it fails, we try to
6529 call expand_omp_atomic_pipeline, and if it fails too, the
6530 ultimate fallback is wrapping the operation in a mutex
6531 (expand_omp_atomic_mutex). REGION is the atomic region built
6532 by build_omp_regions_1(). */
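/* For example, for 'double x; ... #pragma omp atomic  x += 1.0;' the
   fetch-op attempt is skipped because the type is neither integral nor
   a pointer, the compare-and-swap pipeline is tried next and normally
   succeeds on targets with an 8-byte CAS, and only if that also fails
   is the update wrapped in GOMP_atomic_start ()/GOMP_atomic_end ().  */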
6534 static void
6535 expand_omp_atomic (struct omp_region *region)
6537 basic_block load_bb = region->entry, store_bb = region->exit;
6538 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6539 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6540 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6541 tree addr = gimple_omp_atomic_load_rhs (load);
6542 tree stored_val = gimple_omp_atomic_store_val (store);
6543 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6544 HOST_WIDE_INT index;
6546 /* Make sure the type is one of the supported sizes. */
6547 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6548 index = exact_log2 (index);
6549 if (index >= 0 && index <= 4)
6551 unsigned int align = TYPE_ALIGN_UNIT (type);
6553 /* __sync builtins require strict data alignment. */
6554 if (exact_log2 (align) >= index)
6556 /* Atomic load. */
6557 if (loaded_val == stored_val
6558 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6559 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6560 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6561 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6562 return;
6564 /* Atomic store. */
6565 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6566 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6567 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6568 && store_bb == single_succ (load_bb)
6569 && first_stmt (store_bb) == store
6570 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6571 stored_val, index))
6572 return;
6574 /* When possible, use specialized atomic update functions. */
6575 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6576 && store_bb == single_succ (load_bb)
6577 && expand_omp_atomic_fetch_op (load_bb, addr,
6578 loaded_val, stored_val, index))
6579 return;
6581 /* If we don't have specialized __sync builtins, try to implement
6582 the operation as a compare-and-swap loop. */
6583 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6584 loaded_val, stored_val, index))
6585 return;
6589 /* The ultimate fallback is wrapping the operation in a mutex. */
6590 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6593 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6594 at REGION_EXIT. */
6596 static void
6597 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6598 basic_block region_exit)
6600 struct loop *outer = region_entry->loop_father;
6601 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6603 /* Don't parallelize the kernels region if it contains more than one outer
6604 loop. */
6605 unsigned int nr_outer_loops = 0;
6606 struct loop *single_outer = NULL;
6607 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6609 gcc_assert (loop_outer (loop) == outer);
6611 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6612 continue;
6614 if (region_exit != NULL
6615 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6616 continue;
6618 nr_outer_loops++;
6619 single_outer = loop;
6621 if (nr_outer_loops != 1)
6622 return;
6624 for (struct loop *loop = single_outer->inner;
6625 loop != NULL;
6626 loop = loop->inner)
6627 if (loop->next)
6628 return;
6630 /* Mark the loops in the region. */
6631 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6632 loop->in_oacc_kernels_region = true;
6635 /* Types used to pass grid and work-group sizes to kernel invocation. */
6637 struct GTY(()) grid_launch_attributes_trees
6639 tree kernel_dim_array_type;
6640 tree kernel_lattrs_dimnum_decl;
6641 tree kernel_lattrs_grid_decl;
6642 tree kernel_lattrs_group_decl;
6643 tree kernel_launch_attributes_type;
6646 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6648 /* Create types used to pass kernel launch attributes to target. */
6650 static void
6651 grid_create_kernel_launch_attr_types (void)
6653 if (grid_attr_trees)
6654 return;
6655 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6657 tree dim_arr_index_type
6658 = build_index_type (build_int_cst (integer_type_node, 2));
6659 grid_attr_trees->kernel_dim_array_type
6660 = build_array_type (uint32_type_node, dim_arr_index_type);
6662 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6663 grid_attr_trees->kernel_lattrs_dimnum_decl
6664 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6665 uint32_type_node);
6666 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6668 grid_attr_trees->kernel_lattrs_grid_decl
6669 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6670 grid_attr_trees->kernel_dim_array_type);
6671 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6672 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6673 grid_attr_trees->kernel_lattrs_group_decl
6674 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6675 grid_attr_trees->kernel_dim_array_type);
6676 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6677 = grid_attr_trees->kernel_lattrs_grid_decl;
6678 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6679 "__gomp_kernel_launch_attributes",
6680 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6683 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6684 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6685 of type uint32_type_node. */
6687 static void
6688 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6689 tree fld_decl, int index, tree value)
6691 tree ref = build4 (ARRAY_REF, uint32_type_node,
6692 build3 (COMPONENT_REF,
6693 grid_attr_trees->kernel_dim_array_type,
6694 range_var, fld_decl, NULL_TREE),
6695 build_int_cst (integer_type_node, index),
6696 NULL_TREE, NULL_TREE);
6697 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6700 /* Return a tree representation of a pointer to a structure with grid and
6701 work-group size information. Statements filling that information will be
6702 inserted before GSI; TGT_STMT is the target statement which has the
6703 necessary information in it. */
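/* The record built by grid_create_kernel_launch_attr_types corresponds
   roughly to

     struct __gomp_kernel_launch_attributes {
       uint32_t ndim;            // number of dimensions actually used
       uint32_t grid_size[3];    // grid sizes per dimension
       uint32_t group_size[3];   // work-group sizes per dimension
     };

   and this function fills one such temporary from the _griddim_ clauses
   of TGT_STMT and returns its address.  */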
6705 static tree
6706 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6707 gomp_target *tgt_stmt)
6709 grid_create_kernel_launch_attr_types ();
6710 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6711 "__kernel_launch_attrs");
6713 unsigned max_dim = 0;
6714 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6715 clause;
6716 clause = OMP_CLAUSE_CHAIN (clause))
6718 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6719 continue;
6721 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6722 max_dim = MAX (dim, max_dim);
6724 grid_insert_store_range_dim (gsi, lattrs,
6725 grid_attr_trees->kernel_lattrs_grid_decl,
6726 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6727 grid_insert_store_range_dim (gsi, lattrs,
6728 grid_attr_trees->kernel_lattrs_group_decl,
6729 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6732 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6733 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6734 gcc_checking_assert (max_dim <= 2);
6735 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6736 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6737 GSI_SAME_STMT);
6738 TREE_ADDRESSABLE (lattrs) = 1;
6739 return build_fold_addr_expr (lattrs);
6742 /* Build target argument identifier from the DEVICE identifier, value
6743 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6745 static tree
6746 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6748 tree t = build_int_cst (integer_type_node, device);
6749 if (subseqent_param)
6750 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6751 build_int_cst (integer_type_node,
6752 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6753 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6754 build_int_cst (integer_type_node, id));
6755 return t;
6758 /* Like above but return it in a type that can be directly stored as an
6759 element of the argument array. */
6761 static tree
6762 get_target_argument_identifier (int device, bool subseqent_param, int id)
6764 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6765 return fold_convert (ptr_type_node, t);
6768 /* Return a target argument consisting of DEVICE identifier, value identifier
6769 ID, and the actual VALUE. */
6771 static tree
6772 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6773 tree value)
6775 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6776 fold_convert (integer_type_node, value),
6777 build_int_cst (unsigned_type_node,
6778 GOMP_TARGET_ARG_VALUE_SHIFT));
6779 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6780 get_target_argument_identifier_1 (device, false, id));
6781 t = fold_convert (ptr_type_node, t);
6782 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6785 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6786 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
6787 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6788 arguments. */
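/* A worked example of the encoding: for a small constant such as
   num_teams == 4, get_target_argument_value packs everything into a
   single pointer-sized element, roughly

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   whereas a value outside the +-2^15 range is pushed as two elements:
   the identifier (with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set) followed
   by the value itself converted to a pointer.  */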
6790 static void
6791 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6792 int id, tree value, vec <tree> *args)
6794 if (tree_fits_shwi_p (value)
6795 && tree_to_shwi (value) > -(1 << 15)
6796 && tree_to_shwi (value) < (1 << 15))
6797 args->quick_push (get_target_argument_value (gsi, device, id, value));
6798 else
6800 args->quick_push (get_target_argument_identifier (device, true, id));
6801 value = fold_convert (ptr_type_node, value);
6802 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6803 GSI_SAME_STMT);
6804 args->quick_push (value);
6808 /* Create an array of arguments that is then passed to GOMP_target. */
6810 static tree
6811 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6813 auto_vec <tree, 6> args;
6814 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6815 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6816 if (c)
6817 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6818 else
6819 t = integer_minus_one_node;
6820 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6821 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6823 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6824 if (c)
6825 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6826 else
6827 t = integer_minus_one_node;
6828 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6829 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6830 &args);
6832 /* Add HSA-specific grid sizes, if available. */
6833 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6834 OMP_CLAUSE__GRIDDIM_))
6836 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6837 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6838 args.quick_push (t);
6839 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6842 /* Produce more, perhaps device specific, arguments here. */
6844 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6845 args.length () + 1),
6846 ".omp_target_args");
6847 for (unsigned i = 0; i < args.length (); i++)
6849 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6850 build_int_cst (integer_type_node, i),
6851 NULL_TREE, NULL_TREE);
6852 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6853 GSI_SAME_STMT);
6855 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6856 build_int_cst (integer_type_node, args.length ()),
6857 NULL_TREE, NULL_TREE);
6858 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6859 GSI_SAME_STMT);
6860 TREE_ADDRESSABLE (argarray) = 1;
6861 return build_fold_addr_expr (argarray);
6864 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6866 static void
6867 expand_omp_target (struct omp_region *region)
6869 basic_block entry_bb, exit_bb, new_bb;
6870 struct function *child_cfun;
6871 tree child_fn, block, t;
6872 gimple_stmt_iterator gsi;
6873 gomp_target *entry_stmt;
6874 gimple *stmt;
6875 edge e;
6876 bool offloaded, data_region;
6878 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6879 new_bb = region->entry;
6881 offloaded = is_gimple_omp_offloaded (entry_stmt);
6882 switch (gimple_omp_target_kind (entry_stmt))
6884 case GF_OMP_TARGET_KIND_REGION:
6885 case GF_OMP_TARGET_KIND_UPDATE:
6886 case GF_OMP_TARGET_KIND_ENTER_DATA:
6887 case GF_OMP_TARGET_KIND_EXIT_DATA:
6888 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6889 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6890 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6891 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6892 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6893 data_region = false;
6894 break;
6895 case GF_OMP_TARGET_KIND_DATA:
6896 case GF_OMP_TARGET_KIND_OACC_DATA:
6897 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6898 data_region = true;
6899 break;
6900 default:
6901 gcc_unreachable ();
6904 child_fn = NULL_TREE;
6905 child_cfun = NULL;
6906 if (offloaded)
6908 child_fn = gimple_omp_target_child_fn (entry_stmt);
6909 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6912 /* Supported by expand_omp_taskreg, but not here. */
6913 if (child_cfun != NULL)
6914 gcc_checking_assert (!child_cfun->cfg);
6915 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6917 entry_bb = region->entry;
6918 exit_bb = region->exit;
6920 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6921 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6923 if (offloaded)
6925 unsigned srcidx, dstidx, num;
6927 /* If the offloading region needs data sent from the parent
6928 function, then the very first statement (except possible
6929 tree profile counter updates) of the offloading body
6930 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6931 &.OMP_DATA_O is passed as an argument to the child function,
6932 we need to replace it with the argument as seen by the child
6933 function.
6935 In most cases, this will end up being the identity assignment
6936 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6937 a function call that has been inlined, the original PARM_DECL
6938 .OMP_DATA_I may have been converted into a different local
6939 variable, in which case we need to keep the assignment. */
6940 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6941 if (data_arg)
6943 basic_block entry_succ_bb = single_succ (entry_bb);
6944 gimple_stmt_iterator gsi;
6945 tree arg;
6946 gimple *tgtcopy_stmt = NULL;
6947 tree sender = TREE_VEC_ELT (data_arg, 0);
6949 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6951 gcc_assert (!gsi_end_p (gsi));
6952 stmt = gsi_stmt (gsi);
6953 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6954 continue;
6956 if (gimple_num_ops (stmt) == 2)
6958 tree arg = gimple_assign_rhs1 (stmt);
6960 /* We're ignoring the subcode because we're
6961 effectively doing a STRIP_NOPS. */
6963 if (TREE_CODE (arg) == ADDR_EXPR
6964 && TREE_OPERAND (arg, 0) == sender)
6966 tgtcopy_stmt = stmt;
6967 break;
6972 gcc_assert (tgtcopy_stmt != NULL);
6973 arg = DECL_ARGUMENTS (child_fn);
6975 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6976 gsi_remove (&gsi, true);
6979 /* Declare local variables needed in CHILD_CFUN. */
6980 block = DECL_INITIAL (child_fn);
6981 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6982 /* The gimplifier could record temporaries in the offloading block
6983 rather than in the containing function's local_decls chain,
6984 which would mean cgraph missed finalizing them. Do it now. */
6985 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6986 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6987 varpool_node::finalize_decl (t);
6988 DECL_SAVED_TREE (child_fn) = NULL;
6989 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6990 gimple_set_body (child_fn, NULL);
6991 TREE_USED (block) = 1;
6993 /* Reset DECL_CONTEXT on function arguments. */
6994 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
6995 DECL_CONTEXT (t) = child_fn;
6997 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
6998 so that it can be moved to the child function. */
6999 gsi = gsi_last_bb (entry_bb);
7000 stmt = gsi_stmt (gsi);
7001 gcc_assert (stmt
7002 && gimple_code (stmt) == gimple_code (entry_stmt));
7003 e = split_block (entry_bb, stmt);
7004 gsi_remove (&gsi, true);
7005 entry_bb = e->dest;
7006 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7008 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7009 if (exit_bb)
7011 gsi = gsi_last_bb (exit_bb);
7012 gcc_assert (!gsi_end_p (gsi)
7013 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7014 stmt = gimple_build_return (NULL);
7015 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7016 gsi_remove (&gsi, true);
7019 /* Move the offloading region into CHILD_CFUN. */
7021 block = gimple_block (entry_stmt);
7023 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7024 if (exit_bb)
7025 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7026 /* When the OMP expansion process cannot guarantee an up-to-date
7027 loop tree, arrange for the child function to fix up its loops. */
7028 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7029 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7031 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7032 num = vec_safe_length (child_cfun->local_decls);
7033 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7035 t = (*child_cfun->local_decls)[srcidx];
7036 if (DECL_CONTEXT (t) == cfun->decl)
7037 continue;
7038 if (srcidx != dstidx)
7039 (*child_cfun->local_decls)[dstidx] = t;
7040 dstidx++;
7042 if (dstidx != num)
7043 vec_safe_truncate (child_cfun->local_decls, dstidx);
7045 /* Inform the callgraph about the new function. */
7046 child_cfun->curr_properties = cfun->curr_properties;
7047 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7048 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7049 cgraph_node *node = cgraph_node::get_create (child_fn);
7050 node->parallelized_function = 1;
7051 cgraph_node::add_new_function (child_fn, true);
7053 /* Add the new function to the offload table. */
7054 if (ENABLE_OFFLOADING)
7055 vec_safe_push (offload_funcs, child_fn);
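  /* The offload_funcs vector is later used (see omp-offload.c) to emit the
     table of offloadable functions, which the host and the offload-target
     compilers must keep in sync so the child function can be looked up at
     run time.  */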
7057 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7058 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7060 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7061 fixed in a following pass. */
7062 push_cfun (child_cfun);
7063 if (need_asm)
7064 assign_assembler_name_if_neeeded (child_fn);
7065 cgraph_edge::rebuild_edges ();
7067 /* Some EH regions might become dead, see PR34608. If
7068 pass_cleanup_cfg isn't the first pass to happen with the
7069 new child, these dead EH edges might cause problems.
7070 Clean them up now. */
7071 if (flag_exceptions)
7073 basic_block bb;
7074 bool changed = false;
7076 FOR_EACH_BB_FN (bb, cfun)
7077 changed |= gimple_purge_dead_eh_edges (bb);
7078 if (changed)
7079 cleanup_tree_cfg ();
7081 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7082 verify_loop_structure ();
7083 pop_cfun ();
7085 if (dump_file && !gimple_in_ssa_p (cfun))
7087 omp_any_child_fn_dumped = true;
7088 dump_function_header (dump_file, child_fn, dump_flags);
7089 dump_function_to_file (child_fn, dump_file, dump_flags);
7093 /* Emit a library call to launch the offloading region, or do data
7094 transfers. */
7095 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7096 enum built_in_function start_ix;
7097 location_t clause_loc;
7098 unsigned int flags_i = 0;
7099 bool oacc_kernels_p = false;
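  /* Pick the libgomp / OpenACC runtime entry point corresponding to the
     directive kind of this target region.  */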
7101 switch (gimple_omp_target_kind (entry_stmt))
7103 case GF_OMP_TARGET_KIND_REGION:
7104 start_ix = BUILT_IN_GOMP_TARGET;
7105 break;
7106 case GF_OMP_TARGET_KIND_DATA:
7107 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7108 break;
7109 case GF_OMP_TARGET_KIND_UPDATE:
7110 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7111 break;
7112 case GF_OMP_TARGET_KIND_ENTER_DATA:
7113 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7114 break;
7115 case GF_OMP_TARGET_KIND_EXIT_DATA:
7116 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7117 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7118 break;
7119 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7120 oacc_kernels_p = true;
7121 /* FALLTHROUGH */
7122 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7123 start_ix = BUILT_IN_GOACC_PARALLEL;
7124 break;
7125 case GF_OMP_TARGET_KIND_OACC_DATA:
7126 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7127 start_ix = BUILT_IN_GOACC_DATA_START;
7128 break;
7129 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7130 start_ix = BUILT_IN_GOACC_UPDATE;
7131 break;
7132 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7133 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7134 break;
7135 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7136 start_ix = BUILT_IN_GOACC_DECLARE;
7137 break;
7138 default:
7139 gcc_unreachable ();
7142 clauses = gimple_omp_target_clauses (entry_stmt);
7144 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7145 library choose) and there is no conditional. */
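  /* For example, "#pragma omp target device(2) if(use_gpu)" sets DEVICE to 2
     below and guards it with the IF expression handled further down; with
     neither clause present, the runtime's default-device ICV is used.  */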
7146 cond = NULL_TREE;
7147 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7149 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7150 if (c)
7151 cond = OMP_CLAUSE_IF_EXPR (c);
7153 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7154 if (c)
7156 /* Even if we pass it to all library function calls, it is currently only
7157 defined/used for the OpenMP target ones. */
7158 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7159 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7160 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7161 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7163 device = OMP_CLAUSE_DEVICE_ID (c);
7164 clause_loc = OMP_CLAUSE_LOCATION (c);
7166 else
7167 clause_loc = gimple_location (entry_stmt);
7169 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7170 if (c)
7171 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7173 /* Ensure 'device' is of the correct type. */
7174 device = fold_convert_loc (clause_loc, integer_type_node, device);
7176 /* If we found the clause 'if (cond)', build
7177 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7178 if (cond)
7180 cond = gimple_boolify (cond);
7182 basic_block cond_bb, then_bb, else_bb;
7183 edge e;
7184 tree tmp_var;
7186 tmp_var = create_tmp_var (TREE_TYPE (device));
7187 if (offloaded)
7188 e = split_block_after_labels (new_bb);
7189 else
7191 gsi = gsi_last_bb (new_bb);
7192 gsi_prev (&gsi);
7193 e = split_block (new_bb, gsi_stmt (gsi));
7195 cond_bb = e->src;
7196 new_bb = e->dest;
7197 remove_edge (e);
7199 then_bb = create_empty_bb (cond_bb);
7200 else_bb = create_empty_bb (then_bb);
7201 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7202 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7204 stmt = gimple_build_cond_empty (cond);
7205 gsi = gsi_last_bb (cond_bb);
7206 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7208 gsi = gsi_start_bb (then_bb);
7209 stmt = gimple_build_assign (tmp_var, device);
7210 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7212 gsi = gsi_start_bb (else_bb);
7213 stmt = gimple_build_assign (tmp_var,
7214 build_int_cst (integer_type_node,
7215 GOMP_DEVICE_HOST_FALLBACK));
7216 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7218 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7219 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7220 add_bb_to_loop (then_bb, cond_bb->loop_father);
7221 add_bb_to_loop (else_bb, cond_bb->loop_father);
7222 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7223 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7225 device = tmp_var;
7226 gsi = gsi_last_bb (new_bb);
7228 else
7230 gsi = gsi_last_bb (new_bb);
7231 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7232 true, GSI_SAME_STMT);
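  /* Build the standard argument quadruple: T1 is the number of mapped
     entries, while T2, T3 and T4 are the addresses of the .omp_data_arr,
     .omp_data_sizes and .omp_data_kinds style arrays created by lowering
     (size zero and null pointers when there is nothing to map).  */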
7235 t = gimple_omp_target_data_arg (entry_stmt);
7236 if (t == NULL)
7238 t1 = size_zero_node;
7239 t2 = build_zero_cst (ptr_type_node);
7240 t3 = t2;
7241 t4 = t2;
7243 else
7245 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7246 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7247 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7248 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7249 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7252 gimple *g;
7253 bool tagging = false;
7254 /* Reserve room for the maximum number of arguments used by any start_ix, not counting varargs. */
7255 auto_vec<tree, 11> args;
7256 args.quick_push (device);
7257 if (offloaded)
7258 args.quick_push (build_fold_addr_expr (child_fn));
7259 args.quick_push (t1);
7260 args.quick_push (t2);
7261 args.quick_push (t3);
7262 args.quick_push (t4);
7263 switch (start_ix)
7265 case BUILT_IN_GOACC_DATA_START:
7266 case BUILT_IN_GOACC_DECLARE:
7267 case BUILT_IN_GOMP_TARGET_DATA:
7268 break;
7269 case BUILT_IN_GOMP_TARGET:
7270 case BUILT_IN_GOMP_TARGET_UPDATE:
7271 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7272 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7273 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7274 if (c)
7275 depend = OMP_CLAUSE_DECL (c);
7276 else
7277 depend = build_int_cst (ptr_type_node, 0);
7278 args.quick_push (depend);
7279 if (start_ix == BUILT_IN_GOMP_TARGET)
7280 args.quick_push (get_target_arguments (&gsi, entry_stmt));
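	/* At this point, for BUILT_IN_GOMP_TARGET the argument list describes
	   a call roughly of the form
	     GOMP_target_ext (device, child_fn, num_maps, hostaddrs, sizes,
			      kinds, flags, depend, target_args);
	   the update and enter/exit data variants take the same prefix but
	   without the child function and the trailing argument block.  */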
7281 break;
7282 case BUILT_IN_GOACC_PARALLEL:
7284 oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7285 tagging = true;
7287 /* FALLTHRU */
7288 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7289 case BUILT_IN_GOACC_UPDATE:
7291 tree t_async = NULL_TREE;
7293 /* If present, use the value specified by the respective
7294 clause, making sure that it is of the correct type. */
7295 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7296 if (c)
7297 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7298 integer_type_node,
7299 OMP_CLAUSE_ASYNC_EXPR (c));
7300 else if (!tagging)
7301 /* Default values for t_async. */
7302 t_async = fold_convert_loc (gimple_location (entry_stmt),
7303 integer_type_node,
7304 build_int_cst (integer_type_node,
7305 GOMP_ASYNC_SYNC));
7306 if (tagging && t_async)
7308 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7310 if (TREE_CODE (t_async) == INTEGER_CST)
7312 /* See if we can pack the async arg into the tag's
7313 operand. */
7314 i_async = TREE_INT_CST_LOW (t_async);
7315 if (i_async < GOMP_LAUNCH_OP_MAX)
7316 t_async = NULL_TREE;
7317 else
7318 i_async = GOMP_LAUNCH_OP_MAX;
7320 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7321 i_async));
7323 if (t_async)
7324 args.safe_push (t_async);
7326 /* Save the argument index, and ... */
7327 unsigned t_wait_idx = args.length ();
7328 unsigned num_waits = 0;
7329 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7330 if (!tagging || c)
7331 /* ... push a placeholder. */
7332 args.safe_push (integer_zero_node);
7334 for (; c; c = OMP_CLAUSE_CHAIN (c))
7335 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7337 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7338 integer_type_node,
7339 OMP_CLAUSE_WAIT_EXPR (c)));
7340 num_waits++;
7343 if (!tagging || num_waits)
7345 tree len;
7347 /* Now that we know the number, update the placeholder. */
7348 if (tagging)
7349 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7350 else
7351 len = build_int_cst (integer_type_node, num_waits);
7352 len = fold_convert_loc (gimple_location (entry_stmt),
7353 unsigned_type_node, len);
7354 args[t_wait_idx] = len;
7357 break;
7358 default:
7359 gcc_unreachable ();
7361 if (tagging)
7362 /* Push terminal marker - zero. */
7363 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
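  /* For the OpenACC launch entry point, oacc_set_fn_attrib and the code above
     encode the optional launch parameters (gang/worker/vector dimensions,
     async, waits) as trailing arguments packed with oacc_launch_pack; the
     zero pushed here terminates that tag list.  */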
7365 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7366 gimple_set_location (g, gimple_location (entry_stmt));
7367 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7368 if (!offloaded)
7370 g = gsi_stmt (gsi);
7371 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7372 gsi_remove (&gsi, true);
7374 if (data_region && region->exit)
7376 gsi = gsi_last_bb (region->exit);
7377 g = gsi_stmt (gsi);
7378 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7379 gsi_remove (&gsi, true);
7383 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
7384 iteration variable derived from the thread number. INTRA_GROUP means this
7385 is an expansion of a loop iterating over work-items within a separate
7386 iteration over groups. */
7388 static void
7389 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7391 gimple_stmt_iterator gsi;
7392 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7393 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7394 == GF_OMP_FOR_KIND_GRID_LOOP);
7395 size_t collapse = gimple_omp_for_collapse (for_stmt);
7396 struct omp_for_data_loop *loops
7397 = XALLOCAVEC (struct omp_for_data_loop,
7398 gimple_omp_for_collapse (for_stmt));
7399 struct omp_for_data fd;
7401 remove_edge (BRANCH_EDGE (kfor->entry));
7402 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7404 gcc_assert (kfor->cont);
7405 omp_extract_for_data (for_stmt, &fd, loops);
7407 gsi = gsi_start_bb (body_bb);
7409 for (size_t dim = 0; dim < collapse; dim++)
7411 tree type, itype;
7412 itype = type = TREE_TYPE (fd.loops[dim].v);
7413 if (POINTER_TYPE_P (type))
7414 itype = signed_type_for (type);
7416 tree n1 = fd.loops[dim].n1;
7417 tree step = fd.loops[dim].step;
7418 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7419 true, NULL_TREE, true, GSI_SAME_STMT);
7420 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7421 true, NULL_TREE, true, GSI_SAME_STMT);
7422 tree threadid;
7423 if (gimple_omp_for_grid_group_iter (for_stmt))
7425 gcc_checking_assert (!intra_group);
7426 threadid = build_call_expr (builtin_decl_explicit
7427 (BUILT_IN_HSA_WORKGROUPID), 1,
7428 build_int_cstu (unsigned_type_node, dim));
7430 else if (intra_group)
7431 threadid = build_call_expr (builtin_decl_explicit
7432 (BUILT_IN_HSA_WORKITEMID), 1,
7433 build_int_cstu (unsigned_type_node, dim));
7434 else
7435 threadid = build_call_expr (builtin_decl_explicit
7436 (BUILT_IN_HSA_WORKITEMABSID), 1,
7437 build_int_cstu (unsigned_type_node, dim));
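	  /* So the per-dimension "thread number" is the HSA work-group id for
	     the group loop, the work-item id within a group when expanding an
	     intra-group loop, and the absolute work-item id otherwise.  */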
7438 threadid = fold_convert (itype, threadid);
7439 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7440 true, GSI_SAME_STMT);
7442 tree startvar = fd.loops[dim].v;
7443 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7444 if (POINTER_TYPE_P (type))
7445 t = fold_build_pointer_plus (n1, t);
7446 else
7447 t = fold_build2 (PLUS_EXPR, type, t, n1);
7448 t = fold_convert (type, t);
7449 t = force_gimple_operand_gsi (&gsi, t,
7450 DECL_P (startvar)
7451 && TREE_ADDRESSABLE (startvar),
7452 NULL_TREE, true, GSI_SAME_STMT);
7453 gassign *assign_stmt = gimple_build_assign (startvar, t);
7454 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
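	  /* In effect each collapsed dimension's control variable is simply
	     initialized to N1 + THREADID * STEP; the original loop control is
	     gone and each work-item executes the body once for its own value.  */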
7456 /* Remove the omp for statement. */
7457 gsi = gsi_last_bb (kfor->entry);
7458 gsi_remove (&gsi, true);
7460 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7461 gsi = gsi_last_bb (kfor->cont);
7462 gcc_assert (!gsi_end_p (gsi)
7463 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7464 gsi_remove (&gsi, true);
7466 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7467 gsi = gsi_last_bb (kfor->exit);
7468 gcc_assert (!gsi_end_p (gsi)
7469 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7470 if (intra_group)
7471 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7472 gsi_remove (&gsi, true);
7474 /* Fix up the now much simpler CFG. */
7475 remove_edge (find_edge (kfor->cont, body_bb));
7477 if (kfor->cont != body_bb)
7478 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7479 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7482 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7483 argument_decls. */
7485 struct grid_arg_decl_map
7487 tree old_arg;
7488 tree new_arg;
7491 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7492 pertaining to the kernel function. */
7494 static tree
7495 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7497 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7498 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7499 tree t = *tp;
7501 if (t == adm->old_arg)
7502 *tp = adm->new_arg;
7503 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7504 return NULL_TREE;
7507 /* If the TARGET region contains a gridified kernel body, remove that
7508 region from the TARGET and expand it in HSA gridified kernel fashion. */
7510 static void
7511 grid_expand_target_grid_body (struct omp_region *target)
7513 if (!hsa_gen_requested_p ())
7514 return;
7516 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7517 struct omp_region **pp;
7519 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7520 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7521 break;
7523 struct omp_region *gpukernel = *pp;
7525 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7526 if (!gpukernel)
7528 /* HSA cannot handle OACC stuff. */
7529 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7530 return;
7531 gcc_checking_assert (orig_child_fndecl);
7532 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7533 OMP_CLAUSE__GRIDDIM_));
7534 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7536 hsa_register_kernel (n);
7537 return;
7540 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7541 OMP_CLAUSE__GRIDDIM_));
7542 tree inside_block
7543 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7544 *pp = gpukernel->next;
7545 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7546 if ((*pp)->type == GIMPLE_OMP_FOR)
7547 break;
7549 struct omp_region *kfor = *pp;
7550 gcc_assert (kfor);
7551 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7552 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7553 *pp = kfor->next;
7554 if (kfor->inner)
7556 if (gimple_omp_for_grid_group_iter (for_stmt))
7558 struct omp_region **next_pp;
7559 for (pp = &kfor->inner; *pp; pp = next_pp)
7561 next_pp = &(*pp)->next;
7562 if ((*pp)->type != GIMPLE_OMP_FOR)
7563 continue;
7564 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7565 gcc_assert (gimple_omp_for_kind (inner)
7566 == GF_OMP_FOR_KIND_GRID_LOOP);
7567 grid_expand_omp_for_loop (*pp, true);
7568 *pp = (*pp)->next;
7569 next_pp = pp;
7572 expand_omp (kfor->inner);
7574 if (gpukernel->inner)
7575 expand_omp (gpukernel->inner);
7577 tree kern_fndecl = copy_node (orig_child_fndecl);
7578 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7579 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7580 tree tgtblock = gimple_block (tgt_stmt);
7581 tree fniniblock = make_node (BLOCK);
7582 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7583 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7584 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7585 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7586 DECL_INITIAL (kern_fndecl) = fniniblock;
7587 push_struct_function (kern_fndecl);
7588 cfun->function_end_locus = gimple_location (tgt_stmt);
7589 init_tree_ssa (cfun);
7590 pop_cfun ();
7592 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7593 gcc_assert (!DECL_CHAIN (old_parm_decl));
7594 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7595 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7596 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7597 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7598 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7599 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7600 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7601 kern_cfun->curr_properties = cfun->curr_properties;
7603 grid_expand_omp_for_loop (kfor, false);
7605 /* Remove the omp for statement. */
7606 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7607 gsi_remove (&gsi, true);
7608 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7609 return. */
7610 gsi = gsi_last_bb (gpukernel->exit);
7611 gcc_assert (!gsi_end_p (gsi)
7612 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7613 gimple *ret_stmt = gimple_build_return (NULL);
7614 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7615 gsi_remove (&gsi, true);
7617 /* Statements in the first BB in the target construct have been produced by
7618 target lowering and must be copied inside the GPUKERNEL, with the two
7619 exceptions of the first OMP statement and the OMP_DATA assignment
7620 statement. */
7621 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7622 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7623 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7624 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7625 !gsi_end_p (tsi); gsi_next (&tsi))
7627 gimple *stmt = gsi_stmt (tsi);
7628 if (is_gimple_omp (stmt))
7629 break;
7630 if (sender
7631 && is_gimple_assign (stmt)
7632 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7633 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7634 continue;
7635 gimple *copy = gimple_copy (stmt);
7636 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7637 gimple_set_block (copy, fniniblock);
7640 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7641 gpukernel->exit, inside_block);
7643 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7644 kcn->mark_force_output ();
7645 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7647 hsa_register_kernel (kcn, orig_child);
7649 cgraph_node::add_new_function (kern_fndecl, true);
7650 push_cfun (kern_cfun);
7651 cgraph_edge::rebuild_edges ();
7653 /* Re-map any mention of the PARM_DECL of the original function to the
7654 PARM_DECL of the new one.
7656 TODO: It would be great if lowering produced references into the GPU
7657 kernel decl straight away and we did not have to do this. */
7658 struct grid_arg_decl_map adm;
7659 adm.old_arg = old_parm_decl;
7660 adm.new_arg = new_parm_decl;
7661 basic_block bb;
7662 FOR_EACH_BB_FN (bb, kern_cfun)
7664 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7666 gimple *stmt = gsi_stmt (gsi);
7667 struct walk_stmt_info wi;
7668 memset (&wi, 0, sizeof (wi));
7669 wi.info = &adm;
7670 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7673 pop_cfun ();
7675 return;
7678 /* Expand the parallel region tree rooted at REGION. Expansion
7679 proceeds in depth-first order. Innermost regions are expanded
7680 first. This way, parallel regions that require a new function to
7681 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7682 internal dependencies in their body. */
7684 static void
7685 expand_omp (struct omp_region *region)
7687 omp_any_child_fn_dumped = false;
7688 while (region)
7690 location_t saved_location;
7691 gimple *inner_stmt = NULL;
7693 /* First, determine whether this is a combined parallel+workshare
7694 region. */
7695 if (region->type == GIMPLE_OMP_PARALLEL)
7696 determine_parallel_type (region);
7697 else if (region->type == GIMPLE_OMP_TARGET)
7698 grid_expand_target_grid_body (region);
7700 if (region->type == GIMPLE_OMP_FOR
7701 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7702 inner_stmt = last_stmt (region->inner->entry);
7704 if (region->inner)
7705 expand_omp (region->inner);
7707 saved_location = input_location;
7708 if (gimple_has_location (last_stmt (region->entry)))
7709 input_location = gimple_location (last_stmt (region->entry));
7711 switch (region->type)
7713 case GIMPLE_OMP_PARALLEL:
7714 case GIMPLE_OMP_TASK:
7715 expand_omp_taskreg (region);
7716 break;
7718 case GIMPLE_OMP_FOR:
7719 expand_omp_for (region, inner_stmt);
7720 break;
7722 case GIMPLE_OMP_SECTIONS:
7723 expand_omp_sections (region);
7724 break;
7726 case GIMPLE_OMP_SECTION:
7727 /* Individual omp sections are handled together with their
7728 parent GIMPLE_OMP_SECTIONS region. */
7729 break;
7731 case GIMPLE_OMP_SINGLE:
7732 expand_omp_single (region);
7733 break;
7735 case GIMPLE_OMP_ORDERED:
7737 gomp_ordered *ord_stmt
7738 = as_a <gomp_ordered *> (last_stmt (region->entry));
7739 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7740 OMP_CLAUSE_DEPEND))
7742 /* We'll expand these when expanding the corresponding
7743 worksharing region with ordered(n) clause. */
7744 gcc_assert (region->outer
7745 && region->outer->type == GIMPLE_OMP_FOR);
7746 region->ord_stmt = ord_stmt;
7747 break;
7750 /* FALLTHRU */
7751 case GIMPLE_OMP_MASTER:
7752 case GIMPLE_OMP_TASKGROUP:
7753 case GIMPLE_OMP_CRITICAL:
7754 case GIMPLE_OMP_TEAMS:
7755 expand_omp_synch (region);
7756 break;
7758 case GIMPLE_OMP_ATOMIC_LOAD:
7759 expand_omp_atomic (region);
7760 break;
7762 case GIMPLE_OMP_TARGET:
7763 expand_omp_target (region);
7764 break;
7766 default:
7767 gcc_unreachable ();
7770 input_location = saved_location;
7771 region = region->next;
7773 if (omp_any_child_fn_dumped)
7775 if (dump_file)
7776 dump_function_header (dump_file, current_function_decl, dump_flags);
7777 omp_any_child_fn_dumped = false;
7781 /* Helper for build_omp_regions. Scan the dominator tree starting at
7782 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7783 true, the function ends once a single tree is built (otherwise, a whole
7784 forest of OMP constructs may be built). */
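/* For instance, a construct nest like
     #pragma omp parallel
     #pragma omp for
   produces a GIMPLE_OMP_PARALLEL region whose inner child is the
   GIMPLE_OMP_FOR region, each of them closed by the block holding its
   GIMPLE_OMP_RETURN.  */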
7786 static void
7787 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7788 bool single_tree)
7790 gimple_stmt_iterator gsi;
7791 gimple *stmt;
7792 basic_block son;
7794 gsi = gsi_last_bb (bb);
7795 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7797 struct omp_region *region;
7798 enum gimple_code code;
7800 stmt = gsi_stmt (gsi);
7801 code = gimple_code (stmt);
7802 if (code == GIMPLE_OMP_RETURN)
7804 /* STMT is the return point out of region PARENT. Mark it
7805 as the exit point and make PARENT the immediately
7806 enclosing region. */
7807 gcc_assert (parent);
7808 region = parent;
7809 region->exit = bb;
7810 parent = parent->outer;
7812 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7814 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7815 GIMPLE_OMP_RETURN, but matches with
7816 GIMPLE_OMP_ATOMIC_LOAD. */
7817 gcc_assert (parent);
7818 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7819 region = parent;
7820 region->exit = bb;
7821 parent = parent->outer;
7823 else if (code == GIMPLE_OMP_CONTINUE)
7825 gcc_assert (parent);
7826 parent->cont = bb;
7828 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7830 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7831 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7833 else
7835 region = new_omp_region (bb, code, parent);
7836 /* Otherwise... */
7837 if (code == GIMPLE_OMP_TARGET)
7839 switch (gimple_omp_target_kind (stmt))
7841 case GF_OMP_TARGET_KIND_REGION:
7842 case GF_OMP_TARGET_KIND_DATA:
7843 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7844 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7845 case GF_OMP_TARGET_KIND_OACC_DATA:
7846 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7847 break;
7848 case GF_OMP_TARGET_KIND_UPDATE:
7849 case GF_OMP_TARGET_KIND_ENTER_DATA:
7850 case GF_OMP_TARGET_KIND_EXIT_DATA:
7851 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7852 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7853 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7854 /* ..., other than for those stand-alone directives... */
7855 region = NULL;
7856 break;
7857 default:
7858 gcc_unreachable ();
7861 else if (code == GIMPLE_OMP_ORDERED
7862 && omp_find_clause (gimple_omp_ordered_clauses
7863 (as_a <gomp_ordered *> (stmt)),
7864 OMP_CLAUSE_DEPEND))
7865 /* #pragma omp ordered depend is also just a stand-alone
7866 directive. */
7867 region = NULL;
7868 /* ..., this directive becomes the parent for a new region. */
7869 if (region)
7870 parent = region;
7874 if (single_tree && !parent)
7875 return;
7877 for (son = first_dom_son (CDI_DOMINATORS, bb);
7878 son;
7879 son = next_dom_son (CDI_DOMINATORS, son))
7880 build_omp_regions_1 (son, parent, single_tree);
7883 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7884 root_omp_region. */
7886 static void
7887 build_omp_regions_root (basic_block root)
7889 gcc_assert (root_omp_region == NULL);
7890 build_omp_regions_1 (root, NULL, true);
7891 gcc_assert (root_omp_region != NULL);
7894 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7896 void
7897 omp_expand_local (basic_block head)
7899 build_omp_regions_root (head);
7900 if (dump_file && (dump_flags & TDF_DETAILS))
7902 fprintf (dump_file, "\nOMP region tree\n\n");
7903 dump_omp_region (dump_file, root_omp_region, 0);
7904 fprintf (dump_file, "\n");
7907 remove_exit_barriers (root_omp_region);
7908 expand_omp (root_omp_region);
7910 omp_free_regions ();
7913 /* Scan the CFG and build a tree of OMP regions rooted at
7914 root_omp_region. */
7916 static void
7917 build_omp_regions (void)
7919 gcc_assert (root_omp_region == NULL);
7920 calculate_dominance_info (CDI_DOMINATORS);
7921 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7924 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7926 static unsigned int
7927 execute_expand_omp (void)
7929 build_omp_regions ();
7931 if (!root_omp_region)
7932 return 0;
7934 if (dump_file)
7936 fprintf (dump_file, "\nOMP region tree\n\n");
7937 dump_omp_region (dump_file, root_omp_region, 0);
7938 fprintf (dump_file, "\n");
7941 remove_exit_barriers (root_omp_region);
7943 expand_omp (root_omp_region);
7945 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7946 verify_loop_structure ();
7947 cleanup_tree_cfg ();
7949 omp_free_regions ();
7951 return 0;
7954 /* OMP expansion -- the default pass, run before creation of SSA form. */
7956 namespace {
7958 const pass_data pass_data_expand_omp =
7960 GIMPLE_PASS, /* type */
7961 "ompexp", /* name */
7962 OPTGROUP_OPENMP, /* optinfo_flags */
7963 TV_NONE, /* tv_id */
7964 PROP_gimple_any, /* properties_required */
7965 PROP_gimple_eomp, /* properties_provided */
7966 0, /* properties_destroyed */
7967 0, /* todo_flags_start */
7968 0, /* todo_flags_finish */
7971 class pass_expand_omp : public gimple_opt_pass
7973 public:
7974 pass_expand_omp (gcc::context *ctxt)
7975 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7978 /* opt_pass methods: */
7979 virtual unsigned int execute (function *)
7981 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
7982 || flag_openmp_simd != 0)
7983 && !seen_error ());
7985 /* This pass always runs, to provide PROP_gimple_eomp.
7986 But often, there is nothing to do. */
7987 if (!gate)
7988 return 0;
7990 return execute_expand_omp ();
7993 }; // class pass_expand_omp
7995 } // anon namespace
7997 gimple_opt_pass *
7998 make_pass_expand_omp (gcc::context *ctxt)
8000 return new pass_expand_omp (ctxt);
8003 namespace {
8005 const pass_data pass_data_expand_omp_ssa =
8007 GIMPLE_PASS, /* type */
8008 "ompexpssa", /* name */
8009 OPTGROUP_OPENMP, /* optinfo_flags */
8010 TV_NONE, /* tv_id */
8011 PROP_cfg | PROP_ssa, /* properties_required */
8012 PROP_gimple_eomp, /* properties_provided */
8013 0, /* properties_destroyed */
8014 0, /* todo_flags_start */
8015 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8018 class pass_expand_omp_ssa : public gimple_opt_pass
8020 public:
8021 pass_expand_omp_ssa (gcc::context *ctxt)
8022 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8025 /* opt_pass methods: */
8026 virtual bool gate (function *fun)
8028 return !(fun->curr_properties & PROP_gimple_eomp);
8030 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8031 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8033 }; // class pass_expand_omp_ssa
8035 } // anon namespace
8037 gimple_opt_pass *
8038 make_pass_expand_omp_ssa (gcc::context *ctxt)
8040 return new pass_expand_omp_ssa (ctxt);
8043 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8044 GIMPLE_* codes; returns true if a normal fall-through edge to the next block should also be created. */
8046 bool
8047 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8048 int *region_idx)
8050 gimple *last = last_stmt (bb);
8051 enum gimple_code code = gimple_code (last);
8052 struct omp_region *cur_region = *region;
8053 bool fallthru = false;
8055 switch (code)
8057 case GIMPLE_OMP_PARALLEL:
8058 case GIMPLE_OMP_TASK:
8059 case GIMPLE_OMP_FOR:
8060 case GIMPLE_OMP_SINGLE:
8061 case GIMPLE_OMP_TEAMS:
8062 case GIMPLE_OMP_MASTER:
8063 case GIMPLE_OMP_TASKGROUP:
8064 case GIMPLE_OMP_CRITICAL:
8065 case GIMPLE_OMP_SECTION:
8066 case GIMPLE_OMP_GRID_BODY:
8067 cur_region = new_omp_region (bb, code, cur_region);
8068 fallthru = true;
8069 break;
8071 case GIMPLE_OMP_ORDERED:
8072 cur_region = new_omp_region (bb, code, cur_region);
8073 fallthru = true;
8074 if (omp_find_clause (gimple_omp_ordered_clauses
8075 (as_a <gomp_ordered *> (last)),
8076 OMP_CLAUSE_DEPEND))
8077 cur_region = cur_region->outer;
8078 break;
8080 case GIMPLE_OMP_TARGET:
8081 cur_region = new_omp_region (bb, code, cur_region);
8082 fallthru = true;
8083 switch (gimple_omp_target_kind (last))
8085 case GF_OMP_TARGET_KIND_REGION:
8086 case GF_OMP_TARGET_KIND_DATA:
8087 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8088 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8089 case GF_OMP_TARGET_KIND_OACC_DATA:
8090 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8091 break;
8092 case GF_OMP_TARGET_KIND_UPDATE:
8093 case GF_OMP_TARGET_KIND_ENTER_DATA:
8094 case GF_OMP_TARGET_KIND_EXIT_DATA:
8095 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8096 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8097 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8098 cur_region = cur_region->outer;
8099 break;
8100 default:
8101 gcc_unreachable ();
8103 break;
8105 case GIMPLE_OMP_SECTIONS:
8106 cur_region = new_omp_region (bb, code, cur_region);
8107 fallthru = true;
8108 break;
8110 case GIMPLE_OMP_SECTIONS_SWITCH:
8111 fallthru = false;
8112 break;
8114 case GIMPLE_OMP_ATOMIC_LOAD:
8115 case GIMPLE_OMP_ATOMIC_STORE:
8116 fallthru = true;
8117 break;
8119 case GIMPLE_OMP_RETURN:
8120 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8121 somewhere other than the next block. This will be
8122 created later. */
8123 cur_region->exit = bb;
8124 if (cur_region->type == GIMPLE_OMP_TASK)
8125 /* Add an edge corresponding to not scheduling the task
8126 immediately. */
8127 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8128 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8129 cur_region = cur_region->outer;
8130 break;
8132 case GIMPLE_OMP_CONTINUE:
8133 cur_region->cont = bb;
8134 switch (cur_region->type)
8136 case GIMPLE_OMP_FOR:
8137 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8138 successor edges as abnormal to prevent splitting
8139 them. */
8140 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8141 /* Make the loopback edge. */
8142 make_edge (bb, single_succ (cur_region->entry),
8143 EDGE_ABNORMAL);
8145 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8146 corresponds to the case that the body of the loop
8147 is not executed at all. */
8148 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8149 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8150 fallthru = false;
8151 break;
8153 case GIMPLE_OMP_SECTIONS:
8154 /* Wire up the edges into and out of the nested sections. */
8156 basic_block switch_bb = single_succ (cur_region->entry);
8158 struct omp_region *i;
8159 for (i = cur_region->inner; i ; i = i->next)
8161 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8162 make_edge (switch_bb, i->entry, 0);
8163 make_edge (i->exit, bb, EDGE_FALLTHRU);
8166 /* Make the loopback edge to the block with
8167 GIMPLE_OMP_SECTIONS_SWITCH. */
8168 make_edge (bb, switch_bb, 0);
8170 /* Make the edge from the switch to exit. */
8171 make_edge (switch_bb, bb->next_bb, 0);
8172 fallthru = false;
8174 break;
8176 case GIMPLE_OMP_TASK:
8177 fallthru = true;
8178 break;
8180 default:
8181 gcc_unreachable ();
8183 break;
8185 default:
8186 gcc_unreachable ();
8189 if (*region != cur_region)
8191 *region = cur_region;
8192 if (cur_region)
8193 *region_idx = cur_region->entry->index;
8194 else
8195 *region_idx = 0;
8198 return fallthru;
8201 #include "gt-omp-expand.h"