PR middle-end/77674
[official-gcc.git] / gcc / omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa.h"
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
66 struct omp_region
68 /* The enclosing region. */
69 struct omp_region *outer;
71 /* First child region. */
72 struct omp_region *inner;
74 /* Next peer region. */
75 struct omp_region *next;
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
116 /* Return true if REGION is a combined parallel+workshare region. */
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
121 return region->is_combined_parallel;
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
138 Is lowered into:
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180 return false;
181 if (fd.iter_type != long_integer_type_node)
182 return false;
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
188 see through this. */
189 if (!is_gimple_min_invariant (fd.loop.n1)
190 || !is_gimple_min_invariant (fd.loop.n2)
191 || !is_gimple_min_invariant (fd.loop.step)
192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193 return false;
195 return true;
198 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199 presence (SIMD_SCHEDULE). */
201 static tree
202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 if (!simd_schedule)
205 return chunk_size;
207 int vf = omp_max_vf ();
208 if (vf == 1)
209 return chunk_size;
211 tree type = TREE_TYPE (chunk_size);
212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 build_int_cst (type, vf - 1));
214 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 build_int_cst (type, -vf));
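/* A worked example of the rounding above, assuming omp_max_vf () returns 4:
   a requested chunk size of 10 becomes (10 + (4 - 1)) & -4 = 12, so every
   chunk handed out covers a whole number of SIMD lanes.  */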
218 /* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
220 expanded. */
222 static vec<tree, va_gc> *
223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 tree t;
226 location_t loc = gimple_location (ws_stmt);
227 vec<tree, va_gc> *ws_args;
229 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 struct omp_for_data fd;
232 tree n1, n2;
234 omp_extract_for_data (for_stmt, &fd, NULL);
235 n1 = fd.loop.n1;
236 n2 = fd.loop.n2;
238 if (gimple_omp_for_combined_into_p (for_stmt))
240 tree innerc
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 OMP_CLAUSE__LOOPTEMP_);
243 gcc_assert (innerc);
244 n1 = OMP_CLAUSE_DECL (innerc);
245 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 OMP_CLAUSE__LOOPTEMP_);
247 gcc_assert (innerc);
248 n2 = OMP_CLAUSE_DECL (innerc);
251 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253 t = fold_convert_loc (loc, long_integer_type_node, n1);
254 ws_args->quick_push (t);
256 t = fold_convert_loc (loc, long_integer_type_node, n2);
257 ws_args->quick_push (t);
259 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260 ws_args->quick_push (t);
262 if (fd.chunk_size)
264 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 ws_args->quick_push (t);
269 return ws_args;
271 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 /* Number of sections is equal to the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 the exit of the sections region. */
276 basic_block bb = single_succ (gimple_bb (ws_stmt));
277 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278 vec_alloc (ws_args, 1);
279 ws_args->quick_push (t);
280 return ws_args;
283 gcc_unreachable ();
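/* As a sketch of the vector built above, assume a loop such as
     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < 100; i++)
   with no simd modifier: WS_ARGS ends up holding the long values
   { 0, 100, 1, 4 }, i.e. n1, n2, the step and the chunk size, ready to
   be spliced into the combined GOMP_parallel_loop_* call.  */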
286 /* Discover whether REGION is a combined parallel+workshare region. */
288 static void
289 determine_parallel_type (struct omp_region *region)
291 basic_block par_entry_bb, par_exit_bb;
292 basic_block ws_entry_bb, ws_exit_bb;
294 if (region == NULL || region->inner == NULL
295 || region->exit == NULL || region->inner->exit == NULL
296 || region->inner->cont == NULL)
297 return;
299 /* We only support parallel+for and parallel+sections. */
300 if (region->type != GIMPLE_OMP_PARALLEL
301 || (region->inner->type != GIMPLE_OMP_FOR
302 && region->inner->type != GIMPLE_OMP_SECTIONS))
303 return;
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb = region->entry;
308 par_exit_bb = region->exit;
309 ws_entry_bb = region->inner->entry;
310 ws_exit_bb = region->inner->exit;
312 if (single_succ (par_entry_bb) == ws_entry_bb
313 && single_succ (ws_exit_bb) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 || (last_and_only_stmt (ws_entry_bb)
317 && last_and_only_stmt (par_exit_bb))))
319 gimple *par_stmt = last_stmt (par_entry_bb);
320 gimple *ws_stmt = last_stmt (ws_entry_bb);
322 if (region->inner->type == GIMPLE_OMP_FOR)
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses = gimple_omp_for_clauses (ws_stmt);
334 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 if (c == NULL
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 == OMP_CLAUSE_SCHEDULE_STATIC)
338 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 region->is_combined_parallel = false;
341 region->inner->is_combined_parallel = false;
342 return;
346 region->is_combined_parallel = true;
347 region->inner->is_combined_parallel = true;
348 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
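/* For instance, assuming the source contains
     #pragma omp parallel for schedule (dynamic)
   both regions are marked combined and later expanded through a single
   GOMP_parallel_loop_dynamic call, whereas schedule (static) or an
   ordered loop keeps the parallel and the workshare separate, for the
   reasons given above.  */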
352 /* Debugging dumps for parallel regions. */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
357 /* Dump the parallel region tree rooted at REGION. */
359 void
360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 gimple_code_name[region->type]);
365 if (region->inner)
366 dump_omp_region (file, region->inner, indent + 4);
368 if (region->cont)
370 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 region->cont->index);
374 if (region->exit)
375 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 region->exit->index);
377 else
378 fprintf (file, "%*s[no exit marker]\n", indent, "");
380 if (region->next)
381 dump_omp_region (file, region->next, indent);
384 DEBUG_FUNCTION void
385 debug_omp_region (struct omp_region *region)
387 dump_omp_region (stderr, region, 0);
390 DEBUG_FUNCTION void
391 debug_all_omp_regions (void)
393 dump_omp_region (stderr, root_omp_region, 0);
396 /* Create a new parallel region starting at STMT inside region PARENT. */
398 static struct omp_region *
399 new_omp_region (basic_block bb, enum gimple_code type,
400 struct omp_region *parent)
402 struct omp_region *region = XCNEW (struct omp_region);
404 region->outer = parent;
405 region->entry = bb;
406 region->type = type;
408 if (parent)
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region->next = parent->inner;
413 parent->inner = region;
415 else
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region->next = root_omp_region;
420 root_omp_region = region;
423 return region;
426 /* Release the memory associated with the region tree rooted at REGION. */
428 static void
429 free_omp_region_1 (struct omp_region *region)
431 struct omp_region *i, *n;
433 for (i = region->inner; i ; i = n)
435 n = i->next;
436 free_omp_region_1 (i);
439 free (region);
442 /* Release the memory for the entire omp region tree. */
444 void
445 omp_free_regions (void)
447 struct omp_region *r, *n;
448 for (r = root_omp_region; r ; r = n)
450 n = r->next;
451 free_omp_region_1 (r);
453 root_omp_region = NULL;
456 /* A convenience function to build an empty GIMPLE_COND with just the
457 condition. */
459 static gcond *
460 gimple_build_cond_empty (tree cond)
462 enum tree_code pred_code;
463 tree lhs, rhs;
465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
469 /* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
472 static bool
473 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 bool indirect = false;
476 for (region = region->outer; region; region = region->outer)
478 if (region->type == GIMPLE_OMP_PARALLEL)
479 indirect = true;
480 else if (region->type == GIMPLE_OMP_TARGET)
482 gomp_target *tgt_stmt
483 = as_a <gomp_target *> (last_stmt (region->entry));
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 OMP_CLAUSE__GRIDDIM_))
487 return indirect;
488 else
489 return true;
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl)))
495 return true;
497 return false;
500 /* Build the function calls to GOMP_parallel_start etc to actually
501 generate the parallel operation. REGION is the parallel region
502    being expanded.  BB is the block where the code is to be inserted.  WS_ARGS
503 will be set if this is a call to a combined parallel+workshare
504 construct, it contains the list of additional arguments needed by
505 the workshare construct. */
507 static void
508 expand_parallel_call (struct omp_region *region, basic_block bb,
509 gomp_parallel *entry_stmt,
510 vec<tree, va_gc> *ws_args)
512 tree t, t1, t2, val, cond, c, clauses, flags;
513 gimple_stmt_iterator gsi;
514 gimple *stmt;
515 enum built_in_function start_ix;
516 int start_ix2;
517 location_t clause_loc;
518 vec<tree, va_gc> *args;
520 clauses = gimple_omp_parallel_clauses (entry_stmt);
522 /* Determine what flavor of GOMP_parallel we will be
523 emitting. */
524 start_ix = BUILT_IN_GOMP_PARALLEL;
525 if (is_combined_parallel (region))
527 switch (region->inner->type)
529 case GIMPLE_OMP_FOR:
530 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
531 switch (region->inner->sched_kind)
533 case OMP_CLAUSE_SCHEDULE_RUNTIME:
534 start_ix2 = 3;
535 break;
536 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
537 case OMP_CLAUSE_SCHEDULE_GUIDED:
538 if (region->inner->sched_modifiers
539 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541 start_ix2 = 3 + region->inner->sched_kind;
542 break;
544 /* FALLTHRU */
545 default:
546 start_ix2 = region->inner->sched_kind;
547 break;
549 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
550 start_ix = (enum built_in_function) start_ix2;
551 break;
552 case GIMPLE_OMP_SECTIONS:
553 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
554 break;
555 default:
556 gcc_unreachable ();
560 /* By default, the value of NUM_THREADS is zero (selected at run time)
561 and there is no conditional. */
562 cond = NULL_TREE;
563 val = build_int_cst (unsigned_type_node, 0);
564 flags = build_int_cst (unsigned_type_node, 0);
566 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
567 if (c)
568 cond = OMP_CLAUSE_IF_EXPR (c);
570 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
571 if (c)
573 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
574 clause_loc = OMP_CLAUSE_LOCATION (c);
576 else
577 clause_loc = gimple_location (entry_stmt);
579 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
580 if (c)
581 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583 /* Ensure 'val' is of the correct type. */
584 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586 /* If we found the clause 'if (cond)', build either
587 (cond != 0) or (cond ? val : 1u). */
588 if (cond)
590 cond = gimple_boolify (cond);
592 if (integer_zerop (val))
593 val = fold_build2_loc (clause_loc,
594 EQ_EXPR, unsigned_type_node, cond,
595 build_int_cst (TREE_TYPE (cond), 0));
596 else
598 basic_block cond_bb, then_bb, else_bb;
599 edge e, e_then, e_else;
600 tree tmp_then, tmp_else, tmp_join, tmp_var;
602 tmp_var = create_tmp_var (TREE_TYPE (val));
603 if (gimple_in_ssa_p (cfun))
605 tmp_then = make_ssa_name (tmp_var);
606 tmp_else = make_ssa_name (tmp_var);
607 tmp_join = make_ssa_name (tmp_var);
609 else
611 tmp_then = tmp_var;
612 tmp_else = tmp_var;
613 tmp_join = tmp_var;
616 e = split_block_after_labels (bb);
617 cond_bb = e->src;
618 bb = e->dest;
619 remove_edge (e);
621 then_bb = create_empty_bb (cond_bb);
622 else_bb = create_empty_bb (then_bb);
623 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
624 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626 stmt = gimple_build_cond_empty (cond);
627 gsi = gsi_start_bb (cond_bb);
628 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630 gsi = gsi_start_bb (then_bb);
631 expand_omp_build_assign (&gsi, tmp_then, val, true);
633 gsi = gsi_start_bb (else_bb);
634 expand_omp_build_assign (&gsi, tmp_else,
635 build_int_cst (unsigned_type_node, 1),
636 true);
638 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
639 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
640 add_bb_to_loop (then_bb, cond_bb->loop_father);
641 add_bb_to_loop (else_bb, cond_bb->loop_father);
642 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
643 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645 if (gimple_in_ssa_p (cfun))
647 gphi *phi = create_phi_node (tmp_join, bb);
648 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
649 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
652 val = tmp_join;
655 gsi = gsi_start_bb (bb);
656 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
657 false, GSI_CONTINUE_LINKING);
660 gsi = gsi_last_bb (bb);
661 t = gimple_omp_parallel_data_arg (entry_stmt);
662 if (t == NULL)
663 t1 = null_pointer_node;
664 else
665 t1 = build_fold_addr_expr (t);
666 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
667 t2 = build_fold_addr_expr (child_fndecl);
669 vec_alloc (args, 4 + vec_safe_length (ws_args));
670 args->quick_push (t2);
671 args->quick_push (t1);
672 args->quick_push (val);
673 if (ws_args)
674 args->splice (*ws_args);
675 args->quick_push (flags);
677 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
678 builtin_decl_explicit (start_ix), args);
680 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
681 false, GSI_CONTINUE_LINKING);
683 if (hsa_gen_requested_p ()
684 && parallel_needs_hsa_kernel_p (region))
686 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
687 hsa_register_kernel (child_cnode);
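/* A sketch of the call emitted above, assuming a plain
     #pragma omp parallel num_threads (4)
   with no if or proc_bind clause:
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);
   For a combined parallel loop the WS_ARGS values are spliced in before
   the flags, e.g.
     GOMP_parallel_loop_dynamic (foo._omp_fn.0, &.omp_data_o, 4,
				 0, 100, 1, 4, 0);  */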
691 /* Insert a function call whose name is FUNC_NAME with the information from
692 ENTRY_STMT into the basic_block BB. */
694 static void
695 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
696 vec <tree, va_gc> *ws_args)
698 tree t, t1, t2;
699 gimple_stmt_iterator gsi;
700 vec <tree, va_gc> *args;
702 gcc_assert (vec_safe_length (ws_args) == 2);
703 tree func_name = (*ws_args)[0];
704 tree grain = (*ws_args)[1];
706 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
707 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
708 gcc_assert (count != NULL_TREE);
709 count = OMP_CLAUSE_OPERAND (count, 0);
711 gsi = gsi_last_bb (bb);
712 t = gimple_omp_parallel_data_arg (entry_stmt);
713 if (t == NULL)
714 t1 = null_pointer_node;
715 else
716 t1 = build_fold_addr_expr (t);
717 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719 vec_alloc (args, 4);
720 args->quick_push (t2);
721 args->quick_push (t1);
722 args->quick_push (count);
723 args->quick_push (grain);
724 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
727 GSI_CONTINUE_LINKING);
730 /* Build the function call to GOMP_task to actually
731    generate the task operation.  BB is the block where the code is to be inserted.  */
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 else
787 num_tasks = integer_zero_node;
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
799 tree flags = build_int_cst (unsigned_type_node, iflags);
801 tree cond = boolean_true_node;
802 if (ifc)
804 if (taskloop_p)
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 if (finalc)
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
837 gsi = gsi_last_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
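/* As a sketch, for a simple
     #pragma omp task firstprivate (x)
   the call built above is essentially
     GOMP_task (foo._omp_fn.0, &.omp_data_o, 0B, sizeof (.omp_data_s),
		__alignof__ (.omp_data_s), 1, 0, 0B, 0);
   i.e. child function, data, copy function, argument size and alignment,
   the if condition, the flag bits, the depend vector and the priority,
   in that order.  */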
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
874 tree chain = NULL_TREE, t;
875 unsigned ix;
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 DECL_CHAIN (t) = chain;
880 chain = t;
883 return chain;
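/* For example, given V = { a, b, c } the loop above produces the chain
   a -> b -> c (DECL_CHAIN (a) == b, DECL_CHAIN (b) == c), preserving the
   order of the vector.  */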
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
889 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
890 removed. */
892 static void
893 remove_exit_barrier (struct omp_region *region)
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
902 exit_bb = region->exit;
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 gsi = gsi_last_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934    from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
949 any_addressable_vars = 1;
950 break;
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
963 any_addressable_vars = 1;
964 break;
966 if (block == gimple_block (parallel_stmt))
967 break;
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
976 static void
977 remove_exit_barriers (struct omp_region *region)
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
982 if (region->inner)
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
988 region = region->next;
989 remove_exit_barriers (region);
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for task body, except
999 that in untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1027 tree built_in;
1029 if (DECL_NAME (decl) == thr_num_id)
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1054 gimple_call_set_fndecl (call, built_in);
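/* In effect, a call such as
     n = omp_get_num_threads ();
   inside the outlined parallel body is redirected to
     n = __builtin_omp_get_num_threads ();
   whose declaration is const, so the result can be reused within the
   body; omp_get_thread_num () is handled the same way, except inside
   untied tasks as noted above.  */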
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065 tree t = *tp;
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1078 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1124 bool is_cilk_for
1125 = (flag_cilkplus
1126 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1127 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1128 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130 if (is_cilk_for)
1131 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1132 and the inner statement contains the name of the built-in function
1133 and grain. */
1134 ws_args = region->inner->ws_args;
1135 else if (is_combined_parallel (region))
1136 ws_args = region->ws_args;
1137 else
1138 ws_args = NULL;
1140 if (child_cfun->cfg)
1142 /* Due to inlining, it may happen that we have already outlined
1143 the region, in which case all we need to do is make the
1144 sub-graph unreachable and emit the parallel call. */
1145 edge entry_succ_e, exit_succ_e;
1147 entry_succ_e = single_succ_edge (entry_bb);
1149 gsi = gsi_last_bb (entry_bb);
1150 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1151 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1152 gsi_remove (&gsi, true);
1154 new_bb = entry_bb;
1155 if (exit_bb)
1157 exit_succ_e = single_succ_edge (exit_bb);
1158 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160 remove_edge_and_dominated_blocks (entry_succ_e);
1162 else
1164 unsigned srcidx, dstidx, num;
1166 /* If the parallel region needs data sent from the parent
1167 function, then the very first statement (except possible
1168 tree profile counter updates) of the parallel body
1169 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1170 &.OMP_DATA_O is passed as an argument to the child function,
1171 we need to replace it with the argument as seen by the child
1172 function.
1174 In most cases, this will end up being the identity assignment
1175 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1176 a function call that has been inlined, the original PARM_DECL
1177 .OMP_DATA_I may have been converted into a different local
1178 variable. In which case, we need to keep the assignment. */
1179 if (gimple_omp_taskreg_data_arg (entry_stmt))
1181 basic_block entry_succ_bb
1182 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1183 : FALLTHRU_EDGE (entry_bb)->dest;
1184 tree arg;
1185 gimple *parcopy_stmt = NULL;
1187 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189 gimple *stmt;
1191 gcc_assert (!gsi_end_p (gsi));
1192 stmt = gsi_stmt (gsi);
1193 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1194 continue;
1196 if (gimple_num_ops (stmt) == 2)
1198 tree arg = gimple_assign_rhs1 (stmt);
1200		/* We're ignoring the subcode because we're
1201 effectively doing a STRIP_NOPS. */
1203 if (TREE_CODE (arg) == ADDR_EXPR
1204 && TREE_OPERAND (arg, 0)
1205 == gimple_omp_taskreg_data_arg (entry_stmt))
1207 parcopy_stmt = stmt;
1208 break;
1213 gcc_assert (parcopy_stmt != NULL);
1214 arg = DECL_ARGUMENTS (child_fn);
1216 if (!gimple_in_ssa_p (cfun))
1218 if (gimple_assign_lhs (parcopy_stmt) == arg)
1219 gsi_remove (&gsi, true);
1220 else
1222 /* ?? Is setting the subcode really necessary ?? */
1223 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1224 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 else
1229 tree lhs = gimple_assign_lhs (parcopy_stmt);
1230 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1231 /* We'd like to set the rhs to the default def in the child_fn,
1232 but it's too early to create ssa names in the child_fn.
1233 Instead, we set the rhs to the parm. In
1234 move_sese_region_to_fn, we introduce a default def for the
1235	     parm, map the parm to its default def, and once we encounter
1236 this stmt, replace the parm with the default def. */
1237 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1238 update_stmt (parcopy_stmt);
1242 /* Declare local variables needed in CHILD_CFUN. */
1243 block = DECL_INITIAL (child_fn);
1244 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1245 /* The gimplifier could record temporaries in parallel/task block
1246 rather than in containing function's local_decls chain,
1247 which would mean cgraph missed finalizing them. Do it now. */
1248 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1249 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1250 varpool_node::finalize_decl (t);
1251 DECL_SAVED_TREE (child_fn) = NULL;
1252 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1253 gimple_set_body (child_fn, NULL);
1254 TREE_USED (block) = 1;
1256 /* Reset DECL_CONTEXT on function arguments. */
1257 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1258 DECL_CONTEXT (t) = child_fn;
1260 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1261 so that it can be moved to the child function. */
1262 gsi = gsi_last_bb (entry_bb);
1263 stmt = gsi_stmt (gsi);
1264 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1265 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1266 e = split_block (entry_bb, stmt);
1267 gsi_remove (&gsi, true);
1268 entry_bb = e->dest;
1269 edge e2 = NULL;
1270 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1271 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1272 else
1274 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1275 gcc_assert (e2->dest == region->exit);
1276 remove_edge (BRANCH_EDGE (entry_bb));
1277 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1278 gsi = gsi_last_bb (region->exit);
1279 gcc_assert (!gsi_end_p (gsi)
1280 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1281 gsi_remove (&gsi, true);
1284 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1285 if (exit_bb)
1287 gsi = gsi_last_bb (exit_bb);
1288 gcc_assert (!gsi_end_p (gsi)
1289 && (gimple_code (gsi_stmt (gsi))
1290 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1291 stmt = gimple_build_return (NULL);
1292 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1293 gsi_remove (&gsi, true);
1296 /* Move the parallel region into CHILD_CFUN. */
1298 if (gimple_in_ssa_p (cfun))
1300 init_tree_ssa (child_cfun);
1301 init_ssa_operands (child_cfun);
1302 child_cfun->gimple_df->in_ssa_p = true;
1303 block = NULL_TREE;
1305 else
1306 block = gimple_block (entry_stmt);
1308 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1309 if (exit_bb)
1310 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1311 if (e2)
1313 basic_block dest_bb = e2->dest;
1314 if (!exit_bb)
1315 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1316 remove_edge (e2);
1317 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1319 /* When the OMP expansion process cannot guarantee an up-to-date
1320	 loop tree, arrange for the child function to fix up loops.  */
1321 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1322 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1324 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1325 num = vec_safe_length (child_cfun->local_decls);
1326 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1328 t = (*child_cfun->local_decls)[srcidx];
1329 if (DECL_CONTEXT (t) == cfun->decl)
1330 continue;
1331 if (srcidx != dstidx)
1332 (*child_cfun->local_decls)[dstidx] = t;
1333 dstidx++;
1335 if (dstidx != num)
1336 vec_safe_truncate (child_cfun->local_decls, dstidx);
1338 /* Inform the callgraph about the new function. */
1339 child_cfun->curr_properties = cfun->curr_properties;
1340 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1341 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1342 cgraph_node *node = cgraph_node::get_create (child_fn);
1343 node->parallelized_function = 1;
1344 cgraph_node::add_new_function (child_fn, true);
1346 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1347 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1349 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1350 fixed in a following pass. */
1351 push_cfun (child_cfun);
1352 if (need_asm)
1353 assign_assembler_name_if_neeeded (child_fn);
1355 if (optimize)
1356 optimize_omp_library_calls (entry_stmt);
1357 cgraph_edge::rebuild_edges ();
1359 /* Some EH regions might become dead, see PR34608. If
1360 pass_cleanup_cfg isn't the first pass to happen with the
1361 new child, these dead EH edges might cause problems.
1362 Clean them up now. */
1363 if (flag_exceptions)
1365 basic_block bb;
1366 bool changed = false;
1368 FOR_EACH_BB_FN (bb, cfun)
1369 changed |= gimple_purge_dead_eh_edges (bb);
1370 if (changed)
1371 cleanup_tree_cfg ();
1373 if (gimple_in_ssa_p (cfun))
1374 update_ssa (TODO_update_ssa);
1375 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1376 verify_loop_structure ();
1377 pop_cfun ();
1379 if (dump_file && !gimple_in_ssa_p (cfun))
1381 omp_any_child_fn_dumped = true;
1382 dump_function_header (dump_file, child_fn, dump_flags);
1383 dump_function_to_file (child_fn, dump_file, dump_flags);
1387 /* Emit a library call to launch the children threads. */
1388 if (is_cilk_for)
1389 expand_cilk_for_call (new_bb,
1390 as_a <gomp_parallel *> (entry_stmt), ws_args);
1391 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1392 expand_parallel_call (region, new_bb,
1393 as_a <gomp_parallel *> (entry_stmt), ws_args);
1394 else
1395 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1396 if (gimple_in_ssa_p (cfun))
1397 update_ssa (TODO_update_ssa_only_virtuals);
1400 /* Information about members of an OpenACC collapsed loop nest. */
1402 struct oacc_collapse
1404 tree base; /* Base value. */
1405 tree iters; /* Number of steps. */
1406 tree step; /* step size. */
1409 /* Helper for expand_oacc_for. Determine collapsed loop information.
1410 Fill in COUNTS array. Emit any initialization code before GSI.
1411 Return the calculated outer loop bound of BOUND_TYPE. */
1413 static tree
1414 expand_oacc_collapse_init (const struct omp_for_data *fd,
1415 gimple_stmt_iterator *gsi,
1416 oacc_collapse *counts, tree bound_type)
1418 tree total = build_int_cst (bound_type, 1);
1419 int ix;
1421 gcc_assert (integer_onep (fd->loop.step));
1422 gcc_assert (integer_zerop (fd->loop.n1));
1424 for (ix = 0; ix != fd->collapse; ix++)
1426 const omp_for_data_loop *loop = &fd->loops[ix];
1428 tree iter_type = TREE_TYPE (loop->v);
1429 tree diff_type = iter_type;
1430 tree plus_type = iter_type;
1432 gcc_assert (loop->cond_code == fd->loop.cond_code);
1434 if (POINTER_TYPE_P (iter_type))
1435 plus_type = sizetype;
1436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1437 diff_type = signed_type_for (diff_type);
1439 tree b = loop->n1;
1440 tree e = loop->n2;
1441 tree s = loop->step;
1442 bool up = loop->cond_code == LT_EXPR;
1443 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1444 bool negating;
1445 tree expr;
1447 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1448 true, GSI_SAME_STMT);
1449 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1450 true, GSI_SAME_STMT);
1452 /* Convert the step, avoiding possible unsigned->signed overflow. */
1453 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1454 if (negating)
1455 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1456 s = fold_convert (diff_type, s);
1457 if (negating)
1458 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1459 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1460 true, GSI_SAME_STMT);
1462 /* Determine the range, avoiding possible unsigned->signed overflow. */
1463 negating = !up && TYPE_UNSIGNED (iter_type);
1464 expr = fold_build2 (MINUS_EXPR, plus_type,
1465 fold_convert (plus_type, negating ? b : e),
1466 fold_convert (plus_type, negating ? e : b));
1467 expr = fold_convert (diff_type, expr);
1468 if (negating)
1469 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1470 tree range = force_gimple_operand_gsi
1471 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1473 /* Determine number of iterations. */
1474 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1475 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1476 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1478 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1479 true, GSI_SAME_STMT);
1481 counts[ix].base = b;
1482 counts[ix].iters = iters;
1483 counts[ix].step = s;
1485 total = fold_build2 (MULT_EXPR, bound_type, total,
1486 fold_convert (bound_type, iters));
1489 return total;
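/* A worked example of the count computed above for a single member loop
     for (i = 0; i < 10; i += 3)
   range = 10, dir = 1, step = 3, so iters = (10 - 1 + 3) / 3 = 4,
   matching the executions for i = 0, 3, 6, 9; TOTAL is the product of
   the per-loop counts.  */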
1492 /* Emit initializers for collapsed loop members. IVAR is the outer
1493 loop iteration variable, from which collapsed loop iteration values
1494 are calculated. COUNTS array has been initialized by
1495    expand_oacc_collapse_init.  */
1497 static void
1498 expand_oacc_collapse_vars (const struct omp_for_data *fd,
1499 gimple_stmt_iterator *gsi,
1500 const oacc_collapse *counts, tree ivar)
1502 tree ivar_type = TREE_TYPE (ivar);
1504 /* The most rapidly changing iteration variable is the innermost
1505 one. */
1506 for (int ix = fd->collapse; ix--;)
1508 const omp_for_data_loop *loop = &fd->loops[ix];
1509 const oacc_collapse *collapse = &counts[ix];
1510 tree iter_type = TREE_TYPE (loop->v);
1511 tree diff_type = TREE_TYPE (collapse->step);
1512 tree plus_type = iter_type;
1513 enum tree_code plus_code = PLUS_EXPR;
1514 tree expr;
1516 if (POINTER_TYPE_P (iter_type))
1518 plus_code = POINTER_PLUS_EXPR;
1519 plus_type = sizetype;
1522 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
1523 fold_convert (ivar_type, collapse->iters));
1524 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1525 collapse->step);
1526 expr = fold_build2 (plus_code, iter_type, collapse->base,
1527 fold_convert (plus_type, expr));
1528 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1529 true, GSI_SAME_STMT);
1530 gassign *ass = gimple_build_assign (loop->v, expr);
1531 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1533 if (ix)
1535 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
1536 fold_convert (ivar_type, collapse->iters));
1537 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1538 true, GSI_SAME_STMT);
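/* A worked example, assuming two collapsed loops with 3 and 4 iterations
   and IVAR = 7: the innermost variable is set to base2 + (7 % 4) * step2,
   IVAR is then divided down to 7 / 4 = 1, and the outer variable becomes
   base1 + (1 % 3) * step1, i.e. the flattened index 7 maps to element
   (1, 3) of the nest.  */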
1543 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1544 of the combined collapse > 1 loop constructs, generate code like:
1545 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1546 if (cond3 is <)
1547 adj = STEP3 - 1;
1548 else
1549 adj = STEP3 + 1;
1550 count3 = (adj + N32 - N31) / STEP3;
1551 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1552 if (cond2 is <)
1553 adj = STEP2 - 1;
1554 else
1555 adj = STEP2 + 1;
1556 count2 = (adj + N22 - N21) / STEP2;
1557 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1558 if (cond1 is <)
1559 adj = STEP1 - 1;
1560 else
1561 adj = STEP1 + 1;
1562 count1 = (adj + N12 - N11) / STEP1;
1563 count = count1 * count2 * count3;
1564 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1565 count = 0;
1566 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1567 of the combined loop constructs, just initialize COUNTS array
1568 from the _looptemp_ clauses. */
1570 /* NOTE: It *could* be better to moosh all of the BBs together,
1571 creating one larger BB with all the computation and the unexpected
1572 jump at the end. I.e.
1574 bool zero3, zero2, zero1, zero;
1576 zero3 = N32 c3 N31;
1577 count3 = (N32 - N31) /[cl] STEP3;
1578 zero2 = N22 c2 N21;
1579 count2 = (N22 - N21) /[cl] STEP2;
1580 zero1 = N12 c1 N11;
1581 count1 = (N12 - N11) /[cl] STEP1;
1582 zero = zero3 || zero2 || zero1;
1583 count = count1 * count2 * count3;
1584 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1586 After all, we expect the zero=false, and thus we expect to have to
1587 evaluate all of the comparison expressions, so short-circuiting
1588 oughtn't be a win. Since the condition isn't protecting a
1589 denominator, we're not concerned about divide-by-zero, so we can
1590 fully evaluate count even if a numerator turned out to be wrong.
1592 It seems like putting this all together would create much better
1593 scheduling opportunities, and less pressure on the chip's branch
1594 predictor. */
1596 static void
1597 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1598 basic_block &entry_bb, tree *counts,
1599 basic_block &zero_iter1_bb, int &first_zero_iter1,
1600 basic_block &zero_iter2_bb, int &first_zero_iter2,
1601 basic_block &l2_dom_bb)
1603 tree t, type = TREE_TYPE (fd->loop.v);
1604 edge e, ne;
1605 int i;
1607 /* Collapsed loops need work for expansion into SSA form. */
1608 gcc_assert (!gimple_in_ssa_p (cfun));
1610 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1611 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1613 gcc_assert (fd->ordered == 0);
1614 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1615 isn't supposed to be handled, as the inner loop doesn't
1616 use it. */
1617 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1618 OMP_CLAUSE__LOOPTEMP_);
1619 gcc_assert (innerc);
1620 for (i = 0; i < fd->collapse; i++)
1622 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1623 OMP_CLAUSE__LOOPTEMP_);
1624 gcc_assert (innerc);
1625 if (i)
1626 counts[i] = OMP_CLAUSE_DECL (innerc);
1627 else
1628 counts[0] = NULL_TREE;
1630 return;
1633 for (i = fd->collapse; i < fd->ordered; i++)
1635 tree itype = TREE_TYPE (fd->loops[i].v);
1636 counts[i] = NULL_TREE;
1637 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1638 fold_convert (itype, fd->loops[i].n1),
1639 fold_convert (itype, fd->loops[i].n2));
1640 if (t && integer_zerop (t))
1642 for (i = fd->collapse; i < fd->ordered; i++)
1643 counts[i] = build_int_cst (type, 0);
1644 break;
1647 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1649 tree itype = TREE_TYPE (fd->loops[i].v);
1651 if (i >= fd->collapse && counts[i])
1652 continue;
1653 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1654 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1655 fold_convert (itype, fd->loops[i].n1),
1656 fold_convert (itype, fd->loops[i].n2)))
1657 == NULL_TREE || !integer_onep (t)))
1659 gcond *cond_stmt;
1660 tree n1, n2;
1661 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1662 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1663 true, GSI_SAME_STMT);
1664 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1665 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1666 true, GSI_SAME_STMT);
1667 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1668 NULL_TREE, NULL_TREE);
1669 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1670 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1671 expand_omp_regimplify_p, NULL, NULL)
1672 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1673 expand_omp_regimplify_p, NULL, NULL))
1675 *gsi = gsi_for_stmt (cond_stmt);
1676 gimple_regimplify_operands (cond_stmt, gsi);
1678 e = split_block (entry_bb, cond_stmt);
1679 basic_block &zero_iter_bb
1680 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1681 int &first_zero_iter
1682 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1683 if (zero_iter_bb == NULL)
1685 gassign *assign_stmt;
1686 first_zero_iter = i;
1687 zero_iter_bb = create_empty_bb (entry_bb);
1688 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1689 *gsi = gsi_after_labels (zero_iter_bb);
1690 if (i < fd->collapse)
1691 assign_stmt = gimple_build_assign (fd->loop.n2,
1692 build_zero_cst (type));
1693 else
1695 counts[i] = create_tmp_reg (type, ".count");
1696 assign_stmt
1697 = gimple_build_assign (counts[i], build_zero_cst (type));
1699 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1700 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1701 entry_bb);
1703 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1704 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1705 e->flags = EDGE_TRUE_VALUE;
1706 e->probability = REG_BR_PROB_BASE - ne->probability;
1707 if (l2_dom_bb == NULL)
1708 l2_dom_bb = entry_bb;
1709 entry_bb = e->dest;
1710 *gsi = gsi_last_bb (entry_bb);
1713 if (POINTER_TYPE_P (itype))
1714 itype = signed_type_for (itype);
1715 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1716 ? -1 : 1));
1717 t = fold_build2 (PLUS_EXPR, itype,
1718 fold_convert (itype, fd->loops[i].step), t);
1719 t = fold_build2 (PLUS_EXPR, itype, t,
1720 fold_convert (itype, fd->loops[i].n2));
1721 t = fold_build2 (MINUS_EXPR, itype, t,
1722 fold_convert (itype, fd->loops[i].n1));
1723 /* ?? We could probably use CEIL_DIV_EXPR instead of
1724 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1725 generate the same code in the end because generically we
1726 don't know that the values involved must be negative for
1727 GT?? */
1728 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1729 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1730 fold_build1 (NEGATE_EXPR, itype, t),
1731 fold_build1 (NEGATE_EXPR, itype,
1732 fold_convert (itype,
1733 fd->loops[i].step)));
1734 else
1735 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1736 fold_convert (itype, fd->loops[i].step));
1737 t = fold_convert (type, t);
1738 if (TREE_CODE (t) == INTEGER_CST)
1739 counts[i] = t;
1740 else
1742 if (i < fd->collapse || i != first_zero_iter2)
1743 counts[i] = create_tmp_reg (type, ".count");
1744 expand_omp_build_assign (gsi, counts[i], t);
1746 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1748 if (i == 0)
1749 t = counts[0];
1750 else
1751 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1752 expand_omp_build_assign (gsi, fd->loop.n2, t);
1757 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1758 T = V;
1759 V3 = N31 + (T % count3) * STEP3;
1760 T = T / count3;
1761 V2 = N21 + (T % count2) * STEP2;
1762 T = T / count2;
1763 V1 = N11 + T * STEP1;
1764 if this loop doesn't have an inner loop construct combined with it.
1765 If it does have an inner loop construct combined with it and the
1766 iteration count isn't known constant, store values from counts array
1767 into its _looptemp_ temporaries instead. */
1769 static void
1770 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1771 tree *counts, gimple *inner_stmt, tree startvar)
1773 int i;
1774 if (gimple_omp_for_combined_p (fd->for_stmt))
1776 /* If fd->loop.n2 is constant, then no propagation of the counts
1777 is needed, they are constant. */
1778 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1779 return;
1781 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1782 ? gimple_omp_taskreg_clauses (inner_stmt)
1783 : gimple_omp_for_clauses (inner_stmt);
1784 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1785 isn't supposed to be handled, as the inner loop doesn't
1786 use it. */
1787 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1796 tree tem = OMP_CLAUSE_DECL (innerc);
1797 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1798 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1799 false, GSI_CONTINUE_LINKING);
1800 gassign *stmt = gimple_build_assign (tem, t);
1801 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1804 return;
1807 tree type = TREE_TYPE (fd->loop.v);
1808 tree tem = create_tmp_reg (type, ".tem");
1809 gassign *stmt = gimple_build_assign (tem, startvar);
1810 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1812 for (i = fd->collapse - 1; i >= 0; i--)
1814 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1815 itype = vtype;
1816 if (POINTER_TYPE_P (vtype))
1817 itype = signed_type_for (vtype);
1818 if (i != 0)
1819 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1820 else
1821 t = tem;
1822 t = fold_convert (itype, t);
1823 t = fold_build2 (MULT_EXPR, itype, t,
1824 fold_convert (itype, fd->loops[i].step));
1825 if (POINTER_TYPE_P (vtype))
1826 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1827 else
1828 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1829 t = force_gimple_operand_gsi (gsi, t,
1830 DECL_P (fd->loops[i].v)
1831 && TREE_ADDRESSABLE (fd->loops[i].v),
1832 NULL_TREE, false,
1833 GSI_CONTINUE_LINKING);
1834 stmt = gimple_build_assign (fd->loops[i].v, t);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1836 if (i != 0)
1838 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
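/* A minimal sketch (not used by the pass) of the decomposition emitted
   by expand_omp_for_init_vars for collapse(3): one flat iteration number
   T is split back into the three original induction variables, exactly
   as in the pseudocode above.  v1..v3, n11/n21/n31, step1..step3 and
   count2/count3 are illustrative names only.  */
static void
init_vars_sketch (long t, long n11, long n21, long n31,
		  long step1, long step2, long step3,
		  long count2, long count3,
		  long *v1, long *v2, long *v3)
{
  *v3 = n31 + (t % count3) * step3;	/* innermost loop variable.  */
  t = t / count3;
  *v2 = n21 + (t % count2) * step2;
  t = t / count2;
  *v1 = n11 + t * step1;		/* outermost loop variable.  */
}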
1847 /* Helper function for expand_omp_for_*. Generate code like:
1848 L10:
1849 V3 += STEP3;
1850 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1851 L11:
1852 V3 = N31;
1853 V2 += STEP2;
1854 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1855 L12:
1856 V2 = N21;
1857 V1 += STEP1;
1858 goto BODY_BB; */
1860 static basic_block
1861 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1862 basic_block body_bb)
1864 basic_block last_bb, bb, collapse_bb = NULL;
1865 int i;
1866 gimple_stmt_iterator gsi;
1867 edge e;
1868 tree t;
1869 gimple *stmt;
1871 last_bb = cont_bb;
1872 for (i = fd->collapse - 1; i >= 0; i--)
1874 tree vtype = TREE_TYPE (fd->loops[i].v);
1876 bb = create_empty_bb (last_bb);
1877 add_bb_to_loop (bb, last_bb->loop_father);
1878 gsi = gsi_start_bb (bb);
1880 if (i < fd->collapse - 1)
1882 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1883 e->probability = REG_BR_PROB_BASE / 8;
1885 t = fd->loops[i + 1].n1;
1886 t = force_gimple_operand_gsi (&gsi, t,
1887 DECL_P (fd->loops[i + 1].v)
1888 && TREE_ADDRESSABLE (fd->loops[i
1889 + 1].v),
1890 NULL_TREE, false,
1891 GSI_CONTINUE_LINKING);
1892 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1893 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1895 else
1896 collapse_bb = bb;
1898 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1900 if (POINTER_TYPE_P (vtype))
1901 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1902 else
1903 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1904 t = force_gimple_operand_gsi (&gsi, t,
1905 DECL_P (fd->loops[i].v)
1906 && TREE_ADDRESSABLE (fd->loops[i].v),
1907 NULL_TREE, false, GSI_CONTINUE_LINKING);
1908 stmt = gimple_build_assign (fd->loops[i].v, t);
1909 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1911 if (i > 0)
1913 t = fd->loops[i].n2;
1914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1915 false, GSI_CONTINUE_LINKING);
1916 tree v = fd->loops[i].v;
1917 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1918 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1919 false, GSI_CONTINUE_LINKING);
1920 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1921 stmt = gimple_build_cond_empty (t);
1922 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1923 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1924 e->probability = REG_BR_PROB_BASE * 7 / 8;
1926 else
1927 make_edge (bb, body_bb, EDGE_FALLTHRU);
1928 last_bb = bb;
1931 return collapse_bb;
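/* A minimal sketch (not used by the pass) of the update sequence that
   extract_omp_for_update_vars builds for collapse(3): step the innermost
   variable and, when it leaves its range, reset it and carry into the
   next outer variable.  The "<" comparisons stand for cond3/cond2; all
   names are illustrative only.  */
static void
update_vars_sketch (long *v1, long *v2, long *v3,
		    long n21, long n31,
		    long step1, long step2, long step3,
		    long n22, long n32)
{
  *v3 += step3;
  if (*v3 < n32)	/* "V3 cond3 N32": keep iterating V3.  */
    return;
  *v3 = n31;		/* reset V3 and carry into V2.  */
  *v2 += step2;
  if (*v2 < n22)	/* "V2 cond2 N22": keep iterating V2.  */
    return;
  *v2 = n21;		/* reset V2 and carry into V1.  */
  *v1 += step1;
}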
1934 /* Expand #pragma omp ordered depend(source). */
1936 static void
1937 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1938 tree *counts, location_t loc)
1940 enum built_in_function source_ix
1941 = fd->iter_type == long_integer_type_node
1942 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1943 gimple *g
1944 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1945 build_fold_addr_expr (counts[fd->ordered]));
1946 gimple_set_location (g, loc);
1947 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1950 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1952 static void
1953 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1954 tree *counts, tree c, location_t loc)
1956 auto_vec<tree, 10> args;
1957 enum built_in_function sink_ix
1958 = fd->iter_type == long_integer_type_node
1959 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1960 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1961 int i;
1962 gimple_stmt_iterator gsi2 = *gsi;
1963 bool warned_step = false;
1965 for (i = 0; i < fd->ordered; i++)
1967 tree step = NULL_TREE;
1968 off = TREE_PURPOSE (deps);
1969 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1971 step = TREE_OPERAND (off, 1);
1972 off = TREE_OPERAND (off, 0);
1974 if (!integer_zerop (off))
1976 gcc_assert (fd->loops[i].cond_code == LT_EXPR
1977 || fd->loops[i].cond_code == GT_EXPR);
1978 bool forward = fd->loops[i].cond_code == LT_EXPR;
1979 if (step)
1981 /* Non-simple Fortran DO loops. If the step is variable,
1982 we don't even know the direction at compile time, so
1983 we can't warn. */
1984 if (TREE_CODE (step) != INTEGER_CST)
1985 break;
1986 forward = tree_int_cst_sgn (step) != -1;
1988 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
1989 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
1990 "lexically later iteration");
1991 break;
1993 deps = TREE_CHAIN (deps);
1995 /* If all offsets corresponding to the collapsed loops are zero,
1996 this depend clause can be ignored. FIXME: but there is still a
1997 flush needed. We need to emit one __sync_synchronize () for it
1998 though (perhaps conditionally)? Solve this together with the
1999 conservative dependence folding optimization.
2000 if (i >= fd->collapse)
2001 return; */
2003 deps = OMP_CLAUSE_DECL (c);
2004 gsi_prev (&gsi2);
2005 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2006 edge e2 = split_block_after_labels (e1->dest);
2008 gsi2 = gsi_after_labels (e1->dest);
2009 *gsi = gsi_last_bb (e1->src);
2010 for (i = 0; i < fd->ordered; i++)
2012 tree itype = TREE_TYPE (fd->loops[i].v);
2013 tree step = NULL_TREE;
2014 tree orig_off = NULL_TREE;
2015 if (POINTER_TYPE_P (itype))
2016 itype = sizetype;
2017 if (i)
2018 deps = TREE_CHAIN (deps);
2019 off = TREE_PURPOSE (deps);
2020 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2022 step = TREE_OPERAND (off, 1);
2023 off = TREE_OPERAND (off, 0);
2024 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2025 && integer_onep (fd->loops[i].step)
2026 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2028 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2029 if (step)
2031 off = fold_convert_loc (loc, itype, off);
2032 orig_off = off;
2033 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2036 if (integer_zerop (off))
2037 t = boolean_true_node;
2038 else
2040 tree a;
2041 tree co = fold_convert_loc (loc, itype, off);
2042 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2045 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2046 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2047 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2048 co);
2050 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2051 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2052 fd->loops[i].v, co);
2053 else
2054 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2055 fd->loops[i].v, co);
2056 if (step)
2058 tree t1, t2;
2059 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2060 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2061 fd->loops[i].n1);
2062 else
2063 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2064 fd->loops[i].n2);
2065 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2066 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2067 fd->loops[i].n2);
2068 else
2069 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2070 fd->loops[i].n1);
2071 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2072 step, build_int_cst (TREE_TYPE (step), 0));
2073 if (TREE_CODE (step) != INTEGER_CST)
2075 t1 = unshare_expr (t1);
2076 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2077 false, GSI_CONTINUE_LINKING);
2078 t2 = unshare_expr (t2);
2079 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2080 false, GSI_CONTINUE_LINKING);
2082 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2083 t, t2, t1);
2085 else if (fd->loops[i].cond_code == LT_EXPR)
2087 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2088 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2089 fd->loops[i].n1);
2090 else
2091 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2094 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2095 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2096 fd->loops[i].n2);
2097 else
2098 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2099 fd->loops[i].n1);
2101 if (cond)
2102 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2103 else
2104 cond = t;
2106 off = fold_convert_loc (loc, itype, off);
2108 if (step
2109 || (fd->loops[i].cond_code == LT_EXPR
2110 ? !integer_onep (fd->loops[i].step)
2111 : !integer_minus_onep (fd->loops[i].step)))
2113 if (step == NULL_TREE
2114 && TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2117 fold_build1_loc (loc, NEGATE_EXPR, itype,
2118 s));
2119 else
2120 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2121 orig_off ? orig_off : off, s);
2122 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2123 build_int_cst (itype, 0));
2124 if (integer_zerop (t) && !warned_step)
2126 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2127 "in the iteration space");
2128 warned_step = true;
2130 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2131 cond, t);
2134 if (i <= fd->collapse - 1 && fd->collapse > 1)
2135 t = fd->loop.v;
2136 else if (counts[i])
2137 t = counts[i];
2138 else
2140 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2141 fd->loops[i].v, fd->loops[i].n1);
2142 t = fold_convert_loc (loc, fd->iter_type, t);
2144 if (step)
2145 /* We have divided off by step already earlier. */;
2146 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2147 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2148 fold_build1_loc (loc, NEGATE_EXPR, itype,
2149 s));
2150 else
2151 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2152 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2153 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2154 off = fold_convert_loc (loc, fd->iter_type, off);
2155 if (i <= fd->collapse - 1 && fd->collapse > 1)
2157 if (i)
2158 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2159 off);
2160 if (i < fd->collapse - 1)
2162 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2163 counts[i]);
2164 continue;
2167 off = unshare_expr (off);
2168 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2169 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2170 true, GSI_SAME_STMT);
2171 args.safe_push (t);
2173 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2174 gimple_set_location (g, loc);
2175 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2177 cond = unshare_expr (cond);
2178 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2179 GSI_CONTINUE_LINKING);
2180 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2181 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2182 e3->probability = REG_BR_PROB_BASE / 8;
2183 e1->probability = REG_BR_PROB_BASE - e3->probability;
2184 e1->flags = EDGE_TRUE_VALUE;
2185 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2187 *gsi = gsi_after_labels (e2->dest);
2190 /* Expand all #pragma omp ordered depend(source) and
2191 #pragma omp ordered depend(sink:...) constructs in the current
2192 #pragma omp for ordered(n) region. */
2194 static void
2195 expand_omp_ordered_source_sink (struct omp_region *region,
2196 struct omp_for_data *fd, tree *counts,
2197 basic_block cont_bb)
2199 struct omp_region *inner;
2200 int i;
2201 for (i = fd->collapse - 1; i < fd->ordered; i++)
2202 if (i == fd->collapse - 1 && fd->collapse > 1)
2203 counts[i] = NULL_TREE;
2204 else if (i >= fd->collapse && !cont_bb)
2205 counts[i] = build_zero_cst (fd->iter_type);
2206 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2207 && integer_onep (fd->loops[i].step))
2208 counts[i] = NULL_TREE;
2209 else
2210 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2211 tree atype
2212 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2213 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2214 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2216 for (inner = region->inner; inner; inner = inner->next)
2217 if (inner->type == GIMPLE_OMP_ORDERED)
2219 gomp_ordered *ord_stmt = inner->ord_stmt;
2220 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2221 location_t loc = gimple_location (ord_stmt);
2222 tree c;
2223 for (c = gimple_omp_ordered_clauses (ord_stmt);
2224 c; c = OMP_CLAUSE_CHAIN (c))
2225 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2226 break;
2227 if (c)
2228 expand_omp_ordered_source (&gsi, fd, counts, loc);
2229 for (c = gimple_omp_ordered_clauses (ord_stmt);
2230 c; c = OMP_CLAUSE_CHAIN (c))
2231 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2232 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2233 gsi_remove (&gsi, true);
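/* For reference, a source-level doacross loop of the kind handled by the
   functions above (OpenMP 4.5 syntax): depend(sink: ...) becomes a wait
   on the posted iteration counters and depend(source) becomes a post of
   the current counters.  Purely illustrative, not used by the pass;
   requires -fopenmp.  */
static void
doacross_example_sketch (int n, int m, int a[n][m])
{
#pragma omp for ordered(2)
  for (int i = 1; i < n; i++)
    for (int j = 0; j < m; j++)
      {
#pragma omp ordered depend(sink: i - 1, j)
	a[i][j] = a[i - 1][j] + 1;
#pragma omp ordered depend(source)
      }
}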
2237 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2238 collapsed. */
2240 static basic_block
2241 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2242 basic_block cont_bb, basic_block body_bb,
2243 bool ordered_lastprivate)
2245 if (fd->ordered == fd->collapse)
2246 return cont_bb;
2248 if (!cont_bb)
2250 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2251 for (int i = fd->collapse; i < fd->ordered; i++)
2253 tree type = TREE_TYPE (fd->loops[i].v);
2254 tree n1 = fold_convert (type, fd->loops[i].n1);
2255 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2256 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2257 size_int (i - fd->collapse + 1),
2258 NULL_TREE, NULL_TREE);
2259 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2261 return NULL;
2264 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2266 tree t, type = TREE_TYPE (fd->loops[i].v);
2267 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2268 expand_omp_build_assign (&gsi, fd->loops[i].v,
2269 fold_convert (type, fd->loops[i].n1));
2270 if (counts[i])
2271 expand_omp_build_assign (&gsi, counts[i],
2272 build_zero_cst (fd->iter_type));
2273 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2274 size_int (i - fd->collapse + 1),
2275 NULL_TREE, NULL_TREE);
2276 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2277 if (!gsi_end_p (gsi))
2278 gsi_prev (&gsi);
2279 else
2280 gsi = gsi_last_bb (body_bb);
2281 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2282 basic_block new_body = e1->dest;
2283 if (body_bb == cont_bb)
2284 cont_bb = new_body;
2285 edge e2 = NULL;
2286 basic_block new_header;
2287 if (EDGE_COUNT (cont_bb->preds) > 0)
2289 gsi = gsi_last_bb (cont_bb);
2290 if (POINTER_TYPE_P (type))
2291 t = fold_build_pointer_plus (fd->loops[i].v,
2292 fold_convert (sizetype,
2293 fd->loops[i].step));
2294 else
2295 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].step));
2297 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2298 if (counts[i])
2300 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2301 build_int_cst (fd->iter_type, 1));
2302 expand_omp_build_assign (&gsi, counts[i], t);
2303 t = counts[i];
2305 else
2307 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2308 fd->loops[i].v, fd->loops[i].n1);
2309 t = fold_convert (fd->iter_type, t);
2310 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2313 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, t);
2317 gsi_prev (&gsi);
2318 e2 = split_block (cont_bb, gsi_stmt (gsi));
2319 new_header = e2->dest;
2321 else
2322 new_header = cont_bb;
2323 gsi = gsi_after_labels (new_header);
2324 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2325 true, GSI_SAME_STMT);
2326 tree n2
2327 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2328 true, NULL_TREE, true, GSI_SAME_STMT);
2329 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2330 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2331 edge e3 = split_block (new_header, gsi_stmt (gsi));
2332 cont_bb = e3->dest;
2333 remove_edge (e1);
2334 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2335 e3->flags = EDGE_FALSE_VALUE;
2336 e3->probability = REG_BR_PROB_BASE / 8;
2337 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2338 e1->probability = REG_BR_PROB_BASE - e3->probability;
2340 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2341 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2343 if (e2)
2345 struct loop *loop = alloc_loop ();
2346 loop->header = new_header;
2347 loop->latch = e2->src;
2348 add_loop (loop, body_bb->loop_father);
2352 /* If there are any lastprivate clauses and it is possible some loops
2353 might have zero iterations, ensure all the decls are initialized;
2354 otherwise we could crash evaluating C++ class iterators with lastprivate
2355 clauses. */
2356 bool need_inits = false;
2357 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2358 if (need_inits)
2360 tree type = TREE_TYPE (fd->loops[i].v);
2361 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2362 expand_omp_build_assign (&gsi, fd->loops[i].v,
2363 fold_convert (type, fd->loops[i].n1));
2365 else
2367 tree type = TREE_TYPE (fd->loops[i].v);
2368 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2369 boolean_type_node,
2370 fold_convert (type, fd->loops[i].n1),
2371 fold_convert (type, fd->loops[i].n2));
2372 if (!integer_onep (this_cond))
2373 need_inits = true;
2376 return cont_bb;
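/* A minimal sketch (not used by the pass) of the wrapping performed by
   expand_omp_for_ordered_loops for one non-collapsed ordered dimension:
   the already expanded body runs inside a loop that keeps the
   dimension's current iteration number in the counter array consulted by
   the depend(source)/depend(sink) expansion.  SLOT and the other names
   are illustrative only, and a "<" loop with positive step is assumed.  */
static void
ordered_loop_wrap_sketch (long n1, long n2, long step, int slot,
			  long *orditera)
{
  long iter = 0;
  for (long v = n1; v < n2; v += step)
    {
      orditera[slot] = iter;	/* current position of this dimension.  */
      /* ... the wrapped loop body would run here ...  */
      iter++;
    }
}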
2379 /* A subroutine of expand_omp_for. Generate code for a parallel
2380 loop with any schedule. Given parameters:
2382 for (V = N1; V cond N2; V += STEP) BODY;
2384 where COND is "<" or ">", we generate pseudocode
2386 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2387 if (more) goto L0; else goto L3;
2388 L0:
2389 V = istart0;
2390 iend = iend0;
2391 L1:
2392 BODY;
2393 V += STEP;
2394 if (V cond iend) goto L1; else goto L2;
2395 L2:
2396 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2397 L3:
2399 If this is a combined omp parallel loop, instead of the call to
2400 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2401 If this is gimple_omp_for_combined_p loop, then instead of assigning
2402 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2403 inner GIMPLE_OMP_FOR and V += STEP; and
2404 if (V cond iend) goto L1; else goto L2; are removed.
2406 For collapsed loops, given parameters:
2407 collapse(3)
2408 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2409 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2410 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2411 BODY;
2413 we generate pseudocode
2415 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2416 if (cond3 is <)
2417 adj = STEP3 - 1;
2418 else
2419 adj = STEP3 + 1;
2420 count3 = (adj + N32 - N31) / STEP3;
2421 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2422 if (cond2 is <)
2423 adj = STEP2 - 1;
2424 else
2425 adj = STEP2 + 1;
2426 count2 = (adj + N22 - N21) / STEP2;
2427 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2428 if (cond1 is <)
2429 adj = STEP1 - 1;
2430 else
2431 adj = STEP1 + 1;
2432 count1 = (adj + N12 - N11) / STEP1;
2433 count = count1 * count2 * count3;
2434 goto Z1;
2435 Z0:
2436 count = 0;
2437 Z1:
2438 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2439 if (more) goto L0; else goto L3;
2440 L0:
2441 V = istart0;
2442 T = V;
2443 V3 = N31 + (T % count3) * STEP3;
2444 T = T / count3;
2445 V2 = N21 + (T % count2) * STEP2;
2446 T = T / count2;
2447 V1 = N11 + T * STEP1;
2448 iend = iend0;
2449 L1:
2450 BODY;
2451 V += 1;
2452 if (V < iend) goto L10; else goto L2;
2453 L10:
2454 V3 += STEP3;
2455 if (V3 cond3 N32) goto L1; else goto L11;
2456 L11:
2457 V3 = N31;
2458 V2 += STEP2;
2459 if (V2 cond2 N22) goto L1; else goto L12;
2460 L12:
2461 V2 = N21;
2462 V1 += STEP1;
2463 goto L1;
2464 L2:
2465 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2466 L3:
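/* A runnable sketch (not used by the pass) of the control flow described
   above.  loop_foo_start/loop_foo_next/loop_foo_end stand in for the
   GOMP_loop_*_start/next/end entry points of whichever schedule was
   selected; their exact libgomp prototypes are not reproduced here, and
   a "<" loop is assumed.  */
static void
generic_schedule_sketch (long n1, long n2, long step, long chunk,
			 int (*loop_foo_start) (long, long, long, long,
						long *, long *),
			 int (*loop_foo_next) (long *, long *),
			 void (*loop_foo_end) (void),
			 void (*body) (long))
{
  long istart0, iend0, v;

  if (!loop_foo_start (n1, n2, step, chunk, &istart0, &iend0))
    goto done;					/* "goto L3" above.  */
  do
    {
      /* L0: take the chunk handed out by the runtime.  */
      for (v = istart0; v < iend0; v += step)	/* L1: run the body.  */
	body (v);
    }
  while (loop_foo_next (&istart0, &iend0));	/* L2: ask for more work.  */
 done:
  loop_foo_end ();				/* L3: loop-end call.  */
}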
2470 static void
2471 expand_omp_for_generic (struct omp_region *region,
2472 struct omp_for_data *fd,
2473 enum built_in_function start_fn,
2474 enum built_in_function next_fn,
2475 gimple *inner_stmt)
2477 tree type, istart0, iend0, iend;
2478 tree t, vmain, vback, bias = NULL_TREE;
2479 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2480 basic_block l2_bb = NULL, l3_bb = NULL;
2481 gimple_stmt_iterator gsi;
2482 gassign *assign_stmt;
2483 bool in_combined_parallel = is_combined_parallel (region);
2484 bool broken_loop = region->cont == NULL;
2485 edge e, ne;
2486 tree *counts = NULL;
2487 int i;
2488 bool ordered_lastprivate = false;
2490 gcc_assert (!broken_loop || !in_combined_parallel);
2491 gcc_assert (fd->iter_type == long_integer_type_node
2492 || !in_combined_parallel);
2494 entry_bb = region->entry;
2495 cont_bb = region->cont;
2496 collapse_bb = NULL;
2497 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2498 gcc_assert (broken_loop
2499 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2500 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2501 l1_bb = single_succ (l0_bb);
2502 if (!broken_loop)
2504 l2_bb = create_empty_bb (cont_bb);
2505 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2506 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2507 == l1_bb));
2508 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2510 else
2511 l2_bb = NULL;
2512 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2513 exit_bb = region->exit;
2515 gsi = gsi_last_bb (entry_bb);
2517 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2518 if (fd->ordered
2519 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2520 OMP_CLAUSE_LASTPRIVATE))
2521 ordered_lastprivate = true;
2522 if (fd->collapse > 1 || fd->ordered)
2524 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2525 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2527 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2528 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2529 zero_iter1_bb, first_zero_iter1,
2530 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2532 if (zero_iter1_bb)
2534 /* Some counts[i] vars might be uninitialized if
2535 some loop has zero iterations. But the body shouldn't
2536 be executed in that case, so just avoid uninit warnings. */
2537 for (i = first_zero_iter1;
2538 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2539 if (SSA_VAR_P (counts[i]))
2540 TREE_NO_WARNING (counts[i]) = 1;
2541 gsi_prev (&gsi);
2542 e = split_block (entry_bb, gsi_stmt (gsi));
2543 entry_bb = e->dest;
2544 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2545 gsi = gsi_last_bb (entry_bb);
2546 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2547 get_immediate_dominator (CDI_DOMINATORS,
2548 zero_iter1_bb));
2550 if (zero_iter2_bb)
2552 /* Some counts[i] vars might be uninitialized if
2553 some loop has zero iterations. But the body shouldn't
2554 be executed in that case, so just avoid uninit warnings. */
2555 for (i = first_zero_iter2; i < fd->ordered; i++)
2556 if (SSA_VAR_P (counts[i]))
2557 TREE_NO_WARNING (counts[i]) = 1;
2558 if (zero_iter1_bb)
2559 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2560 else
2562 gsi_prev (&gsi);
2563 e = split_block (entry_bb, gsi_stmt (gsi));
2564 entry_bb = e->dest;
2565 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2566 gsi = gsi_last_bb (entry_bb);
2567 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2568 get_immediate_dominator
2569 (CDI_DOMINATORS, zero_iter2_bb));
2572 if (fd->collapse == 1)
2574 counts[0] = fd->loop.n2;
2575 fd->loop = fd->loops[0];
2579 type = TREE_TYPE (fd->loop.v);
2580 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2581 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2582 TREE_ADDRESSABLE (istart0) = 1;
2583 TREE_ADDRESSABLE (iend0) = 1;
2585 /* See if we need to bias by LLONG_MIN. */
2586 if (fd->iter_type == long_long_unsigned_type_node
2587 && TREE_CODE (type) == INTEGER_TYPE
2588 && !TYPE_UNSIGNED (type)
2589 && fd->ordered == 0)
2591 tree n1, n2;
2593 if (fd->loop.cond_code == LT_EXPR)
2595 n1 = fd->loop.n1;
2596 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2598 else
2600 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2601 n2 = fd->loop.n1;
2603 if (TREE_CODE (n1) != INTEGER_CST
2604 || TREE_CODE (n2) != INTEGER_CST
2605 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2606 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2609 gimple_stmt_iterator gsif = gsi;
2610 gsi_prev (&gsif);
2612 tree arr = NULL_TREE;
2613 if (in_combined_parallel)
2615 gcc_assert (fd->ordered == 0);
2616 /* In a combined parallel loop, emit a call to
2617 GOMP_loop_foo_next. */
2618 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2619 build_fold_addr_expr (istart0),
2620 build_fold_addr_expr (iend0));
2622 else
2624 tree t0, t1, t2, t3, t4;
2625 /* If this is not a combined parallel loop, emit a call to
2626 GOMP_loop_foo_start in ENTRY_BB. */
2627 t4 = build_fold_addr_expr (iend0);
2628 t3 = build_fold_addr_expr (istart0);
2629 if (fd->ordered)
2631 t0 = build_int_cst (unsigned_type_node,
2632 fd->ordered - fd->collapse + 1);
2633 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2634 fd->ordered
2635 - fd->collapse + 1),
2636 ".omp_counts");
2637 DECL_NAMELESS (arr) = 1;
2638 TREE_ADDRESSABLE (arr) = 1;
2639 TREE_STATIC (arr) = 1;
2640 vec<constructor_elt, va_gc> *v;
2641 vec_alloc (v, fd->ordered - fd->collapse + 1);
2642 int idx;
2644 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2646 tree c;
2647 if (idx == 0 && fd->collapse > 1)
2648 c = fd->loop.n2;
2649 else
2650 c = counts[idx + fd->collapse - 1];
2651 tree purpose = size_int (idx);
2652 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2653 if (TREE_CODE (c) != INTEGER_CST)
2654 TREE_STATIC (arr) = 0;
2657 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2658 if (!TREE_STATIC (arr))
2659 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2660 void_type_node, arr),
2661 true, NULL_TREE, true, GSI_SAME_STMT);
2662 t1 = build_fold_addr_expr (arr);
2663 t2 = NULL_TREE;
2665 else
2667 t2 = fold_convert (fd->iter_type, fd->loop.step);
2668 t1 = fd->loop.n2;
2669 t0 = fd->loop.n1;
2670 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2672 tree innerc
2673 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2674 OMP_CLAUSE__LOOPTEMP_);
2675 gcc_assert (innerc);
2676 t0 = OMP_CLAUSE_DECL (innerc);
2677 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2678 OMP_CLAUSE__LOOPTEMP_);
2679 gcc_assert (innerc);
2680 t1 = OMP_CLAUSE_DECL (innerc);
2682 if (POINTER_TYPE_P (TREE_TYPE (t0))
2683 && TYPE_PRECISION (TREE_TYPE (t0))
2684 != TYPE_PRECISION (fd->iter_type))
2686 /* Avoid casting pointers to an integer of a different size. */
2687 tree itype = signed_type_for (type);
2688 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2689 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2691 else
2693 t1 = fold_convert (fd->iter_type, t1);
2694 t0 = fold_convert (fd->iter_type, t0);
2696 if (bias)
2698 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2699 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2702 if (fd->iter_type == long_integer_type_node || fd->ordered)
2704 if (fd->chunk_size)
2706 t = fold_convert (fd->iter_type, fd->chunk_size);
2707 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2708 if (fd->ordered)
2709 t = build_call_expr (builtin_decl_explicit (start_fn),
2710 5, t0, t1, t, t3, t4);
2711 else
2712 t = build_call_expr (builtin_decl_explicit (start_fn),
2713 6, t0, t1, t2, t, t3, t4);
2715 else if (fd->ordered)
2716 t = build_call_expr (builtin_decl_explicit (start_fn),
2717 4, t0, t1, t3, t4);
2718 else
2719 t = build_call_expr (builtin_decl_explicit (start_fn),
2720 5, t0, t1, t2, t3, t4);
2722 else
2724 tree t5;
2725 tree c_bool_type;
2726 tree bfn_decl;
2728 /* The GOMP_loop_ull_*start functions have an additional boolean
2729 argument, true for < loops and false for > loops.
2730 In Fortran, the C bool type can be different from
2731 boolean_type_node. */
2732 bfn_decl = builtin_decl_explicit (start_fn);
2733 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2734 t5 = build_int_cst (c_bool_type,
2735 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2736 if (fd->chunk_size)
2738 tree bfn_decl = builtin_decl_explicit (start_fn);
2739 t = fold_convert (fd->iter_type, fd->chunk_size);
2740 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2741 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 6, t5, t0, t1, t2, t3, t4);
2748 if (TREE_TYPE (t) != boolean_type_node)
2749 t = fold_build2 (NE_EXPR, boolean_type_node,
2750 t, build_int_cst (TREE_TYPE (t), 0));
2751 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2752 true, GSI_SAME_STMT);
2753 if (arr && !TREE_STATIC (arr))
2755 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2756 TREE_THIS_VOLATILE (clobber) = 1;
2757 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2758 GSI_SAME_STMT);
2760 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2762 /* Remove the GIMPLE_OMP_FOR statement. */
2763 gsi_remove (&gsi, true);
2765 if (gsi_end_p (gsif))
2766 gsif = gsi_after_labels (gsi_bb (gsif));
2767 gsi_next (&gsif);
2769 /* Iteration setup for sequential loop goes in L0_BB. */
2770 tree startvar = fd->loop.v;
2771 tree endvar = NULL_TREE;
2773 if (gimple_omp_for_combined_p (fd->for_stmt))
2775 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2776 && gimple_omp_for_kind (inner_stmt)
2777 == GF_OMP_FOR_KIND_SIMD);
2778 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2779 OMP_CLAUSE__LOOPTEMP_);
2780 gcc_assert (innerc);
2781 startvar = OMP_CLAUSE_DECL (innerc);
2782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2783 OMP_CLAUSE__LOOPTEMP_);
2784 gcc_assert (innerc);
2785 endvar = OMP_CLAUSE_DECL (innerc);
2788 gsi = gsi_start_bb (l0_bb);
2789 t = istart0;
2790 if (fd->ordered && fd->collapse == 1)
2791 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2792 fold_convert (fd->iter_type, fd->loop.step));
2793 else if (bias)
2794 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2795 if (fd->ordered && fd->collapse == 1)
2797 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2798 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2799 fd->loop.n1, fold_convert (sizetype, t));
2800 else
2802 t = fold_convert (TREE_TYPE (startvar), t);
2803 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2804 fd->loop.n1, t);
2807 else
2809 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2810 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2811 t = fold_convert (TREE_TYPE (startvar), t);
2813 t = force_gimple_operand_gsi (&gsi, t,
2814 DECL_P (startvar)
2815 && TREE_ADDRESSABLE (startvar),
2816 NULL_TREE, false, GSI_CONTINUE_LINKING);
2817 assign_stmt = gimple_build_assign (startvar, t);
2818 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2820 t = iend0;
2821 if (fd->ordered && fd->collapse == 1)
2822 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2823 fold_convert (fd->iter_type, fd->loop.step));
2824 else if (bias)
2825 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2826 if (fd->ordered && fd->collapse == 1)
2828 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2829 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2830 fd->loop.n1, fold_convert (sizetype, t));
2831 else
2833 t = fold_convert (TREE_TYPE (startvar), t);
2834 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2835 fd->loop.n1, t);
2838 else
2840 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2841 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2842 t = fold_convert (TREE_TYPE (startvar), t);
2844 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2845 false, GSI_CONTINUE_LINKING);
2846 if (endvar)
2848 assign_stmt = gimple_build_assign (endvar, iend);
2849 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2850 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2851 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2852 else
2853 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2856 /* Handle linear clause adjustments. */
2857 tree itercnt = NULL_TREE;
2858 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2859 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2860 c; c = OMP_CLAUSE_CHAIN (c))
2861 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2862 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2864 tree d = OMP_CLAUSE_DECL (c);
2865 bool is_ref = omp_is_reference (d);
2866 tree t = d, a, dest;
2867 if (is_ref)
2868 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2869 tree type = TREE_TYPE (t);
2870 if (POINTER_TYPE_P (type))
2871 type = sizetype;
2872 dest = unshare_expr (t);
2873 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2874 expand_omp_build_assign (&gsif, v, t);
2875 if (itercnt == NULL_TREE)
2877 itercnt = startvar;
2878 tree n1 = fd->loop.n1;
2879 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2881 itercnt
2882 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2883 itercnt);
2884 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2886 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2887 itercnt, n1);
2888 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2889 itercnt, fd->loop.step);
2890 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2891 NULL_TREE, false,
2892 GSI_CONTINUE_LINKING);
2894 a = fold_build2 (MULT_EXPR, type,
2895 fold_convert (type, itercnt),
2896 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2897 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2898 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2899 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2900 false, GSI_CONTINUE_LINKING);
2901 assign_stmt = gimple_build_assign (dest, t);
2902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2904 if (fd->collapse > 1)
2905 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2907 if (fd->ordered)
2909 /* Until now, the counts array contained the number of iterations
2910 (or a variable containing it) for the ith loop. From now on, we
2911 need those counts only for the collapsed loops, and only from the
2912 2nd to the last collapsed one. Move them one element earlier;
2913 we'll use counts[fd->collapse - 1] for the first source/sink
2914 iteration counter and so on, and counts[fd->ordered]
2915 as the array holding the current counter values for
2916 depend(source). */
2917 if (fd->collapse > 1)
2918 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2919 if (broken_loop)
2921 int i;
2922 for (i = fd->collapse; i < fd->ordered; i++)
2924 tree type = TREE_TYPE (fd->loops[i].v);
2925 tree this_cond
2926 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2927 fold_convert (type, fd->loops[i].n1),
2928 fold_convert (type, fd->loops[i].n2));
2929 if (!integer_onep (this_cond))
2930 break;
2932 if (i < fd->ordered)
2934 cont_bb
2935 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2936 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2937 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2938 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2940 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2941 make_edge (cont_bb, l1_bb, 0);
2942 l2_bb = create_empty_bb (cont_bb);
2943 broken_loop = false;
2946 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2947 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2948 ordered_lastprivate);
2949 if (counts[fd->collapse - 1])
2951 gcc_assert (fd->collapse == 1);
2952 gsi = gsi_last_bb (l0_bb);
2953 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2954 istart0, true);
2955 gsi = gsi_last_bb (cont_bb);
2956 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2957 build_int_cst (fd->iter_type, 1));
2958 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2959 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2960 size_zero_node, NULL_TREE, NULL_TREE);
2961 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2962 t = counts[fd->collapse - 1];
2964 else if (fd->collapse > 1)
2965 t = fd->loop.v;
2966 else
2968 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2969 fd->loops[0].v, fd->loops[0].n1);
2970 t = fold_convert (fd->iter_type, t);
2972 gsi = gsi_last_bb (l0_bb);
2973 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2974 size_zero_node, NULL_TREE, NULL_TREE);
2975 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2976 false, GSI_CONTINUE_LINKING);
2977 expand_omp_build_assign (&gsi, aref, t, true);
2980 if (!broken_loop)
2982 /* Code to control the increment and predicate for the sequential
2983 loop goes in the CONT_BB. */
2984 gsi = gsi_last_bb (cont_bb);
2985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
2986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
2987 vmain = gimple_omp_continue_control_use (cont_stmt);
2988 vback = gimple_omp_continue_control_def (cont_stmt);
2990 if (!gimple_omp_for_combined_p (fd->for_stmt))
2992 if (POINTER_TYPE_P (type))
2993 t = fold_build_pointer_plus (vmain, fd->loop.step);
2994 else
2995 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
2996 t = force_gimple_operand_gsi (&gsi, t,
2997 DECL_P (vback)
2998 && TREE_ADDRESSABLE (vback),
2999 NULL_TREE, true, GSI_SAME_STMT);
3000 assign_stmt = gimple_build_assign (vback, t);
3001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3003 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3005 if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3013 tree aref = build4 (ARRAY_REF, fd->iter_type,
3014 counts[fd->ordered], size_zero_node,
3015 NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 true, GSI_SAME_STMT);
3018 expand_omp_build_assign (&gsi, aref, t);
3021 t = build2 (fd->loop.cond_code, boolean_type_node,
3022 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3023 iend);
3024 gcond *cond_stmt = gimple_build_cond_empty (t);
3025 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3028 /* Remove GIMPLE_OMP_CONTINUE. */
3029 gsi_remove (&gsi, true);
3031 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3032 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3034 /* Emit code to get the next parallel iteration in L2_BB. */
3035 gsi = gsi_start_bb (l2_bb);
3037 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3038 build_fold_addr_expr (istart0),
3039 build_fold_addr_expr (iend0));
3040 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3041 false, GSI_CONTINUE_LINKING);
3042 if (TREE_TYPE (t) != boolean_type_node)
3043 t = fold_build2 (NE_EXPR, boolean_type_node,
3044 t, build_int_cst (TREE_TYPE (t), 0));
3045 gcond *cond_stmt = gimple_build_cond_empty (t);
3046 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3049 /* Add the loop cleanup function. */
3050 gsi = gsi_last_bb (exit_bb);
3051 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3052 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3053 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3054 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3055 else
3056 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3057 gcall *call_stmt = gimple_build_call (t, 0);
3058 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3059 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3060 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3061 if (fd->ordered)
3063 tree arr = counts[fd->ordered];
3064 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3065 TREE_THIS_VOLATILE (clobber) = 1;
3066 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3067 GSI_SAME_STMT);
3069 gsi_remove (&gsi, true);
3071 /* Connect the new blocks. */
3072 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3073 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3075 if (!broken_loop)
3077 gimple_seq phis;
3079 e = find_edge (cont_bb, l3_bb);
3080 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3082 phis = phi_nodes (l3_bb);
3083 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3085 gimple *phi = gsi_stmt (gsi);
3086 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3087 PHI_ARG_DEF_FROM_EDGE (phi, e));
3089 remove_edge (e);
3091 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3092 e = find_edge (cont_bb, l1_bb);
3093 if (e == NULL)
3095 e = BRANCH_EDGE (cont_bb);
3096 gcc_assert (single_succ (e->dest) == l1_bb);
3098 if (gimple_omp_for_combined_p (fd->for_stmt))
3100 remove_edge (e);
3101 e = NULL;
3103 else if (fd->collapse > 1)
3105 remove_edge (e);
3106 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3108 else
3109 e->flags = EDGE_TRUE_VALUE;
3110 if (e)
3112 e->probability = REG_BR_PROB_BASE * 7 / 8;
3113 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3115 else
3117 e = find_edge (cont_bb, l2_bb);
3118 e->flags = EDGE_FALLTHRU;
3120 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3122 if (gimple_in_ssa_p (cfun))
3124 /* Add phis to the outer loop that connect to the phis in the inner,
3125 original loop, and move the loop entry value of the inner phi to
3126 the loop entry value of the outer phi. */
3127 gphi_iterator psi;
3128 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3130 source_location locus;
3131 gphi *nphi;
3132 gphi *exit_phi = psi.phi ();
3134 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3135 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3137 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3138 edge latch_to_l1 = find_edge (latch, l1_bb);
3139 gphi *inner_phi
3140 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3142 tree t = gimple_phi_result (exit_phi);
3143 tree new_res = copy_ssa_name (t, NULL);
3144 nphi = create_phi_node (new_res, l0_bb);
3146 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3147 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3148 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3149 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3150 add_phi_arg (nphi, t, entry_to_l0, locus);
3152 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3153 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3155 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3159 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3160 recompute_dominator (CDI_DOMINATORS, l2_bb));
3161 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3162 recompute_dominator (CDI_DOMINATORS, l3_bb));
3163 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3164 recompute_dominator (CDI_DOMINATORS, l0_bb));
3165 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3166 recompute_dominator (CDI_DOMINATORS, l1_bb));
3168 /* We enter expand_omp_for_generic with a loop. This original loop may
3169 have its own loop struct, or it may be part of an outer loop struct
3170 (which may be the fake loop). */
3171 struct loop *outer_loop = entry_bb->loop_father;
3172 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3174 add_bb_to_loop (l2_bb, outer_loop);
3176 /* We've added a new loop around the original loop. Allocate the
3177 corresponding loop struct. */
3178 struct loop *new_loop = alloc_loop ();
3179 new_loop->header = l0_bb;
3180 new_loop->latch = l2_bb;
3181 add_loop (new_loop, outer_loop);
3183 /* Allocate a loop structure for the original loop unless we already
3184 had one. */
3185 if (!orig_loop_has_loop_struct
3186 && !gimple_omp_for_combined_p (fd->for_stmt))
3188 struct loop *orig_loop = alloc_loop ();
3189 orig_loop->header = l1_bb;
3190 /* The loop may have multiple latches. */
3191 add_loop (orig_loop, new_loop);
3196 /* A subroutine of expand_omp_for. Generate code for a parallel
3197 loop with static schedule and no specified chunk size. Given
3198 parameters:
3200 for (V = N1; V cond N2; V += STEP) BODY;
3202 where COND is "<" or ">", we generate pseudocode
3204 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3205 if (cond is <)
3206 adj = STEP - 1;
3207 else
3208 adj = STEP + 1;
3209 if ((__typeof (V)) -1 > 0 && cond is >)
3210 n = -(adj + N2 - N1) / -STEP;
3211 else
3212 n = (adj + N2 - N1) / STEP;
3213 q = n / nthreads;
3214 tt = n % nthreads;
3215 if (threadid < tt) goto L3; else goto L4;
3216 L3:
3217 tt = 0;
3218 q = q + 1;
3219 L4:
3220 s0 = q * threadid + tt;
3221 e0 = s0 + q;
3222 V = s0 * STEP + N1;
3223 if (s0 >= e0) goto L2; else goto L0;
3224 L0:
3225 e = e0 * STEP + N1;
3226 L1:
3227 BODY;
3228 V += STEP;
3229 if (V cond e) goto L1;
3230 L2:
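/* A runnable sketch (not used by the pass) of the partitioning described
   above: N iterations are split statically over NTHREADS threads, the
   first N % NTHREADS threads receiving one extra iteration; the chunk
   assigned to THREADID is the half-open range [*s0, *e0).  All names are
   illustrative only.  */
static void
static_nochunk_partition_sketch (unsigned long n, unsigned long nthreads,
				 unsigned long threadid,
				 unsigned long *s0, unsigned long *e0)
{
  unsigned long q = n / nthreads;	/* base iterations per thread.  */
  unsigned long tt = n % nthreads;	/* threads that get one extra.  */
  if (threadid < tt)
    {
      tt = 0;
      q++;
    }
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
}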
3233 static void
3234 expand_omp_for_static_nochunk (struct omp_region *region,
3235 struct omp_for_data *fd,
3236 gimple *inner_stmt)
3238 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3239 tree type, itype, vmain, vback;
3240 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3241 basic_block body_bb, cont_bb, collapse_bb = NULL;
3242 basic_block fin_bb;
3243 gimple_stmt_iterator gsi;
3244 edge ep;
3245 bool broken_loop = region->cont == NULL;
3246 tree *counts = NULL;
3247 tree n1, n2, step;
3249 itype = type = TREE_TYPE (fd->loop.v);
3250 if (POINTER_TYPE_P (type))
3251 itype = signed_type_for (type);
3253 entry_bb = region->entry;
3254 cont_bb = region->cont;
3255 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3256 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3257 gcc_assert (broken_loop
3258 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3259 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3260 body_bb = single_succ (seq_start_bb);
3261 if (!broken_loop)
3263 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3264 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3267 exit_bb = region->exit;
3269 /* Iteration space partitioning goes in ENTRY_BB. */
3270 gsi = gsi_last_bb (entry_bb);
3271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3273 if (fd->collapse > 1)
3275 int first_zero_iter = -1, dummy = -1;
3276 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3278 counts = XALLOCAVEC (tree, fd->collapse);
3279 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3280 fin_bb, first_zero_iter,
3281 dummy_bb, dummy, l2_dom_bb);
3282 t = NULL_TREE;
3284 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3285 t = integer_one_node;
3286 else
3287 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3288 fold_convert (type, fd->loop.n1),
3289 fold_convert (type, fd->loop.n2));
3290 if (fd->collapse == 1
3291 && TYPE_UNSIGNED (type)
3292 && (t == NULL_TREE || !integer_onep (t)))
3294 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3295 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3296 true, GSI_SAME_STMT);
3297 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3298 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3299 true, GSI_SAME_STMT);
3300 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3301 NULL_TREE, NULL_TREE);
3302 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3303 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3304 expand_omp_regimplify_p, NULL, NULL)
3305 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3306 expand_omp_regimplify_p, NULL, NULL))
3308 gsi = gsi_for_stmt (cond_stmt);
3309 gimple_regimplify_operands (cond_stmt, &gsi);
3311 ep = split_block (entry_bb, cond_stmt);
3312 ep->flags = EDGE_TRUE_VALUE;
3313 entry_bb = ep->dest;
3314 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3315 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3316 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3317 if (gimple_in_ssa_p (cfun))
3319 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3320 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3321 !gsi_end_p (gpi); gsi_next (&gpi))
3323 gphi *phi = gpi.phi ();
3324 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3325 ep, UNKNOWN_LOCATION);
3328 gsi = gsi_last_bb (entry_bb);
3331 switch (gimple_omp_for_kind (fd->for_stmt))
3333 case GF_OMP_FOR_KIND_FOR:
3334 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3335 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3336 break;
3337 case GF_OMP_FOR_KIND_DISTRIBUTE:
3338 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3339 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3340 break;
3341 default:
3342 gcc_unreachable ();
3344 nthreads = build_call_expr (nthreads, 0);
3345 nthreads = fold_convert (itype, nthreads);
3346 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348 threadid = build_call_expr (threadid, 0);
3349 threadid = fold_convert (itype, threadid);
3350 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3351 true, GSI_SAME_STMT);
3353 n1 = fd->loop.n1;
3354 n2 = fd->loop.n2;
3355 step = fd->loop.step;
3356 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3358 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3359 OMP_CLAUSE__LOOPTEMP_);
3360 gcc_assert (innerc);
3361 n1 = OMP_CLAUSE_DECL (innerc);
3362 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3363 OMP_CLAUSE__LOOPTEMP_);
3364 gcc_assert (innerc);
3365 n2 = OMP_CLAUSE_DECL (innerc);
3367 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3368 true, NULL_TREE, true, GSI_SAME_STMT);
3369 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3370 true, NULL_TREE, true, GSI_SAME_STMT);
3371 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3372 true, NULL_TREE, true, GSI_SAME_STMT);
3374 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3375 t = fold_build2 (PLUS_EXPR, itype, step, t);
3376 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3377 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3378 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3379 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3380 fold_build1 (NEGATE_EXPR, itype, t),
3381 fold_build1 (NEGATE_EXPR, itype, step));
3382 else
3383 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3384 t = fold_convert (itype, t);
3385 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3387 q = create_tmp_reg (itype, "q");
3388 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3389 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3390 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3392 tt = create_tmp_reg (itype, "tt");
3393 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3394 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3395 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3397 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3398 gcond *cond_stmt = gimple_build_cond_empty (t);
3399 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3401 second_bb = split_block (entry_bb, cond_stmt)->dest;
3402 gsi = gsi_last_bb (second_bb);
3403 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3405 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3406 GSI_SAME_STMT);
3407 gassign *assign_stmt
3408 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3409 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3411 third_bb = split_block (second_bb, assign_stmt)->dest;
3412 gsi = gsi_last_bb (third_bb);
3413 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3415 t = build2 (MULT_EXPR, itype, q, threadid);
3416 t = build2 (PLUS_EXPR, itype, t, tt);
3417 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3419 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3420 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3422 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3423 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3425 /* Remove the GIMPLE_OMP_FOR statement. */
3426 gsi_remove (&gsi, true);
3428 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3429 gsi = gsi_start_bb (seq_start_bb);
3431 tree startvar = fd->loop.v;
3432 tree endvar = NULL_TREE;
3434 if (gimple_omp_for_combined_p (fd->for_stmt))
3436 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3437 ? gimple_omp_parallel_clauses (inner_stmt)
3438 : gimple_omp_for_clauses (inner_stmt);
3439 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3440 gcc_assert (innerc);
3441 startvar = OMP_CLAUSE_DECL (innerc);
3442 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3443 OMP_CLAUSE__LOOPTEMP_);
3444 gcc_assert (innerc);
3445 endvar = OMP_CLAUSE_DECL (innerc);
3446 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3447 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3449 int i;
3450 for (i = 1; i < fd->collapse; i++)
3452 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3453 OMP_CLAUSE__LOOPTEMP_);
3454 gcc_assert (innerc);
3456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3457 OMP_CLAUSE__LOOPTEMP_);
3458 if (innerc)
3460 /* If needed (distribute parallel for with lastprivate),
3461 propagate down the total number of iterations. */
3462 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3463 fd->loop.n2);
3464 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3465 GSI_CONTINUE_LINKING);
3466 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3467 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3471 t = fold_convert (itype, s0);
3472 t = fold_build2 (MULT_EXPR, itype, t, step);
3473 if (POINTER_TYPE_P (type))
3474 t = fold_build_pointer_plus (n1, t);
3475 else
3476 t = fold_build2 (PLUS_EXPR, type, t, n1);
3477 t = fold_convert (TREE_TYPE (startvar), t);
3478 t = force_gimple_operand_gsi (&gsi, t,
3479 DECL_P (startvar)
3480 && TREE_ADDRESSABLE (startvar),
3481 NULL_TREE, false, GSI_CONTINUE_LINKING);
3482 assign_stmt = gimple_build_assign (startvar, t);
3483 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3485 t = fold_convert (itype, e0);
3486 t = fold_build2 (MULT_EXPR, itype, t, step);
3487 if (POINTER_TYPE_P (type))
3488 t = fold_build_pointer_plus (n1, t);
3489 else
3490 t = fold_build2 (PLUS_EXPR, type, t, n1);
3491 t = fold_convert (TREE_TYPE (startvar), t);
3492 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3493 false, GSI_CONTINUE_LINKING);
3494 if (endvar)
3496 assign_stmt = gimple_build_assign (endvar, e);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3499 assign_stmt = gimple_build_assign (fd->loop.v, e);
3500 else
3501 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3502 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3504 /* Handle linear clause adjustments. */
3505 tree itercnt = NULL_TREE;
3506 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3507 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3508 c; c = OMP_CLAUSE_CHAIN (c))
3509 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3510 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3512 tree d = OMP_CLAUSE_DECL (c);
3513 bool is_ref = omp_is_reference (d);
3514 tree t = d, a, dest;
3515 if (is_ref)
3516 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3517 if (itercnt == NULL_TREE)
3519 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3521 itercnt = fold_build2 (MINUS_EXPR, itype,
3522 fold_convert (itype, n1),
3523 fold_convert (itype, fd->loop.n1));
3524 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3525 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3526 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3527 NULL_TREE, false,
3528 GSI_CONTINUE_LINKING);
3530 else
3531 itercnt = s0;
3533 tree type = TREE_TYPE (t);
3534 if (POINTER_TYPE_P (type))
3535 type = sizetype;
3536 a = fold_build2 (MULT_EXPR, type,
3537 fold_convert (type, itercnt),
3538 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3539 dest = unshare_expr (t);
3540 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3541 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3542 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3543 false, GSI_CONTINUE_LINKING);
3544 assign_stmt = gimple_build_assign (dest, t);
3545 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3547 if (fd->collapse > 1)
3548 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3550 if (!broken_loop)
3552 /* The code controlling the sequential loop replaces the
3553 GIMPLE_OMP_CONTINUE. */
3554 gsi = gsi_last_bb (cont_bb);
3555 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3556 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3557 vmain = gimple_omp_continue_control_use (cont_stmt);
3558 vback = gimple_omp_continue_control_def (cont_stmt);
3560 if (!gimple_omp_for_combined_p (fd->for_stmt))
3562 if (POINTER_TYPE_P (type))
3563 t = fold_build_pointer_plus (vmain, step);
3564 else
3565 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3566 t = force_gimple_operand_gsi (&gsi, t,
3567 DECL_P (vback)
3568 && TREE_ADDRESSABLE (vback),
3569 NULL_TREE, true, GSI_SAME_STMT);
3570 assign_stmt = gimple_build_assign (vback, t);
3571 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3573 t = build2 (fd->loop.cond_code, boolean_type_node,
3574 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3575 ? t : vback, e);
3576 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3579 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3580 gsi_remove (&gsi, true);
3582 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3583 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3586 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3587 gsi = gsi_last_bb (exit_bb);
3588 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3590 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3591 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3593 gsi_remove (&gsi, true);
3595 /* Connect all the blocks. */
3596 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3597 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3598 ep = find_edge (entry_bb, second_bb);
3599 ep->flags = EDGE_TRUE_VALUE;
3600 ep->probability = REG_BR_PROB_BASE / 4;
3601 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3602 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3604 if (!broken_loop)
3606 ep = find_edge (cont_bb, body_bb);
3607 if (ep == NULL)
3609 ep = BRANCH_EDGE (cont_bb);
3610 gcc_assert (single_succ (ep->dest) == body_bb);
3612 if (gimple_omp_for_combined_p (fd->for_stmt))
3614 remove_edge (ep);
3615 ep = NULL;
3617 else if (fd->collapse > 1)
3619 remove_edge (ep);
3620 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3622 else
3623 ep->flags = EDGE_TRUE_VALUE;
3624 find_edge (cont_bb, fin_bb)->flags
3625 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3628 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3629 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3630 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3632 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3633 recompute_dominator (CDI_DOMINATORS, body_bb));
3634 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3635 recompute_dominator (CDI_DOMINATORS, fin_bb));
3637 struct loop *loop = body_bb->loop_father;
3638 if (loop != entry_bb->loop_father)
3640 gcc_assert (broken_loop || loop->header == body_bb);
3641 gcc_assert (broken_loop
3642 || loop->latch == region->cont
3643 || single_pred (loop->latch) == region->cont);
3644 return;
3647 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3649 loop = alloc_loop ();
3650 loop->header = body_bb;
3651 if (collapse_bb == NULL)
3652 loop->latch = cont_bb;
3653 add_loop (loop, body_bb->loop_father);
3657 /* Return phi in E->DEST with ARG on edge E. */
3659 static gphi *
3660 find_phi_with_arg_on_edge (tree arg, edge e)
3662 basic_block bb = e->dest;
3664 for (gphi_iterator gpi = gsi_start_phis (bb);
3665 !gsi_end_p (gpi);
3666 gsi_next (&gpi))
3668 gphi *phi = gpi.phi ();
3669 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3670 return phi;
3673 return NULL;
3676 /* A subroutine of expand_omp_for. Generate code for a parallel
3677 loop with static schedule and a specified chunk size. Given
3678 parameters:
3680 for (V = N1; V cond N2; V += STEP) BODY;
3682 where COND is "<" or ">", we generate pseudocode
3684 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3685 if (cond is <)
3686 adj = STEP - 1;
3687 else
3688 adj = STEP + 1;
3689 if ((__typeof (V)) -1 > 0 && cond is >)
3690 n = -(adj + N2 - N1) / -STEP;
3691 else
3692 n = (adj + N2 - N1) / STEP;
3693 trip = 0;
3694 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3695 here so that V is defined
3696 if the loop is not entered
3697 L0:
3698 s0 = (trip * nthreads + threadid) * CHUNK;
3699 e0 = min (s0 + CHUNK, n);
3700 if (s0 < n) goto L1; else goto L4;
3701 L1:
3702 V = s0 * STEP + N1;
3703 e = e0 * STEP + N1;
3704 L2:
3705 BODY;
3706 V += STEP;
3707 if (V cond e) goto L2; else goto L3;
3708 L3:
3709 trip += 1;
3710 goto L0;
3711 L4:
3712 */
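/* A plain C sketch of the schedule above, assuming a worksharing loop with
   schedule(static, CHUNK); nthreads/tid stand for the values obtained from
   the GOMP builtins queried below, and body (), STEP, N1 are placeholders:

	void run_chunks (long tid, long nthreads, long n, long chunk)
	{
	  for (long trip = 0; ; trip++)
	    {
	      long s0 = (trip * nthreads + tid) * chunk;
	      if (s0 >= n)
		break;
	      long e0 = s0 + chunk < n ? s0 + chunk : n;
	      for (long i = s0; i < e0; i++)
		body (N1 + i * STEP);
	    }
	}
*/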
3714 static void
3715 expand_omp_for_static_chunk (struct omp_region *region,
3716 struct omp_for_data *fd, gimple *inner_stmt)
3718 tree n, s0, e0, e, t;
3719 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3720 tree type, itype, vmain, vback, vextra;
3721 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3722 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3723 gimple_stmt_iterator gsi;
3724 edge se;
3725 bool broken_loop = region->cont == NULL;
3726 tree *counts = NULL;
3727 tree n1, n2, step;
3729 itype = type = TREE_TYPE (fd->loop.v);
3730 if (POINTER_TYPE_P (type))
3731 itype = signed_type_for (type);
3733 entry_bb = region->entry;
3734 se = split_block (entry_bb, last_stmt (entry_bb));
3735 entry_bb = se->src;
3736 iter_part_bb = se->dest;
3737 cont_bb = region->cont;
3738 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3739 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3740 gcc_assert (broken_loop
3741 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3742 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3743 body_bb = single_succ (seq_start_bb);
3744 if (!broken_loop)
3746 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3747 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3748 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3749 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3751 exit_bb = region->exit;
3753 /* Trip and adjustment setup goes in ENTRY_BB. */
3754 gsi = gsi_last_bb (entry_bb);
3755 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3757 if (fd->collapse > 1)
3759 int first_zero_iter = -1, dummy = -1;
3760 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3762 counts = XALLOCAVEC (tree, fd->collapse);
3763 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3764 fin_bb, first_zero_iter,
3765 dummy_bb, dummy, l2_dom_bb);
3766 t = NULL_TREE;
3768 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3769 t = integer_one_node;
3770 else
3771 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3772 fold_convert (type, fd->loop.n1),
3773 fold_convert (type, fd->loop.n2));
3774 if (fd->collapse == 1
3775 && TYPE_UNSIGNED (type)
3776 && (t == NULL_TREE || !integer_onep (t)))
3778 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3779 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3780 true, GSI_SAME_STMT);
3781 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3782 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3783 true, GSI_SAME_STMT);
3784 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3785 NULL_TREE, NULL_TREE);
3786 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3788 expand_omp_regimplify_p, NULL, NULL)
3789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3790 expand_omp_regimplify_p, NULL, NULL))
3792 gsi = gsi_for_stmt (cond_stmt);
3793 gimple_regimplify_operands (cond_stmt, &gsi);
3795 se = split_block (entry_bb, cond_stmt);
3796 se->flags = EDGE_TRUE_VALUE;
3797 entry_bb = se->dest;
3798 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3799 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3800 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3801 if (gimple_in_ssa_p (cfun))
3803 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3804 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3805 !gsi_end_p (gpi); gsi_next (&gpi))
3807 gphi *phi = gpi.phi ();
3808 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3809 se, UNKNOWN_LOCATION);
3812 gsi = gsi_last_bb (entry_bb);
3815 switch (gimple_omp_for_kind (fd->for_stmt))
3817 case GF_OMP_FOR_KIND_FOR:
3818 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3819 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3820 break;
3821 case GF_OMP_FOR_KIND_DISTRIBUTE:
3822 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3823 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3824 break;
3825 default:
3826 gcc_unreachable ();
3828 nthreads = build_call_expr (nthreads, 0);
3829 nthreads = fold_convert (itype, nthreads);
3830 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3831 true, GSI_SAME_STMT);
3832 threadid = build_call_expr (threadid, 0);
3833 threadid = fold_convert (itype, threadid);
3834 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3835 true, GSI_SAME_STMT);
3837 n1 = fd->loop.n1;
3838 n2 = fd->loop.n2;
3839 step = fd->loop.step;
3840 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3842 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3843 OMP_CLAUSE__LOOPTEMP_);
3844 gcc_assert (innerc);
3845 n1 = OMP_CLAUSE_DECL (innerc);
3846 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3847 OMP_CLAUSE__LOOPTEMP_);
3848 gcc_assert (innerc);
3849 n2 = OMP_CLAUSE_DECL (innerc);
3851 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3852 true, NULL_TREE, true, GSI_SAME_STMT);
3853 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3854 true, NULL_TREE, true, GSI_SAME_STMT);
3855 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3856 true, NULL_TREE, true, GSI_SAME_STMT);
3857 tree chunk_size = fold_convert (itype, fd->chunk_size);
3858 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3859 chunk_size
3860 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3861 GSI_SAME_STMT);
3863 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3864 t = fold_build2 (PLUS_EXPR, itype, step, t);
3865 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3866 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3867 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3869 fold_build1 (NEGATE_EXPR, itype, t),
3870 fold_build1 (NEGATE_EXPR, itype, step));
3871 else
3872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3873 t = fold_convert (itype, t);
3874 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3877 trip_var = create_tmp_reg (itype, ".trip");
3878 if (gimple_in_ssa_p (cfun))
3880 trip_init = make_ssa_name (trip_var);
3881 trip_main = make_ssa_name (trip_var);
3882 trip_back = make_ssa_name (trip_var);
3884 else
3886 trip_init = trip_var;
3887 trip_main = trip_var;
3888 trip_back = trip_var;
3891 gassign *assign_stmt
3892 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3893 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3895 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3896 t = fold_build2 (MULT_EXPR, itype, t, step);
3897 if (POINTER_TYPE_P (type))
3898 t = fold_build_pointer_plus (n1, t);
3899 else
3900 t = fold_build2 (PLUS_EXPR, type, t, n1);
3901 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3902 true, GSI_SAME_STMT);
3904 /* Remove the GIMPLE_OMP_FOR. */
3905 gsi_remove (&gsi, true);
3907 gimple_stmt_iterator gsif = gsi;
3909 /* Iteration space partitioning goes in ITER_PART_BB. */
3910 gsi = gsi_last_bb (iter_part_bb);
3912 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3913 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3914 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3915 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 false, GSI_CONTINUE_LINKING);
3918 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3919 t = fold_build2 (MIN_EXPR, itype, t, n);
3920 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 false, GSI_CONTINUE_LINKING);
3923 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3924 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3926 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3927 gsi = gsi_start_bb (seq_start_bb);
3929 tree startvar = fd->loop.v;
3930 tree endvar = NULL_TREE;
3932 if (gimple_omp_for_combined_p (fd->for_stmt))
3934 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3935 ? gimple_omp_parallel_clauses (inner_stmt)
3936 : gimple_omp_for_clauses (inner_stmt);
3937 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3938 gcc_assert (innerc);
3939 startvar = OMP_CLAUSE_DECL (innerc);
3940 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3941 OMP_CLAUSE__LOOPTEMP_);
3942 gcc_assert (innerc);
3943 endvar = OMP_CLAUSE_DECL (innerc);
3944 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3945 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3947 int i;
3948 for (i = 1; i < fd->collapse; i++)
3950 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3951 OMP_CLAUSE__LOOPTEMP_);
3952 gcc_assert (innerc);
3954 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3955 OMP_CLAUSE__LOOPTEMP_);
3956 if (innerc)
3958 /* If needed (distribute parallel for with lastprivate),
3959 propagate down the total number of iterations. */
3960 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3961 fd->loop.n2);
3962 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3963 GSI_CONTINUE_LINKING);
3964 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3965 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3970 t = fold_convert (itype, s0);
3971 t = fold_build2 (MULT_EXPR, itype, t, step);
3972 if (POINTER_TYPE_P (type))
3973 t = fold_build_pointer_plus (n1, t);
3974 else
3975 t = fold_build2 (PLUS_EXPR, type, t, n1);
3976 t = fold_convert (TREE_TYPE (startvar), t);
3977 t = force_gimple_operand_gsi (&gsi, t,
3978 DECL_P (startvar)
3979 && TREE_ADDRESSABLE (startvar),
3980 NULL_TREE, false, GSI_CONTINUE_LINKING);
3981 assign_stmt = gimple_build_assign (startvar, t);
3982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3984 t = fold_convert (itype, e0);
3985 t = fold_build2 (MULT_EXPR, itype, t, step);
3986 if (POINTER_TYPE_P (type))
3987 t = fold_build_pointer_plus (n1, t);
3988 else
3989 t = fold_build2 (PLUS_EXPR, type, t, n1);
3990 t = fold_convert (TREE_TYPE (startvar), t);
3991 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3992 false, GSI_CONTINUE_LINKING);
3993 if (endvar)
3995 assign_stmt = gimple_build_assign (endvar, e);
3996 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3997 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3998 assign_stmt = gimple_build_assign (fd->loop.v, e);
3999 else
4000 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4001 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4003 /* Handle linear clause adjustments. */
4004 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4005 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4006 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4007 c; c = OMP_CLAUSE_CHAIN (c))
4008 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4009 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4011 tree d = OMP_CLAUSE_DECL (c);
4012 bool is_ref = omp_is_reference (d);
4013 tree t = d, a, dest;
4014 if (is_ref)
4015 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4016 tree type = TREE_TYPE (t);
4017 if (POINTER_TYPE_P (type))
4018 type = sizetype;
4019 dest = unshare_expr (t);
4020 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4021 expand_omp_build_assign (&gsif, v, t);
4022 if (itercnt == NULL_TREE)
4024 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4026 itercntbias
4027 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4028 fold_convert (itype, fd->loop.n1));
4029 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4030 itercntbias, step);
4031 itercntbias
4032 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4033 NULL_TREE, true,
4034 GSI_SAME_STMT);
4035 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4036 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4037 NULL_TREE, false,
4038 GSI_CONTINUE_LINKING);
4040 else
4041 itercnt = s0;
4043 a = fold_build2 (MULT_EXPR, type,
4044 fold_convert (type, itercnt),
4045 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4046 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4047 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4048 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4049 false, GSI_CONTINUE_LINKING);
4050 assign_stmt = gimple_build_assign (dest, t);
4051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4053 if (fd->collapse > 1)
4054 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4056 if (!broken_loop)
4058 /* The code controlling the sequential loop goes in CONT_BB,
4059 replacing the GIMPLE_OMP_CONTINUE. */
4060 gsi = gsi_last_bb (cont_bb);
4061 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4062 vmain = gimple_omp_continue_control_use (cont_stmt);
4063 vback = gimple_omp_continue_control_def (cont_stmt);
4065 if (!gimple_omp_for_combined_p (fd->for_stmt))
4067 if (POINTER_TYPE_P (type))
4068 t = fold_build_pointer_plus (vmain, step);
4069 else
4070 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4071 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4072 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4073 true, GSI_SAME_STMT);
4074 assign_stmt = gimple_build_assign (vback, t);
4075 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4077 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4078 t = build2 (EQ_EXPR, boolean_type_node,
4079 build_int_cst (itype, 0),
4080 build_int_cst (itype, 1));
4081 else
4082 t = build2 (fd->loop.cond_code, boolean_type_node,
4083 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4084 ? t : vback, e);
4085 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4088 /* Remove GIMPLE_OMP_CONTINUE. */
4089 gsi_remove (&gsi, true);
4091 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4092 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4094 /* Trip update code goes into TRIP_UPDATE_BB. */
4095 gsi = gsi_start_bb (trip_update_bb);
4097 t = build_int_cst (itype, 1);
4098 t = build2 (PLUS_EXPR, itype, trip_main, t);
4099 assign_stmt = gimple_build_assign (trip_back, t);
4100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4103 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4104 gsi = gsi_last_bb (exit_bb);
4105 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4107 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4108 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4110 gsi_remove (&gsi, true);
4112 /* Connect the new blocks. */
4113 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4114 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4116 if (!broken_loop)
4118 se = find_edge (cont_bb, body_bb);
4119 if (se == NULL)
4121 se = BRANCH_EDGE (cont_bb);
4122 gcc_assert (single_succ (se->dest) == body_bb);
4124 if (gimple_omp_for_combined_p (fd->for_stmt))
4126 remove_edge (se);
4127 se = NULL;
4129 else if (fd->collapse > 1)
4131 remove_edge (se);
4132 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4134 else
4135 se->flags = EDGE_TRUE_VALUE;
4136 find_edge (cont_bb, trip_update_bb)->flags
4137 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4139 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4140 iter_part_bb);
4143 if (gimple_in_ssa_p (cfun))
4145 gphi_iterator psi;
4146 gphi *phi;
4147 edge re, ene;
4148 edge_var_map *vm;
4149 size_t i;
4151 gcc_assert (fd->collapse == 1 && !broken_loop);
4153 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4154 remove arguments of the phi nodes in fin_bb. We need to create
4155 appropriate phi nodes in iter_part_bb instead. */
4156 se = find_edge (iter_part_bb, fin_bb);
4157 re = single_succ_edge (trip_update_bb);
4158 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4159 ene = single_succ_edge (entry_bb);
4161 psi = gsi_start_phis (fin_bb);
4162 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4163 gsi_next (&psi), ++i)
4165 gphi *nphi;
4166 source_location locus;
4168 phi = psi.phi ();
4169 t = gimple_phi_result (phi);
4170 gcc_assert (t == redirect_edge_var_map_result (vm));
4172 if (!single_pred_p (fin_bb))
4173 t = copy_ssa_name (t, phi);
4175 nphi = create_phi_node (t, iter_part_bb);
4177 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4178 locus = gimple_phi_arg_location_from_edge (phi, se);
4180 /* A special case -- fd->loop.v is not yet computed in
4181 iter_part_bb, we need to use vextra instead. */
4182 if (t == fd->loop.v)
4183 t = vextra;
4184 add_phi_arg (nphi, t, ene, locus);
4185 locus = redirect_edge_var_map_location (vm);
4186 tree back_arg = redirect_edge_var_map_def (vm);
4187 add_phi_arg (nphi, back_arg, re, locus);
4188 edge ce = find_edge (cont_bb, body_bb);
4189 if (ce == NULL)
4191 ce = BRANCH_EDGE (cont_bb);
4192 gcc_assert (single_succ (ce->dest) == body_bb);
4193 ce = single_succ_edge (ce->dest);
4195 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4196 gcc_assert (inner_loop_phi != NULL);
4197 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4198 find_edge (seq_start_bb, body_bb), locus);
4200 if (!single_pred_p (fin_bb))
4201 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4203 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4204 redirect_edge_var_map_clear (re);
4205 if (single_pred_p (fin_bb))
4206 while (1)
4208 psi = gsi_start_phis (fin_bb);
4209 if (gsi_end_p (psi))
4210 break;
4211 remove_phi_node (&psi, false);
4214 /* Make phi node for trip. */
4215 phi = create_phi_node (trip_main, iter_part_bb);
4216 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4217 UNKNOWN_LOCATION);
4218 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4219 UNKNOWN_LOCATION);
4222 if (!broken_loop)
4223 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4224 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4225 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4226 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4227 recompute_dominator (CDI_DOMINATORS, fin_bb));
4228 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4229 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4230 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4231 recompute_dominator (CDI_DOMINATORS, body_bb));
4233 if (!broken_loop)
4235 struct loop *loop = body_bb->loop_father;
4236 struct loop *trip_loop = alloc_loop ();
4237 trip_loop->header = iter_part_bb;
4238 trip_loop->latch = trip_update_bb;
4239 add_loop (trip_loop, iter_part_bb->loop_father);
4241 if (loop != entry_bb->loop_father)
4243 gcc_assert (loop->header == body_bb);
4244 gcc_assert (loop->latch == region->cont
4245 || single_pred (loop->latch) == region->cont);
4246 trip_loop->inner = loop;
4247 return;
4250 if (!gimple_omp_for_combined_p (fd->for_stmt))
4252 loop = alloc_loop ();
4253 loop->header = body_bb;
4254 if (collapse_bb == NULL)
4255 loop->latch = cont_bb;
4256 add_loop (loop, trip_loop);
4261 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4262 Given parameters:
4263 for (V = N1; V cond N2; V += STEP) BODY;
4265 where COND is "<" or ">" or "!=", we generate pseudocode
4267 for (ind_var = low; ind_var < high; ind_var++)
4268 {
4269 V = n1 + (ind_var * STEP)
4271 <BODY>
4272 }
4274 In the above pseudocode, low and high are function parameters of the
4275 child function. In the function below, we insert a temporary
4276 variable and calls to two OMP functions that will not be
4277 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4278 with _Cilk_for). These calls are replaced with low and high
4279 by the function that handles taskreg. */
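/* Roughly, the outlined child that the runtime ends up invoking has the
   shape below, with the [__low, __high) range supplied per invocation by
   libcilkrts (a sketch; only __low and __high are names used by the code
   in this function, everything else is illustrative):

	static void child_fn (void *data, long __low, long __high)
	{
	  for (long ind_var = __low; ind_var < __high; ind_var++)
	    {
	      V = N1 + ind_var * STEP;
	      BODY;
	    }
	}
*/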
4282 static void
4283 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4285 bool broken_loop = region->cont == NULL;
4286 basic_block entry_bb = region->entry;
4287 basic_block cont_bb = region->cont;
4289 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4290 gcc_assert (broken_loop
4291 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4292 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4293 basic_block l1_bb, l2_bb;
4295 if (!broken_loop)
4297 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4298 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4299 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4300 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4302 else
4304 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4305 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4306 l2_bb = single_succ (l1_bb);
4308 basic_block exit_bb = region->exit;
4309 basic_block l2_dom_bb = NULL;
4311 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4313 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4314 statements used to pass information to expand_omp_taskreg.
4315 low_val and high_val will be replaced by the __low and __high
4316 parameters from the child function.
4318 The call_exprs part is a place-holder; it is mainly used to tell
4319 the top-level part that this is where low and high should be put
4320 (reasoning given in the header comment). */
4323 gomp_parallel *par_stmt
4324 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4325 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4326 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4327 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4329 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4330 high_val = t;
4331 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4332 low_val = t;
4334 gcc_assert (low_val && high_val);
4336 tree type = TREE_TYPE (low_val);
4337 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4338 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4340 /* Not needed in SSA form right now. */
4341 gcc_assert (!gimple_in_ssa_p (cfun));
4342 if (l2_dom_bb == NULL)
4343 l2_dom_bb = l1_bb;
4345 tree n1 = low_val;
4346 tree n2 = high_val;
4348 gimple *stmt = gimple_build_assign (ind_var, n1);
4350 /* Replace the GIMPLE_OMP_FOR statement. */
4351 gsi_replace (&gsi, stmt, true);
4353 if (!broken_loop)
4355 /* Code to control the increment goes in the CONT_BB. */
4356 gsi = gsi_last_bb (cont_bb);
4357 stmt = gsi_stmt (gsi);
4358 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4359 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4360 build_one_cst (type));
4362 /* Replace GIMPLE_OMP_CONTINUE. */
4363 gsi_replace (&gsi, stmt, true);
4366 /* Emit the condition in L1_BB. */
4367 gsi = gsi_after_labels (l1_bb);
4368 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4369 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4370 fd->loop.step);
4371 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4372 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4373 fd->loop.n1, fold_convert (sizetype, t));
4374 else
4375 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4376 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4377 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4378 expand_omp_build_assign (&gsi, fd->loop.v, t);
4380 /* The condition is always '<' since the runtime will fill in the low
4381 and high values. */
4382 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4383 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4385 /* Remove GIMPLE_OMP_RETURN. */
4386 gsi = gsi_last_bb (exit_bb);
4387 gsi_remove (&gsi, true);
4389 /* Connect the new blocks. */
4390 remove_edge (FALLTHRU_EDGE (entry_bb));
4392 edge e, ne;
4393 if (!broken_loop)
4395 remove_edge (BRANCH_EDGE (entry_bb));
4396 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4398 e = BRANCH_EDGE (l1_bb);
4399 ne = FALLTHRU_EDGE (l1_bb);
4400 e->flags = EDGE_TRUE_VALUE;
4402 else
4404 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4406 ne = single_succ_edge (l1_bb);
4407 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4410 ne->flags = EDGE_FALSE_VALUE;
4411 e->probability = REG_BR_PROB_BASE * 7 / 8;
4412 ne->probability = REG_BR_PROB_BASE / 8;
4414 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4415 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4416 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4418 if (!broken_loop)
4420 struct loop *loop = alloc_loop ();
4421 loop->header = l1_bb;
4422 loop->latch = cont_bb;
4423 add_loop (loop, l1_bb->loop_father);
4424 loop->safelen = INT_MAX;
4427 /* Pick the correct library function based on the precision of the
4428 induction variable type. */
4429 tree lib_fun = NULL_TREE;
4430 if (TYPE_PRECISION (type) == 32)
4431 lib_fun = cilk_for_32_fndecl;
4432 else if (TYPE_PRECISION (type) == 64)
4433 lib_fun = cilk_for_64_fndecl;
4434 else
4435 gcc_unreachable ();
4437 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4439 /* WS_ARGS contains the library function flavor to call:
4440 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4441 user-defined grain value. If the user does not define one, then zero
4442 is passed in by the parser. */
4443 vec_alloc (region->ws_args, 2);
4444 region->ws_args->quick_push (lib_fun);
4445 region->ws_args->quick_push (fd->chunk_size);
4448 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4449 loop. Given parameters:
4451 for (V = N1; V cond N2; V += STEP) BODY;
4453 where COND is "<" or ">", we generate pseudocode
4455 V = N1;
4456 goto L1;
4457 L0:
4458 BODY;
4459 V += STEP;
4460 L1:
4461 if (V cond N2) goto L0; else goto L2;
4462 L2:
4464 For collapsed loops, given parameters:
4465 collapse(3)
4466 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4467 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4468 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4469 BODY;
4471 we generate pseudocode
4473 if (cond3 is <)
4474 adj = STEP3 - 1;
4475 else
4476 adj = STEP3 + 1;
4477 count3 = (adj + N32 - N31) / STEP3;
4478 if (cond2 is <)
4479 adj = STEP2 - 1;
4480 else
4481 adj = STEP2 + 1;
4482 count2 = (adj + N22 - N21) / STEP2;
4483 if (cond1 is <)
4484 adj = STEP1 - 1;
4485 else
4486 adj = STEP1 + 1;
4487 count1 = (adj + N12 - N11) / STEP1;
4488 count = count1 * count2 * count3;
4489 V = 0;
4490 V1 = N11;
4491 V2 = N21;
4492 V3 = N31;
4493 goto L1;
4494 L0:
4495 BODY;
4496 V += 1;
4497 V3 += STEP3;
4498 V2 += (V3 cond3 N32) ? 0 : STEP2;
4499 V3 = (V3 cond3 N32) ? V3 : N31;
4500 V1 += (V2 cond2 N22) ? 0 : STEP1;
4501 V2 = (V2 cond2 N22) ? V2 : N21;
4502 L1:
4503 if (V < count) goto L0; else goto L2;
4504 L2:
4505 */
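/* For instance, the collapsed pseudocode above corresponds to source like
   (illustrative only):

	#pragma omp simd collapse(3)
	for (int i = 0; i < ni; i++)
	  for (int j = 0; j < nj; j++)
	    for (int k = 0; k < nk; k++)
	      a[i][j][k] += b[i][j][k];

   The three trip counts multiply into a single linear induction variable V
   in [0, count), and V3/V2/V1 are stepped and wrapped from the innermost
   loop outwards on every iteration, as shown above.  */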
4508 static void
4509 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4511 tree type, t;
4512 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4513 gimple_stmt_iterator gsi;
4514 gimple *stmt;
4515 gcond *cond_stmt;
4516 bool broken_loop = region->cont == NULL;
4517 edge e, ne;
4518 tree *counts = NULL;
4519 int i;
4520 int safelen_int = INT_MAX;
4521 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4522 OMP_CLAUSE_SAFELEN);
4523 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4524 OMP_CLAUSE__SIMDUID_);
4525 tree n1, n2;
4527 if (safelen)
4529 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4530 if (TREE_CODE (safelen) != INTEGER_CST)
4531 safelen_int = 0;
4532 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4533 safelen_int = tree_to_uhwi (safelen);
4534 if (safelen_int == 1)
4535 safelen_int = 0;
4537 type = TREE_TYPE (fd->loop.v);
4538 entry_bb = region->entry;
4539 cont_bb = region->cont;
4540 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4541 gcc_assert (broken_loop
4542 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4543 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4544 if (!broken_loop)
4546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4547 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4548 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4549 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4551 else
4553 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4554 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4555 l2_bb = single_succ (l1_bb);
4557 exit_bb = region->exit;
4558 l2_dom_bb = NULL;
4560 gsi = gsi_last_bb (entry_bb);
4562 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4563 /* Not needed in SSA form right now. */
4564 gcc_assert (!gimple_in_ssa_p (cfun));
4565 if (fd->collapse > 1)
4567 int first_zero_iter = -1, dummy = -1;
4568 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4570 counts = XALLOCAVEC (tree, fd->collapse);
4571 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4572 zero_iter_bb, first_zero_iter,
4573 dummy_bb, dummy, l2_dom_bb);
4575 if (l2_dom_bb == NULL)
4576 l2_dom_bb = l1_bb;
4578 n1 = fd->loop.n1;
4579 n2 = fd->loop.n2;
4580 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4582 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4583 OMP_CLAUSE__LOOPTEMP_);
4584 gcc_assert (innerc);
4585 n1 = OMP_CLAUSE_DECL (innerc);
4586 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4587 OMP_CLAUSE__LOOPTEMP_);
4588 gcc_assert (innerc);
4589 n2 = OMP_CLAUSE_DECL (innerc);
4591 tree step = fd->loop.step;
4593 bool is_simt = (safelen_int > 1
4594 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4595 OMP_CLAUSE__SIMT_));
4596 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4597 if (is_simt)
4599 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4600 simt_lane = create_tmp_var (unsigned_type_node);
4601 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4602 gimple_call_set_lhs (g, simt_lane);
4603 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4604 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4605 fold_convert (TREE_TYPE (step), simt_lane));
4606 n1 = fold_convert (type, n1);
4607 if (POINTER_TYPE_P (type))
4608 n1 = fold_build_pointer_plus (n1, offset);
4609 else
4610 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4612 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4613 if (fd->collapse > 1)
4614 simt_maxlane = build_one_cst (unsigned_type_node);
4615 else if (safelen_int < omp_max_simt_vf ())
4616 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4617 tree vf
4618 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4619 unsigned_type_node, 0);
4620 if (simt_maxlane)
4621 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4622 vf = fold_convert (TREE_TYPE (step), vf);
4623 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4626 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4627 if (fd->collapse > 1)
4629 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4631 gsi_prev (&gsi);
4632 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4633 gsi_next (&gsi);
4635 else
4636 for (i = 0; i < fd->collapse; i++)
4638 tree itype = TREE_TYPE (fd->loops[i].v);
4639 if (POINTER_TYPE_P (itype))
4640 itype = signed_type_for (itype);
4641 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4642 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4646 /* Remove the GIMPLE_OMP_FOR statement. */
4647 gsi_remove (&gsi, true);
4649 if (!broken_loop)
4651 /* Code to control the increment goes in the CONT_BB. */
4652 gsi = gsi_last_bb (cont_bb);
4653 stmt = gsi_stmt (gsi);
4654 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4656 if (POINTER_TYPE_P (type))
4657 t = fold_build_pointer_plus (fd->loop.v, step);
4658 else
4659 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4660 expand_omp_build_assign (&gsi, fd->loop.v, t);
4662 if (fd->collapse > 1)
4664 i = fd->collapse - 1;
4665 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4667 t = fold_convert (sizetype, fd->loops[i].step);
4668 t = fold_build_pointer_plus (fd->loops[i].v, t);
4670 else
4672 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4673 fd->loops[i].step);
4674 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4675 fd->loops[i].v, t);
4677 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4679 for (i = fd->collapse - 1; i > 0; i--)
4681 tree itype = TREE_TYPE (fd->loops[i].v);
4682 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4683 if (POINTER_TYPE_P (itype2))
4684 itype2 = signed_type_for (itype2);
4685 t = build3 (COND_EXPR, itype2,
4686 build2 (fd->loops[i].cond_code, boolean_type_node,
4687 fd->loops[i].v,
4688 fold_convert (itype, fd->loops[i].n2)),
4689 build_int_cst (itype2, 0),
4690 fold_convert (itype2, fd->loops[i - 1].step));
4691 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4692 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4693 else
4694 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4695 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4697 t = build3 (COND_EXPR, itype,
4698 build2 (fd->loops[i].cond_code, boolean_type_node,
4699 fd->loops[i].v,
4700 fold_convert (itype, fd->loops[i].n2)),
4701 fd->loops[i].v,
4702 fold_convert (itype, fd->loops[i].n1));
4703 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4707 /* Remove GIMPLE_OMP_CONTINUE. */
4708 gsi_remove (&gsi, true);
4711 /* Emit the condition in L1_BB. */
4712 gsi = gsi_start_bb (l1_bb);
4714 t = fold_convert (type, n2);
4715 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4716 false, GSI_CONTINUE_LINKING);
4717 tree v = fd->loop.v;
4718 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4719 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4720 false, GSI_CONTINUE_LINKING);
4721 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4722 cond_stmt = gimple_build_cond_empty (t);
4723 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4724 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4725 NULL, NULL)
4726 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4727 NULL, NULL))
4729 gsi = gsi_for_stmt (cond_stmt);
4730 gimple_regimplify_operands (cond_stmt, &gsi);
4733 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4734 if (is_simt)
4736 gsi = gsi_start_bb (l2_bb);
4737 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4738 if (POINTER_TYPE_P (type))
4739 t = fold_build_pointer_plus (fd->loop.v, step);
4740 else
4741 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4742 expand_omp_build_assign (&gsi, fd->loop.v, t);
4745 /* Remove GIMPLE_OMP_RETURN. */
4746 gsi = gsi_last_bb (exit_bb);
4747 gsi_remove (&gsi, true);
4749 /* Connect the new blocks. */
4750 remove_edge (FALLTHRU_EDGE (entry_bb));
4752 if (!broken_loop)
4754 remove_edge (BRANCH_EDGE (entry_bb));
4755 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4757 e = BRANCH_EDGE (l1_bb);
4758 ne = FALLTHRU_EDGE (l1_bb);
4759 e->flags = EDGE_TRUE_VALUE;
4761 else
4763 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4765 ne = single_succ_edge (l1_bb);
4766 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4769 ne->flags = EDGE_FALSE_VALUE;
4770 e->probability = REG_BR_PROB_BASE * 7 / 8;
4771 ne->probability = REG_BR_PROB_BASE / 8;
4773 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4774 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4776 if (simt_maxlane)
4778 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4779 NULL_TREE, NULL_TREE);
4780 gsi = gsi_last_bb (entry_bb);
4781 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4782 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4783 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4784 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4785 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4786 l2_dom_bb = entry_bb;
4788 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4790 if (!broken_loop)
4792 struct loop *loop = alloc_loop ();
4793 loop->header = l1_bb;
4794 loop->latch = cont_bb;
4795 add_loop (loop, l1_bb->loop_father);
4796 loop->safelen = safelen_int;
4797 if (simduid)
4799 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4800 cfun->has_simduid_loops = true;
4802 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4803 the loop. */
4804 if ((flag_tree_loop_vectorize
4805 || (!global_options_set.x_flag_tree_loop_vectorize
4806 && !global_options_set.x_flag_tree_vectorize))
4807 && flag_tree_loop_optimize
4808 && loop->safelen > 1)
4810 loop->force_vectorize = true;
4811 cfun->has_force_vectorize_loops = true;
4814 else if (simduid)
4815 cfun->has_simduid_loops = true;
4818 /* Taskloop construct is represented after gimplification with
4819 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4820 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4821 which should just compute all the needed loop temporaries
4822 for GIMPLE_OMP_TASK. */
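/* E.g. for source along the lines of (an illustrative example)

	#pragma omp taskloop grainsize(64)
	for (long i = n1; i < n2; i += step)
	  body (i);

   this outer construct only evaluates n1 and n2 (and, when needed, the
   total iteration count) into _looptemp_ clauses; the GOMP_taskloop{,_ull}
   call emitted for the enclosed GIMPLE_OMP_TASK then splits the range into
   per-task sub-ranges.  */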
4824 static void
4825 expand_omp_taskloop_for_outer (struct omp_region *region,
4826 struct omp_for_data *fd,
4827 gimple *inner_stmt)
4829 tree type, bias = NULL_TREE;
4830 basic_block entry_bb, cont_bb, exit_bb;
4831 gimple_stmt_iterator gsi;
4832 gassign *assign_stmt;
4833 tree *counts = NULL;
4834 int i;
4836 gcc_assert (inner_stmt);
4837 gcc_assert (region->cont);
4838 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4839 && gimple_omp_task_taskloop_p (inner_stmt));
4840 type = TREE_TYPE (fd->loop.v);
4842 /* See if we need to bias by LLONG_MIN. */
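/* Illustrative note: the unsigned iter_type cannot represent negative
   bounds directly, so both bounds are biased by TYPE_MIN_VALUE (LLONG_MIN,
   i.e. 2^63 when viewed as unsigned). E.g. bounds -5 and 10 become
   2^63 - 5 and 2^63 + 10, preserving their order and distance; adding the
   same bias a second time wraps around 2^64 and restores the original
   values.  */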
4843 if (fd->iter_type == long_long_unsigned_type_node
4844 && TREE_CODE (type) == INTEGER_TYPE
4845 && !TYPE_UNSIGNED (type))
4847 tree n1, n2;
4849 if (fd->loop.cond_code == LT_EXPR)
4851 n1 = fd->loop.n1;
4852 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4854 else
4856 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4857 n2 = fd->loop.n1;
4859 if (TREE_CODE (n1) != INTEGER_CST
4860 || TREE_CODE (n2) != INTEGER_CST
4861 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4862 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4865 entry_bb = region->entry;
4866 cont_bb = region->cont;
4867 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4868 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4869 exit_bb = region->exit;
4871 gsi = gsi_last_bb (entry_bb);
4872 gimple *for_stmt = gsi_stmt (gsi);
4873 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4874 if (fd->collapse > 1)
4876 int first_zero_iter = -1, dummy = -1;
4877 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4879 counts = XALLOCAVEC (tree, fd->collapse);
4880 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4881 zero_iter_bb, first_zero_iter,
4882 dummy_bb, dummy, l2_dom_bb);
4884 if (zero_iter_bb)
4886 /* Some counts[i] vars might be uninitialized if
4887 some loop has zero iterations. But the body shouldn't
4888 be executed in that case, so just avoid uninit warnings. */
4889 for (i = first_zero_iter; i < fd->collapse; i++)
4890 if (SSA_VAR_P (counts[i]))
4891 TREE_NO_WARNING (counts[i]) = 1;
4892 gsi_prev (&gsi);
4893 edge e = split_block (entry_bb, gsi_stmt (gsi));
4894 entry_bb = e->dest;
4895 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4896 gsi = gsi_last_bb (entry_bb);
4897 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4898 get_immediate_dominator (CDI_DOMINATORS,
4899 zero_iter_bb));
4903 tree t0, t1;
4904 t1 = fd->loop.n2;
4905 t0 = fd->loop.n1;
4906 if (POINTER_TYPE_P (TREE_TYPE (t0))
4907 && TYPE_PRECISION (TREE_TYPE (t0))
4908 != TYPE_PRECISION (fd->iter_type))
4910 /* Avoid casting pointers to integer of a different size. */
4911 tree itype = signed_type_for (type);
4912 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4913 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4915 else
4917 t1 = fold_convert (fd->iter_type, t1);
4918 t0 = fold_convert (fd->iter_type, t0);
4920 if (bias)
4922 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4923 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4926 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4927 OMP_CLAUSE__LOOPTEMP_);
4928 gcc_assert (innerc);
4929 tree startvar = OMP_CLAUSE_DECL (innerc);
4930 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4931 gcc_assert (innerc);
4932 tree endvar = OMP_CLAUSE_DECL (innerc);
4933 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4935 gcc_assert (innerc);
4936 for (i = 1; i < fd->collapse; i++)
4938 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4939 OMP_CLAUSE__LOOPTEMP_);
4940 gcc_assert (innerc);
4942 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4943 OMP_CLAUSE__LOOPTEMP_);
4944 if (innerc)
4946 /* If needed (inner taskloop has lastprivate clause), propagate
4947 down the total number of iterations. */
4948 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4949 NULL_TREE, false,
4950 GSI_CONTINUE_LINKING);
4951 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4952 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4956 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4957 GSI_CONTINUE_LINKING);
4958 assign_stmt = gimple_build_assign (startvar, t0);
4959 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4961 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4962 GSI_CONTINUE_LINKING);
4963 assign_stmt = gimple_build_assign (endvar, t1);
4964 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4965 if (fd->collapse > 1)
4966 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4968 /* Remove the GIMPLE_OMP_FOR statement. */
4969 gsi = gsi_for_stmt (for_stmt);
4970 gsi_remove (&gsi, true);
4972 gsi = gsi_last_bb (cont_bb);
4973 gsi_remove (&gsi, true);
4975 gsi = gsi_last_bb (exit_bb);
4976 gsi_remove (&gsi, true);
4978 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
4979 remove_edge (BRANCH_EDGE (entry_bb));
4980 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
4981 remove_edge (BRANCH_EDGE (cont_bb));
4982 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4983 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4984 recompute_dominator (CDI_DOMINATORS, region->entry));
4987 /* Taskloop construct is represented after gimplification with
4988 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4989 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4990 GOMP_taskloop{,_ull} function arranges for each task to be given just
4991 a single range of iterations. */
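/* Conceptually, each task created by GOMP_taskloop{,_ull} then executes a
   plain range loop of the form below, where start/end come from the two
   _looptemp_ clauses filled in by the runtime (a sketch; the names are
   illustrative):

	void task_fn (long start, long end)
	{
	  for (long v = start; v < end; v += STEP)
	    BODY;
	}
*/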
4993 static void
4994 expand_omp_taskloop_for_inner (struct omp_region *region,
4995 struct omp_for_data *fd,
4996 gimple *inner_stmt)
4998 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4999 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5000 basic_block fin_bb;
5001 gimple_stmt_iterator gsi;
5002 edge ep;
5003 bool broken_loop = region->cont == NULL;
5004 tree *counts = NULL;
5005 tree n1, n2, step;
5007 itype = type = TREE_TYPE (fd->loop.v);
5008 if (POINTER_TYPE_P (type))
5009 itype = signed_type_for (type);
5011 /* See if we need to bias by LLONG_MIN. */
5012 if (fd->iter_type == long_long_unsigned_type_node
5013 && TREE_CODE (type) == INTEGER_TYPE
5014 && !TYPE_UNSIGNED (type))
5016 tree n1, n2;
5018 if (fd->loop.cond_code == LT_EXPR)
5020 n1 = fd->loop.n1;
5021 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5023 else
5025 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5026 n2 = fd->loop.n1;
5028 if (TREE_CODE (n1) != INTEGER_CST
5029 || TREE_CODE (n2) != INTEGER_CST
5030 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5031 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5034 entry_bb = region->entry;
5035 cont_bb = region->cont;
5036 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5037 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5038 gcc_assert (broken_loop
5039 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5040 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5041 if (!broken_loop)
5043 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5044 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5046 exit_bb = region->exit;
5048 /* Iteration space partitioning goes in ENTRY_BB. */
5049 gsi = gsi_last_bb (entry_bb);
5050 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5052 if (fd->collapse > 1)
5054 int first_zero_iter = -1, dummy = -1;
5055 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5057 counts = XALLOCAVEC (tree, fd->collapse);
5058 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5059 fin_bb, first_zero_iter,
5060 dummy_bb, dummy, l2_dom_bb);
5061 t = NULL_TREE;
5063 else
5064 t = integer_one_node;
5066 step = fd->loop.step;
5067 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5068 OMP_CLAUSE__LOOPTEMP_);
5069 gcc_assert (innerc);
5070 n1 = OMP_CLAUSE_DECL (innerc);
5071 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5072 gcc_assert (innerc);
5073 n2 = OMP_CLAUSE_DECL (innerc);
5074 if (bias)
5076 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5077 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5079 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080 true, NULL_TREE, true, GSI_SAME_STMT);
5081 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082 true, NULL_TREE, true, GSI_SAME_STMT);
5083 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084 true, NULL_TREE, true, GSI_SAME_STMT);
5086 tree startvar = fd->loop.v;
5087 tree endvar = NULL_TREE;
5089 if (gimple_omp_for_combined_p (fd->for_stmt))
5091 tree clauses = gimple_omp_for_clauses (inner_stmt);
5092 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5093 gcc_assert (innerc);
5094 startvar = OMP_CLAUSE_DECL (innerc);
5095 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5096 OMP_CLAUSE__LOOPTEMP_);
5097 gcc_assert (innerc);
5098 endvar = OMP_CLAUSE_DECL (innerc);
5100 t = fold_convert (TREE_TYPE (startvar), n1);
5101 t = force_gimple_operand_gsi (&gsi, t,
5102 DECL_P (startvar)
5103 && TREE_ADDRESSABLE (startvar),
5104 NULL_TREE, false, GSI_CONTINUE_LINKING);
5105 gimple *assign_stmt = gimple_build_assign (startvar, t);
5106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5108 t = fold_convert (TREE_TYPE (startvar), n2);
5109 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5110 false, GSI_CONTINUE_LINKING);
5111 if (endvar)
5113 assign_stmt = gimple_build_assign (endvar, e);
5114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5115 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5116 assign_stmt = gimple_build_assign (fd->loop.v, e);
5117 else
5118 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5121 if (fd->collapse > 1)
5122 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5124 if (!broken_loop)
5126 /* The code controlling the sequential loop replaces the
5127 GIMPLE_OMP_CONTINUE. */
5128 gsi = gsi_last_bb (cont_bb);
5129 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5130 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5131 vmain = gimple_omp_continue_control_use (cont_stmt);
5132 vback = gimple_omp_continue_control_def (cont_stmt);
5134 if (!gimple_omp_for_combined_p (fd->for_stmt))
5136 if (POINTER_TYPE_P (type))
5137 t = fold_build_pointer_plus (vmain, step);
5138 else
5139 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5140 t = force_gimple_operand_gsi (&gsi, t,
5141 DECL_P (vback)
5142 && TREE_ADDRESSABLE (vback),
5143 NULL_TREE, true, GSI_SAME_STMT);
5144 assign_stmt = gimple_build_assign (vback, t);
5145 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5147 t = build2 (fd->loop.cond_code, boolean_type_node,
5148 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5149 ? t : vback, e);
5150 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5153 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5154 gsi_remove (&gsi, true);
5156 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5157 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5160 /* Remove the GIMPLE_OMP_FOR statement. */
5161 gsi = gsi_for_stmt (fd->for_stmt);
5162 gsi_remove (&gsi, true);
5164 /* Remove the GIMPLE_OMP_RETURN statement. */
5165 gsi = gsi_last_bb (exit_bb);
5166 gsi_remove (&gsi, true);
5168 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5169 if (!broken_loop)
5170 remove_edge (BRANCH_EDGE (entry_bb));
5171 else
5173 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5174 region->outer->cont = NULL;
5177 /* Connect all the blocks. */
5178 if (!broken_loop)
5180 ep = find_edge (cont_bb, body_bb);
5181 if (gimple_omp_for_combined_p (fd->for_stmt))
5183 remove_edge (ep);
5184 ep = NULL;
5186 else if (fd->collapse > 1)
5188 remove_edge (ep);
5189 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5191 else
5192 ep->flags = EDGE_TRUE_VALUE;
5193 find_edge (cont_bb, fin_bb)->flags
5194 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5197 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5198 recompute_dominator (CDI_DOMINATORS, body_bb));
5199 if (!broken_loop)
5200 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5201 recompute_dominator (CDI_DOMINATORS, fin_bb));
5203 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5205 struct loop *loop = alloc_loop ();
5206 loop->header = body_bb;
5207 if (collapse_bb == NULL)
5208 loop->latch = cont_bb;
5209 add_loop (loop, body_bb->loop_father);
5213 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5214 partitioned loop. The lowering here is abstracted, in that the
5215 loop parameters are passed through internal functions, which are
5216 further lowered by oacc_device_lower, once we get to the target
5217 compiler. The loop is of the form:
5219 for (V = B; V LTGT E; V += S) {BODY}
5221 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5222 (constant 0 for no chunking) and we will have a GWV partitioning
5223 mask, specifying dimensions over which the loop is to be
5224 partitioned (see note below). We generate code that looks like:
5226 <entry_bb> [incoming FALL->body, BRANCH->exit]
5227 typedef signedintify (typeof (V)) T; // underlying signed integral type
5228 T range = E - B;
5229 T chunk_no = 0;
5230 T DIR = LTGT == '<' ? +1 : -1;
5231 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5232 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5234 <head_bb> [created by splitting end of entry_bb]
5235 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5236 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5237 if (!(offset LTGT bound)) goto bottom_bb;
5239 <body_bb> [incoming]
5240 V = B + offset;
5241 {BODY}
5243 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5244 offset += step;
5245 if (offset LTGT bound) goto body_bb; [*]
5247 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5248 chunk_no++;
5249 if (chunk_no < chunk_max) goto head_bb;
5251 <exit_bb> [incoming]
5252 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5254 [*] Needed if V live at end of loop
5256 Note: CHUNKING & GWV mask are specified explicitly here. This is a
5257 transition, and will be specified by a more general mechanism shortly.  */
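/* Read as plain C, each partition executes roughly the nest below, where
   goacc_offset/goacc_bound stand for the IFN_GOACC_LOOP_* internal calls
   that oacc_device_lower later resolves per target (a sketch only,
   assuming LTGT is '<'):

	for (T chunk_no = 0; chunk_no < chunk_max; chunk_no++)
	  {
	    T offset = goacc_offset (chunk_no, range, S, CHUNK_SIZE, GWV);
	    T bound = goacc_bound (offset, range, S, CHUNK_SIZE, GWV);
	    for (; offset < bound; offset += step)
	      {
		V = B + offset;
		BODY;
	      }
	  }
*/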
5260 static void
5261 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5263 tree v = fd->loop.v;
5264 enum tree_code cond_code = fd->loop.cond_code;
5265 enum tree_code plus_code = PLUS_EXPR;
5267 tree chunk_size = integer_minus_one_node;
5268 tree gwv = integer_zero_node;
5269 tree iter_type = TREE_TYPE (v);
5270 tree diff_type = iter_type;
5271 tree plus_type = iter_type;
5272 struct oacc_collapse *counts = NULL;
5274 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5275 == GF_OMP_FOR_KIND_OACC_LOOP);
5276 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5277 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5279 if (POINTER_TYPE_P (iter_type))
5281 plus_code = POINTER_PLUS_EXPR;
5282 plus_type = sizetype;
5284 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5285 diff_type = signed_type_for (diff_type);
5287 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5288 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5289 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5290 basic_block bottom_bb = NULL;
5292 /* entry_bb has two successors; the branch edge is to the exit
5293 block, fallthrough edge to body. */
5294 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5295 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5297 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5298 body_bb, or a block whose only successor is the body_bb. Its
5299 fallthrough successor is the final block (same as the branch
5300 successor of the entry_bb). */
5301 if (cont_bb)
5303 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5304 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5306 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5307 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5309 else
5310 gcc_assert (!gimple_in_ssa_p (cfun));
5312 /* The exit block only has entry_bb and cont_bb as predecessors. */
5313 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5315 tree chunk_no;
5316 tree chunk_max = NULL_TREE;
5317 tree bound, offset;
5318 tree step = create_tmp_var (diff_type, ".step");
5319 bool up = cond_code == LT_EXPR;
5320 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5321 bool chunking = !gimple_in_ssa_p (cfun);
5322 bool negating;
5324 /* SSA instances. */
5325 tree offset_incr = NULL_TREE;
5326 tree offset_init = NULL_TREE;
5328 gimple_stmt_iterator gsi;
5329 gassign *ass;
5330 gcall *call;
5331 gimple *stmt;
5332 tree expr;
5333 location_t loc;
5334 edge split, be, fte;
5336 /* Split the end of entry_bb to create head_bb. */
5337 split = split_block (entry_bb, last_stmt (entry_bb));
5338 basic_block head_bb = split->dest;
5339 entry_bb = split->src;
5341 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5342 gsi = gsi_last_bb (entry_bb);
5343 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5344 loc = gimple_location (for_stmt);
5346 if (gimple_in_ssa_p (cfun))
5348 offset_init = gimple_omp_for_index (for_stmt, 0);
5349 gcc_assert (integer_zerop (fd->loop.n1));
5350 /* The SSA parallelizer does gang parallelism. */
5351 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5354 if (fd->collapse > 1)
5356 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5357 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5358 TREE_TYPE (fd->loop.n2));
5360 if (SSA_VAR_P (fd->loop.n2))
5362 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5363 true, GSI_SAME_STMT);
5364 ass = gimple_build_assign (fd->loop.n2, total);
5365 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5370 tree b = fd->loop.n1;
5371 tree e = fd->loop.n2;
5372 tree s = fd->loop.step;
5374 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5375 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5377 /* Convert the step, avoiding possible unsigned->signed overflow. */
5378 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5379 if (negating)
5380 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5381 s = fold_convert (diff_type, s);
5382 if (negating)
5383 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5384 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5386 if (!chunking)
5387 chunk_size = integer_zero_node;
5388 expr = fold_convert (diff_type, chunk_size);
5389 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5390 NULL_TREE, true, GSI_SAME_STMT);
5391 /* Determine the range, avoiding possible unsigned->signed overflow. */
5392 negating = !up && TYPE_UNSIGNED (iter_type);
5393 expr = fold_build2 (MINUS_EXPR, plus_type,
5394 fold_convert (plus_type, negating ? b : e),
5395 fold_convert (plus_type, negating ? e : b));
5396 expr = fold_convert (diff_type, expr);
5397 if (negating)
5398 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5399 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5400 NULL_TREE, true, GSI_SAME_STMT);
5402 chunk_no = build_int_cst (diff_type, 0);
5403 if (chunking)
5405 gcc_assert (!gimple_in_ssa_p (cfun));
5407 expr = chunk_no;
5408 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5409 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5411 ass = gimple_build_assign (chunk_no, expr);
5412 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5414 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5415 build_int_cst (integer_type_node,
5416 IFN_GOACC_LOOP_CHUNKS),
5417 dir, range, s, chunk_size, gwv);
5418 gimple_call_set_lhs (call, chunk_max);
5419 gimple_set_location (call, loc);
5420 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5422 else
5423 chunk_size = chunk_no;
5425 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5426 build_int_cst (integer_type_node,
5427 IFN_GOACC_LOOP_STEP),
5428 dir, range, s, chunk_size, gwv);
5429 gimple_call_set_lhs (call, step);
5430 gimple_set_location (call, loc);
5431 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5433 /* Remove the GIMPLE_OMP_FOR. */
5434 gsi_remove (&gsi, true);
5436 /* Fixup edges from head_bb. */
5437 be = BRANCH_EDGE (head_bb);
5438 fte = FALLTHRU_EDGE (head_bb);
5439 be->flags |= EDGE_FALSE_VALUE;
5440 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5442 basic_block body_bb = fte->dest;
5444 if (gimple_in_ssa_p (cfun))
5446 gsi = gsi_last_bb (cont_bb);
5447 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5449 offset = gimple_omp_continue_control_use (cont_stmt);
5450 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5452 else
5454 offset = create_tmp_var (diff_type, ".offset");
5455 offset_init = offset_incr = offset;
5457 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5459 /* Loop offset & bound go into head_bb. */
5460 gsi = gsi_start_bb (head_bb);
5462 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5463 build_int_cst (integer_type_node,
5464 IFN_GOACC_LOOP_OFFSET),
5465 dir, range, s,
5466 chunk_size, gwv, chunk_no);
5467 gimple_call_set_lhs (call, offset_init);
5468 gimple_set_location (call, loc);
5469 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5471 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5472 build_int_cst (integer_type_node,
5473 IFN_GOACC_LOOP_BOUND),
5474 dir, range, s,
5475 chunk_size, gwv, offset_init);
5476 gimple_call_set_lhs (call, bound);
5477 gimple_set_location (call, loc);
5478 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5480 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5481 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5482 GSI_CONTINUE_LINKING);
5484 /* V assignment goes into body_bb. */
5485 if (!gimple_in_ssa_p (cfun))
5487 gsi = gsi_start_bb (body_bb);
5489 expr = build2 (plus_code, iter_type, b,
5490 fold_convert (plus_type, offset));
5491 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5492 true, GSI_SAME_STMT);
5493 ass = gimple_build_assign (v, expr);
5494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5495 if (fd->collapse > 1)
5496 expand_oacc_collapse_vars (fd, &gsi, counts, v);
5499 /* Loop increment goes into cont_bb. If this is not a loop, we
5500 will have spawned threads as if it was, and each one will
5501 execute one iteration. The specification is not explicit about
5502 whether such constructs are ill-formed or not, and they can
5503 occur, especially when noreturn routines are involved. */
5504 if (cont_bb)
5506 gsi = gsi_last_bb (cont_bb);
5507 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5508 loc = gimple_location (cont_stmt);
5510 /* Increment offset. */
5511 if (gimple_in_ssa_p (cfun))
5512 expr = build2 (plus_code, iter_type, offset,
5513 fold_convert (plus_type, step));
5514 else
5515 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5516 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5517 true, GSI_SAME_STMT);
5518 ass = gimple_build_assign (offset_incr, expr);
5519 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5520 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5521 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5523 /* Remove the GIMPLE_OMP_CONTINUE. */
5524 gsi_remove (&gsi, true);
5526 /* Fixup edges from cont_bb. */
5527 be = BRANCH_EDGE (cont_bb);
5528 fte = FALLTHRU_EDGE (cont_bb);
5529 be->flags |= EDGE_TRUE_VALUE;
5530 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5532 if (chunking)
5534 /* Split the beginning of exit_bb to make bottom_bb. We
5535 need to insert a nop at the start, because splitting is
5536 after a stmt, not before. */
5537 gsi = gsi_start_bb (exit_bb);
5538 stmt = gimple_build_nop ();
5539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5540 split = split_block (exit_bb, stmt);
5541 bottom_bb = split->src;
5542 exit_bb = split->dest;
5543 gsi = gsi_last_bb (bottom_bb);
5545 /* Chunk increment and test goes into bottom_bb. */
5546 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5547 build_int_cst (diff_type, 1));
5548 ass = gimple_build_assign (chunk_no, expr);
5549 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5551 /* Chunk test at end of bottom_bb. */
5552 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5553 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5554 GSI_CONTINUE_LINKING);
5556 /* Fixup edges from bottom_bb. */
5557 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5558 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5562 gsi = gsi_last_bb (exit_bb);
5563 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5564 loc = gimple_location (gsi_stmt (gsi));
5566 if (!gimple_in_ssa_p (cfun))
5568 /* Insert the final value of V, in case it is live. This is the
5569 value for the only thread that survives past the join. */
5570 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5571 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5572 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5573 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5574 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5575 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5576 true, GSI_SAME_STMT);
5577 ass = gimple_build_assign (v, expr);
5578 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5581 /* Remove the OMP_RETURN. */
5582 gsi_remove (&gsi, true);
5584 if (cont_bb)
5586 /* We now have one or two nested loops. Update the loop
5587 structures. */
5588 struct loop *parent = entry_bb->loop_father;
5589 struct loop *body = body_bb->loop_father;
5591 if (chunking)
5593 struct loop *chunk_loop = alloc_loop ();
5594 chunk_loop->header = head_bb;
5595 chunk_loop->latch = bottom_bb;
5596 add_loop (chunk_loop, parent);
5597 parent = chunk_loop;
5599 else if (parent != body)
5601 gcc_assert (body->header == body_bb);
5602 gcc_assert (body->latch == cont_bb
5603 || single_pred (body->latch) == cont_bb);
5604 parent = NULL;
5607 if (parent)
5609 struct loop *body_loop = alloc_loop ();
5610 body_loop->header = body_bb;
5611 body_loop->latch = cont_bb;
5612 add_loop (body_loop, parent);
5617 /* Expand the OMP loop defined by REGION. */
5619 static void
5620 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5622 struct omp_for_data fd;
5623 struct omp_for_data_loop *loops;
5625 loops
5626 = (struct omp_for_data_loop *)
5627 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5628 * sizeof (struct omp_for_data_loop));
5629 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5630 &fd, loops);
5631 region->sched_kind = fd.sched_kind;
5632 region->sched_modifiers = fd.sched_modifiers;
5634 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5635 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5636 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5637 if (region->cont)
5639 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5640 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5641 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5643 else
5644 /* If there isn't a continue, then this is a degenerate case where
5645 the introduction of abnormal edges during lowering will prevent
5646 original loops from being detected. Fix that up. */
5647 loops_state_set (LOOPS_NEED_FIXUP);
5649 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5650 expand_omp_simd (region, &fd);
5651 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5652 expand_cilk_for (region, &fd);
5653 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5655 gcc_assert (!inner_stmt);
5656 expand_oacc_for (region, &fd);
5658 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5660 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5661 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5662 else
5663 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5665 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5666 && !fd.have_ordered)
5668 if (fd.chunk_size == NULL)
5669 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5670 else
5671 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5673 else
5675 int fn_index, start_ix, next_ix;
5677 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5678 == GF_OMP_FOR_KIND_FOR);
5679 if (fd.chunk_size == NULL
5680 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5681 fd.chunk_size = integer_zero_node;
5682 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5683 switch (fd.sched_kind)
5685 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5686 fn_index = 3;
5687 break;
5688 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5689 case OMP_CLAUSE_SCHEDULE_GUIDED:
5690 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5691 && !fd.ordered
5692 && !fd.have_ordered)
5694 fn_index = 3 + fd.sched_kind;
5695 break;
5697 /* FALLTHRU */
5698 default:
5699 fn_index = fd.sched_kind;
5700 break;
5702 if (!fd.ordered)
5703 fn_index += fd.have_ordered * 6;
5704 if (fd.ordered)
5705 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5706 else
5707 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5708 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5709 if (fd.iter_type == long_long_unsigned_type_node)
5711 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5712 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5713 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5714 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5716 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5717 (enum built_in_function) next_ix, inner_stmt);
5720 if (gimple_in_ssa_p (cfun))
5721 update_ssa (TODO_update_ssa_only_virtuals);
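/* (Illustrative sketch added for exposition; not part of the original
   source.)  For the generic path chosen above, the selected builtins
   pair up with libgomp entry points.  As a hedged example,

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   is expected to be expanded by expand_omp_for_generic into roughly

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart, &iend))
       do
         for (i = istart; i < iend; i++)
           body (i);
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   with the _ull builtin variants substituted when the iteration type
   is unsigned long long.  */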
5724 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5726 v = GOMP_sections_start (n);
5728 switch (v)
5730 case 0:
5731 goto L2;
5732 case 1:
5733 section 1;
5734 goto L1;
5735 case 2:
5737 case n:
5739 default:
5740 abort ();
5743 v = GOMP_sections_next ();
5744 goto L0;
5746 reduction;
5748 If this is a combined parallel sections, replace the call to
5749 GOMP_sections_start with call to GOMP_sections_next. */
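/* (Illustrative sketch added for exposition; not part of the original
   source.)  A source-level reading of the pseudo code above: given

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   the expansion is expected to behave roughly like

     for (v = GOMP_sections_start (2); v; v = GOMP_sections_next ())
       switch (v)
         {
         case 1: foo (); break;
         case 2: bar (); break;
         default: __builtin_trap ();
         }
     GOMP_sections_end ();

   with the case-0/goto structure of the pseudo code folded into a loop
   here only for readability.  */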
5751 static void
5752 expand_omp_sections (struct omp_region *region)
5754 tree t, u, vin = NULL, vmain, vnext, l2;
5755 unsigned len;
5756 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5757 gimple_stmt_iterator si, switch_si;
5758 gomp_sections *sections_stmt;
5759 gimple *stmt;
5760 gomp_continue *cont;
5761 edge_iterator ei;
5762 edge e;
5763 struct omp_region *inner;
5764 unsigned i, casei;
5765 bool exit_reachable = region->cont != NULL;
5767 gcc_assert (region->exit != NULL);
5768 entry_bb = region->entry;
5769 l0_bb = single_succ (entry_bb);
5770 l1_bb = region->cont;
5771 l2_bb = region->exit;
5772 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5773 l2 = gimple_block_label (l2_bb);
5774 else
5776 /* This can happen if there are reductions. */
5777 len = EDGE_COUNT (l0_bb->succs);
5778 gcc_assert (len > 0);
5779 e = EDGE_SUCC (l0_bb, len - 1);
5780 si = gsi_last_bb (e->dest);
5781 l2 = NULL_TREE;
5782 if (gsi_end_p (si)
5783 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5784 l2 = gimple_block_label (e->dest);
5785 else
5786 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5788 si = gsi_last_bb (e->dest);
5789 if (gsi_end_p (si)
5790 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5792 l2 = gimple_block_label (e->dest);
5793 break;
5797 if (exit_reachable)
5798 default_bb = create_empty_bb (l1_bb->prev_bb);
5799 else
5800 default_bb = create_empty_bb (l0_bb);
5802 /* We will build a switch() with enough cases for all the
5803 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5804 and a default case to abort if something goes wrong. */
5805 len = EDGE_COUNT (l0_bb->succs);
5807 /* Use vec::quick_push on label_vec throughout, since we know the size
5808 in advance. */
5809 auto_vec<tree> label_vec (len);
5811 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5812 GIMPLE_OMP_SECTIONS statement. */
5813 si = gsi_last_bb (entry_bb);
5814 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5815 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5816 vin = gimple_omp_sections_control (sections_stmt);
5817 if (!is_combined_parallel (region))
5819 /* If we are not inside a combined parallel+sections region,
5820 call GOMP_sections_start. */
5821 t = build_int_cst (unsigned_type_node, len - 1);
5822 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5823 stmt = gimple_build_call (u, 1, t);
5825 else
5827 /* Otherwise, call GOMP_sections_next. */
5828 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5829 stmt = gimple_build_call (u, 0);
5831 gimple_call_set_lhs (stmt, vin);
5832 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5833 gsi_remove (&si, true);
5835 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5836 L0_BB. */
5837 switch_si = gsi_last_bb (l0_bb);
5838 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5839 if (exit_reachable)
5841 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5842 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5843 vmain = gimple_omp_continue_control_use (cont);
5844 vnext = gimple_omp_continue_control_def (cont);
5846 else
5848 vmain = vin;
5849 vnext = NULL_TREE;
5852 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5853 label_vec.quick_push (t);
5854 i = 1;
5856 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5857 for (inner = region->inner, casei = 1;
5858 inner;
5859 inner = inner->next, i++, casei++)
5861 basic_block s_entry_bb, s_exit_bb;
5863 /* Skip optional reduction region. */
5864 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5866 --i;
5867 --casei;
5868 continue;
5871 s_entry_bb = inner->entry;
5872 s_exit_bb = inner->exit;
5874 t = gimple_block_label (s_entry_bb);
5875 u = build_int_cst (unsigned_type_node, casei);
5876 u = build_case_label (u, NULL, t);
5877 label_vec.quick_push (u);
5879 si = gsi_last_bb (s_entry_bb);
5880 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5881 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5882 gsi_remove (&si, true);
5883 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5885 if (s_exit_bb == NULL)
5886 continue;
5888 si = gsi_last_bb (s_exit_bb);
5889 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5890 gsi_remove (&si, true);
5892 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5895 /* Error handling code goes in DEFAULT_BB. */
5896 t = gimple_block_label (default_bb);
5897 u = build_case_label (NULL, NULL, t);
5898 make_edge (l0_bb, default_bb, 0);
5899 add_bb_to_loop (default_bb, current_loops->tree_root);
5901 stmt = gimple_build_switch (vmain, u, label_vec);
5902 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5903 gsi_remove (&switch_si, true);
5905 si = gsi_start_bb (default_bb);
5906 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5907 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5909 if (exit_reachable)
5911 tree bfn_decl;
5913 /* Code to get the next section goes in L1_BB. */
5914 si = gsi_last_bb (l1_bb);
5915 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5917 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5918 stmt = gimple_build_call (bfn_decl, 0);
5919 gimple_call_set_lhs (stmt, vnext);
5920 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5921 gsi_remove (&si, true);
5923 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5926 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5927 si = gsi_last_bb (l2_bb);
5928 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5929 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5930 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5931 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5932 else
5933 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5934 stmt = gimple_build_call (t, 0);
5935 if (gimple_omp_return_lhs (gsi_stmt (si)))
5936 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5937 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5938 gsi_remove (&si, true);
5940 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5943 /* Expand code for an OpenMP single directive. We've already expanded
5944 much of the code; here we simply place the GOMP_barrier call. */
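/* (Illustrative sketch added for exposition; not part of the original
   source.)  By this point lowering has already produced something like

     if (GOMP_single_start ())
       { ... body of the single construct ... }

   (a hedged sketch, not a literal dump); all that remains here is to
   drop the entry/exit markers and, unless nowait was specified, emit
   the trailing barrier.  */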
5946 static void
5947 expand_omp_single (struct omp_region *region)
5949 basic_block entry_bb, exit_bb;
5950 gimple_stmt_iterator si;
5952 entry_bb = region->entry;
5953 exit_bb = region->exit;
5955 si = gsi_last_bb (entry_bb);
5956 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5957 gsi_remove (&si, true);
5958 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5960 si = gsi_last_bb (exit_bb);
5961 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5963 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5964 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5966 gsi_remove (&si, true);
5967 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5970 /* Generic expansion for OpenMP synchronization directives: master,
5971 ordered and critical. All we need to do here is remove the entry
5972 and exit markers for REGION. */
5974 static void
5975 expand_omp_synch (struct omp_region *region)
5977 basic_block entry_bb, exit_bb;
5978 gimple_stmt_iterator si;
5980 entry_bb = region->entry;
5981 exit_bb = region->exit;
5983 si = gsi_last_bb (entry_bb);
5984 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5985 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5986 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5987 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5988 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5989 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5990 gsi_remove (&si, true);
5991 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5993 if (exit_bb)
5995 si = gsi_last_bb (exit_bb);
5996 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5997 gsi_remove (&si, true);
5998 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6002 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6003 operation as a normal volatile load. */
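/* (Illustrative example added for exposition; not part of the original
   source.)  For

     int x, v;
     #pragma omp atomic read
     v = x;

   the mapping below is expected to produce approximately

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST when the seq_cst clause is present), assuming the
   target provides the size-4 builtin.  */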
6005 static bool
6006 expand_omp_atomic_load (basic_block load_bb, tree addr,
6007 tree loaded_val, int index)
6009 enum built_in_function tmpbase;
6010 gimple_stmt_iterator gsi;
6011 basic_block store_bb;
6012 location_t loc;
6013 gimple *stmt;
6014 tree decl, call, type, itype;
6016 gsi = gsi_last_bb (load_bb);
6017 stmt = gsi_stmt (gsi);
6018 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6019 loc = gimple_location (stmt);
6021 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6022 is smaller than word size, then expand_atomic_load assumes that the load
6023 is atomic. We could avoid the builtin entirely in this case. */
6025 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6026 decl = builtin_decl_explicit (tmpbase);
6027 if (decl == NULL_TREE)
6028 return false;
6030 type = TREE_TYPE (loaded_val);
6031 itype = TREE_TYPE (TREE_TYPE (decl));
6033 call = build_call_expr_loc (loc, decl, 2, addr,
6034 build_int_cst (NULL,
6035 gimple_omp_atomic_seq_cst_p (stmt)
6036 ? MEMMODEL_SEQ_CST
6037 : MEMMODEL_RELAXED));
6038 if (!useless_type_conversion_p (type, itype))
6039 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6040 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6042 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6043 gsi_remove (&gsi, true);
6045 store_bb = single_succ (load_bb);
6046 gsi = gsi_last_bb (store_bb);
6047 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6048 gsi_remove (&gsi, true);
6050 if (gimple_in_ssa_p (cfun))
6051 update_ssa (TODO_update_ssa_no_phi);
6053 return true;
6056 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6057 operation as a normal volatile store. */
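/* (Illustrative example added for exposition; not part of the original
   source.)  The analogous case for a plain atomic write,

     #pragma omp atomic write
     x = expr;

   is expected to become roughly

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   while a store whose old value is also needed is routed to
   __atomic_exchange_4 instead.  */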
6059 static bool
6060 expand_omp_atomic_store (basic_block load_bb, tree addr,
6061 tree loaded_val, tree stored_val, int index)
6063 enum built_in_function tmpbase;
6064 gimple_stmt_iterator gsi;
6065 basic_block store_bb = single_succ (load_bb);
6066 location_t loc;
6067 gimple *stmt;
6068 tree decl, call, type, itype;
6069 machine_mode imode;
6070 bool exchange;
6072 gsi = gsi_last_bb (load_bb);
6073 stmt = gsi_stmt (gsi);
6074 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6076 /* If the load value is needed, then this isn't a store but an exchange. */
6077 exchange = gimple_omp_atomic_need_value_p (stmt);
6079 gsi = gsi_last_bb (store_bb);
6080 stmt = gsi_stmt (gsi);
6081 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6082 loc = gimple_location (stmt);
6084 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6085 is smaller than word size, then expand_atomic_store assumes that the store
6086 is atomic. We could avoid the builtin entirely in this case. */
6088 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6089 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6090 decl = builtin_decl_explicit (tmpbase);
6091 if (decl == NULL_TREE)
6092 return false;
6094 type = TREE_TYPE (stored_val);
6096 /* Dig out the type of the function's second argument. */
6097 itype = TREE_TYPE (decl);
6098 itype = TYPE_ARG_TYPES (itype);
6099 itype = TREE_CHAIN (itype);
6100 itype = TREE_VALUE (itype);
6101 imode = TYPE_MODE (itype);
6103 if (exchange && !can_atomic_exchange_p (imode, true))
6104 return false;
6106 if (!useless_type_conversion_p (itype, type))
6107 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6108 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6109 build_int_cst (NULL,
6110 gimple_omp_atomic_seq_cst_p (stmt)
6111 ? MEMMODEL_SEQ_CST
6112 : MEMMODEL_RELAXED));
6113 if (exchange)
6115 if (!useless_type_conversion_p (type, itype))
6116 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6117 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6120 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6121 gsi_remove (&gsi, true);
6123 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6124 gsi = gsi_last_bb (load_bb);
6125 gsi_remove (&gsi, true);
6127 if (gimple_in_ssa_p (cfun))
6128 update_ssa (TODO_update_ssa_no_phi);
6130 return true;
6133 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6134 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6135 size of the data type, and thus usable to find the index of the builtin
6136 decl. Returns false if the expression is not of the proper form. */
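/* (Illustrative example added for exposition; not part of the original
   source.)  The canonical form recognized below is, for a 4-byte x,

     #pragma omp atomic
     x += 1;

   which is expected to collapse the load/store pair into

     __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

   with the __atomic_add_fetch_4 variant used instead when the updated
   value is needed afterwards.  */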
6138 static bool
6139 expand_omp_atomic_fetch_op (basic_block load_bb,
6140 tree addr, tree loaded_val,
6141 tree stored_val, int index)
6143 enum built_in_function oldbase, newbase, tmpbase;
6144 tree decl, itype, call;
6145 tree lhs, rhs;
6146 basic_block store_bb = single_succ (load_bb);
6147 gimple_stmt_iterator gsi;
6148 gimple *stmt;
6149 location_t loc;
6150 enum tree_code code;
6151 bool need_old, need_new;
6152 machine_mode imode;
6153 bool seq_cst;
6155 /* We expect to find the following sequences:
6157 load_bb:
6158 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6160 store_bb:
6161 val = tmp OP something; (or: something OP tmp)
6162 GIMPLE_OMP_STORE (val)
6164 ???FIXME: Allow a more flexible sequence.
6165 Perhaps use data flow to pick the statements.
6169 gsi = gsi_after_labels (store_bb);
6170 stmt = gsi_stmt (gsi);
6171 loc = gimple_location (stmt);
6172 if (!is_gimple_assign (stmt))
6173 return false;
6174 gsi_next (&gsi);
6175 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6176 return false;
6177 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6178 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6179 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6180 gcc_checking_assert (!need_old || !need_new);
6182 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6183 return false;
6185 /* Check for one of the supported fetch-op operations. */
6186 code = gimple_assign_rhs_code (stmt);
6187 switch (code)
6189 case PLUS_EXPR:
6190 case POINTER_PLUS_EXPR:
6191 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6192 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6193 break;
6194 case MINUS_EXPR:
6195 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6196 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6197 break;
6198 case BIT_AND_EXPR:
6199 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6200 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6201 break;
6202 case BIT_IOR_EXPR:
6203 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6204 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6205 break;
6206 case BIT_XOR_EXPR:
6207 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6208 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6209 break;
6210 default:
6211 return false;
6214 /* Make sure the expression is of the proper form. */
6215 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6216 rhs = gimple_assign_rhs2 (stmt);
6217 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6218 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6219 rhs = gimple_assign_rhs1 (stmt);
6220 else
6221 return false;
6223 tmpbase = ((enum built_in_function)
6224 ((need_new ? newbase : oldbase) + index + 1));
6225 decl = builtin_decl_explicit (tmpbase);
6226 if (decl == NULL_TREE)
6227 return false;
6228 itype = TREE_TYPE (TREE_TYPE (decl));
6229 imode = TYPE_MODE (itype);
6231 /* We could test all of the various optabs involved, but the fact of the
6232 matter is that (with the exception of i486 vs i586 and xadd) all targets
6233 that support any atomic operation optab also implement compare-and-swap.
6234 Let optabs.c take care of expanding any compare-and-swap loop. */
6235 if (!can_compare_and_swap_p (imode, true))
6236 return false;
6238 gsi = gsi_last_bb (load_bb);
6239 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6241 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6242 It only requires that the operation happen atomically. Thus we can
6243 use the RELAXED memory model. */
6244 call = build_call_expr_loc (loc, decl, 3, addr,
6245 fold_convert_loc (loc, itype, rhs),
6246 build_int_cst (NULL,
6247 seq_cst ? MEMMODEL_SEQ_CST
6248 : MEMMODEL_RELAXED));
6250 if (need_old || need_new)
6252 lhs = need_old ? loaded_val : stored_val;
6253 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6254 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6256 else
6257 call = fold_convert_loc (loc, void_type_node, call);
6258 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6259 gsi_remove (&gsi, true);
6261 gsi = gsi_last_bb (store_bb);
6262 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6263 gsi_remove (&gsi, true);
6264 gsi = gsi_last_bb (store_bb);
6265 stmt = gsi_stmt (gsi);
6266 gsi_remove (&gsi, true);
6268 if (gimple_in_ssa_p (cfun))
6270 release_defs (stmt);
6271 update_ssa (TODO_update_ssa_no_phi);
6274 return true;
6277 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6279 oldval = *addr;
6280 repeat:
6281 newval = rhs; // with oldval replacing *addr in rhs
6282 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6283 if (oldval != newval)
6284 goto repeat;
6286 INDEX is log2 of the size of the data type, and thus usable to find the
6287 index of the builtin decl. */
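/* (Illustrative sketch added for exposition; not part of the original
   source.)  The main subtlety below is that non-integral types are
   compared-and-swapped through an integer view.  For

     float x;
     #pragma omp atomic
     x *= 2.0f;

   the generated loop is expected to behave roughly like

     unsigned int oldval = *(unsigned int *) &x, prev, newval;
     do
       {
         prev = oldval;
         float tmp = VIEW_CONVERT (float, oldval) * 2.0f;
         newval = VIEW_CONVERT (unsigned int, tmp);
         oldval = __sync_val_compare_and_swap_4 ((unsigned int *) &x,
                                                 prev, newval);
       }
     while (oldval != prev);

   where comparing the integer images (rather than the floats) keeps
   the retry test well behaved for NaNs and -0.0.  */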
6289 static bool
6290 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6291 tree addr, tree loaded_val, tree stored_val,
6292 int index)
6294 tree loadedi, storedi, initial, new_storedi, old_vali;
6295 tree type, itype, cmpxchg, iaddr;
6296 gimple_stmt_iterator si;
6297 basic_block loop_header = single_succ (load_bb);
6298 gimple *phi, *stmt;
6299 edge e;
6300 enum built_in_function fncode;
6302 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6303 order to use the RELAXED memory model effectively. */
6304 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6305 + index + 1);
6306 cmpxchg = builtin_decl_explicit (fncode);
6307 if (cmpxchg == NULL_TREE)
6308 return false;
6309 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6310 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6312 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
6313 return false;
6315 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6316 si = gsi_last_bb (load_bb);
6317 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6319 /* For floating-point values, we'll need to view-convert them to integers
6320 so that we can perform the atomic compare and swap. Simplify the
6321 following code by always setting up the "i"ntegral variables. */
6322 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6324 tree iaddr_val;
6326 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6327 true));
6328 iaddr_val
6329 = force_gimple_operand_gsi (&si,
6330 fold_convert (TREE_TYPE (iaddr), addr),
6331 false, NULL_TREE, true, GSI_SAME_STMT);
6332 stmt = gimple_build_assign (iaddr, iaddr_val);
6333 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6334 loadedi = create_tmp_var (itype);
6335 if (gimple_in_ssa_p (cfun))
6336 loadedi = make_ssa_name (loadedi);
6338 else
6340 iaddr = addr;
6341 loadedi = loaded_val;
6344 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6345 tree loaddecl = builtin_decl_explicit (fncode);
6346 if (loaddecl)
6347 initial
6348 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6349 build_call_expr (loaddecl, 2, iaddr,
6350 build_int_cst (NULL_TREE,
6351 MEMMODEL_RELAXED)));
6352 else
6353 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6354 build_int_cst (TREE_TYPE (iaddr), 0));
6356 initial
6357 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6358 GSI_SAME_STMT);
6360 /* Move the value to the LOADEDI temporary. */
6361 if (gimple_in_ssa_p (cfun))
6363 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6364 phi = create_phi_node (loadedi, loop_header);
6365 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6366 initial);
6368 else
6369 gsi_insert_before (&si,
6370 gimple_build_assign (loadedi, initial),
6371 GSI_SAME_STMT);
6372 if (loadedi != loaded_val)
6374 gimple_stmt_iterator gsi2;
6375 tree x;
6377 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6378 gsi2 = gsi_start_bb (loop_header);
6379 if (gimple_in_ssa_p (cfun))
6381 gassign *stmt;
6382 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6383 true, GSI_SAME_STMT);
6384 stmt = gimple_build_assign (loaded_val, x);
6385 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6387 else
6389 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6390 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6391 true, GSI_SAME_STMT);
6394 gsi_remove (&si, true);
6396 si = gsi_last_bb (store_bb);
6397 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6399 if (iaddr == addr)
6400 storedi = stored_val;
6401 else
6402 storedi
6403 = force_gimple_operand_gsi (&si,
6404 build1 (VIEW_CONVERT_EXPR, itype,
6405 stored_val), true, NULL_TREE, true,
6406 GSI_SAME_STMT);
6408 /* Build the compare&swap statement. */
6409 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6410 new_storedi = force_gimple_operand_gsi (&si,
6411 fold_convert (TREE_TYPE (loadedi),
6412 new_storedi),
6413 true, NULL_TREE,
6414 true, GSI_SAME_STMT);
6416 if (gimple_in_ssa_p (cfun))
6417 old_vali = loadedi;
6418 else
6420 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6421 stmt = gimple_build_assign (old_vali, loadedi);
6422 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6424 stmt = gimple_build_assign (loadedi, new_storedi);
6425 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6428 /* Note that we always perform the comparison as an integer, even for
6429 floating point. This allows the atomic operation to properly
6430 succeed even with NaNs and -0.0. */
6431 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6432 stmt = gimple_build_cond_empty (ne);
6433 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6435 /* Update cfg. */
6436 e = single_succ_edge (store_bb);
6437 e->flags &= ~EDGE_FALLTHRU;
6438 e->flags |= EDGE_FALSE_VALUE;
6440 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6442 /* Copy the new value to loadedi (we already did that before the condition
6443 if we are not in SSA). */
6444 if (gimple_in_ssa_p (cfun))
6446 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6447 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6450 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6451 gsi_remove (&si, true);
6453 struct loop *loop = alloc_loop ();
6454 loop->header = loop_header;
6455 loop->latch = store_bb;
6456 add_loop (loop, loop_header->loop_father);
6458 if (gimple_in_ssa_p (cfun))
6459 update_ssa (TODO_update_ssa_no_phi);
6461 return true;
6464 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6466 GOMP_atomic_start ();
6467 *addr = rhs;
6468 GOMP_atomic_end ();
6470 The result is not globally atomic, but works so long as all parallel
6471 references are within #pragma omp atomic directives. According to
6472 responses received from omp@openmp.org, this appears to be within spec.
6473 Which makes sense, since that's how several other compilers handle
6474 this situation as well.
6475 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6476 expanding. STORED_VAL is the operand of the matching
6477 GIMPLE_OMP_ATOMIC_STORE.
6479 We replace
6480 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6481 loaded_val = *addr;
6483 and replace
6484 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6485 *addr = stored_val;
6488 static bool
6489 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6490 tree addr, tree loaded_val, tree stored_val)
6492 gimple_stmt_iterator si;
6493 gassign *stmt;
6494 tree t;
6496 si = gsi_last_bb (load_bb);
6497 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6499 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6500 t = build_call_expr (t, 0);
6501 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6503 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6504 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6505 gsi_remove (&si, true);
6507 si = gsi_last_bb (store_bb);
6508 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6510 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6511 stored_val);
6512 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6514 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6515 t = build_call_expr (t, 0);
6516 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6517 gsi_remove (&si, true);
6519 if (gimple_in_ssa_p (cfun))
6520 update_ssa (TODO_update_ssa_no_phi);
6521 return true;
6524 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
6525 using expand_omp_atomic_fetch_op. If it failed, we try to
6526 call expand_omp_atomic_pipeline, and if it fails too, the
6527 ultimate fallback is wrapping the operation in a mutex
6528 (expand_omp_atomic_mutex). REGION is the atomic region built
6529 by build_omp_regions_1(). */
6531 static void
6532 expand_omp_atomic (struct omp_region *region)
6534 basic_block load_bb = region->entry, store_bb = region->exit;
6535 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6536 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6537 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6538 tree addr = gimple_omp_atomic_load_rhs (load);
6539 tree stored_val = gimple_omp_atomic_store_val (store);
6540 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6541 HOST_WIDE_INT index;
6543 /* Make sure the type is one of the supported sizes. */
6544 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6545 index = exact_log2 (index);
6546 if (index >= 0 && index <= 4)
6548 unsigned int align = TYPE_ALIGN_UNIT (type);
6550 /* __sync builtins require strict data alignment. */
6551 if (exact_log2 (align) >= index)
6553 /* Atomic load. */
6554 if (loaded_val == stored_val
6555 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6556 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6557 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6558 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6559 return;
6561 /* Atomic store. */
6562 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6563 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6564 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6565 && store_bb == single_succ (load_bb)
6566 && first_stmt (store_bb) == store
6567 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6568 stored_val, index))
6569 return;
6571 /* When possible, use specialized atomic update functions. */
6572 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6573 && store_bb == single_succ (load_bb)
6574 && expand_omp_atomic_fetch_op (load_bb, addr,
6575 loaded_val, stored_val, index))
6576 return;
6578 /* If we don't have specialized __sync builtins, try and implement
6579 as a compare and swap loop. */
6580 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6581 loaded_val, stored_val, index))
6582 return;
6586 /* The ultimate fallback is wrapping the operation in a mutex. */
6587 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6590 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6591 at REGION_EXIT. */
6593 static void
6594 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6595 basic_block region_exit)
6597 struct loop *outer = region_entry->loop_father;
6598 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6600 /* Don't parallelize the kernels region if it contains more than one outer
6601 loop. */
6602 unsigned int nr_outer_loops = 0;
6603 struct loop *single_outer = NULL;
6604 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6606 gcc_assert (loop_outer (loop) == outer);
6608 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6609 continue;
6611 if (region_exit != NULL
6612 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6613 continue;
6615 nr_outer_loops++;
6616 single_outer = loop;
6618 if (nr_outer_loops != 1)
6619 return;
6621 for (struct loop *loop = single_outer->inner;
6622 loop != NULL;
6623 loop = loop->inner)
6624 if (loop->next)
6625 return;
6627 /* Mark the loops in the region. */
6628 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6629 loop->in_oacc_kernels_region = true;
6632 /* Types used to pass grid and work-group sizes to kernel invocation. */
6634 struct GTY(()) grid_launch_attributes_trees
6636 tree kernel_dim_array_type;
6637 tree kernel_lattrs_dimnum_decl;
6638 tree kernel_lattrs_grid_decl;
6639 tree kernel_lattrs_group_decl;
6640 tree kernel_launch_attributes_type;
6643 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6645 /* Create types used to pass kernel launch attributes to target. */
6647 static void
6648 grid_create_kernel_launch_attr_types (void)
6650 if (grid_attr_trees)
6651 return;
6652 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6654 tree dim_arr_index_type
6655 = build_index_type (build_int_cst (integer_type_node, 2));
6656 grid_attr_trees->kernel_dim_array_type
6657 = build_array_type (uint32_type_node, dim_arr_index_type);
6659 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6660 grid_attr_trees->kernel_lattrs_dimnum_decl
6661 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6662 uint32_type_node);
6663 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6665 grid_attr_trees->kernel_lattrs_grid_decl
6666 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6667 grid_attr_trees->kernel_dim_array_type);
6668 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6669 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6670 grid_attr_trees->kernel_lattrs_group_decl
6671 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6672 grid_attr_trees->kernel_dim_array_type);
6673 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6674 = grid_attr_trees->kernel_lattrs_grid_decl;
6675 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6676 "__gomp_kernel_launch_attributes",
6677 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
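/* (Illustrative sketch added for exposition; not part of the original
   source.)  In plain C terms the record built above is expected to be
   laid out as

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;           // number of dimensions actually used
       uint32_t grid_size[3];   // grid size per dimension
       uint32_t group_size[3];  // work-group size per dimension
     };

   a sketch inferred from the field decls above (not a separate
   definition), consumed on the device side when launching HSA
   kernels.  */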
6680 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6681 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6682 of type uint32_type_node. */
6684 static void
6685 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6686 tree fld_decl, int index, tree value)
6688 tree ref = build4 (ARRAY_REF, uint32_type_node,
6689 build3 (COMPONENT_REF,
6690 grid_attr_trees->kernel_dim_array_type,
6691 range_var, fld_decl, NULL_TREE),
6692 build_int_cst (integer_type_node, index),
6693 NULL_TREE, NULL_TREE);
6694 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6697 /* Return a tree representation of a pointer to a structure with grid and
6698 work-group size information. Statements filling that information will be
6699 inserted before GSI, TGT_STMT is the target statement which has the
6700 necessary information in it. */
6702 static tree
6703 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6704 gomp_target *tgt_stmt)
6706 grid_create_kernel_launch_attr_types ();
6707 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6708 "__kernel_launch_attrs");
6710 unsigned max_dim = 0;
6711 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6712 clause;
6713 clause = OMP_CLAUSE_CHAIN (clause))
6715 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6716 continue;
6718 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6719 max_dim = MAX (dim, max_dim);
6721 grid_insert_store_range_dim (gsi, lattrs,
6722 grid_attr_trees->kernel_lattrs_grid_decl,
6723 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6724 grid_insert_store_range_dim (gsi, lattrs,
6725 grid_attr_trees->kernel_lattrs_group_decl,
6726 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6729 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6730 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6731 gcc_checking_assert (max_dim <= 2);
6732 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6733 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6734 GSI_SAME_STMT);
6735 TREE_ADDRESSABLE (lattrs) = 1;
6736 return build_fold_addr_expr (lattrs);
6739 /* Build target argument identifier from the DEVICE identifier, value
6740 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6742 static tree
6743 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6745 tree t = build_int_cst (integer_type_node, device);
6746 if (subsequent_param)
6747 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6748 build_int_cst (integer_type_node,
6749 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6750 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6751 build_int_cst (integer_type_node, id));
6752 return t;
6755 /* Like above but return it in type that can be directly stored as an element
6756 of the argument array. */
6758 static tree
6759 get_target_argument_identifier (int device, bool subsequent_param, int id)
6761 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6762 return fold_convert (ptr_type_node, t);
6765 /* Return a target argument consisting of DEVICE identifier, value identifier
6766 ID, and the actual VALUE. */
6768 static tree
6769 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6770 tree value)
6772 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6773 fold_convert (integer_type_node, value),
6774 build_int_cst (unsigned_type_node,
6775 GOMP_TARGET_ARG_VALUE_SHIFT));
6776 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6777 get_target_argument_identifier_1 (device, false, id));
6778 t = fold_convert (ptr_type_node, t);
6779 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6782 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6783 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6784 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6785 arguments. */
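/* (Illustrative sketch added for exposition; not part of the original
   source.)  A small value such as num_teams == 4 is packed into a
   single pointer-sized element, roughly

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_DEVICE_ALL
               | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas a value that does not fit in 16 signed bits is pushed as an
   identifier element carrying GOMP_TARGET_ARG_SUBSEQUENT_PARAM,
   followed by the value itself in the next array slot.  */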
6787 static void
6788 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6789 int id, tree value, vec <tree> *args)
6791 if (tree_fits_shwi_p (value)
6792 && tree_to_shwi (value) > -(1 << 15)
6793 && tree_to_shwi (value) < (1 << 15))
6794 args->quick_push (get_target_argument_value (gsi, device, id, value));
6795 else
6797 args->quick_push (get_target_argument_identifier (device, true, id));
6798 value = fold_convert (ptr_type_node, value);
6799 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6800 GSI_SAME_STMT);
6801 args->quick_push (value);
6805 /* Create an array of arguments that is then passed to GOMP_target. */
6807 static tree
6808 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6810 auto_vec <tree, 6> args;
6811 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6812 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6813 if (c)
6814 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6815 else
6816 t = integer_minus_one_node;
6817 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6818 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6820 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6821 if (c)
6822 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6823 else
6824 t = integer_minus_one_node;
6825 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6826 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6827 &args);
6829 /* Add HSA-specific grid sizes, if available. */
6830 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6831 OMP_CLAUSE__GRIDDIM_))
6833 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6834 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6835 args.quick_push (t);
6836 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6839 /* Produce more, perhaps device specific, arguments here. */
6841 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6842 args.length () + 1),
6843 ".omp_target_args");
6844 for (unsigned i = 0; i < args.length (); i++)
6846 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6847 build_int_cst (integer_type_node, i),
6848 NULL_TREE, NULL_TREE);
6849 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6850 GSI_SAME_STMT);
6852 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6853 build_int_cst (integer_type_node, args.length ()),
6854 NULL_TREE, NULL_TREE);
6855 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6856 GSI_SAME_STMT);
6857 TREE_ADDRESSABLE (argarray) = 1;
6858 return build_fold_addr_expr (argarray);
6861 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6863 static void
6864 expand_omp_target (struct omp_region *region)
6866 basic_block entry_bb, exit_bb, new_bb;
6867 struct function *child_cfun;
6868 tree child_fn, block, t;
6869 gimple_stmt_iterator gsi;
6870 gomp_target *entry_stmt;
6871 gimple *stmt;
6872 edge e;
6873 bool offloaded, data_region;
6875 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6876 new_bb = region->entry;
6878 offloaded = is_gimple_omp_offloaded (entry_stmt);
6879 switch (gimple_omp_target_kind (entry_stmt))
6881 case GF_OMP_TARGET_KIND_REGION:
6882 case GF_OMP_TARGET_KIND_UPDATE:
6883 case GF_OMP_TARGET_KIND_ENTER_DATA:
6884 case GF_OMP_TARGET_KIND_EXIT_DATA:
6885 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6886 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6887 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6888 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6889 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6890 data_region = false;
6891 break;
6892 case GF_OMP_TARGET_KIND_DATA:
6893 case GF_OMP_TARGET_KIND_OACC_DATA:
6894 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6895 data_region = true;
6896 break;
6897 default:
6898 gcc_unreachable ();
6901 child_fn = NULL_TREE;
6902 child_cfun = NULL;
6903 if (offloaded)
6905 child_fn = gimple_omp_target_child_fn (entry_stmt);
6906 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6909 /* Supported by expand_omp_taskreg, but not here. */
6910 if (child_cfun != NULL)
6911 gcc_checking_assert (!child_cfun->cfg);
6912 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6914 entry_bb = region->entry;
6915 exit_bb = region->exit;
6917 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6918 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6920 if (offloaded)
6922 unsigned srcidx, dstidx, num;
6924 /* If the offloading region needs data sent from the parent
6925 function, then the very first statement (except possible
6926 tree profile counter updates) of the offloading body
6927 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6928 &.OMP_DATA_O is passed as an argument to the child function,
6929 we need to replace it with the argument as seen by the child
6930 function.
6932 In most cases, this will end up being the identity assignment
6933 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6934 a function call that has been inlined, the original PARM_DECL
6935 .OMP_DATA_I may have been converted into a different local
6936 variable. In which case, we need to keep the assignment. */
6937 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6938 if (data_arg)
6940 basic_block entry_succ_bb = single_succ (entry_bb);
6941 gimple_stmt_iterator gsi;
6942 tree arg;
6943 gimple *tgtcopy_stmt = NULL;
6944 tree sender = TREE_VEC_ELT (data_arg, 0);
6946 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6948 gcc_assert (!gsi_end_p (gsi));
6949 stmt = gsi_stmt (gsi);
6950 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6951 continue;
6953 if (gimple_num_ops (stmt) == 2)
6955 tree arg = gimple_assign_rhs1 (stmt);
6957 /* We're ignoring the subcode because we're
6958 effectively doing a STRIP_NOPS. */
6960 if (TREE_CODE (arg) == ADDR_EXPR
6961 && TREE_OPERAND (arg, 0) == sender)
6963 tgtcopy_stmt = stmt;
6964 break;
6969 gcc_assert (tgtcopy_stmt != NULL);
6970 arg = DECL_ARGUMENTS (child_fn);
6972 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6973 gsi_remove (&gsi, true);
6976 /* Declare local variables needed in CHILD_CFUN. */
6977 block = DECL_INITIAL (child_fn);
6978 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6979 /* The gimplifier could record temporaries in the offloading block
6980 rather than in containing function's local_decls chain,
6981 which would mean cgraph missed finalizing them. Do it now. */
6982 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6983 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6984 varpool_node::finalize_decl (t);
6985 DECL_SAVED_TREE (child_fn) = NULL;
6986 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6987 gimple_set_body (child_fn, NULL);
6988 TREE_USED (block) = 1;
6990 /* Reset DECL_CONTEXT on function arguments. */
6991 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
6992 DECL_CONTEXT (t) = child_fn;
6994 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
6995 so that the rest of the region can be moved to the child function. */
6996 gsi = gsi_last_bb (entry_bb);
6997 stmt = gsi_stmt (gsi);
6998 gcc_assert (stmt
6999 && gimple_code (stmt) == gimple_code (entry_stmt));
7000 e = split_block (entry_bb, stmt);
7001 gsi_remove (&gsi, true);
7002 entry_bb = e->dest;
7003 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7005 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7006 if (exit_bb)
7008 gsi = gsi_last_bb (exit_bb);
7009 gcc_assert (!gsi_end_p (gsi)
7010 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7011 stmt = gimple_build_return (NULL);
7012 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7013 gsi_remove (&gsi, true);
7016 /* Move the offloading region into CHILD_CFUN. */
7018 block = gimple_block (entry_stmt);
7020 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7021 if (exit_bb)
7022 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7023 /* When the OMP expansion process cannot guarantee an up-to-date
7024 loop tree, arrange for the child function to fix up its loops. */
7025 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7026 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7028 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7029 num = vec_safe_length (child_cfun->local_decls);
7030 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7032 t = (*child_cfun->local_decls)[srcidx];
7033 if (DECL_CONTEXT (t) == cfun->decl)
7034 continue;
7035 if (srcidx != dstidx)
7036 (*child_cfun->local_decls)[dstidx] = t;
7037 dstidx++;
7039 if (dstidx != num)
7040 vec_safe_truncate (child_cfun->local_decls, dstidx);
7042 /* Inform the callgraph about the new function. */
7043 child_cfun->curr_properties = cfun->curr_properties;
7044 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7045 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7046 cgraph_node *node = cgraph_node::get_create (child_fn);
7047 node->parallelized_function = 1;
7048 cgraph_node::add_new_function (child_fn, true);
7050 /* Add the new function to the offload table. */
7051 if (ENABLE_OFFLOADING)
7052 vec_safe_push (offload_funcs, child_fn);
7054 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7055 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7057 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7058 fixed in a following pass. */
7059 push_cfun (child_cfun);
7060 if (need_asm)
7061 assign_assembler_name_if_neeeded (child_fn);
7062 cgraph_edge::rebuild_edges ();
7064 /* Some EH regions might become dead, see PR34608. If
7065 pass_cleanup_cfg isn't the first pass to happen with the
7066 new child, these dead EH edges might cause problems.
7067 Clean them up now. */
7068 if (flag_exceptions)
7070 basic_block bb;
7071 bool changed = false;
7073 FOR_EACH_BB_FN (bb, cfun)
7074 changed |= gimple_purge_dead_eh_edges (bb);
7075 if (changed)
7076 cleanup_tree_cfg ();
7078 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7079 verify_loop_structure ();
7080 pop_cfun ();
7082 if (dump_file && !gimple_in_ssa_p (cfun))
7084 omp_any_child_fn_dumped = true;
7085 dump_function_header (dump_file, child_fn, dump_flags);
7086 dump_function_to_file (child_fn, dump_file, dump_flags);
7090 /* Emit a library call to launch the offloading region, or do data
7091 transfers. */
7092 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7093 enum built_in_function start_ix;
7094 location_t clause_loc;
7095 unsigned int flags_i = 0;
7096 bool oacc_kernels_p = false;
7098 switch (gimple_omp_target_kind (entry_stmt))
7100 case GF_OMP_TARGET_KIND_REGION:
7101 start_ix = BUILT_IN_GOMP_TARGET;
7102 break;
7103 case GF_OMP_TARGET_KIND_DATA:
7104 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7105 break;
7106 case GF_OMP_TARGET_KIND_UPDATE:
7107 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7108 break;
7109 case GF_OMP_TARGET_KIND_ENTER_DATA:
7110 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7111 break;
7112 case GF_OMP_TARGET_KIND_EXIT_DATA:
7113 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7114 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7115 break;
7116 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7117 oacc_kernels_p = true;
7118 /* FALLTHROUGH */
7119 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7120 start_ix = BUILT_IN_GOACC_PARALLEL;
7121 break;
7122 case GF_OMP_TARGET_KIND_OACC_DATA:
7123 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7124 start_ix = BUILT_IN_GOACC_DATA_START;
7125 break;
7126 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7127 start_ix = BUILT_IN_GOACC_UPDATE;
7128 break;
7129 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7130 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7131 break;
7132 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7133 start_ix = BUILT_IN_GOACC_DECLARE;
7134 break;
7135 default:
7136 gcc_unreachable ();
7139 clauses = gimple_omp_target_clauses (entry_stmt);
7141 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7142 library choose) and there is no conditional. */
7143 cond = NULL_TREE;
7144 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7146 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7147 if (c)
7148 cond = OMP_CLAUSE_IF_EXPR (c);
7150 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7151 if (c)
7153 /* Even if we pass it to all library function calls, it is currently only
7154 defined/used for the OpenMP target ones. */
7155 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7156 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7157 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7158 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7160 device = OMP_CLAUSE_DEVICE_ID (c);
7161 clause_loc = OMP_CLAUSE_LOCATION (c);
7163 else
7164 clause_loc = gimple_location (entry_stmt);
7166 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7167 if (c)
7168 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7170 /* Ensure 'device' is of the correct type. */
7171 device = fold_convert_loc (clause_loc, integer_type_node, device);
7173 /* If we found the clause 'if (cond)', build
7174 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7175 if (cond)
7177 cond = gimple_boolify (cond);
7179 basic_block cond_bb, then_bb, else_bb;
7180 edge e;
7181 tree tmp_var;
7183 tmp_var = create_tmp_var (TREE_TYPE (device));
7184 if (offloaded)
7185 e = split_block_after_labels (new_bb);
7186 else
7188 gsi = gsi_last_bb (new_bb);
7189 gsi_prev (&gsi);
7190 e = split_block (new_bb, gsi_stmt (gsi));
7192 cond_bb = e->src;
7193 new_bb = e->dest;
7194 remove_edge (e);
7196 then_bb = create_empty_bb (cond_bb);
7197 else_bb = create_empty_bb (then_bb);
7198 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7199 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7201 stmt = gimple_build_cond_empty (cond);
7202 gsi = gsi_last_bb (cond_bb);
7203 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7205 gsi = gsi_start_bb (then_bb);
7206 stmt = gimple_build_assign (tmp_var, device);
7207 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7209 gsi = gsi_start_bb (else_bb);
7210 stmt = gimple_build_assign (tmp_var,
7211 build_int_cst (integer_type_node,
7212 GOMP_DEVICE_HOST_FALLBACK));
7213 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7215 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7216 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7217 add_bb_to_loop (then_bb, cond_bb->loop_father);
7218 add_bb_to_loop (else_bb, cond_bb->loop_father);
7219 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7220 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7222 device = tmp_var;
7223 gsi = gsi_last_bb (new_bb);
7225 else
7227 gsi = gsi_last_bb (new_bb);
7228 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7229 true, GSI_SAME_STMT);
7232 t = gimple_omp_target_data_arg (entry_stmt);
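 /* A sketch of what follows (assuming the TREE_VEC layout produced by
 target lowering; array names here are illustrative): T1 becomes the
 number of mapped entries, and T2/T3/T4 the addresses of the host-address,
 size and map-kind arrays (roughly &.omp_data_arr, &.omp_data_sizes and
 &.omp_data_kinds), or zero constants when there is nothing to map. */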
7233 if (t == NULL)
7235 t1 = size_zero_node;
7236 t2 = build_zero_cst (ptr_type_node);
7237 t3 = t2;
7238 t4 = t2;
7240 else
7242 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7243 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7244 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7245 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7246 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7249 gimple *g;
7250 bool tagging = false;
7251 /* The maximum number of arguments used by any start_ix, without varargs. */
7252 auto_vec<tree, 11> args;
7253 args.quick_push (device);
7254 if (offloaded)
7255 args.quick_push (build_fold_addr_expr (child_fn));
7256 args.quick_push (t1);
7257 args.quick_push (t2);
7258 args.quick_push (t3);
7259 args.quick_push (t4);
7260 switch (start_ix)
7262 case BUILT_IN_GOACC_DATA_START:
7263 case BUILT_IN_GOACC_DECLARE:
7264 case BUILT_IN_GOMP_TARGET_DATA:
7265 break;
7266 case BUILT_IN_GOMP_TARGET:
7267 case BUILT_IN_GOMP_TARGET_UPDATE:
7268 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7269 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7270 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7271 if (c)
7272 depend = OMP_CLAUSE_DECL (c);
7273 else
7274 depend = build_int_cst (ptr_type_node, 0);
7275 args.quick_push (depend);
7276 if (start_ix == BUILT_IN_GOMP_TARGET)
7277 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7278 break;
7279 case BUILT_IN_GOACC_PARALLEL:
7281 oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7282 tagging = true;
7284 /* FALLTHRU */
7285 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7286 case BUILT_IN_GOACC_UPDATE:
7288 tree t_async = NULL_TREE;
7290 /* If present, use the value specified by the respective
7291 clause, making sure that it is of the correct type. */
7292 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7293 if (c)
7294 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7295 integer_type_node,
7296 OMP_CLAUSE_ASYNC_EXPR (c));
7297 else if (!tagging)
7298 /* Default values for t_async. */
7299 t_async = fold_convert_loc (gimple_location (entry_stmt),
7300 integer_type_node,
7301 build_int_cst (integer_type_node,
7302 GOMP_ASYNC_SYNC));
7303 if (tagging && t_async)
7305 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7307 if (TREE_CODE (t_async) == INTEGER_CST)
7309 /* See if we can pack the async arg into the tag's
7310 operand. */
7311 i_async = TREE_INT_CST_LOW (t_async);
7312 if (i_async < GOMP_LAUNCH_OP_MAX)
7313 t_async = NULL_TREE;
7314 else
7315 i_async = GOMP_LAUNCH_OP_MAX;
7317 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7318 i_async));
7320 if (t_async)
7321 args.safe_push (t_async);
7323 /* Save the argument index, and ... */
7324 unsigned t_wait_idx = args.length ();
7325 unsigned num_waits = 0;
7326 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7327 if (!tagging || c)
7328 /* ... push a placeholder. */
7329 args.safe_push (integer_zero_node);
7331 for (; c; c = OMP_CLAUSE_CHAIN (c))
7332 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7334 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7335 integer_type_node,
7336 OMP_CLAUSE_WAIT_EXPR (c)));
7337 num_waits++;
7340 if (!tagging || num_waits)
7342 tree len;
7344 /* Now that we know the number, update the placeholder. */
7345 if (tagging)
7346 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7347 else
7348 len = build_int_cst (integer_type_node, num_waits);
7349 len = fold_convert_loc (gimple_location (entry_stmt),
7350 unsigned_type_node, len);
7351 args[t_wait_idx] = len;
7354 break;
7355 default:
7356 gcc_unreachable ();
7358 if (tagging)
7359 /* Push terminal marker - zero. */
7360 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7362 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7363 gimple_set_location (g, gimple_location (entry_stmt));
7364 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
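 /* As an illustration only (names invented, not the exact call GCC prints):
 for a plain '#pragma omp target' the call emitted above looks roughly like
 GOMP_target_ext (device, child_fn, num_maps, &.omp_data_arr,
 &.omp_data_sizes, &.omp_data_kinds, flags, depend, target_args);
 while the OpenACC variants additionally get the async/wait/launch
 arguments pushed in the switch above. */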
7365 if (!offloaded)
7367 g = gsi_stmt (gsi);
7368 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7369 gsi_remove (&gsi, true);
7371 if (data_region && region->exit)
7373 gsi = gsi_last_bb (region->exit);
7374 g = gsi_stmt (gsi);
7375 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7376 gsi_remove (&gsi, true);
7380 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7381 the iteration variable derived from the thread number. INTRA_GROUP means this
7382 is an expansion of a loop iterating over work-items within a separate
7383 iteration over groups. */
7385 static void
7386 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7388 gimple_stmt_iterator gsi;
7389 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7390 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7391 == GF_OMP_FOR_KIND_GRID_LOOP);
7392 size_t collapse = gimple_omp_for_collapse (for_stmt);
7393 struct omp_for_data_loop *loops
7394 = XALLOCAVEC (struct omp_for_data_loop,
7395 gimple_omp_for_collapse (for_stmt));
7396 struct omp_for_data fd;
7398 remove_edge (BRANCH_EDGE (kfor->entry));
7399 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7401 gcc_assert (kfor->cont);
7402 omp_extract_for_data (for_stmt, &fd, loops);
7404 gsi = gsi_start_bb (body_bb);
7406 for (size_t dim = 0; dim < collapse; dim++)
7408 tree type, itype;
7409 itype = type = TREE_TYPE (fd.loops[dim].v);
7410 if (POINTER_TYPE_P (type))
7411 itype = signed_type_for (type);
7413 tree n1 = fd.loops[dim].n1;
7414 tree step = fd.loops[dim].step;
7415 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7416 true, NULL_TREE, true, GSI_SAME_STMT);
7417 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7418 true, NULL_TREE, true, GSI_SAME_STMT);
7419 tree threadid;
7420 if (gimple_omp_for_grid_group_iter (for_stmt))
7422 gcc_checking_assert (!intra_group);
7423 threadid = build_call_expr (builtin_decl_explicit
7424 (BUILT_IN_HSA_WORKGROUPID), 1,
7425 build_int_cstu (unsigned_type_node, dim));
7427 else if (intra_group)
7428 threadid = build_call_expr (builtin_decl_explicit
7429 (BUILT_IN_HSA_WORKITEMID), 1,
7430 build_int_cstu (unsigned_type_node, dim));
7431 else
7432 threadid = build_call_expr (builtin_decl_explicit
7433 (BUILT_IN_HSA_WORKITEMABSID), 1,
7434 build_int_cstu (unsigned_type_node, dim));
7435 threadid = fold_convert (itype, threadid);
7436 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7437 true, GSI_SAME_STMT);
7439 tree startvar = fd.loops[dim].v;
7440 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7441 if (POINTER_TYPE_P (type))
7442 t = fold_build_pointer_plus (n1, t);
7443 else
7444 t = fold_build2 (PLUS_EXPR, type, t, n1);
7445 t = fold_convert (type, t);
7446 t = force_gimple_operand_gsi (&gsi, t,
7447 DECL_P (startvar)
7448 && TREE_ADDRESSABLE (startvar),
7449 NULL_TREE, true, GSI_SAME_STMT);
7450 gassign *assign_stmt = gimple_build_assign (startvar, t);
7451 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7453 /* Remove the omp for statement. */
7454 gsi = gsi_last_bb (kfor->entry);
7455 gsi_remove (&gsi, true);
7457 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7458 gsi = gsi_last_bb (kfor->cont);
7459 gcc_assert (!gsi_end_p (gsi)
7460 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7461 gsi_remove (&gsi, true);
7463 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7464 gsi = gsi_last_bb (kfor->exit);
7465 gcc_assert (!gsi_end_p (gsi)
7466 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7467 if (intra_group)
7468 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7469 gsi_remove (&gsi, true);
7471 /* Fix up the now much simpler CFG. */
7472 remove_edge (find_edge (kfor->cont, body_bb));
7474 if (kfor->cont != body_bb)
7475 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7476 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
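/* A rough sketch of the transformation performed by grid_expand_omp_for_loop
 (illustrative pseudo source, one collapsed dimension shown): the loop
 for (V = N1; V cond N2; V += STEP)
 BODY;
 is reduced to just its body, with the iteration variable computed from the
 HSA work-item (or work-group) id of the corresponding dimension:
 V = N1 + <workitem/workgroup id> * STEP;
 BODY;
 The GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN statements are
 removed (the last one possibly replaced by a barrier for intra-group loops),
 and the CFG is simplified accordingly. */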
7479 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7480 the kernel's PARM_DECLs. */
7482 struct grid_arg_decl_map
7484 tree old_arg;
7485 tree new_arg;
7488 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7489 pertaining to the kernel function. */
7491 static tree
7492 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7494 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7495 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7496 tree t = *tp;
7498 if (t == adm->old_arg)
7499 *tp = adm->new_arg;
7500 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7501 return NULL_TREE;
7504 /* If the TARGET region contains a gridified kernel body for-loop, remove its
7505 region from TARGET and expand it in HSA gridified kernel fashion. */
7507 static void
7508 grid_expand_target_grid_body (struct omp_region *target)
7510 if (!hsa_gen_requested_p ())
7511 return;
7513 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7514 struct omp_region **pp;
7516 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7517 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7518 break;
7520 struct omp_region *gpukernel = *pp;
7522 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7523 if (!gpukernel)
7525 /* HSA cannot handle OpenACC constructs. */
7526 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7527 return;
7528 gcc_checking_assert (orig_child_fndecl);
7529 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7530 OMP_CLAUSE__GRIDDIM_));
7531 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7533 hsa_register_kernel (n);
7534 return;
7537 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7538 OMP_CLAUSE__GRIDDIM_));
7539 tree inside_block
7540 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7541 *pp = gpukernel->next;
7542 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7543 if ((*pp)->type == GIMPLE_OMP_FOR)
7544 break;
7546 struct omp_region *kfor = *pp;
7547 gcc_assert (kfor);
7548 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7549 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7550 *pp = kfor->next;
7551 if (kfor->inner)
7553 if (gimple_omp_for_grid_group_iter (for_stmt))
7555 struct omp_region **next_pp;
7556 for (pp = &kfor->inner; *pp; pp = next_pp)
7558 next_pp = &(*pp)->next;
7559 if ((*pp)->type != GIMPLE_OMP_FOR)
7560 continue;
7561 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7562 gcc_assert (gimple_omp_for_kind (inner)
7563 == GF_OMP_FOR_KIND_GRID_LOOP);
7564 grid_expand_omp_for_loop (*pp, true);
7565 *pp = (*pp)->next;
7566 next_pp = pp;
7569 expand_omp (kfor->inner);
7571 if (gpukernel->inner)
7572 expand_omp (gpukernel->inner);
7574 tree kern_fndecl = copy_node (orig_child_fndecl);
7575 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7576 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7577 tree tgtblock = gimple_block (tgt_stmt);
7578 tree fniniblock = make_node (BLOCK);
7579 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7580 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7581 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7582 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7583 DECL_INITIAL (kern_fndecl) = fniniblock;
7584 push_struct_function (kern_fndecl);
7585 cfun->function_end_locus = gimple_location (tgt_stmt);
7586 init_tree_ssa (cfun);
7587 pop_cfun ();
7589 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7590 gcc_assert (!DECL_CHAIN (old_parm_decl));
7591 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7592 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7593 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7594 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7595 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7596 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7597 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7598 kern_cfun->curr_properties = cfun->curr_properties;
7600 grid_expand_omp_for_loop (kfor, false);
7602 /* Remove the GIMPLE_OMP_GRID_BODY statement at the end of the kernel region's entry block. */
7603 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7604 gsi_remove (&gsi, true);
7605 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7606 return. */
7607 gsi = gsi_last_bb (gpukernel->exit);
7608 gcc_assert (!gsi_end_p (gsi)
7609 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7610 gimple *ret_stmt = gimple_build_return (NULL);
7611 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7612 gsi_remove (&gsi, true);
7614 /* Statements in the first BB in the target construct have been produced by
7615 target lowering and must be copied inside the GPUKERNEL, with the two
7616 exceptions of the first OMP statement and the OMP_DATA assignment
7617 statement. */
7618 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7619 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7620 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7621 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7622 !gsi_end_p (tsi); gsi_next (&tsi))
7624 gimple *stmt = gsi_stmt (tsi);
7625 if (is_gimple_omp (stmt))
7626 break;
7627 if (sender
7628 && is_gimple_assign (stmt)
7629 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7630 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7631 continue;
7632 gimple *copy = gimple_copy (stmt);
7633 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7634 gimple_set_block (copy, fniniblock);
7637 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7638 gpukernel->exit, inside_block);
7640 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7641 kcn->mark_force_output ();
7642 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7644 hsa_register_kernel (kcn, orig_child);
7646 cgraph_node::add_new_function (kern_fndecl, true);
7647 push_cfun (kern_cfun);
7648 cgraph_edge::rebuild_edges ();
7650 /* Re-map any mention of the PARM_DECL of the original function to the
7651 PARM_DECL of the new one.
7653 TODO: It would be great if lowering produced references into the GPU
7654 kernel decl straight away and we did not have to do this. */
7655 struct grid_arg_decl_map adm;
7656 adm.old_arg = old_parm_decl;
7657 adm.new_arg = new_parm_decl;
7658 basic_block bb;
7659 FOR_EACH_BB_FN (bb, kern_cfun)
7661 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7663 gimple *stmt = gsi_stmt (gsi);
7664 struct walk_stmt_info wi;
7665 memset (&wi, 0, sizeof (wi));
7666 wi.info = &adm;
7667 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7670 pop_cfun ();
7672 return;
7675 /* Expand the parallel region tree rooted at REGION. Expansion
7676 proceeds in depth-first order. Innermost regions are expanded
7677 first. This way, parallel regions that require a new function to
7678 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7679 internal dependencies in their body. */
7681 static void
7682 expand_omp (struct omp_region *region)
7684 omp_any_child_fn_dumped = false;
7685 while (region)
7687 location_t saved_location;
7688 gimple *inner_stmt = NULL;
7690 /* First, determine whether this is a combined parallel+workshare
7691 region. */
7692 if (region->type == GIMPLE_OMP_PARALLEL)
7693 determine_parallel_type (region);
7694 else if (region->type == GIMPLE_OMP_TARGET)
7695 grid_expand_target_grid_body (region);
7697 if (region->type == GIMPLE_OMP_FOR
7698 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7699 inner_stmt = last_stmt (region->inner->entry);
7701 if (region->inner)
7702 expand_omp (region->inner);
7704 saved_location = input_location;
7705 if (gimple_has_location (last_stmt (region->entry)))
7706 input_location = gimple_location (last_stmt (region->entry));
7708 switch (region->type)
7710 case GIMPLE_OMP_PARALLEL:
7711 case GIMPLE_OMP_TASK:
7712 expand_omp_taskreg (region);
7713 break;
7715 case GIMPLE_OMP_FOR:
7716 expand_omp_for (region, inner_stmt);
7717 break;
7719 case GIMPLE_OMP_SECTIONS:
7720 expand_omp_sections (region);
7721 break;
7723 case GIMPLE_OMP_SECTION:
7724 /* Individual omp sections are handled together with their
7725 parent GIMPLE_OMP_SECTIONS region. */
7726 break;
7728 case GIMPLE_OMP_SINGLE:
7729 expand_omp_single (region);
7730 break;
7732 case GIMPLE_OMP_ORDERED:
7734 gomp_ordered *ord_stmt
7735 = as_a <gomp_ordered *> (last_stmt (region->entry));
7736 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7737 OMP_CLAUSE_DEPEND))
7739 /* We'll expand these when expanding the corresponding
7740 worksharing region with an ordered(n) clause. */
7741 gcc_assert (region->outer
7742 && region->outer->type == GIMPLE_OMP_FOR);
7743 region->ord_stmt = ord_stmt;
7744 break;
7747 /* FALLTHRU */
7748 case GIMPLE_OMP_MASTER:
7749 case GIMPLE_OMP_TASKGROUP:
7750 case GIMPLE_OMP_CRITICAL:
7751 case GIMPLE_OMP_TEAMS:
7752 expand_omp_synch (region);
7753 break;
7755 case GIMPLE_OMP_ATOMIC_LOAD:
7756 expand_omp_atomic (region);
7757 break;
7759 case GIMPLE_OMP_TARGET:
7760 expand_omp_target (region);
7761 break;
7763 default:
7764 gcc_unreachable ();
7767 input_location = saved_location;
7768 region = region->next;
7770 if (omp_any_child_fn_dumped)
7772 if (dump_file)
7773 dump_function_header (dump_file, current_function_decl, dump_flags);
7774 omp_any_child_fn_dumped = false;
7778 /* Helper for build_omp_regions. Scan the dominator tree starting at
7779 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7780 true, the function ends once a single tree is built (otherwise, a whole
7781 forest of OMP constructs may be built). */
7783 static void
7784 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7785 bool single_tree)
7787 gimple_stmt_iterator gsi;
7788 gimple *stmt;
7789 basic_block son;
7791 gsi = gsi_last_bb (bb);
7792 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7794 struct omp_region *region;
7795 enum gimple_code code;
7797 stmt = gsi_stmt (gsi);
7798 code = gimple_code (stmt);
7799 if (code == GIMPLE_OMP_RETURN)
7801 /* STMT is the return point out of region PARENT. Mark it
7802 as the exit point and make PARENT the immediately
7803 enclosing region. */
7804 gcc_assert (parent);
7805 region = parent;
7806 region->exit = bb;
7807 parent = parent->outer;
7809 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7811 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7812 GIMPLE_OMP_RETURN, but matches with
7813 GIMPLE_OMP_ATOMIC_LOAD. */
7814 gcc_assert (parent);
7815 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7816 region = parent;
7817 region->exit = bb;
7818 parent = parent->outer;
7820 else if (code == GIMPLE_OMP_CONTINUE)
7822 gcc_assert (parent);
7823 parent->cont = bb;
7825 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7827 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7828 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7830 else
7832 region = new_omp_region (bb, code, parent);
7833 /* Otherwise... */
7834 if (code == GIMPLE_OMP_TARGET)
7836 switch (gimple_omp_target_kind (stmt))
7838 case GF_OMP_TARGET_KIND_REGION:
7839 case GF_OMP_TARGET_KIND_DATA:
7840 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7841 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7842 case GF_OMP_TARGET_KIND_OACC_DATA:
7843 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7844 break;
7845 case GF_OMP_TARGET_KIND_UPDATE:
7846 case GF_OMP_TARGET_KIND_ENTER_DATA:
7847 case GF_OMP_TARGET_KIND_EXIT_DATA:
7848 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7849 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7850 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7851 /* ..., other than for those stand-alone directives... */
7852 region = NULL;
7853 break;
7854 default:
7855 gcc_unreachable ();
7858 else if (code == GIMPLE_OMP_ORDERED
7859 && omp_find_clause (gimple_omp_ordered_clauses
7860 (as_a <gomp_ordered *> (stmt)),
7861 OMP_CLAUSE_DEPEND))
7862 /* #pragma omp ordered depend is also just a stand-alone
7863 directive. */
7864 region = NULL;
7865 /* ..., this directive becomes the parent for a new region. */
7866 if (region)
7867 parent = region;
7871 if (single_tree && !parent)
7872 return;
7874 for (son = first_dom_son (CDI_DOMINATORS, bb);
7875 son;
7876 son = next_dom_son (CDI_DOMINATORS, son))
7877 build_omp_regions_1 (son, parent, single_tree);
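/* A small illustrative example (not produced verbatim by GCC): for
 #pragma omp parallel
 #pragma omp for
 for (...) ...
 build_omp_regions_1 creates a GIMPLE_OMP_PARALLEL region whose ->inner
 points to the GIMPLE_OMP_FOR region; sibling regions at the same nesting
 level are chained through ->next, and stand-alone directives such as
 '#pragma omp target update' get no region at all. */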
7880 /* Builds the tree of OMP regions rooted at ROOT, storing it in
7881 root_omp_region. */
7883 static void
7884 build_omp_regions_root (basic_block root)
7886 gcc_assert (root_omp_region == NULL);
7887 build_omp_regions_1 (root, NULL, true);
7888 gcc_assert (root_omp_region != NULL);
7891 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7893 void
7894 omp_expand_local (basic_block head)
7896 build_omp_regions_root (head);
7897 if (dump_file && (dump_flags & TDF_DETAILS))
7899 fprintf (dump_file, "\nOMP region tree\n\n");
7900 dump_omp_region (dump_file, root_omp_region, 0);
7901 fprintf (dump_file, "\n");
7904 remove_exit_barriers (root_omp_region);
7905 expand_omp (root_omp_region);
7907 omp_free_regions ();
7910 /* Scan the CFG and build a tree of OMP regions, storing the root in
7911 root_omp_region. */
7913 static void
7914 build_omp_regions (void)
7916 gcc_assert (root_omp_region == NULL);
7917 calculate_dominance_info (CDI_DOMINATORS);
7918 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7921 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7923 static unsigned int
7924 execute_expand_omp (void)
7926 build_omp_regions ();
7928 if (!root_omp_region)
7929 return 0;
7931 if (dump_file)
7933 fprintf (dump_file, "\nOMP region tree\n\n");
7934 dump_omp_region (dump_file, root_omp_region, 0);
7935 fprintf (dump_file, "\n");
7938 remove_exit_barriers (root_omp_region);
7940 expand_omp (root_omp_region);
7942 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7943 verify_loop_structure ();
7944 cleanup_tree_cfg ();
7946 omp_free_regions ();
7948 return 0;
7951 /* OMP expansion -- the default pass, run before creation of SSA form. */
7953 namespace {
7955 const pass_data pass_data_expand_omp =
7957 GIMPLE_PASS, /* type */
7958 "ompexp", /* name */
7959 OPTGROUP_OPENMP, /* optinfo_flags */
7960 TV_NONE, /* tv_id */
7961 PROP_gimple_any, /* properties_required */
7962 PROP_gimple_eomp, /* properties_provided */
7963 0, /* properties_destroyed */
7964 0, /* todo_flags_start */
7965 0, /* todo_flags_finish */
7968 class pass_expand_omp : public gimple_opt_pass
7970 public:
7971 pass_expand_omp (gcc::context *ctxt)
7972 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7975 /* opt_pass methods: */
7976 virtual unsigned int execute (function *)
7978 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
7979 || flag_openmp_simd != 0)
7980 && !seen_error ());
7982 /* This pass always runs, to provide PROP_gimple_eomp.
7983 But often, there is nothing to do. */
7984 if (!gate)
7985 return 0;
7987 return execute_expand_omp ();
7990 }; // class pass_expand_omp
7992 } // anon namespace
7994 gimple_opt_pass *
7995 make_pass_expand_omp (gcc::context *ctxt)
7997 return new pass_expand_omp (ctxt);
8000 namespace {
8002 const pass_data pass_data_expand_omp_ssa =
8004 GIMPLE_PASS, /* type */
8005 "ompexpssa", /* name */
8006 OPTGROUP_OPENMP, /* optinfo_flags */
8007 TV_NONE, /* tv_id */
8008 PROP_cfg | PROP_ssa, /* properties_required */
8009 PROP_gimple_eomp, /* properties_provided */
8010 0, /* properties_destroyed */
8011 0, /* todo_flags_start */
8012 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8015 class pass_expand_omp_ssa : public gimple_opt_pass
8017 public:
8018 pass_expand_omp_ssa (gcc::context *ctxt)
8019 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8022 /* opt_pass methods: */
8023 virtual bool gate (function *fun)
8025 return !(fun->curr_properties & PROP_gimple_eomp);
8027 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8028 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8030 }; // class pass_expand_omp_ssa
8032 } // anon namespace
8034 gimple_opt_pass *
8035 make_pass_expand_omp_ssa (gcc::context *ctxt)
8037 return new pass_expand_omp_ssa (ctxt);
8040 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8041 GIMPLE_* codes. */
8043 bool
8044 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8045 int *region_idx)
8047 gimple *last = last_stmt (bb);
8048 enum gimple_code code = gimple_code (last);
8049 struct omp_region *cur_region = *region;
8050 bool fallthru = false;
8052 switch (code)
8054 case GIMPLE_OMP_PARALLEL:
8055 case GIMPLE_OMP_TASK:
8056 case GIMPLE_OMP_FOR:
8057 case GIMPLE_OMP_SINGLE:
8058 case GIMPLE_OMP_TEAMS:
8059 case GIMPLE_OMP_MASTER:
8060 case GIMPLE_OMP_TASKGROUP:
8061 case GIMPLE_OMP_CRITICAL:
8062 case GIMPLE_OMP_SECTION:
8063 case GIMPLE_OMP_GRID_BODY:
8064 cur_region = new_omp_region (bb, code, cur_region);
8065 fallthru = true;
8066 break;
8068 case GIMPLE_OMP_ORDERED:
8069 cur_region = new_omp_region (bb, code, cur_region);
8070 fallthru = true;
8071 if (omp_find_clause (gimple_omp_ordered_clauses
8072 (as_a <gomp_ordered *> (last)),
8073 OMP_CLAUSE_DEPEND))
8074 cur_region = cur_region->outer;
8075 break;
8077 case GIMPLE_OMP_TARGET:
8078 cur_region = new_omp_region (bb, code, cur_region);
8079 fallthru = true;
8080 switch (gimple_omp_target_kind (last))
8082 case GF_OMP_TARGET_KIND_REGION:
8083 case GF_OMP_TARGET_KIND_DATA:
8084 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8085 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8086 case GF_OMP_TARGET_KIND_OACC_DATA:
8087 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8088 break;
8089 case GF_OMP_TARGET_KIND_UPDATE:
8090 case GF_OMP_TARGET_KIND_ENTER_DATA:
8091 case GF_OMP_TARGET_KIND_EXIT_DATA:
8092 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8093 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8094 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8095 cur_region = cur_region->outer;
8096 break;
8097 default:
8098 gcc_unreachable ();
8100 break;
8102 case GIMPLE_OMP_SECTIONS:
8103 cur_region = new_omp_region (bb, code, cur_region);
8104 fallthru = true;
8105 break;
8107 case GIMPLE_OMP_SECTIONS_SWITCH:
8108 fallthru = false;
8109 break;
8111 case GIMPLE_OMP_ATOMIC_LOAD:
8112 case GIMPLE_OMP_ATOMIC_STORE:
8113 fallthru = true;
8114 break;
8116 case GIMPLE_OMP_RETURN:
8117 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8118 somewhere other than the next block. This will be
8119 created later. */
8120 cur_region->exit = bb;
8121 if (cur_region->type == GIMPLE_OMP_TASK)
8122 /* Add an edge corresponding to not scheduling the task
8123 immediately. */
8124 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8125 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8126 cur_region = cur_region->outer;
8127 break;
8129 case GIMPLE_OMP_CONTINUE:
8130 cur_region->cont = bb;
8131 switch (cur_region->type)
8133 case GIMPLE_OMP_FOR:
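 /* Sketch of the edges created for a loop region (illustrative only;
 ENTRY is the block ending in GIMPLE_OMP_FOR, CONT is this block
 ending in GIMPLE_OMP_CONTINUE):
 CONT -> loop body loopback edge
 ENTRY -> CONT->next_bb loop body not executed at all
 CONT -> CONT->next_bb normal exit from the loop
 All of these are marked abnormal so they are never split. */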
8134 /* Mark the successor edges of the GIMPLE_OMP_FOR and
8135 GIMPLE_OMP_CONTINUE blocks as abnormal to prevent
8136 splitting them. */
8137 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8138 /* Make the loopback edge. */
8139 make_edge (bb, single_succ (cur_region->entry),
8140 EDGE_ABNORMAL);
8142 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8143 corresponds to the case that the body of the loop
8144 is not executed at all. */
8145 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8146 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8147 fallthru = false;
8148 break;
8150 case GIMPLE_OMP_SECTIONS:
8151 /* Wire up the edges into and out of the nested sections. */
8153 basic_block switch_bb = single_succ (cur_region->entry);
8155 struct omp_region *i;
8156 for (i = cur_region->inner; i ; i = i->next)
8158 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8159 make_edge (switch_bb, i->entry, 0);
8160 make_edge (i->exit, bb, EDGE_FALLTHRU);
8163 /* Make the loopback edge to the block with
8164 GIMPLE_OMP_SECTIONS_SWITCH. */
8165 make_edge (bb, switch_bb, 0);
8167 /* Make the edge from the switch to exit. */
8168 make_edge (switch_bb, bb->next_bb, 0);
8169 fallthru = false;
8171 break;
8173 case GIMPLE_OMP_TASK:
8174 fallthru = true;
8175 break;
8177 default:
8178 gcc_unreachable ();
8180 break;
8182 default:
8183 gcc_unreachable ();
8186 if (*region != cur_region)
8188 *region = cur_region;
8189 if (cur_region)
8190 *region_idx = cur_region->entry->index;
8191 else
8192 *region_idx = 0;
8195 return fallthru;
8198 #include "gt-omp-expand.h"