1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "debug.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
67 struct omp_region
69 /* The enclosing region. */
70 struct omp_region *outer;
72 /* First child region. */
73 struct omp_region *inner;
75 /* Next peer region. */
76 struct omp_region *next;
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
117 /* Return true if REGION is a combined parallel+workshare region. */
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
122 return region->is_combined_parallel;
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
126 is the immediate dominator of WS_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
139 Is lowered into:
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
158 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
196 return true;
199 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence of
200 the simd modifier (SIMD_SCHEDULE). */
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 if (!simd_schedule)
206 return chunk_size;
208 int vf = omp_max_vf ();
209 if (vf == 1)
210 return chunk_size;
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
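/* An illustrative, editor-added example of the rounding above (assuming
   omp_max_vf () returns 8, which this file does not guarantee): a chunk
   size of 10 becomes
     (10 + (8 - 1)) & -8  ==  17 & ~7  ==  16
   i.e. the next multiple of the vectorization factor, so every chunk
   covers whole SIMD iterations.  */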
219 /* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 struct omp_for_data fd;
233 tree n1, n2;
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
239 if (gimple_omp_for_combined_into_p (for_stmt))
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
263 if (fd.chunk_size)
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
270 return ws_args;
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
284 gcc_unreachable ();
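/* An illustrative, editor-added sketch of the vector built above.  For a
   combined construct such as (user code assumed, not from this file)

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   ws_args is roughly { (long) 0, (long) n, (long) 1, (long) 4 }, i.e. the
   start, end, increment and chunk size that expand_parallel_call splices
   into the combined GOMP_parallel_loop_* call.  For GIMPLE_OMP_SECTIONS it
   is a single element holding the section count.  */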
287 /* Discover whether REGION is a combined parallel+workshare region. */
289 static void
290 determine_parallel_type (struct omp_region *region)
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
323 if (region->inner->type == GIMPLE_OMP_FOR)
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
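/* Editor-added examples of the decision above (user code assumed, not
   taken from this file):
     - "#pragma omp parallel for schedule (dynamic)" is combined and ends
       up calling GOMP_parallel_loop_dynamic;
     - "#pragma omp parallel for" with no schedule clause, or with
       schedule (static), is not combined, because the static schedule is
       open coded anyway;
     - "#pragma omp parallel for ordered schedule (dynamic)" is not
       combined either, because ordered semantics would still need the
       extra synchronization.  */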
353 /* Debugging dumps for parallel regions. */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
358 /* Dump the parallel region tree rooted at REGION. */
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
369 if (region->cont)
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
388 dump_omp_region (stderr, region, 0);
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
394 dump_omp_region (stderr, root_omp_region, 0);
397 /* Create a new omp region of type TYPE starting at BB inside region PARENT. */
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
403 struct omp_region *region = XCNEW (struct omp_region);
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
409 if (parent)
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
416 else
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
424 return region;
427 /* Release the memory associated with the region tree rooted at REGION. */
429 static void
430 free_omp_region_1 (struct omp_region *region)
432 struct omp_region *i, *n;
434 for (i = region->inner; i ; i = n)
436 n = i->next;
437 free_omp_region_1 (i);
440 free (region);
443 /* Release the memory for the entire omp region tree. */
445 void
446 omp_free_regions (void)
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
451 n = r->next;
452 free_omp_region_1 (r);
454 root_omp_region = NULL;
457 /* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
460 static gcond *
461 gimple_build_cond_empty (tree cond)
463 enum tree_code pred_code;
464 tree lhs, rhs;
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
470 /* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
498 return false;
501 /* Build the function calls to GOMP_parallel_start etc to actually
502 generate the parallel operation. REGION is the parallel region
503 being expanded. BB is the block where to insert the code. WS_ARGS
504 will be set if this is a call to a combined parallel+workshare
505 construct; it contains the list of additional arguments needed by
506 the workshare construct. */
508 static void
509 expand_parallel_call (struct omp_region *region, basic_block bb,
510 gomp_parallel *entry_stmt,
511 vec<tree, va_gc> *ws_args)
513 tree t, t1, t2, val, cond, c, clauses, flags;
514 gimple_stmt_iterator gsi;
515 gimple *stmt;
516 enum built_in_function start_ix;
517 int start_ix2;
518 location_t clause_loc;
519 vec<tree, va_gc> *args;
521 clauses = gimple_omp_parallel_clauses (entry_stmt);
523 /* Determine what flavor of GOMP_parallel we will be
524 emitting. */
525 start_ix = BUILT_IN_GOMP_PARALLEL;
526 if (is_combined_parallel (region))
528 switch (region->inner->type)
530 case GIMPLE_OMP_FOR:
531 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
532 switch (region->inner->sched_kind)
534 case OMP_CLAUSE_SCHEDULE_RUNTIME:
535 start_ix2 = 3;
536 break;
537 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
538 case OMP_CLAUSE_SCHEDULE_GUIDED:
539 if (region->inner->sched_modifiers
540 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
542 start_ix2 = 3 + region->inner->sched_kind;
543 break;
545 /* FALLTHRU */
546 default:
547 start_ix2 = region->inner->sched_kind;
548 break;
550 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
551 start_ix = (enum built_in_function) start_ix2;
552 break;
553 case GIMPLE_OMP_SECTIONS:
554 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
555 break;
556 default:
557 gcc_unreachable ();
561 /* By default, the value of NUM_THREADS is zero (selected at run time)
562 and there is no conditional. */
563 cond = NULL_TREE;
564 val = build_int_cst (unsigned_type_node, 0);
565 flags = build_int_cst (unsigned_type_node, 0);
567 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
568 if (c)
569 cond = OMP_CLAUSE_IF_EXPR (c);
571 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
572 if (c)
574 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
575 clause_loc = OMP_CLAUSE_LOCATION (c);
577 else
578 clause_loc = gimple_location (entry_stmt);
580 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
581 if (c)
582 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
584 /* Ensure 'val' is of the correct type. */
585 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
587 /* If we found the clause 'if (cond)', build either
588 (cond != 0) or (cond ? val : 1u). */
589 if (cond)
591 cond = gimple_boolify (cond);
593 if (integer_zerop (val))
594 val = fold_build2_loc (clause_loc,
595 EQ_EXPR, unsigned_type_node, cond,
596 build_int_cst (TREE_TYPE (cond), 0));
597 else
599 basic_block cond_bb, then_bb, else_bb;
600 edge e, e_then, e_else;
601 tree tmp_then, tmp_else, tmp_join, tmp_var;
603 tmp_var = create_tmp_var (TREE_TYPE (val));
604 if (gimple_in_ssa_p (cfun))
606 tmp_then = make_ssa_name (tmp_var);
607 tmp_else = make_ssa_name (tmp_var);
608 tmp_join = make_ssa_name (tmp_var);
610 else
612 tmp_then = tmp_var;
613 tmp_else = tmp_var;
614 tmp_join = tmp_var;
617 e = split_block_after_labels (bb);
618 cond_bb = e->src;
619 bb = e->dest;
620 remove_edge (e);
622 then_bb = create_empty_bb (cond_bb);
623 else_bb = create_empty_bb (then_bb);
624 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
625 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
627 stmt = gimple_build_cond_empty (cond);
628 gsi = gsi_start_bb (cond_bb);
629 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
631 gsi = gsi_start_bb (then_bb);
632 expand_omp_build_assign (&gsi, tmp_then, val, true);
634 gsi = gsi_start_bb (else_bb);
635 expand_omp_build_assign (&gsi, tmp_else,
636 build_int_cst (unsigned_type_node, 1),
637 true);
639 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
640 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
641 add_bb_to_loop (then_bb, cond_bb->loop_father);
642 add_bb_to_loop (else_bb, cond_bb->loop_father);
643 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
644 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
646 if (gimple_in_ssa_p (cfun))
648 gphi *phi = create_phi_node (tmp_join, bb);
649 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
650 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
653 val = tmp_join;
656 gsi = gsi_start_bb (bb);
657 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
658 false, GSI_CONTINUE_LINKING);
661 gsi = gsi_last_bb (bb);
662 t = gimple_omp_parallel_data_arg (entry_stmt);
663 if (t == NULL)
664 t1 = null_pointer_node;
665 else
666 t1 = build_fold_addr_expr (t);
667 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
668 t2 = build_fold_addr_expr (child_fndecl);
670 vec_alloc (args, 4 + vec_safe_length (ws_args));
671 args->quick_push (t2);
672 args->quick_push (t1);
673 args->quick_push (val);
674 if (ws_args)
675 args->splice (*ws_args);
676 args->quick_push (flags);
678 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
679 builtin_decl_explicit (start_ix), args);
681 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
682 false, GSI_CONTINUE_LINKING);
684 if (hsa_gen_requested_p ()
685 && parallel_needs_hsa_kernel_p (region))
687 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
688 hsa_register_kernel (child_cnode);
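/* An illustrative, editor-added sketch of the calls built above (the
   .omp_data_o / child_fn names follow the examples earlier in this file;
   exact argument spellings vary):

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   and, for a combined parallel+for with a guided schedule,

     GOMP_parallel_loop_guided (child_fn, &.omp_data_o, num_threads,
				start, end, incr, chunk_size, flags);

   where the loop arguments inserted before FLAGS are the ws_args
   collected by get_ws_args_for.  */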
692 /* Insert a function call whose name is FUNC_NAME with the information from
693 ENTRY_STMT into the basic_block BB. */
695 static void
696 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
697 vec <tree, va_gc> *ws_args)
699 tree t, t1, t2;
700 gimple_stmt_iterator gsi;
701 vec <tree, va_gc> *args;
703 gcc_assert (vec_safe_length (ws_args) == 2);
704 tree func_name = (*ws_args)[0];
705 tree grain = (*ws_args)[1];
707 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
708 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
709 gcc_assert (count != NULL_TREE);
710 count = OMP_CLAUSE_OPERAND (count, 0);
712 gsi = gsi_last_bb (bb);
713 t = gimple_omp_parallel_data_arg (entry_stmt);
714 if (t == NULL)
715 t1 = null_pointer_node;
716 else
717 t1 = build_fold_addr_expr (t);
718 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
720 vec_alloc (args, 4);
721 args->quick_push (t2);
722 args->quick_push (t1);
723 args->quick_push (count);
724 args->quick_push (grain);
725 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
727 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
728 GSI_CONTINUE_LINKING);
731 /* Build the function call to GOMP_task to actually
732 generate the task operation. BB is the block where to insert the code. */
734 static void
735 expand_task_call (struct omp_region *region, basic_block bb,
736 gomp_task *entry_stmt)
738 tree t1, t2, t3;
739 gimple_stmt_iterator gsi;
740 location_t loc = gimple_location (entry_stmt);
742 tree clauses = gimple_omp_task_clauses (entry_stmt);
744 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
745 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
746 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
747 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
748 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
749 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
751 unsigned int iflags
752 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
753 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
754 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
756 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
757 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
758 tree num_tasks = NULL_TREE;
759 bool ull = false;
760 if (taskloop_p)
762 gimple *g = last_stmt (region->outer->entry);
763 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
764 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
765 struct omp_for_data fd;
766 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
767 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
768 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
769 OMP_CLAUSE__LOOPTEMP_);
770 startvar = OMP_CLAUSE_DECL (startvar);
771 endvar = OMP_CLAUSE_DECL (endvar);
772 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
773 if (fd.loop.cond_code == LT_EXPR)
774 iflags |= GOMP_TASK_FLAG_UP;
775 tree tclauses = gimple_omp_for_clauses (g);
776 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
777 if (num_tasks)
778 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
779 else
781 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
782 if (num_tasks)
784 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
785 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
787 else
788 num_tasks = integer_zero_node;
790 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
791 if (ifc == NULL_TREE)
792 iflags |= GOMP_TASK_FLAG_IF;
793 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
794 iflags |= GOMP_TASK_FLAG_NOGROUP;
795 ull = fd.iter_type == long_long_unsigned_type_node;
797 else if (priority)
798 iflags |= GOMP_TASK_FLAG_PRIORITY;
800 tree flags = build_int_cst (unsigned_type_node, iflags);
802 tree cond = boolean_true_node;
803 if (ifc)
805 if (taskloop_p)
807 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
808 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
809 build_int_cst (unsigned_type_node,
810 GOMP_TASK_FLAG_IF),
811 build_int_cst (unsigned_type_node, 0));
812 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
813 flags, t);
815 else
816 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
819 if (finalc)
821 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
822 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
823 build_int_cst (unsigned_type_node,
824 GOMP_TASK_FLAG_FINAL),
825 build_int_cst (unsigned_type_node, 0));
826 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
828 if (depend)
829 depend = OMP_CLAUSE_DECL (depend);
830 else
831 depend = build_int_cst (ptr_type_node, 0);
832 if (priority)
833 priority = fold_convert (integer_type_node,
834 OMP_CLAUSE_PRIORITY_EXPR (priority));
835 else
836 priority = integer_zero_node;
838 gsi = gsi_last_bb (bb);
839 tree t = gimple_omp_task_data_arg (entry_stmt);
840 if (t == NULL)
841 t2 = null_pointer_node;
842 else
843 t2 = build_fold_addr_expr_loc (loc, t);
844 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
845 t = gimple_omp_task_copy_fn (entry_stmt);
846 if (t == NULL)
847 t3 = null_pointer_node;
848 else
849 t3 = build_fold_addr_expr_loc (loc, t);
851 if (taskloop_p)
852 t = build_call_expr (ull
853 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
854 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
855 11, t1, t2, t3,
856 gimple_omp_task_arg_size (entry_stmt),
857 gimple_omp_task_arg_align (entry_stmt), flags,
858 num_tasks, priority, startvar, endvar, step);
859 else
860 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
861 9, t1, t2, t3,
862 gimple_omp_task_arg_size (entry_stmt),
863 gimple_omp_task_arg_align (entry_stmt), cond, flags,
864 depend, priority);
866 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
867 false, GSI_CONTINUE_LINKING);
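/* An illustrative, editor-added sketch of the non-taskloop call built
   above (argument names are descriptive, not the actual temporaries):

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
		if_cond, flags, depend, priority);

   For taskloops the GOMP_taskloop / GOMP_taskloop_ull variant instead
   receives FLAGS, NUM_TASKS (or the grainsize), PRIORITY and the START,
   END and STEP values extracted above.  */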
870 /* Chain all the DECLs in V by their DECL_CHAIN fields. */
872 static tree
873 vec2chain (vec<tree, va_gc> *v)
875 tree chain = NULL_TREE, t;
876 unsigned ix;
878 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
880 DECL_CHAIN (t) = chain;
881 chain = t;
884 return chain;
887 /* Remove barriers in REGION->EXIT's block. Note that this is only
888 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
889 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
890 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
891 removed. */
893 static void
894 remove_exit_barrier (struct omp_region *region)
896 gimple_stmt_iterator gsi;
897 basic_block exit_bb;
898 edge_iterator ei;
899 edge e;
900 gimple *stmt;
901 int any_addressable_vars = -1;
903 exit_bb = region->exit;
905 /* If the parallel region doesn't return, we don't have REGION->EXIT
906 block at all. */
907 if (! exit_bb)
908 return;
910 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
911 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
912 statements that can appear in between are extremely limited -- no
913 memory operations at all. Here, we allow nothing at all, so the
914 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
915 gsi = gsi_last_bb (exit_bb);
916 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
917 gsi_prev (&gsi);
918 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
919 return;
921 FOR_EACH_EDGE (e, ei, exit_bb->preds)
923 gsi = gsi_last_bb (e->src);
924 if (gsi_end_p (gsi))
925 continue;
926 stmt = gsi_stmt (gsi);
927 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
928 && !gimple_omp_return_nowait_p (stmt))
930 /* OpenMP 3.0 tasks unfortunately prevent this optimization
931 in many cases. If there could be tasks queued, the barrier
932 might be needed to let the tasks run before some local
933 variable of the parallel that the task uses as shared
934 runs out of scope. The task can be spawned either
935 from within the current function (this would be easy to check)
936 or from some function it calls and gets passed an address
937 of such a variable. */
938 if (any_addressable_vars < 0)
940 gomp_parallel *parallel_stmt
941 = as_a <gomp_parallel *> (last_stmt (region->entry));
942 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
943 tree local_decls, block, decl;
944 unsigned ix;
946 any_addressable_vars = 0;
947 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
948 if (TREE_ADDRESSABLE (decl))
950 any_addressable_vars = 1;
951 break;
953 for (block = gimple_block (stmt);
954 !any_addressable_vars
955 && block
956 && TREE_CODE (block) == BLOCK;
957 block = BLOCK_SUPERCONTEXT (block))
959 for (local_decls = BLOCK_VARS (block);
960 local_decls;
961 local_decls = DECL_CHAIN (local_decls))
962 if (TREE_ADDRESSABLE (local_decls))
964 any_addressable_vars = 1;
965 break;
967 if (block == gimple_block (parallel_stmt))
968 break;
971 if (!any_addressable_vars)
972 gimple_omp_return_set_nowait (stmt);
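/* An illustrative, editor-added example (user code assumed) of why the
   addressable-variable check above is needed:

     #pragma omp parallel
     {
       int local = 0;
       #pragma omp task shared (local)
	 local = 1;
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   LOCAL lives on the stack of the outlined child function and dies when
   that function returns, before the runtime's end-of-parallel barrier.
   The loop's trailing barrier is therefore what guarantees the task has
   run while LOCAL is still in scope, so it is only turned into a nowait
   return when no addressable local can be referenced by a task this
   way.  */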
977 static void
978 remove_exit_barriers (struct omp_region *region)
980 if (region->type == GIMPLE_OMP_PARALLEL)
981 remove_exit_barrier (region);
983 if (region->inner)
985 region = region->inner;
986 remove_exit_barriers (region);
987 while (region->next)
989 region = region->next;
990 remove_exit_barriers (region);
995 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
996 calls. These can't be declared as const functions, but
997 within one parallel body they are constant, so they can be
998 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
999 which are declared const. Similarly for the task body, except
1000 that in an untied task omp_get_thread_num () can change at any task
1001 scheduling point. */
1003 static void
1004 optimize_omp_library_calls (gimple *entry_stmt)
1006 basic_block bb;
1007 gimple_stmt_iterator gsi;
1008 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1009 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1010 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1011 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1012 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1013 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1014 OMP_CLAUSE_UNTIED) != NULL);
1016 FOR_EACH_BB_FN (bb, cfun)
1017 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1019 gimple *call = gsi_stmt (gsi);
1020 tree decl;
1022 if (is_gimple_call (call)
1023 && (decl = gimple_call_fndecl (call))
1024 && DECL_EXTERNAL (decl)
1025 && TREE_PUBLIC (decl)
1026 && DECL_INITIAL (decl) == NULL)
1028 tree built_in;
1030 if (DECL_NAME (decl) == thr_num_id)
1032 /* In #pragma omp task untied omp_get_thread_num () can change
1033 during the execution of the task region. */
1034 if (untied_task)
1035 continue;
1036 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1038 else if (DECL_NAME (decl) == num_thr_id)
1039 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1040 else
1041 continue;
1043 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1044 || gimple_call_num_args (call) != 0)
1045 continue;
1047 if (flag_exceptions && !TREE_NOTHROW (decl))
1048 continue;
1050 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1051 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1052 TREE_TYPE (TREE_TYPE (built_in))))
1053 continue;
1055 gimple_call_set_fndecl (call, built_in);
1060 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1061 regimplified. */
1063 static tree
1064 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1066 tree t = *tp;
1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1069 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1070 return t;
1072 if (TREE_CODE (t) == ADDR_EXPR)
1073 recompute_tree_invariant_for_addr_expr (t);
1075 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1076 return NULL_TREE;
1079 /* Insert TO = FROM assignment before or after *GSI_P, depending on AFTER. */
1081 static void
1082 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1083 bool after)
1085 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1086 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1087 !after, after ? GSI_CONTINUE_LINKING
1088 : GSI_SAME_STMT);
1089 gimple *stmt = gimple_build_assign (to, from);
1090 if (after)
1091 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1092 else
1093 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1094 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1095 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1097 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1098 gimple_regimplify_operands (stmt, &gsi);
1102 /* Expand the OpenMP parallel or task directive starting at REGION. */
1104 static void
1105 expand_omp_taskreg (struct omp_region *region)
1107 basic_block entry_bb, exit_bb, new_bb;
1108 struct function *child_cfun;
1109 tree child_fn, block, t;
1110 gimple_stmt_iterator gsi;
1111 gimple *entry_stmt, *stmt;
1112 edge e;
1113 vec<tree, va_gc> *ws_args;
1115 entry_stmt = last_stmt (region->entry);
1116 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1117 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1119 entry_bb = region->entry;
1120 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1121 exit_bb = region->cont;
1122 else
1123 exit_bb = region->exit;
1125 bool is_cilk_for
1126 = (flag_cilkplus
1127 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1128 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1129 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1131 if (is_cilk_for)
1132 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1133 and the inner statement contains the name of the built-in function
1134 and grain. */
1135 ws_args = region->inner->ws_args;
1136 else if (is_combined_parallel (region))
1137 ws_args = region->ws_args;
1138 else
1139 ws_args = NULL;
1141 if (child_cfun->cfg)
1143 /* Due to inlining, it may happen that we have already outlined
1144 the region, in which case all we need to do is make the
1145 sub-graph unreachable and emit the parallel call. */
1146 edge entry_succ_e, exit_succ_e;
1148 entry_succ_e = single_succ_edge (entry_bb);
1150 gsi = gsi_last_bb (entry_bb);
1151 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1152 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1153 gsi_remove (&gsi, true);
1155 new_bb = entry_bb;
1156 if (exit_bb)
1158 exit_succ_e = single_succ_edge (exit_bb);
1159 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1161 remove_edge_and_dominated_blocks (entry_succ_e);
1163 else
1165 unsigned srcidx, dstidx, num;
1167 /* If the parallel region needs data sent from the parent
1168 function, then the very first statement (except possible
1169 tree profile counter updates) of the parallel body
1170 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1171 &.OMP_DATA_O is passed as an argument to the child function,
1172 we need to replace it with the argument as seen by the child
1173 function.
1175 In most cases, this will end up being the identity assignment
1176 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1177 a function call that has been inlined, the original PARM_DECL
1178 .OMP_DATA_I may have been converted into a different local
1179 variable. In which case, we need to keep the assignment. */
1180 if (gimple_omp_taskreg_data_arg (entry_stmt))
1182 basic_block entry_succ_bb
1183 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1184 : FALLTHRU_EDGE (entry_bb)->dest;
1185 tree arg;
1186 gimple *parcopy_stmt = NULL;
1188 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1190 gimple *stmt;
1192 gcc_assert (!gsi_end_p (gsi));
1193 stmt = gsi_stmt (gsi);
1194 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1195 continue;
1197 if (gimple_num_ops (stmt) == 2)
1199 tree arg = gimple_assign_rhs1 (stmt);
1201 /* We ignore the subcode because we're
1202 effectively doing a STRIP_NOPS. */
1204 if (TREE_CODE (arg) == ADDR_EXPR
1205 && TREE_OPERAND (arg, 0)
1206 == gimple_omp_taskreg_data_arg (entry_stmt))
1208 parcopy_stmt = stmt;
1209 break;
1214 gcc_assert (parcopy_stmt != NULL);
1215 arg = DECL_ARGUMENTS (child_fn);
1217 if (!gimple_in_ssa_p (cfun))
1219 if (gimple_assign_lhs (parcopy_stmt) == arg)
1220 gsi_remove (&gsi, true);
1221 else
1223 /* ?? Is setting the subcode really necessary ?? */
1224 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1225 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1228 else
1230 tree lhs = gimple_assign_lhs (parcopy_stmt);
1231 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1232 /* We'd like to set the rhs to the default def in the child_fn,
1233 but it's too early to create ssa names in the child_fn.
1234 Instead, we set the rhs to the parm. In
1235 move_sese_region_to_fn, we introduce a default def for the
1236 parm, map the parm to its default def, and once we encounter
1237 this stmt, replace the parm with the default def. */
1238 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1239 update_stmt (parcopy_stmt);
1243 /* Declare local variables needed in CHILD_CFUN. */
1244 block = DECL_INITIAL (child_fn);
1245 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1246 /* The gimplifier could record temporaries in parallel/task block
1247 rather than in containing function's local_decls chain,
1248 which would mean cgraph missed finalizing them. Do it now. */
1249 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1250 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1251 varpool_node::finalize_decl (t);
1252 DECL_SAVED_TREE (child_fn) = NULL;
1253 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1254 gimple_set_body (child_fn, NULL);
1255 TREE_USED (block) = 1;
1257 /* Reset DECL_CONTEXT on function arguments. */
1258 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1259 DECL_CONTEXT (t) = child_fn;
1261 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262 so that it can be moved to the child function. */
1263 gsi = gsi_last_bb (entry_bb);
1264 stmt = gsi_stmt (gsi);
1265 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1266 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1267 e = split_block (entry_bb, stmt);
1268 gsi_remove (&gsi, true);
1269 entry_bb = e->dest;
1270 edge e2 = NULL;
1271 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1272 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1273 else
1275 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1276 gcc_assert (e2->dest == region->exit);
1277 remove_edge (BRANCH_EDGE (entry_bb));
1278 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1279 gsi = gsi_last_bb (region->exit);
1280 gcc_assert (!gsi_end_p (gsi)
1281 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1282 gsi_remove (&gsi, true);
1285 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1286 if (exit_bb)
1288 gsi = gsi_last_bb (exit_bb);
1289 gcc_assert (!gsi_end_p (gsi)
1290 && (gimple_code (gsi_stmt (gsi))
1291 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1292 stmt = gimple_build_return (NULL);
1293 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1294 gsi_remove (&gsi, true);
1297 /* Move the parallel region into CHILD_CFUN. */
1299 if (gimple_in_ssa_p (cfun))
1301 init_tree_ssa (child_cfun);
1302 init_ssa_operands (child_cfun);
1303 child_cfun->gimple_df->in_ssa_p = true;
1304 block = NULL_TREE;
1306 else
1307 block = gimple_block (entry_stmt);
1309 /* Make sure to generate early debug for the function before
1310 outlining anything. */
1311 if (! gimple_in_ssa_p (cfun))
1312 (*debug_hooks->early_global_decl) (cfun->decl);
1314 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1315 if (exit_bb)
1316 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1317 if (e2)
1319 basic_block dest_bb = e2->dest;
1320 if (!exit_bb)
1321 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1322 remove_edge (e2);
1323 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1325 /* When the OMP expansion process cannot guarantee an up-to-date
1326 loop tree, arrange for the child function to fix up loops. */
1327 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1328 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1330 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1331 num = vec_safe_length (child_cfun->local_decls);
1332 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1334 t = (*child_cfun->local_decls)[srcidx];
1335 if (DECL_CONTEXT (t) == cfun->decl)
1336 continue;
1337 if (srcidx != dstidx)
1338 (*child_cfun->local_decls)[dstidx] = t;
1339 dstidx++;
1341 if (dstidx != num)
1342 vec_safe_truncate (child_cfun->local_decls, dstidx);
1344 /* Inform the callgraph about the new function. */
1345 child_cfun->curr_properties = cfun->curr_properties;
1346 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1347 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1348 cgraph_node *node = cgraph_node::get_create (child_fn);
1349 node->parallelized_function = 1;
1350 cgraph_node::add_new_function (child_fn, true);
1352 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1353 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1355 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1356 fixed in a following pass. */
1357 push_cfun (child_cfun);
1358 if (need_asm)
1359 assign_assembler_name_if_needed (child_fn);
1361 if (optimize)
1362 optimize_omp_library_calls (entry_stmt);
1363 cgraph_edge::rebuild_edges ();
1365 /* Some EH regions might become dead, see PR34608. If
1366 pass_cleanup_cfg isn't the first pass to happen with the
1367 new child, these dead EH edges might cause problems.
1368 Clean them up now. */
1369 if (flag_exceptions)
1371 basic_block bb;
1372 bool changed = false;
1374 FOR_EACH_BB_FN (bb, cfun)
1375 changed |= gimple_purge_dead_eh_edges (bb);
1376 if (changed)
1377 cleanup_tree_cfg ();
1379 if (gimple_in_ssa_p (cfun))
1380 update_ssa (TODO_update_ssa);
1381 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1382 verify_loop_structure ();
1383 pop_cfun ();
1385 if (dump_file && !gimple_in_ssa_p (cfun))
1387 omp_any_child_fn_dumped = true;
1388 dump_function_header (dump_file, child_fn, dump_flags);
1389 dump_function_to_file (child_fn, dump_file, dump_flags);
1393 /* Emit a library call to launch the children threads. */
1394 if (is_cilk_for)
1395 expand_cilk_for_call (new_bb,
1396 as_a <gomp_parallel *> (entry_stmt), ws_args);
1397 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1398 expand_parallel_call (region, new_bb,
1399 as_a <gomp_parallel *> (entry_stmt), ws_args);
1400 else
1401 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1402 if (gimple_in_ssa_p (cfun))
1403 update_ssa (TODO_update_ssa_only_virtuals);
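/* An illustrative, editor-added sketch of the overall outlining performed
   above (the .omp_data_* and *.omp_fn.* names follow the conventions used
   in comments elsewhere in this file; exact spellings are not guaranteed):

     #pragma omp parallel shared (a)
       a[i] += 1;

   becomes, in the parent function,

     .omp_data_o.a = a;
     GOMP_parallel (foo.omp_fn.0, &.omp_data_o, 0, 0);

   while the body is moved into a new child function

     void foo.omp_fn.0 (struct .omp_data_s *.omp_data_i)
     { ... .omp_data_i->a[i] += 1; ... }  */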
1406 /* Information about members of an OpenACC collapsed loop nest. */
1408 struct oacc_collapse
1410 tree base; /* Base value. */
1411 tree iters; /* Number of steps. */
1412 tree step; /* Step size. */
1413 tree tile; /* Tile increment (if tiled). */
1414 tree outer; /* Tile iterator var. */
1417 /* Helper for expand_oacc_for. Determine collapsed loop information.
1418 Fill in COUNTS array. Emit any initialization code before GSI.
1419 Return the calculated outer loop bound of BOUND_TYPE. */
1421 static tree
1422 expand_oacc_collapse_init (const struct omp_for_data *fd,
1423 gimple_stmt_iterator *gsi,
1424 oacc_collapse *counts, tree bound_type,
1425 location_t loc)
1427 tree tiling = fd->tiling;
1428 tree total = build_int_cst (bound_type, 1);
1429 int ix;
1431 gcc_assert (integer_onep (fd->loop.step));
1432 gcc_assert (integer_zerop (fd->loop.n1));
1434 /* When tiling, the first operand of the tile clause applies to the
1435 innermost loop, and we work outwards from there. Seems
1436 backwards, but whatever. */
1437 for (ix = fd->collapse; ix--;)
1439 const omp_for_data_loop *loop = &fd->loops[ix];
1441 tree iter_type = TREE_TYPE (loop->v);
1442 tree diff_type = iter_type;
1443 tree plus_type = iter_type;
1445 gcc_assert (loop->cond_code == fd->loop.cond_code);
1447 if (POINTER_TYPE_P (iter_type))
1448 plus_type = sizetype;
1449 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1450 diff_type = signed_type_for (diff_type);
1452 if (tiling)
1454 tree num = build_int_cst (integer_type_node, fd->collapse);
1455 tree loop_no = build_int_cst (integer_type_node, ix);
1456 tree tile = TREE_VALUE (tiling);
1457 gcall *call
1458 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1459 /* gwv-outer=*/integer_zero_node,
1460 /* gwv-inner=*/integer_zero_node);
1462 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1463 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1464 gimple_call_set_lhs (call, counts[ix].tile);
1465 gimple_set_location (call, loc);
1466 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1468 tiling = TREE_CHAIN (tiling);
1470 else
1472 counts[ix].tile = NULL;
1473 counts[ix].outer = loop->v;
1476 tree b = loop->n1;
1477 tree e = loop->n2;
1478 tree s = loop->step;
1479 bool up = loop->cond_code == LT_EXPR;
1480 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1481 bool negating;
1482 tree expr;
1484 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1485 true, GSI_SAME_STMT);
1486 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1487 true, GSI_SAME_STMT);
1489 /* Convert the step, avoiding possible unsigned->signed overflow. */
1490 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1491 if (negating)
1492 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1493 s = fold_convert (diff_type, s);
1494 if (negating)
1495 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1496 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1497 true, GSI_SAME_STMT);
1499 /* Determine the range, avoiding possible unsigned->signed overflow. */
1500 negating = !up && TYPE_UNSIGNED (iter_type);
1501 expr = fold_build2 (MINUS_EXPR, plus_type,
1502 fold_convert (plus_type, negating ? b : e),
1503 fold_convert (plus_type, negating ? e : b));
1504 expr = fold_convert (diff_type, expr);
1505 if (negating)
1506 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1507 tree range = force_gimple_operand_gsi
1508 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1510 /* Determine number of iterations. */
1511 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1512 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1513 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1515 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1516 true, GSI_SAME_STMT);
1518 counts[ix].base = b;
1519 counts[ix].iters = iters;
1520 counts[ix].step = s;
1522 total = fold_build2 (MULT_EXPR, bound_type, total,
1523 fold_convert (bound_type, iters));
1526 return total;
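/* An illustrative, editor-added example of the bound computed above
   (user loops assumed; all members of a collapse share the same
   condition code):

     #pragma acc loop collapse (2)
     for (i = 0; i < 4; i++)
       for (j = 2; j < 10; j++) ...

   gives counts[0].iters = 4 and counts[1].iters = 8, so the returned
   outer bound is total = 4 * 8 = 32 logical iterations.  */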
1529 /* Emit initializers for collapsed loop members. INNER is true if
1530 this is for the element loop of a TILE. IVAR is the outer
1531 loop iteration variable, from which collapsed loop iteration values
1532 are calculated. COUNTS array has been initialized by
1533 expand_oacc_collapse_init. */
1535 static void
1536 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1537 gimple_stmt_iterator *gsi,
1538 const oacc_collapse *counts, tree ivar)
1540 tree ivar_type = TREE_TYPE (ivar);
1542 /* The most rapidly changing iteration variable is the innermost
1543 one. */
1544 for (int ix = fd->collapse; ix--;)
1546 const omp_for_data_loop *loop = &fd->loops[ix];
1547 const oacc_collapse *collapse = &counts[ix];
1548 tree v = inner ? loop->v : collapse->outer;
1549 tree iter_type = TREE_TYPE (v);
1550 tree diff_type = TREE_TYPE (collapse->step);
1551 tree plus_type = iter_type;
1552 enum tree_code plus_code = PLUS_EXPR;
1553 tree expr;
1555 if (POINTER_TYPE_P (iter_type))
1557 plus_code = POINTER_PLUS_EXPR;
1558 plus_type = sizetype;
1561 expr = ivar;
1562 if (ix)
1564 tree mod = fold_convert (ivar_type, collapse->iters);
1565 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1566 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1567 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1568 true, GSI_SAME_STMT);
1571 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1572 collapse->step);
1573 expr = fold_build2 (plus_code, iter_type,
1574 inner ? collapse->outer : collapse->base,
1575 fold_convert (plus_type, expr));
1576 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1577 true, GSI_SAME_STMT);
1578 gassign *ass = gimple_build_assign (v, expr);
1579 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
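/* An illustrative, editor-added continuation of the 4 x 8 example above:
   for a combined iteration value IVAR = 13 the code generated here
   computes, innermost loop first,

     13 % 8 = 5   ->  j = 2 + 5 * 1 = 7
     13 / 8 = 1   ->  i = 0 + 1 * 1 = 1

   i.e. the collapsed index 13 maps back to (i, j) = (1, 7).  */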
1583 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1584 of the combined collapse > 1 loop constructs, generate code like:
1585 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1586 if (cond3 is <)
1587 adj = STEP3 - 1;
1588 else
1589 adj = STEP3 + 1;
1590 count3 = (adj + N32 - N31) / STEP3;
1591 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1592 if (cond2 is <)
1593 adj = STEP2 - 1;
1594 else
1595 adj = STEP2 + 1;
1596 count2 = (adj + N22 - N21) / STEP2;
1597 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1598 if (cond1 is <)
1599 adj = STEP1 - 1;
1600 else
1601 adj = STEP1 + 1;
1602 count1 = (adj + N12 - N11) / STEP1;
1603 count = count1 * count2 * count3;
1604 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1605 count = 0;
1606 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1607 of the combined loop constructs, just initialize COUNTS array
1608 from the _looptemp_ clauses. */
1610 /* NOTE: It *could* be better to moosh all of the BBs together,
1611 creating one larger BB with all the computation and the unexpected
1612 jump at the end. I.e.
1614 bool zero3, zero2, zero1, zero;
1616 zero3 = N32 c3 N31;
1617 count3 = (N32 - N31) /[cl] STEP3;
1618 zero2 = N22 c2 N21;
1619 count2 = (N22 - N21) /[cl] STEP2;
1620 zero1 = N12 c1 N11;
1621 count1 = (N12 - N11) /[cl] STEP1;
1622 zero = zero3 || zero2 || zero1;
1623 count = count1 * count2 * count3;
1624 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1626 After all, we expect the zero=false, and thus we expect to have to
1627 evaluate all of the comparison expressions, so short-circuiting
1628 oughtn't be a win. Since the condition isn't protecting a
1629 denominator, we're not concerned about divide-by-zero, so we can
1630 fully evaluate count even if a numerator turned out to be wrong.
1632 It seems like putting this all together would create much better
1633 scheduling opportunities, and less pressure on the chip's branch
1634 predictor. */
1636 static void
1637 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1638 basic_block &entry_bb, tree *counts,
1639 basic_block &zero_iter1_bb, int &first_zero_iter1,
1640 basic_block &zero_iter2_bb, int &first_zero_iter2,
1641 basic_block &l2_dom_bb)
1643 tree t, type = TREE_TYPE (fd->loop.v);
1644 edge e, ne;
1645 int i;
1647 /* Collapsed loops need work for expansion into SSA form. */
1648 gcc_assert (!gimple_in_ssa_p (cfun));
1650 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1651 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1653 gcc_assert (fd->ordered == 0);
1654 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1655 isn't supposed to be handled, as the inner loop doesn't
1656 use it. */
1657 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1658 OMP_CLAUSE__LOOPTEMP_);
1659 gcc_assert (innerc);
1660 for (i = 0; i < fd->collapse; i++)
1662 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1663 OMP_CLAUSE__LOOPTEMP_);
1664 gcc_assert (innerc);
1665 if (i)
1666 counts[i] = OMP_CLAUSE_DECL (innerc);
1667 else
1668 counts[0] = NULL_TREE;
1670 return;
1673 for (i = fd->collapse; i < fd->ordered; i++)
1675 tree itype = TREE_TYPE (fd->loops[i].v);
1676 counts[i] = NULL_TREE;
1677 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1678 fold_convert (itype, fd->loops[i].n1),
1679 fold_convert (itype, fd->loops[i].n2));
1680 if (t && integer_zerop (t))
1682 for (i = fd->collapse; i < fd->ordered; i++)
1683 counts[i] = build_int_cst (type, 0);
1684 break;
1687 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1689 tree itype = TREE_TYPE (fd->loops[i].v);
1691 if (i >= fd->collapse && counts[i])
1692 continue;
1693 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1694 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1695 fold_convert (itype, fd->loops[i].n1),
1696 fold_convert (itype, fd->loops[i].n2)))
1697 == NULL_TREE || !integer_onep (t)))
1699 gcond *cond_stmt;
1700 tree n1, n2;
1701 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1702 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1703 true, GSI_SAME_STMT);
1704 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1705 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1706 true, GSI_SAME_STMT);
1707 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1708 NULL_TREE, NULL_TREE);
1709 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1710 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1711 expand_omp_regimplify_p, NULL, NULL)
1712 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1713 expand_omp_regimplify_p, NULL, NULL))
1715 *gsi = gsi_for_stmt (cond_stmt);
1716 gimple_regimplify_operands (cond_stmt, gsi);
1718 e = split_block (entry_bb, cond_stmt);
1719 basic_block &zero_iter_bb
1720 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1721 int &first_zero_iter
1722 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1723 if (zero_iter_bb == NULL)
1725 gassign *assign_stmt;
1726 first_zero_iter = i;
1727 zero_iter_bb = create_empty_bb (entry_bb);
1728 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1729 *gsi = gsi_after_labels (zero_iter_bb);
1730 if (i < fd->collapse)
1731 assign_stmt = gimple_build_assign (fd->loop.n2,
1732 build_zero_cst (type));
1733 else
1735 counts[i] = create_tmp_reg (type, ".count");
1736 assign_stmt
1737 = gimple_build_assign (counts[i], build_zero_cst (type));
1739 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1740 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1741 entry_bb);
1743 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1744 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1745 e->flags = EDGE_TRUE_VALUE;
1746 e->probability = REG_BR_PROB_BASE - ne->probability;
1747 if (l2_dom_bb == NULL)
1748 l2_dom_bb = entry_bb;
1749 entry_bb = e->dest;
1750 *gsi = gsi_last_bb (entry_bb);
1753 if (POINTER_TYPE_P (itype))
1754 itype = signed_type_for (itype);
1755 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1756 ? -1 : 1));
1757 t = fold_build2 (PLUS_EXPR, itype,
1758 fold_convert (itype, fd->loops[i].step), t);
1759 t = fold_build2 (PLUS_EXPR, itype, t,
1760 fold_convert (itype, fd->loops[i].n2));
1761 t = fold_build2 (MINUS_EXPR, itype, t,
1762 fold_convert (itype, fd->loops[i].n1));
1763 /* ?? We could probably use CEIL_DIV_EXPR instead of
1764 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1765 generate the same code in the end because generically we
1766 don't know that the values involved must be negative for
1767 GT?? */
1768 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1769 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1770 fold_build1 (NEGATE_EXPR, itype, t),
1771 fold_build1 (NEGATE_EXPR, itype,
1772 fold_convert (itype,
1773 fd->loops[i].step)));
1774 else
1775 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1776 fold_convert (itype, fd->loops[i].step));
1777 t = fold_convert (type, t);
1778 if (TREE_CODE (t) == INTEGER_CST)
1779 counts[i] = t;
1780 else
1782 if (i < fd->collapse || i != first_zero_iter2)
1783 counts[i] = create_tmp_reg (type, ".count");
1784 expand_omp_build_assign (gsi, counts[i], t);
1786 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1788 if (i == 0)
1789 t = counts[0];
1790 else
1791 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1792 expand_omp_build_assign (gsi, fd->loop.n2, t);
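/* An illustrative, editor-added example of the counts computed above
   (user loop assumed):

     #pragma omp for collapse (3)
     for (i = 0; i < 3; i++)
       for (j = 0; j < 4; j++)
	 for (k = 0; k < 5; k++) ...

   yields per-loop counts of 3, 4 and 5 and an overall iteration count of
   3 * 4 * 5 = 60; if any of the guarding comparisons shows an empty
   range, control branches to ZERO_ITER_BB and the count is zero.  */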
1797 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1798 T = V;
1799 V3 = N31 + (T % count3) * STEP3;
1800 T = T / count3;
1801 V2 = N21 + (T % count2) * STEP2;
1802 T = T / count2;
1803 V1 = N11 + T * STEP1;
1804 if this loop doesn't have an inner loop construct combined with it.
1805 If it does have an inner loop construct combined with it and the
1806 iteration count isn't known constant, store values from counts array
1807 into its _looptemp_ temporaries instead. */
1809 static void
1810 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1811 tree *counts, gimple *inner_stmt, tree startvar)
1813 int i;
1814 if (gimple_omp_for_combined_p (fd->for_stmt))
1816 /* If fd->loop.n2 is constant, then no propagation of the counts
1817 is needed; they are constant. */
1818 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1819 return;
1821 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1822 ? gimple_omp_taskreg_clauses (inner_stmt)
1823 : gimple_omp_for_clauses (inner_stmt);
1824 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1825 isn't supposed to be handled, as the inner loop doesn't
1826 use it. */
1827 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1828 gcc_assert (innerc);
1829 for (i = 0; i < fd->collapse; i++)
1831 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1832 OMP_CLAUSE__LOOPTEMP_);
1833 gcc_assert (innerc);
1834 if (i)
1836 tree tem = OMP_CLAUSE_DECL (innerc);
1837 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1838 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1839 false, GSI_CONTINUE_LINKING);
1840 gassign *stmt = gimple_build_assign (tem, t);
1841 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1844 return;
1847 tree type = TREE_TYPE (fd->loop.v);
1848 tree tem = create_tmp_reg (type, ".tem");
1849 gassign *stmt = gimple_build_assign (tem, startvar);
1850 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1852 for (i = fd->collapse - 1; i >= 0; i--)
1854 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1855 itype = vtype;
1856 if (POINTER_TYPE_P (vtype))
1857 itype = signed_type_for (vtype);
1858 if (i != 0)
1859 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1860 else
1861 t = tem;
1862 t = fold_convert (itype, t);
1863 t = fold_build2 (MULT_EXPR, itype, t,
1864 fold_convert (itype, fd->loops[i].step));
1865 if (POINTER_TYPE_P (vtype))
1866 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1867 else
1868 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1869 t = force_gimple_operand_gsi (gsi, t,
1870 DECL_P (fd->loops[i].v)
1871 && TREE_ADDRESSABLE (fd->loops[i].v),
1872 NULL_TREE, false,
1873 GSI_CONTINUE_LINKING);
1874 stmt = gimple_build_assign (fd->loops[i].v, t);
1875 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1876 if (i != 0)
1878 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1879 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1880 false, GSI_CONTINUE_LINKING);
1881 stmt = gimple_build_assign (tem, t);
1882 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1887 /* Helper function for expand_omp_for_*. Generate code like:
1888 L10:
1889 V3 += STEP3;
1890 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891 L11:
1892 V3 = N31;
1893 V2 += STEP2;
1894 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1895 L12:
1896 V2 = N21;
1897 V1 += STEP1;
1898 goto BODY_BB; */
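/* Informally (added note, not from the original sources): this is the
   usual "odometer" update for a collapse(N) nest.  The innermost variable
   is stepped first; whenever a variable runs past its bound it is reset
   to its lower bound and the next outer variable is stepped, and control
   always returns to BODY_BB because the collapsed iteration count has
   already been checked by the caller in CONT_BB.  */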
1900 static basic_block
1901 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1902 basic_block body_bb)
1904 basic_block last_bb, bb, collapse_bb = NULL;
1905 int i;
1906 gimple_stmt_iterator gsi;
1907 edge e;
1908 tree t;
1909 gimple *stmt;
1911 last_bb = cont_bb;
1912 for (i = fd->collapse - 1; i >= 0; i--)
1914 tree vtype = TREE_TYPE (fd->loops[i].v);
1916 bb = create_empty_bb (last_bb);
1917 add_bb_to_loop (bb, last_bb->loop_father);
1918 gsi = gsi_start_bb (bb);
1920 if (i < fd->collapse - 1)
1922 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1923 e->probability = REG_BR_PROB_BASE / 8;
1925 t = fd->loops[i + 1].n1;
1926 t = force_gimple_operand_gsi (&gsi, t,
1927 DECL_P (fd->loops[i + 1].v)
1928 && TREE_ADDRESSABLE (fd->loops[i
1929 + 1].v),
1930 NULL_TREE, false,
1931 GSI_CONTINUE_LINKING);
1932 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1933 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1935 else
1936 collapse_bb = bb;
1938 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1940 if (POINTER_TYPE_P (vtype))
1941 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1942 else
1943 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1944 t = force_gimple_operand_gsi (&gsi, t,
1945 DECL_P (fd->loops[i].v)
1946 && TREE_ADDRESSABLE (fd->loops[i].v),
1947 NULL_TREE, false, GSI_CONTINUE_LINKING);
1948 stmt = gimple_build_assign (fd->loops[i].v, t);
1949 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1951 if (i > 0)
1953 t = fd->loops[i].n2;
1954 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 tree v = fd->loops[i].v;
1957 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1958 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1959 false, GSI_CONTINUE_LINKING);
1960 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1961 stmt = gimple_build_cond_empty (t);
1962 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1963 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1964 e->probability = REG_BR_PROB_BASE * 7 / 8;
1966 else
1967 make_edge (bb, body_bb, EDGE_FALLTHRU);
1968 last_bb = bb;
1971 return collapse_bb;
1974 /* Expand #pragma omp ordered depend(source). */
1976 static void
1977 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1978 tree *counts, location_t loc)
1980 enum built_in_function source_ix
1981 = fd->iter_type == long_integer_type_node
1982 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1983 gimple *g
1984 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1985 build_fold_addr_expr (counts[fd->ordered]));
1986 gimple_set_location (g, loc);
1987 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1990 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
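/* An illustrative user-level example (added note, not from the original
   sources):
     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
           ... use a[i-1][j] and a[i][j-1] ...
           #pragma omp ordered depend(source)
         }
   Each depend(sink: ...) vector is expanded below into a (possibly
   conditional) GOMP_doacross_wait/GOMP_doacross_ull_wait call carrying
   the logical iteration coordinates to wait for.  */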
1992 static void
1993 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1994 tree *counts, tree c, location_t loc)
1996 auto_vec<tree, 10> args;
1997 enum built_in_function sink_ix
1998 = fd->iter_type == long_integer_type_node
1999 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2000 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2001 int i;
2002 gimple_stmt_iterator gsi2 = *gsi;
2003 bool warned_step = false;
2005 for (i = 0; i < fd->ordered; i++)
2007 tree step = NULL_TREE;
2008 off = TREE_PURPOSE (deps);
2009 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2011 step = TREE_OPERAND (off, 1);
2012 off = TREE_OPERAND (off, 0);
2014 if (!integer_zerop (off))
2016 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2017 || fd->loops[i].cond_code == GT_EXPR);
2018 bool forward = fd->loops[i].cond_code == LT_EXPR;
2019 if (step)
2021 /* Non-simple Fortran DO loops. If step is variable,
2022 we don't know even the direction at compile time, so we
2023 can't warn. */
2024 if (TREE_CODE (step) != INTEGER_CST)
2025 break;
2026 forward = tree_int_cst_sgn (step) != -1;
2028 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2029 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2030 "lexically later iteration");
2031 break;
2033 deps = TREE_CHAIN (deps);
2035 /* If all offsets corresponding to the collapsed loops are zero,
2036 this depend clause can be ignored. FIXME: but there is still a
2037 flush needed. We need to emit one __sync_synchronize () for it
2038 though (perhaps conditionally)? Solve this together with the
2039 conservative dependence folding optimization.
2040 if (i >= fd->collapse)
2041 return; */
2043 deps = OMP_CLAUSE_DECL (c);
2044 gsi_prev (&gsi2);
2045 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2046 edge e2 = split_block_after_labels (e1->dest);
2048 gsi2 = gsi_after_labels (e1->dest);
2049 *gsi = gsi_last_bb (e1->src);
2050 for (i = 0; i < fd->ordered; i++)
2052 tree itype = TREE_TYPE (fd->loops[i].v);
2053 tree step = NULL_TREE;
2054 tree orig_off = NULL_TREE;
2055 if (POINTER_TYPE_P (itype))
2056 itype = sizetype;
2057 if (i)
2058 deps = TREE_CHAIN (deps);
2059 off = TREE_PURPOSE (deps);
2060 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2062 step = TREE_OPERAND (off, 1);
2063 off = TREE_OPERAND (off, 0);
2064 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2065 && integer_onep (fd->loops[i].step)
2066 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2068 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2069 if (step)
2071 off = fold_convert_loc (loc, itype, off);
2072 orig_off = off;
2073 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2076 if (integer_zerop (off))
2077 t = boolean_true_node;
2078 else
2080 tree a;
2081 tree co = fold_convert_loc (loc, itype, off);
2082 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2084 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2085 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2086 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2087 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2088 co);
2090 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2091 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2092 fd->loops[i].v, co);
2093 else
2094 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2095 fd->loops[i].v, co);
2096 if (step)
2098 tree t1, t2;
2099 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2100 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2101 fd->loops[i].n1);
2102 else
2103 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2104 fd->loops[i].n2);
2105 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2106 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2107 fd->loops[i].n2);
2108 else
2109 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2110 fd->loops[i].n1);
2111 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2112 step, build_int_cst (TREE_TYPE (step), 0));
2113 if (TREE_CODE (step) != INTEGER_CST)
2115 t1 = unshare_expr (t1);
2116 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2117 false, GSI_CONTINUE_LINKING);
2118 t2 = unshare_expr (t2);
2119 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2120 false, GSI_CONTINUE_LINKING);
2122 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2123 t, t2, t1);
2125 else if (fd->loops[i].cond_code == LT_EXPR)
2127 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2128 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2129 fd->loops[i].n1);
2130 else
2131 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2132 fd->loops[i].n2);
2134 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2135 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2136 fd->loops[i].n2);
2137 else
2138 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2139 fd->loops[i].n1);
2141 if (cond)
2142 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2143 else
2144 cond = t;
2146 off = fold_convert_loc (loc, itype, off);
2148 if (step
2149 || (fd->loops[i].cond_code == LT_EXPR
2150 ? !integer_onep (fd->loops[i].step)
2151 : !integer_minus_onep (fd->loops[i].step)))
2153 if (step == NULL_TREE
2154 && TYPE_UNSIGNED (itype)
2155 && fd->loops[i].cond_code == GT_EXPR)
2156 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2157 fold_build1_loc (loc, NEGATE_EXPR, itype,
2158 s));
2159 else
2160 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2161 orig_off ? orig_off : off, s);
2162 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2163 build_int_cst (itype, 0));
2164 if (integer_zerop (t) && !warned_step)
2166 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2167 "in the iteration space");
2168 warned_step = true;
2170 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2171 cond, t);
2174 if (i <= fd->collapse - 1 && fd->collapse > 1)
2175 t = fd->loop.v;
2176 else if (counts[i])
2177 t = counts[i];
2178 else
2180 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2181 fd->loops[i].v, fd->loops[i].n1);
2182 t = fold_convert_loc (loc, fd->iter_type, t);
2184 if (step)
2185 /* We have divided off by step already earlier. */;
2186 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2187 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2188 fold_build1_loc (loc, NEGATE_EXPR, itype,
2189 s));
2190 else
2191 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2192 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2193 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2194 off = fold_convert_loc (loc, fd->iter_type, off);
2195 if (i <= fd->collapse - 1 && fd->collapse > 1)
2197 if (i)
2198 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2199 off);
2200 if (i < fd->collapse - 1)
2202 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2203 counts[i]);
2204 continue;
2207 off = unshare_expr (off);
2208 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2209 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2210 true, GSI_SAME_STMT);
2211 args.safe_push (t);
2213 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2214 gimple_set_location (g, loc);
2215 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2217 cond = unshare_expr (cond);
2218 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2219 GSI_CONTINUE_LINKING);
2220 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2221 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2222 e3->probability = REG_BR_PROB_BASE / 8;
2223 e1->probability = REG_BR_PROB_BASE - e3->probability;
2224 e1->flags = EDGE_TRUE_VALUE;
2225 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2227 *gsi = gsi_after_labels (e2->dest);
2230 /* Expand all #pragma omp ordered depend(source) and
2231 #pragma omp ordered depend(sink:...) constructs in the current
2232 #pragma omp for ordered(n) region. */
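/* Bookkeeping sketch (added note, not from the original sources):
   counts[fd->collapse - 1] .. counts[fd->ordered - 1] hold the per-loop
   iteration counters (or NULL_TREE when the loop variable itself can be
   used directly), and counts[fd->ordered] is the address-taken ".orditera"
   array whose current values are handed to the GOMP_doacross_* calls.  */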
2234 static void
2235 expand_omp_ordered_source_sink (struct omp_region *region,
2236 struct omp_for_data *fd, tree *counts,
2237 basic_block cont_bb)
2239 struct omp_region *inner;
2240 int i;
2241 for (i = fd->collapse - 1; i < fd->ordered; i++)
2242 if (i == fd->collapse - 1 && fd->collapse > 1)
2243 counts[i] = NULL_TREE;
2244 else if (i >= fd->collapse && !cont_bb)
2245 counts[i] = build_zero_cst (fd->iter_type);
2246 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2247 && integer_onep (fd->loops[i].step))
2248 counts[i] = NULL_TREE;
2249 else
2250 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2251 tree atype
2252 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2253 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2254 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2256 for (inner = region->inner; inner; inner = inner->next)
2257 if (inner->type == GIMPLE_OMP_ORDERED)
2259 gomp_ordered *ord_stmt = inner->ord_stmt;
2260 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2261 location_t loc = gimple_location (ord_stmt);
2262 tree c;
2263 for (c = gimple_omp_ordered_clauses (ord_stmt);
2264 c; c = OMP_CLAUSE_CHAIN (c))
2265 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2266 break;
2267 if (c)
2268 expand_omp_ordered_source (&gsi, fd, counts, loc);
2269 for (c = gimple_omp_ordered_clauses (ord_stmt);
2270 c; c = OMP_CLAUSE_CHAIN (c))
2271 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2272 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2273 gsi_remove (&gsi, true);
2277 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2278 collapsed. */
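/* For example (added note, not from the original sources), with
   ordered(2) collapse(1) a single non-collapsed loop is re-created here
   around the body: its variable is re-initialized from n1, stepped in the
   continue block, and its current iteration number is stored into the
   ".orditera" array so that depend(source)/depend(sink) expansion can
   observe it.  */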
2280 static basic_block
2281 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2282 basic_block cont_bb, basic_block body_bb,
2283 bool ordered_lastprivate)
2285 if (fd->ordered == fd->collapse)
2286 return cont_bb;
2288 if (!cont_bb)
2290 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2291 for (int i = fd->collapse; i < fd->ordered; i++)
2293 tree type = TREE_TYPE (fd->loops[i].v);
2294 tree n1 = fold_convert (type, fd->loops[i].n1);
2295 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2296 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2297 size_int (i - fd->collapse + 1),
2298 NULL_TREE, NULL_TREE);
2299 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2301 return NULL;
2304 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2306 tree t, type = TREE_TYPE (fd->loops[i].v);
2307 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2308 expand_omp_build_assign (&gsi, fd->loops[i].v,
2309 fold_convert (type, fd->loops[i].n1));
2310 if (counts[i])
2311 expand_omp_build_assign (&gsi, counts[i],
2312 build_zero_cst (fd->iter_type));
2313 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2317 if (!gsi_end_p (gsi))
2318 gsi_prev (&gsi);
2319 else
2320 gsi = gsi_last_bb (body_bb);
2321 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2322 basic_block new_body = e1->dest;
2323 if (body_bb == cont_bb)
2324 cont_bb = new_body;
2325 edge e2 = NULL;
2326 basic_block new_header;
2327 if (EDGE_COUNT (cont_bb->preds) > 0)
2329 gsi = gsi_last_bb (cont_bb);
2330 if (POINTER_TYPE_P (type))
2331 t = fold_build_pointer_plus (fd->loops[i].v,
2332 fold_convert (sizetype,
2333 fd->loops[i].step));
2334 else
2335 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2336 fold_convert (type, fd->loops[i].step));
2337 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2338 if (counts[i])
2340 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2341 build_int_cst (fd->iter_type, 1));
2342 expand_omp_build_assign (&gsi, counts[i], t);
2343 t = counts[i];
2345 else
2347 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2348 fd->loops[i].v, fd->loops[i].n1);
2349 t = fold_convert (fd->iter_type, t);
2350 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2351 true, GSI_SAME_STMT);
2353 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2354 size_int (i - fd->collapse + 1),
2355 NULL_TREE, NULL_TREE);
2356 expand_omp_build_assign (&gsi, aref, t);
2357 gsi_prev (&gsi);
2358 e2 = split_block (cont_bb, gsi_stmt (gsi));
2359 new_header = e2->dest;
2361 else
2362 new_header = cont_bb;
2363 gsi = gsi_after_labels (new_header);
2364 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2365 true, GSI_SAME_STMT);
2366 tree n2
2367 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2368 true, NULL_TREE, true, GSI_SAME_STMT);
2369 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2370 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2371 edge e3 = split_block (new_header, gsi_stmt (gsi));
2372 cont_bb = e3->dest;
2373 remove_edge (e1);
2374 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2375 e3->flags = EDGE_FALSE_VALUE;
2376 e3->probability = REG_BR_PROB_BASE / 8;
2377 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2378 e1->probability = REG_BR_PROB_BASE - e3->probability;
2380 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2381 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2383 if (e2)
2385 struct loop *loop = alloc_loop ();
2386 loop->header = new_header;
2387 loop->latch = e2->src;
2388 add_loop (loop, body_bb->loop_father);
2392 /* If there are any lastprivate clauses and it is possible some loops
2393 might have zero iterations, ensure all the decls are initialized,
2394 otherwise we could crash evaluating C++ class iterators with lastprivate
2395 clauses. */
2396 bool need_inits = false;
2397 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2398 if (need_inits)
2400 tree type = TREE_TYPE (fd->loops[i].v);
2401 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2402 expand_omp_build_assign (&gsi, fd->loops[i].v,
2403 fold_convert (type, fd->loops[i].n1));
2405 else
2407 tree type = TREE_TYPE (fd->loops[i].v);
2408 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2409 boolean_type_node,
2410 fold_convert (type, fd->loops[i].n1),
2411 fold_convert (type, fd->loops[i].n2));
2412 if (!integer_onep (this_cond))
2413 need_inits = true;
2416 return cont_bb;
2419 /* A subroutine of expand_omp_for. Generate code for a parallel
2420 loop with any schedule. Given parameters:
2422 for (V = N1; V cond N2; V += STEP) BODY;
2424 where COND is "<" or ">", we generate pseudocode
2426 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2427 if (more) goto L0; else goto L3;
2429 V = istart0;
2430 iend = iend0;
2432 BODY;
2433 V += STEP;
2434 if (V cond iend) goto L1; else goto L2;
2436 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2439 If this is a combined omp parallel loop, instead of the call to
2440 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2441 If this is gimple_omp_for_combined_p loop, then instead of assigning
2442 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2443 inner GIMPLE_OMP_FOR and V += STEP; and
2444 if (V cond iend) goto L1; else goto L2; are removed.
2446 For collapsed loops, given parameters:
2447 collapse(3)
2448 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2449 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2450 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2451 BODY;
2453 we generate pseudocode
2455 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2456 if (cond3 is <)
2457 adj = STEP3 - 1;
2458 else
2459 adj = STEP3 + 1;
2460 count3 = (adj + N32 - N31) / STEP3;
2461 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2462 if (cond2 is <)
2463 adj = STEP2 - 1;
2464 else
2465 adj = STEP2 + 1;
2466 count2 = (adj + N22 - N21) / STEP2;
2467 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2468 if (cond1 is <)
2469 adj = STEP1 - 1;
2470 else
2471 adj = STEP1 + 1;
2472 count1 = (adj + N12 - N11) / STEP1;
2473 count = count1 * count2 * count3;
2474 goto Z1;
2476 count = 0;
2478 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2479 if (more) goto L0; else goto L3;
2481 V = istart0;
2482 T = V;
2483 V3 = N31 + (T % count3) * STEP3;
2484 T = T / count3;
2485 V2 = N21 + (T % count2) * STEP2;
2486 T = T / count2;
2487 V1 = N11 + T * STEP1;
2488 iend = iend0;
2490 BODY;
2491 V += 1;
2492 if (V < iend) goto L10; else goto L2;
2493 L10:
2494 V3 += STEP3;
2495 if (V3 cond3 N32) goto L1; else goto L11;
2496 L11:
2497 V3 = N31;
2498 V2 += STEP2;
2499 if (V2 cond2 N22) goto L1; else goto L12;
2500 L12:
2501 V2 = N21;
2502 V1 += STEP1;
2503 goto L1;
2505 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
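/* As a concrete instance (added note, not from the original sources), for
     #pragma omp for schedule(dynamic, 4)
   START_FN/NEXT_FN are GOMP_loop_dynamic_start/GOMP_loop_dynamic_next from
   libgomp, and every successful *_next call hands the calling thread
   another chunk [istart0, iend0) of at most 4 iterations to run through
   the sequential loop generated below.  */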
2510 static void
2511 expand_omp_for_generic (struct omp_region *region,
2512 struct omp_for_data *fd,
2513 enum built_in_function start_fn,
2514 enum built_in_function next_fn,
2515 gimple *inner_stmt)
2517 tree type, istart0, iend0, iend;
2518 tree t, vmain, vback, bias = NULL_TREE;
2519 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2520 basic_block l2_bb = NULL, l3_bb = NULL;
2521 gimple_stmt_iterator gsi;
2522 gassign *assign_stmt;
2523 bool in_combined_parallel = is_combined_parallel (region);
2524 bool broken_loop = region->cont == NULL;
2525 edge e, ne;
2526 tree *counts = NULL;
2527 int i;
2528 bool ordered_lastprivate = false;
2530 gcc_assert (!broken_loop || !in_combined_parallel);
2531 gcc_assert (fd->iter_type == long_integer_type_node
2532 || !in_combined_parallel);
2534 entry_bb = region->entry;
2535 cont_bb = region->cont;
2536 collapse_bb = NULL;
2537 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2538 gcc_assert (broken_loop
2539 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2540 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2541 l1_bb = single_succ (l0_bb);
2542 if (!broken_loop)
2544 l2_bb = create_empty_bb (cont_bb);
2545 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2546 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2547 == l1_bb));
2548 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2550 else
2551 l2_bb = NULL;
2552 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2553 exit_bb = region->exit;
2555 gsi = gsi_last_bb (entry_bb);
2557 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2558 if (fd->ordered
2559 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2560 OMP_CLAUSE_LASTPRIVATE))
2561 ordered_lastprivate = true;
2562 if (fd->collapse > 1 || fd->ordered)
2564 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2565 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2567 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2568 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2569 zero_iter1_bb, first_zero_iter1,
2570 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2572 if (zero_iter1_bb)
2574 /* Some counts[i] vars might be uninitialized if
2575 some loop has zero iterations. But the body shouldn't
2576 be executed in that case, so just avoid uninit warnings. */
2577 for (i = first_zero_iter1;
2578 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2579 if (SSA_VAR_P (counts[i]))
2580 TREE_NO_WARNING (counts[i]) = 1;
2581 gsi_prev (&gsi);
2582 e = split_block (entry_bb, gsi_stmt (gsi));
2583 entry_bb = e->dest;
2584 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2585 gsi = gsi_last_bb (entry_bb);
2586 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2587 get_immediate_dominator (CDI_DOMINATORS,
2588 zero_iter1_bb));
2590 if (zero_iter2_bb)
2592 /* Some counts[i] vars might be uninitialized if
2593 some loop has zero iterations. But the body shouldn't
2594 be executed in that case, so just avoid uninit warnings. */
2595 for (i = first_zero_iter2; i < fd->ordered; i++)
2596 if (SSA_VAR_P (counts[i]))
2597 TREE_NO_WARNING (counts[i]) = 1;
2598 if (zero_iter1_bb)
2599 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2600 else
2602 gsi_prev (&gsi);
2603 e = split_block (entry_bb, gsi_stmt (gsi));
2604 entry_bb = e->dest;
2605 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2606 gsi = gsi_last_bb (entry_bb);
2607 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2608 get_immediate_dominator
2609 (CDI_DOMINATORS, zero_iter2_bb));
2612 if (fd->collapse == 1)
2614 counts[0] = fd->loop.n2;
2615 fd->loop = fd->loops[0];
2619 type = TREE_TYPE (fd->loop.v);
2620 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2621 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2622 TREE_ADDRESSABLE (istart0) = 1;
2623 TREE_ADDRESSABLE (iend0) = 1;
2625 /* See if we need to bias by LLONG_MIN. */
2626 if (fd->iter_type == long_long_unsigned_type_node
2627 && TREE_CODE (type) == INTEGER_TYPE
2628 && !TYPE_UNSIGNED (type)
2629 && fd->ordered == 0)
2631 tree n1, n2;
2633 if (fd->loop.cond_code == LT_EXPR)
2635 n1 = fd->loop.n1;
2636 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2638 else
2640 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2641 n2 = fd->loop.n1;
2643 if (TREE_CODE (n1) != INTEGER_CST
2644 || TREE_CODE (n2) != INTEGER_CST
2645 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2646 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
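/* Added note (not from the original sources): adding bias == the minimum
   value of the signed type maps its range onto [0, ~0ULL] while keeping
   the iteration order, so the unsigned long long runtime interface can be
   used even for signed 64-bit iteration spaces; the bias is subtracted
   again when istart0/iend0 are converted back further down.  */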
2649 gimple_stmt_iterator gsif = gsi;
2650 gsi_prev (&gsif);
2652 tree arr = NULL_TREE;
2653 if (in_combined_parallel)
2655 gcc_assert (fd->ordered == 0);
2656 /* In a combined parallel loop, emit a call to
2657 GOMP_loop_foo_next. */
2658 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2659 build_fold_addr_expr (istart0),
2660 build_fold_addr_expr (iend0));
2662 else
2664 tree t0, t1, t2, t3, t4;
2665 /* If this is not a combined parallel loop, emit a call to
2666 GOMP_loop_foo_start in ENTRY_BB. */
2667 t4 = build_fold_addr_expr (iend0);
2668 t3 = build_fold_addr_expr (istart0);
2669 if (fd->ordered)
2671 t0 = build_int_cst (unsigned_type_node,
2672 fd->ordered - fd->collapse + 1);
2673 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2674 fd->ordered
2675 - fd->collapse + 1),
2676 ".omp_counts");
2677 DECL_NAMELESS (arr) = 1;
2678 TREE_ADDRESSABLE (arr) = 1;
2679 TREE_STATIC (arr) = 1;
2680 vec<constructor_elt, va_gc> *v;
2681 vec_alloc (v, fd->ordered - fd->collapse + 1);
2682 int idx;
2684 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2686 tree c;
2687 if (idx == 0 && fd->collapse > 1)
2688 c = fd->loop.n2;
2689 else
2690 c = counts[idx + fd->collapse - 1];
2691 tree purpose = size_int (idx);
2692 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2693 if (TREE_CODE (c) != INTEGER_CST)
2694 TREE_STATIC (arr) = 0;
2697 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2698 if (!TREE_STATIC (arr))
2699 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2700 void_type_node, arr),
2701 true, NULL_TREE, true, GSI_SAME_STMT);
2702 t1 = build_fold_addr_expr (arr);
2703 t2 = NULL_TREE;
2705 else
2707 t2 = fold_convert (fd->iter_type, fd->loop.step);
2708 t1 = fd->loop.n2;
2709 t0 = fd->loop.n1;
2710 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2712 tree innerc
2713 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 OMP_CLAUSE__LOOPTEMP_);
2715 gcc_assert (innerc);
2716 t0 = OMP_CLAUSE_DECL (innerc);
2717 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2718 OMP_CLAUSE__LOOPTEMP_);
2719 gcc_assert (innerc);
2720 t1 = OMP_CLAUSE_DECL (innerc);
2722 if (POINTER_TYPE_P (TREE_TYPE (t0))
2723 && TYPE_PRECISION (TREE_TYPE (t0))
2724 != TYPE_PRECISION (fd->iter_type))
2726 /* Avoid casting pointers to integer of a different size. */
2727 tree itype = signed_type_for (type);
2728 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2729 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2731 else
2733 t1 = fold_convert (fd->iter_type, t1);
2734 t0 = fold_convert (fd->iter_type, t0);
2736 if (bias)
2738 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2739 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2742 if (fd->iter_type == long_integer_type_node || fd->ordered)
2744 if (fd->chunk_size)
2746 t = fold_convert (fd->iter_type, fd->chunk_size);
2747 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2748 if (fd->ordered)
2749 t = build_call_expr (builtin_decl_explicit (start_fn),
2750 5, t0, t1, t, t3, t4);
2751 else
2752 t = build_call_expr (builtin_decl_explicit (start_fn),
2753 6, t0, t1, t2, t, t3, t4);
2755 else if (fd->ordered)
2756 t = build_call_expr (builtin_decl_explicit (start_fn),
2757 4, t0, t1, t3, t4);
2758 else
2759 t = build_call_expr (builtin_decl_explicit (start_fn),
2760 5, t0, t1, t2, t3, t4);
2762 else
2764 tree t5;
2765 tree c_bool_type;
2766 tree bfn_decl;
2768 /* The GOMP_loop_ull_*start functions have an additional boolean
2769 argument, true for < loops and false for > loops.
2770 In Fortran, the C bool type can be different from
2771 boolean_type_node. */
2772 bfn_decl = builtin_decl_explicit (start_fn);
2773 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2774 t5 = build_int_cst (c_bool_type,
2775 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2776 if (fd->chunk_size)
2778 tree bfn_decl = builtin_decl_explicit (start_fn);
2779 t = fold_convert (fd->iter_type, fd->chunk_size);
2780 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2781 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2783 else
2784 t = build_call_expr (builtin_decl_explicit (start_fn),
2785 6, t5, t0, t1, t2, t3, t4);
2788 if (TREE_TYPE (t) != boolean_type_node)
2789 t = fold_build2 (NE_EXPR, boolean_type_node,
2790 t, build_int_cst (TREE_TYPE (t), 0));
2791 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2792 true, GSI_SAME_STMT);
2793 if (arr && !TREE_STATIC (arr))
2795 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2796 TREE_THIS_VOLATILE (clobber) = 1;
2797 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2798 GSI_SAME_STMT);
2800 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2802 /* Remove the GIMPLE_OMP_FOR statement. */
2803 gsi_remove (&gsi, true);
2805 if (gsi_end_p (gsif))
2806 gsif = gsi_after_labels (gsi_bb (gsif));
2807 gsi_next (&gsif);
2809 /* Iteration setup for sequential loop goes in L0_BB. */
2810 tree startvar = fd->loop.v;
2811 tree endvar = NULL_TREE;
2813 if (gimple_omp_for_combined_p (fd->for_stmt))
2815 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2816 && gimple_omp_for_kind (inner_stmt)
2817 == GF_OMP_FOR_KIND_SIMD);
2818 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2819 OMP_CLAUSE__LOOPTEMP_);
2820 gcc_assert (innerc);
2821 startvar = OMP_CLAUSE_DECL (innerc);
2822 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2823 OMP_CLAUSE__LOOPTEMP_);
2824 gcc_assert (innerc);
2825 endvar = OMP_CLAUSE_DECL (innerc);
2828 gsi = gsi_start_bb (l0_bb);
2829 t = istart0;
2830 if (fd->ordered && fd->collapse == 1)
2831 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2832 fold_convert (fd->iter_type, fd->loop.step));
2833 else if (bias)
2834 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2835 if (fd->ordered && fd->collapse == 1)
2837 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2838 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2839 fd->loop.n1, fold_convert (sizetype, t));
2840 else
2842 t = fold_convert (TREE_TYPE (startvar), t);
2843 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2844 fd->loop.n1, t);
2847 else
2849 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2850 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2851 t = fold_convert (TREE_TYPE (startvar), t);
2853 t = force_gimple_operand_gsi (&gsi, t,
2854 DECL_P (startvar)
2855 && TREE_ADDRESSABLE (startvar),
2856 NULL_TREE, false, GSI_CONTINUE_LINKING);
2857 assign_stmt = gimple_build_assign (startvar, t);
2858 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2860 t = iend0;
2861 if (fd->ordered && fd->collapse == 1)
2862 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2863 fold_convert (fd->iter_type, fd->loop.step));
2864 else if (bias)
2865 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2866 if (fd->ordered && fd->collapse == 1)
2868 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2869 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2870 fd->loop.n1, fold_convert (sizetype, t));
2871 else
2873 t = fold_convert (TREE_TYPE (startvar), t);
2874 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2875 fd->loop.n1, t);
2878 else
2880 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2881 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2882 t = fold_convert (TREE_TYPE (startvar), t);
2884 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2885 false, GSI_CONTINUE_LINKING);
2886 if (endvar)
2888 assign_stmt = gimple_build_assign (endvar, iend);
2889 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2890 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2891 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2892 else
2893 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2894 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2896 /* Handle linear clause adjustments. */
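/* Added note (not from the original sources): for a clause such as
   linear(x:2) this recomputes the privatized x as its value on entry to
   the construct plus (the logical iteration number at which this chunk
   starts) * 2, recovering that iteration number from startvar, n1 and
   step.  */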
2897 tree itercnt = NULL_TREE;
2898 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2899 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2900 c; c = OMP_CLAUSE_CHAIN (c))
2901 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2902 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2904 tree d = OMP_CLAUSE_DECL (c);
2905 bool is_ref = omp_is_reference (d);
2906 tree t = d, a, dest;
2907 if (is_ref)
2908 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2909 tree type = TREE_TYPE (t);
2910 if (POINTER_TYPE_P (type))
2911 type = sizetype;
2912 dest = unshare_expr (t);
2913 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2914 expand_omp_build_assign (&gsif, v, t);
2915 if (itercnt == NULL_TREE)
2917 itercnt = startvar;
2918 tree n1 = fd->loop.n1;
2919 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2921 itercnt
2922 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2923 itercnt);
2924 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2926 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2927 itercnt, n1);
2928 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2929 itercnt, fd->loop.step);
2930 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2931 NULL_TREE, false,
2932 GSI_CONTINUE_LINKING);
2934 a = fold_build2 (MULT_EXPR, type,
2935 fold_convert (type, itercnt),
2936 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2937 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2938 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2939 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2940 false, GSI_CONTINUE_LINKING);
2941 assign_stmt = gimple_build_assign (dest, t);
2942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2944 if (fd->collapse > 1)
2945 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2947 if (fd->ordered)
2949 /* Until now, the counts array contained the number of iterations
2950 (or a variable containing it) for the ith loop. From now on, we
2951 need those counts only for the collapsed loops, and only from the
2952 2nd to the last collapsed one. Move them one element earlier;
2953 we'll use counts[fd->collapse - 1] for the first source/sink
2954 iteration counter and so on, and counts[fd->ordered]
2955 as the array holding the current counter values for
2956 depend(source). */
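/* E.g. (added note, not from the original sources) with collapse(2) and
   ordered(3): the old counts[1] (iterations of the second collapsed loop)
   moves down into counts[0], freeing counts[1] and counts[2] for the
   source/sink iteration counters and counts[3] for the counter array.  */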
2957 if (fd->collapse > 1)
2958 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2959 if (broken_loop)
2961 int i;
2962 for (i = fd->collapse; i < fd->ordered; i++)
2964 tree type = TREE_TYPE (fd->loops[i].v);
2965 tree this_cond
2966 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2967 fold_convert (type, fd->loops[i].n1),
2968 fold_convert (type, fd->loops[i].n2));
2969 if (!integer_onep (this_cond))
2970 break;
2972 if (i < fd->ordered)
2974 cont_bb
2975 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2976 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2977 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2978 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2979 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2980 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2981 make_edge (cont_bb, l1_bb, 0);
2982 l2_bb = create_empty_bb (cont_bb);
2983 broken_loop = false;
2986 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2987 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2988 ordered_lastprivate);
2989 if (counts[fd->collapse - 1])
2991 gcc_assert (fd->collapse == 1);
2992 gsi = gsi_last_bb (l0_bb);
2993 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2994 istart0, true);
2995 gsi = gsi_last_bb (cont_bb);
2996 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2997 build_int_cst (fd->iter_type, 1));
2998 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2999 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3000 size_zero_node, NULL_TREE, NULL_TREE);
3001 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3002 t = counts[fd->collapse - 1];
3004 else if (fd->collapse > 1)
3005 t = fd->loop.v;
3006 else
3008 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3009 fd->loops[0].v, fd->loops[0].n1);
3010 t = fold_convert (fd->iter_type, t);
3012 gsi = gsi_last_bb (l0_bb);
3013 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3014 size_zero_node, NULL_TREE, NULL_TREE);
3015 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3016 false, GSI_CONTINUE_LINKING);
3017 expand_omp_build_assign (&gsi, aref, t, true);
3020 if (!broken_loop)
3022 /* Code to control the increment and predicate for the sequential
3023 loop goes in the CONT_BB. */
3024 gsi = gsi_last_bb (cont_bb);
3025 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3026 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3027 vmain = gimple_omp_continue_control_use (cont_stmt);
3028 vback = gimple_omp_continue_control_def (cont_stmt);
3030 if (!gimple_omp_for_combined_p (fd->for_stmt))
3032 if (POINTER_TYPE_P (type))
3033 t = fold_build_pointer_plus (vmain, fd->loop.step);
3034 else
3035 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3036 t = force_gimple_operand_gsi (&gsi, t,
3037 DECL_P (vback)
3038 && TREE_ADDRESSABLE (vback),
3039 NULL_TREE, true, GSI_SAME_STMT);
3040 assign_stmt = gimple_build_assign (vback, t);
3041 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3043 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3045 if (fd->collapse > 1)
3046 t = fd->loop.v;
3047 else
3049 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3050 fd->loops[0].v, fd->loops[0].n1);
3051 t = fold_convert (fd->iter_type, t);
3053 tree aref = build4 (ARRAY_REF, fd->iter_type,
3054 counts[fd->ordered], size_zero_node,
3055 NULL_TREE, NULL_TREE);
3056 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3057 true, GSI_SAME_STMT);
3058 expand_omp_build_assign (&gsi, aref, t);
3061 t = build2 (fd->loop.cond_code, boolean_type_node,
3062 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3063 iend);
3064 gcond *cond_stmt = gimple_build_cond_empty (t);
3065 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3068 /* Remove GIMPLE_OMP_CONTINUE. */
3069 gsi_remove (&gsi, true);
3071 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3072 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3074 /* Emit code to get the next parallel iteration in L2_BB. */
3075 gsi = gsi_start_bb (l2_bb);
3077 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3078 build_fold_addr_expr (istart0),
3079 build_fold_addr_expr (iend0));
3080 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3081 false, GSI_CONTINUE_LINKING);
3082 if (TREE_TYPE (t) != boolean_type_node)
3083 t = fold_build2 (NE_EXPR, boolean_type_node,
3084 t, build_int_cst (TREE_TYPE (t), 0));
3085 gcond *cond_stmt = gimple_build_cond_empty (t);
3086 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3089 /* Add the loop cleanup function. */
3090 gsi = gsi_last_bb (exit_bb);
3091 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3092 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3093 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3094 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3095 else
3096 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3097 gcall *call_stmt = gimple_build_call (t, 0);
3098 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3099 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3100 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3101 if (fd->ordered)
3103 tree arr = counts[fd->ordered];
3104 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3105 TREE_THIS_VOLATILE (clobber) = 1;
3106 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3107 GSI_SAME_STMT);
3109 gsi_remove (&gsi, true);
3111 /* Connect the new blocks. */
3112 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3113 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3115 if (!broken_loop)
3117 gimple_seq phis;
3119 e = find_edge (cont_bb, l3_bb);
3120 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3122 phis = phi_nodes (l3_bb);
3123 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3125 gimple *phi = gsi_stmt (gsi);
3126 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3127 PHI_ARG_DEF_FROM_EDGE (phi, e));
3129 remove_edge (e);
3131 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3132 e = find_edge (cont_bb, l1_bb);
3133 if (e == NULL)
3135 e = BRANCH_EDGE (cont_bb);
3136 gcc_assert (single_succ (e->dest) == l1_bb);
3138 if (gimple_omp_for_combined_p (fd->for_stmt))
3140 remove_edge (e);
3141 e = NULL;
3143 else if (fd->collapse > 1)
3145 remove_edge (e);
3146 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3148 else
3149 e->flags = EDGE_TRUE_VALUE;
3150 if (e)
3152 e->probability = REG_BR_PROB_BASE * 7 / 8;
3153 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3155 else
3157 e = find_edge (cont_bb, l2_bb);
3158 e->flags = EDGE_FALLTHRU;
3160 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3162 if (gimple_in_ssa_p (cfun))
3164 /* Add phis to the outer loop that connect to the phis in the inner,
3165 original loop, and move the loop entry value of the inner phi to
3166 the loop entry value of the outer phi. */
3167 gphi_iterator psi;
3168 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3170 source_location locus;
3171 gphi *nphi;
3172 gphi *exit_phi = psi.phi ();
3174 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3175 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3177 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3178 edge latch_to_l1 = find_edge (latch, l1_bb);
3179 gphi *inner_phi
3180 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3182 tree t = gimple_phi_result (exit_phi);
3183 tree new_res = copy_ssa_name (t, NULL);
3184 nphi = create_phi_node (new_res, l0_bb);
3186 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3187 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3188 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3189 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3190 add_phi_arg (nphi, t, entry_to_l0, locus);
3192 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3193 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3195 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3199 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3200 recompute_dominator (CDI_DOMINATORS, l2_bb));
3201 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3202 recompute_dominator (CDI_DOMINATORS, l3_bb));
3203 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3204 recompute_dominator (CDI_DOMINATORS, l0_bb));
3205 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3206 recompute_dominator (CDI_DOMINATORS, l1_bb));
3208 /* We enter expand_omp_for_generic with a loop. This original loop may
3209 have its own loop struct, or it may be part of an outer loop struct
3210 (which may be the fake loop). */
3211 struct loop *outer_loop = entry_bb->loop_father;
3212 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3214 add_bb_to_loop (l2_bb, outer_loop);
3216 /* We've added a new loop around the original loop. Allocate the
3217 corresponding loop struct. */
3218 struct loop *new_loop = alloc_loop ();
3219 new_loop->header = l0_bb;
3220 new_loop->latch = l2_bb;
3221 add_loop (new_loop, outer_loop);
3223 /* Allocate a loop structure for the original loop unless we already
3224 had one. */
3225 if (!orig_loop_has_loop_struct
3226 && !gimple_omp_for_combined_p (fd->for_stmt))
3228 struct loop *orig_loop = alloc_loop ();
3229 orig_loop->header = l1_bb;
3230 /* The loop may have multiple latches. */
3231 add_loop (orig_loop, new_loop);
3236 /* A subroutine of expand_omp_for. Generate code for a parallel
3237 loop with static schedule and no specified chunk size. Given
3238 parameters:
3240 for (V = N1; V cond N2; V += STEP) BODY;
3242 where COND is "<" or ">", we generate pseudocode
3244 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3245 if (cond is <)
3246 adj = STEP - 1;
3247 else
3248 adj = STEP + 1;
3249 if ((__typeof (V)) -1 > 0 && cond is >)
3250 n = -(adj + N2 - N1) / -STEP;
3251 else
3252 n = (adj + N2 - N1) / STEP;
3253 q = n / nthreads;
3254 tt = n % nthreads;
3255 if (threadid < tt) goto L3; else goto L4;
3257 tt = 0;
3258 q = q + 1;
3260 s0 = q * threadid + tt;
3261 e0 = s0 + q;
3262 V = s0 * STEP + N1;
3263 if (s0 >= e0) goto L2; else goto L0;
3265 e = e0 * STEP + N1;
3267 BODY;
3268 V += STEP;
3269 if (V cond e) goto L1;
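/* Worked example (added note, not from the original sources): n = 10 and
   nthreads = 4 give q = 2 and tt = 2, so threads 0 and 1 get q + 1 = 3
   iterations each (s0 = 0 and 3) while threads 2 and 3 get 2 each
   (s0 = 6 and 8), covering [0, 10) with at most one iteration of
   imbalance between threads.  */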
3273 static void
3274 expand_omp_for_static_nochunk (struct omp_region *region,
3275 struct omp_for_data *fd,
3276 gimple *inner_stmt)
3278 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3279 tree type, itype, vmain, vback;
3280 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3281 basic_block body_bb, cont_bb, collapse_bb = NULL;
3282 basic_block fin_bb;
3283 gimple_stmt_iterator gsi;
3284 edge ep;
3285 bool broken_loop = region->cont == NULL;
3286 tree *counts = NULL;
3287 tree n1, n2, step;
3289 itype = type = TREE_TYPE (fd->loop.v);
3290 if (POINTER_TYPE_P (type))
3291 itype = signed_type_for (type);
3293 entry_bb = region->entry;
3294 cont_bb = region->cont;
3295 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3296 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3297 gcc_assert (broken_loop
3298 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3299 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3300 body_bb = single_succ (seq_start_bb);
3301 if (!broken_loop)
3303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3304 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3305 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3307 exit_bb = region->exit;
3309 /* Iteration space partitioning goes in ENTRY_BB. */
3310 gsi = gsi_last_bb (entry_bb);
3311 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3313 if (fd->collapse > 1)
3315 int first_zero_iter = -1, dummy = -1;
3316 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3318 counts = XALLOCAVEC (tree, fd->collapse);
3319 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3320 fin_bb, first_zero_iter,
3321 dummy_bb, dummy, l2_dom_bb);
3322 t = NULL_TREE;
3324 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3325 t = integer_one_node;
3326 else
3327 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3328 fold_convert (type, fd->loop.n1),
3329 fold_convert (type, fd->loop.n2));
3330 if (fd->collapse == 1
3331 && TYPE_UNSIGNED (type)
3332 && (t == NULL_TREE || !integer_onep (t)))
3334 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3335 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3336 true, GSI_SAME_STMT);
3337 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3338 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3339 true, GSI_SAME_STMT);
3340 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3341 NULL_TREE, NULL_TREE);
3342 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3343 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3344 expand_omp_regimplify_p, NULL, NULL)
3345 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3346 expand_omp_regimplify_p, NULL, NULL))
3348 gsi = gsi_for_stmt (cond_stmt);
3349 gimple_regimplify_operands (cond_stmt, &gsi);
3351 ep = split_block (entry_bb, cond_stmt);
3352 ep->flags = EDGE_TRUE_VALUE;
3353 entry_bb = ep->dest;
3354 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3355 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3356 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3357 if (gimple_in_ssa_p (cfun))
3359 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3360 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3361 !gsi_end_p (gpi); gsi_next (&gpi))
3363 gphi *phi = gpi.phi ();
3364 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3365 ep, UNKNOWN_LOCATION);
3368 gsi = gsi_last_bb (entry_bb);
3371 switch (gimple_omp_for_kind (fd->for_stmt))
3373 case GF_OMP_FOR_KIND_FOR:
3374 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3375 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3376 break;
3377 case GF_OMP_FOR_KIND_DISTRIBUTE:
3378 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3379 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3380 break;
3381 default:
3382 gcc_unreachable ();
3384 nthreads = build_call_expr (nthreads, 0);
3385 nthreads = fold_convert (itype, nthreads);
3386 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3387 true, GSI_SAME_STMT);
3388 threadid = build_call_expr (threadid, 0);
3389 threadid = fold_convert (itype, threadid);
3390 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3391 true, GSI_SAME_STMT);
3393 n1 = fd->loop.n1;
3394 n2 = fd->loop.n2;
3395 step = fd->loop.step;
3396 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3398 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3399 OMP_CLAUSE__LOOPTEMP_);
3400 gcc_assert (innerc);
3401 n1 = OMP_CLAUSE_DECL (innerc);
3402 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3403 OMP_CLAUSE__LOOPTEMP_);
3404 gcc_assert (innerc);
3405 n2 = OMP_CLAUSE_DECL (innerc);
3407 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3408 true, NULL_TREE, true, GSI_SAME_STMT);
3409 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3410 true, NULL_TREE, true, GSI_SAME_STMT);
3411 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3412 true, NULL_TREE, true, GSI_SAME_STMT);
3414 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3415 t = fold_build2 (PLUS_EXPR, itype, step, t);
3416 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3417 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3418 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3419 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3420 fold_build1 (NEGATE_EXPR, itype, t),
3421 fold_build1 (NEGATE_EXPR, itype, step));
3422 else
3423 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3424 t = fold_convert (itype, t);
3425 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3427 q = create_tmp_reg (itype, "q");
3428 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3429 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3430 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3432 tt = create_tmp_reg (itype, "tt");
3433 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3434 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3435 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3437 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3438 gcond *cond_stmt = gimple_build_cond_empty (t);
3439 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3441 second_bb = split_block (entry_bb, cond_stmt)->dest;
3442 gsi = gsi_last_bb (second_bb);
3443 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3445 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3446 GSI_SAME_STMT);
3447 gassign *assign_stmt
3448 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3449 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3451 third_bb = split_block (second_bb, assign_stmt)->dest;
3452 gsi = gsi_last_bb (third_bb);
3453 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3455 t = build2 (MULT_EXPR, itype, q, threadid);
3456 t = build2 (PLUS_EXPR, itype, t, tt);
3457 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3459 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3460 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3462 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3463 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3465 /* Remove the GIMPLE_OMP_FOR statement. */
3466 gsi_remove (&gsi, true);
3468 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3469 gsi = gsi_start_bb (seq_start_bb);
3471 tree startvar = fd->loop.v;
3472 tree endvar = NULL_TREE;
3474 if (gimple_omp_for_combined_p (fd->for_stmt))
3476 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3477 ? gimple_omp_parallel_clauses (inner_stmt)
3478 : gimple_omp_for_clauses (inner_stmt);
3479 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3480 gcc_assert (innerc);
3481 startvar = OMP_CLAUSE_DECL (innerc);
3482 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 OMP_CLAUSE__LOOPTEMP_);
3484 gcc_assert (innerc);
3485 endvar = OMP_CLAUSE_DECL (innerc);
3486 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3487 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3489 int i;
3490 for (i = 1; i < fd->collapse; i++)
3492 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3493 OMP_CLAUSE__LOOPTEMP_);
3494 gcc_assert (innerc);
3496 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3497 OMP_CLAUSE__LOOPTEMP_);
3498 if (innerc)
3500 /* If needed (distribute parallel for with lastprivate),
3501 propagate down the total number of iterations. */
3502 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3503 fd->loop.n2);
3504 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3505 GSI_CONTINUE_LINKING);
3506 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3507 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3511 t = fold_convert (itype, s0);
3512 t = fold_build2 (MULT_EXPR, itype, t, step);
3513 if (POINTER_TYPE_P (type))
3514 t = fold_build_pointer_plus (n1, t);
3515 else
3516 t = fold_build2 (PLUS_EXPR, type, t, n1);
3517 t = fold_convert (TREE_TYPE (startvar), t);
3518 t = force_gimple_operand_gsi (&gsi, t,
3519 DECL_P (startvar)
3520 && TREE_ADDRESSABLE (startvar),
3521 NULL_TREE, false, GSI_CONTINUE_LINKING);
3522 assign_stmt = gimple_build_assign (startvar, t);
3523 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3525 t = fold_convert (itype, e0);
3526 t = fold_build2 (MULT_EXPR, itype, t, step);
3527 if (POINTER_TYPE_P (type))
3528 t = fold_build_pointer_plus (n1, t);
3529 else
3530 t = fold_build2 (PLUS_EXPR, type, t, n1);
3531 t = fold_convert (TREE_TYPE (startvar), t);
3532 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3533 false, GSI_CONTINUE_LINKING);
3534 if (endvar)
3536 assign_stmt = gimple_build_assign (endvar, e);
3537 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3538 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3539 assign_stmt = gimple_build_assign (fd->loop.v, e);
3540 else
3541 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3542 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544 /* Handle linear clause adjustments. */
3545 tree itercnt = NULL_TREE;
3546 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3547 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3548 c; c = OMP_CLAUSE_CHAIN (c))
3549 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3550 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3552 tree d = OMP_CLAUSE_DECL (c);
3553 bool is_ref = omp_is_reference (d);
3554 tree t = d, a, dest;
3555 if (is_ref)
3556 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3557 if (itercnt == NULL_TREE)
3559 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3561 itercnt = fold_build2 (MINUS_EXPR, itype,
3562 fold_convert (itype, n1),
3563 fold_convert (itype, fd->loop.n1));
3564 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3565 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3566 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3567 NULL_TREE, false,
3568 GSI_CONTINUE_LINKING);
3570 else
3571 itercnt = s0;
3573 tree type = TREE_TYPE (t);
3574 if (POINTER_TYPE_P (type))
3575 type = sizetype;
3576 a = fold_build2 (MULT_EXPR, type,
3577 fold_convert (type, itercnt),
3578 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3579 dest = unshare_expr (t);
3580 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3581 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3582 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3583 false, GSI_CONTINUE_LINKING);
3584 assign_stmt = gimple_build_assign (dest, t);
3585 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3587 if (fd->collapse > 1)
3588 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3590 if (!broken_loop)
3592 /* The code controlling the sequential loop replaces the
3593 GIMPLE_OMP_CONTINUE. */
3594 gsi = gsi_last_bb (cont_bb);
3595 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3596 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3597 vmain = gimple_omp_continue_control_use (cont_stmt);
3598 vback = gimple_omp_continue_control_def (cont_stmt);
3600 if (!gimple_omp_for_combined_p (fd->for_stmt))
3602 if (POINTER_TYPE_P (type))
3603 t = fold_build_pointer_plus (vmain, step);
3604 else
3605 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3606 t = force_gimple_operand_gsi (&gsi, t,
3607 DECL_P (vback)
3608 && TREE_ADDRESSABLE (vback),
3609 NULL_TREE, true, GSI_SAME_STMT);
3610 assign_stmt = gimple_build_assign (vback, t);
3611 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3613 t = build2 (fd->loop.cond_code, boolean_type_node,
3614 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3615 ? t : vback, e);
3616 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3619 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3620 gsi_remove (&gsi, true);
3622 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3623 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3626 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3627 gsi = gsi_last_bb (exit_bb);
3628 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3630 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3631 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3633 gsi_remove (&gsi, true);
3635 /* Connect all the blocks. */
3636 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3637 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3638 ep = find_edge (entry_bb, second_bb);
3639 ep->flags = EDGE_TRUE_VALUE;
3640 ep->probability = REG_BR_PROB_BASE / 4;
3641 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3642 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3644 if (!broken_loop)
3646 ep = find_edge (cont_bb, body_bb);
3647 if (ep == NULL)
3649 ep = BRANCH_EDGE (cont_bb);
3650 gcc_assert (single_succ (ep->dest) == body_bb);
3652 if (gimple_omp_for_combined_p (fd->for_stmt))
3654 remove_edge (ep);
3655 ep = NULL;
3657 else if (fd->collapse > 1)
3659 remove_edge (ep);
3660 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3662 else
3663 ep->flags = EDGE_TRUE_VALUE;
3664 find_edge (cont_bb, fin_bb)->flags
3665 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3668 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3669 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3672 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3673 recompute_dominator (CDI_DOMINATORS, body_bb));
3674 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3675 recompute_dominator (CDI_DOMINATORS, fin_bb));
3677 struct loop *loop = body_bb->loop_father;
3678 if (loop != entry_bb->loop_father)
3680 gcc_assert (broken_loop || loop->header == body_bb);
3681 gcc_assert (broken_loop
3682 || loop->latch == region->cont
3683 || single_pred (loop->latch) == region->cont);
3684 return;
3687 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3689 loop = alloc_loop ();
3690 loop->header = body_bb;
3691 if (collapse_bb == NULL)
3692 loop->latch = cont_bb;
3693 add_loop (loop, body_bb->loop_father);
3697 /* Return phi in E->DEST with ARG on edge E. */
3699 static gphi *
3700 find_phi_with_arg_on_edge (tree arg, edge e)
3702 basic_block bb = e->dest;
3704 for (gphi_iterator gpi = gsi_start_phis (bb);
3705 !gsi_end_p (gpi);
3706 gsi_next (&gpi))
3708 gphi *phi = gpi.phi ();
3709 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3710 return phi;
3713 return NULL;
3716 /* A subroutine of expand_omp_for. Generate code for a parallel
3717 loop with static schedule and a specified chunk size. Given
3718 parameters:
3720 for (V = N1; V cond N2; V += STEP) BODY;
3722 where COND is "<" or ">", we generate pseudocode
3724 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3725 if (cond is <)
3726 adj = STEP - 1;
3727 else
3728 adj = STEP + 1;
3729 if ((__typeof (V)) -1 > 0 && cond is >)
3730 n = -(adj + N2 - N1) / -STEP;
3731 else
3732 n = (adj + N2 - N1) / STEP;
3733 trip = 0;
3734 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3735 here so that V is defined
3736 if the loop is not entered
3738 s0 = (trip * nthreads + threadid) * CHUNK;
3739 e0 = min (s0 + CHUNK, n);
3740 if (s0 < n) goto L1; else goto L4;
3742 V = s0 * STEP + N1;
3743 e = e0 * STEP + N1;
3745 BODY;
3746 V += STEP;
3747 if (V cond e) goto L2; else goto L3;
3749 trip += 1;
3750 goto L0;
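/* Illustrative sketch (editorial addition, not part of the original
   source): how the schedule(static, CHUNK) pseudocode above assigns the
   half-open range [s0, e0) to a thread on a given trip.  The standalone
   helper and its parameter names are hypothetical.  */
static inline void
example_static_chunk_range (long trip, long nthreads, long threadid,
			    long chunk, long n, long *s0, long *e0)
{
  /* s0 = (trip * nthreads + threadid) * CHUNK; e0 = min (s0 + CHUNK, n).  */
  *s0 = (trip * nthreads + threadid) * chunk;
  *e0 = (*s0 + chunk < n) ? *s0 + chunk : n;
}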
3754 static void
3755 expand_omp_for_static_chunk (struct omp_region *region,
3756 struct omp_for_data *fd, gimple *inner_stmt)
3758 tree n, s0, e0, e, t;
3759 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3760 tree type, itype, vmain, vback, vextra;
3761 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3762 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3763 gimple_stmt_iterator gsi;
3764 edge se;
3765 bool broken_loop = region->cont == NULL;
3766 tree *counts = NULL;
3767 tree n1, n2, step;
3769 itype = type = TREE_TYPE (fd->loop.v);
3770 if (POINTER_TYPE_P (type))
3771 itype = signed_type_for (type);
3773 entry_bb = region->entry;
3774 se = split_block (entry_bb, last_stmt (entry_bb));
3775 entry_bb = se->src;
3776 iter_part_bb = se->dest;
3777 cont_bb = region->cont;
3778 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3779 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3780 gcc_assert (broken_loop
3781 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3782 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3783 body_bb = single_succ (seq_start_bb);
3784 if (!broken_loop)
3786 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3787 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3788 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3789 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3791 exit_bb = region->exit;
3793 /* Trip and adjustment setup goes in ENTRY_BB. */
3794 gsi = gsi_last_bb (entry_bb);
3795 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3797 if (fd->collapse > 1)
3799 int first_zero_iter = -1, dummy = -1;
3800 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3802 counts = XALLOCAVEC (tree, fd->collapse);
3803 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3804 fin_bb, first_zero_iter,
3805 dummy_bb, dummy, l2_dom_bb);
3806 t = NULL_TREE;
3808 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3809 t = integer_one_node;
3810 else
3811 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3812 fold_convert (type, fd->loop.n1),
3813 fold_convert (type, fd->loop.n2));
3814 if (fd->collapse == 1
3815 && TYPE_UNSIGNED (type)
3816 && (t == NULL_TREE || !integer_onep (t)))
3818 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3819 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3820 true, GSI_SAME_STMT);
3821 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3822 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3823 true, GSI_SAME_STMT);
3824 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3825 NULL_TREE, NULL_TREE);
3826 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3827 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3828 expand_omp_regimplify_p, NULL, NULL)
3829 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3830 expand_omp_regimplify_p, NULL, NULL))
3832 gsi = gsi_for_stmt (cond_stmt);
3833 gimple_regimplify_operands (cond_stmt, &gsi);
3835 se = split_block (entry_bb, cond_stmt);
3836 se->flags = EDGE_TRUE_VALUE;
3837 entry_bb = se->dest;
3838 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3839 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3840 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3841 if (gimple_in_ssa_p (cfun))
3843 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3844 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3845 !gsi_end_p (gpi); gsi_next (&gpi))
3847 gphi *phi = gpi.phi ();
3848 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3849 se, UNKNOWN_LOCATION);
3852 gsi = gsi_last_bb (entry_bb);
3855 switch (gimple_omp_for_kind (fd->for_stmt))
3857 case GF_OMP_FOR_KIND_FOR:
3858 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3859 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3860 break;
3861 case GF_OMP_FOR_KIND_DISTRIBUTE:
3862 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3863 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3864 break;
3865 default:
3866 gcc_unreachable ();
3868 nthreads = build_call_expr (nthreads, 0);
3869 nthreads = fold_convert (itype, nthreads);
3870 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3871 true, GSI_SAME_STMT);
3872 threadid = build_call_expr (threadid, 0);
3873 threadid = fold_convert (itype, threadid);
3874 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3877 n1 = fd->loop.n1;
3878 n2 = fd->loop.n2;
3879 step = fd->loop.step;
3880 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3882 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3883 OMP_CLAUSE__LOOPTEMP_);
3884 gcc_assert (innerc);
3885 n1 = OMP_CLAUSE_DECL (innerc);
3886 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3887 OMP_CLAUSE__LOOPTEMP_);
3888 gcc_assert (innerc);
3889 n2 = OMP_CLAUSE_DECL (innerc);
3891 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3892 true, NULL_TREE, true, GSI_SAME_STMT);
3893 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3894 true, NULL_TREE, true, GSI_SAME_STMT);
3895 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3896 true, NULL_TREE, true, GSI_SAME_STMT);
3897 tree chunk_size = fold_convert (itype, fd->chunk_size);
3898 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3899 chunk_size
3900 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3901 GSI_SAME_STMT);
3903 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3904 t = fold_build2 (PLUS_EXPR, itype, step, t);
3905 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3906 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3907 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3908 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3909 fold_build1 (NEGATE_EXPR, itype, t),
3910 fold_build1 (NEGATE_EXPR, itype, step));
3911 else
3912 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3913 t = fold_convert (itype, t);
3914 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3915 true, GSI_SAME_STMT);
3917 trip_var = create_tmp_reg (itype, ".trip");
3918 if (gimple_in_ssa_p (cfun))
3920 trip_init = make_ssa_name (trip_var);
3921 trip_main = make_ssa_name (trip_var);
3922 trip_back = make_ssa_name (trip_var);
3924 else
3926 trip_init = trip_var;
3927 trip_main = trip_var;
3928 trip_back = trip_var;
3931 gassign *assign_stmt
3932 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3933 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3935 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3936 t = fold_build2 (MULT_EXPR, itype, t, step);
3937 if (POINTER_TYPE_P (type))
3938 t = fold_build_pointer_plus (n1, t);
3939 else
3940 t = fold_build2 (PLUS_EXPR, type, t, n1);
3941 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3942 true, GSI_SAME_STMT);
3944 /* Remove the GIMPLE_OMP_FOR. */
3945 gsi_remove (&gsi, true);
3947 gimple_stmt_iterator gsif = gsi;
3949 /* Iteration space partitioning goes in ITER_PART_BB. */
3950 gsi = gsi_last_bb (iter_part_bb);
3952 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3953 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3954 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3955 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3956 false, GSI_CONTINUE_LINKING);
3958 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3959 t = fold_build2 (MIN_EXPR, itype, t, n);
3960 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3961 false, GSI_CONTINUE_LINKING);
3963 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3964 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3966 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3967 gsi = gsi_start_bb (seq_start_bb);
3969 tree startvar = fd->loop.v;
3970 tree endvar = NULL_TREE;
3972 if (gimple_omp_for_combined_p (fd->for_stmt))
3974 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3975 ? gimple_omp_parallel_clauses (inner_stmt)
3976 : gimple_omp_for_clauses (inner_stmt);
3977 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3978 gcc_assert (innerc);
3979 startvar = OMP_CLAUSE_DECL (innerc);
3980 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 OMP_CLAUSE__LOOPTEMP_);
3982 gcc_assert (innerc);
3983 endvar = OMP_CLAUSE_DECL (innerc);
3984 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3985 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3987 int i;
3988 for (i = 1; i < fd->collapse; i++)
3990 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3991 OMP_CLAUSE__LOOPTEMP_);
3992 gcc_assert (innerc);
3994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3995 OMP_CLAUSE__LOOPTEMP_);
3996 if (innerc)
3998 /* If needed (distribute parallel for with lastprivate),
3999 propagate down the total number of iterations. */
4000 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4001 fd->loop.n2);
4002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4003 GSI_CONTINUE_LINKING);
4004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4010 t = fold_convert (itype, s0);
4011 t = fold_build2 (MULT_EXPR, itype, t, step);
4012 if (POINTER_TYPE_P (type))
4013 t = fold_build_pointer_plus (n1, t);
4014 else
4015 t = fold_build2 (PLUS_EXPR, type, t, n1);
4016 t = fold_convert (TREE_TYPE (startvar), t);
4017 t = force_gimple_operand_gsi (&gsi, t,
4018 DECL_P (startvar)
4019 && TREE_ADDRESSABLE (startvar),
4020 NULL_TREE, false, GSI_CONTINUE_LINKING);
4021 assign_stmt = gimple_build_assign (startvar, t);
4022 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4024 t = fold_convert (itype, e0);
4025 t = fold_build2 (MULT_EXPR, itype, t, step);
4026 if (POINTER_TYPE_P (type))
4027 t = fold_build_pointer_plus (n1, t);
4028 else
4029 t = fold_build2 (PLUS_EXPR, type, t, n1);
4030 t = fold_convert (TREE_TYPE (startvar), t);
4031 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4032 false, GSI_CONTINUE_LINKING);
4033 if (endvar)
4035 assign_stmt = gimple_build_assign (endvar, e);
4036 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4037 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4038 assign_stmt = gimple_build_assign (fd->loop.v, e);
4039 else
4040 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4041 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043 /* Handle linear clause adjustments. */
4044 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4045 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4046 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4047 c; c = OMP_CLAUSE_CHAIN (c))
4048 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4049 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4051 tree d = OMP_CLAUSE_DECL (c);
4052 bool is_ref = omp_is_reference (d);
4053 tree t = d, a, dest;
4054 if (is_ref)
4055 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4056 tree type = TREE_TYPE (t);
4057 if (POINTER_TYPE_P (type))
4058 type = sizetype;
4059 dest = unshare_expr (t);
4060 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4061 expand_omp_build_assign (&gsif, v, t);
4062 if (itercnt == NULL_TREE)
4064 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4066 itercntbias
4067 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4068 fold_convert (itype, fd->loop.n1));
4069 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4070 itercntbias, step);
4071 itercntbias
4072 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4073 NULL_TREE, true,
4074 GSI_SAME_STMT);
4075 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4076 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4077 NULL_TREE, false,
4078 GSI_CONTINUE_LINKING);
4080 else
4081 itercnt = s0;
4083 a = fold_build2 (MULT_EXPR, type,
4084 fold_convert (type, itercnt),
4085 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4086 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4087 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4088 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4089 false, GSI_CONTINUE_LINKING);
4090 assign_stmt = gimple_build_assign (dest, t);
4091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4093 if (fd->collapse > 1)
4094 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4096 if (!broken_loop)
4098 /* The code controlling the sequential loop goes in CONT_BB,
4099 replacing the GIMPLE_OMP_CONTINUE. */
4100 gsi = gsi_last_bb (cont_bb);
4101 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4102 vmain = gimple_omp_continue_control_use (cont_stmt);
4103 vback = gimple_omp_continue_control_def (cont_stmt);
4105 if (!gimple_omp_for_combined_p (fd->for_stmt))
4107 if (POINTER_TYPE_P (type))
4108 t = fold_build_pointer_plus (vmain, step);
4109 else
4110 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4111 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4112 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4113 true, GSI_SAME_STMT);
4114 assign_stmt = gimple_build_assign (vback, t);
4115 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4117 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4118 t = build2 (EQ_EXPR, boolean_type_node,
4119 build_int_cst (itype, 0),
4120 build_int_cst (itype, 1));
4121 else
4122 t = build2 (fd->loop.cond_code, boolean_type_node,
4123 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4124 ? t : vback, e);
4125 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4128 /* Remove GIMPLE_OMP_CONTINUE. */
4129 gsi_remove (&gsi, true);
4131 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4132 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4134 /* Trip update code goes into TRIP_UPDATE_BB. */
4135 gsi = gsi_start_bb (trip_update_bb);
4137 t = build_int_cst (itype, 1);
4138 t = build2 (PLUS_EXPR, itype, trip_main, t);
4139 assign_stmt = gimple_build_assign (trip_back, t);
4140 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4143 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4144 gsi = gsi_last_bb (exit_bb);
4145 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4147 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4148 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4150 gsi_remove (&gsi, true);
4152 /* Connect the new blocks. */
4153 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4154 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4156 if (!broken_loop)
4158 se = find_edge (cont_bb, body_bb);
4159 if (se == NULL)
4161 se = BRANCH_EDGE (cont_bb);
4162 gcc_assert (single_succ (se->dest) == body_bb);
4164 if (gimple_omp_for_combined_p (fd->for_stmt))
4166 remove_edge (se);
4167 se = NULL;
4169 else if (fd->collapse > 1)
4171 remove_edge (se);
4172 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4174 else
4175 se->flags = EDGE_TRUE_VALUE;
4176 find_edge (cont_bb, trip_update_bb)->flags
4177 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4179 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4180 iter_part_bb);
4183 if (gimple_in_ssa_p (cfun))
4185 gphi_iterator psi;
4186 gphi *phi;
4187 edge re, ene;
4188 edge_var_map *vm;
4189 size_t i;
4191 gcc_assert (fd->collapse == 1 && !broken_loop);
4193 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4194 remove arguments of the phi nodes in fin_bb. We need to create
4195 appropriate phi nodes in iter_part_bb instead. */
4196 se = find_edge (iter_part_bb, fin_bb);
4197 re = single_succ_edge (trip_update_bb);
4198 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4199 ene = single_succ_edge (entry_bb);
4201 psi = gsi_start_phis (fin_bb);
4202 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4203 gsi_next (&psi), ++i)
4205 gphi *nphi;
4206 source_location locus;
4208 phi = psi.phi ();
4209 t = gimple_phi_result (phi);
4210 gcc_assert (t == redirect_edge_var_map_result (vm));
4212 if (!single_pred_p (fin_bb))
4213 t = copy_ssa_name (t, phi);
4215 nphi = create_phi_node (t, iter_part_bb);
4217 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4218 locus = gimple_phi_arg_location_from_edge (phi, se);
4220 /* A special case -- fd->loop.v is not yet computed in
4221 iter_part_bb, we need to use vextra instead. */
4222 if (t == fd->loop.v)
4223 t = vextra;
4224 add_phi_arg (nphi, t, ene, locus);
4225 locus = redirect_edge_var_map_location (vm);
4226 tree back_arg = redirect_edge_var_map_def (vm);
4227 add_phi_arg (nphi, back_arg, re, locus);
4228 edge ce = find_edge (cont_bb, body_bb);
4229 if (ce == NULL)
4231 ce = BRANCH_EDGE (cont_bb);
4232 gcc_assert (single_succ (ce->dest) == body_bb);
4233 ce = single_succ_edge (ce->dest);
4235 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4236 gcc_assert (inner_loop_phi != NULL);
4237 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4238 find_edge (seq_start_bb, body_bb), locus);
4240 if (!single_pred_p (fin_bb))
4241 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4243 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4244 redirect_edge_var_map_clear (re);
4245 if (single_pred_p (fin_bb))
4246 while (1)
4248 psi = gsi_start_phis (fin_bb);
4249 if (gsi_end_p (psi))
4250 break;
4251 remove_phi_node (&psi, false);
4254 /* Make phi node for trip. */
4255 phi = create_phi_node (trip_main, iter_part_bb);
4256 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4257 UNKNOWN_LOCATION);
4258 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4259 UNKNOWN_LOCATION);
4262 if (!broken_loop)
4263 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4264 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4265 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4266 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4267 recompute_dominator (CDI_DOMINATORS, fin_bb));
4268 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4269 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4270 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4271 recompute_dominator (CDI_DOMINATORS, body_bb));
4273 if (!broken_loop)
4275 struct loop *loop = body_bb->loop_father;
4276 struct loop *trip_loop = alloc_loop ();
4277 trip_loop->header = iter_part_bb;
4278 trip_loop->latch = trip_update_bb;
4279 add_loop (trip_loop, iter_part_bb->loop_father);
4281 if (loop != entry_bb->loop_father)
4283 gcc_assert (loop->header == body_bb);
4284 gcc_assert (loop->latch == region->cont
4285 || single_pred (loop->latch) == region->cont);
4286 trip_loop->inner = loop;
4287 return;
4290 if (!gimple_omp_for_combined_p (fd->for_stmt))
4292 loop = alloc_loop ();
4293 loop->header = body_bb;
4294 if (collapse_bb == NULL)
4295 loop->latch = cont_bb;
4296 add_loop (loop, trip_loop);
4301 /* A subroutine of expand_omp_for. Generate code for a _Cilk_for loop.
4302 Given parameters:
4303 for (V = N1; V cond N2; V += STEP) BODY;
4305 where COND is "<" or ">" or "!=", we generate pseudocode
4307 for (ind_var = low; ind_var < high; ind_var++)
4309 V = n1 + (ind_var * STEP)
4311 <BODY>
4314 In the above pseudocode, low and high are function parameters of the
4315 child function. In the function below, we insert a temporary
4316 variable that is assigned calls to two OMP functions which will not be
4317 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4318 with _Cilk_for). These calls are replaced with low and high
4319 by the function that handles taskreg. */
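/* Illustrative sketch (editorial addition, not part of the original
   source): the shape of the loop this routine emits, with low and high
   supplied at run time through the __low and __high parameters.  The
   helper and its names are hypothetical; the body is elided.  */
static inline void
example_cilk_for_shape (long low, long high, long n1, long step)
{
  for (long ind_var = low; ind_var < high; ind_var++)
    {
      long v = n1 + ind_var * step;	/* V = N1 + (ind_var * STEP).  */
      (void) v;				/* <BODY> would use V here.  */
    }
}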
4322 static void
4323 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4325 bool broken_loop = region->cont == NULL;
4326 basic_block entry_bb = region->entry;
4327 basic_block cont_bb = region->cont;
4329 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4330 gcc_assert (broken_loop
4331 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4332 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4333 basic_block l1_bb, l2_bb;
4335 if (!broken_loop)
4337 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4338 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4339 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4340 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4342 else
4344 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4345 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4346 l2_bb = single_succ (l1_bb);
4348 basic_block exit_bb = region->exit;
4349 basic_block l2_dom_bb = NULL;
4351 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4353 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4354 statements used to pass information to expand_omp_taskreg.
4355 low_val and high_val will be replaced by the __low and __high
4356 parameters from the child function.
4358 The call_exprs part is a place-holder; it is mainly used
4359 to make clear to the top-level part that this is
4360 where low and high should be put (the reasoning is given in the header
4361 comment). */
4363 gomp_parallel *par_stmt
4364 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4365 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4366 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4367 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4369 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4370 high_val = t;
4371 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4372 low_val = t;
4374 gcc_assert (low_val && high_val);
4376 tree type = TREE_TYPE (low_val);
4377 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4378 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4380 /* Not needed in SSA form right now. */
4381 gcc_assert (!gimple_in_ssa_p (cfun));
4382 if (l2_dom_bb == NULL)
4383 l2_dom_bb = l1_bb;
4385 tree n1 = low_val;
4386 tree n2 = high_val;
4388 gimple *stmt = gimple_build_assign (ind_var, n1);
4390 /* Replace the GIMPLE_OMP_FOR statement. */
4391 gsi_replace (&gsi, stmt, true);
4393 if (!broken_loop)
4395 /* Code to control the increment goes in the CONT_BB. */
4396 gsi = gsi_last_bb (cont_bb);
4397 stmt = gsi_stmt (gsi);
4398 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4399 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4400 build_one_cst (type));
4402 /* Replace GIMPLE_OMP_CONTINUE. */
4403 gsi_replace (&gsi, stmt, true);
4406 /* Emit the condition in L1_BB. */
4407 gsi = gsi_after_labels (l1_bb);
4408 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4409 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4410 fd->loop.step);
4411 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4412 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4413 fd->loop.n1, fold_convert (sizetype, t));
4414 else
4415 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4416 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4417 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4418 expand_omp_build_assign (&gsi, fd->loop.v, t);
4420 /* The condition is always '<' since the runtime will fill in the low
4421 and high values. */
4422 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4423 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4425 /* Remove GIMPLE_OMP_RETURN. */
4426 gsi = gsi_last_bb (exit_bb);
4427 gsi_remove (&gsi, true);
4429 /* Connect the new blocks. */
4430 remove_edge (FALLTHRU_EDGE (entry_bb));
4432 edge e, ne;
4433 if (!broken_loop)
4435 remove_edge (BRANCH_EDGE (entry_bb));
4436 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4438 e = BRANCH_EDGE (l1_bb);
4439 ne = FALLTHRU_EDGE (l1_bb);
4440 e->flags = EDGE_TRUE_VALUE;
4442 else
4444 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4446 ne = single_succ_edge (l1_bb);
4447 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4450 ne->flags = EDGE_FALSE_VALUE;
4451 e->probability = REG_BR_PROB_BASE * 7 / 8;
4452 ne->probability = REG_BR_PROB_BASE / 8;
4454 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4455 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4456 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4458 if (!broken_loop)
4460 struct loop *loop = alloc_loop ();
4461 loop->header = l1_bb;
4462 loop->latch = cont_bb;
4463 add_loop (loop, l1_bb->loop_father);
4464 loop->safelen = INT_MAX;
4467 /* Pick the correct library function based on the precision of the
4468 induction variable type. */
4469 tree lib_fun = NULL_TREE;
4470 if (TYPE_PRECISION (type) == 32)
4471 lib_fun = cilk_for_32_fndecl;
4472 else if (TYPE_PRECISION (type) == 64)
4473 lib_fun = cilk_for_64_fndecl;
4474 else
4475 gcc_unreachable ();
4477 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4479 /* WS_ARGS contains the library function flavor to call
4480 (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4481 user-defined grain value. If the user does not define one, then zero
4482 is passed in by the parser. */
4483 vec_alloc (region->ws_args, 2);
4484 region->ws_args->quick_push (lib_fun);
4485 region->ws_args->quick_push (fd->chunk_size);
4488 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4489 loop. Given parameters:
4491 for (V = N1; V cond N2; V += STEP) BODY;
4493 where COND is "<" or ">", we generate pseudocode
4495 V = N1;
4496 goto L1;
4497 L0:
4498 BODY;
4499 V += STEP;
4500 L1:
4501 if (V cond N2) goto L0; else goto L2;
4502 L2:
4504 For collapsed loops, given parameters:
4505 collapse(3)
4506 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4507 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4508 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4509 BODY;
4511 we generate pseudocode
4513 if (cond3 is <)
4514 adj = STEP3 - 1;
4515 else
4516 adj = STEP3 + 1;
4517 count3 = (adj + N32 - N31) / STEP3;
4518 if (cond2 is <)
4519 adj = STEP2 - 1;
4520 else
4521 adj = STEP2 + 1;
4522 count2 = (adj + N22 - N21) / STEP2;
4523 if (cond1 is <)
4524 adj = STEP1 - 1;
4525 else
4526 adj = STEP1 + 1;
4527 count1 = (adj + N12 - N11) / STEP1;
4528 count = count1 * count2 * count3;
4529 V = 0;
4530 V1 = N11;
4531 V2 = N21;
4532 V3 = N31;
4533 goto L1;
4534 L0:
4535 BODY;
4536 V += 1;
4537 V3 += STEP3;
4538 V2 += (V3 cond3 N32) ? 0 : STEP2;
4539 V3 = (V3 cond3 N32) ? V3 : N31;
4540 V1 += (V2 cond2 N22) ? 0 : STEP1;
4541 V2 = (V2 cond2 N22) ? V2 : N21;
4542 L1:
4543 if (V < count) goto L0; else goto L2;
4544 L2:
4545 */
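/* Illustrative sketch (editorial addition, not part of the original
   source): the collapse(3) increment cascade from the pseudocode above,
   written out for a plain '<' comparison.  The helper and its parameter
   names are hypothetical.  */
static inline void
example_simd_collapse3_step (long *v1, long *v2, long *v3,
			     long n21, long n31, long n22, long n32,
			     long step1, long step2, long step3)
{
  *v3 += step3;
  *v2 += (*v3 < n32) ? 0 : step2;	/* V2 += (V3 cond3 N32) ? 0 : STEP2  */
  *v3 = (*v3 < n32) ? *v3 : n31;	/* V3 = (V3 cond3 N32) ? V3 : N31  */
  *v1 += (*v2 < n22) ? 0 : step1;	/* V1 += (V2 cond2 N22) ? 0 : STEP1  */
  *v2 = (*v2 < n22) ? *v2 : n21;	/* V2 = (V2 cond2 N22) ? V2 : N21  */
}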
4548 static void
4549 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4551 tree type, t;
4552 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4553 gimple_stmt_iterator gsi;
4554 gimple *stmt;
4555 gcond *cond_stmt;
4556 bool broken_loop = region->cont == NULL;
4557 edge e, ne;
4558 tree *counts = NULL;
4559 int i;
4560 int safelen_int = INT_MAX;
4561 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4562 OMP_CLAUSE_SAFELEN);
4563 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4564 OMP_CLAUSE__SIMDUID_);
4565 tree n1, n2;
4567 if (safelen)
4569 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4570 if (TREE_CODE (safelen) != INTEGER_CST)
4571 safelen_int = 0;
4572 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4573 safelen_int = tree_to_uhwi (safelen);
4574 if (safelen_int == 1)
4575 safelen_int = 0;
4577 type = TREE_TYPE (fd->loop.v);
4578 entry_bb = region->entry;
4579 cont_bb = region->cont;
4580 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4581 gcc_assert (broken_loop
4582 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4583 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4584 if (!broken_loop)
4586 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4587 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4588 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4589 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4591 else
4593 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4594 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4595 l2_bb = single_succ (l1_bb);
4597 exit_bb = region->exit;
4598 l2_dom_bb = NULL;
4600 gsi = gsi_last_bb (entry_bb);
4602 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4603 /* Not needed in SSA form right now. */
4604 gcc_assert (!gimple_in_ssa_p (cfun));
4605 if (fd->collapse > 1)
4607 int first_zero_iter = -1, dummy = -1;
4608 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4610 counts = XALLOCAVEC (tree, fd->collapse);
4611 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4612 zero_iter_bb, first_zero_iter,
4613 dummy_bb, dummy, l2_dom_bb);
4615 if (l2_dom_bb == NULL)
4616 l2_dom_bb = l1_bb;
4618 n1 = fd->loop.n1;
4619 n2 = fd->loop.n2;
4620 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4622 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4623 OMP_CLAUSE__LOOPTEMP_);
4624 gcc_assert (innerc);
4625 n1 = OMP_CLAUSE_DECL (innerc);
4626 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4627 OMP_CLAUSE__LOOPTEMP_);
4628 gcc_assert (innerc);
4629 n2 = OMP_CLAUSE_DECL (innerc);
4631 tree step = fd->loop.step;
4633 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4634 OMP_CLAUSE__SIMT_);
4635 if (is_simt)
4637 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4638 is_simt = safelen_int > 1;
4640 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4641 if (is_simt)
4643 simt_lane = create_tmp_var (unsigned_type_node);
4644 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4645 gimple_call_set_lhs (g, simt_lane);
4646 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4647 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4648 fold_convert (TREE_TYPE (step), simt_lane));
4649 n1 = fold_convert (type, n1);
4650 if (POINTER_TYPE_P (type))
4651 n1 = fold_build_pointer_plus (n1, offset);
4652 else
4653 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4655 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4656 if (fd->collapse > 1)
4657 simt_maxlane = build_one_cst (unsigned_type_node);
4658 else if (safelen_int < omp_max_simt_vf ())
4659 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4660 tree vf
4661 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4662 unsigned_type_node, 0);
4663 if (simt_maxlane)
4664 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4665 vf = fold_convert (TREE_TYPE (step), vf);
4666 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4669 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4670 if (fd->collapse > 1)
4672 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4674 gsi_prev (&gsi);
4675 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4676 gsi_next (&gsi);
4678 else
4679 for (i = 0; i < fd->collapse; i++)
4681 tree itype = TREE_TYPE (fd->loops[i].v);
4682 if (POINTER_TYPE_P (itype))
4683 itype = signed_type_for (itype);
4684 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4685 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4689 /* Remove the GIMPLE_OMP_FOR statement. */
4690 gsi_remove (&gsi, true);
4692 if (!broken_loop)
4694 /* Code to control the increment goes in the CONT_BB. */
4695 gsi = gsi_last_bb (cont_bb);
4696 stmt = gsi_stmt (gsi);
4697 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4699 if (POINTER_TYPE_P (type))
4700 t = fold_build_pointer_plus (fd->loop.v, step);
4701 else
4702 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4703 expand_omp_build_assign (&gsi, fd->loop.v, t);
4705 if (fd->collapse > 1)
4707 i = fd->collapse - 1;
4708 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4710 t = fold_convert (sizetype, fd->loops[i].step);
4711 t = fold_build_pointer_plus (fd->loops[i].v, t);
4713 else
4715 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4716 fd->loops[i].step);
4717 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4718 fd->loops[i].v, t);
4720 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4722 for (i = fd->collapse - 1; i > 0; i--)
4724 tree itype = TREE_TYPE (fd->loops[i].v);
4725 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4726 if (POINTER_TYPE_P (itype2))
4727 itype2 = signed_type_for (itype2);
4728 t = build3 (COND_EXPR, itype2,
4729 build2 (fd->loops[i].cond_code, boolean_type_node,
4730 fd->loops[i].v,
4731 fold_convert (itype, fd->loops[i].n2)),
4732 build_int_cst (itype2, 0),
4733 fold_convert (itype2, fd->loops[i - 1].step));
4734 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4735 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4736 else
4737 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4738 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4740 t = build3 (COND_EXPR, itype,
4741 build2 (fd->loops[i].cond_code, boolean_type_node,
4742 fd->loops[i].v,
4743 fold_convert (itype, fd->loops[i].n2)),
4744 fd->loops[i].v,
4745 fold_convert (itype, fd->loops[i].n1));
4746 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4750 /* Remove GIMPLE_OMP_CONTINUE. */
4751 gsi_remove (&gsi, true);
4754 /* Emit the condition in L1_BB. */
4755 gsi = gsi_start_bb (l1_bb);
4757 t = fold_convert (type, n2);
4758 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4759 false, GSI_CONTINUE_LINKING);
4760 tree v = fd->loop.v;
4761 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4762 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4763 false, GSI_CONTINUE_LINKING);
4764 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4765 cond_stmt = gimple_build_cond_empty (t);
4766 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4767 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4768 NULL, NULL)
4769 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4770 NULL, NULL))
4772 gsi = gsi_for_stmt (cond_stmt);
4773 gimple_regimplify_operands (cond_stmt, &gsi);
4776 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4777 if (is_simt)
4779 gsi = gsi_start_bb (l2_bb);
4780 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4781 if (POINTER_TYPE_P (type))
4782 t = fold_build_pointer_plus (fd->loop.v, step);
4783 else
4784 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4785 expand_omp_build_assign (&gsi, fd->loop.v, t);
4788 /* Remove GIMPLE_OMP_RETURN. */
4789 gsi = gsi_last_bb (exit_bb);
4790 gsi_remove (&gsi, true);
4792 /* Connect the new blocks. */
4793 remove_edge (FALLTHRU_EDGE (entry_bb));
4795 if (!broken_loop)
4797 remove_edge (BRANCH_EDGE (entry_bb));
4798 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4800 e = BRANCH_EDGE (l1_bb);
4801 ne = FALLTHRU_EDGE (l1_bb);
4802 e->flags = EDGE_TRUE_VALUE;
4804 else
4806 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4808 ne = single_succ_edge (l1_bb);
4809 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4812 ne->flags = EDGE_FALSE_VALUE;
4813 e->probability = REG_BR_PROB_BASE * 7 / 8;
4814 ne->probability = REG_BR_PROB_BASE / 8;
4816 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4817 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4819 if (simt_maxlane)
4821 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4822 NULL_TREE, NULL_TREE);
4823 gsi = gsi_last_bb (entry_bb);
4824 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4825 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4826 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4827 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4828 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4829 l2_dom_bb = entry_bb;
4831 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4833 if (!broken_loop)
4835 struct loop *loop = alloc_loop ();
4836 loop->header = l1_bb;
4837 loop->latch = cont_bb;
4838 add_loop (loop, l1_bb->loop_father);
4839 loop->safelen = safelen_int;
4840 if (simduid)
4842 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4843 cfun->has_simduid_loops = true;
4845 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4846 the loop. */
4847 if ((flag_tree_loop_vectorize
4848 || (!global_options_set.x_flag_tree_loop_vectorize
4849 && !global_options_set.x_flag_tree_vectorize))
4850 && flag_tree_loop_optimize
4851 && loop->safelen > 1)
4853 loop->force_vectorize = true;
4854 cfun->has_force_vectorize_loops = true;
4857 else if (simduid)
4858 cfun->has_simduid_loops = true;
4861 /* The taskloop construct is represented after gimplification as
4862 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4863 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4864 which should just compute all the needed loop temporaries
4865 for GIMPLE_OMP_TASK. */
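/* Illustrative sketch (editorial addition, not part of the original
   source): the "bias by LLONG_MIN" test used by both taskloop expanders
   below maps a signed bound onto the unsigned iterator type so that
   unsigned comparisons preserve the signed ordering.  Standalone form
   with hypothetical names; assumes LLONG_MIN from <limits.h>.  */
static inline unsigned long long
example_taskloop_bias (long long bound)
{
  /* Adding the bias flips the sign bit, so negative bounds sort below
     non-negative ones even when compared as unsigned values.  */
  return (unsigned long long) bound + (unsigned long long) LLONG_MIN;
}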
4867 static void
4868 expand_omp_taskloop_for_outer (struct omp_region *region,
4869 struct omp_for_data *fd,
4870 gimple *inner_stmt)
4872 tree type, bias = NULL_TREE;
4873 basic_block entry_bb, cont_bb, exit_bb;
4874 gimple_stmt_iterator gsi;
4875 gassign *assign_stmt;
4876 tree *counts = NULL;
4877 int i;
4879 gcc_assert (inner_stmt);
4880 gcc_assert (region->cont);
4881 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4882 && gimple_omp_task_taskloop_p (inner_stmt));
4883 type = TREE_TYPE (fd->loop.v);
4885 /* See if we need to bias by LLONG_MIN. */
4886 if (fd->iter_type == long_long_unsigned_type_node
4887 && TREE_CODE (type) == INTEGER_TYPE
4888 && !TYPE_UNSIGNED (type))
4890 tree n1, n2;
4892 if (fd->loop.cond_code == LT_EXPR)
4894 n1 = fd->loop.n1;
4895 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4897 else
4899 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4900 n2 = fd->loop.n1;
4902 if (TREE_CODE (n1) != INTEGER_CST
4903 || TREE_CODE (n2) != INTEGER_CST
4904 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4905 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4908 entry_bb = region->entry;
4909 cont_bb = region->cont;
4910 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4911 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4912 exit_bb = region->exit;
4914 gsi = gsi_last_bb (entry_bb);
4915 gimple *for_stmt = gsi_stmt (gsi);
4916 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4917 if (fd->collapse > 1)
4919 int first_zero_iter = -1, dummy = -1;
4920 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4922 counts = XALLOCAVEC (tree, fd->collapse);
4923 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4924 zero_iter_bb, first_zero_iter,
4925 dummy_bb, dummy, l2_dom_bb);
4927 if (zero_iter_bb)
4929 /* Some counts[i] vars might be uninitialized if
4930 some loop has zero iterations. But the body shouldn't
4931 be executed in that case, so just avoid uninit warnings. */
4932 for (i = first_zero_iter; i < fd->collapse; i++)
4933 if (SSA_VAR_P (counts[i]))
4934 TREE_NO_WARNING (counts[i]) = 1;
4935 gsi_prev (&gsi);
4936 edge e = split_block (entry_bb, gsi_stmt (gsi));
4937 entry_bb = e->dest;
4938 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4939 gsi = gsi_last_bb (entry_bb);
4940 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4941 get_immediate_dominator (CDI_DOMINATORS,
4942 zero_iter_bb));
4946 tree t0, t1;
4947 t1 = fd->loop.n2;
4948 t0 = fd->loop.n1;
4949 if (POINTER_TYPE_P (TREE_TYPE (t0))
4950 && TYPE_PRECISION (TREE_TYPE (t0))
4951 != TYPE_PRECISION (fd->iter_type))
4953 /* Avoid casting pointers to integer of a different size. */
4954 tree itype = signed_type_for (type);
4955 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4956 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4958 else
4960 t1 = fold_convert (fd->iter_type, t1);
4961 t0 = fold_convert (fd->iter_type, t0);
4963 if (bias)
4965 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4966 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4969 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4970 OMP_CLAUSE__LOOPTEMP_);
4971 gcc_assert (innerc);
4972 tree startvar = OMP_CLAUSE_DECL (innerc);
4973 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4974 gcc_assert (innerc);
4975 tree endvar = OMP_CLAUSE_DECL (innerc);
4976 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4978 gcc_assert (innerc);
4979 for (i = 1; i < fd->collapse; i++)
4981 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4982 OMP_CLAUSE__LOOPTEMP_);
4983 gcc_assert (innerc);
4985 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4986 OMP_CLAUSE__LOOPTEMP_);
4987 if (innerc)
4989 /* If needed (inner taskloop has lastprivate clause), propagate
4990 down the total number of iterations. */
4991 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4992 NULL_TREE, false,
4993 GSI_CONTINUE_LINKING);
4994 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4995 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4999 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5000 GSI_CONTINUE_LINKING);
5001 assign_stmt = gimple_build_assign (startvar, t0);
5002 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5004 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5005 GSI_CONTINUE_LINKING);
5006 assign_stmt = gimple_build_assign (endvar, t1);
5007 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5008 if (fd->collapse > 1)
5009 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5011 /* Remove the GIMPLE_OMP_FOR statement. */
5012 gsi = gsi_for_stmt (for_stmt);
5013 gsi_remove (&gsi, true);
5015 gsi = gsi_last_bb (cont_bb);
5016 gsi_remove (&gsi, true);
5018 gsi = gsi_last_bb (exit_bb);
5019 gsi_remove (&gsi, true);
5021 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5022 remove_edge (BRANCH_EDGE (entry_bb));
5023 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
5024 remove_edge (BRANCH_EDGE (cont_bb));
5025 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5026 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5027 recompute_dominator (CDI_DOMINATORS, region->entry));
5030 /* The taskloop construct is represented after gimplification as
5031 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5032 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5033 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5034 a single range of iterations. */
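/* Illustrative sketch (editorial addition, not part of the original
   source): GOMP_taskloop{,_ull} hands each task one [start, end) range
   through the _looptemp_ clauses; the inner expansion then runs an
   ordinary loop over that range.  Shown for a '<' loop with hypothetical
   names; the body is elided.  */
static inline void
example_taskloop_task_range (long start, long end, long step)
{
  for (long v = start; v < end; v += step)
    {
      (void) v;		/* BODY would use V here.  */
    }
}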
5036 static void
5037 expand_omp_taskloop_for_inner (struct omp_region *region,
5038 struct omp_for_data *fd,
5039 gimple *inner_stmt)
5041 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5042 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5043 basic_block fin_bb;
5044 gimple_stmt_iterator gsi;
5045 edge ep;
5046 bool broken_loop = region->cont == NULL;
5047 tree *counts = NULL;
5048 tree n1, n2, step;
5050 itype = type = TREE_TYPE (fd->loop.v);
5051 if (POINTER_TYPE_P (type))
5052 itype = signed_type_for (type);
5054 /* See if we need to bias by LLONG_MIN. */
5055 if (fd->iter_type == long_long_unsigned_type_node
5056 && TREE_CODE (type) == INTEGER_TYPE
5057 && !TYPE_UNSIGNED (type))
5059 tree n1, n2;
5061 if (fd->loop.cond_code == LT_EXPR)
5063 n1 = fd->loop.n1;
5064 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5066 else
5068 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5069 n2 = fd->loop.n1;
5071 if (TREE_CODE (n1) != INTEGER_CST
5072 || TREE_CODE (n2) != INTEGER_CST
5073 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5074 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5077 entry_bb = region->entry;
5078 cont_bb = region->cont;
5079 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5080 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5081 gcc_assert (broken_loop
5082 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5083 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5084 if (!broken_loop)
5086 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5087 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5089 exit_bb = region->exit;
5091 /* Iteration space partitioning goes in ENTRY_BB. */
5092 gsi = gsi_last_bb (entry_bb);
5093 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5095 if (fd->collapse > 1)
5097 int first_zero_iter = -1, dummy = -1;
5098 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5100 counts = XALLOCAVEC (tree, fd->collapse);
5101 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5102 fin_bb, first_zero_iter,
5103 dummy_bb, dummy, l2_dom_bb);
5104 t = NULL_TREE;
5106 else
5107 t = integer_one_node;
5109 step = fd->loop.step;
5110 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5111 OMP_CLAUSE__LOOPTEMP_);
5112 gcc_assert (innerc);
5113 n1 = OMP_CLAUSE_DECL (innerc);
5114 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5115 gcc_assert (innerc);
5116 n2 = OMP_CLAUSE_DECL (innerc);
5117 if (bias)
5119 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5120 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5122 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5123 true, NULL_TREE, true, GSI_SAME_STMT);
5124 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5125 true, NULL_TREE, true, GSI_SAME_STMT);
5126 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5127 true, NULL_TREE, true, GSI_SAME_STMT);
5129 tree startvar = fd->loop.v;
5130 tree endvar = NULL_TREE;
5132 if (gimple_omp_for_combined_p (fd->for_stmt))
5134 tree clauses = gimple_omp_for_clauses (inner_stmt);
5135 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5136 gcc_assert (innerc);
5137 startvar = OMP_CLAUSE_DECL (innerc);
5138 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5139 OMP_CLAUSE__LOOPTEMP_);
5140 gcc_assert (innerc);
5141 endvar = OMP_CLAUSE_DECL (innerc);
5143 t = fold_convert (TREE_TYPE (startvar), n1);
5144 t = force_gimple_operand_gsi (&gsi, t,
5145 DECL_P (startvar)
5146 && TREE_ADDRESSABLE (startvar),
5147 NULL_TREE, false, GSI_CONTINUE_LINKING);
5148 gimple *assign_stmt = gimple_build_assign (startvar, t);
5149 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5151 t = fold_convert (TREE_TYPE (startvar), n2);
5152 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5153 false, GSI_CONTINUE_LINKING);
5154 if (endvar)
5156 assign_stmt = gimple_build_assign (endvar, e);
5157 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5158 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5159 assign_stmt = gimple_build_assign (fd->loop.v, e);
5160 else
5161 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5162 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5164 if (fd->collapse > 1)
5165 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5167 if (!broken_loop)
5169 /* The code controlling the sequential loop replaces the
5170 GIMPLE_OMP_CONTINUE. */
5171 gsi = gsi_last_bb (cont_bb);
5172 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5173 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5174 vmain = gimple_omp_continue_control_use (cont_stmt);
5175 vback = gimple_omp_continue_control_def (cont_stmt);
5177 if (!gimple_omp_for_combined_p (fd->for_stmt))
5179 if (POINTER_TYPE_P (type))
5180 t = fold_build_pointer_plus (vmain, step);
5181 else
5182 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5183 t = force_gimple_operand_gsi (&gsi, t,
5184 DECL_P (vback)
5185 && TREE_ADDRESSABLE (vback),
5186 NULL_TREE, true, GSI_SAME_STMT);
5187 assign_stmt = gimple_build_assign (vback, t);
5188 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5190 t = build2 (fd->loop.cond_code, boolean_type_node,
5191 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5192 ? t : vback, e);
5193 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5196 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5197 gsi_remove (&gsi, true);
5199 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5200 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5203 /* Remove the GIMPLE_OMP_FOR statement. */
5204 gsi = gsi_for_stmt (fd->for_stmt);
5205 gsi_remove (&gsi, true);
5207 /* Remove the GIMPLE_OMP_RETURN statement. */
5208 gsi = gsi_last_bb (exit_bb);
5209 gsi_remove (&gsi, true);
5211 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5212 if (!broken_loop)
5213 remove_edge (BRANCH_EDGE (entry_bb));
5214 else
5216 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5217 region->outer->cont = NULL;
5220 /* Connect all the blocks. */
5221 if (!broken_loop)
5223 ep = find_edge (cont_bb, body_bb);
5224 if (gimple_omp_for_combined_p (fd->for_stmt))
5226 remove_edge (ep);
5227 ep = NULL;
5229 else if (fd->collapse > 1)
5231 remove_edge (ep);
5232 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5234 else
5235 ep->flags = EDGE_TRUE_VALUE;
5236 find_edge (cont_bb, fin_bb)->flags
5237 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5240 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5241 recompute_dominator (CDI_DOMINATORS, body_bb));
5242 if (!broken_loop)
5243 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5244 recompute_dominator (CDI_DOMINATORS, fin_bb));
5246 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5248 struct loop *loop = alloc_loop ();
5249 loop->header = body_bb;
5250 if (collapse_bb == NULL)
5251 loop->latch = cont_bb;
5252 add_loop (loop, body_bb->loop_father);
5256 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5257 partitioned loop. The lowering here is abstracted, in that the
5258 loop parameters are passed through internal functions, which are
5259 further lowered by oacc_device_lower, once we get to the target
5260 compiler. The loop is of the form:
5262 for (V = B; V LTGT E; V += S) {BODY}
5264 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5265 (constant 0 for no chunking) and we will have a GWV partitioning
5266 mask, specifying dimensions over which the loop is to be
5267 partitioned (see note below). We generate code that looks like
5268 (this ignores tiling):
5270 <entry_bb> [incoming FALL->body, BRANCH->exit]
5271 typedef signedintify (typeof (V)) T; // underlying signed integral type
5272 T range = E - B;
5273 T chunk_no = 0;
5274 T DIR = LTGT == '<' ? +1 : -1;
5275 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5276 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5278 <head_bb> [created by splitting end of entry_bb]
5279 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5280 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5281 if (!(offset LTGT bound)) goto bottom_bb;
5283 <body_bb> [incoming]
5284 V = B + offset;
5285 {BODY}
5287 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5288 offset += step;
5289 if (offset LTGT bound) goto body_bb; [*]
5291 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5292 chunk_no++;
5293 if (chunk_no < chunk_max) goto head_bb;
5295 <exit_bb> [incoming]
5296 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5298 [*] Needed if V live at end of loop. */
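/* For illustration only (a hypothetical example, not from the original
   sources): a gang-partitioned loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       a[i] += b[i];

   reaches this routine with B, E and S taken from the loop bounds and
   step, and a GWV mask selecting the gang dimension.  The GOACC_LOOP_*
   internal-function calls emitted below follow the scheme sketched
   above; they are only turned into concrete offsets and bounds by
   oacc_device_lower in the offload compiler.  */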
5300 static void
5301 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5303 tree v = fd->loop.v;
5304 enum tree_code cond_code = fd->loop.cond_code;
5305 enum tree_code plus_code = PLUS_EXPR;
5307 tree chunk_size = integer_minus_one_node;
5308 tree gwv = integer_zero_node;
5309 tree iter_type = TREE_TYPE (v);
5310 tree diff_type = iter_type;
5311 tree plus_type = iter_type;
5312 struct oacc_collapse *counts = NULL;
5314 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5315 == GF_OMP_FOR_KIND_OACC_LOOP);
5316 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5317 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5319 if (POINTER_TYPE_P (iter_type))
5321 plus_code = POINTER_PLUS_EXPR;
5322 plus_type = sizetype;
5324 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5325 diff_type = signed_type_for (diff_type);
5327 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5328 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5329 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5330 basic_block bottom_bb = NULL;
5332 /* entry_bb has two successors; the branch edge is to the exit
5333 block, fallthrough edge to body. */
5334 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5335 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5337 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5338 body_bb, or a block whose only successor is body_bb. Its
5339 fallthrough successor is the final block (same as the branch
5340 successor of the entry_bb). */
5341 if (cont_bb)
5343 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5344 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5346 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5347 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5349 else
5350 gcc_assert (!gimple_in_ssa_p (cfun));
5352 /* The exit block only has entry_bb and cont_bb as predecessors. */
5353 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5355 tree chunk_no;
5356 tree chunk_max = NULL_TREE;
5357 tree bound, offset;
5358 tree step = create_tmp_var (diff_type, ".step");
5359 bool up = cond_code == LT_EXPR;
5360 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5361 bool chunking = !gimple_in_ssa_p (cfun);
5362 bool negating;
5364 /* Tiling vars. */
5365 tree tile_size = NULL_TREE;
5366 tree element_s = NULL_TREE;
5367 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5368 basic_block elem_body_bb = NULL;
5369 basic_block elem_cont_bb = NULL;
5371 /* SSA instances. */
5372 tree offset_incr = NULL_TREE;
5373 tree offset_init = NULL_TREE;
5375 gimple_stmt_iterator gsi;
5376 gassign *ass;
5377 gcall *call;
5378 gimple *stmt;
5379 tree expr;
5380 location_t loc;
5381 edge split, be, fte;
5383 /* Split the end of entry_bb to create head_bb. */
5384 split = split_block (entry_bb, last_stmt (entry_bb));
5385 basic_block head_bb = split->dest;
5386 entry_bb = split->src;
5388 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5389 gsi = gsi_last_bb (entry_bb);
5390 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5391 loc = gimple_location (for_stmt);
5393 if (gimple_in_ssa_p (cfun))
5395 offset_init = gimple_omp_for_index (for_stmt, 0);
5396 gcc_assert (integer_zerop (fd->loop.n1));
5397 /* The SSA parallelizer does gang parallelism. */
5398 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5401 if (fd->collapse > 1 || fd->tiling)
5403 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5404 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5405 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5406 TREE_TYPE (fd->loop.n2), loc);
5408 if (SSA_VAR_P (fd->loop.n2))
5410 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5411 true, GSI_SAME_STMT);
5412 ass = gimple_build_assign (fd->loop.n2, total);
5413 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5417 tree b = fd->loop.n1;
5418 tree e = fd->loop.n2;
5419 tree s = fd->loop.step;
5421 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5422 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5424 /* Convert the step, avoiding possible unsigned->signed overflow. */
5425 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5426 if (negating)
5427 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5428 s = fold_convert (diff_type, s);
5429 if (negating)
5430 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5431 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5433 if (!chunking)
5434 chunk_size = integer_zero_node;
5435 expr = fold_convert (diff_type, chunk_size);
5436 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5437 NULL_TREE, true, GSI_SAME_STMT);
5439 if (fd->tiling)
5441 /* Determine the tile size and element step,
5442 modify the outer loop step size. */
5443 tile_size = create_tmp_var (diff_type, ".tile_size");
5444 expr = build_int_cst (diff_type, 1);
5445 for (int ix = 0; ix < fd->collapse; ix++)
5446 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5447 expr = force_gimple_operand_gsi (&gsi, expr, true,
5448 NULL_TREE, true, GSI_SAME_STMT);
5449 ass = gimple_build_assign (tile_size, expr);
5450 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5452 element_s = create_tmp_var (diff_type, ".element_s");
5453 ass = gimple_build_assign (element_s, s);
5454 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5456 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5457 s = force_gimple_operand_gsi (&gsi, expr, true,
5458 NULL_TREE, true, GSI_SAME_STMT);
5461 /* Determine the range, avoiding possible unsigned->signed overflow. */
5462 negating = !up && TYPE_UNSIGNED (iter_type);
5463 expr = fold_build2 (MINUS_EXPR, plus_type,
5464 fold_convert (plus_type, negating ? b : e),
5465 fold_convert (plus_type, negating ? e : b));
5466 expr = fold_convert (diff_type, expr);
5467 if (negating)
5468 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5469 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5470 NULL_TREE, true, GSI_SAME_STMT);
5472 chunk_no = build_int_cst (diff_type, 0);
5473 if (chunking)
5475 gcc_assert (!gimple_in_ssa_p (cfun));
5477 expr = chunk_no;
5478 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5479 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5481 ass = gimple_build_assign (chunk_no, expr);
5482 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5484 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5485 build_int_cst (integer_type_node,
5486 IFN_GOACC_LOOP_CHUNKS),
5487 dir, range, s, chunk_size, gwv);
5488 gimple_call_set_lhs (call, chunk_max);
5489 gimple_set_location (call, loc);
5490 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5492 else
5493 chunk_size = chunk_no;
5495 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5496 build_int_cst (integer_type_node,
5497 IFN_GOACC_LOOP_STEP),
5498 dir, range, s, chunk_size, gwv);
5499 gimple_call_set_lhs (call, step);
5500 gimple_set_location (call, loc);
5501 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5503 /* Remove the GIMPLE_OMP_FOR. */
5504 gsi_remove (&gsi, true);
5506 /* Fixup edges from head_bb. */
5507 be = BRANCH_EDGE (head_bb);
5508 fte = FALLTHRU_EDGE (head_bb);
5509 be->flags |= EDGE_FALSE_VALUE;
5510 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5512 basic_block body_bb = fte->dest;
5514 if (gimple_in_ssa_p (cfun))
5516 gsi = gsi_last_bb (cont_bb);
5517 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5519 offset = gimple_omp_continue_control_use (cont_stmt);
5520 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5522 else
5524 offset = create_tmp_var (diff_type, ".offset");
5525 offset_init = offset_incr = offset;
5527 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5529 /* Loop offset & bound go into head_bb. */
5530 gsi = gsi_start_bb (head_bb);
5532 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5533 build_int_cst (integer_type_node,
5534 IFN_GOACC_LOOP_OFFSET),
5535 dir, range, s,
5536 chunk_size, gwv, chunk_no);
5537 gimple_call_set_lhs (call, offset_init);
5538 gimple_set_location (call, loc);
5539 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5541 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5542 build_int_cst (integer_type_node,
5543 IFN_GOACC_LOOP_BOUND),
5544 dir, range, s,
5545 chunk_size, gwv, offset_init);
5546 gimple_call_set_lhs (call, bound);
5547 gimple_set_location (call, loc);
5548 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5550 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5551 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5552 GSI_CONTINUE_LINKING);
5554 /* V assignment goes into body_bb. */
5555 if (!gimple_in_ssa_p (cfun))
5557 gsi = gsi_start_bb (body_bb);
5559 expr = build2 (plus_code, iter_type, b,
5560 fold_convert (plus_type, offset));
5561 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5562 true, GSI_SAME_STMT);
5563 ass = gimple_build_assign (v, expr);
5564 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5566 if (fd->collapse > 1 || fd->tiling)
5567 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5569 if (fd->tiling)
5571 /* Determine the range of the element loop -- usually simply
5572 the tile_size, but could be smaller if the final
5573 iteration of the outer loop is a partial tile. */
5574 tree e_range = create_tmp_var (diff_type, ".e_range");
5576 expr = build2 (MIN_EXPR, diff_type,
5577 build2 (MINUS_EXPR, diff_type, bound, offset),
5578 build2 (MULT_EXPR, diff_type, tile_size,
5579 element_s));
5580 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5581 true, GSI_SAME_STMT);
5582 ass = gimple_build_assign (e_range, expr);
5583 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5585 /* Determine bound, offset & step of inner loop. */
5586 e_bound = create_tmp_var (diff_type, ".e_bound");
5587 e_offset = create_tmp_var (diff_type, ".e_offset");
5588 e_step = create_tmp_var (diff_type, ".e_step");
5590 /* Mark these as element loops. */
5591 tree t, e_gwv = integer_minus_one_node;
5592 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5594 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5595 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5596 element_s, chunk, e_gwv, chunk);
5597 gimple_call_set_lhs (call, e_offset);
5598 gimple_set_location (call, loc);
5599 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5601 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5602 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5603 element_s, chunk, e_gwv, e_offset);
5604 gimple_call_set_lhs (call, e_bound);
5605 gimple_set_location (call, loc);
5606 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5608 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5609 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5610 element_s, chunk, e_gwv);
5611 gimple_call_set_lhs (call, e_step);
5612 gimple_set_location (call, loc);
5613 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5615 /* Add test and split block. */
5616 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5617 stmt = gimple_build_cond_empty (expr);
5618 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5619 split = split_block (body_bb, stmt);
5620 elem_body_bb = split->dest;
5621 if (cont_bb == body_bb)
5622 cont_bb = elem_body_bb;
5623 body_bb = split->src;
5625 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5627 /* Initialize the user's loop vars. */
5628 gsi = gsi_start_bb (elem_body_bb);
5629 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5633 /* Loop increment goes into cont_bb. If this is not a loop, we
5634 will have spawned threads as if it was, and each one will
5635 execute one iteration. The specification is not explicit about
5636 whether such constructs are ill-formed or not, and they can
5637 occur, especially when noreturn routines are involved. */
5638 if (cont_bb)
5640 gsi = gsi_last_bb (cont_bb);
5641 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5642 loc = gimple_location (cont_stmt);
5644 if (fd->tiling)
5646 /* Insert element loop increment and test. */
5647 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5648 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5649 true, GSI_SAME_STMT);
5650 ass = gimple_build_assign (e_offset, expr);
5651 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5652 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5654 stmt = gimple_build_cond_empty (expr);
5655 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5656 split = split_block (cont_bb, stmt);
5657 elem_cont_bb = split->src;
5658 cont_bb = split->dest;
5660 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5661 make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5663 make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5665 gsi = gsi_for_stmt (cont_stmt);
5668 /* Increment offset. */
5669 if (gimple_in_ssa_p (cfun))
5670 expr = build2 (plus_code, iter_type, offset,
5671 fold_convert (plus_type, step));
5672 else
5673 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5674 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5675 true, GSI_SAME_STMT);
5676 ass = gimple_build_assign (offset_incr, expr);
5677 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5678 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5679 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5681 /* Remove the GIMPLE_OMP_CONTINUE. */
5682 gsi_remove (&gsi, true);
5684 /* Fixup edges from cont_bb. */
5685 be = BRANCH_EDGE (cont_bb);
5686 fte = FALLTHRU_EDGE (cont_bb);
5687 be->flags |= EDGE_TRUE_VALUE;
5688 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5690 if (chunking)
5692 /* Split the beginning of exit_bb to make bottom_bb. We
5693 need to insert a nop at the start, because splitting is
5694 after a stmt, not before. */
5695 gsi = gsi_start_bb (exit_bb);
5696 stmt = gimple_build_nop ();
5697 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5698 split = split_block (exit_bb, stmt);
5699 bottom_bb = split->src;
5700 exit_bb = split->dest;
5701 gsi = gsi_last_bb (bottom_bb);
5703 /* Chunk increment and test goes into bottom_bb. */
5704 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5705 build_int_cst (diff_type, 1));
5706 ass = gimple_build_assign (chunk_no, expr);
5707 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5709 /* Chunk test at end of bottom_bb. */
5710 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5711 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5712 GSI_CONTINUE_LINKING);
5714 /* Fixup edges from bottom_bb. */
5715 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5716 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5720 gsi = gsi_last_bb (exit_bb);
5721 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5722 loc = gimple_location (gsi_stmt (gsi));
5724 if (!gimple_in_ssa_p (cfun))
5726 /* Insert the final value of V, in case it is live. This is the
5727 value for the only thread that survives past the join. */
5728 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5729 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5730 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5731 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5732 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5733 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5734 true, GSI_SAME_STMT);
5735 ass = gimple_build_assign (v, expr);
5736 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5739 /* Remove the OMP_RETURN. */
5740 gsi_remove (&gsi, true);
5742 if (cont_bb)
5744 /* We now have one, two or three nested loops. Update the loop
5745 structures. */
5746 struct loop *parent = entry_bb->loop_father;
5747 struct loop *body = body_bb->loop_father;
5749 if (chunking)
5751 struct loop *chunk_loop = alloc_loop ();
5752 chunk_loop->header = head_bb;
5753 chunk_loop->latch = bottom_bb;
5754 add_loop (chunk_loop, parent);
5755 parent = chunk_loop;
5757 else if (parent != body)
5759 gcc_assert (body->header == body_bb);
5760 gcc_assert (body->latch == cont_bb
5761 || single_pred (body->latch) == cont_bb);
5762 parent = NULL;
5765 if (parent)
5767 struct loop *body_loop = alloc_loop ();
5768 body_loop->header = body_bb;
5769 body_loop->latch = cont_bb;
5770 add_loop (body_loop, parent);
5772 if (fd->tiling)
5774 /* Insert tiling's element loop. */
5775 struct loop *inner_loop = alloc_loop ();
5776 inner_loop->header = elem_body_bb;
5777 inner_loop->latch = elem_cont_bb;
5778 add_loop (inner_loop, body_loop);
5784 /* Expand the OMP loop defined by REGION. */
5786 static void
5787 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5789 struct omp_for_data fd;
5790 struct omp_for_data_loop *loops;
5792 loops
5793 = (struct omp_for_data_loop *)
5794 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5795 * sizeof (struct omp_for_data_loop));
5796 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5797 &fd, loops);
5798 region->sched_kind = fd.sched_kind;
5799 region->sched_modifiers = fd.sched_modifiers;
5801 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5802 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5803 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5804 if (region->cont)
5806 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5807 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5808 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5810 else
5811 /* If there isn't a continue then this is a degenerate case where
5812 the introduction of abnormal edges during lowering will prevent
5813 original loops from being detected. Fix that up. */
5814 loops_state_set (LOOPS_NEED_FIXUP);
5816 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5817 expand_omp_simd (region, &fd);
5818 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5819 expand_cilk_for (region, &fd);
5820 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5822 gcc_assert (!inner_stmt);
5823 expand_oacc_for (region, &fd);
5825 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5827 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5828 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5829 else
5830 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5832 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5833 && !fd.have_ordered)
5835 if (fd.chunk_size == NULL)
5836 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5837 else
5838 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5840 else
5842 int fn_index, start_ix, next_ix;
5844 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5845 == GF_OMP_FOR_KIND_FOR);
5846 if (fd.chunk_size == NULL
5847 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5848 fd.chunk_size = integer_zero_node;
5849 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5850 switch (fd.sched_kind)
5852 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5853 fn_index = 3;
5854 break;
5855 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5856 case OMP_CLAUSE_SCHEDULE_GUIDED:
5857 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5858 && !fd.ordered
5859 && !fd.have_ordered)
5861 fn_index = 3 + fd.sched_kind;
5862 break;
5864 /* FALLTHRU */
5865 default:
5866 fn_index = fd.sched_kind;
5867 break;
5869 if (!fd.ordered)
5870 fn_index += fd.have_ordered * 6;
5871 if (fd.ordered)
5872 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5873 else
5874 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5875 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5876 if (fd.iter_type == long_long_unsigned_type_node)
5878 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5879 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5880 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5881 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5883 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5884 (enum built_in_function) next_ix, inner_stmt);
5887 if (gimple_in_ssa_p (cfun))
5888 update_ssa (TODO_update_ssa_only_virtuals);
5891 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5893 v = GOMP_sections_start (n);
5895 switch (v)
5897 case 0:
5898 goto L2;
5899 case 1:
5900 section 1;
5901 goto L1;
5902 case 2:
5904 case n:
5906 default:
5907 abort ();
5910 v = GOMP_sections_next ();
5911 goto L0;
5913 reduction;
5915 If this is a combined parallel+sections region, replace the call to
5916 GOMP_sections_start with a call to GOMP_sections_next. */
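/* For illustration only (a hypothetical example, not from the original
   sources): a construct such as

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   is dispatched through the switch sketched above: case 1 runs foo (),
   case 2 runs bar (), and case 0 is taken once GOMP_sections_next ()
   reports that no work is left.  */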
5918 static void
5919 expand_omp_sections (struct omp_region *region)
5921 tree t, u, vin = NULL, vmain, vnext, l2;
5922 unsigned len;
5923 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5924 gimple_stmt_iterator si, switch_si;
5925 gomp_sections *sections_stmt;
5926 gimple *stmt;
5927 gomp_continue *cont;
5928 edge_iterator ei;
5929 edge e;
5930 struct omp_region *inner;
5931 unsigned i, casei;
5932 bool exit_reachable = region->cont != NULL;
5934 gcc_assert (region->exit != NULL);
5935 entry_bb = region->entry;
5936 l0_bb = single_succ (entry_bb);
5937 l1_bb = region->cont;
5938 l2_bb = region->exit;
5939 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5940 l2 = gimple_block_label (l2_bb);
5941 else
5943 /* This can happen if there are reductions. */
5944 len = EDGE_COUNT (l0_bb->succs);
5945 gcc_assert (len > 0);
5946 e = EDGE_SUCC (l0_bb, len - 1);
5947 si = gsi_last_bb (e->dest);
5948 l2 = NULL_TREE;
5949 if (gsi_end_p (si)
5950 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5951 l2 = gimple_block_label (e->dest);
5952 else
5953 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5955 si = gsi_last_bb (e->dest);
5956 if (gsi_end_p (si)
5957 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5959 l2 = gimple_block_label (e->dest);
5960 break;
5964 if (exit_reachable)
5965 default_bb = create_empty_bb (l1_bb->prev_bb);
5966 else
5967 default_bb = create_empty_bb (l0_bb);
5969 /* We will build a switch() with enough cases for all the
5970 GIMPLE_OMP_SECTION regions, a '0' case to signal that there is no more
5971 work, and a default case to abort if something goes wrong. */
5972 len = EDGE_COUNT (l0_bb->succs);
5974 /* Use vec::quick_push on label_vec throughout, since we know the size
5975 in advance. */
5976 auto_vec<tree> label_vec (len);
5978 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5979 GIMPLE_OMP_SECTIONS statement. */
5980 si = gsi_last_bb (entry_bb);
5981 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5982 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5983 vin = gimple_omp_sections_control (sections_stmt);
5984 if (!is_combined_parallel (region))
5986 /* If we are not inside a combined parallel+sections region,
5987 call GOMP_sections_start. */
5988 t = build_int_cst (unsigned_type_node, len - 1);
5989 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5990 stmt = gimple_build_call (u, 1, t);
5992 else
5994 /* Otherwise, call GOMP_sections_next. */
5995 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5996 stmt = gimple_build_call (u, 0);
5998 gimple_call_set_lhs (stmt, vin);
5999 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6000 gsi_remove (&si, true);
6002 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6003 L0_BB. */
6004 switch_si = gsi_last_bb (l0_bb);
6005 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6006 if (exit_reachable)
6008 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6009 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6010 vmain = gimple_omp_continue_control_use (cont);
6011 vnext = gimple_omp_continue_control_def (cont);
6013 else
6015 vmain = vin;
6016 vnext = NULL_TREE;
6019 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6020 label_vec.quick_push (t);
6021 i = 1;
6023 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6024 for (inner = region->inner, casei = 1;
6025 inner;
6026 inner = inner->next, i++, casei++)
6028 basic_block s_entry_bb, s_exit_bb;
6030 /* Skip optional reduction region. */
6031 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6033 --i;
6034 --casei;
6035 continue;
6038 s_entry_bb = inner->entry;
6039 s_exit_bb = inner->exit;
6041 t = gimple_block_label (s_entry_bb);
6042 u = build_int_cst (unsigned_type_node, casei);
6043 u = build_case_label (u, NULL, t);
6044 label_vec.quick_push (u);
6046 si = gsi_last_bb (s_entry_bb);
6047 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6048 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6049 gsi_remove (&si, true);
6050 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6052 if (s_exit_bb == NULL)
6053 continue;
6055 si = gsi_last_bb (s_exit_bb);
6056 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6057 gsi_remove (&si, true);
6059 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6062 /* Error handling code goes in DEFAULT_BB. */
6063 t = gimple_block_label (default_bb);
6064 u = build_case_label (NULL, NULL, t);
6065 make_edge (l0_bb, default_bb, 0);
6066 add_bb_to_loop (default_bb, current_loops->tree_root);
6068 stmt = gimple_build_switch (vmain, u, label_vec);
6069 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6070 gsi_remove (&switch_si, true);
6072 si = gsi_start_bb (default_bb);
6073 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6074 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6076 if (exit_reachable)
6078 tree bfn_decl;
6080 /* Code to get the next section goes in L1_BB. */
6081 si = gsi_last_bb (l1_bb);
6082 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6084 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6085 stmt = gimple_build_call (bfn_decl, 0);
6086 gimple_call_set_lhs (stmt, vnext);
6087 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6088 gsi_remove (&si, true);
6090 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6093 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6094 si = gsi_last_bb (l2_bb);
6095 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6096 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6097 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6098 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6099 else
6100 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6101 stmt = gimple_build_call (t, 0);
6102 if (gimple_omp_return_lhs (gsi_stmt (si)))
6103 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6104 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6105 gsi_remove (&si, true);
6107 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6110 /* Expand code for an OpenMP single directive. We've already expanded
6111 much of the code; here we simply place the GOMP_barrier call. */
6113 static void
6114 expand_omp_single (struct omp_region *region)
6116 basic_block entry_bb, exit_bb;
6117 gimple_stmt_iterator si;
6119 entry_bb = region->entry;
6120 exit_bb = region->exit;
6122 si = gsi_last_bb (entry_bb);
6123 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6124 gsi_remove (&si, true);
6125 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6127 si = gsi_last_bb (exit_bb);
6128 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6130 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6131 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6133 gsi_remove (&si, true);
6134 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6137 /* Generic expansion for OpenMP synchronization directives: master,
6138 ordered and critical. All we need to do here is remove the entry
6139 and exit markers for REGION. */
6141 static void
6142 expand_omp_synch (struct omp_region *region)
6144 basic_block entry_bb, exit_bb;
6145 gimple_stmt_iterator si;
6147 entry_bb = region->entry;
6148 exit_bb = region->exit;
6150 si = gsi_last_bb (entry_bb);
6151 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6152 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6153 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6154 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6155 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6156 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6157 gsi_remove (&si, true);
6158 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6160 if (exit_bb)
6162 si = gsi_last_bb (exit_bb);
6163 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6164 gsi_remove (&si, true);
6165 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6169 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6170 operation as a normal volatile load. */
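/* For illustration only (an informal sketch, not from the original
   sources): for a 4-byte type the transformation below is roughly

     GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr)
       -->  loaded_val = __atomic_load_4 (addr, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST instead when the seq_cst flag is set, and a
   VIEW_CONVERT_EXPR added when the builtin's return type differs from
   the type of LOADED_VAL.  */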
6172 static bool
6173 expand_omp_atomic_load (basic_block load_bb, tree addr,
6174 tree loaded_val, int index)
6176 enum built_in_function tmpbase;
6177 gimple_stmt_iterator gsi;
6178 basic_block store_bb;
6179 location_t loc;
6180 gimple *stmt;
6181 tree decl, call, type, itype;
6183 gsi = gsi_last_bb (load_bb);
6184 stmt = gsi_stmt (gsi);
6185 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6186 loc = gimple_location (stmt);
6188 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6189 is smaller than word size, then expand_atomic_load assumes that the load
6190 is atomic. We could avoid the builtin entirely in this case. */
6192 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6193 decl = builtin_decl_explicit (tmpbase);
6194 if (decl == NULL_TREE)
6195 return false;
6197 type = TREE_TYPE (loaded_val);
6198 itype = TREE_TYPE (TREE_TYPE (decl));
6200 call = build_call_expr_loc (loc, decl, 2, addr,
6201 build_int_cst (NULL,
6202 gimple_omp_atomic_seq_cst_p (stmt)
6203 ? MEMMODEL_SEQ_CST
6204 : MEMMODEL_RELAXED));
6205 if (!useless_type_conversion_p (type, itype))
6206 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6207 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6209 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6210 gsi_remove (&gsi, true);
6212 store_bb = single_succ (load_bb);
6213 gsi = gsi_last_bb (store_bb);
6214 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6215 gsi_remove (&gsi, true);
6217 if (gimple_in_ssa_p (cfun))
6218 update_ssa (TODO_update_ssa_no_phi);
6220 return true;
6223 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6224 operation as a normal volatile store. */
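/* For illustration only (an informal sketch, not from the original
   sources): for a 4-byte type a plain atomic write becomes roughly

     __atomic_store_4 (addr, stored_val, MEMMODEL_RELAXED);

   while the exchange case below (the loaded value is needed) becomes

     loaded_val = __atomic_exchange_4 (addr, stored_val, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST instead when the seq_cst flag is set.  */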
6226 static bool
6227 expand_omp_atomic_store (basic_block load_bb, tree addr,
6228 tree loaded_val, tree stored_val, int index)
6230 enum built_in_function tmpbase;
6231 gimple_stmt_iterator gsi;
6232 basic_block store_bb = single_succ (load_bb);
6233 location_t loc;
6234 gimple *stmt;
6235 tree decl, call, type, itype;
6236 machine_mode imode;
6237 bool exchange;
6239 gsi = gsi_last_bb (load_bb);
6240 stmt = gsi_stmt (gsi);
6241 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6243 /* If the load value is needed, then this isn't a store but an exchange. */
6244 exchange = gimple_omp_atomic_need_value_p (stmt);
6246 gsi = gsi_last_bb (store_bb);
6247 stmt = gsi_stmt (gsi);
6248 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6249 loc = gimple_location (stmt);
6251 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6252 is smaller than word size, then expand_atomic_store assumes that the store
6253 is atomic. We could avoid the builtin entirely in this case. */
6255 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6256 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6257 decl = builtin_decl_explicit (tmpbase);
6258 if (decl == NULL_TREE)
6259 return false;
6261 type = TREE_TYPE (stored_val);
6263 /* Dig out the type of the function's second argument. */
6264 itype = TREE_TYPE (decl);
6265 itype = TYPE_ARG_TYPES (itype);
6266 itype = TREE_CHAIN (itype);
6267 itype = TREE_VALUE (itype);
6268 imode = TYPE_MODE (itype);
6270 if (exchange && !can_atomic_exchange_p (imode, true))
6271 return false;
6273 if (!useless_type_conversion_p (itype, type))
6274 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6275 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6276 build_int_cst (NULL,
6277 gimple_omp_atomic_seq_cst_p (stmt)
6278 ? MEMMODEL_SEQ_CST
6279 : MEMMODEL_RELAXED));
6280 if (exchange)
6282 if (!useless_type_conversion_p (type, itype))
6283 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6284 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6287 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6288 gsi_remove (&gsi, true);
6290 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6291 gsi = gsi_last_bb (load_bb);
6292 gsi_remove (&gsi, true);
6294 if (gimple_in_ssa_p (cfun))
6295 update_ssa (TODO_update_ssa_no_phi);
6297 return true;
6300 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6301 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6302 size of the data type, and thus usable to find the index of the builtin
6303 decl. Returns false if the expression is not of the proper form. */
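/* For illustration only (a hypothetical example, not from the original
   sources): given

     #pragma omp atomic
     x += n;

   with a 4-byte X, this routine emits roughly

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   the *_FETCH_* forms (e.g. __atomic_add_fetch_4) are used instead when
   the new value is needed by a capture clause.  */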
6305 static bool
6306 expand_omp_atomic_fetch_op (basic_block load_bb,
6307 tree addr, tree loaded_val,
6308 tree stored_val, int index)
6310 enum built_in_function oldbase, newbase, tmpbase;
6311 tree decl, itype, call;
6312 tree lhs, rhs;
6313 basic_block store_bb = single_succ (load_bb);
6314 gimple_stmt_iterator gsi;
6315 gimple *stmt;
6316 location_t loc;
6317 enum tree_code code;
6318 bool need_old, need_new;
6319 machine_mode imode;
6320 bool seq_cst;
6322 /* We expect to find the following sequences:
6324 load_bb:
6325 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6327 store_bb:
6328 val = tmp OP something; (or: something OP tmp)
6329 GIMPLE_OMP_ATOMIC_STORE (val)
6331 ???FIXME: Allow a more flexible sequence.
6332 Perhaps use data flow to pick the statements.
6336 gsi = gsi_after_labels (store_bb);
6337 stmt = gsi_stmt (gsi);
6338 loc = gimple_location (stmt);
6339 if (!is_gimple_assign (stmt))
6340 return false;
6341 gsi_next (&gsi);
6342 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6343 return false;
6344 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6345 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6346 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6347 gcc_checking_assert (!need_old || !need_new);
6349 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6350 return false;
6352 /* Check for one of the supported fetch-op operations. */
6353 code = gimple_assign_rhs_code (stmt);
6354 switch (code)
6356 case PLUS_EXPR:
6357 case POINTER_PLUS_EXPR:
6358 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6359 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6360 break;
6361 case MINUS_EXPR:
6362 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6363 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6364 break;
6365 case BIT_AND_EXPR:
6366 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6367 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6368 break;
6369 case BIT_IOR_EXPR:
6370 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6371 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6372 break;
6373 case BIT_XOR_EXPR:
6374 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6375 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6376 break;
6377 default:
6378 return false;
6381 /* Make sure the expression is of the proper form. */
6382 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6383 rhs = gimple_assign_rhs2 (stmt);
6384 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6385 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6386 rhs = gimple_assign_rhs1 (stmt);
6387 else
6388 return false;
6390 tmpbase = ((enum built_in_function)
6391 ((need_new ? newbase : oldbase) + index + 1));
6392 decl = builtin_decl_explicit (tmpbase);
6393 if (decl == NULL_TREE)
6394 return false;
6395 itype = TREE_TYPE (TREE_TYPE (decl));
6396 imode = TYPE_MODE (itype);
6398 /* We could test all of the various optabs involved, but the fact of the
6399 matter is that (with the exception of i486 vs i586 and xadd) all targets
6400 that support any atomic operation optab also implement compare-and-swap.
6401 Let optabs.c take care of expanding any compare-and-swap loop. */
6402 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6403 return false;
6405 gsi = gsi_last_bb (load_bb);
6406 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6408 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6409 It only requires that the operation happen atomically. Thus we can
6410 use the RELAXED memory model. */
6411 call = build_call_expr_loc (loc, decl, 3, addr,
6412 fold_convert_loc (loc, itype, rhs),
6413 build_int_cst (NULL,
6414 seq_cst ? MEMMODEL_SEQ_CST
6415 : MEMMODEL_RELAXED));
6417 if (need_old || need_new)
6419 lhs = need_old ? loaded_val : stored_val;
6420 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6421 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6423 else
6424 call = fold_convert_loc (loc, void_type_node, call);
6425 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6426 gsi_remove (&gsi, true);
6428 gsi = gsi_last_bb (store_bb);
6429 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6430 gsi_remove (&gsi, true);
6431 gsi = gsi_last_bb (store_bb);
6432 stmt = gsi_stmt (gsi);
6433 gsi_remove (&gsi, true);
6435 if (gimple_in_ssa_p (cfun))
6437 release_defs (stmt);
6438 update_ssa (TODO_update_ssa_no_phi);
6441 return true;
6444 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6446 oldval = *addr;
6447 repeat:
6448 newval = rhs; // with oldval replacing *addr in rhs
6449 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6450 if (oldval != newval)
6451 goto repeat;
6453 INDEX is log2 of the size of the data type, and thus usable to find the
6454 index of the builtin decl. */
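/* For illustration only (a hypothetical example, not from the original
   sources): an update with no matching fetch-op builtin, e.g.

     #pragma omp atomic
     x = x * 2;

   ends up in this compare-and-swap loop, retrying
   __sync_val_compare_and_swap_N on the (possibly view-converted)
   integral image of the memory location until the swap succeeds.  */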
6456 static bool
6457 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6458 tree addr, tree loaded_val, tree stored_val,
6459 int index)
6461 tree loadedi, storedi, initial, new_storedi, old_vali;
6462 tree type, itype, cmpxchg, iaddr;
6463 gimple_stmt_iterator si;
6464 basic_block loop_header = single_succ (load_bb);
6465 gimple *phi, *stmt;
6466 edge e;
6467 enum built_in_function fncode;
6469 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6470 order to use the RELAXED memory model effectively. */
6471 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6472 + index + 1);
6473 cmpxchg = builtin_decl_explicit (fncode);
6474 if (cmpxchg == NULL_TREE)
6475 return false;
6476 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6477 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6479 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6480 || !can_atomic_load_p (TYPE_MODE (itype)))
6481 return false;
6483 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6484 si = gsi_last_bb (load_bb);
6485 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6487 /* For floating-point values, we'll need to view-convert them to integers
6488 so that we can perform the atomic compare and swap. Simplify the
6489 following code by always setting up the "i"ntegral variables. */
6490 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6492 tree iaddr_val;
6494 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6495 true));
6496 iaddr_val
6497 = force_gimple_operand_gsi (&si,
6498 fold_convert (TREE_TYPE (iaddr), addr),
6499 false, NULL_TREE, true, GSI_SAME_STMT);
6500 stmt = gimple_build_assign (iaddr, iaddr_val);
6501 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6502 loadedi = create_tmp_var (itype);
6503 if (gimple_in_ssa_p (cfun))
6504 loadedi = make_ssa_name (loadedi);
6506 else
6508 iaddr = addr;
6509 loadedi = loaded_val;
6512 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6513 tree loaddecl = builtin_decl_explicit (fncode);
6514 if (loaddecl)
6515 initial
6516 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6517 build_call_expr (loaddecl, 2, iaddr,
6518 build_int_cst (NULL_TREE,
6519 MEMMODEL_RELAXED)));
6520 else
6521 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6522 build_int_cst (TREE_TYPE (iaddr), 0));
6524 initial
6525 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6526 GSI_SAME_STMT);
6528 /* Move the value to the LOADEDI temporary. */
6529 if (gimple_in_ssa_p (cfun))
6531 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6532 phi = create_phi_node (loadedi, loop_header);
6533 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6534 initial);
6536 else
6537 gsi_insert_before (&si,
6538 gimple_build_assign (loadedi, initial),
6539 GSI_SAME_STMT);
6540 if (loadedi != loaded_val)
6542 gimple_stmt_iterator gsi2;
6543 tree x;
6545 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6546 gsi2 = gsi_start_bb (loop_header);
6547 if (gimple_in_ssa_p (cfun))
6549 gassign *stmt;
6550 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6551 true, GSI_SAME_STMT);
6552 stmt = gimple_build_assign (loaded_val, x);
6553 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6555 else
6557 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6558 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6559 true, GSI_SAME_STMT);
6562 gsi_remove (&si, true);
6564 si = gsi_last_bb (store_bb);
6565 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6567 if (iaddr == addr)
6568 storedi = stored_val;
6569 else
6570 storedi
6571 = force_gimple_operand_gsi (&si,
6572 build1 (VIEW_CONVERT_EXPR, itype,
6573 stored_val), true, NULL_TREE, true,
6574 GSI_SAME_STMT);
6576 /* Build the compare&swap statement. */
6577 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6578 new_storedi = force_gimple_operand_gsi (&si,
6579 fold_convert (TREE_TYPE (loadedi),
6580 new_storedi),
6581 true, NULL_TREE,
6582 true, GSI_SAME_STMT);
6584 if (gimple_in_ssa_p (cfun))
6585 old_vali = loadedi;
6586 else
6588 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6589 stmt = gimple_build_assign (old_vali, loadedi);
6590 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6592 stmt = gimple_build_assign (loadedi, new_storedi);
6593 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6596 /* Note that we always perform the comparison as an integer, even for
6597 floating point. This allows the atomic operation to properly
6598 succeed even with NaNs and -0.0. */
6599 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6600 stmt = gimple_build_cond_empty (ne);
6601 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6603 /* Update cfg. */
6604 e = single_succ_edge (store_bb);
6605 e->flags &= ~EDGE_FALLTHRU;
6606 e->flags |= EDGE_FALSE_VALUE;
6608 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6610 /* Copy the new value to loadedi (we already did that before the condition
6611 if we are not in SSA). */
6612 if (gimple_in_ssa_p (cfun))
6614 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6615 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6618 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6619 gsi_remove (&si, true);
6621 struct loop *loop = alloc_loop ();
6622 loop->header = loop_header;
6623 loop->latch = store_bb;
6624 add_loop (loop, loop_header->loop_father);
6626 if (gimple_in_ssa_p (cfun))
6627 update_ssa (TODO_update_ssa_no_phi);
6629 return true;
6632 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6634 GOMP_atomic_start ();
6635 *addr = rhs;
6636 GOMP_atomic_end ();
6638 The result is not globally atomic, but works so long as all parallel
6639 references are within #pragma omp atomic directives. According to
6640 responses received from omp@openmp.org, this appears to be within the
6641 spec, which makes sense, since that's how several other compilers
6642 handle this situation as well.
6643 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6644 expanding. STORED_VAL is the operand of the matching
6645 GIMPLE_OMP_ATOMIC_STORE.
6647 We replace
6648 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6649 loaded_val = *addr;
6651 and replace
6652 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6653 *addr = stored_val;
6656 static bool
6657 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6658 tree addr, tree loaded_val, tree stored_val)
6660 gimple_stmt_iterator si;
6661 gassign *stmt;
6662 tree t;
6664 si = gsi_last_bb (load_bb);
6665 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6667 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6668 t = build_call_expr (t, 0);
6669 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6671 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6672 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6673 gsi_remove (&si, true);
6675 si = gsi_last_bb (store_bb);
6676 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6678 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6679 stored_val);
6680 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6682 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6683 t = build_call_expr (t, 0);
6684 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6685 gsi_remove (&si, true);
6687 if (gimple_in_ssa_p (cfun))
6688 update_ssa (TODO_update_ssa_no_phi);
6689 return true;
6692 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
6693 using expand_omp_atomic_fetch_op. If that fails, we try to
6694 call expand_omp_atomic_pipeline, and if it fails too, the
6695 ultimate fallback is wrapping the operation in a mutex
6696 (expand_omp_atomic_mutex). REGION is the atomic region built
6697 by build_omp_regions_1(). */
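/* For illustration only (a hypothetical example, not from the original
   sources): for an atomic update of a double, e.g.

     #pragma omp atomic
     d += 1.0;

   the fetch-op path does not apply (the type is neither integral nor a
   pointer), so on targets with a suitable compare-and-swap the pipeline
   expansion is used on the value's integer image; otherwise the
   GOMP_atomic_start/GOMP_atomic_end mutex fallback is emitted.  */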
6699 static void
6700 expand_omp_atomic (struct omp_region *region)
6702 basic_block load_bb = region->entry, store_bb = region->exit;
6703 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6704 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6705 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6706 tree addr = gimple_omp_atomic_load_rhs (load);
6707 tree stored_val = gimple_omp_atomic_store_val (store);
6708 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6709 HOST_WIDE_INT index;
6711 /* Make sure the type is one of the supported sizes. */
6712 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6713 index = exact_log2 (index);
6714 if (index >= 0 && index <= 4)
6716 unsigned int align = TYPE_ALIGN_UNIT (type);
6718 /* __sync builtins require strict data alignment. */
6719 if (exact_log2 (align) >= index)
6721 /* Atomic load. */
6722 if (loaded_val == stored_val
6723 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6724 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6725 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6726 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6727 return;
6729 /* Atomic store. */
6730 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6731 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6732 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6733 && store_bb == single_succ (load_bb)
6734 && first_stmt (store_bb) == store
6735 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6736 stored_val, index))
6737 return;
6739 /* When possible, use specialized atomic update functions. */
6740 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6741 && store_bb == single_succ (load_bb)
6742 && expand_omp_atomic_fetch_op (load_bb, addr,
6743 loaded_val, stored_val, index))
6744 return;
6746 /* If we don't have specialized __sync builtins, try to implement it
6747 as a compare-and-swap loop. */
6748 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6749 loaded_val, stored_val, index))
6750 return;
6754 /* The ultimate fallback is wrapping the operation in a mutex. */
6755 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6758 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6759 at REGION_EXIT. */
6761 static void
6762 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6763 basic_block region_exit)
6765 struct loop *outer = region_entry->loop_father;
6766 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6768 /* Don't parallelize the kernels region if it contains more than one outer
6769 loop. */
6770 unsigned int nr_outer_loops = 0;
6771 struct loop *single_outer = NULL;
6772 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6774 gcc_assert (loop_outer (loop) == outer);
6776 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6777 continue;
6779 if (region_exit != NULL
6780 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6781 continue;
6783 nr_outer_loops++;
6784 single_outer = loop;
6786 if (nr_outer_loops != 1)
6787 return;
6789 for (struct loop *loop = single_outer->inner;
6790 loop != NULL;
6791 loop = loop->inner)
6792 if (loop->next)
6793 return;
6795 /* Mark the loops in the region. */
6796 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6797 loop->in_oacc_kernels_region = true;
6800 /* Types used to pass grid and work-group sizes to kernel invocation. */
6802 struct GTY(()) grid_launch_attributes_trees
6804 tree kernel_dim_array_type;
6805 tree kernel_lattrs_dimnum_decl;
6806 tree kernel_lattrs_grid_decl;
6807 tree kernel_lattrs_group_decl;
6808 tree kernel_launch_attributes_type;
6811 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6813 /* Create types used to pass kernel launch attributes to target. */
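/* For illustration only (an informal sketch, not from the original
   sources): the record built below is laid out roughly as

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   the three-element arrays follow from the 0..2 index type used for
   kernel_dim_array_type.  */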
6815 static void
6816 grid_create_kernel_launch_attr_types (void)
6818 if (grid_attr_trees)
6819 return;
6820 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6822 tree dim_arr_index_type
6823 = build_index_type (build_int_cst (integer_type_node, 2));
6824 grid_attr_trees->kernel_dim_array_type
6825 = build_array_type (uint32_type_node, dim_arr_index_type);
6827 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6828 grid_attr_trees->kernel_lattrs_dimnum_decl
6829 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6830 uint32_type_node);
6831 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6833 grid_attr_trees->kernel_lattrs_grid_decl
6834 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6835 grid_attr_trees->kernel_dim_array_type);
6836 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6837 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6838 grid_attr_trees->kernel_lattrs_group_decl
6839 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6840 grid_attr_trees->kernel_dim_array_type);
6841 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6842 = grid_attr_trees->kernel_lattrs_grid_decl;
6843 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6844 "__gomp_kernel_launch_attributes",
6845 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6848 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6849 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6850 of type uint32_type_node. */
6852 static void
6853 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6854 tree fld_decl, int index, tree value)
6856 tree ref = build4 (ARRAY_REF, uint32_type_node,
6857 build3 (COMPONENT_REF,
6858 grid_attr_trees->kernel_dim_array_type,
6859 range_var, fld_decl, NULL_TREE),
6860 build_int_cst (integer_type_node, index),
6861 NULL_TREE, NULL_TREE);
6862 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6865 /* Return a tree representation of a pointer to a structure with grid and
6866 work-group size information. Statements filling that information will be
6867 inserted before GSI; TGT_STMT is the target statement which has the
6868 necessary information in it. */
6870 static tree
6871 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6872 gomp_target *tgt_stmt)
6874 grid_create_kernel_launch_attr_types ();
6875 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6876 "__kernel_launch_attrs");
6878 unsigned max_dim = 0;
6879 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6880 clause;
6881 clause = OMP_CLAUSE_CHAIN (clause))
6883 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6884 continue;
6886 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6887 max_dim = MAX (dim, max_dim);
6889 grid_insert_store_range_dim (gsi, lattrs,
6890 grid_attr_trees->kernel_lattrs_grid_decl,
6891 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6892 grid_insert_store_range_dim (gsi, lattrs,
6893 grid_attr_trees->kernel_lattrs_group_decl,
6894 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6897 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6898 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6899 gcc_checking_assert (max_dim <= 2);
6900 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6901 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6902 GSI_SAME_STMT);
6903 TREE_ADDRESSABLE (lattrs) = 1;
6904 return build_fold_addr_expr (lattrs);
6907 /* Build target argument identifier from the DEVICE identifier, value
6908 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6910 static tree
6911 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6913 tree t = build_int_cst (integer_type_node, device);
6914 if (subseqent_param)
6915 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6916 build_int_cst (integer_type_node,
6917 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6918 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6919 build_int_cst (integer_type_node, id));
6920 return t;
6923 /* Like above but return it in a type that can be directly stored as an element
6924 of the argument array. */
6926 static tree
6927 get_target_argument_identifier (int device, bool subseqent_param, int id)
6929 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6930 return fold_convert (ptr_type_node, t);
6933 /* Return a target argument consisting of DEVICE identifier, value identifier
6934 ID, and the actual VALUE. */
6936 static tree
6937 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6938 tree value)
6940 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6941 fold_convert (integer_type_node, value),
6942 build_int_cst (unsigned_type_node,
6943 GOMP_TARGET_ARG_VALUE_SHIFT));
6944 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6945 get_target_argument_identifier_1 (device, false, id));
6946 t = fold_convert (ptr_type_node, t);
6947 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6950 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6951 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6952 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6953 arguments. */
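/* For illustration only (an informal sketch, not from the original
   sources, and assuming the GOMP_TARGET_ARG_* layout from
   gomp-constants.h): a small constant is packed into one pointer-sized
   element roughly as

     (value << GOMP_TARGET_ARG_VALUE_SHIFT) | device | id

   which is why only values in the open signed 16-bit range take the
   single-element form; anything else is passed as an identifier element
   followed by a separate value element.  */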
6955 static void
6956 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6957 int id, tree value, vec <tree> *args)
6959 if (tree_fits_shwi_p (value)
6960 && tree_to_shwi (value) > -(1 << 15)
6961 && tree_to_shwi (value) < (1 << 15))
6962 args->quick_push (get_target_argument_value (gsi, device, id, value));
6963 else
6965 args->quick_push (get_target_argument_identifier (device, true, id));
6966 value = fold_convert (ptr_type_node, value);
6967 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6968 GSI_SAME_STMT);
6969 args->quick_push (value);
6973 /* Create an array of arguments that is then passed to GOMP_target. */
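/* For illustration only (an informal sketch, not from the original
   sources): for a typical target region the array built below looks like

     .omp_target_args[] = { <num_teams argument>, <thread_limit argument>,
                            <optional HSA grid arguments>, NULL }

   i.e. a NULL-terminated vector of pointer-sized elements, each produced
   by the helpers above.  */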
6975 static tree
6976 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6978 auto_vec <tree, 6> args;
6979 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6980 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6981 if (c)
6982 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6983 else
6984 t = integer_minus_one_node;
6985 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6986 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6988 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6989 if (c)
6990 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6991 else
6992 t = integer_minus_one_node;
6993 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6994 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6995 &args);
6997 /* Add HSA-specific grid sizes, if available. */
6998 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6999 OMP_CLAUSE__GRIDDIM_))
7001 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7002 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7003 args.quick_push (t);
7004 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7007 /* Produce more, perhaps device-specific, arguments here. */
7009 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7010 args.length () + 1),
7011 ".omp_target_args");
7012 for (unsigned i = 0; i < args.length (); i++)
7014 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7015 build_int_cst (integer_type_node, i),
7016 NULL_TREE, NULL_TREE);
7017 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7018 GSI_SAME_STMT);
7020 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7021 build_int_cst (integer_type_node, args.length ()),
7022 NULL_TREE, NULL_TREE);
7023 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7024 GSI_SAME_STMT);
7025 TREE_ADDRESSABLE (argarray) = 1;
7026 return build_fold_addr_expr (argarray);
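/* Illustrative only (a sketch inferred from the code above, not from the
   original sources): for a target region without _GRIDDIM_ clauses and
   with small or absent NUM_TEAMS/THREAD_LIMIT values, .omp_target_args
   ends up holding three pointer-sized elements: the packed num-teams
   argument, the packed thread-limit argument (both encode -1 when the
   clauses are absent) and the terminating NULL pointer.  When _GRIDDIM_
   clauses are present, the GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
   identifier and the launch-attribute pointer returned by
   grid_get_kernel_launch_attributes are added before the terminator.  */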
7029 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7031 static void
7032 expand_omp_target (struct omp_region *region)
7034 basic_block entry_bb, exit_bb, new_bb;
7035 struct function *child_cfun;
7036 tree child_fn, block, t;
7037 gimple_stmt_iterator gsi;
7038 gomp_target *entry_stmt;
7039 gimple *stmt;
7040 edge e;
7041 bool offloaded, data_region;
7043 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7044 new_bb = region->entry;
7046 offloaded = is_gimple_omp_offloaded (entry_stmt);
7047 switch (gimple_omp_target_kind (entry_stmt))
7049 case GF_OMP_TARGET_KIND_REGION:
7050 case GF_OMP_TARGET_KIND_UPDATE:
7051 case GF_OMP_TARGET_KIND_ENTER_DATA:
7052 case GF_OMP_TARGET_KIND_EXIT_DATA:
7053 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7054 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7055 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7056 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7057 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7058 data_region = false;
7059 break;
7060 case GF_OMP_TARGET_KIND_DATA:
7061 case GF_OMP_TARGET_KIND_OACC_DATA:
7062 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7063 data_region = true;
7064 break;
7065 default:
7066 gcc_unreachable ();
7069 child_fn = NULL_TREE;
7070 child_cfun = NULL;
7071 if (offloaded)
7073 child_fn = gimple_omp_target_child_fn (entry_stmt);
7074 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7077 /* Supported by expand_omp_taskreg, but not here. */
7078 if (child_cfun != NULL)
7079 gcc_checking_assert (!child_cfun->cfg);
7080 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7082 entry_bb = region->entry;
7083 exit_bb = region->exit;
7085 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7087 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7089 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7090 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7091 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7092 DECL_ATTRIBUTES (child_fn)
7093 = tree_cons (get_identifier ("oacc kernels"),
7094 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7097 if (offloaded)
7099 unsigned srcidx, dstidx, num;
7101 /* If the offloading region needs data sent from the parent
7102 function, then the very first statement (except possible
7103 tree profile counter updates) of the offloading body
7104 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7105 &.OMP_DATA_O is passed as an argument to the child function,
7106 we need to replace it with the argument as seen by the child
7107 function.
7109 In most cases, this will end up being the identity assignment
7110 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7111 a function call that has been inlined, the original PARM_DECL
7112 .OMP_DATA_I may have been converted into a different local
7113 variable, in which case we need to keep the assignment. */
7114 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7115 if (data_arg)
7117 basic_block entry_succ_bb = single_succ (entry_bb);
7118 gimple_stmt_iterator gsi;
7119 tree arg;
7120 gimple *tgtcopy_stmt = NULL;
7121 tree sender = TREE_VEC_ELT (data_arg, 0);
7123 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7125 gcc_assert (!gsi_end_p (gsi));
7126 stmt = gsi_stmt (gsi);
7127 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7128 continue;
7130 if (gimple_num_ops (stmt) == 2)
7132 tree arg = gimple_assign_rhs1 (stmt);
7134 /* We're ignoring the subcode because we're
7135 effectively doing a STRIP_NOPS. */
7137 if (TREE_CODE (arg) == ADDR_EXPR
7138 && TREE_OPERAND (arg, 0) == sender)
7140 tgtcopy_stmt = stmt;
7141 break;
7146 gcc_assert (tgtcopy_stmt != NULL);
7147 arg = DECL_ARGUMENTS (child_fn);
7149 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7150 gsi_remove (&gsi, true);
7153 /* Declare local variables needed in CHILD_CFUN. */
7154 block = DECL_INITIAL (child_fn);
7155 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7156 /* The gimplifier could record temporaries in the offloading block
7157 rather than in the containing function's local_decls chain,
7158 in which case cgraph would miss finalizing them. Do it now. */
7159 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7160 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7161 varpool_node::finalize_decl (t);
7162 DECL_SAVED_TREE (child_fn) = NULL;
7163 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7164 gimple_set_body (child_fn, NULL);
7165 TREE_USED (block) = 1;
7167 /* Reset DECL_CONTEXT on function arguments. */
7168 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7169 DECL_CONTEXT (t) = child_fn;
7171 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7172 so that the part following it can be moved to the child function. */
7173 gsi = gsi_last_bb (entry_bb);
7174 stmt = gsi_stmt (gsi);
7175 gcc_assert (stmt
7176 && gimple_code (stmt) == gimple_code (entry_stmt));
7177 e = split_block (entry_bb, stmt);
7178 gsi_remove (&gsi, true);
7179 entry_bb = e->dest;
7180 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7182 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7183 if (exit_bb)
7185 gsi = gsi_last_bb (exit_bb);
7186 gcc_assert (!gsi_end_p (gsi)
7187 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7188 stmt = gimple_build_return (NULL);
7189 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7190 gsi_remove (&gsi, true);
7193 /* Make sure to generate early debug for the function before
7194 outlining anything. */
7195 if (! gimple_in_ssa_p (cfun))
7196 (*debug_hooks->early_global_decl) (cfun->decl);
7198 /* Move the offloading region into CHILD_CFUN. */
7200 block = gimple_block (entry_stmt);
7202 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7203 if (exit_bb)
7204 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7205 /* When the OMP expansion process cannot guarantee an up-to-date
7206 loop tree, arrange for the child function to fix up loops. */
7207 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7208 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7210 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7211 num = vec_safe_length (child_cfun->local_decls);
7212 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7214 t = (*child_cfun->local_decls)[srcidx];
7215 if (DECL_CONTEXT (t) == cfun->decl)
7216 continue;
7217 if (srcidx != dstidx)
7218 (*child_cfun->local_decls)[dstidx] = t;
7219 dstidx++;
7221 if (dstidx != num)
7222 vec_safe_truncate (child_cfun->local_decls, dstidx);
7224 /* Inform the callgraph about the new function. */
7225 child_cfun->curr_properties = cfun->curr_properties;
7226 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7227 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7228 cgraph_node *node = cgraph_node::get_create (child_fn);
7229 node->parallelized_function = 1;
7230 cgraph_node::add_new_function (child_fn, true);
7232 /* Add the new function to the offload table. */
7233 if (ENABLE_OFFLOADING)
7234 vec_safe_push (offload_funcs, child_fn);
7236 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7237 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7239 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7240 fixed in a following pass. */
7241 push_cfun (child_cfun);
7242 if (need_asm)
7243 assign_assembler_name_if_needed (child_fn);
7244 cgraph_edge::rebuild_edges ();
7246 /* Some EH regions might become dead, see PR34608. If
7247 pass_cleanup_cfg isn't the first pass to happen with the
7248 new child, these dead EH edges might cause problems.
7249 Clean them up now. */
7250 if (flag_exceptions)
7252 basic_block bb;
7253 bool changed = false;
7255 FOR_EACH_BB_FN (bb, cfun)
7256 changed |= gimple_purge_dead_eh_edges (bb);
7257 if (changed)
7258 cleanup_tree_cfg ();
7260 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7261 verify_loop_structure ();
7262 pop_cfun ();
7264 if (dump_file && !gimple_in_ssa_p (cfun))
7266 omp_any_child_fn_dumped = true;
7267 dump_function_header (dump_file, child_fn, dump_flags);
7268 dump_function_to_file (child_fn, dump_file, dump_flags);
7272 /* Emit a library call to launch the offloading region, or do data
7273 transfers. */
7274 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7275 enum built_in_function start_ix;
7276 location_t clause_loc;
7277 unsigned int flags_i = 0;
7279 switch (gimple_omp_target_kind (entry_stmt))
7281 case GF_OMP_TARGET_KIND_REGION:
7282 start_ix = BUILT_IN_GOMP_TARGET;
7283 break;
7284 case GF_OMP_TARGET_KIND_DATA:
7285 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7286 break;
7287 case GF_OMP_TARGET_KIND_UPDATE:
7288 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7289 break;
7290 case GF_OMP_TARGET_KIND_ENTER_DATA:
7291 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7292 break;
7293 case GF_OMP_TARGET_KIND_EXIT_DATA:
7294 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7295 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7296 break;
7297 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7298 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7299 start_ix = BUILT_IN_GOACC_PARALLEL;
7300 break;
7301 case GF_OMP_TARGET_KIND_OACC_DATA:
7302 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7303 start_ix = BUILT_IN_GOACC_DATA_START;
7304 break;
7305 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7306 start_ix = BUILT_IN_GOACC_UPDATE;
7307 break;
7308 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7309 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7310 break;
7311 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7312 start_ix = BUILT_IN_GOACC_DECLARE;
7313 break;
7314 default:
7315 gcc_unreachable ();
7318 clauses = gimple_omp_target_clauses (entry_stmt);
7320 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7321 library choose) and there is no conditional. */
7322 cond = NULL_TREE;
7323 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7325 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7326 if (c)
7327 cond = OMP_CLAUSE_IF_EXPR (c);
7329 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7330 if (c)
7332 /* Even if we pass it to all library function calls, it is currently only
7333 defined/used for the OpenMP target ones. */
7334 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7335 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7336 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7337 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7339 device = OMP_CLAUSE_DEVICE_ID (c);
7340 clause_loc = OMP_CLAUSE_LOCATION (c);
7342 else
7343 clause_loc = gimple_location (entry_stmt);
7345 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7346 if (c)
7347 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7349 /* Ensure 'device' is of the correct type. */
7350 device = fold_convert_loc (clause_loc, integer_type_node, device);
7352 /* If we found the clause 'if (cond)', build
7353 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
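   /* Rough shape of the control flow built below (illustration only,
      reconstructed from the code that follows):

          cond_bb:  if (cond) goto then_bb; else goto else_bb;
          then_bb:  tmp_var = device;
          else_bb:  tmp_var = GOMP_DEVICE_HOST_FALLBACK;
          new_bb:   ... the library call below uses tmp_var as DEVICE ...

      with fallthru edges from both then_bb and else_bb into new_bb.  */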
7354 if (cond)
7356 cond = gimple_boolify (cond);
7358 basic_block cond_bb, then_bb, else_bb;
7359 edge e;
7360 tree tmp_var;
7362 tmp_var = create_tmp_var (TREE_TYPE (device));
7363 if (offloaded)
7364 e = split_block_after_labels (new_bb);
7365 else
7367 gsi = gsi_last_bb (new_bb);
7368 gsi_prev (&gsi);
7369 e = split_block (new_bb, gsi_stmt (gsi));
7371 cond_bb = e->src;
7372 new_bb = e->dest;
7373 remove_edge (e);
7375 then_bb = create_empty_bb (cond_bb);
7376 else_bb = create_empty_bb (then_bb);
7377 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7378 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7380 stmt = gimple_build_cond_empty (cond);
7381 gsi = gsi_last_bb (cond_bb);
7382 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7384 gsi = gsi_start_bb (then_bb);
7385 stmt = gimple_build_assign (tmp_var, device);
7386 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7388 gsi = gsi_start_bb (else_bb);
7389 stmt = gimple_build_assign (tmp_var,
7390 build_int_cst (integer_type_node,
7391 GOMP_DEVICE_HOST_FALLBACK));
7392 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7394 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7395 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7396 add_bb_to_loop (then_bb, cond_bb->loop_father);
7397 add_bb_to_loop (else_bb, cond_bb->loop_father);
7398 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7399 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7401 device = tmp_var;
7402 gsi = gsi_last_bb (new_bb);
7404 else
7406 gsi = gsi_last_bb (new_bb);
7407 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7408 true, GSI_SAME_STMT);
7411 t = gimple_omp_target_data_arg (entry_stmt);
7412 if (t == NULL)
7414 t1 = size_zero_node;
7415 t2 = build_zero_cst (ptr_type_node);
7416 t3 = t2;
7417 t4 = t2;
7419 else
7421 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7422 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7423 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7424 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7425 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7428 gimple *g;
7429 bool tagging = false;
7430 /* The maximum number of arguments used by any start_ix, not counting varargs. */
7431 auto_vec<tree, 11> args;
7432 args.quick_push (device);
7433 if (offloaded)
7434 args.quick_push (build_fold_addr_expr (child_fn));
7435 args.quick_push (t1);
7436 args.quick_push (t2);
7437 args.quick_push (t3);
7438 args.quick_push (t4);
7439 switch (start_ix)
7441 case BUILT_IN_GOACC_DATA_START:
7442 case BUILT_IN_GOACC_DECLARE:
7443 case BUILT_IN_GOMP_TARGET_DATA:
7444 break;
7445 case BUILT_IN_GOMP_TARGET:
7446 case BUILT_IN_GOMP_TARGET_UPDATE:
7447 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7448 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7449 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7450 if (c)
7451 depend = OMP_CLAUSE_DECL (c);
7452 else
7453 depend = build_int_cst (ptr_type_node, 0);
7454 args.quick_push (depend);
7455 if (start_ix == BUILT_IN_GOMP_TARGET)
7456 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7457 break;
7458 case BUILT_IN_GOACC_PARALLEL:
7459 oacc_set_fn_attrib (child_fn, clauses, &args);
7460 tagging = true;
7461 /* FALLTHRU */
7462 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7463 case BUILT_IN_GOACC_UPDATE:
7465 tree t_async = NULL_TREE;
7467 /* If present, use the value specified by the respective
7468 clause, making sure that it is of the correct type. */
7469 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7470 if (c)
7471 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7472 integer_type_node,
7473 OMP_CLAUSE_ASYNC_EXPR (c));
7474 else if (!tagging)
7475 /* Default values for t_async. */
7476 t_async = fold_convert_loc (gimple_location (entry_stmt),
7477 integer_type_node,
7478 build_int_cst (integer_type_node,
7479 GOMP_ASYNC_SYNC));
7480 if (tagging && t_async)
7482 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7484 if (TREE_CODE (t_async) == INTEGER_CST)
7486 /* See if we can pack the async arg into the tag's
7487 operand. */
7488 i_async = TREE_INT_CST_LOW (t_async);
7489 if (i_async < GOMP_LAUNCH_OP_MAX)
7490 t_async = NULL_TREE;
7491 else
7492 i_async = GOMP_LAUNCH_OP_MAX;
7494 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7495 i_async));
7497 if (t_async)
7498 args.safe_push (t_async);
7500 /* Save the argument index, and ... */
7501 unsigned t_wait_idx = args.length ();
7502 unsigned num_waits = 0;
7503 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7504 if (!tagging || c)
7505 /* ... push a placeholder. */
7506 args.safe_push (integer_zero_node);
7508 for (; c; c = OMP_CLAUSE_CHAIN (c))
7509 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7511 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7512 integer_type_node,
7513 OMP_CLAUSE_WAIT_EXPR (c)));
7514 num_waits++;
7517 if (!tagging || num_waits)
7519 tree len;
7521 /* Now that we know the number, update the placeholder. */
7522 if (tagging)
7523 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7524 else
7525 len = build_int_cst (integer_type_node, num_waits);
7526 len = fold_convert_loc (gimple_location (entry_stmt),
7527 unsigned_type_node, len);
7528 args[t_wait_idx] = len;
7531 break;
7532 default:
7533 gcc_unreachable ();
7535 if (tagging)
7536 /* Push terminal marker - zero. */
7537 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
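   /* When TAGGING is set (the BUILT_IN_GOACC_PARALLEL case), the trailing
      arguments assembled above form a sequence of launch tags (a sketch
      inferred from the code, not from the original sources): an optional
      GOMP_LAUNCH_ASYNC tag, with small constant async values packed
      directly into the tag operand, an optional GOMP_LAUNCH_WAIT tag
      followed by the individual wait expressions, and the terminating
      zero tag pushed just above.  */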
7539 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7540 gimple_set_location (g, gimple_location (entry_stmt));
7541 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7542 if (!offloaded)
7544 g = gsi_stmt (gsi);
7545 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7546 gsi_remove (&gsi, true);
7548 if (data_region && region->exit)
7550 gsi = gsi_last_bb (region->exit);
7551 g = gsi_stmt (gsi);
7552 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7553 gsi_remove (&gsi, true);
7557 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7558 the iteration variable derived from the thread number. INTRA_GROUP means this
7559 is an expansion of a loop iterating over work-items within a separate
7560 iteration over groups. */
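/* For illustration only (a sketch inferred from the function body below,
   not from the original sources): a gridified loop of the form

     for (i = N1; i < N2; i += STEP)
       BODY;

   is reduced to its body, with the control variable computed directly
   from the HSA work-item (or work-group) id of the corresponding
   dimension, conceptually

     i = N1 + <BUILT_IN_HSA_WORKITEMABSID (dim)> * STEP;
     BODY;

   The iteration count itself is conveyed separately through the kernel
   launch attributes built from the _GRIDDIM_ clauses.  */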
7562 static void
7563 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7565 gimple_stmt_iterator gsi;
7566 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7567 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7568 == GF_OMP_FOR_KIND_GRID_LOOP);
7569 size_t collapse = gimple_omp_for_collapse (for_stmt);
7570 struct omp_for_data_loop *loops
7571 = XALLOCAVEC (struct omp_for_data_loop,
7572 gimple_omp_for_collapse (for_stmt));
7573 struct omp_for_data fd;
7575 remove_edge (BRANCH_EDGE (kfor->entry));
7576 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7578 gcc_assert (kfor->cont);
7579 omp_extract_for_data (for_stmt, &fd, loops);
7581 gsi = gsi_start_bb (body_bb);
7583 for (size_t dim = 0; dim < collapse; dim++)
7585 tree type, itype;
7586 itype = type = TREE_TYPE (fd.loops[dim].v);
7587 if (POINTER_TYPE_P (type))
7588 itype = signed_type_for (type);
7590 tree n1 = fd.loops[dim].n1;
7591 tree step = fd.loops[dim].step;
7592 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7593 true, NULL_TREE, true, GSI_SAME_STMT);
7594 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7595 true, NULL_TREE, true, GSI_SAME_STMT);
7596 tree threadid;
7597 if (gimple_omp_for_grid_group_iter (for_stmt))
7599 gcc_checking_assert (!intra_group);
7600 threadid = build_call_expr (builtin_decl_explicit
7601 (BUILT_IN_HSA_WORKGROUPID), 1,
7602 build_int_cstu (unsigned_type_node, dim));
7604 else if (intra_group)
7605 threadid = build_call_expr (builtin_decl_explicit
7606 (BUILT_IN_HSA_WORKITEMID), 1,
7607 build_int_cstu (unsigned_type_node, dim));
7608 else
7609 threadid = build_call_expr (builtin_decl_explicit
7610 (BUILT_IN_HSA_WORKITEMABSID), 1,
7611 build_int_cstu (unsigned_type_node, dim));
7612 threadid = fold_convert (itype, threadid);
7613 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7614 true, GSI_SAME_STMT);
7616 tree startvar = fd.loops[dim].v;
7617 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7618 if (POINTER_TYPE_P (type))
7619 t = fold_build_pointer_plus (n1, t);
7620 else
7621 t = fold_build2 (PLUS_EXPR, type, t, n1);
7622 t = fold_convert (type, t);
7623 t = force_gimple_operand_gsi (&gsi, t,
7624 DECL_P (startvar)
7625 && TREE_ADDRESSABLE (startvar),
7626 NULL_TREE, true, GSI_SAME_STMT);
7627 gassign *assign_stmt = gimple_build_assign (startvar, t);
7628 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7630 /* Remove the omp for statement. */
7631 gsi = gsi_last_bb (kfor->entry);
7632 gsi_remove (&gsi, true);
7634 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7635 gsi = gsi_last_bb (kfor->cont);
7636 gcc_assert (!gsi_end_p (gsi)
7637 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7638 gsi_remove (&gsi, true);
7640 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7641 gsi = gsi_last_bb (kfor->exit);
7642 gcc_assert (!gsi_end_p (gsi)
7643 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7644 if (intra_group)
7645 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7646 gsi_remove (&gsi, true);
7648 /* Fix up the much simpler CFG. */
7649 remove_edge (find_edge (kfor->cont, body_bb));
7651 if (kfor->cont != body_bb)
7652 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7653 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7656 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7657 argument_decls. */
7659 struct grid_arg_decl_map
7661 tree old_arg;
7662 tree new_arg;
7665 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7666 pertaining to the kernel function. */
7668 static tree
7669 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7671 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7672 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7673 tree t = *tp;
7675 if (t == adm->old_arg)
7676 *tp = adm->new_arg;
7677 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7678 return NULL_TREE;
7681 /* If the TARGET region contains a kernel body FOR loop, remove its region
7682 from the TARGET and expand it in HSA gridified kernel fashion. */
7684 static void
7685 grid_expand_target_grid_body (struct omp_region *target)
7687 if (!hsa_gen_requested_p ())
7688 return;
7690 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7691 struct omp_region **pp;
7693 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7694 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7695 break;
7697 struct omp_region *gpukernel = *pp;
7699 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7700 if (!gpukernel)
7702 /* HSA cannot handle OpenACC constructs. */
7703 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7704 return;
7705 gcc_checking_assert (orig_child_fndecl);
7706 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7707 OMP_CLAUSE__GRIDDIM_));
7708 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7710 hsa_register_kernel (n);
7711 return;
7714 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7715 OMP_CLAUSE__GRIDDIM_));
7716 tree inside_block
7717 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7718 *pp = gpukernel->next;
7719 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7720 if ((*pp)->type == GIMPLE_OMP_FOR)
7721 break;
7723 struct omp_region *kfor = *pp;
7724 gcc_assert (kfor);
7725 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7726 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7727 *pp = kfor->next;
7728 if (kfor->inner)
7730 if (gimple_omp_for_grid_group_iter (for_stmt))
7732 struct omp_region **next_pp;
7733 for (pp = &kfor->inner; *pp; pp = next_pp)
7735 next_pp = &(*pp)->next;
7736 if ((*pp)->type != GIMPLE_OMP_FOR)
7737 continue;
7738 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7739 gcc_assert (gimple_omp_for_kind (inner)
7740 == GF_OMP_FOR_KIND_GRID_LOOP);
7741 grid_expand_omp_for_loop (*pp, true);
7742 *pp = (*pp)->next;
7743 next_pp = pp;
7746 expand_omp (kfor->inner);
7748 if (gpukernel->inner)
7749 expand_omp (gpukernel->inner);
7751 tree kern_fndecl = copy_node (orig_child_fndecl);
7752 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7753 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7754 tree tgtblock = gimple_block (tgt_stmt);
7755 tree fniniblock = make_node (BLOCK);
7756 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7757 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7758 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7759 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7760 DECL_INITIAL (kern_fndecl) = fniniblock;
7761 push_struct_function (kern_fndecl);
7762 cfun->function_end_locus = gimple_location (tgt_stmt);
7763 init_tree_ssa (cfun);
7764 pop_cfun ();
7766 /* Make sure to generate early debug for the function before
7767 outlining anything. */
7768 if (! gimple_in_ssa_p (cfun))
7769 (*debug_hooks->early_global_decl) (cfun->decl);
7771 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7772 gcc_assert (!DECL_CHAIN (old_parm_decl));
7773 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7774 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7775 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7776 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7777 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7778 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7779 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7780 kern_cfun->curr_properties = cfun->curr_properties;
7782 grid_expand_omp_for_loop (kfor, false);
7784 /* Remove the omp for statement. */
7785 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7786 gsi_remove (&gsi, true);
7787 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7788 return. */
7789 gsi = gsi_last_bb (gpukernel->exit);
7790 gcc_assert (!gsi_end_p (gsi)
7791 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7792 gimple *ret_stmt = gimple_build_return (NULL);
7793 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7794 gsi_remove (&gsi, true);
7796 /* Statements in the first BB in the target construct have been produced by
7797 target lowering and must be copied inside the GPUKERNEL, with the two
7798 exceptions of the first OMP statement and the OMP_DATA assignment
7799 statement. */
7800 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7801 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7802 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7803 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7804 !gsi_end_p (tsi); gsi_next (&tsi))
7806 gimple *stmt = gsi_stmt (tsi);
7807 if (is_gimple_omp (stmt))
7808 break;
7809 if (sender
7810 && is_gimple_assign (stmt)
7811 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7812 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7813 continue;
7814 gimple *copy = gimple_copy (stmt);
7815 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7816 gimple_set_block (copy, fniniblock);
7819 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7820 gpukernel->exit, inside_block);
7822 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7823 kcn->mark_force_output ();
7824 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7826 hsa_register_kernel (kcn, orig_child);
7828 cgraph_node::add_new_function (kern_fndecl, true);
7829 push_cfun (kern_cfun);
7830 cgraph_edge::rebuild_edges ();
7832 /* Re-map any mention of the PARM_DECL of the original function to the
7833 PARM_DECL of the new one.
7835 TODO: It would be great if lowering produced references into the GPU
7836 kernel decl straight away and we did not have to do this. */
7837 struct grid_arg_decl_map adm;
7838 adm.old_arg = old_parm_decl;
7839 adm.new_arg = new_parm_decl;
7840 basic_block bb;
7841 FOR_EACH_BB_FN (bb, kern_cfun)
7843 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7845 gimple *stmt = gsi_stmt (gsi);
7846 struct walk_stmt_info wi;
7847 memset (&wi, 0, sizeof (wi));
7848 wi.info = &adm;
7849 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7852 pop_cfun ();
7854 return;
7857 /* Expand the parallel region tree rooted at REGION. Expansion
7858 proceeds in depth-first order. Innermost regions are expanded
7859 first. This way, parallel regions that require a new function to
7860 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7861 internal dependencies in their body. */
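/* For example (illustrative only): given

     #pragma omp parallel
     #pragma omp for
     for (...) ...

   the GIMPLE_OMP_FOR region nested inside the GIMPLE_OMP_PARALLEL region
   is expanded first, so that by the time the parallel body is outlined
   into its own function it already contains plain runtime calls instead
   of OMP directives.  */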
7863 static void
7864 expand_omp (struct omp_region *region)
7866 omp_any_child_fn_dumped = false;
7867 while (region)
7869 location_t saved_location;
7870 gimple *inner_stmt = NULL;
7872 /* First, determine whether this is a combined parallel+workshare
7873 region. */
7874 if (region->type == GIMPLE_OMP_PARALLEL)
7875 determine_parallel_type (region);
7876 else if (region->type == GIMPLE_OMP_TARGET)
7877 grid_expand_target_grid_body (region);
7879 if (region->type == GIMPLE_OMP_FOR
7880 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7881 inner_stmt = last_stmt (region->inner->entry);
7883 if (region->inner)
7884 expand_omp (region->inner);
7886 saved_location = input_location;
7887 if (gimple_has_location (last_stmt (region->entry)))
7888 input_location = gimple_location (last_stmt (region->entry));
7890 switch (region->type)
7892 case GIMPLE_OMP_PARALLEL:
7893 case GIMPLE_OMP_TASK:
7894 expand_omp_taskreg (region);
7895 break;
7897 case GIMPLE_OMP_FOR:
7898 expand_omp_for (region, inner_stmt);
7899 break;
7901 case GIMPLE_OMP_SECTIONS:
7902 expand_omp_sections (region);
7903 break;
7905 case GIMPLE_OMP_SECTION:
7906 /* Individual omp sections are handled together with their
7907 parent GIMPLE_OMP_SECTIONS region. */
7908 break;
7910 case GIMPLE_OMP_SINGLE:
7911 expand_omp_single (region);
7912 break;
7914 case GIMPLE_OMP_ORDERED:
7916 gomp_ordered *ord_stmt
7917 = as_a <gomp_ordered *> (last_stmt (region->entry));
7918 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7919 OMP_CLAUSE_DEPEND))
7921 /* We'll expand these when expanding the corresponding
7922 worksharing region with an ordered(n) clause. */
7923 gcc_assert (region->outer
7924 && region->outer->type == GIMPLE_OMP_FOR);
7925 region->ord_stmt = ord_stmt;
7926 break;
7929 /* FALLTHRU */
7930 case GIMPLE_OMP_MASTER:
7931 case GIMPLE_OMP_TASKGROUP:
7932 case GIMPLE_OMP_CRITICAL:
7933 case GIMPLE_OMP_TEAMS:
7934 expand_omp_synch (region);
7935 break;
7937 case GIMPLE_OMP_ATOMIC_LOAD:
7938 expand_omp_atomic (region);
7939 break;
7941 case GIMPLE_OMP_TARGET:
7942 expand_omp_target (region);
7943 break;
7945 default:
7946 gcc_unreachable ();
7949 input_location = saved_location;
7950 region = region->next;
7952 if (omp_any_child_fn_dumped)
7954 if (dump_file)
7955 dump_function_header (dump_file, current_function_decl, dump_flags);
7956 omp_any_child_fn_dumped = false;
7960 /* Helper for build_omp_regions. Scan the dominator tree starting at
7961 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7962 true, the function ends once a single tree is built (otherwise, a whole
7963 forest of OMP constructs may be built). */
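/* As an illustration (inferred from this function, not from the original
   sources): for a parallel region containing a worksharing loop, the
   resulting structure is a GIMPLE_OMP_PARALLEL region whose inner chain
   holds a GIMPLE_OMP_FOR region; each region records the block of its
   directive as the entry, the block of its GIMPLE_OMP_RETURN as the exit
   and, for the loop, the block of its GIMPLE_OMP_CONTINUE as cont.  */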
7965 static void
7966 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7967 bool single_tree)
7969 gimple_stmt_iterator gsi;
7970 gimple *stmt;
7971 basic_block son;
7973 gsi = gsi_last_bb (bb);
7974 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7976 struct omp_region *region;
7977 enum gimple_code code;
7979 stmt = gsi_stmt (gsi);
7980 code = gimple_code (stmt);
7981 if (code == GIMPLE_OMP_RETURN)
7983 /* STMT is the return point out of region PARENT. Mark it
7984 as the exit point and make PARENT the immediately
7985 enclosing region. */
7986 gcc_assert (parent);
7987 region = parent;
7988 region->exit = bb;
7989 parent = parent->outer;
7991 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7993 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7994 GIMPLE_OMP_RETURN, but matches with
7995 GIMPLE_OMP_ATOMIC_LOAD. */
7996 gcc_assert (parent);
7997 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7998 region = parent;
7999 region->exit = bb;
8000 parent = parent->outer;
8002 else if (code == GIMPLE_OMP_CONTINUE)
8004 gcc_assert (parent);
8005 parent->cont = bb;
8007 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8009 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8010 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8012 else
8014 region = new_omp_region (bb, code, parent);
8015 /* Otherwise... */
8016 if (code == GIMPLE_OMP_TARGET)
8018 switch (gimple_omp_target_kind (stmt))
8020 case GF_OMP_TARGET_KIND_REGION:
8021 case GF_OMP_TARGET_KIND_DATA:
8022 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8023 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8024 case GF_OMP_TARGET_KIND_OACC_DATA:
8025 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8026 break;
8027 case GF_OMP_TARGET_KIND_UPDATE:
8028 case GF_OMP_TARGET_KIND_ENTER_DATA:
8029 case GF_OMP_TARGET_KIND_EXIT_DATA:
8030 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8031 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8032 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8033 /* ..., other than for those stand-alone directives... */
8034 region = NULL;
8035 break;
8036 default:
8037 gcc_unreachable ();
8040 else if (code == GIMPLE_OMP_ORDERED
8041 && omp_find_clause (gimple_omp_ordered_clauses
8042 (as_a <gomp_ordered *> (stmt)),
8043 OMP_CLAUSE_DEPEND))
8044 /* #pragma omp ordered depend is also just a stand-alone
8045 directive. */
8046 region = NULL;
8047 /* ..., this directive becomes the parent for a new region. */
8048 if (region)
8049 parent = region;
8053 if (single_tree && !parent)
8054 return;
8056 for (son = first_dom_son (CDI_DOMINATORS, bb);
8057 son;
8058 son = next_dom_son (CDI_DOMINATORS, son))
8059 build_omp_regions_1 (son, parent, single_tree);
8062 /* Builds the tree of OMP regions rooted at ROOT, storing it in
8063 root_omp_region. */
8065 static void
8066 build_omp_regions_root (basic_block root)
8068 gcc_assert (root_omp_region == NULL);
8069 build_omp_regions_1 (root, NULL, true);
8070 gcc_assert (root_omp_region != NULL);
8073 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8075 void
8076 omp_expand_local (basic_block head)
8078 build_omp_regions_root (head);
8079 if (dump_file && (dump_flags & TDF_DETAILS))
8081 fprintf (dump_file, "\nOMP region tree\n\n");
8082 dump_omp_region (dump_file, root_omp_region, 0);
8083 fprintf (dump_file, "\n");
8086 remove_exit_barriers (root_omp_region);
8087 expand_omp (root_omp_region);
8089 omp_free_regions ();
8092 /* Scan the CFG and build a tree of OMP regions, storing it in
8093 root_omp_region. */
8095 static void
8096 build_omp_regions (void)
8098 gcc_assert (root_omp_region == NULL);
8099 calculate_dominance_info (CDI_DOMINATORS);
8100 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8103 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8105 static unsigned int
8106 execute_expand_omp (void)
8108 build_omp_regions ();
8110 if (!root_omp_region)
8111 return 0;
8113 if (dump_file)
8115 fprintf (dump_file, "\nOMP region tree\n\n");
8116 dump_omp_region (dump_file, root_omp_region, 0);
8117 fprintf (dump_file, "\n");
8120 remove_exit_barriers (root_omp_region);
8122 expand_omp (root_omp_region);
8124 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8125 verify_loop_structure ();
8126 cleanup_tree_cfg ();
8128 omp_free_regions ();
8130 return 0;
8133 /* OMP expansion -- the default pass, run before creation of SSA form. */
8135 namespace {
8137 const pass_data pass_data_expand_omp =
8139 GIMPLE_PASS, /* type */
8140 "ompexp", /* name */
8141 OPTGROUP_OMP, /* optinfo_flags */
8142 TV_NONE, /* tv_id */
8143 PROP_gimple_any, /* properties_required */
8144 PROP_gimple_eomp, /* properties_provided */
8145 0, /* properties_destroyed */
8146 0, /* todo_flags_start */
8147 0, /* todo_flags_finish */
8150 class pass_expand_omp : public gimple_opt_pass
8152 public:
8153 pass_expand_omp (gcc::context *ctxt)
8154 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8157 /* opt_pass methods: */
8158 virtual unsigned int execute (function *)
8160 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8161 || flag_openmp_simd != 0)
8162 && !seen_error ());
8164 /* This pass always runs, to provide PROP_gimple_eomp.
8165 But often, there is nothing to do. */
8166 if (!gate)
8167 return 0;
8169 return execute_expand_omp ();
8172 }; // class pass_expand_omp
8174 } // anon namespace
8176 gimple_opt_pass *
8177 make_pass_expand_omp (gcc::context *ctxt)
8179 return new pass_expand_omp (ctxt);
8182 namespace {
8184 const pass_data pass_data_expand_omp_ssa =
8186 GIMPLE_PASS, /* type */
8187 "ompexpssa", /* name */
8188 OPTGROUP_OMP, /* optinfo_flags */
8189 TV_NONE, /* tv_id */
8190 PROP_cfg | PROP_ssa, /* properties_required */
8191 PROP_gimple_eomp, /* properties_provided */
8192 0, /* properties_destroyed */
8193 0, /* todo_flags_start */
8194 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8197 class pass_expand_omp_ssa : public gimple_opt_pass
8199 public:
8200 pass_expand_omp_ssa (gcc::context *ctxt)
8201 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8204 /* opt_pass methods: */
8205 virtual bool gate (function *fun)
8207 return !(fun->curr_properties & PROP_gimple_eomp);
8209 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8210 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8212 }; // class pass_expand_omp_ssa
8214 } // anon namespace
8216 gimple_opt_pass *
8217 make_pass_expand_omp_ssa (gcc::context *ctxt)
8219 return new pass_expand_omp_ssa (ctxt);
8222 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8223 GIMPLE_* codes. */
8225 bool
8226 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8227 int *region_idx)
8229 gimple *last = last_stmt (bb);
8230 enum gimple_code code = gimple_code (last);
8231 struct omp_region *cur_region = *region;
8232 bool fallthru = false;
8234 switch (code)
8236 case GIMPLE_OMP_PARALLEL:
8237 case GIMPLE_OMP_TASK:
8238 case GIMPLE_OMP_FOR:
8239 case GIMPLE_OMP_SINGLE:
8240 case GIMPLE_OMP_TEAMS:
8241 case GIMPLE_OMP_MASTER:
8242 case GIMPLE_OMP_TASKGROUP:
8243 case GIMPLE_OMP_CRITICAL:
8244 case GIMPLE_OMP_SECTION:
8245 case GIMPLE_OMP_GRID_BODY:
8246 cur_region = new_omp_region (bb, code, cur_region);
8247 fallthru = true;
8248 break;
8250 case GIMPLE_OMP_ORDERED:
8251 cur_region = new_omp_region (bb, code, cur_region);
8252 fallthru = true;
8253 if (omp_find_clause (gimple_omp_ordered_clauses
8254 (as_a <gomp_ordered *> (last)),
8255 OMP_CLAUSE_DEPEND))
8256 cur_region = cur_region->outer;
8257 break;
8259 case GIMPLE_OMP_TARGET:
8260 cur_region = new_omp_region (bb, code, cur_region);
8261 fallthru = true;
8262 switch (gimple_omp_target_kind (last))
8264 case GF_OMP_TARGET_KIND_REGION:
8265 case GF_OMP_TARGET_KIND_DATA:
8266 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8267 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8268 case GF_OMP_TARGET_KIND_OACC_DATA:
8269 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8270 break;
8271 case GF_OMP_TARGET_KIND_UPDATE:
8272 case GF_OMP_TARGET_KIND_ENTER_DATA:
8273 case GF_OMP_TARGET_KIND_EXIT_DATA:
8274 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8275 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8276 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8277 cur_region = cur_region->outer;
8278 break;
8279 default:
8280 gcc_unreachable ();
8282 break;
8284 case GIMPLE_OMP_SECTIONS:
8285 cur_region = new_omp_region (bb, code, cur_region);
8286 fallthru = true;
8287 break;
8289 case GIMPLE_OMP_SECTIONS_SWITCH:
8290 fallthru = false;
8291 break;
8293 case GIMPLE_OMP_ATOMIC_LOAD:
8294 case GIMPLE_OMP_ATOMIC_STORE:
8295 fallthru = true;
8296 break;
8298 case GIMPLE_OMP_RETURN:
8299 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8300 somewhere other than the next block. This will be
8301 created later. */
8302 cur_region->exit = bb;
8303 if (cur_region->type == GIMPLE_OMP_TASK)
8304 /* Add an edge corresponding to not scheduling the task
8305 immediately. */
8306 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8307 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8308 cur_region = cur_region->outer;
8309 break;
8311 case GIMPLE_OMP_CONTINUE:
8312 cur_region->cont = bb;
8313 switch (cur_region->type)
8315 case GIMPLE_OMP_FOR:
8316 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8317 successor edges as abnormal to prevent splitting
8318 them. */
8319 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8320 /* Make the loopback edge. */
8321 make_edge (bb, single_succ (cur_region->entry),
8322 EDGE_ABNORMAL);
8324 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8325 corresponds to the case that the body of the loop
8326 is not executed at all. */
8327 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8328 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8329 fallthru = false;
8330 break;
8332 case GIMPLE_OMP_SECTIONS:
8333 /* Wire up the edges into and out of the nested sections. */
8335 basic_block switch_bb = single_succ (cur_region->entry);
8337 struct omp_region *i;
8338 for (i = cur_region->inner; i ; i = i->next)
8340 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8341 make_edge (switch_bb, i->entry, 0);
8342 make_edge (i->exit, bb, EDGE_FALLTHRU);
8345 /* Make the loopback edge to the block with
8346 GIMPLE_OMP_SECTIONS_SWITCH. */
8347 make_edge (bb, switch_bb, 0);
8349 /* Make the edge from the switch to exit. */
8350 make_edge (switch_bb, bb->next_bb, 0);
8351 fallthru = false;
8353 break;
8355 case GIMPLE_OMP_TASK:
8356 fallthru = true;
8357 break;
8359 default:
8360 gcc_unreachable ();
8362 break;
8364 default:
8365 gcc_unreachable ();
8368 if (*region != cur_region)
8370 *region = cur_region;
8371 if (cur_region)
8372 *region_idx = cur_region->entry->index;
8373 else
8374 *region_idx = 0;
8377 return fallthru;
8380 #include "gt-omp-expand.h"