[official-gcc.git] / gcc / omp-expand.cc
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2022 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130 is the immediate dominator of PAR_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
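The two folds above are the usual round-up-to-a-multiple idiom. A minimal standalone sketch of the same arithmetic on plain integers (illustration only, not part of this file; it assumes the vectorization factor is a power of two, which is what the BIT_AND_EXPR mask relies on):

   /* Round CHUNK up to a multiple of VF, mirroring the PLUS_EXPR and
      BIT_AND_EXPR folds in omp_adjust_chunk_size above.  */
   static unsigned long
   adjust_chunk_size_sketch (unsigned long chunk, unsigned long vf)
   {
     if (chunk == 0 || vf <= 1)
       return chunk;
     return (chunk + vf - 1) & -vf;  /* e.g. chunk = 10, vf = 8 gives 16.  */
   }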
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319   /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
415 /* Create a new parallel region starting at STMT inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
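For orientation, a hedged sketch of how two nested directives end up linked through the outer/inner/next fields (bb1 and bb2 are hypothetical basic blocks, and root_omp_region is assumed to be NULL beforehand):

   struct omp_region *par = new_omp_region (bb1, GIMPLE_OMP_PARALLEL, NULL);
   struct omp_region *ws = new_omp_region (bb2, GIMPLE_OMP_FOR, par);
   /* Now root_omp_region == par, par->inner == ws and ws->outer == par;
      a later sibling of WS would be prepended to par->inner with its
      next field pointing at WS.  */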
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502 /* OMP expansion expands inner regions before outer ones, so if
503 we e.g. have explicit task region nested in parallel region, when
504 expanding the task region current_function_decl will be the original
505 source function, but we actually want to use as context the child
506 function of the parallel. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543 being expanded. BB is the block where to insert the code. WS_ARGS
544 will be set if this is a call to a combined parallel+workshare
545 construct, it contains the list of additional arguments needed by
546 the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
747 /* Build the function call to GOMP_task to actually
748 generate the task operation. BB is the block where to insert the code. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
905 /* Build the function call to GOMP_taskwait_depend to actually
906 generate the taskwait operation. BB is the block where to insert the
907 code. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
920 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
921 enum built_in_function f = (nowait
922 ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
923 : BUILT_IN_GOMP_TASKWAIT_DEPEND);
924 tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);
926 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
927 false, GSI_CONTINUE_LINKING);
930 /* Build the function call to GOMP_teams_reg to actually
931 generate the host teams operation. REGION is the teams region
932 being expanded. BB is the block where to insert the code. */
934 static void
935 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
937 tree clauses = gimple_omp_teams_clauses (entry_stmt);
938 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
939 if (num_teams == NULL_TREE)
940 num_teams = build_int_cst (unsigned_type_node, 0);
941 else
943 num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
944 num_teams = fold_convert (unsigned_type_node, num_teams);
946 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
947 if (thread_limit == NULL_TREE)
948 thread_limit = build_int_cst (unsigned_type_node, 0);
949 else
951 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
952 thread_limit = fold_convert (unsigned_type_node, thread_limit);
955 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
956 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
957 if (t == NULL)
958 t1 = null_pointer_node;
959 else
960 t1 = build_fold_addr_expr (t);
961 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
962 tree t2 = build_fold_addr_expr (child_fndecl);
964 vec<tree, va_gc> *args;
965 vec_alloc (args, 5);
966 args->quick_push (t2);
967 args->quick_push (t1);
968 args->quick_push (num_teams);
969 args->quick_push (thread_limit);
970 /* For future extensibility. */
971 args->quick_push (build_zero_cst (unsigned_type_node));
973 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
974 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
975 args);
977 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
978 false, GSI_CONTINUE_LINKING);
981 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
983 static tree
984 vec2chain (vec<tree, va_gc> *v)
986 tree chain = NULL_TREE, t;
987 unsigned ix;
989 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
991 DECL_CHAIN (t) = chain;
992 chain = t;
995 return chain;
998 /* Remove barriers in REGION->EXIT's block. Note that this is only
999 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1000 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1001 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1002 removed. */
1004 static void
1005 remove_exit_barrier (struct omp_region *region)
1007 gimple_stmt_iterator gsi;
1008 basic_block exit_bb;
1009 edge_iterator ei;
1010 edge e;
1011 gimple *stmt;
1012 int any_addressable_vars = -1;
1014 exit_bb = region->exit;
1016 /* If the parallel region doesn't return, we don't have REGION->EXIT
1017 block at all. */
1018 if (! exit_bb)
1019 return;
1021 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1022 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1023 statements that can appear in between are extremely limited -- no
1024 memory operations at all. Here, we allow nothing at all, so the
1025 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1026 gsi = gsi_last_nondebug_bb (exit_bb);
1027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1028 gsi_prev_nondebug (&gsi);
1029 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1030 return;
1032 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1034 gsi = gsi_last_nondebug_bb (e->src);
1035 if (gsi_end_p (gsi))
1036 continue;
1037 stmt = gsi_stmt (gsi);
1038 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1039 && !gimple_omp_return_nowait_p (stmt))
1041 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1042 in many cases. If there could be tasks queued, the barrier
1043 might be needed to let the tasks run before some local
1044 variable of the parallel that the task uses as shared
1045 runs out of scope. The task can be spawned either
1046 from within current function (this would be easy to check)
1047 or from some function it calls and gets passed an address
1048 of such a variable. */
1049 if (any_addressable_vars < 0)
1051 gomp_parallel *parallel_stmt
1052 = as_a <gomp_parallel *> (last_stmt (region->entry));
1053 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1054 tree local_decls, block, decl;
1055 unsigned ix;
1057 any_addressable_vars = 0;
1058 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1059 if (TREE_ADDRESSABLE (decl))
1061 any_addressable_vars = 1;
1062 break;
1064 for (block = gimple_block (stmt);
1065 !any_addressable_vars
1066 && block
1067 && TREE_CODE (block) == BLOCK;
1068 block = BLOCK_SUPERCONTEXT (block))
1070 for (local_decls = BLOCK_VARS (block);
1071 local_decls;
1072 local_decls = DECL_CHAIN (local_decls))
1073 if (TREE_ADDRESSABLE (local_decls))
1075 any_addressable_vars = 1;
1076 break;
1078 if (block == gimple_block (parallel_stmt))
1079 break;
1082 if (!any_addressable_vars)
1083 gimple_omp_return_set_nowait (stmt);
1088 static void
1089 remove_exit_barriers (struct omp_region *region)
1091 if (region->type == GIMPLE_OMP_PARALLEL)
1092 remove_exit_barrier (region);
1094 if (region->inner)
1096 region = region->inner;
1097 remove_exit_barriers (region);
1098 while (region->next)
1100 region = region->next;
1101 remove_exit_barriers (region);
1106 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1107 calls. These can't be declared as const functions, but
1108 within one parallel body they are constant, so they can be
1109 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1110 which are declared const. Similarly for task body, except
1111    that in an untied task omp_get_thread_num () can change at any task
1112 scheduling point. */
1114 static void
1115 optimize_omp_library_calls (gimple *entry_stmt)
1117 basic_block bb;
1118 gimple_stmt_iterator gsi;
1119 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1120 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1121 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1122 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1123 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1124 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1125 OMP_CLAUSE_UNTIED) != NULL);
1127 FOR_EACH_BB_FN (bb, cfun)
1128 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1130 gimple *call = gsi_stmt (gsi);
1131 tree decl;
1133 if (is_gimple_call (call)
1134 && (decl = gimple_call_fndecl (call))
1135 && DECL_EXTERNAL (decl)
1136 && TREE_PUBLIC (decl)
1137 && DECL_INITIAL (decl) == NULL)
1139 tree built_in;
1141 if (DECL_NAME (decl) == thr_num_id)
1143 /* In #pragma omp task untied omp_get_thread_num () can change
1144 during the execution of the task region. */
1145 if (untied_task)
1146 continue;
1147 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1149 else if (DECL_NAME (decl) == num_thr_id)
1150 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1151 else
1152 continue;
1154 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1155 || gimple_call_num_args (call) != 0)
1156 continue;
1158 if (flag_exceptions && !TREE_NOTHROW (decl))
1159 continue;
1161 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1162 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1163 TREE_TYPE (TREE_TYPE (built_in))))
1164 continue;
1166 gimple_call_set_fndecl (call, built_in);
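To see what this optimization buys, a hypothetical user-level example using the standard omp.h API: within one parallel body both calls return the same value, so once they are redirected to the const __builtin_omp_get_thread_num () a later pass can CSE them into a single call:

   #include <omp.h>

   void
   fill (int *out)
   {
   #pragma omp parallel
     {
       int a = omp_get_thread_num ();  /* Both calls yield the same...  */
       int b = omp_get_thread_num ();  /* ...thread id in this body.  */
       out[a] = a + b;
     }
   }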
1171 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1172 regimplified. */
1174 static tree
1175 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1177 tree t = *tp;
1179 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1180 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1181 return t;
1183 if (TREE_CODE (t) == ADDR_EXPR)
1184 recompute_tree_invariant_for_addr_expr (t);
1186 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1187 return NULL_TREE;
1190 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1192 static void
1193 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1194 bool after)
1196 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1197 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1198 !after, after ? GSI_CONTINUE_LINKING
1199 : GSI_SAME_STMT);
1200 gimple *stmt = gimple_build_assign (to, from);
1201 if (after)
1202 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1203 else
1204 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1205 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1206 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1208 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1209 gimple_regimplify_operands (stmt, &gsi);
1213 /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1215 static gcond *
1216 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1217 tree lhs, tree rhs, bool after = false)
1219 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1220 if (after)
1221 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1222 else
1223 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1224 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 NULL, NULL)
1226 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1227 NULL, NULL))
1229 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1230 gimple_regimplify_operands (cond_stmt, &gsi);
1232 return cond_stmt;
1235 /* Expand the OpenMP parallel or task directive starting at REGION. */
1237 static void
1238 expand_omp_taskreg (struct omp_region *region)
1240 basic_block entry_bb, exit_bb, new_bb;
1241 struct function *child_cfun;
1242 tree child_fn, block, t;
1243 gimple_stmt_iterator gsi;
1244 gimple *entry_stmt, *stmt;
1245 edge e;
1246 vec<tree, va_gc> *ws_args;
1248 entry_stmt = last_stmt (region->entry);
1249 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1250 && gimple_omp_task_taskwait_p (entry_stmt))
1252 new_bb = region->entry;
1253 gsi = gsi_last_nondebug_bb (region->entry);
1254 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1255 gsi_remove (&gsi, true);
1256 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1257 return;
1260 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1261 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1263 entry_bb = region->entry;
1264 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1265 exit_bb = region->cont;
1266 else
1267 exit_bb = region->exit;
1269 if (is_combined_parallel (region))
1270 ws_args = region->ws_args;
1271 else
1272 ws_args = NULL;
1274 if (child_cfun->cfg)
1276 /* Due to inlining, it may happen that we have already outlined
1277 the region, in which case all we need to do is make the
1278 sub-graph unreachable and emit the parallel call. */
1279 edge entry_succ_e, exit_succ_e;
1281 entry_succ_e = single_succ_edge (entry_bb);
1283 gsi = gsi_last_nondebug_bb (entry_bb);
1284 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1285 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1286 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1287 gsi_remove (&gsi, true);
1289 new_bb = entry_bb;
1290 if (exit_bb)
1292 exit_succ_e = single_succ_edge (exit_bb);
1293 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1295 remove_edge_and_dominated_blocks (entry_succ_e);
1297 else
1299 unsigned srcidx, dstidx, num;
1301 /* If the parallel region needs data sent from the parent
1302 function, then the very first statement (except possible
1303 tree profile counter updates) of the parallel body
1304 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1305 &.OMP_DATA_O is passed as an argument to the child function,
1306 we need to replace it with the argument as seen by the child
1307 function.
1309 In most cases, this will end up being the identity assignment
1310 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1311 a function call that has been inlined, the original PARM_DECL
1312 .OMP_DATA_I may have been converted into a different local
1313 variable. In which case, we need to keep the assignment. */
1314 if (gimple_omp_taskreg_data_arg (entry_stmt))
1316 basic_block entry_succ_bb
1317 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1318 : FALLTHRU_EDGE (entry_bb)->dest;
1319 tree arg;
1320 gimple *parcopy_stmt = NULL;
1322 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1324 gimple *stmt;
1326 gcc_assert (!gsi_end_p (gsi));
1327 stmt = gsi_stmt (gsi);
1328 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1329 continue;
1331 if (gimple_num_ops (stmt) == 2)
1333 tree arg = gimple_assign_rhs1 (stmt);
1335    /* We're ignoring the subcode because we're
1336 effectively doing a STRIP_NOPS. */
1338 if (TREE_CODE (arg) == ADDR_EXPR
1339 && (TREE_OPERAND (arg, 0)
1340 == gimple_omp_taskreg_data_arg (entry_stmt)))
1342 parcopy_stmt = stmt;
1343 break;
1348 gcc_assert (parcopy_stmt != NULL);
1349 arg = DECL_ARGUMENTS (child_fn);
1351 if (!gimple_in_ssa_p (cfun))
1353 if (gimple_assign_lhs (parcopy_stmt) == arg)
1354 gsi_remove (&gsi, true);
1355 else
1357 /* ?? Is setting the subcode really necessary ?? */
1358 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1362 else
1364 tree lhs = gimple_assign_lhs (parcopy_stmt);
1365 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1366 /* We'd like to set the rhs to the default def in the child_fn,
1367 but it's too early to create ssa names in the child_fn.
1368 Instead, we set the rhs to the parm. In
1369 move_sese_region_to_fn, we introduce a default def for the
1370    parm, map the parm to its default def, and once we encounter
1371 this stmt, replace the parm with the default def. */
1372 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1373 update_stmt (parcopy_stmt);
1377 /* Declare local variables needed in CHILD_CFUN. */
1378 block = DECL_INITIAL (child_fn);
1379 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1380 /* The gimplifier could record temporaries in parallel/task block
1381 rather than in containing function's local_decls chain,
1382 which would mean cgraph missed finalizing them. Do it now. */
1383 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1384 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1385 varpool_node::finalize_decl (t);
1386 DECL_SAVED_TREE (child_fn) = NULL;
1387 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1388 gimple_set_body (child_fn, NULL);
1389 TREE_USED (block) = 1;
1391 /* Reset DECL_CONTEXT on function arguments. */
1392 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1393 DECL_CONTEXT (t) = child_fn;
1395 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1396 so that it can be moved to the child function. */
1397 gsi = gsi_last_nondebug_bb (entry_bb);
1398 stmt = gsi_stmt (gsi);
1399 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1400 || gimple_code (stmt) == GIMPLE_OMP_TASK
1401 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1402 e = split_block (entry_bb, stmt);
1403 gsi_remove (&gsi, true);
1404 entry_bb = e->dest;
1405 edge e2 = NULL;
1406 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1407 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1408 else
1410 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1411 gcc_assert (e2->dest == region->exit);
1412 remove_edge (BRANCH_EDGE (entry_bb));
1413 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1414 gsi = gsi_last_nondebug_bb (region->exit);
1415 gcc_assert (!gsi_end_p (gsi)
1416 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1417 gsi_remove (&gsi, true);
1420 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1421 if (exit_bb)
1423 gsi = gsi_last_nondebug_bb (exit_bb);
1424 gcc_assert (!gsi_end_p (gsi)
1425 && (gimple_code (gsi_stmt (gsi))
1426 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1427 stmt = gimple_build_return (NULL);
1428 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1429 gsi_remove (&gsi, true);
1432 /* Move the parallel region into CHILD_CFUN. */
1434 if (gimple_in_ssa_p (cfun))
1436 init_tree_ssa (child_cfun);
1437 init_ssa_operands (child_cfun);
1438 child_cfun->gimple_df->in_ssa_p = true;
1439 block = NULL_TREE;
1441 else
1442 block = gimple_block (entry_stmt);
1444 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1445 if (exit_bb)
1446 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1447 if (e2)
1449 basic_block dest_bb = e2->dest;
1450 if (!exit_bb)
1451 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1452 remove_edge (e2);
1453 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1455 /* When the OMP expansion process cannot guarantee an up-to-date
1456    loop tree, arrange for the child function to fix up loops. */
1457 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1458 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1460 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1461 num = vec_safe_length (child_cfun->local_decls);
1462 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1464 t = (*child_cfun->local_decls)[srcidx];
1465 if (DECL_CONTEXT (t) == cfun->decl)
1466 continue;
1467 if (srcidx != dstidx)
1468 (*child_cfun->local_decls)[dstidx] = t;
1469 dstidx++;
1471 if (dstidx != num)
1472 vec_safe_truncate (child_cfun->local_decls, dstidx);
1474 /* Inform the callgraph about the new function. */
1475 child_cfun->curr_properties = cfun->curr_properties;
1476 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1477 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1478 cgraph_node *node = cgraph_node::get_create (child_fn);
1479 node->parallelized_function = 1;
1480 cgraph_node::add_new_function (child_fn, true);
1482 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1483 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1485 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1486 fixed in a following pass. */
1487 push_cfun (child_cfun);
1488 if (need_asm)
1489 assign_assembler_name_if_needed (child_fn);
1491 if (optimize)
1492 optimize_omp_library_calls (entry_stmt);
1493 update_max_bb_count ();
1494 cgraph_edge::rebuild_edges ();
1496 /* Some EH regions might become dead, see PR34608. If
1497 pass_cleanup_cfg isn't the first pass to happen with the
1498 new child, these dead EH edges might cause problems.
1499 Clean them up now. */
1500 if (flag_exceptions)
1502 basic_block bb;
1503 bool changed = false;
1505 FOR_EACH_BB_FN (bb, cfun)
1506 changed |= gimple_purge_dead_eh_edges (bb);
1507 if (changed)
1508 cleanup_tree_cfg ();
1510 if (gimple_in_ssa_p (cfun))
1511 update_ssa (TODO_update_ssa);
1512 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1513 verify_loop_structure ();
1514 pop_cfun ();
1516 if (dump_file && !gimple_in_ssa_p (cfun))
1518 omp_any_child_fn_dumped = true;
1519 dump_function_header (dump_file, child_fn, dump_flags);
1520 dump_function_to_file (child_fn, dump_file, dump_flags);
1524 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1526 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1527 expand_parallel_call (region, new_bb,
1528 as_a <gomp_parallel *> (entry_stmt), ws_args);
1529 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1530 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1531 else
1532 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1533 if (gimple_in_ssa_p (cfun))
1534 update_ssa (TODO_update_ssa_only_virtuals);
1537 /* Information about members of an OpenACC collapsed loop nest. */
1539 struct oacc_collapse
1541 tree base; /* Base value. */
1542 tree iters; /* Number of steps. */
1543 tree step; /* Step size. */
1544 tree tile; /* Tile increment (if tiled). */
1545 tree outer; /* Tile iterator var. */
1548 /* Helper for expand_oacc_for. Determine collapsed loop information.
1549 Fill in COUNTS array. Emit any initialization code before GSI.
1550 Return the calculated outer loop bound of BOUND_TYPE. */
1552 static tree
1553 expand_oacc_collapse_init (const struct omp_for_data *fd,
1554 gimple_stmt_iterator *gsi,
1555 oacc_collapse *counts, tree diff_type,
1556 tree bound_type, location_t loc)
1558 tree tiling = fd->tiling;
1559 tree total = build_int_cst (bound_type, 1);
1560 int ix;
1562 gcc_assert (integer_onep (fd->loop.step));
1563 gcc_assert (integer_zerop (fd->loop.n1));
1565 /* When tiling, the first operand of the tile clause applies to the
1566 innermost loop, and we work outwards from there. Seems
1567 backwards, but whatever. */
1568 for (ix = fd->collapse; ix--;)
1570 const omp_for_data_loop *loop = &fd->loops[ix];
1572 tree iter_type = TREE_TYPE (loop->v);
1573 tree plus_type = iter_type;
1575 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1577 if (POINTER_TYPE_P (iter_type))
1578 plus_type = sizetype;
1580 if (tiling)
1582 tree num = build_int_cst (integer_type_node, fd->collapse);
1583 tree loop_no = build_int_cst (integer_type_node, ix);
1584 tree tile = TREE_VALUE (tiling);
1585 gcall *call
1586 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1587 /* gwv-outer=*/integer_zero_node,
1588 /* gwv-inner=*/integer_zero_node);
1590 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1591 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1592 gimple_call_set_lhs (call, counts[ix].tile);
1593 gimple_set_location (call, loc);
1594 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1596 tiling = TREE_CHAIN (tiling);
1598 else
1600 counts[ix].tile = NULL;
1601 counts[ix].outer = loop->v;
1604 tree b = loop->n1;
1605 tree e = loop->n2;
1606 tree s = loop->step;
1607 bool up = loop->cond_code == LT_EXPR;
1608 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1609 bool negating;
1610 tree expr;
1612 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1615 true, GSI_SAME_STMT);
1617 /* Convert the step, avoiding possible unsigned->signed overflow. */
1618 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1619 if (negating)
1620 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1621 s = fold_convert (diff_type, s);
1622 if (negating)
1623 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1624 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1625 true, GSI_SAME_STMT);
1627 /* Determine the range, avoiding possible unsigned->signed overflow. */
1628 negating = !up && TYPE_UNSIGNED (iter_type);
1629 expr = fold_build2 (MINUS_EXPR, plus_type,
1630 fold_convert (plus_type, negating ? b : e),
1631 fold_convert (plus_type, negating ? e : b));
1632 expr = fold_convert (diff_type, expr);
1633 if (negating)
1634 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1635 tree range = force_gimple_operand_gsi
1636 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1638 /* Determine number of iterations. */
1639 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1640 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1641 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1643 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1644 true, GSI_SAME_STMT);
1646 counts[ix].base = b;
1647 counts[ix].iters = iters;
1648 counts[ix].step = s;
1650 total = fold_build2 (MULT_EXPR, bound_type, total,
1651 fold_convert (bound_type, iters));
1654 return total;
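As a worked illustration of the count computation above (illustrative bounds, not taken from the source): for a single collapsed member written as 'for (i = 0; i < 10; i += 3)' we get range = 10 - 0 = 10 and dir = +1, so iters = (range - dir + step) / step = (10 - 1 + 3) / 3 = 4, matching the four iterations i = 0, 3, 6, 9; TOTAL is then the product of the per-member iters values.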
1657 /* Emit initializers for collapsed loop members. INNER is true if
1658 this is for the element loop of a TILE. IVAR is the outer
1659 loop iteration variable, from which collapsed loop iteration values
1660 are calculated. COUNTS array has been initialized by
1661    expand_oacc_collapse_init. */
1663 static void
1664 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1665 gimple_stmt_iterator *gsi,
1666 const oacc_collapse *counts, tree ivar,
1667 tree diff_type)
1669 tree ivar_type = TREE_TYPE (ivar);
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
1679 tree plus_type = iter_type;
1680 enum tree_code plus_code = PLUS_EXPR;
1681 tree expr;
1683 if (POINTER_TYPE_P (iter_type))
1685 plus_code = POINTER_PLUS_EXPR;
1686 plus_type = sizetype;
1689 expr = ivar;
1690 if (ix)
1692 tree mod = fold_convert (ivar_type, collapse->iters);
1693 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1694 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1695 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1696 true, GSI_SAME_STMT);
1699 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1700 fold_convert (diff_type, collapse->step));
1701 expr = fold_build2 (plus_code, iter_type,
1702 inner ? collapse->outer : collapse->base,
1703 fold_convert (plus_type, expr));
1704 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1705 true, GSI_SAME_STMT);
1706 gassign *ass = gimple_build_assign (v, expr);
1707 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1711 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. For loop nests with non-rectangular
1737 loops, do this only for the rectangular loops. Then pick
1738 the loops which reference outer vars in their bound expressions
1739 and the loops which they refer to and for this sub-nest compute
1740 number of iterations. For triangular loops use Faulhaber's formula,
1741 otherwise as a fallback, compute by iterating the loops.
1742 If e.g. the sub-nest is
1743 for (I = N11; I COND1 N12; I += STEP1)
1744 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1745 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1747 COUNT = 0;
1748 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1749 for (tmpj = M21 * tmpi + N21;
1750 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1752 int tmpk1 = M31 * tmpj + N31;
1753 int tmpk2 = M32 * tmpj + N32;
1754 if (tmpk1 COND3 tmpk2)
1756 if (COND3 is <)
1757 adj = STEP3 - 1;
1758 else
1759 adj = STEP3 + 1;
1760 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1763 and finally multiply the counts of the rectangular loops not
1764    in the sub-nest with COUNT. Also, store as counts[fd->last_nonrect]
1765    the number of iterations of the loops from fd->first_nonrect
1766    to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1767    by the counts of rectangular loops not referenced in any non-rectangular
1768    loops sandwiched in between those. */
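A small numeric check of the rectangular-count recipe above, with hypothetical bounds: for a collapse(2) nest 'for (i = 0; i < 8; i += 2) for (j = 0; j < 5; j++)', both conditions are '<', so adj1 = STEP1 - 1 = 1 and count1 = (1 + 8 - 0) / 2 = 4, adj2 = 0 and count2 = (0 + 5 - 0) / 1 = 5, giving count = count1 * count2 = 20 logical iterations.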
1770 /* NOTE: It *could* be better to moosh all of the BBs together,
1771 creating one larger BB with all the computation and the unexpected
1772 jump at the end. I.e.
1774 bool zero3, zero2, zero1, zero;
1776 zero3 = N32 c3 N31;
1777 count3 = (N32 - N31) /[cl] STEP3;
1778 zero2 = N22 c2 N21;
1779 count2 = (N22 - N21) /[cl] STEP2;
1780 zero1 = N12 c1 N11;
1781 count1 = (N12 - N11) /[cl] STEP1;
1782 zero = zero3 || zero2 || zero1;
1783 count = count1 * count2 * count3;
1784 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1786 After all, we expect the zero=false, and thus we expect to have to
1787 evaluate all of the comparison expressions, so short-circuiting
1788 oughtn't be a win. Since the condition isn't protecting a
1789 denominator, we're not concerned about divide-by-zero, so we can
1790 fully evaluate count even if a numerator turned out to be wrong.
1792 It seems like putting this all together would create much better
1793 scheduling opportunities, and less pressure on the chip's branch
1794 predictor. */
1796 static void
1797 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 basic_block &entry_bb, tree *counts,
1799 basic_block &zero_iter1_bb, int &first_zero_iter1,
1800 basic_block &zero_iter2_bb, int &first_zero_iter2,
1801 basic_block &l2_dom_bb)
1803 tree t, type = TREE_TYPE (fd->loop.v);
1804 edge e, ne;
1805 int i;
1807 /* Collapsed loops need work for expansion into SSA form. */
1808 gcc_assert (!gimple_in_ssa_p (cfun));
1810 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1811 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1813 gcc_assert (fd->ordered == 0);
1814 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1815 isn't supposed to be handled, as the inner loop doesn't
1816 use it. */
1817 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1818 OMP_CLAUSE__LOOPTEMP_);
1819 gcc_assert (innerc);
1820 for (i = 0; i < fd->collapse; i++)
1822 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1823 OMP_CLAUSE__LOOPTEMP_);
1824 gcc_assert (innerc);
1825 if (i)
1826 counts[i] = OMP_CLAUSE_DECL (innerc);
1827 else
1828 counts[0] = NULL_TREE;
1830 if (fd->non_rect
1831 && fd->last_nonrect == fd->first_nonrect + 1
1832 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1834 tree c[4];
1835 for (i = 0; i < 4; i++)
1837 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1838 OMP_CLAUSE__LOOPTEMP_);
1839 gcc_assert (innerc);
1840 c[i] = OMP_CLAUSE_DECL (innerc);
1842 counts[0] = c[0];
1843 fd->first_inner_iterations = c[1];
1844 fd->factor = c[2];
1845 fd->adjn1 = c[3];
1847 return;
1850 for (i = fd->collapse; i < fd->ordered; i++)
1852 tree itype = TREE_TYPE (fd->loops[i].v);
1853 counts[i] = NULL_TREE;
1854 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1855 fold_convert (itype, fd->loops[i].n1),
1856 fold_convert (itype, fd->loops[i].n2));
1857 if (t && integer_zerop (t))
1859 for (i = fd->collapse; i < fd->ordered; i++)
1860 counts[i] = build_int_cst (type, 0);
1861 break;
1864 bool rect_count_seen = false;
1865 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1867 tree itype = TREE_TYPE (fd->loops[i].v);
1869 if (i >= fd->collapse && counts[i])
1870 continue;
1871 if (fd->non_rect)
1873 /* Skip loops that use outer iterators in their expressions
1874 during this phase. */
1875 if (fd->loops[i].m1 || fd->loops[i].m2)
1877 counts[i] = build_zero_cst (type);
1878 continue;
1881 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1882 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1883 fold_convert (itype, fd->loops[i].n1),
1884 fold_convert (itype, fd->loops[i].n2)))
1885 == NULL_TREE || !integer_onep (t)))
1887 gcond *cond_stmt;
1888 tree n1, n2;
1889 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1890 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1891 true, GSI_SAME_STMT);
1892 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1893 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1894 true, GSI_SAME_STMT);
1895 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1896 n1, n2);
1897 e = split_block (entry_bb, cond_stmt);
1898 basic_block &zero_iter_bb
1899 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1900 int &first_zero_iter
1901 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1902 if (zero_iter_bb == NULL)
1904 gassign *assign_stmt;
1905 first_zero_iter = i;
1906 zero_iter_bb = create_empty_bb (entry_bb);
1907 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1908 *gsi = gsi_after_labels (zero_iter_bb);
1909 if (i < fd->collapse)
1910 assign_stmt = gimple_build_assign (fd->loop.n2,
1911 build_zero_cst (type));
1912 else
1914 counts[i] = create_tmp_reg (type, ".count");
1915 assign_stmt
1916 = gimple_build_assign (counts[i], build_zero_cst (type));
1918 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1919 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1920 entry_bb);
1922 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1923 ne->probability = profile_probability::very_unlikely ();
1924 e->flags = EDGE_TRUE_VALUE;
1925 e->probability = ne->probability.invert ();
1926 if (l2_dom_bb == NULL)
1927 l2_dom_bb = entry_bb;
1928 entry_bb = e->dest;
1929 *gsi = gsi_last_nondebug_bb (entry_bb);
1932 if (POINTER_TYPE_P (itype))
1933 itype = signed_type_for (itype);
1934 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1935 ? -1 : 1));
1936 t = fold_build2 (PLUS_EXPR, itype,
1937 fold_convert (itype, fd->loops[i].step), t);
1938 t = fold_build2 (PLUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n2));
1940 t = fold_build2 (MINUS_EXPR, itype, t,
1941 fold_convert (itype, fd->loops[i].n1));
1942 /* ?? We could probably use CEIL_DIV_EXPR instead of
1943 TRUNC_DIV_EXPR plus the by-hand adjustment. Unless we can't
1944 generate the same code in the end because generically we
1945 don't know that the values involved must be negative for
1946 GT?? */
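/* An illustrative worked instance (not from the original sources) of the
   count computation around here: for (i = 0; i < 10; i += 3) with
   cond_code == LT_EXPR gives t = (3 - 1) + 10 - 0 = 12 and
   count = 12 / 3 = 4, matching the iterations i = 0, 3, 6, 9.  */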
1947 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1948 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1949 fold_build1 (NEGATE_EXPR, itype, t),
1950 fold_build1 (NEGATE_EXPR, itype,
1951 fold_convert (itype,
1952 fd->loops[i].step)));
1953 else
1954 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1955 fold_convert (itype, fd->loops[i].step));
1956 t = fold_convert (type, t);
1957 if (TREE_CODE (t) == INTEGER_CST)
1958 counts[i] = t;
1959 else
1961 if (i < fd->collapse || i != first_zero_iter2)
1962 counts[i] = create_tmp_reg (type, ".count");
1963 expand_omp_build_assign (gsi, counts[i], t);
1965 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1967 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1968 continue;
1969 if (!rect_count_seen)
1971 t = counts[i];
1972 rect_count_seen = true;
1974 else
1975 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1976 expand_omp_build_assign (gsi, fd->loop.n2, t);
1979 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1981 gcc_assert (fd->last_nonrect != -1);
1983 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1984 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1985 build_zero_cst (type));
1986 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1987 if (fd->loops[i].m1
1988 || fd->loops[i].m2
1989 || fd->loops[i].non_rect_referenced)
1990 break;
1991 if (i == fd->last_nonrect
1992 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1993 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1994 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1996 int o = fd->first_nonrect;
1997 tree itype = TREE_TYPE (fd->loops[o].v);
1998 tree n1o = create_tmp_reg (itype, ".n1o");
1999 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
2000 expand_omp_build_assign (gsi, n1o, t);
2001 tree n2o = create_tmp_reg (itype, ".n2o");
2002 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2003 expand_omp_build_assign (gsi, n2o, t);
2004 if (fd->loops[i].m1 && fd->loops[i].m2)
2005 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2006 unshare_expr (fd->loops[i].m1));
2007 else if (fd->loops[i].m1)
2008 t = fold_unary (NEGATE_EXPR, itype,
2009 unshare_expr (fd->loops[i].m1));
2010 else
2011 t = unshare_expr (fd->loops[i].m2);
2012 tree m2minusm1
2013 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2014 true, GSI_SAME_STMT);
2016 gimple_stmt_iterator gsi2 = *gsi;
2017 gsi_prev (&gsi2);
2018 e = split_block (entry_bb, gsi_stmt (gsi2));
2019 e = split_block (e->dest, (gimple *) NULL);
2020 basic_block bb1 = e->src;
2021 entry_bb = e->dest;
2022 *gsi = gsi_after_labels (entry_bb);
2024 gsi2 = gsi_after_labels (bb1);
2025 tree ostep = fold_convert (itype, fd->loops[o].step);
2026 t = build_int_cst (itype, (fd->loops[o].cond_code
2027 == LT_EXPR ? -1 : 1));
2028 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2029 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2030 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2031 if (TYPE_UNSIGNED (itype)
2032 && fd->loops[o].cond_code == GT_EXPR)
2033 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2034 fold_build1 (NEGATE_EXPR, itype, t),
2035 fold_build1 (NEGATE_EXPR, itype, ostep));
2036 else
2037 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2038 tree outer_niters
2039 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2040 true, GSI_SAME_STMT);
2041 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2042 build_one_cst (itype));
2043 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2044 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2045 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 tree n1, n2, n1e, n2e;
2048 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2049 if (fd->loops[i].m1)
2051 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2052 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2053 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2055 else
2056 n1 = t;
2057 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2058 true, GSI_SAME_STMT);
2059 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2060 if (fd->loops[i].m2)
2062 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2063 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2064 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2066 else
2067 n2 = t;
2068 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2069 true, GSI_SAME_STMT);
2070 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2071 if (fd->loops[i].m1)
2073 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2074 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2075 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2077 else
2078 n1e = t;
2079 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2080 true, GSI_SAME_STMT);
2081 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2082 if (fd->loops[i].m2)
2084 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2085 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2086 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2088 else
2089 n2e = t;
2090 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2091 true, GSI_SAME_STMT);
2092 gcond *cond_stmt
2093 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2094 n1, n2);
2095 e = split_block (bb1, cond_stmt);
2096 e->flags = EDGE_TRUE_VALUE;
2097 e->probability = profile_probability::likely ().guessed ();
2098 basic_block bb2 = e->dest;
2099 gsi2 = gsi_after_labels (bb2);
2101 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2102 n1e, n2e);
2103 e = split_block (bb2, cond_stmt);
2104 e->flags = EDGE_TRUE_VALUE;
2105 e->probability = profile_probability::likely ().guessed ();
2106 gsi2 = gsi_after_labels (e->dest);
2108 tree step = fold_convert (itype, fd->loops[i].step);
2109 t = build_int_cst (itype, (fd->loops[i].cond_code
2110 == LT_EXPR ? -1 : 1));
2111 t = fold_build2 (PLUS_EXPR, itype, step, t);
2112 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2113 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2114 if (TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2117 fold_build1 (NEGATE_EXPR, itype, t),
2118 fold_build1 (NEGATE_EXPR, itype, step));
2119 else
2120 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2121 tree first_inner_iterations
2122 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2123 true, GSI_SAME_STMT);
2124 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2125 if (TYPE_UNSIGNED (itype)
2126 && fd->loops[i].cond_code == GT_EXPR)
2127 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2128 fold_build1 (NEGATE_EXPR, itype, t),
2129 fold_build1 (NEGATE_EXPR, itype, step));
2130 else
2131 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2132 tree factor
2133 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2134 true, GSI_SAME_STMT);
2135 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2136 build_one_cst (itype));
2137 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2138 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2139 t = fold_build2 (MULT_EXPR, itype, factor, t);
2140 t = fold_build2 (PLUS_EXPR, itype,
2141 fold_build2 (MULT_EXPR, itype, outer_niters,
2142 first_inner_iterations), t);
2143 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2144 fold_convert (type, t));
2146 basic_block bb3 = create_empty_bb (bb1);
2147 add_bb_to_loop (bb3, bb1->loop_father);
2149 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2150 e->probability = profile_probability::unlikely ().guessed ();
2152 gsi2 = gsi_after_labels (bb3);
2153 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2154 n1e, n2e);
2155 e = split_block (bb3, cond_stmt);
2156 e->flags = EDGE_TRUE_VALUE;
2157 e->probability = profile_probability::likely ().guessed ();
2158 basic_block bb4 = e->dest;
2160 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2161 ne->probability = e->probability.invert ();
2163 basic_block bb5 = create_empty_bb (bb2);
2164 add_bb_to_loop (bb5, bb2->loop_father);
2166 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2167 ne->probability = profile_probability::unlikely ().guessed ();
2169 for (int j = 0; j < 2; j++)
2171 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2172 t = fold_build2 (MINUS_EXPR, itype,
2173 unshare_expr (fd->loops[i].n1),
2174 unshare_expr (fd->loops[i].n2));
2175 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2176 tree tem
2177 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2178 true, GSI_SAME_STMT);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2180 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2181 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2182 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2183 true, GSI_SAME_STMT);
2184 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2185 if (fd->loops[i].m1)
2187 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2188 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2189 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2191 else
2192 n1 = t;
2193 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2194 true, GSI_SAME_STMT);
2195 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2196 if (fd->loops[i].m2)
2198 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2199 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2200 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2202 else
2203 n2 = t;
2204 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2205 true, GSI_SAME_STMT);
2206 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2208 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2209 n1, n2);
2210 e = split_block (gsi_bb (gsi2), cond_stmt);
2211 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2212 e->probability = profile_probability::unlikely ().guessed ();
2213 ne = make_edge (e->src, bb1,
2214 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2215 ne->probability = e->probability.invert ();
2216 gsi2 = gsi_after_labels (e->dest);
2218 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2219 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2221 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2224 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2225 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2226 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2228 if (fd->first_nonrect + 1 == fd->last_nonrect)
2230 fd->first_inner_iterations = first_inner_iterations;
2231 fd->factor = factor;
2232 fd->adjn1 = n1o;
2235 else
2237 /* Fallback implementation. Evaluate the loops with m1/m2
2238 non-NULL as well as their outer loops at runtime using temporaries
2239 instead of the original iteration variables, and in the
2240 body just bump the counter. */
2241 gimple_stmt_iterator gsi2 = *gsi;
2242 gsi_prev (&gsi2);
2243 e = split_block (entry_bb, gsi_stmt (gsi2));
2244 e = split_block (e->dest, (gimple *) NULL);
2245 basic_block cur_bb = e->src;
2246 basic_block next_bb = e->dest;
2247 entry_bb = e->dest;
2248 *gsi = gsi_after_labels (entry_bb);
2250 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2251 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2253 for (i = 0; i <= fd->last_nonrect; i++)
2255 if (fd->loops[i].m1 == NULL_TREE
2256 && fd->loops[i].m2 == NULL_TREE
2257 && !fd->loops[i].non_rect_referenced)
2258 continue;
2260 tree itype = TREE_TYPE (fd->loops[i].v);
2262 gsi2 = gsi_after_labels (cur_bb);
2263 tree n1, n2;
2264 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2265 if (fd->loops[i].m1 == NULL_TREE)
2266 n1 = t;
2267 else if (POINTER_TYPE_P (itype))
2269 gcc_assert (integer_onep (fd->loops[i].m1));
2270 t = fold_convert (sizetype,
2271 unshare_expr (fd->loops[i].n1));
2272 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2274 else
2276 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2277 n1 = fold_build2 (MULT_EXPR, itype,
2278 vs[i - fd->loops[i].outer], n1);
2279 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2281 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2282 true, GSI_SAME_STMT);
2283 if (i < fd->last_nonrect)
2285 vs[i] = create_tmp_reg (itype, ".it");
2286 expand_omp_build_assign (&gsi2, vs[i], n1);
2288 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2289 if (fd->loops[i].m2 == NULL_TREE)
2290 n2 = t;
2291 else if (POINTER_TYPE_P (itype))
2293 gcc_assert (integer_onep (fd->loops[i].m2));
2294 t = fold_convert (sizetype,
2295 unshare_expr (fd->loops[i].n2));
2296 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2298 else
2300 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2301 n2 = fold_build2 (MULT_EXPR, itype,
2302 vs[i - fd->loops[i].outer], n2);
2303 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2305 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2306 true, GSI_SAME_STMT);
2307 if (POINTER_TYPE_P (itype))
2308 itype = signed_type_for (itype);
2309 if (i == fd->last_nonrect)
2311 gcond *cond_stmt
2312 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2313 n1, n2);
2314 e = split_block (cur_bb, cond_stmt);
2315 e->flags = EDGE_TRUE_VALUE;
2316 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2317 e->probability = profile_probability::likely ().guessed ();
2318 ne->probability = e->probability.invert ();
2319 gsi2 = gsi_after_labels (e->dest);
2321 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2322 ? -1 : 1));
2323 t = fold_build2 (PLUS_EXPR, itype,
2324 fold_convert (itype, fd->loops[i].step), t);
2325 t = fold_build2 (PLUS_EXPR, itype, t,
2326 fold_convert (itype, n2));
2327 t = fold_build2 (MINUS_EXPR, itype, t,
2328 fold_convert (itype, n1));
2329 tree step = fold_convert (itype, fd->loops[i].step);
2330 if (TYPE_UNSIGNED (itype)
2331 && fd->loops[i].cond_code == GT_EXPR)
2332 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2333 fold_build1 (NEGATE_EXPR, itype, t),
2334 fold_build1 (NEGATE_EXPR, itype, step));
2335 else
2336 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2337 t = fold_convert (type, t);
2338 t = fold_build2 (PLUS_EXPR, type,
2339 counts[fd->last_nonrect], t);
2340 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2341 true, GSI_SAME_STMT);
2342 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2343 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2344 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2345 break;
2347 e = split_block (cur_bb, last_stmt (cur_bb));
2349 basic_block new_cur_bb = create_empty_bb (cur_bb);
2350 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2352 gsi2 = gsi_after_labels (e->dest);
2353 tree step = fold_convert (itype,
2354 unshare_expr (fd->loops[i].step));
2355 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2356 t = fold_build_pointer_plus (vs[i],
2357 fold_convert (sizetype, step));
2358 else
2359 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2360 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2361 true, GSI_SAME_STMT);
2362 expand_omp_build_assign (&gsi2, vs[i], t);
2364 ne = split_block (e->dest, last_stmt (e->dest));
2365 gsi2 = gsi_after_labels (ne->dest);
2367 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2368 edge e3, e4;
2369 if (next_bb == entry_bb)
2371 e3 = find_edge (ne->dest, next_bb);
2372 e3->flags = EDGE_FALSE_VALUE;
2374 else
2375 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2376 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2377 e4->probability = profile_probability::likely ().guessed ();
2378 e3->probability = e4->probability.invert ();
2379 basic_block esrc = e->src;
2380 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2381 cur_bb = new_cur_bb;
2382 basic_block latch_bb = next_bb;
2383 next_bb = e->dest;
2384 remove_edge (e);
2385 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2386 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2387 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2390 t = NULL_TREE;
2391 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2392 if (!fd->loops[i].non_rect_referenced
2393 && fd->loops[i].m1 == NULL_TREE
2394 && fd->loops[i].m2 == NULL_TREE)
2396 if (t == NULL_TREE)
2397 t = counts[i];
2398 else
2399 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2401 if (t)
2403 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2404 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2406 if (!rect_count_seen)
2407 t = counts[fd->last_nonrect];
2408 else
2409 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2410 counts[fd->last_nonrect]);
2411 expand_omp_build_assign (gsi, fd->loop.n2, t);
2413 else if (fd->non_rect)
2415 tree t = fd->loop.n2;
2416 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2417 int non_rect_referenced = 0, non_rect = 0;
2418 for (i = 0; i < fd->collapse; i++)
2420 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2421 && !integer_zerop (counts[i]))
2422 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2423 if (fd->loops[i].non_rect_referenced)
2424 non_rect_referenced++;
2425 if (fd->loops[i].m1 || fd->loops[i].m2)
2426 non_rect++;
2428 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2429 counts[fd->last_nonrect] = t;
2433 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2434 T = V;
2435 V3 = N31 + (T % count3) * STEP3;
2436 T = T / count3;
2437 V2 = N21 + (T % count2) * STEP2;
2438 T = T / count2;
2439 V1 = N11 + T * STEP1;
2440 if this loop doesn't have an inner loop construct combined with it.
2441 If it does have an inner loop construct combined with it and the
2442 iteration count isn't known constant, store values from counts array
2443 into its _looptemp_ temporaries instead.
2444 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2445 inclusive), use the count of all those loops together, and either
2446 find quadratic etc. equation roots, or as a fallback, do:
2447 COUNT = 0;
2448 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2449 for (tmpj = M21 * tmpi + N21;
2450 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2452 int tmpk1 = M31 * tmpj + N31;
2453 int tmpk2 = M32 * tmpj + N32;
2454 if (tmpk1 COND3 tmpk2)
2456 if (COND3 is <)
2457 adj = STEP3 - 1;
2458 else
2459 adj = STEP3 + 1;
2460 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2461 if (COUNT + temp > T)
2463 V1 = tmpi;
2464 V2 = tmpj;
2465 V3 = tmpk1 + (T - COUNT) * STEP3;
2466 goto done;
2468 else
2469 COUNT += temp;
2472 done:;
2473 but for optional innermost or outermost rectangular loops that aren't
2474 referenced by other loop expressions keep doing the division/modulo. */
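/* Illustrative only, not part of the expansion: a plain C sketch of the
   division/modulo decomposition described above for a rectangular
   collapse(3) nest; all identifiers are local to this example.

     static void
     decompose (long long t, long long n11, long long step1,
                long long n21, long long step2, long long count2,
                long long n31, long long step3, long long count3,
                long long *v1, long long *v2, long long *v3)
     {
       // The innermost iterator varies fastest in the logical space.
       *v3 = n31 + (t % count3) * step3;
       t /= count3;
       *v2 = n21 + (t % count2) * step2;
       t /= count2;
       // Whatever remains selects the outermost iteration.
       *v1 = n11 + t * step1;
     }
*/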
2476 static void
2477 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2478 tree *counts, tree *nonrect_bounds,
2479 gimple *inner_stmt, tree startvar)
2481 int i;
2482 if (gimple_omp_for_combined_p (fd->for_stmt))
2484 /* If fd->loop.n2 is constant, then no propagation of the counts
2485 is needed, they are constant. */
2486 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2487 return;
2489 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2490 ? gimple_omp_taskreg_clauses (inner_stmt)
2491 : gimple_omp_for_clauses (inner_stmt);
2492 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2493 isn't supposed to be handled, as the inner loop doesn't
2494 use it. */
2495 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2496 gcc_assert (innerc);
2497 int count = 0;
2498 if (fd->non_rect
2499 && fd->last_nonrect == fd->first_nonrect + 1
2500 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2501 count = 4;
2502 for (i = 0; i < fd->collapse + count; i++)
2504 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2505 OMP_CLAUSE__LOOPTEMP_);
2506 gcc_assert (innerc);
2507 if (i)
2509 tree tem = OMP_CLAUSE_DECL (innerc);
2510 tree t;
2511 if (i < fd->collapse)
2512 t = counts[i];
2513 else
2514 switch (i - fd->collapse)
2516 case 0: t = counts[0]; break;
2517 case 1: t = fd->first_inner_iterations; break;
2518 case 2: t = fd->factor; break;
2519 case 3: t = fd->adjn1; break;
2520 default: gcc_unreachable ();
2522 t = fold_convert (TREE_TYPE (tem), t);
2523 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2524 false, GSI_CONTINUE_LINKING);
2525 gassign *stmt = gimple_build_assign (tem, t);
2526 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2529 return;
2532 tree type = TREE_TYPE (fd->loop.v);
2533 tree tem = create_tmp_reg (type, ".tem");
2534 gassign *stmt = gimple_build_assign (tem, startvar);
2535 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2537 for (i = fd->collapse - 1; i >= 0; i--)
2539 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2540 itype = vtype;
2541 if (POINTER_TYPE_P (vtype))
2542 itype = signed_type_for (vtype);
2543 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2544 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2545 else
2546 t = tem;
2547 if (i == fd->last_nonrect)
2549 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2550 false, GSI_CONTINUE_LINKING);
2551 tree stopval = t;
2552 tree idx = create_tmp_reg (type, ".count");
2553 expand_omp_build_assign (gsi, idx,
2554 build_zero_cst (type), true);
2555 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2556 if (fd->first_nonrect + 1 == fd->last_nonrect
2557 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2558 || fd->first_inner_iterations)
2559 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2560 != CODE_FOR_nothing)
2561 && !integer_zerop (fd->loop.n2))
2563 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2564 tree itype = TREE_TYPE (fd->loops[i].v);
2565 tree first_inner_iterations = fd->first_inner_iterations;
2566 tree factor = fd->factor;
2567 gcond *cond_stmt
2568 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2569 build_zero_cst (TREE_TYPE (factor)));
2570 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2571 basic_block bb0 = e->src;
2572 e->flags = EDGE_TRUE_VALUE;
2573 e->probability = profile_probability::likely ();
2574 bb_triang_dom = bb0;
2575 *gsi = gsi_after_labels (e->dest);
2576 tree slltype = long_long_integer_type_node;
2577 tree ulltype = long_long_unsigned_type_node;
2578 tree stopvalull = fold_convert (ulltype, stopval);
2579 stopvalull
2580 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2581 false, GSI_CONTINUE_LINKING);
2582 first_inner_iterations
2583 = fold_convert (slltype, first_inner_iterations);
2584 first_inner_iterations
2585 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2586 NULL_TREE, false,
2587 GSI_CONTINUE_LINKING);
2588 factor = fold_convert (slltype, factor);
2589 factor
2590 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2591 false, GSI_CONTINUE_LINKING);
2592 tree first_inner_iterationsd
2593 = fold_build1 (FLOAT_EXPR, double_type_node,
2594 first_inner_iterations);
2595 first_inner_iterationsd
2596 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2597 NULL_TREE, false,
2598 GSI_CONTINUE_LINKING);
2599 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2600 factor);
2601 factord = force_gimple_operand_gsi (gsi, factord, true,
2602 NULL_TREE, false,
2603 GSI_CONTINUE_LINKING);
2604 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2605 stopvalull);
2606 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2607 NULL_TREE, false,
2608 GSI_CONTINUE_LINKING);
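/* For reference, a derivation (based only on the statements that follow)
   of the root computed below: after C complete iterations of the outer
   loop, the cumulative number of inner iterations is
     D(C) = C * FIRST_INNER_ITERATIONS + FACTOR * C * (C - 1) / 2.
   Solving D(C) = STOPVAL for C, with
     T3 = FIRST_INNER_ITERATIONS - FACTOR / 2,
   gives
     C = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3) / FACTOR,
   which is what the floating point code below evaluates and then
   truncates; the later comparisons of STOPVAL against D and against
   D plus the next inner count guard against rounding problems and
   branch to the fallback when they fail.  */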
2609 /* Temporarily disable flag_rounding_math, values will be
2610 decimal numbers divided by 2 and worst case imprecisions
2611 due to too large values ought to be caught later by the
2612 checks for fallback. */
2613 int save_flag_rounding_math = flag_rounding_math;
2614 flag_rounding_math = 0;
2615 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2616 build_real (double_type_node, dconst2));
2617 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2618 first_inner_iterationsd, t);
2619 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2620 GSI_CONTINUE_LINKING);
2621 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2622 build_real (double_type_node, dconst2));
2623 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2624 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2625 fold_build2 (MULT_EXPR, double_type_node,
2626 t3, t3));
2627 flag_rounding_math = save_flag_rounding_math;
2628 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2629 GSI_CONTINUE_LINKING);
2630 if (flag_exceptions
2631 && cfun->can_throw_non_call_exceptions
2632 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2634 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2635 build_zero_cst (double_type_node));
2636 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2637 false, GSI_CONTINUE_LINKING);
2638 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2639 boolean_false_node,
2640 NULL_TREE, NULL_TREE);
2642 else
2643 cond_stmt
2644 = gimple_build_cond (LT_EXPR, t,
2645 build_zero_cst (double_type_node),
2646 NULL_TREE, NULL_TREE);
2647 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2648 e = split_block (gsi_bb (*gsi), cond_stmt);
2649 basic_block bb1 = e->src;
2650 e->flags = EDGE_FALSE_VALUE;
2651 e->probability = profile_probability::very_likely ();
2652 *gsi = gsi_after_labels (e->dest);
2653 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2654 tree sqrtr = create_tmp_var (double_type_node);
2655 gimple_call_set_lhs (call, sqrtr);
2656 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2657 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2658 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2659 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2660 tree c = create_tmp_var (ulltype);
2661 tree d = create_tmp_var (ulltype);
2662 expand_omp_build_assign (gsi, c, t, true);
2663 t = fold_build2 (MINUS_EXPR, ulltype, c,
2664 build_one_cst (ulltype));
2665 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2666 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2667 t = fold_build2 (MULT_EXPR, ulltype,
2668 fold_convert (ulltype, fd->factor), t);
2669 tree t2
2670 = fold_build2 (MULT_EXPR, ulltype, c,
2671 fold_convert (ulltype,
2672 fd->first_inner_iterations));
2673 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2674 expand_omp_build_assign (gsi, d, t, true);
2675 t = fold_build2 (MULT_EXPR, ulltype,
2676 fold_convert (ulltype, fd->factor), c);
2677 t = fold_build2 (PLUS_EXPR, ulltype,
2678 t, fold_convert (ulltype,
2679 fd->first_inner_iterations));
2680 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2681 GSI_CONTINUE_LINKING);
2682 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2683 NULL_TREE, NULL_TREE);
2684 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2685 e = split_block (gsi_bb (*gsi), cond_stmt);
2686 basic_block bb2 = e->src;
2687 e->flags = EDGE_TRUE_VALUE;
2688 e->probability = profile_probability::very_likely ();
2689 *gsi = gsi_after_labels (e->dest);
2690 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2691 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2692 GSI_CONTINUE_LINKING);
2693 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2694 NULL_TREE, NULL_TREE);
2695 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2696 e = split_block (gsi_bb (*gsi), cond_stmt);
2697 basic_block bb3 = e->src;
2698 e->flags = EDGE_FALSE_VALUE;
2699 e->probability = profile_probability::very_likely ();
2700 *gsi = gsi_after_labels (e->dest);
2701 t = fold_convert (itype, c);
2702 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2703 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2704 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2705 GSI_CONTINUE_LINKING);
2706 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2707 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2708 t2 = fold_convert (itype, t2);
2709 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2710 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2711 if (fd->loops[i].m1)
2713 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2714 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2716 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2717 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2718 bb_triang = e->src;
2719 *gsi = gsi_after_labels (e->dest);
2720 remove_edge (e);
2721 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2722 e->probability = profile_probability::very_unlikely ();
2723 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2724 e->probability = profile_probability::very_unlikely ();
2725 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2726 e->probability = profile_probability::very_unlikely ();
2728 basic_block bb4 = create_empty_bb (bb0);
2729 add_bb_to_loop (bb4, bb0->loop_father);
2730 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2731 e->probability = profile_probability::unlikely ();
2732 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2733 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2734 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2735 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2736 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2737 counts[i], counts[i - 1]);
2738 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2739 GSI_CONTINUE_LINKING);
2740 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2741 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2742 t = fold_convert (itype, t);
2743 t2 = fold_convert (itype, t2);
2744 t = fold_build2 (MULT_EXPR, itype, t,
2745 fold_convert (itype, fd->loops[i].step));
2746 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2747 t2 = fold_build2 (MULT_EXPR, itype, t2,
2748 fold_convert (itype, fd->loops[i - 1].step));
2749 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2750 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2751 false, GSI_CONTINUE_LINKING);
2752 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2753 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2754 if (fd->loops[i].m1)
2756 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2757 fd->loops[i - 1].v);
2758 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2760 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2761 false, GSI_CONTINUE_LINKING);
2762 stmt = gimple_build_assign (fd->loops[i].v, t);
2763 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2765 /* Fallback implementation. Evaluate the loops in between
2766 (inclusive) fd->first_nonrect and fd->last_nonrect at
2767 runtime using temporaries instead of the original iteration
2768 variables, and in the body just bump the counter and compare
2769 with the desired value. */
2770 gimple_stmt_iterator gsi2 = *gsi;
2771 basic_block entry_bb = gsi_bb (gsi2);
2772 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2773 e = split_block (e->dest, (gimple *) NULL);
2774 basic_block dom_bb = NULL;
2775 basic_block cur_bb = e->src;
2776 basic_block next_bb = e->dest;
2777 entry_bb = e->dest;
2778 *gsi = gsi_after_labels (entry_bb);
2780 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2781 tree n1 = NULL_TREE, n2 = NULL_TREE;
2782 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2784 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2786 tree itype = TREE_TYPE (fd->loops[j].v);
2787 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2788 && fd->loops[j].m2 == NULL_TREE
2789 && !fd->loops[j].non_rect_referenced);
2790 gsi2 = gsi_after_labels (cur_bb);
2791 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2792 if (fd->loops[j].m1 == NULL_TREE)
2793 n1 = rect_p ? build_zero_cst (type) : t;
2794 else if (POINTER_TYPE_P (itype))
2796 gcc_assert (integer_onep (fd->loops[j].m1));
2797 t = fold_convert (sizetype,
2798 unshare_expr (fd->loops[j].n1));
2799 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2801 else
2803 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2804 n1 = fold_build2 (MULT_EXPR, itype,
2805 vs[j - fd->loops[j].outer], n1);
2806 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2808 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2809 true, GSI_SAME_STMT);
2810 if (j < fd->last_nonrect)
2812 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2813 expand_omp_build_assign (&gsi2, vs[j], n1);
2815 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2816 if (fd->loops[j].m2 == NULL_TREE)
2817 n2 = rect_p ? counts[j] : t;
2818 else if (POINTER_TYPE_P (itype))
2820 gcc_assert (integer_onep (fd->loops[j].m2));
2821 t = fold_convert (sizetype,
2822 unshare_expr (fd->loops[j].n2));
2823 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2825 else
2827 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2828 n2 = fold_build2 (MULT_EXPR, itype,
2829 vs[j - fd->loops[j].outer], n2);
2830 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2832 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2833 true, GSI_SAME_STMT);
2834 if (POINTER_TYPE_P (itype))
2835 itype = signed_type_for (itype);
2836 if (j == fd->last_nonrect)
2838 gcond *cond_stmt
2839 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2840 n1, n2);
2841 e = split_block (cur_bb, cond_stmt);
2842 e->flags = EDGE_TRUE_VALUE;
2843 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2844 e->probability = profile_probability::likely ().guessed ();
2845 ne->probability = e->probability.invert ();
2846 gsi2 = gsi_after_labels (e->dest);
2848 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2849 ? -1 : 1));
2850 t = fold_build2 (PLUS_EXPR, itype,
2851 fold_convert (itype, fd->loops[j].step), t);
2852 t = fold_build2 (PLUS_EXPR, itype, t,
2853 fold_convert (itype, n2));
2854 t = fold_build2 (MINUS_EXPR, itype, t,
2855 fold_convert (itype, n1));
2856 tree step = fold_convert (itype, fd->loops[j].step);
2857 if (TYPE_UNSIGNED (itype)
2858 && fd->loops[j].cond_code == GT_EXPR)
2859 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2860 fold_build1 (NEGATE_EXPR, itype, t),
2861 fold_build1 (NEGATE_EXPR, itype, step));
2862 else
2863 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2864 t = fold_convert (type, t);
2865 t = fold_build2 (PLUS_EXPR, type, idx, t);
2866 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2867 true, GSI_SAME_STMT);
2868 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2869 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2870 cond_stmt
2871 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2872 NULL_TREE);
2873 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2874 e = split_block (gsi_bb (gsi2), cond_stmt);
2875 e->flags = EDGE_TRUE_VALUE;
2876 e->probability = profile_probability::likely ().guessed ();
2877 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2878 ne->probability = e->probability.invert ();
2879 gsi2 = gsi_after_labels (e->dest);
2880 expand_omp_build_assign (&gsi2, idx, t);
2881 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2882 break;
2884 e = split_block (cur_bb, last_stmt (cur_bb));
2886 basic_block new_cur_bb = create_empty_bb (cur_bb);
2887 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2889 gsi2 = gsi_after_labels (e->dest);
2890 if (rect_p)
2891 t = fold_build2 (PLUS_EXPR, type, vs[j],
2892 build_one_cst (type));
2893 else
2895 tree step
2896 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2897 if (POINTER_TYPE_P (vtype))
2898 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2899 step));
2900 else
2901 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2903 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2904 true, GSI_SAME_STMT);
2905 expand_omp_build_assign (&gsi2, vs[j], t);
2907 edge ne = split_block (e->dest, last_stmt (e->dest));
2908 gsi2 = gsi_after_labels (ne->dest);
2910 gcond *cond_stmt;
2911 if (next_bb == entry_bb)
2912 /* No need to actually check the outermost condition. */
2913 cond_stmt
2914 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2915 boolean_true_node,
2916 NULL_TREE, NULL_TREE);
2917 else
2918 cond_stmt
2919 = gimple_build_cond (rect_p ? LT_EXPR
2920 : fd->loops[j].cond_code,
2921 vs[j], n2, NULL_TREE, NULL_TREE);
2922 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2923 edge e3, e4;
2924 if (next_bb == entry_bb)
2926 e3 = find_edge (ne->dest, next_bb);
2927 e3->flags = EDGE_FALSE_VALUE;
2928 dom_bb = ne->dest;
2930 else
2931 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2932 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2933 e4->probability = profile_probability::likely ().guessed ();
2934 e3->probability = e4->probability.invert ();
2935 basic_block esrc = e->src;
2936 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2937 cur_bb = new_cur_bb;
2938 basic_block latch_bb = next_bb;
2939 next_bb = e->dest;
2940 remove_edge (e);
2941 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2942 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2943 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2945 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2947 tree vtype = TREE_TYPE (fd->loops[j].v);
2948 tree itype = vtype;
2949 if (POINTER_TYPE_P (itype))
2950 itype = signed_type_for (itype);
2951 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2952 && fd->loops[j].m2 == NULL_TREE
2953 && !fd->loops[j].non_rect_referenced);
2954 if (j == fd->last_nonrect)
2956 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2957 t = fold_convert (itype, t);
2958 tree t2
2959 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2960 t = fold_build2 (MULT_EXPR, itype, t, t2);
2961 if (POINTER_TYPE_P (vtype))
2962 t = fold_build_pointer_plus (n1,
2963 fold_convert (sizetype, t));
2964 else
2965 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2967 else if (rect_p)
2969 t = fold_convert (itype, vs[j]);
2970 t = fold_build2 (MULT_EXPR, itype, t,
2971 fold_convert (itype, fd->loops[j].step));
2972 if (POINTER_TYPE_P (vtype))
2973 t = fold_build_pointer_plus (fd->loops[j].n1,
2974 fold_convert (sizetype, t));
2975 else
2976 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2978 else
2979 t = vs[j];
2980 t = force_gimple_operand_gsi (gsi, t, false,
2981 NULL_TREE, true,
2982 GSI_SAME_STMT);
2983 stmt = gimple_build_assign (fd->loops[j].v, t);
2984 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2986 if (gsi_end_p (*gsi))
2987 *gsi = gsi_last_bb (gsi_bb (*gsi));
2988 else
2989 gsi_prev (gsi);
2990 if (bb_triang)
2992 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2993 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2994 *gsi = gsi_after_labels (e->dest);
2995 if (!gsi_end_p (*gsi))
2996 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2997 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
3000 else
3002 t = fold_convert (itype, t);
3003 t = fold_build2 (MULT_EXPR, itype, t,
3004 fold_convert (itype, fd->loops[i].step));
3005 if (POINTER_TYPE_P (vtype))
3006 t = fold_build_pointer_plus (fd->loops[i].n1, t);
3007 else
3008 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3009 t = force_gimple_operand_gsi (gsi, t,
3010 DECL_P (fd->loops[i].v)
3011 && TREE_ADDRESSABLE (fd->loops[i].v),
3012 NULL_TREE, false,
3013 GSI_CONTINUE_LINKING);
3014 stmt = gimple_build_assign (fd->loops[i].v, t);
3015 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3017 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3019 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3020 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3021 false, GSI_CONTINUE_LINKING);
3022 stmt = gimple_build_assign (tem, t);
3023 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3025 if (i == fd->last_nonrect)
3026 i = fd->first_nonrect;
3028 if (fd->non_rect)
3029 for (i = 0; i <= fd->last_nonrect; i++)
3030 if (fd->loops[i].m2)
3032 tree itype = TREE_TYPE (fd->loops[i].v);
3034 tree t;
3035 if (POINTER_TYPE_P (itype))
3037 gcc_assert (integer_onep (fd->loops[i].m2));
3038 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3039 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3040 t);
3042 else
3044 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3045 t = fold_build2 (MULT_EXPR, itype,
3046 fd->loops[i - fd->loops[i].outer].v, t);
3047 t = fold_build2 (PLUS_EXPR, itype, t,
3048 fold_convert (itype,
3049 unshare_expr (fd->loops[i].n2)));
3051 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3052 t = force_gimple_operand_gsi (gsi, t, false,
3053 NULL_TREE, false,
3054 GSI_CONTINUE_LINKING);
3055 stmt = gimple_build_assign (nonrect_bounds[i], t);
3056 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3060 /* Helper function for expand_omp_for_*. Generate code like:
3061 L10:
3062 V3 += STEP3;
3063 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3064 L11:
3065 V3 = N31;
3066 V2 += STEP2;
3067 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3068 L12:
3069 V2 = N21;
3070 V1 += STEP1;
3071 goto BODY_BB;
3072 For non-rectangular loops, use temporaries stored in nonrect_bounds
3073 for the upper bounds if M?2 multiplier is present. Given e.g.
3074 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3075 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3076 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3077 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3079 L10:
3080 V4 += STEP4;
3081 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3082 L11:
3083 V4 = N41 + M41 * V2; // This can be left out if the loop
3084 // refers to the immediate parent loop
3085 V3 += STEP3;
3086 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3087 L12:
3088 V3 = N31;
3089 V2 += STEP2;
3090 if (V2 cond2 N22) goto L120; else goto L13;
3091 L120:
3092 V4 = N41 + M41 * V2;
3093 NONRECT_BOUND4 = N42 + M42 * V2;
3094 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3095 L13:
3096 V2 = N21;
3097 V1 += STEP1;
3098 goto L120; */
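/* Illustrative only: the L10/L11/L12 cascade above is the usual
   "odometer" increment. As a structured C sketch for a rectangular
   collapse(3) nest (identifiers are local to this example):

     // Advance (V1, V2, V3) to the next iteration of the nest.
     // The outermost bound is tested elsewhere in the expansion.
     static void
     next_iteration (long long *v1, long long *v2, long long *v3,
                     long long n21, long long n31,
                     long long n22, long long n32,
                     long long step1, long long step2, long long step3)
     {
       *v3 += step3;
       if (*v3 < n32)
         return;                  // goto BODY_BB
       *v3 = n31;                 // reset the innermost iterator
       *v2 += step2;
       if (*v2 < n22)
         return;                  // goto BODY_BB
       *v2 = n21;
       *v1 += step1;              // fall through to BODY_BB
     }
*/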
3100 static basic_block
3101 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3102 basic_block cont_bb, basic_block body_bb)
3104 basic_block last_bb, bb, collapse_bb = NULL;
3105 int i;
3106 gimple_stmt_iterator gsi;
3107 edge e;
3108 tree t;
3109 gimple *stmt;
3111 last_bb = cont_bb;
3112 for (i = fd->collapse - 1; i >= 0; i--)
3114 tree vtype = TREE_TYPE (fd->loops[i].v);
3116 bb = create_empty_bb (last_bb);
3117 add_bb_to_loop (bb, last_bb->loop_father);
3118 gsi = gsi_start_bb (bb);
3120 if (i < fd->collapse - 1)
3122 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3123 e->probability
3124 = profile_probability::guessed_always ().apply_scale (1, 8);
3126 struct omp_for_data_loop *l = &fd->loops[i + 1];
3127 if (l->m1 == NULL_TREE || l->outer != 1)
3129 t = l->n1;
3130 if (l->m1)
3132 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3133 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3134 fold_convert (sizetype, t));
3135 else
3137 tree t2
3138 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3139 fd->loops[i + 1 - l->outer].v, l->m1);
3140 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3143 t = force_gimple_operand_gsi (&gsi, t,
3144 DECL_P (l->v)
3145 && TREE_ADDRESSABLE (l->v),
3146 NULL_TREE, false,
3147 GSI_CONTINUE_LINKING);
3148 stmt = gimple_build_assign (l->v, t);
3149 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3152 else
3153 collapse_bb = bb;
3155 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3157 if (POINTER_TYPE_P (vtype))
3158 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3159 else
3160 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3161 t = force_gimple_operand_gsi (&gsi, t,
3162 DECL_P (fd->loops[i].v)
3163 && TREE_ADDRESSABLE (fd->loops[i].v),
3164 NULL_TREE, false, GSI_CONTINUE_LINKING);
3165 stmt = gimple_build_assign (fd->loops[i].v, t);
3166 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3168 if (fd->loops[i].non_rect_referenced)
3170 basic_block update_bb = NULL, prev_bb = NULL;
3171 for (int j = i + 1; j <= fd->last_nonrect; j++)
3172 if (j - fd->loops[j].outer == i)
3174 tree n1, n2;
3175 struct omp_for_data_loop *l = &fd->loops[j];
3176 basic_block this_bb = create_empty_bb (last_bb);
3177 add_bb_to_loop (this_bb, last_bb->loop_father);
3178 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3179 if (prev_bb)
3181 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3182 e->probability
3183 = profile_probability::guessed_always ().apply_scale (7,
3184 8);
3185 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3187 if (l->m1)
3189 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3190 t = fold_build_pointer_plus (fd->loops[i].v,
3191 fold_convert (sizetype,
3192 l->n1));
3193 else
3195 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3196 fd->loops[i].v);
3197 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3198 t, l->n1);
3200 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3201 false,
3202 GSI_CONTINUE_LINKING);
3203 stmt = gimple_build_assign (l->v, n1);
3204 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3205 n1 = l->v;
3207 else
3208 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3209 NULL_TREE, false,
3210 GSI_CONTINUE_LINKING);
3211 if (l->m2)
3213 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3214 t = fold_build_pointer_plus (fd->loops[i].v,
3215 fold_convert (sizetype,
3216 l->n2));
3217 else
3219 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3220 fd->loops[i].v);
3221 t = fold_build2 (PLUS_EXPR,
3222 TREE_TYPE (nonrect_bounds[j]),
3223 t, unshare_expr (l->n2));
3225 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3226 false,
3227 GSI_CONTINUE_LINKING);
3228 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3229 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3230 n2 = nonrect_bounds[j];
3232 else
3233 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3234 true, NULL_TREE, false,
3235 GSI_CONTINUE_LINKING);
3236 gcond *cond_stmt
3237 = gimple_build_cond (l->cond_code, n1, n2,
3238 NULL_TREE, NULL_TREE);
3239 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3240 if (update_bb == NULL)
3241 update_bb = this_bb;
3242 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3243 e->probability
3244 = profile_probability::guessed_always ().apply_scale (1, 8);
3245 if (prev_bb == NULL)
3246 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3247 prev_bb = this_bb;
3249 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3250 e->probability
3251 = profile_probability::guessed_always ().apply_scale (7, 8);
3252 body_bb = update_bb;
3255 if (i > 0)
3257 if (fd->loops[i].m2)
3258 t = nonrect_bounds[i];
3259 else
3260 t = unshare_expr (fd->loops[i].n2);
3261 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3262 false, GSI_CONTINUE_LINKING);
3263 tree v = fd->loops[i].v;
3264 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3265 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3266 false, GSI_CONTINUE_LINKING);
3267 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3268 stmt = gimple_build_cond_empty (t);
3269 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3270 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3271 expand_omp_regimplify_p, NULL, NULL)
3272 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3273 expand_omp_regimplify_p, NULL, NULL))
3274 gimple_regimplify_operands (stmt, &gsi);
3275 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3276 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3278 else
3279 make_edge (bb, body_bb, EDGE_FALLTHRU);
3280 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3281 last_bb = bb;
3284 return collapse_bb;
3287 /* Expand #pragma omp ordered depend(source). */
3289 static void
3290 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3291 tree *counts, location_t loc)
3293 enum built_in_function source_ix
3294 = fd->iter_type == long_integer_type_node
3295 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3296 gimple *g
3297 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3298 build_fold_addr_expr (counts[fd->ordered]));
3299 gimple_set_location (g, loc);
3300 gsi_insert_before (gsi, g, GSI_SAME_STMT);
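/* Illustrative only: a minimal user-level doacross loop of the kind whose
   ordered directives the functions above and below lower into the GOMP
   doacross post/wait built-ins (example invented for this comment, not
   taken from a testcase):

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++)
       {
     #pragma omp ordered depend (sink: i - 1)
         a[i] += a[i - 1];   // safe: iteration i - 1 has already posted
     #pragma omp ordered depend (source)
       }
*/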
3303 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3305 static void
3306 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3307 tree *counts, tree c, location_t loc)
3309 auto_vec<tree, 10> args;
3310 enum built_in_function sink_ix
3311 = fd->iter_type == long_integer_type_node
3312 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3313 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3314 int i;
3315 gimple_stmt_iterator gsi2 = *gsi;
3316 bool warned_step = false;
3318 for (i = 0; i < fd->ordered; i++)
3320 tree step = NULL_TREE;
3321 off = TREE_PURPOSE (deps);
3322 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3324 step = TREE_OPERAND (off, 1);
3325 off = TREE_OPERAND (off, 0);
3327 if (!integer_zerop (off))
3329 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3330 || fd->loops[i].cond_code == GT_EXPR);
3331 bool forward = fd->loops[i].cond_code == LT_EXPR;
3332 if (step)
3334 /* Non-simple Fortran DO loops. If step is variable,
3335 we don't know even the direction at compile time, so we
3336 can't warn. */
3337 if (TREE_CODE (step) != INTEGER_CST)
3338 break;
3339 forward = tree_int_cst_sgn (step) != -1;
3341 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3342 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3343 "waiting for lexically later iteration");
3344 break;
3346 deps = TREE_CHAIN (deps);
3348 /* If all offsets corresponding to the collapsed loops are zero,
3349 this depend clause can be ignored. FIXME: but there is still a
3350 flush needed. We need to emit one __sync_synchronize () for it
3351 though (perhaps conditionally)? Solve this together with the
3352 conservative dependence folding optimization.
3353 if (i >= fd->collapse)
3354 return; */
3356 deps = OMP_CLAUSE_DECL (c);
3357 gsi_prev (&gsi2);
3358 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3359 edge e2 = split_block_after_labels (e1->dest);
3361 gsi2 = gsi_after_labels (e1->dest);
3362 *gsi = gsi_last_bb (e1->src);
3363 for (i = 0; i < fd->ordered; i++)
3365 tree itype = TREE_TYPE (fd->loops[i].v);
3366 tree step = NULL_TREE;
3367 tree orig_off = NULL_TREE;
3368 if (POINTER_TYPE_P (itype))
3369 itype = sizetype;
3370 if (i)
3371 deps = TREE_CHAIN (deps);
3372 off = TREE_PURPOSE (deps);
3373 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3375 step = TREE_OPERAND (off, 1);
3376 off = TREE_OPERAND (off, 0);
3377 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3378 && integer_onep (fd->loops[i].step)
3379 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3381 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3382 if (step)
3384 off = fold_convert_loc (loc, itype, off);
3385 orig_off = off;
3386 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3389 if (integer_zerop (off))
3390 t = boolean_true_node;
3391 else
3393 tree a;
3394 tree co = fold_convert_loc (loc, itype, off);
3395 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3397 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3398 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3399 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3400 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3401 co);
3403 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3404 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3405 fd->loops[i].v, co);
3406 else
3407 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3408 fd->loops[i].v, co);
3409 if (step)
3411 tree t1, t2;
3412 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3413 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3414 fd->loops[i].n1);
3415 else
3416 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3417 fd->loops[i].n2);
3418 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3419 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3420 fd->loops[i].n2);
3421 else
3422 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3423 fd->loops[i].n1);
3424 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3425 step, build_int_cst (TREE_TYPE (step), 0));
3426 if (TREE_CODE (step) != INTEGER_CST)
3428 t1 = unshare_expr (t1);
3429 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3430 false, GSI_CONTINUE_LINKING);
3431 t2 = unshare_expr (t2);
3432 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3433 false, GSI_CONTINUE_LINKING);
3435 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3436 t, t2, t1);
3438 else if (fd->loops[i].cond_code == LT_EXPR)
3440 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3441 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3442 fd->loops[i].n1);
3443 else
3444 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3445 fd->loops[i].n2);
3447 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3448 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3449 fd->loops[i].n2);
3450 else
3451 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3452 fd->loops[i].n1);
3454 if (cond)
3455 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3456 else
3457 cond = t;
3459 off = fold_convert_loc (loc, itype, off);
3461 if (step
3462 || (fd->loops[i].cond_code == LT_EXPR
3463 ? !integer_onep (fd->loops[i].step)
3464 : !integer_minus_onep (fd->loops[i].step)))
3466 if (step == NULL_TREE
3467 && TYPE_UNSIGNED (itype)
3468 && fd->loops[i].cond_code == GT_EXPR)
3469 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3470 fold_build1_loc (loc, NEGATE_EXPR, itype,
3471 s));
3472 else
3473 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3474 orig_off ? orig_off : off, s);
3475 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3476 build_int_cst (itype, 0));
3477 if (integer_zerop (t) && !warned_step)
3479 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3480 "refers to iteration never in the iteration "
3481 "space");
3482 warned_step = true;
3484 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3485 cond, t);
3488 if (i <= fd->collapse - 1 && fd->collapse > 1)
3489 t = fd->loop.v;
3490 else if (counts[i])
3491 t = counts[i];
3492 else
3494 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3495 fd->loops[i].v, fd->loops[i].n1);
3496 t = fold_convert_loc (loc, fd->iter_type, t);
3498 if (step)
3499 /* We have divided off by step already earlier. */;
3500 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3501 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3502 fold_build1_loc (loc, NEGATE_EXPR, itype,
3503 s));
3504 else
3505 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3506 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3507 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3508 off = fold_convert_loc (loc, fd->iter_type, off);
3509 if (i <= fd->collapse - 1 && fd->collapse > 1)
3511 if (i)
3512 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3513 off);
3514 if (i < fd->collapse - 1)
3516 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3517 counts[i]);
3518 continue;
3521 off = unshare_expr (off);
3522 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3523 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3524 true, GSI_SAME_STMT);
3525 args.safe_push (t);
3527 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3528 gimple_set_location (g, loc);
3529 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3531 cond = unshare_expr (cond);
3532 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3533 GSI_CONTINUE_LINKING);
3534 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3535 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3536 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3537 e1->probability = e3->probability.invert ();
3538 e1->flags = EDGE_TRUE_VALUE;
3539 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3541 *gsi = gsi_after_labels (e2->dest);
3544 /* Expand all #pragma omp ordered depend(source) and
3545 #pragma omp ordered depend(sink:...) constructs in the current
3546 #pragma omp for ordered(n) region. */
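/* As an illustrative (hypothetical) example of the constructs handled here:

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
	   ... use results of iterations (i-1,j) and (i,j-1) ...
	   #pragma omp ordered depend(source)
	 }

   Each sink clause is turned by expand_omp_ordered_sink above into a guarded
   GOMP_doacross_wait / GOMP_doacross_ull_wait call, and the source clause is
   handled by expand_omp_ordered_source, which posts the completion that later
   iterations wait on.  */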
3548 static void
3549 expand_omp_ordered_source_sink (struct omp_region *region,
3550 struct omp_for_data *fd, tree *counts,
3551 basic_block cont_bb)
3553 struct omp_region *inner;
3554 int i;
3555 for (i = fd->collapse - 1; i < fd->ordered; i++)
3556 if (i == fd->collapse - 1 && fd->collapse > 1)
3557 counts[i] = NULL_TREE;
3558 else if (i >= fd->collapse && !cont_bb)
3559 counts[i] = build_zero_cst (fd->iter_type);
3560 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3561 && integer_onep (fd->loops[i].step))
3562 counts[i] = NULL_TREE;
3563 else
3564 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3565 tree atype
3566 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3567 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3568 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3570 for (inner = region->inner; inner; inner = inner->next)
3571 if (inner->type == GIMPLE_OMP_ORDERED)
3573 gomp_ordered *ord_stmt = inner->ord_stmt;
3574 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3575 location_t loc = gimple_location (ord_stmt);
3576 tree c;
3577 for (c = gimple_omp_ordered_clauses (ord_stmt);
3578 c; c = OMP_CLAUSE_CHAIN (c))
3579 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3580 break;
3581 if (c)
3582 expand_omp_ordered_source (&gsi, fd, counts, loc);
3583 for (c = gimple_omp_ordered_clauses (ord_stmt);
3584 c; c = OMP_CLAUSE_CHAIN (c))
3585 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3586 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3587 gsi_remove (&gsi, true);
3591 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3592 collapsed. */
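/* For example (illustration only), with

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 { ... }

   and no collapse clause, only the i loop is workshared; the j loop is
   rebuilt here around the body, together with code keeping the .orditer
   counters and the .orditera array up to date for the doacross waits.  */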
3594 static basic_block
3595 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3596 basic_block cont_bb, basic_block body_bb,
3597 bool ordered_lastprivate)
3599 if (fd->ordered == fd->collapse)
3600 return cont_bb;
3602 if (!cont_bb)
3604 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3605 for (int i = fd->collapse; i < fd->ordered; i++)
3607 tree type = TREE_TYPE (fd->loops[i].v);
3608 tree n1 = fold_convert (type, fd->loops[i].n1);
3609 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3610 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3611 size_int (i - fd->collapse + 1),
3612 NULL_TREE, NULL_TREE);
3613 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3615 return NULL;
3618 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3620 tree t, type = TREE_TYPE (fd->loops[i].v);
3621 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3622 expand_omp_build_assign (&gsi, fd->loops[i].v,
3623 fold_convert (type, fd->loops[i].n1));
3624 if (counts[i])
3625 expand_omp_build_assign (&gsi, counts[i],
3626 build_zero_cst (fd->iter_type));
3627 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3628 size_int (i - fd->collapse + 1),
3629 NULL_TREE, NULL_TREE);
3630 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3631 if (!gsi_end_p (gsi))
3632 gsi_prev (&gsi);
3633 else
3634 gsi = gsi_last_bb (body_bb);
3635 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3636 basic_block new_body = e1->dest;
3637 if (body_bb == cont_bb)
3638 cont_bb = new_body;
3639 edge e2 = NULL;
3640 basic_block new_header;
3641 if (EDGE_COUNT (cont_bb->preds) > 0)
3643 gsi = gsi_last_bb (cont_bb);
3644 if (POINTER_TYPE_P (type))
3645 t = fold_build_pointer_plus (fd->loops[i].v,
3646 fold_convert (sizetype,
3647 fd->loops[i].step));
3648 else
3649 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3650 fold_convert (type, fd->loops[i].step));
3651 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3652 if (counts[i])
3654 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3655 build_int_cst (fd->iter_type, 1));
3656 expand_omp_build_assign (&gsi, counts[i], t);
3657 t = counts[i];
3659 else
3661 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3662 fd->loops[i].v, fd->loops[i].n1);
3663 t = fold_convert (fd->iter_type, t);
3664 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3665 true, GSI_SAME_STMT);
3667 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3668 size_int (i - fd->collapse + 1),
3669 NULL_TREE, NULL_TREE);
3670 expand_omp_build_assign (&gsi, aref, t);
3671 gsi_prev (&gsi);
3672 e2 = split_block (cont_bb, gsi_stmt (gsi));
3673 new_header = e2->dest;
3675 else
3676 new_header = cont_bb;
3677 gsi = gsi_after_labels (new_header);
3678 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3679 true, GSI_SAME_STMT);
3680 tree n2
3681 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3682 true, NULL_TREE, true, GSI_SAME_STMT);
3683 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3684 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3685 edge e3 = split_block (new_header, gsi_stmt (gsi));
3686 cont_bb = e3->dest;
3687 remove_edge (e1);
3688 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3689 e3->flags = EDGE_FALSE_VALUE;
3690 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3691 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3692 e1->probability = e3->probability.invert ();
3694 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3695 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3697 if (e2)
3699 class loop *loop = alloc_loop ();
3700 loop->header = new_header;
3701 loop->latch = e2->src;
3702 add_loop (loop, body_bb->loop_father);
3706 /* If there are any lastprivate clauses and it is possible some loops
3707 might have zero iterations, ensure all the decls are initialized,
3708 otherwise we could crash evaluating C++ class iterators with lastprivate
3709 clauses. */
3710 bool need_inits = false;
3711 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3712 if (need_inits)
3714 tree type = TREE_TYPE (fd->loops[i].v);
3715 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3716 expand_omp_build_assign (&gsi, fd->loops[i].v,
3717 fold_convert (type, fd->loops[i].n1));
3719 else
3721 tree type = TREE_TYPE (fd->loops[i].v);
3722 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3723 boolean_type_node,
3724 fold_convert (type, fd->loops[i].n1),
3725 fold_convert (type, fd->loops[i].n2));
3726 if (!integer_onep (this_cond))
3727 need_inits = true;
3730 return cont_bb;
3733 /* A subroutine of expand_omp_for. Generate code for a parallel
3734 loop with any schedule. Given parameters:
3736 for (V = N1; V cond N2; V += STEP) BODY;
3738 where COND is "<" or ">", we generate pseudocode
3740 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3741 if (more) goto L0; else goto L3;
3743 V = istart0;
3744 iend = iend0;
3746 BODY;
3747 V += STEP;
3748 if (V cond iend) goto L1; else goto L2;
3750 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3753 If this is a combined omp parallel loop, instead of the call to
3754 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3755 If this is gimple_omp_for_combined_p loop, then instead of assigning
3756 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3757 inner GIMPLE_OMP_FOR and V += STEP; and
3758 if (V cond iend) goto L1; else goto L2; are removed.
3760 For collapsed loops, given parameters:
3761 collapse(3)
3762 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3763 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3764 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3765 BODY;
3767 we generate pseudocode
3769 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3770 if (cond3 is <)
3771 adj = STEP3 - 1;
3772 else
3773 adj = STEP3 + 1;
3774 count3 = (adj + N32 - N31) / STEP3;
3775 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3776 if (cond2 is <)
3777 adj = STEP2 - 1;
3778 else
3779 adj = STEP2 + 1;
3780 count2 = (adj + N22 - N21) / STEP2;
3781 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3782 if (cond1 is <)
3783 adj = STEP1 - 1;
3784 else
3785 adj = STEP1 + 1;
3786 count1 = (adj + N12 - N11) / STEP1;
3787 count = count1 * count2 * count3;
3788 goto Z1;
3790 count = 0;
3792 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3793 if (more) goto L0; else goto L3;
3795 V = istart0;
3796 T = V;
3797 V3 = N31 + (T % count3) * STEP3;
3798 T = T / count3;
3799 V2 = N21 + (T % count2) * STEP2;
3800 T = T / count2;
3801 V1 = N11 + T * STEP1;
3802 iend = iend0;
3804 BODY;
3805 V += 1;
3806 if (V < iend) goto L10; else goto L2;
3807 L10:
3808 V3 += STEP3;
3809 if (V3 cond3 N32) goto L1; else goto L11;
3810 L11:
3811 V3 = N31;
3812 V2 += STEP2;
3813 if (V2 cond2 N22) goto L1; else goto L12;
3814 L12:
3815 V2 = N21;
3816 V1 += STEP1;
3817 goto L1;
3819 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
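/* As a concrete (illustrative) instantiation of the pseudocode above, for

     #pragma omp parallel for schedule(dynamic, 4)
     for (long i = 0; i < n; i++)
       body (i);

   START_FN/NEXT_FN are the dynamic-schedule libgomp entry points and the
   expansion is roughly

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 for (i = istart0; i < iend0; i++)
	   body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   modulo the combined-parallel and collapse handling described above.  */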
3824 static void
3825 expand_omp_for_generic (struct omp_region *region,
3826 struct omp_for_data *fd,
3827 enum built_in_function start_fn,
3828 enum built_in_function next_fn,
3829 tree sched_arg,
3830 gimple *inner_stmt)
3832 tree type, istart0, iend0, iend;
3833 tree t, vmain, vback, bias = NULL_TREE;
3834 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3835 basic_block l2_bb = NULL, l3_bb = NULL;
3836 gimple_stmt_iterator gsi;
3837 gassign *assign_stmt;
3838 bool in_combined_parallel = is_combined_parallel (region);
3839 bool broken_loop = region->cont == NULL;
3840 edge e, ne;
3841 tree *counts = NULL;
3842 int i;
3843 bool ordered_lastprivate = false;
3845 gcc_assert (!broken_loop || !in_combined_parallel);
3846 gcc_assert (fd->iter_type == long_integer_type_node
3847 || !in_combined_parallel);
3849 entry_bb = region->entry;
3850 cont_bb = region->cont;
3851 collapse_bb = NULL;
3852 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3853 gcc_assert (broken_loop
3854 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3855 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3856 l1_bb = single_succ (l0_bb);
3857 if (!broken_loop)
3859 l2_bb = create_empty_bb (cont_bb);
3860 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3861 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3862 == l1_bb));
3863 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3865 else
3866 l2_bb = NULL;
3867 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3868 exit_bb = region->exit;
3870 gsi = gsi_last_nondebug_bb (entry_bb);
3872 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3873 if (fd->ordered
3874 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3875 OMP_CLAUSE_LASTPRIVATE))
3876 ordered_lastprivate = false;
3877 tree reductions = NULL_TREE;
3878 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3879 tree memv = NULL_TREE;
3880 if (fd->lastprivate_conditional)
3882 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3883 OMP_CLAUSE__CONDTEMP_);
3884 if (fd->have_pointer_condtemp)
3885 condtemp = OMP_CLAUSE_DECL (c);
3886 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3887 cond_var = OMP_CLAUSE_DECL (c);
3889 if (sched_arg)
3891 if (fd->have_reductemp)
3893 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3894 OMP_CLAUSE__REDUCTEMP_);
3895 reductions = OMP_CLAUSE_DECL (c);
3896 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3897 gimple *g = SSA_NAME_DEF_STMT (reductions);
3898 reductions = gimple_assign_rhs1 (g);
3899 OMP_CLAUSE_DECL (c) = reductions;
3900 entry_bb = gimple_bb (g);
3901 edge e = split_block (entry_bb, g);
3902 if (region->entry == entry_bb)
3903 region->entry = e->dest;
3904 gsi = gsi_last_bb (entry_bb);
3906 else
3907 reductions = null_pointer_node;
3908 if (fd->have_pointer_condtemp)
3910 tree type = TREE_TYPE (condtemp);
3911 memv = create_tmp_var (type);
3912 TREE_ADDRESSABLE (memv) = 1;
3913 unsigned HOST_WIDE_INT sz
3914 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3915 sz *= fd->lastprivate_conditional;
3916 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3917 false);
3918 mem = build_fold_addr_expr (memv);
3920 else
3921 mem = null_pointer_node;
3923 if (fd->collapse > 1 || fd->ordered)
3925 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3926 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3928 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3929 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3930 zero_iter1_bb, first_zero_iter1,
3931 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3933 if (zero_iter1_bb)
3935 /* Some counts[i] vars might be uninitialized if
3936 some loop has zero iterations. But the body shouldn't
3937 be executed in that case, so just avoid uninit warnings. */
3938 for (i = first_zero_iter1;
3939 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3940 if (SSA_VAR_P (counts[i]))
3941 suppress_warning (counts[i], OPT_Wuninitialized);
3942 gsi_prev (&gsi);
3943 e = split_block (entry_bb, gsi_stmt (gsi));
3944 entry_bb = e->dest;
3945 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3946 gsi = gsi_last_nondebug_bb (entry_bb);
3947 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3948 get_immediate_dominator (CDI_DOMINATORS,
3949 zero_iter1_bb));
3951 if (zero_iter2_bb)
3953 /* Some counts[i] vars might be uninitialized if
3954 some loop has zero iterations. But the body shouldn't
3955 be executed in that case, so just avoid uninit warnings. */
3956 for (i = first_zero_iter2; i < fd->ordered; i++)
3957 if (SSA_VAR_P (counts[i]))
3958 suppress_warning (counts[i], OPT_Wuninitialized);
3959 if (zero_iter1_bb)
3960 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3961 else
3963 gsi_prev (&gsi);
3964 e = split_block (entry_bb, gsi_stmt (gsi));
3965 entry_bb = e->dest;
3966 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3967 gsi = gsi_last_nondebug_bb (entry_bb);
3968 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3969 get_immediate_dominator
3970 (CDI_DOMINATORS, zero_iter2_bb));
3973 if (fd->collapse == 1)
3975 counts[0] = fd->loop.n2;
3976 fd->loop = fd->loops[0];
3980 type = TREE_TYPE (fd->loop.v);
3981 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3982 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3983 TREE_ADDRESSABLE (istart0) = 1;
3984 TREE_ADDRESSABLE (iend0) = 1;
3986 /* See if we need to bias by LLONG_MIN. */
3987 if (fd->iter_type == long_long_unsigned_type_node
3988 && TREE_CODE (type) == INTEGER_TYPE
3989 && !TYPE_UNSIGNED (type)
3990 && fd->ordered == 0)
3992 tree n1, n2;
3994 if (fd->loop.cond_code == LT_EXPR)
3996 n1 = fd->loop.n1;
3997 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3999 else
4001 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4002 n2 = fd->loop.n1;
4004 if (TREE_CODE (n1) != INTEGER_CST
4005 || TREE_CODE (n2) != INTEGER_CST
4006 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4007 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4010 gimple_stmt_iterator gsif = gsi;
4011 gsi_prev (&gsif);
4013 tree arr = NULL_TREE;
4014 if (in_combined_parallel)
4016 gcc_assert (fd->ordered == 0);
4017 /* In a combined parallel loop, emit a call to
4018 GOMP_loop_foo_next. */
4019 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4020 build_fold_addr_expr (istart0),
4021 build_fold_addr_expr (iend0));
4023 else
4025 tree t0, t1, t2, t3, t4;
4026 /* If this is not a combined parallel loop, emit a call to
4027 GOMP_loop_foo_start in ENTRY_BB. */
4028 t4 = build_fold_addr_expr (iend0);
4029 t3 = build_fold_addr_expr (istart0);
4030 if (fd->ordered)
4032 t0 = build_int_cst (unsigned_type_node,
4033 fd->ordered - fd->collapse + 1);
4034 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4035 fd->ordered
4036 - fd->collapse + 1),
4037 ".omp_counts");
4038 DECL_NAMELESS (arr) = 1;
4039 TREE_ADDRESSABLE (arr) = 1;
4040 TREE_STATIC (arr) = 1;
4041 vec<constructor_elt, va_gc> *v;
4042 vec_alloc (v, fd->ordered - fd->collapse + 1);
4043 int idx;
4045 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4047 tree c;
4048 if (idx == 0 && fd->collapse > 1)
4049 c = fd->loop.n2;
4050 else
4051 c = counts[idx + fd->collapse - 1];
4052 tree purpose = size_int (idx);
4053 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4054 if (TREE_CODE (c) != INTEGER_CST)
4055 TREE_STATIC (arr) = 0;
4058 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4059 if (!TREE_STATIC (arr))
4060 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4061 void_type_node, arr),
4062 true, NULL_TREE, true, GSI_SAME_STMT);
4063 t1 = build_fold_addr_expr (arr);
4064 t2 = NULL_TREE;
4066 else
4068 t2 = fold_convert (fd->iter_type, fd->loop.step);
4069 t1 = fd->loop.n2;
4070 t0 = fd->loop.n1;
4071 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4073 tree innerc
4074 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4075 OMP_CLAUSE__LOOPTEMP_);
4076 gcc_assert (innerc);
4077 t0 = OMP_CLAUSE_DECL (innerc);
4078 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4079 OMP_CLAUSE__LOOPTEMP_);
4080 gcc_assert (innerc);
4081 t1 = OMP_CLAUSE_DECL (innerc);
4083 if (POINTER_TYPE_P (TREE_TYPE (t0))
4084 && TYPE_PRECISION (TREE_TYPE (t0))
4085 != TYPE_PRECISION (fd->iter_type))
4087 /* Avoid casting pointers to integer of a different size. */
4088 tree itype = signed_type_for (type);
4089 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4090 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4092 else
4094 t1 = fold_convert (fd->iter_type, t1);
4095 t0 = fold_convert (fd->iter_type, t0);
4097 if (bias)
4099 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4100 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4103 if (fd->iter_type == long_integer_type_node || fd->ordered)
4105 if (fd->chunk_size)
4107 t = fold_convert (fd->iter_type, fd->chunk_size);
4108 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4109 if (sched_arg)
4111 if (fd->ordered)
4112 t = build_call_expr (builtin_decl_explicit (start_fn),
4113 8, t0, t1, sched_arg, t, t3, t4,
4114 reductions, mem);
4115 else
4116 t = build_call_expr (builtin_decl_explicit (start_fn),
4117 9, t0, t1, t2, sched_arg, t, t3, t4,
4118 reductions, mem);
4120 else if (fd->ordered)
4121 t = build_call_expr (builtin_decl_explicit (start_fn),
4122 5, t0, t1, t, t3, t4);
4123 else
4124 t = build_call_expr (builtin_decl_explicit (start_fn),
4125 6, t0, t1, t2, t, t3, t4);
4127 else if (fd->ordered)
4128 t = build_call_expr (builtin_decl_explicit (start_fn),
4129 4, t0, t1, t3, t4);
4130 else
4131 t = build_call_expr (builtin_decl_explicit (start_fn),
4132 5, t0, t1, t2, t3, t4);
4134 else
4136 tree t5;
4137 tree c_bool_type;
4138 tree bfn_decl;
4140 /* The GOMP_loop_ull_*start functions have an additional boolean
4141 argument, true for < loops and false for > loops.
4142 In Fortran, the C bool type can be different from
4143 boolean_type_node. */
4144 bfn_decl = builtin_decl_explicit (start_fn);
4145 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4146 t5 = build_int_cst (c_bool_type,
4147 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4148 if (fd->chunk_size)
4150 tree bfn_decl = builtin_decl_explicit (start_fn);
4151 t = fold_convert (fd->iter_type, fd->chunk_size);
4152 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4153 if (sched_arg)
4154 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4155 t, t3, t4, reductions, mem);
4156 else
4157 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4159 else
4160 t = build_call_expr (builtin_decl_explicit (start_fn),
4161 6, t5, t0, t1, t2, t3, t4);
4164 if (TREE_TYPE (t) != boolean_type_node)
4165 t = fold_build2 (NE_EXPR, boolean_type_node,
4166 t, build_int_cst (TREE_TYPE (t), 0));
4167 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4168 true, GSI_SAME_STMT);
4169 if (arr && !TREE_STATIC (arr))
4171 tree clobber = build_clobber (TREE_TYPE (arr));
4172 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4173 GSI_SAME_STMT);
4175 if (fd->have_pointer_condtemp)
4176 expand_omp_build_assign (&gsi, condtemp, memv, false);
4177 if (fd->have_reductemp)
4179 gimple *g = gsi_stmt (gsi);
4180 gsi_remove (&gsi, true);
4181 release_ssa_name (gimple_assign_lhs (g));
4183 entry_bb = region->entry;
4184 gsi = gsi_last_nondebug_bb (entry_bb);
4186 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4188 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4190 /* Remove the GIMPLE_OMP_FOR statement. */
4191 gsi_remove (&gsi, true);
4193 if (gsi_end_p (gsif))
4194 gsif = gsi_after_labels (gsi_bb (gsif));
4195 gsi_next (&gsif);
4197 /* Iteration setup for sequential loop goes in L0_BB. */
4198 tree startvar = fd->loop.v;
4199 tree endvar = NULL_TREE;
4201 if (gimple_omp_for_combined_p (fd->for_stmt))
4203 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4204 && gimple_omp_for_kind (inner_stmt)
4205 == GF_OMP_FOR_KIND_SIMD);
4206 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4207 OMP_CLAUSE__LOOPTEMP_);
4208 gcc_assert (innerc);
4209 startvar = OMP_CLAUSE_DECL (innerc);
4210 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4211 OMP_CLAUSE__LOOPTEMP_);
4212 gcc_assert (innerc);
4213 endvar = OMP_CLAUSE_DECL (innerc);
4216 gsi = gsi_start_bb (l0_bb);
4217 t = istart0;
4218 if (fd->ordered && fd->collapse == 1)
4219 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4220 fold_convert (fd->iter_type, fd->loop.step));
4221 else if (bias)
4222 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4223 if (fd->ordered && fd->collapse == 1)
4225 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4226 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4227 fd->loop.n1, fold_convert (sizetype, t));
4228 else
4230 t = fold_convert (TREE_TYPE (startvar), t);
4231 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4232 fd->loop.n1, t);
4235 else
4237 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4238 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4239 t = fold_convert (TREE_TYPE (startvar), t);
4241 t = force_gimple_operand_gsi (&gsi, t,
4242 DECL_P (startvar)
4243 && TREE_ADDRESSABLE (startvar),
4244 NULL_TREE, false, GSI_CONTINUE_LINKING);
4245 assign_stmt = gimple_build_assign (startvar, t);
4246 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4247 if (cond_var)
4249 tree itype = TREE_TYPE (cond_var);
4250 /* For the lastprivate(conditional:) iteration variable we need an
4251 iteration counter that starts at a non-zero unsigned value and
4252 increases. Prefer as few IVs as possible, so if we can use startvar
4253 itself, use that, or startvar + constant (those would be
4254 incremented with step), and as a last resort use istart0 + 1
4255 incremented by 1. */
4256 if ((fd->ordered && fd->collapse == 1)
4257 || bias
4258 || POINTER_TYPE_P (type)
4259 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4260 || fd->loop.cond_code != LT_EXPR)
4261 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4262 build_int_cst (itype, 1));
4263 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4264 t = fold_convert (itype, t);
4265 else
4267 tree c = fold_convert (itype, fd->loop.n1);
4268 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4269 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4271 t = force_gimple_operand_gsi (&gsi, t, false,
4272 NULL_TREE, false, GSI_CONTINUE_LINKING);
4273 assign_stmt = gimple_build_assign (cond_var, t);
4274 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4277 t = iend0;
4278 if (fd->ordered && fd->collapse == 1)
4279 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4280 fold_convert (fd->iter_type, fd->loop.step));
4281 else if (bias)
4282 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4283 if (fd->ordered && fd->collapse == 1)
4285 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4286 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4287 fd->loop.n1, fold_convert (sizetype, t));
4288 else
4290 t = fold_convert (TREE_TYPE (startvar), t);
4291 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4292 fd->loop.n1, t);
4295 else
4297 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4298 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4299 t = fold_convert (TREE_TYPE (startvar), t);
4301 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 if (endvar)
4305 assign_stmt = gimple_build_assign (endvar, iend);
4306 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4307 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4308 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4309 else
4310 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4311 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4313 /* Handle linear clause adjustments. */
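     /* Illustration (assumed example, not from the sources): for a clause
	linear(x:3) on a loop starting at N1 with step STEP, the code below
	snapshots the incoming value of x and assigns

	  x = x_orig + ((startvar - N1) / STEP) * 3

	at the top of each thread's chunk, so x agrees with the iteration
	the thread starts executing at.  */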
4314 tree itercnt = NULL_TREE;
4315 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4316 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4317 c; c = OMP_CLAUSE_CHAIN (c))
4318 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4319 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4321 tree d = OMP_CLAUSE_DECL (c);
4322 tree t = d, a, dest;
4323 if (omp_privatize_by_reference (t))
4324 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4325 tree type = TREE_TYPE (t);
4326 if (POINTER_TYPE_P (type))
4327 type = sizetype;
4328 dest = unshare_expr (t);
4329 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4330 expand_omp_build_assign (&gsif, v, t);
4331 if (itercnt == NULL_TREE)
4333 itercnt = startvar;
4334 tree n1 = fd->loop.n1;
4335 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4337 itercnt
4338 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4339 itercnt);
4340 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4342 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4343 itercnt, n1);
4344 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4345 itercnt, fd->loop.step);
4346 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4347 NULL_TREE, false,
4348 GSI_CONTINUE_LINKING);
4350 a = fold_build2 (MULT_EXPR, type,
4351 fold_convert (type, itercnt),
4352 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4353 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4354 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4355 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4356 false, GSI_CONTINUE_LINKING);
4357 expand_omp_build_assign (&gsi, dest, t, true);
4359 if (fd->collapse > 1)
4360 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4362 if (fd->ordered)
4364 /* Until now the counts array contained the number of iterations
4365 (or a variable holding it) for the i-th loop. From now on we
4366 need those counts only for the collapsed loops, and only from
4367 the 2nd collapsed loop up to the last one. Move them one
4368 element earlier; we'll use counts[fd->collapse - 1] for the
4369 first source/sink iteration counter and so on, and
4370 counts[fd->ordered] as the array holding the current counter
4371 values for depend(source). */
4372 if (fd->collapse > 1)
4373 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4374 if (broken_loop)
4376 int i;
4377 for (i = fd->collapse; i < fd->ordered; i++)
4379 tree type = TREE_TYPE (fd->loops[i].v);
4380 tree this_cond
4381 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4382 fold_convert (type, fd->loops[i].n1),
4383 fold_convert (type, fd->loops[i].n2));
4384 if (!integer_onep (this_cond))
4385 break;
4387 if (i < fd->ordered)
4389 cont_bb
4390 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4391 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4392 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4393 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4395 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4396 make_edge (cont_bb, l1_bb, 0);
4397 l2_bb = create_empty_bb (cont_bb);
4398 broken_loop = false;
4401 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4402 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4403 ordered_lastprivate);
4404 if (counts[fd->collapse - 1])
4406 gcc_assert (fd->collapse == 1);
4407 gsi = gsi_last_bb (l0_bb);
4408 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4409 istart0, true);
4410 if (cont_bb)
4412 gsi = gsi_last_bb (cont_bb);
4413 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4414 counts[fd->collapse - 1],
4415 build_int_cst (fd->iter_type, 1));
4416 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4417 tree aref = build4 (ARRAY_REF, fd->iter_type,
4418 counts[fd->ordered], size_zero_node,
4419 NULL_TREE, NULL_TREE);
4420 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4422 t = counts[fd->collapse - 1];
4424 else if (fd->collapse > 1)
4425 t = fd->loop.v;
4426 else
4428 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4429 fd->loops[0].v, fd->loops[0].n1);
4430 t = fold_convert (fd->iter_type, t);
4432 gsi = gsi_last_bb (l0_bb);
4433 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4434 size_zero_node, NULL_TREE, NULL_TREE);
4435 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4436 false, GSI_CONTINUE_LINKING);
4437 expand_omp_build_assign (&gsi, aref, t, true);
4440 if (!broken_loop)
4442 /* Code to control the increment and predicate for the sequential
4443 loop goes in the CONT_BB. */
4444 gsi = gsi_last_nondebug_bb (cont_bb);
4445 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4446 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4447 vmain = gimple_omp_continue_control_use (cont_stmt);
4448 vback = gimple_omp_continue_control_def (cont_stmt);
4450 if (cond_var)
4452 tree itype = TREE_TYPE (cond_var);
4453 tree t2;
4454 if ((fd->ordered && fd->collapse == 1)
4455 || bias
4456 || POINTER_TYPE_P (type)
4457 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4458 || fd->loop.cond_code != LT_EXPR)
4459 t2 = build_int_cst (itype, 1);
4460 else
4461 t2 = fold_convert (itype, fd->loop.step);
4462 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4463 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4464 NULL_TREE, true, GSI_SAME_STMT);
4465 assign_stmt = gimple_build_assign (cond_var, t2);
4466 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4469 if (!gimple_omp_for_combined_p (fd->for_stmt))
4471 if (POINTER_TYPE_P (type))
4472 t = fold_build_pointer_plus (vmain, fd->loop.step);
4473 else
4474 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4475 t = force_gimple_operand_gsi (&gsi, t,
4476 DECL_P (vback)
4477 && TREE_ADDRESSABLE (vback),
4478 NULL_TREE, true, GSI_SAME_STMT);
4479 assign_stmt = gimple_build_assign (vback, t);
4480 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4482 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4484 tree tem;
4485 if (fd->collapse > 1)
4486 tem = fd->loop.v;
4487 else
4489 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4490 fd->loops[0].v, fd->loops[0].n1);
4491 tem = fold_convert (fd->iter_type, tem);
4493 tree aref = build4 (ARRAY_REF, fd->iter_type,
4494 counts[fd->ordered], size_zero_node,
4495 NULL_TREE, NULL_TREE);
4496 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4497 true, GSI_SAME_STMT);
4498 expand_omp_build_assign (&gsi, aref, tem);
4501 t = build2 (fd->loop.cond_code, boolean_type_node,
4502 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4503 iend);
4504 gcond *cond_stmt = gimple_build_cond_empty (t);
4505 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4508 /* Remove GIMPLE_OMP_CONTINUE. */
4509 gsi_remove (&gsi, true);
4511 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4512 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4514 /* Emit code to get the next parallel iteration in L2_BB. */
4515 gsi = gsi_start_bb (l2_bb);
4517 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4518 build_fold_addr_expr (istart0),
4519 build_fold_addr_expr (iend0));
4520 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4521 false, GSI_CONTINUE_LINKING);
4522 if (TREE_TYPE (t) != boolean_type_node)
4523 t = fold_build2 (NE_EXPR, boolean_type_node,
4524 t, build_int_cst (TREE_TYPE (t), 0));
4525 gcond *cond_stmt = gimple_build_cond_empty (t);
4526 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4529 /* Add the loop cleanup function. */
4530 gsi = gsi_last_nondebug_bb (exit_bb);
4531 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4532 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4533 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4534 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4535 else
4536 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4537 gcall *call_stmt = gimple_build_call (t, 0);
4538 if (fd->ordered)
4540 tree arr = counts[fd->ordered];
4541 tree clobber = build_clobber (TREE_TYPE (arr));
4542 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4543 GSI_SAME_STMT);
4545 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4547 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4548 if (fd->have_reductemp)
4550 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4551 gimple_call_lhs (call_stmt));
4552 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4555 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4556 gsi_remove (&gsi, true);
4558 /* Connect the new blocks. */
4559 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4560 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4562 if (!broken_loop)
4564 gimple_seq phis;
4566 e = find_edge (cont_bb, l3_bb);
4567 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4569 phis = phi_nodes (l3_bb);
4570 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4572 gimple *phi = gsi_stmt (gsi);
4573 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4574 PHI_ARG_DEF_FROM_EDGE (phi, e));
4576 remove_edge (e);
4578 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4579 e = find_edge (cont_bb, l1_bb);
4580 if (e == NULL)
4582 e = BRANCH_EDGE (cont_bb);
4583 gcc_assert (single_succ (e->dest) == l1_bb);
4585 if (gimple_omp_for_combined_p (fd->for_stmt))
4587 remove_edge (e);
4588 e = NULL;
4590 else if (fd->collapse > 1)
4592 remove_edge (e);
4593 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4595 else
4596 e->flags = EDGE_TRUE_VALUE;
4597 if (e)
4599 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4600 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4602 else
4604 e = find_edge (cont_bb, l2_bb);
4605 e->flags = EDGE_FALLTHRU;
4607 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4609 if (gimple_in_ssa_p (cfun))
4611 /* Add phis to the outer loop that connect to the phis in the inner,
4612 original loop, and move the loop entry value of the inner phi to
4613 the loop entry value of the outer phi. */
4614 gphi_iterator psi;
4615 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4617 location_t locus;
4618 gphi *nphi;
4619 gphi *exit_phi = psi.phi ();
4621 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4622 continue;
4624 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4625 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4627 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4628 edge latch_to_l1 = find_edge (latch, l1_bb);
4629 gphi *inner_phi
4630 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4632 tree t = gimple_phi_result (exit_phi);
4633 tree new_res = copy_ssa_name (t, NULL);
4634 nphi = create_phi_node (new_res, l0_bb);
4636 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4637 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4638 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4639 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4640 add_phi_arg (nphi, t, entry_to_l0, locus);
4642 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4643 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4645 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4649 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4650 recompute_dominator (CDI_DOMINATORS, l2_bb));
4651 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4652 recompute_dominator (CDI_DOMINATORS, l3_bb));
4653 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4654 recompute_dominator (CDI_DOMINATORS, l0_bb));
4655 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4656 recompute_dominator (CDI_DOMINATORS, l1_bb));
4658 /* We enter expand_omp_for_generic with a loop. This original loop may
4659 have its own loop struct, or it may be part of an outer loop struct
4660 (which may be the fake loop). */
4661 class loop *outer_loop = entry_bb->loop_father;
4662 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4664 add_bb_to_loop (l2_bb, outer_loop);
4666 /* We've added a new loop around the original loop. Allocate the
4667 corresponding loop struct. */
4668 class loop *new_loop = alloc_loop ();
4669 new_loop->header = l0_bb;
4670 new_loop->latch = l2_bb;
4671 add_loop (new_loop, outer_loop);
4673 /* Allocate a loop structure for the original loop unless we already
4674 had one. */
4675 if (!orig_loop_has_loop_struct
4676 && !gimple_omp_for_combined_p (fd->for_stmt))
4678 class loop *orig_loop = alloc_loop ();
4679 orig_loop->header = l1_bb;
4680 /* The loop may have multiple latches. */
4681 add_loop (orig_loop, new_loop);
4686 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4687 compute the needed allocation size; this handles the team allocations
4688 if !ALLOC and the thread allocations if ALLOC. SZ is the initial size
4689 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4690 allocation in bytes, and CNT the number of elements of each array: for
4691 !ALLOC this is omp_get_num_threads (), for ALLOC the number of
4692 iterations handled by the current thread. If PTR is non-NULL, it is
4693 the start of the allocation and this routine shall assign pointers to
4694 the corresponding arrays to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses. */
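/* Usage sketch (modelled on the two call sites further down, slightly
   simplified):

     tree size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
					     alloc_align, cnt, NULL, false);
     ... allocate SIZE bytes, giving PTR ...
     expand_omp_scantemp_alloc (clauses, ptr, sz, alloc_align, cnt,
				&gsi, false);

   i.e. one pass computes the size, a second pass carves the allocation up
   and stores the per-array pointers into the _scantemp_ clause decls.  */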
4696 static tree
4697 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4698 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4699 gimple_stmt_iterator *gsi, bool alloc)
4701 tree eltsz = NULL_TREE;
4702 unsigned HOST_WIDE_INT preval = 0;
4703 if (ptr && sz)
4704 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4705 ptr, size_int (sz));
4706 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4707 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4708 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4709 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4711 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4712 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4713 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4715 unsigned HOST_WIDE_INT szl
4716 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4717 szl = least_bit_hwi (szl);
4718 if (szl)
4719 al = MIN (al, szl);
4721 if (ptr == NULL_TREE)
4723 if (eltsz == NULL_TREE)
4724 eltsz = TYPE_SIZE_UNIT (pointee_type);
4725 else
4726 eltsz = size_binop (PLUS_EXPR, eltsz,
4727 TYPE_SIZE_UNIT (pointee_type));
4729 if (preval == 0 && al <= alloc_align)
4731 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4732 sz += diff;
4733 if (diff && ptr)
4734 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4735 ptr, size_int (diff));
4737 else if (al > preval)
4739 if (ptr)
4741 ptr = fold_convert (pointer_sized_int_node, ptr);
4742 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4743 build_int_cst (pointer_sized_int_node,
4744 al - 1));
4745 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4746 build_int_cst (pointer_sized_int_node,
4747 -(HOST_WIDE_INT) al));
4748 ptr = fold_convert (ptr_type_node, ptr);
4750 else
4751 sz += al - 1;
4753 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4754 preval = al;
4755 else
4756 preval = 1;
4757 if (ptr)
4759 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4760 ptr = OMP_CLAUSE_DECL (c);
4761 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4762 size_binop (MULT_EXPR, cnt,
4763 TYPE_SIZE_UNIT (pointee_type)));
4767 if (ptr == NULL_TREE)
4769 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4770 if (sz)
4771 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4772 return eltsz;
4774 else
4775 return ptr;
4778 /* Return the last _looptemp_ clause if one has been created for
4779 lastprivate on distribute parallel for{, simd} or taskloop.
4780 FD is the loop data and INNERC should be the second _looptemp_
4781 clause (the one holding the end of the range).
4782 It is followed by collapse - 1 _looptemp_ clauses for counts[1]
4783 and up, and for triangular loops by 4 further _looptemp_ clauses
4784 (one for counts[0], one for first_inner_iterations, one for factor
4785 and one for adjn1). After this there is optionally one more
4786 _looptemp_ clause, which this function returns. */
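/* For instance (rectangular collapse(3) case, illustration only), the
   _looptemp_ chain is: #1 start, #2 end (= INNERC), #3 and #4 for counts[1]
   and counts[2], then optionally #5, the clause returned here; the loop
   below simply skips the count clauses (plus the four extra ones in the
   triangular case) before looking for that final clause.  */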
4788 static tree
4789 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4791 gcc_assert (innerc);
4792 int count = fd->collapse - 1;
4793 if (fd->non_rect
4794 && fd->last_nonrect == fd->first_nonrect + 1
4795 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4796 count += 4;
4797 for (int i = 0; i < count; i++)
4799 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4800 OMP_CLAUSE__LOOPTEMP_);
4801 gcc_assert (innerc);
4803 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4804 OMP_CLAUSE__LOOPTEMP_);
4807 /* A subroutine of expand_omp_for. Generate code for a parallel
4808 loop with static schedule and no specified chunk size. Given
4809 parameters:
4811 for (V = N1; V cond N2; V += STEP) BODY;
4813 where COND is "<" or ">", we generate pseudocode
4815 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4816 if (cond is <)
4817 adj = STEP - 1;
4818 else
4819 adj = STEP + 1;
4820 if ((__typeof (V)) -1 > 0 && cond is >)
4821 n = -(adj + N2 - N1) / -STEP;
4822 else
4823 n = (adj + N2 - N1) / STEP;
4824 q = n / nthreads;
4825 tt = n % nthreads;
4826 if (threadid < tt) goto L3; else goto L4;
4828 tt = 0;
4829 q = q + 1;
4831 s0 = q * threadid + tt;
4832 e0 = s0 + q;
4833 V = s0 * STEP + N1;
4834 if (s0 >= e0) goto L2; else goto L0;
4836 e = e0 * STEP + N1;
4838 BODY;
4839 V += STEP;
4840 if (V cond e) goto L1;
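/* Worked example (illustration): n = 10 iterations and nthreads = 4 give
   q = 2, tt = 2.  Threads 0 and 1 take q + 1 = 3 iterations each
   (s0/e0 = 0/3 and 3/6), threads 2 and 3 take 2 each (6/8 and 8/10), so
   the iteration space is covered exactly once by contiguous chunks.  */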
4844 static void
4845 expand_omp_for_static_nochunk (struct omp_region *region,
4846 struct omp_for_data *fd,
4847 gimple *inner_stmt)
4849 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4850 tree type, itype, vmain, vback;
4851 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4852 basic_block body_bb, cont_bb, collapse_bb = NULL;
4853 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4854 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4855 gimple_stmt_iterator gsi, gsip;
4856 edge ep;
4857 bool broken_loop = region->cont == NULL;
4858 tree *counts = NULL;
4859 tree n1, n2, step;
4860 tree reductions = NULL_TREE;
4861 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4863 itype = type = TREE_TYPE (fd->loop.v);
4864 if (POINTER_TYPE_P (type))
4865 itype = signed_type_for (type);
4867 entry_bb = region->entry;
4868 cont_bb = region->cont;
4869 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4870 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4871 gcc_assert (broken_loop
4872 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4873 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4874 body_bb = single_succ (seq_start_bb);
4875 if (!broken_loop)
4877 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4878 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4879 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4881 exit_bb = region->exit;
4883 /* Iteration space partitioning goes in ENTRY_BB. */
4884 gsi = gsi_last_nondebug_bb (entry_bb);
4885 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4886 gsip = gsi;
4887 gsi_prev (&gsip);
4889 if (fd->collapse > 1)
4891 int first_zero_iter = -1, dummy = -1;
4892 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4894 counts = XALLOCAVEC (tree, fd->collapse);
4895 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4896 fin_bb, first_zero_iter,
4897 dummy_bb, dummy, l2_dom_bb);
4898 t = NULL_TREE;
4900 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4901 t = integer_one_node;
4902 else
4903 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4904 fold_convert (type, fd->loop.n1),
4905 fold_convert (type, fd->loop.n2));
4906 if (fd->collapse == 1
4907 && TYPE_UNSIGNED (type)
4908 && (t == NULL_TREE || !integer_onep (t)))
4910 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4911 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4912 true, GSI_SAME_STMT);
4913 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4914 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4915 true, GSI_SAME_STMT);
4916 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4917 n1, n2);
4918 ep = split_block (entry_bb, cond_stmt);
4919 ep->flags = EDGE_TRUE_VALUE;
4920 entry_bb = ep->dest;
4921 ep->probability = profile_probability::very_likely ();
4922 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4923 ep->probability = profile_probability::very_unlikely ();
4924 if (gimple_in_ssa_p (cfun))
4926 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4927 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4928 !gsi_end_p (gpi); gsi_next (&gpi))
4930 gphi *phi = gpi.phi ();
4931 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4932 ep, UNKNOWN_LOCATION);
4935 gsi = gsi_last_bb (entry_bb);
4938 if (fd->lastprivate_conditional)
4940 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4941 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4942 if (fd->have_pointer_condtemp)
4943 condtemp = OMP_CLAUSE_DECL (c);
4944 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4945 cond_var = OMP_CLAUSE_DECL (c);
4947 if (fd->have_reductemp
4948 /* For scan, we don't want to reinitialize condtemp before the
4949 second loop. */
4950 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4951 || fd->have_nonctrl_scantemp)
4953 tree t1 = build_int_cst (long_integer_type_node, 0);
4954 tree t2 = build_int_cst (long_integer_type_node, 1);
4955 tree t3 = build_int_cstu (long_integer_type_node,
4956 (HOST_WIDE_INT_1U << 31) + 1);
4957 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4958 gimple_stmt_iterator gsi2 = gsi_none ();
4959 gimple *g = NULL;
4960 tree mem = null_pointer_node, memv = NULL_TREE;
4961 unsigned HOST_WIDE_INT condtemp_sz = 0;
4962 unsigned HOST_WIDE_INT alloc_align = 0;
4963 if (fd->have_reductemp)
4965 gcc_assert (!fd->have_nonctrl_scantemp);
4966 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4967 reductions = OMP_CLAUSE_DECL (c);
4968 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4969 g = SSA_NAME_DEF_STMT (reductions);
4970 reductions = gimple_assign_rhs1 (g);
4971 OMP_CLAUSE_DECL (c) = reductions;
4972 gsi2 = gsi_for_stmt (g);
4974 else
4976 if (gsi_end_p (gsip))
4977 gsi2 = gsi_after_labels (region->entry);
4978 else
4979 gsi2 = gsip;
4980 reductions = null_pointer_node;
4982 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4984 tree type;
4985 if (fd->have_pointer_condtemp)
4986 type = TREE_TYPE (condtemp);
4987 else
4988 type = ptr_type_node;
4989 memv = create_tmp_var (type);
4990 TREE_ADDRESSABLE (memv) = 1;
4991 unsigned HOST_WIDE_INT sz = 0;
4992 tree size = NULL_TREE;
4993 if (fd->have_pointer_condtemp)
4995 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4996 sz *= fd->lastprivate_conditional;
4997 condtemp_sz = sz;
4999 if (fd->have_nonctrl_scantemp)
5001 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5002 gimple *g = gimple_build_call (nthreads, 0);
5003 nthreads = create_tmp_var (integer_type_node);
5004 gimple_call_set_lhs (g, nthreads);
5005 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5006 nthreads = fold_convert (sizetype, nthreads);
5007 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5008 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5009 alloc_align, nthreads, NULL,
5010 false);
5011 size = fold_convert (type, size);
5013 else
5014 size = build_int_cst (type, sz);
5015 expand_omp_build_assign (&gsi2, memv, size, false);
5016 mem = build_fold_addr_expr (memv);
5018 tree t
5019 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5020 9, t1, t2, t2, t3, t1, null_pointer_node,
5021 null_pointer_node, reductions, mem);
5022 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5023 true, GSI_SAME_STMT);
5024 if (fd->have_pointer_condtemp)
5025 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5026 if (fd->have_nonctrl_scantemp)
5028 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5029 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5030 alloc_align, nthreads, &gsi2, false);
5032 if (fd->have_reductemp)
5034 gsi_remove (&gsi2, true);
5035 release_ssa_name (gimple_assign_lhs (g));
5038 switch (gimple_omp_for_kind (fd->for_stmt))
5040 case GF_OMP_FOR_KIND_FOR:
5041 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5042 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5043 break;
5044 case GF_OMP_FOR_KIND_DISTRIBUTE:
5045 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5046 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5047 break;
5048 default:
5049 gcc_unreachable ();
5051 nthreads = build_call_expr (nthreads, 0);
5052 nthreads = fold_convert (itype, nthreads);
5053 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5054 true, GSI_SAME_STMT);
5055 threadid = build_call_expr (threadid, 0);
5056 threadid = fold_convert (itype, threadid);
5057 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5058 true, GSI_SAME_STMT);
5060 n1 = fd->loop.n1;
5061 n2 = fd->loop.n2;
5062 step = fd->loop.step;
5063 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5065 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5066 OMP_CLAUSE__LOOPTEMP_);
5067 gcc_assert (innerc);
5068 n1 = OMP_CLAUSE_DECL (innerc);
5069 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5070 OMP_CLAUSE__LOOPTEMP_);
5071 gcc_assert (innerc);
5072 n2 = OMP_CLAUSE_DECL (innerc);
5074 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5075 true, NULL_TREE, true, GSI_SAME_STMT);
5076 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5077 true, NULL_TREE, true, GSI_SAME_STMT);
5078 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5079 true, NULL_TREE, true, GSI_SAME_STMT);
5081 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5082 t = fold_build2 (PLUS_EXPR, itype, step, t);
5083 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5084 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5085 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5086 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5087 fold_build1 (NEGATE_EXPR, itype, t),
5088 fold_build1 (NEGATE_EXPR, itype, step));
5089 else
5090 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5091 t = fold_convert (itype, t);
5092 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5094 q = create_tmp_reg (itype, "q");
5095 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5096 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5097 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5099 tt = create_tmp_reg (itype, "tt");
5100 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5101 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5102 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5104 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5105 gcond *cond_stmt = gimple_build_cond_empty (t);
5106 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5108 second_bb = split_block (entry_bb, cond_stmt)->dest;
5109 gsi = gsi_last_nondebug_bb (second_bb);
5110 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5112 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5113 GSI_SAME_STMT);
5114 gassign *assign_stmt
5115 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5116 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5118 third_bb = split_block (second_bb, assign_stmt)->dest;
5119 gsi = gsi_last_nondebug_bb (third_bb);
5120 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5122 if (fd->have_nonctrl_scantemp)
5124 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5125 tree controlp = NULL_TREE, controlb = NULL_TREE;
5126 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5127 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5128 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5130 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5131 controlb = OMP_CLAUSE_DECL (c);
5132 else
5133 controlp = OMP_CLAUSE_DECL (c);
5134 if (controlb && controlp)
5135 break;
5137 gcc_assert (controlp && controlb);
5138 tree cnt = create_tmp_var (sizetype);
5139 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5140 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5141 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5142 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5143 alloc_align, cnt, NULL, true);
5144 tree size = create_tmp_var (sizetype);
5145 expand_omp_build_assign (&gsi, size, sz, false);
5146 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5147 size, size_int (16384));
5148 expand_omp_build_assign (&gsi, controlb, cmp);
5149 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5150 NULL_TREE, NULL_TREE);
5151 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5152 fourth_bb = split_block (third_bb, g)->dest;
5153 gsi = gsi_last_nondebug_bb (fourth_bb);
5154 /* FIXME: Once we have allocators, this should use the allocator. */
5155 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5156 gimple_call_set_lhs (g, controlp);
5157 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5158 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5159 &gsi, true);
5160 gsi_prev (&gsi);
5161 g = gsi_stmt (gsi);
5162 fifth_bb = split_block (fourth_bb, g)->dest;
5163 gsi = gsi_last_nondebug_bb (fifth_bb);
5165 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5166 gimple_call_set_lhs (g, controlp);
5167 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5168 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5169 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5170 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5171 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5173 tree tmp = create_tmp_var (sizetype);
5174 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5175 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5176 TYPE_SIZE_UNIT (pointee_type));
5177 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5178 g = gimple_build_call (alloca_decl, 2, tmp,
5179 size_int (TYPE_ALIGN (pointee_type)));
5180 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5181 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5184 sixth_bb = split_block (fifth_bb, g)->dest;
5185 gsi = gsi_last_nondebug_bb (sixth_bb);
5188 t = build2 (MULT_EXPR, itype, q, threadid);
5189 t = build2 (PLUS_EXPR, itype, t, tt);
5190 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5192 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5193 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5195 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5196 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
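/* Editor's note (illustration only, not part of the original sources):
   a worked example of the partitioning computed above for the static
   (no chunk) schedule.  With n = 10 logical iterations and nthreads = 4
   we get q = 2 and tt = 2; threads with threadid < tt take q + 1 = 3
   iterations (with tt treated as 0), the rest take q = 2:

	thread 0: s0 = 0, e0 = 3
	thread 1: s0 = 3, e0 = 6
	thread 2: s0 = 2*2 + 2 = 6, e0 = 8
	thread 3: s0 = 2*3 + 2 = 8, e0 = 10

   The GE_EXPR guard just inserted skips the sequential body for threads
   whose share is empty (e.g. when n < nthreads).  */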
5198 /* Remove the GIMPLE_OMP_FOR statement. */
5199 gsi_remove (&gsi, true);
5201 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5202 gsi = gsi_start_bb (seq_start_bb);
5204 tree startvar = fd->loop.v;
5205 tree endvar = NULL_TREE;
5207 if (gimple_omp_for_combined_p (fd->for_stmt))
5209 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5210 ? gimple_omp_parallel_clauses (inner_stmt)
5211 : gimple_omp_for_clauses (inner_stmt);
5212 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5213 gcc_assert (innerc);
5214 startvar = OMP_CLAUSE_DECL (innerc);
5215 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5216 OMP_CLAUSE__LOOPTEMP_);
5217 gcc_assert (innerc);
5218 endvar = OMP_CLAUSE_DECL (innerc);
5219 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5220 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5222 innerc = find_lastprivate_looptemp (fd, innerc);
5223 if (innerc)
5225 /* If needed (distribute parallel for with lastprivate),
5226 propagate down the total number of iterations. */
5227 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5228 fd->loop.n2);
5229 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5230 GSI_CONTINUE_LINKING);
5231 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5232 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5236 t = fold_convert (itype, s0);
5237 t = fold_build2 (MULT_EXPR, itype, t, step);
5238 if (POINTER_TYPE_P (type))
5240 t = fold_build_pointer_plus (n1, t);
5241 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5242 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5243 t = fold_convert (signed_type_for (type), t);
5245 else
5246 t = fold_build2 (PLUS_EXPR, type, t, n1);
5247 t = fold_convert (TREE_TYPE (startvar), t);
5248 t = force_gimple_operand_gsi (&gsi, t,
5249 DECL_P (startvar)
5250 && TREE_ADDRESSABLE (startvar),
5251 NULL_TREE, false, GSI_CONTINUE_LINKING);
5252 assign_stmt = gimple_build_assign (startvar, t);
5253 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5254 if (cond_var)
5256 tree itype = TREE_TYPE (cond_var);
5257 /* For the lastprivate(conditional:) itervar, we need some iteration
5258 counter that starts at a non-zero unsigned value and increases.
5259 Prefer as few IVs as possible, so if we can use startvar
5260 itself, use that, or startvar + constant (those would be
5261 incremented with step), and as a last resort use s0 + 1,
5262 incremented by 1. */
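/* Editor's note (illustration only, not part of the original sources):
   for the code below, if e.g. n1 is the integer constant 0 with a <
   condition, the counter becomes startvar + 1 (so it starts at 1); if n1
   is a positive constant such as 3, startvar itself already starts
   non-zero and is used directly; in all other cases (pointer iterators,
   non-constant n1, or a non-< condition) the fallback s0 + 1 is used.  */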
5263 if (POINTER_TYPE_P (type)
5264 || TREE_CODE (n1) != INTEGER_CST
5265 || fd->loop.cond_code != LT_EXPR)
5266 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5267 build_int_cst (itype, 1));
5268 else if (tree_int_cst_sgn (n1) == 1)
5269 t = fold_convert (itype, t);
5270 else
5272 tree c = fold_convert (itype, n1);
5273 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5274 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5276 t = force_gimple_operand_gsi (&gsi, t, false,
5277 NULL_TREE, false, GSI_CONTINUE_LINKING);
5278 assign_stmt = gimple_build_assign (cond_var, t);
5279 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5282 t = fold_convert (itype, e0);
5283 t = fold_build2 (MULT_EXPR, itype, t, step);
5284 if (POINTER_TYPE_P (type))
5286 t = fold_build_pointer_plus (n1, t);
5287 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5288 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5289 t = fold_convert (signed_type_for (type), t);
5291 else
5292 t = fold_build2 (PLUS_EXPR, type, t, n1);
5293 t = fold_convert (TREE_TYPE (startvar), t);
5294 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5295 false, GSI_CONTINUE_LINKING);
5296 if (endvar)
5298 assign_stmt = gimple_build_assign (endvar, e);
5299 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5300 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5301 assign_stmt = gimple_build_assign (fd->loop.v, e);
5302 else
5303 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5306 /* Handle linear clause adjustments. */
5307 tree itercnt = NULL_TREE;
5308 tree *nonrect_bounds = NULL;
5309 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5310 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5311 c; c = OMP_CLAUSE_CHAIN (c))
5312 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5313 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5315 tree d = OMP_CLAUSE_DECL (c);
5316 tree t = d, a, dest;
5317 if (omp_privatize_by_reference (t))
5318 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5319 if (itercnt == NULL_TREE)
5321 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5323 itercnt = fold_build2 (MINUS_EXPR, itype,
5324 fold_convert (itype, n1),
5325 fold_convert (itype, fd->loop.n1));
5326 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5327 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5328 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5329 NULL_TREE, false,
5330 GSI_CONTINUE_LINKING);
5332 else
5333 itercnt = s0;
5335 tree type = TREE_TYPE (t);
5336 if (POINTER_TYPE_P (type))
5337 type = sizetype;
5338 a = fold_build2 (MULT_EXPR, type,
5339 fold_convert (type, itercnt),
5340 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5341 dest = unshare_expr (t);
5342 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5343 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5344 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5345 false, GSI_CONTINUE_LINKING);
5346 expand_omp_build_assign (&gsi, dest, t, true);
5348 if (fd->collapse > 1)
5350 if (fd->non_rect)
5352 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5353 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5355 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5356 startvar);
5359 if (!broken_loop)
5361 /* The code controlling the sequential loop replaces the
5362 GIMPLE_OMP_CONTINUE. */
5363 gsi = gsi_last_nondebug_bb (cont_bb);
5364 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5365 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5366 vmain = gimple_omp_continue_control_use (cont_stmt);
5367 vback = gimple_omp_continue_control_def (cont_stmt);
5369 if (cond_var)
5371 tree itype = TREE_TYPE (cond_var);
5372 tree t2;
5373 if (POINTER_TYPE_P (type)
5374 || TREE_CODE (n1) != INTEGER_CST
5375 || fd->loop.cond_code != LT_EXPR)
5376 t2 = build_int_cst (itype, 1);
5377 else
5378 t2 = fold_convert (itype, step);
5379 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5380 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5381 NULL_TREE, true, GSI_SAME_STMT);
5382 assign_stmt = gimple_build_assign (cond_var, t2);
5383 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5386 if (!gimple_omp_for_combined_p (fd->for_stmt))
5388 if (POINTER_TYPE_P (type))
5389 t = fold_build_pointer_plus (vmain, step);
5390 else
5391 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5392 t = force_gimple_operand_gsi (&gsi, t,
5393 DECL_P (vback)
5394 && TREE_ADDRESSABLE (vback),
5395 NULL_TREE, true, GSI_SAME_STMT);
5396 assign_stmt = gimple_build_assign (vback, t);
5397 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5399 t = build2 (fd->loop.cond_code, boolean_type_node,
5400 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5401 ? t : vback, e);
5402 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5405 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5406 gsi_remove (&gsi, true);
5408 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5409 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5410 cont_bb, body_bb);
5413 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5414 gsi = gsi_last_nondebug_bb (exit_bb);
5415 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5417 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5418 if (fd->have_reductemp
5419 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5420 && !fd->have_nonctrl_scantemp))
5422 tree fn;
5423 if (t)
5424 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5425 else
5426 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5427 gcall *g = gimple_build_call (fn, 0);
5428 if (t)
5430 gimple_call_set_lhs (g, t);
5431 if (fd->have_reductemp)
5432 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5433 NOP_EXPR, t),
5434 GSI_SAME_STMT);
5436 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5438 else
5439 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5441 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5442 && !fd->have_nonctrl_scantemp)
5444 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5445 gcall *g = gimple_build_call (fn, 0);
5446 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5448 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5450 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5451 tree controlp = NULL_TREE, controlb = NULL_TREE;
5452 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5453 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5454 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5456 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5457 controlb = OMP_CLAUSE_DECL (c);
5458 else
5459 controlp = OMP_CLAUSE_DECL (c);
5460 if (controlb && controlp)
5461 break;
5463 gcc_assert (controlp && controlb);
5464 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5465 NULL_TREE, NULL_TREE);
5466 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5467 exit1_bb = split_block (exit_bb, g)->dest;
5468 gsi = gsi_after_labels (exit1_bb);
5469 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5470 controlp);
5471 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5472 exit2_bb = split_block (exit1_bb, g)->dest;
5473 gsi = gsi_after_labels (exit2_bb);
5474 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5475 controlp);
5476 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5477 exit3_bb = split_block (exit2_bb, g)->dest;
5478 gsi = gsi_after_labels (exit3_bb);
5480 gsi_remove (&gsi, true);
5482 /* Connect all the blocks. */
5483 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5484 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5485 ep = find_edge (entry_bb, second_bb);
5486 ep->flags = EDGE_TRUE_VALUE;
5487 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5488 if (fourth_bb)
5490 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5491 ep->probability
5492 = profile_probability::guessed_always ().apply_scale (1, 2);
5493 ep = find_edge (third_bb, fourth_bb);
5494 ep->flags = EDGE_TRUE_VALUE;
5495 ep->probability
5496 = profile_probability::guessed_always ().apply_scale (1, 2);
5497 ep = find_edge (fourth_bb, fifth_bb);
5498 redirect_edge_and_branch (ep, sixth_bb);
5500 else
5501 sixth_bb = third_bb;
5502 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5503 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5504 if (exit1_bb)
5506 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5507 ep->probability
5508 = profile_probability::guessed_always ().apply_scale (1, 2);
5509 ep = find_edge (exit_bb, exit1_bb);
5510 ep->flags = EDGE_TRUE_VALUE;
5511 ep->probability
5512 = profile_probability::guessed_always ().apply_scale (1, 2);
5513 ep = find_edge (exit1_bb, exit2_bb);
5514 redirect_edge_and_branch (ep, exit3_bb);
5517 if (!broken_loop)
5519 ep = find_edge (cont_bb, body_bb);
5520 if (ep == NULL)
5522 ep = BRANCH_EDGE (cont_bb);
5523 gcc_assert (single_succ (ep->dest) == body_bb);
5525 if (gimple_omp_for_combined_p (fd->for_stmt))
5527 remove_edge (ep);
5528 ep = NULL;
5530 else if (fd->collapse > 1)
5532 remove_edge (ep);
5533 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5535 else
5536 ep->flags = EDGE_TRUE_VALUE;
5537 find_edge (cont_bb, fin_bb)->flags
5538 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5541 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5542 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5543 if (fourth_bb)
5545 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5546 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5548 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5550 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5551 recompute_dominator (CDI_DOMINATORS, body_bb));
5552 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5553 recompute_dominator (CDI_DOMINATORS, fin_bb));
5554 if (exit1_bb)
5556 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5557 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5560 class loop *loop = body_bb->loop_father;
5561 if (loop != entry_bb->loop_father)
5563 gcc_assert (broken_loop || loop->header == body_bb);
5564 gcc_assert (broken_loop
5565 || loop->latch == region->cont
5566 || single_pred (loop->latch) == region->cont);
5567 return;
5570 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5572 loop = alloc_loop ();
5573 loop->header = body_bb;
5574 if (collapse_bb == NULL)
5575 loop->latch = cont_bb;
5576 add_loop (loop, body_bb->loop_father);
5580 /* Return phi in E->DEST with ARG on edge E. */
5582 static gphi *
5583 find_phi_with_arg_on_edge (tree arg, edge e)
5585 basic_block bb = e->dest;
5587 for (gphi_iterator gpi = gsi_start_phis (bb);
5588 !gsi_end_p (gpi);
5589 gsi_next (&gpi))
5591 gphi *phi = gpi.phi ();
5592 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5593 return phi;
5596 return NULL;
5599 /* A subroutine of expand_omp_for. Generate code for a parallel
5600 loop with static schedule and a specified chunk size. Given
5601 parameters:
5603 for (V = N1; V cond N2; V += STEP) BODY;
5605 where COND is "<" or ">", we generate pseudocode
5607 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5608 if (cond is <)
5609 adj = STEP - 1;
5610 else
5611 adj = STEP + 1;
5612 if ((__typeof (V)) -1 > 0 && cond is >)
5613 n = -(adj + N2 - N1) / -STEP;
5614 else
5615 n = (adj + N2 - N1) / STEP;
5616 trip = 0;
5617 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5618 here so that V is defined
5619 if the loop is not entered
5620 L0:
5621 s0 = (trip * nthreads + threadid) * CHUNK;
5622 e0 = min (s0 + CHUNK, n);
5623 if (s0 < n) goto L1; else goto L4;
5624 L1:
5625 V = s0 * STEP + N1;
5626 e = e0 * STEP + N1;
5627 L2:
5628 BODY;
5629 V += STEP;
5630 if (V cond e) goto L2; else goto L3;
5631 L3:
5632 trip += 1;
5633 goto L0;
5634 L4:
5635 */
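/* Editor's note (illustration only, not part of the original sources):
   a worked example of the chunked schedule above.  With n = 23
   iterations, nthreads = 4 and CHUNK = 5, the first trip hands out
   [0,5), [5,10), [10,15) and [15,20) to threads 0..3; on the second
   trip thread 0 gets s0 = (1*4 + 0)*5 = 20 and e0 = min (25, 23) = 23,
   while the remaining threads see s0 >= n and leave through L4.  */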
5637 static void
5638 expand_omp_for_static_chunk (struct omp_region *region,
5639 struct omp_for_data *fd, gimple *inner_stmt)
5641 tree n, s0, e0, e, t;
5642 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5643 tree type, itype, vmain, vback, vextra;
5644 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5645 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5646 gimple_stmt_iterator gsi, gsip;
5647 edge se;
5648 bool broken_loop = region->cont == NULL;
5649 tree *counts = NULL;
5650 tree n1, n2, step;
5651 tree reductions = NULL_TREE;
5652 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5654 itype = type = TREE_TYPE (fd->loop.v);
5655 if (POINTER_TYPE_P (type))
5656 itype = signed_type_for (type);
5658 entry_bb = region->entry;
5659 se = split_block (entry_bb, last_stmt (entry_bb));
5660 entry_bb = se->src;
5661 iter_part_bb = se->dest;
5662 cont_bb = region->cont;
5663 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5664 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5665 gcc_assert (broken_loop
5666 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5667 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5668 body_bb = single_succ (seq_start_bb);
5669 if (!broken_loop)
5671 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5672 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5673 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5674 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5676 exit_bb = region->exit;
5678 /* Trip and adjustment setup goes in ENTRY_BB. */
5679 gsi = gsi_last_nondebug_bb (entry_bb);
5680 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5681 gsip = gsi;
5682 gsi_prev (&gsip);
5684 if (fd->collapse > 1)
5686 int first_zero_iter = -1, dummy = -1;
5687 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5689 counts = XALLOCAVEC (tree, fd->collapse);
5690 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5691 fin_bb, first_zero_iter,
5692 dummy_bb, dummy, l2_dom_bb);
5693 t = NULL_TREE;
5695 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5696 t = integer_one_node;
5697 else
5698 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5699 fold_convert (type, fd->loop.n1),
5700 fold_convert (type, fd->loop.n2));
5701 if (fd->collapse == 1
5702 && TYPE_UNSIGNED (type)
5703 && (t == NULL_TREE || !integer_onep (t)))
5705 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5706 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5707 true, GSI_SAME_STMT);
5708 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5709 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5710 true, GSI_SAME_STMT);
5711 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5712 n1, n2);
5713 se = split_block (entry_bb, cond_stmt);
5714 se->flags = EDGE_TRUE_VALUE;
5715 entry_bb = se->dest;
5716 se->probability = profile_probability::very_likely ();
5717 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5718 se->probability = profile_probability::very_unlikely ();
5719 if (gimple_in_ssa_p (cfun))
5721 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5722 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5723 !gsi_end_p (gpi); gsi_next (&gpi))
5725 gphi *phi = gpi.phi ();
5726 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5727 se, UNKNOWN_LOCATION);
5730 gsi = gsi_last_bb (entry_bb);
5733 if (fd->lastprivate_conditional)
5735 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5736 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5737 if (fd->have_pointer_condtemp)
5738 condtemp = OMP_CLAUSE_DECL (c);
5739 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5740 cond_var = OMP_CLAUSE_DECL (c);
5742 if (fd->have_reductemp || fd->have_pointer_condtemp)
5744 tree t1 = build_int_cst (long_integer_type_node, 0);
5745 tree t2 = build_int_cst (long_integer_type_node, 1);
5746 tree t3 = build_int_cstu (long_integer_type_node,
5747 (HOST_WIDE_INT_1U << 31) + 1);
5748 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5749 gimple_stmt_iterator gsi2 = gsi_none ();
5750 gimple *g = NULL;
5751 tree mem = null_pointer_node, memv = NULL_TREE;
5752 if (fd->have_reductemp)
5754 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5755 reductions = OMP_CLAUSE_DECL (c);
5756 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5757 g = SSA_NAME_DEF_STMT (reductions);
5758 reductions = gimple_assign_rhs1 (g);
5759 OMP_CLAUSE_DECL (c) = reductions;
5760 gsi2 = gsi_for_stmt (g);
5762 else
5764 if (gsi_end_p (gsip))
5765 gsi2 = gsi_after_labels (region->entry);
5766 else
5767 gsi2 = gsip;
5768 reductions = null_pointer_node;
5770 if (fd->have_pointer_condtemp)
5772 tree type = TREE_TYPE (condtemp);
5773 memv = create_tmp_var (type);
5774 TREE_ADDRESSABLE (memv) = 1;
5775 unsigned HOST_WIDE_INT sz
5776 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5777 sz *= fd->lastprivate_conditional;
5778 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5779 false);
5780 mem = build_fold_addr_expr (memv);
5782 tree t
5783 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5784 9, t1, t2, t2, t3, t1, null_pointer_node,
5785 null_pointer_node, reductions, mem);
5786 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5787 true, GSI_SAME_STMT);
5788 if (fd->have_pointer_condtemp)
5789 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5790 if (fd->have_reductemp)
5792 gsi_remove (&gsi2, true);
5793 release_ssa_name (gimple_assign_lhs (g));
5796 switch (gimple_omp_for_kind (fd->for_stmt))
5798 case GF_OMP_FOR_KIND_FOR:
5799 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5800 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5801 break;
5802 case GF_OMP_FOR_KIND_DISTRIBUTE:
5803 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5804 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5805 break;
5806 default:
5807 gcc_unreachable ();
5809 nthreads = build_call_expr (nthreads, 0);
5810 nthreads = fold_convert (itype, nthreads);
5811 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5812 true, GSI_SAME_STMT);
5813 threadid = build_call_expr (threadid, 0);
5814 threadid = fold_convert (itype, threadid);
5815 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5816 true, GSI_SAME_STMT);
5818 n1 = fd->loop.n1;
5819 n2 = fd->loop.n2;
5820 step = fd->loop.step;
5821 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5823 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5824 OMP_CLAUSE__LOOPTEMP_);
5825 gcc_assert (innerc);
5826 n1 = OMP_CLAUSE_DECL (innerc);
5827 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5828 OMP_CLAUSE__LOOPTEMP_);
5829 gcc_assert (innerc);
5830 n2 = OMP_CLAUSE_DECL (innerc);
5832 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5833 true, NULL_TREE, true, GSI_SAME_STMT);
5834 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5835 true, NULL_TREE, true, GSI_SAME_STMT);
5836 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5837 true, NULL_TREE, true, GSI_SAME_STMT);
5838 tree chunk_size = fold_convert (itype, fd->chunk_size);
5839 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5840 chunk_size
5841 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5842 GSI_SAME_STMT);
5844 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5845 t = fold_build2 (PLUS_EXPR, itype, step, t);
5846 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5847 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5848 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5849 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5850 fold_build1 (NEGATE_EXPR, itype, t),
5851 fold_build1 (NEGATE_EXPR, itype, step));
5852 else
5853 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5854 t = fold_convert (itype, t);
5855 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5856 true, GSI_SAME_STMT);
5858 trip_var = create_tmp_reg (itype, ".trip");
5859 if (gimple_in_ssa_p (cfun))
5861 trip_init = make_ssa_name (trip_var);
5862 trip_main = make_ssa_name (trip_var);
5863 trip_back = make_ssa_name (trip_var);
5865 else
5867 trip_init = trip_var;
5868 trip_main = trip_var;
5869 trip_back = trip_var;
5872 gassign *assign_stmt
5873 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5874 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5876 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5877 t = fold_build2 (MULT_EXPR, itype, t, step);
5878 if (POINTER_TYPE_P (type))
5879 t = fold_build_pointer_plus (n1, t);
5880 else
5881 t = fold_build2 (PLUS_EXPR, type, t, n1);
5882 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5883 true, GSI_SAME_STMT);
5885 /* Remove the GIMPLE_OMP_FOR. */
5886 gsi_remove (&gsi, true);
5888 gimple_stmt_iterator gsif = gsi;
5890 /* Iteration space partitioning goes in ITER_PART_BB. */
5891 gsi = gsi_last_bb (iter_part_bb);
5893 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5894 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5895 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5896 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5897 false, GSI_CONTINUE_LINKING);
5899 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5900 t = fold_build2 (MIN_EXPR, itype, t, n);
5901 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5902 false, GSI_CONTINUE_LINKING);
5904 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5905 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5907 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5908 gsi = gsi_start_bb (seq_start_bb);
5910 tree startvar = fd->loop.v;
5911 tree endvar = NULL_TREE;
5913 if (gimple_omp_for_combined_p (fd->for_stmt))
5915 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5916 ? gimple_omp_parallel_clauses (inner_stmt)
5917 : gimple_omp_for_clauses (inner_stmt);
5918 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5919 gcc_assert (innerc);
5920 startvar = OMP_CLAUSE_DECL (innerc);
5921 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5922 OMP_CLAUSE__LOOPTEMP_);
5923 gcc_assert (innerc);
5924 endvar = OMP_CLAUSE_DECL (innerc);
5925 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5926 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5928 innerc = find_lastprivate_looptemp (fd, innerc);
5929 if (innerc)
5931 /* If needed (distribute parallel for with lastprivate),
5932 propagate down the total number of iterations. */
5933 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5934 fd->loop.n2);
5935 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5936 GSI_CONTINUE_LINKING);
5937 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5943 t = fold_convert (itype, s0);
5944 t = fold_build2 (MULT_EXPR, itype, t, step);
5945 if (POINTER_TYPE_P (type))
5947 t = fold_build_pointer_plus (n1, t);
5948 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5949 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5950 t = fold_convert (signed_type_for (type), t);
5952 else
5953 t = fold_build2 (PLUS_EXPR, type, t, n1);
5954 t = fold_convert (TREE_TYPE (startvar), t);
5955 t = force_gimple_operand_gsi (&gsi, t,
5956 DECL_P (startvar)
5957 && TREE_ADDRESSABLE (startvar),
5958 NULL_TREE, false, GSI_CONTINUE_LINKING);
5959 assign_stmt = gimple_build_assign (startvar, t);
5960 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5961 if (cond_var)
5963 tree itype = TREE_TYPE (cond_var);
5964 /* For the lastprivate(conditional:) itervar, we need some iteration
5965 counter that starts at a non-zero unsigned value and increases.
5966 Prefer as few IVs as possible, so if we can use startvar
5967 itself, use that, or startvar + constant (those would be
5968 incremented with step), and as a last resort use s0 + 1,
5969 incremented by 1. */
5970 if (POINTER_TYPE_P (type)
5971 || TREE_CODE (n1) != INTEGER_CST
5972 || fd->loop.cond_code != LT_EXPR)
5973 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5974 build_int_cst (itype, 1));
5975 else if (tree_int_cst_sgn (n1) == 1)
5976 t = fold_convert (itype, t);
5977 else
5979 tree c = fold_convert (itype, n1);
5980 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5981 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5983 t = force_gimple_operand_gsi (&gsi, t, false,
5984 NULL_TREE, false, GSI_CONTINUE_LINKING);
5985 assign_stmt = gimple_build_assign (cond_var, t);
5986 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5989 t = fold_convert (itype, e0);
5990 t = fold_build2 (MULT_EXPR, itype, t, step);
5991 if (POINTER_TYPE_P (type))
5993 t = fold_build_pointer_plus (n1, t);
5994 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5995 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5996 t = fold_convert (signed_type_for (type), t);
5998 else
5999 t = fold_build2 (PLUS_EXPR, type, t, n1);
6000 t = fold_convert (TREE_TYPE (startvar), t);
6001 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6002 false, GSI_CONTINUE_LINKING);
6003 if (endvar)
6005 assign_stmt = gimple_build_assign (endvar, e);
6006 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6007 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6008 assign_stmt = gimple_build_assign (fd->loop.v, e);
6009 else
6010 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6013 /* Handle linear clause adjustments. */
6014 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6015 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6016 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6017 c; c = OMP_CLAUSE_CHAIN (c))
6018 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6019 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6021 tree d = OMP_CLAUSE_DECL (c);
6022 tree t = d, a, dest;
6023 if (omp_privatize_by_reference (t))
6024 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6025 tree type = TREE_TYPE (t);
6026 if (POINTER_TYPE_P (type))
6027 type = sizetype;
6028 dest = unshare_expr (t);
6029 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6030 expand_omp_build_assign (&gsif, v, t);
6031 if (itercnt == NULL_TREE)
6033 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6035 itercntbias
6036 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6037 fold_convert (itype, fd->loop.n1));
6038 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6039 itercntbias, step);
6040 itercntbias
6041 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6042 NULL_TREE, true,
6043 GSI_SAME_STMT);
6044 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6045 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6046 NULL_TREE, false,
6047 GSI_CONTINUE_LINKING);
6049 else
6050 itercnt = s0;
6052 a = fold_build2 (MULT_EXPR, type,
6053 fold_convert (type, itercnt),
6054 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6055 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6056 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6057 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6058 false, GSI_CONTINUE_LINKING);
6059 expand_omp_build_assign (&gsi, dest, t, true);
6061 if (fd->collapse > 1)
6062 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6064 if (!broken_loop)
6066 /* The code controlling the sequential loop goes in CONT_BB,
6067 replacing the GIMPLE_OMP_CONTINUE. */
6068 gsi = gsi_last_nondebug_bb (cont_bb);
6069 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6070 vmain = gimple_omp_continue_control_use (cont_stmt);
6071 vback = gimple_omp_continue_control_def (cont_stmt);
6073 if (cond_var)
6075 tree itype = TREE_TYPE (cond_var);
6076 tree t2;
6077 if (POINTER_TYPE_P (type)
6078 || TREE_CODE (n1) != INTEGER_CST
6079 || fd->loop.cond_code != LT_EXPR)
6080 t2 = build_int_cst (itype, 1);
6081 else
6082 t2 = fold_convert (itype, step);
6083 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6084 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6085 NULL_TREE, true, GSI_SAME_STMT);
6086 assign_stmt = gimple_build_assign (cond_var, t2);
6087 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6090 if (!gimple_omp_for_combined_p (fd->for_stmt))
6092 if (POINTER_TYPE_P (type))
6093 t = fold_build_pointer_plus (vmain, step);
6094 else
6095 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6096 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6097 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6098 true, GSI_SAME_STMT);
6099 assign_stmt = gimple_build_assign (vback, t);
6100 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6102 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6103 t = build2 (EQ_EXPR, boolean_type_node,
6104 build_int_cst (itype, 0),
6105 build_int_cst (itype, 1));
6106 else
6107 t = build2 (fd->loop.cond_code, boolean_type_node,
6108 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6109 ? t : vback, e);
6110 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6113 /* Remove GIMPLE_OMP_CONTINUE. */
6114 gsi_remove (&gsi, true);
6116 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6117 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6119 /* Trip update code goes into TRIP_UPDATE_BB. */
6120 gsi = gsi_start_bb (trip_update_bb);
6122 t = build_int_cst (itype, 1);
6123 t = build2 (PLUS_EXPR, itype, trip_main, t);
6124 assign_stmt = gimple_build_assign (trip_back, t);
6125 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6128 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6129 gsi = gsi_last_nondebug_bb (exit_bb);
6130 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6132 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6133 if (fd->have_reductemp || fd->have_pointer_condtemp)
6135 tree fn;
6136 if (t)
6137 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6138 else
6139 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6140 gcall *g = gimple_build_call (fn, 0);
6141 if (t)
6143 gimple_call_set_lhs (g, t);
6144 if (fd->have_reductemp)
6145 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6146 NOP_EXPR, t),
6147 GSI_SAME_STMT);
6149 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6151 else
6152 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6154 else if (fd->have_pointer_condtemp)
6156 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6157 gcall *g = gimple_build_call (fn, 0);
6158 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6160 gsi_remove (&gsi, true);
6162 /* Connect the new blocks. */
6163 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6164 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6166 if (!broken_loop)
6168 se = find_edge (cont_bb, body_bb);
6169 if (se == NULL)
6171 se = BRANCH_EDGE (cont_bb);
6172 gcc_assert (single_succ (se->dest) == body_bb);
6174 if (gimple_omp_for_combined_p (fd->for_stmt))
6176 remove_edge (se);
6177 se = NULL;
6179 else if (fd->collapse > 1)
6181 remove_edge (se);
6182 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6184 else
6185 se->flags = EDGE_TRUE_VALUE;
6186 find_edge (cont_bb, trip_update_bb)->flags
6187 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6189 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6190 iter_part_bb);
6193 if (gimple_in_ssa_p (cfun))
6195 gphi_iterator psi;
6196 gphi *phi;
6197 edge re, ene;
6198 edge_var_map *vm;
6199 size_t i;
6201 gcc_assert (fd->collapse == 1 && !broken_loop);
6203 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6204 remove arguments of the phi nodes in fin_bb. We need to create
6205 appropriate phi nodes in iter_part_bb instead. */
6206 se = find_edge (iter_part_bb, fin_bb);
6207 re = single_succ_edge (trip_update_bb);
6208 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6209 ene = single_succ_edge (entry_bb);
6211 psi = gsi_start_phis (fin_bb);
6212 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6213 gsi_next (&psi), ++i)
6215 gphi *nphi;
6216 location_t locus;
6218 phi = psi.phi ();
6219 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6220 redirect_edge_var_map_def (vm), 0))
6221 continue;
6223 t = gimple_phi_result (phi);
6224 gcc_assert (t == redirect_edge_var_map_result (vm));
6226 if (!single_pred_p (fin_bb))
6227 t = copy_ssa_name (t, phi);
6229 nphi = create_phi_node (t, iter_part_bb);
6231 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6232 locus = gimple_phi_arg_location_from_edge (phi, se);
6234 /* A special case -- fd->loop.v is not yet computed in
6235 iter_part_bb, so we need to use vextra instead. */
6236 if (t == fd->loop.v)
6237 t = vextra;
6238 add_phi_arg (nphi, t, ene, locus);
6239 locus = redirect_edge_var_map_location (vm);
6240 tree back_arg = redirect_edge_var_map_def (vm);
6241 add_phi_arg (nphi, back_arg, re, locus);
6242 edge ce = find_edge (cont_bb, body_bb);
6243 if (ce == NULL)
6245 ce = BRANCH_EDGE (cont_bb);
6246 gcc_assert (single_succ (ce->dest) == body_bb);
6247 ce = single_succ_edge (ce->dest);
6249 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6250 gcc_assert (inner_loop_phi != NULL);
6251 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6252 find_edge (seq_start_bb, body_bb), locus);
6254 if (!single_pred_p (fin_bb))
6255 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6257 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6258 redirect_edge_var_map_clear (re);
6259 if (single_pred_p (fin_bb))
6260 while (1)
6262 psi = gsi_start_phis (fin_bb);
6263 if (gsi_end_p (psi))
6264 break;
6265 remove_phi_node (&psi, false);
6268 /* Make phi node for trip. */
6269 phi = create_phi_node (trip_main, iter_part_bb);
6270 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6271 UNKNOWN_LOCATION);
6272 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6273 UNKNOWN_LOCATION);
6276 if (!broken_loop)
6277 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6278 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6279 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6280 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6281 recompute_dominator (CDI_DOMINATORS, fin_bb));
6282 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6283 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6284 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6285 recompute_dominator (CDI_DOMINATORS, body_bb));
6287 if (!broken_loop)
6289 class loop *loop = body_bb->loop_father;
6290 class loop *trip_loop = alloc_loop ();
6291 trip_loop->header = iter_part_bb;
6292 trip_loop->latch = trip_update_bb;
6293 add_loop (trip_loop, iter_part_bb->loop_father);
6295 if (loop != entry_bb->loop_father)
6297 gcc_assert (loop->header == body_bb);
6298 gcc_assert (loop->latch == region->cont
6299 || single_pred (loop->latch) == region->cont);
6300 trip_loop->inner = loop;
6301 return;
6304 if (!gimple_omp_for_combined_p (fd->for_stmt))
6306 loop = alloc_loop ();
6307 loop->header = body_bb;
6308 if (collapse_bb == NULL)
6309 loop->latch = cont_bb;
6310 add_loop (loop, trip_loop);
6315 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6316 loop. Given parameters:
6318 for (V = N1; V cond N2; V += STEP) BODY;
6320 where COND is "<" or ">", we generate pseudocode
6322 V = N1;
6323 goto L1;
6324 L0:
6325 BODY;
6326 V += STEP;
6327 L1:
6328 if (V cond N2) goto L0; else goto L2;
6329 L2:
6331 For collapsed loops, emit the outer loops as scalar
6332 and only try to vectorize the innermost loop. */
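/* Editor's note (illustration only, not part of the original sources):
   under SIMT (see the _SIMT_ clause handling below) the same pseudocode
   is executed per lane: lane l starts at N1 + l * STEP and advances by
   SIMT_VF * STEP.  Assuming for illustration STEP = 1 and a SIMT width
   of 32, lane 0 handles iterations 0, 32, 64, ... and lane 1 handles
   1, 33, 65, ...; the 'V -= STEP * (SIMT_VF - 1)' adjustment emitted
   after the loop compensates for the widened per-lane step.  */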
6334 static void
6335 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6337 tree type, t;
6338 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6339 gimple_stmt_iterator gsi;
6340 gimple *stmt;
6341 gcond *cond_stmt;
6342 bool broken_loop = region->cont == NULL;
6343 edge e, ne;
6344 tree *counts = NULL;
6345 int i;
6346 int safelen_int = INT_MAX;
6347 bool dont_vectorize = false;
6348 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6349 OMP_CLAUSE_SAFELEN);
6350 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6351 OMP_CLAUSE__SIMDUID_);
6352 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6353 OMP_CLAUSE_IF);
6354 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6355 OMP_CLAUSE_SIMDLEN);
6356 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6357 OMP_CLAUSE__CONDTEMP_);
6358 tree n1, n2;
6359 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6361 if (safelen)
6363 poly_uint64 val;
6364 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6365 if (!poly_int_tree_p (safelen, &val))
6366 safelen_int = 0;
6367 else
6368 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6369 if (safelen_int == 1)
6370 safelen_int = 0;
6372 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6373 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6375 safelen_int = 0;
6376 dont_vectorize = true;
6378 type = TREE_TYPE (fd->loop.v);
6379 entry_bb = region->entry;
6380 cont_bb = region->cont;
6381 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6382 gcc_assert (broken_loop
6383 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6384 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6385 if (!broken_loop)
6387 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6388 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6389 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6390 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6392 else
6394 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6395 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6396 l2_bb = single_succ (l1_bb);
6398 exit_bb = region->exit;
6399 l2_dom_bb = NULL;
6401 gsi = gsi_last_nondebug_bb (entry_bb);
6403 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6404 /* Not needed in SSA form right now. */
6405 gcc_assert (!gimple_in_ssa_p (cfun));
6406 if (fd->collapse > 1
6407 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6408 || broken_loop))
6410 int first_zero_iter = -1, dummy = -1;
6411 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6413 counts = XALLOCAVEC (tree, fd->collapse);
6414 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6415 zero_iter_bb, first_zero_iter,
6416 dummy_bb, dummy, l2_dom_bb);
6418 if (l2_dom_bb == NULL)
6419 l2_dom_bb = l1_bb;
6421 n1 = fd->loop.n1;
6422 n2 = fd->loop.n2;
6423 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6425 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6426 OMP_CLAUSE__LOOPTEMP_);
6427 gcc_assert (innerc);
6428 n1 = OMP_CLAUSE_DECL (innerc);
6429 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6430 OMP_CLAUSE__LOOPTEMP_);
6431 gcc_assert (innerc);
6432 n2 = OMP_CLAUSE_DECL (innerc);
6434 tree step = fd->loop.step;
6435 tree orig_step = step; /* May be different from step if is_simt. */
6437 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6438 OMP_CLAUSE__SIMT_);
6439 if (is_simt)
6441 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6442 is_simt = safelen_int > 1;
6444 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6445 if (is_simt)
6447 simt_lane = create_tmp_var (unsigned_type_node);
6448 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6449 gimple_call_set_lhs (g, simt_lane);
6450 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6451 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6452 fold_convert (TREE_TYPE (step), simt_lane));
6453 n1 = fold_convert (type, n1);
6454 if (POINTER_TYPE_P (type))
6455 n1 = fold_build_pointer_plus (n1, offset);
6456 else
6457 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6459 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6460 if (fd->collapse > 1)
6461 simt_maxlane = build_one_cst (unsigned_type_node);
6462 else if (safelen_int < omp_max_simt_vf ())
6463 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6464 tree vf
6465 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6466 unsigned_type_node, 0);
6467 if (simt_maxlane)
6468 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6469 vf = fold_convert (TREE_TYPE (step), vf);
6470 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6473 tree n2var = NULL_TREE;
6474 tree n2v = NULL_TREE;
6475 tree *nonrect_bounds = NULL;
6476 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6477 if (fd->collapse > 1)
6479 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6481 if (fd->non_rect)
6483 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6484 memset (nonrect_bounds, 0,
6485 sizeof (tree) * (fd->last_nonrect + 1));
6487 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6488 gcc_assert (entry_bb == gsi_bb (gsi));
6489 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6490 gsi_prev (&gsi);
6491 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6492 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6493 NULL, n1);
6494 gsi = gsi_for_stmt (fd->for_stmt);
6496 if (broken_loop)
6498 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6500 /* Compute in n2var the limit for the first innermost loop,
6501 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6502 where cnt is how many iterations the loop would have if
6503 all further iterations were assigned to the current task. */
6504 n2var = create_tmp_var (type);
6505 i = fd->collapse - 1;
6506 tree itype = TREE_TYPE (fd->loops[i].v);
6507 if (POINTER_TYPE_P (itype))
6508 itype = signed_type_for (itype);
6509 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6510 ? -1 : 1));
6511 t = fold_build2 (PLUS_EXPR, itype,
6512 fold_convert (itype, fd->loops[i].step), t);
6513 t = fold_build2 (PLUS_EXPR, itype, t,
6514 fold_convert (itype, fd->loops[i].n2));
6515 if (fd->loops[i].m2)
6517 tree t2 = fold_convert (itype,
6518 fd->loops[i - fd->loops[i].outer].v);
6519 tree t3 = fold_convert (itype, fd->loops[i].m2);
6520 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6521 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6523 t = fold_build2 (MINUS_EXPR, itype, t,
6524 fold_convert (itype, fd->loops[i].v));
6525 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6526 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6527 fold_build1 (NEGATE_EXPR, itype, t),
6528 fold_build1 (NEGATE_EXPR, itype,
6529 fold_convert (itype,
6530 fd->loops[i].step)));
6531 else
6532 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6533 fold_convert (itype, fd->loops[i].step));
6534 t = fold_convert (type, t);
6535 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6536 min_arg1 = create_tmp_var (type);
6537 expand_omp_build_assign (&gsi, min_arg1, t2);
6538 min_arg2 = create_tmp_var (type);
6539 expand_omp_build_assign (&gsi, min_arg2, t);
6541 else
6543 if (TREE_CODE (n2) == INTEGER_CST)
6545 /* Indicate for lastprivate handling that at least one iteration
6546 has been performed, without wasting runtime. */
6547 if (integer_nonzerop (n2))
6548 expand_omp_build_assign (&gsi, fd->loop.v,
6549 fold_convert (type, n2));
6550 else
6551 /* Indicate that no iteration has been performed. */
6552 expand_omp_build_assign (&gsi, fd->loop.v,
6553 build_one_cst (type));
6555 else
6557 expand_omp_build_assign (&gsi, fd->loop.v,
6558 build_zero_cst (type));
6559 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6561 for (i = 0; i < fd->collapse; i++)
6563 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6564 if (fd->loops[i].m1)
6566 tree t2
6567 = fold_convert (TREE_TYPE (t),
6568 fd->loops[i - fd->loops[i].outer].v);
6569 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6570 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6571 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6573 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6574 /* For normal non-combined collapsed loops just initialize
6575 the outermost iterator in the entry_bb. */
6576 if (!broken_loop)
6577 break;
6581 else
6582 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6583 tree altv = NULL_TREE, altn2 = NULL_TREE;
6584 if (fd->collapse == 1
6585 && !broken_loop
6586 && TREE_CODE (orig_step) != INTEGER_CST)
6588 /* The vectorizer currently punts on loops with a non-constant step
6589 for the main IV (it can't compute the number of iterations and gives
6590 up because of that). Since for OpenMP loops it is always possible to
6591 compute the number of iterations upfront, use an alternate IV
6592 as the loop iterator:
6593 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6594 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
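/* Editor's note (illustration only, not part of the original sources):
   e.g. for (i = 0; i < 10; i += step) with a runtime step of 3 this
   computes altn2 = (10 - 0 + 3 - 1) / 3 = 4, and the loop then runs
   altv = 0, 1, 2, 3 while i takes the values 0, 3, 6, 9.  */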
6595 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6596 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6597 tree itype = TREE_TYPE (fd->loop.v);
6598 if (POINTER_TYPE_P (itype))
6599 itype = signed_type_for (itype);
6600 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6601 t = fold_build2 (PLUS_EXPR, itype,
6602 fold_convert (itype, step), t);
6603 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6604 t = fold_build2 (MINUS_EXPR, itype, t,
6605 fold_convert (itype, fd->loop.v));
6606 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6607 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6608 fold_build1 (NEGATE_EXPR, itype, t),
6609 fold_build1 (NEGATE_EXPR, itype,
6610 fold_convert (itype, step)));
6611 else
6612 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6613 fold_convert (itype, step));
6614 t = fold_convert (TREE_TYPE (altv), t);
6615 altn2 = create_tmp_var (TREE_TYPE (altv));
6616 expand_omp_build_assign (&gsi, altn2, t);
6617 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6618 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6619 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6620 true, GSI_SAME_STMT);
6621 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6622 build_zero_cst (TREE_TYPE (altv)));
6623 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6625 else if (fd->collapse > 1
6626 && !broken_loop
6627 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6628 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6630 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6631 altn2 = create_tmp_var (TREE_TYPE (altv));
6633 if (cond_var)
6635 if (POINTER_TYPE_P (type)
6636 || TREE_CODE (n1) != INTEGER_CST
6637 || fd->loop.cond_code != LT_EXPR
6638 || tree_int_cst_sgn (n1) != 1)
6639 expand_omp_build_assign (&gsi, cond_var,
6640 build_one_cst (TREE_TYPE (cond_var)));
6641 else
6642 expand_omp_build_assign (&gsi, cond_var,
6643 fold_convert (TREE_TYPE (cond_var), n1));
6646 /* Remove the GIMPLE_OMP_FOR statement. */
6647 gsi_remove (&gsi, true);
6649 if (!broken_loop)
6651 /* Code to control the increment goes in the CONT_BB. */
6652 gsi = gsi_last_nondebug_bb (cont_bb);
6653 stmt = gsi_stmt (gsi);
6654 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6656 if (fd->collapse == 1
6657 || gimple_omp_for_combined_into_p (fd->for_stmt))
6659 if (POINTER_TYPE_P (type))
6660 t = fold_build_pointer_plus (fd->loop.v, step);
6661 else
6662 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6663 expand_omp_build_assign (&gsi, fd->loop.v, t);
6665 else if (TREE_CODE (n2) != INTEGER_CST)
6666 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6667 if (altv)
6669 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6670 build_one_cst (TREE_TYPE (altv)));
6671 expand_omp_build_assign (&gsi, altv, t);
6674 if (fd->collapse > 1)
6676 i = fd->collapse - 1;
6677 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6679 t = fold_convert (sizetype, fd->loops[i].step);
6680 t = fold_build_pointer_plus (fd->loops[i].v, t);
6682 else
6684 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6685 fd->loops[i].step);
6686 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6687 fd->loops[i].v, t);
6689 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6691 if (cond_var)
6693 if (POINTER_TYPE_P (type)
6694 || TREE_CODE (n1) != INTEGER_CST
6695 || fd->loop.cond_code != LT_EXPR
6696 || tree_int_cst_sgn (n1) != 1)
6697 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6698 build_one_cst (TREE_TYPE (cond_var)));
6699 else
6700 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6701 fold_convert (TREE_TYPE (cond_var), step));
6702 expand_omp_build_assign (&gsi, cond_var, t);
6705 /* Remove GIMPLE_OMP_CONTINUE. */
6706 gsi_remove (&gsi, true);
6709 /* Emit the condition in L1_BB. */
6710 gsi = gsi_start_bb (l1_bb);
6712 if (altv)
6713 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6714 else if (fd->collapse > 1
6715 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6716 && !broken_loop)
6718 i = fd->collapse - 1;
6719 tree itype = TREE_TYPE (fd->loops[i].v);
6720 if (fd->loops[i].m2)
6721 t = n2v = create_tmp_var (itype);
6722 else
6723 t = fold_convert (itype, fd->loops[i].n2);
6724 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6725 false, GSI_CONTINUE_LINKING);
6726 tree v = fd->loops[i].v;
6727 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6728 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6729 false, GSI_CONTINUE_LINKING);
6730 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6732 else
6734 if (fd->collapse > 1 && !broken_loop)
6735 t = n2var;
6736 else
6737 t = fold_convert (type, n2);
6738 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6739 false, GSI_CONTINUE_LINKING);
6740 tree v = fd->loop.v;
6741 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6742 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6743 false, GSI_CONTINUE_LINKING);
6744 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6746 cond_stmt = gimple_build_cond_empty (t);
6747 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6748 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6749 NULL, NULL)
6750 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6751 NULL, NULL))
6753 gsi = gsi_for_stmt (cond_stmt);
6754 gimple_regimplify_operands (cond_stmt, &gsi);
6757 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6758 if (is_simt)
6760 gsi = gsi_start_bb (l2_bb);
6761 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6762 if (POINTER_TYPE_P (type))
6763 t = fold_build_pointer_plus (fd->loop.v, step);
6764 else
6765 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6766 expand_omp_build_assign (&gsi, fd->loop.v, t);
6769 /* Remove GIMPLE_OMP_RETURN. */
6770 gsi = gsi_last_nondebug_bb (exit_bb);
6771 gsi_remove (&gsi, true);
6773 /* Connect the new blocks. */
6774 remove_edge (FALLTHRU_EDGE (entry_bb));
6776 if (!broken_loop)
6778 remove_edge (BRANCH_EDGE (entry_bb));
6779 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6781 e = BRANCH_EDGE (l1_bb);
6782 ne = FALLTHRU_EDGE (l1_bb);
6783 e->flags = EDGE_TRUE_VALUE;
6785 else
6787 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6789 ne = single_succ_edge (l1_bb);
6790 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6793 ne->flags = EDGE_FALSE_VALUE;
6794 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6795 ne->probability = e->probability.invert ();
6797 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6798 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6800 if (simt_maxlane)
6802 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6803 NULL_TREE, NULL_TREE);
6804 gsi = gsi_last_bb (entry_bb);
6805 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6806 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6807 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6808 FALLTHRU_EDGE (entry_bb)->probability
6809 = profile_probability::guessed_always ().apply_scale (7, 8);
6810 BRANCH_EDGE (entry_bb)->probability
6811 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6812 l2_dom_bb = entry_bb;
6814 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6816 if (!broken_loop && fd->collapse > 1)
6818 basic_block last_bb = l1_bb;
6819 basic_block init_bb = NULL;
6820 for (i = fd->collapse - 2; i >= 0; i--)
6822 tree nextn2v = NULL_TREE;
6823 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6824 e = EDGE_SUCC (last_bb, 0);
6825 else
6826 e = EDGE_SUCC (last_bb, 1);
6827 basic_block bb = split_edge (e);
6828 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6830 t = fold_convert (sizetype, fd->loops[i].step);
6831 t = fold_build_pointer_plus (fd->loops[i].v, t);
6833 else
6835 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6836 fd->loops[i].step);
6837 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6838 fd->loops[i].v, t);
6840 gsi = gsi_after_labels (bb);
6841 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6843 bb = split_block (bb, last_stmt (bb))->dest;
6844 gsi = gsi_start_bb (bb);
6845 tree itype = TREE_TYPE (fd->loops[i].v);
6846 if (fd->loops[i].m2)
6847 t = nextn2v = create_tmp_var (itype);
6848 else
6849 t = fold_convert (itype, fd->loops[i].n2);
6850 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6851 false, GSI_CONTINUE_LINKING);
6852 tree v = fd->loops[i].v;
6853 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6854 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6855 false, GSI_CONTINUE_LINKING);
6856 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6857 cond_stmt = gimple_build_cond_empty (t);
6858 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6859 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6860 expand_omp_regimplify_p, NULL, NULL)
6861 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6862 expand_omp_regimplify_p, NULL, NULL))
6864 gsi = gsi_for_stmt (cond_stmt);
6865 gimple_regimplify_operands (cond_stmt, &gsi);
6867 ne = single_succ_edge (bb);
6868 ne->flags = EDGE_FALSE_VALUE;
6870 init_bb = create_empty_bb (bb);
6871 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6872 add_bb_to_loop (init_bb, bb->loop_father);
6873 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6874 e->probability
6875 = profile_probability::guessed_always ().apply_scale (7, 8);
6876 ne->probability = e->probability.invert ();
6878 gsi = gsi_after_labels (init_bb);
6879 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6880 fd->loops[i + 1].n1);
6881 if (fd->loops[i + 1].m1)
6883 tree t2 = fold_convert (TREE_TYPE (t),
6884 fd->loops[i + 1
6885 - fd->loops[i + 1].outer].v);
6886 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6887 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6888 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6890 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6891 if (fd->loops[i + 1].m2)
6893 if (i + 2 == fd->collapse && (n2var || altv))
6895 gcc_assert (n2v == NULL_TREE);
6896 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6898 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6899 fd->loops[i + 1].n2);
6900 tree t2 = fold_convert (TREE_TYPE (t),
6901 fd->loops[i + 1
6902 - fd->loops[i + 1].outer].v);
6903 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6904 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6905 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6906 expand_omp_build_assign (&gsi, n2v, t);
6908 if (i + 2 == fd->collapse && n2var)
6910 /* For composite simd, n2 is the first iteration the current
6911 task shouldn't already handle, so we effectively want to use
6912 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6913 as the vectorized loop. Except the vectorizer will not
6914 vectorize that, so instead compute N2VAR as
6915 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6916 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6917 as the loop to vectorize. */
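/* A rough worked instance of the formula above (numbers purely
   illustrative, not from any test): if this task was handed
   V in [100, 200) so N2 == 200, and the innermost loop still has
   COUNTS3 == 8 iterations left for the current outer indices, then
   N2VAR = V + MIN (200 - V, 8), i.e. the vectorized inner loop never
   runs past either the task's share of iterations or the inner
   bound.  */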
6918 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6919 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6921 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6922 == LT_EXPR ? -1 : 1));
6923 t = fold_build2 (PLUS_EXPR, itype,
6924 fold_convert (itype,
6925 fd->loops[i + 1].step), t);
6926 if (fd->loops[i + 1].m2)
6927 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6928 else
6929 t = fold_build2 (PLUS_EXPR, itype, t,
6930 fold_convert (itype,
6931 fd->loops[i + 1].n2));
6932 t = fold_build2 (MINUS_EXPR, itype, t,
6933 fold_convert (itype, fd->loops[i + 1].v));
6934 tree step = fold_convert (itype, fd->loops[i + 1].step);
6935 if (TYPE_UNSIGNED (itype)
6936 && fd->loops[i + 1].cond_code == GT_EXPR)
6937 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6938 fold_build1 (NEGATE_EXPR, itype, t),
6939 fold_build1 (NEGATE_EXPR, itype, step));
6940 else
6941 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6942 t = fold_convert (type, t);
6944 else
6945 t = counts[i + 1];
6946 expand_omp_build_assign (&gsi, min_arg1, t2);
6947 expand_omp_build_assign (&gsi, min_arg2, t);
6948 e = split_block (init_bb, last_stmt (init_bb));
6949 gsi = gsi_after_labels (e->dest);
6950 init_bb = e->dest;
6951 remove_edge (FALLTHRU_EDGE (entry_bb));
6952 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6953 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6954 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6955 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6956 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6957 expand_omp_build_assign (&gsi, n2var, t);
6959 if (i + 2 == fd->collapse && altv)
6961 /* The vectorizer currently punts on loops with non-constant
6962 steps for the main IV (can't compute number of iterations
6963 and gives up because of that). Since for OpenMP loops it is
6964 always possible to compute the number of iterations upfront,
6965 use an alternate IV as the loop iterator. */
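/* Sketch of what the code below computes (assuming a LT_EXPR inner
   condition; not normative): ALTV starts at 0 and ALTN2 is set to
   ceil ((inner N2 - inner V) / STEP), so the loop effectively becomes
   for (ALTV = 0; ALTV < ALTN2; ALTV++), whose constant unit step the
   vectorizer can always handle.  */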
6966 expand_omp_build_assign (&gsi, altv,
6967 build_zero_cst (TREE_TYPE (altv)));
6968 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6969 if (POINTER_TYPE_P (itype))
6970 itype = signed_type_for (itype);
6971 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6972 ? -1 : 1));
6973 t = fold_build2 (PLUS_EXPR, itype,
6974 fold_convert (itype, fd->loops[i + 1].step), t);
6975 t = fold_build2 (PLUS_EXPR, itype, t,
6976 fold_convert (itype,
6977 fd->loops[i + 1].m2
6978 ? n2v : fd->loops[i + 1].n2));
6979 t = fold_build2 (MINUS_EXPR, itype, t,
6980 fold_convert (itype, fd->loops[i + 1].v));
6981 tree step = fold_convert (itype, fd->loops[i + 1].step);
6982 if (TYPE_UNSIGNED (itype)
6983 && fd->loops[i + 1].cond_code == GT_EXPR)
6984 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6985 fold_build1 (NEGATE_EXPR, itype, t),
6986 fold_build1 (NEGATE_EXPR, itype, step));
6987 else
6988 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6989 t = fold_convert (TREE_TYPE (altv), t);
6990 expand_omp_build_assign (&gsi, altn2, t);
6991 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6992 fd->loops[i + 1].m2
6993 ? n2v : fd->loops[i + 1].n2);
6994 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6995 fd->loops[i + 1].v, t2);
6996 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6997 true, GSI_SAME_STMT);
6998 gassign *g
6999 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7000 build_zero_cst (TREE_TYPE (altv)));
7001 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7003 n2v = nextn2v;
7005 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7006 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7008 e = find_edge (entry_bb, last_bb);
7009 redirect_edge_succ (e, bb);
7010 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7011 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7014 last_bb = bb;
7017 if (!broken_loop)
7019 class loop *loop = alloc_loop ();
7020 loop->header = l1_bb;
7021 loop->latch = cont_bb;
7022 add_loop (loop, l1_bb->loop_father);
7023 loop->safelen = safelen_int;
7024 if (simduid)
7026 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7027 cfun->has_simduid_loops = true;
7029 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7030 the loop. */
7031 if ((flag_tree_loop_vectorize
7032 || !OPTION_SET_P (flag_tree_loop_vectorize))
7033 && flag_tree_loop_optimize
7034 && loop->safelen > 1)
7036 loop->force_vectorize = true;
7037 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7039 unsigned HOST_WIDE_INT v
7040 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7041 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7042 loop->simdlen = v;
7044 cfun->has_force_vectorize_loops = true;
7046 else if (dont_vectorize)
7047 loop->dont_vectorize = true;
7049 else if (simduid)
7050 cfun->has_simduid_loops = true;
7053 /* Taskloop construct is represented after gimplification with
7054 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7055 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7056 which should just compute all the needed loop temporaries
7057 for GIMPLE_OMP_TASK. */
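/* As an informal illustration (user source, not taken from this file),
   a construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   arrives here as an outer GIMPLE_OMP_FOR whose only job is to compute
   the _looptemp_ start/end values, a GIMPLE_OMP_TASK carrying the
   taskloop clauses, and an inner GIMPLE_OMP_FOR iterating over the
   range each task receives from GOMP_taskloop.  The function below
   expands the outer one.  */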
7059 static void
7060 expand_omp_taskloop_for_outer (struct omp_region *region,
7061 struct omp_for_data *fd,
7062 gimple *inner_stmt)
7064 tree type, bias = NULL_TREE;
7065 basic_block entry_bb, cont_bb, exit_bb;
7066 gimple_stmt_iterator gsi;
7067 gassign *assign_stmt;
7068 tree *counts = NULL;
7069 int i;
7071 gcc_assert (inner_stmt);
7072 gcc_assert (region->cont);
7073 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7074 && gimple_omp_task_taskloop_p (inner_stmt));
7075 type = TREE_TYPE (fd->loop.v);
7077 /* See if we need to bias by LLONG_MIN. */
7078 if (fd->iter_type == long_long_unsigned_type_node
7079 && TREE_CODE (type) == INTEGER_TYPE
7080 && !TYPE_UNSIGNED (type))
7082 tree n1, n2;
7084 if (fd->loop.cond_code == LT_EXPR)
7086 n1 = fd->loop.n1;
7087 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7089 else
7091 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7092 n2 = fd->loop.n1;
7094 if (TREE_CODE (n1) != INTEGER_CST
7095 || TREE_CODE (n2) != INTEGER_CST
7096 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7097 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7100 entry_bb = region->entry;
7101 cont_bb = region->cont;
7102 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7103 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7104 exit_bb = region->exit;
7106 gsi = gsi_last_nondebug_bb (entry_bb);
7107 gimple *for_stmt = gsi_stmt (gsi);
7108 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7109 if (fd->collapse > 1)
7111 int first_zero_iter = -1, dummy = -1;
7112 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7114 counts = XALLOCAVEC (tree, fd->collapse);
7115 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7116 zero_iter_bb, first_zero_iter,
7117 dummy_bb, dummy, l2_dom_bb);
7119 if (zero_iter_bb)
7121 /* Some counts[i] vars might be uninitialized if
7122 some loop has zero iterations. But the body shouldn't
7123 be executed in that case, so just avoid uninit warnings. */
7124 for (i = first_zero_iter; i < fd->collapse; i++)
7125 if (SSA_VAR_P (counts[i]))
7126 suppress_warning (counts[i], OPT_Wuninitialized);
7127 gsi_prev (&gsi);
7128 edge e = split_block (entry_bb, gsi_stmt (gsi));
7129 entry_bb = e->dest;
7130 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7131 gsi = gsi_last_bb (entry_bb);
7132 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7133 get_immediate_dominator (CDI_DOMINATORS,
7134 zero_iter_bb));
7138 tree t0, t1;
7139 t1 = fd->loop.n2;
7140 t0 = fd->loop.n1;
7141 if (POINTER_TYPE_P (TREE_TYPE (t0))
7142 && TYPE_PRECISION (TREE_TYPE (t0))
7143 != TYPE_PRECISION (fd->iter_type))
7145 /* Avoid casting pointers to integer of a different size. */
7146 tree itype = signed_type_for (type);
7147 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7148 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7150 else
7152 t1 = fold_convert (fd->iter_type, t1);
7153 t0 = fold_convert (fd->iter_type, t0);
7155 if (bias)
7157 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7158 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7161 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7162 OMP_CLAUSE__LOOPTEMP_);
7163 gcc_assert (innerc);
7164 tree startvar = OMP_CLAUSE_DECL (innerc);
7165 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7166 gcc_assert (innerc);
7167 tree endvar = OMP_CLAUSE_DECL (innerc);
7168 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7170 innerc = find_lastprivate_looptemp (fd, innerc);
7171 if (innerc)
7173 /* If needed (inner taskloop has lastprivate clause), propagate
7174 down the total number of iterations. */
7175 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7176 NULL_TREE, false,
7177 GSI_CONTINUE_LINKING);
7178 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7179 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7183 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7184 GSI_CONTINUE_LINKING);
7185 assign_stmt = gimple_build_assign (startvar, t0);
7186 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7188 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7189 GSI_CONTINUE_LINKING);
7190 assign_stmt = gimple_build_assign (endvar, t1);
7191 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7192 if (fd->collapse > 1)
7193 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7195 /* Remove the GIMPLE_OMP_FOR statement. */
7196 gsi = gsi_for_stmt (for_stmt);
7197 gsi_remove (&gsi, true);
7199 gsi = gsi_last_nondebug_bb (cont_bb);
7200 gsi_remove (&gsi, true);
7202 gsi = gsi_last_nondebug_bb (exit_bb);
7203 gsi_remove (&gsi, true);
7205 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7206 remove_edge (BRANCH_EDGE (entry_bb));
7207 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7208 remove_edge (BRANCH_EDGE (cont_bb));
7209 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7210 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7211 recompute_dominator (CDI_DOMINATORS, region->entry));
7214 /* Taskloop construct is represented after gimplification with
7215 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7216 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7217 GOMP_taskloop{,_ull} function arranges for each task to be given just
7218 a single range of iterations. */
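/* Roughly (a sketch, not a normative description), each created task
   gets its half-open range through two _looptemp_ clauses, so the
   inner loop ends up executing something like

     for (V = start; V cond end; V += STEP)
       body (V);

   with START and END filled in per task by GOMP_taskloop{,_ull}.  */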
7220 static void
7221 expand_omp_taskloop_for_inner (struct omp_region *region,
7222 struct omp_for_data *fd,
7223 gimple *inner_stmt)
7225 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7226 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7227 basic_block fin_bb;
7228 gimple_stmt_iterator gsi;
7229 edge ep;
7230 bool broken_loop = region->cont == NULL;
7231 tree *counts = NULL;
7232 tree n1, n2, step;
7234 itype = type = TREE_TYPE (fd->loop.v);
7235 if (POINTER_TYPE_P (type))
7236 itype = signed_type_for (type);
7238 /* See if we need to bias by LLONG_MIN. */
7239 if (fd->iter_type == long_long_unsigned_type_node
7240 && TREE_CODE (type) == INTEGER_TYPE
7241 && !TYPE_UNSIGNED (type))
7243 tree n1, n2;
7245 if (fd->loop.cond_code == LT_EXPR)
7247 n1 = fd->loop.n1;
7248 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7250 else
7252 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7253 n2 = fd->loop.n1;
7255 if (TREE_CODE (n1) != INTEGER_CST
7256 || TREE_CODE (n2) != INTEGER_CST
7257 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7258 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7261 entry_bb = region->entry;
7262 cont_bb = region->cont;
7263 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7264 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7265 gcc_assert (broken_loop
7266 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7267 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7268 if (!broken_loop)
7270 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7271 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7273 exit_bb = region->exit;
7275 /* Iteration space partitioning goes in ENTRY_BB. */
7276 gsi = gsi_last_nondebug_bb (entry_bb);
7277 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7279 if (fd->collapse > 1)
7281 int first_zero_iter = -1, dummy = -1;
7282 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7284 counts = XALLOCAVEC (tree, fd->collapse);
7285 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7286 fin_bb, first_zero_iter,
7287 dummy_bb, dummy, l2_dom_bb);
7288 t = NULL_TREE;
7290 else
7291 t = integer_one_node;
7293 step = fd->loop.step;
7294 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7295 OMP_CLAUSE__LOOPTEMP_);
7296 gcc_assert (innerc);
7297 n1 = OMP_CLAUSE_DECL (innerc);
7298 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7299 gcc_assert (innerc);
7300 n2 = OMP_CLAUSE_DECL (innerc);
7301 if (bias)
7303 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7304 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7306 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7307 true, NULL_TREE, true, GSI_SAME_STMT);
7308 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7309 true, NULL_TREE, true, GSI_SAME_STMT);
7310 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7311 true, NULL_TREE, true, GSI_SAME_STMT);
7313 tree startvar = fd->loop.v;
7314 tree endvar = NULL_TREE;
7316 if (gimple_omp_for_combined_p (fd->for_stmt))
7318 tree clauses = gimple_omp_for_clauses (inner_stmt);
7319 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7320 gcc_assert (innerc);
7321 startvar = OMP_CLAUSE_DECL (innerc);
7322 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7323 OMP_CLAUSE__LOOPTEMP_);
7324 gcc_assert (innerc);
7325 endvar = OMP_CLAUSE_DECL (innerc);
7327 t = fold_convert (TREE_TYPE (startvar), n1);
7328 t = force_gimple_operand_gsi (&gsi, t,
7329 DECL_P (startvar)
7330 && TREE_ADDRESSABLE (startvar),
7331 NULL_TREE, false, GSI_CONTINUE_LINKING);
7332 gimple *assign_stmt = gimple_build_assign (startvar, t);
7333 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7335 t = fold_convert (TREE_TYPE (startvar), n2);
7336 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7337 false, GSI_CONTINUE_LINKING);
7338 if (endvar)
7340 assign_stmt = gimple_build_assign (endvar, e);
7341 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7342 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7343 assign_stmt = gimple_build_assign (fd->loop.v, e);
7344 else
7345 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7346 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7349 tree *nonrect_bounds = NULL;
7350 if (fd->collapse > 1)
7352 if (fd->non_rect)
7354 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7355 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7357 gcc_assert (gsi_bb (gsi) == entry_bb);
7358 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7359 startvar);
7360 entry_bb = gsi_bb (gsi);
7363 if (!broken_loop)
7365 /* The code controlling the sequential loop replaces the
7366 GIMPLE_OMP_CONTINUE. */
7367 gsi = gsi_last_nondebug_bb (cont_bb);
7368 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7369 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7370 vmain = gimple_omp_continue_control_use (cont_stmt);
7371 vback = gimple_omp_continue_control_def (cont_stmt);
7373 if (!gimple_omp_for_combined_p (fd->for_stmt))
7375 if (POINTER_TYPE_P (type))
7376 t = fold_build_pointer_plus (vmain, step);
7377 else
7378 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7379 t = force_gimple_operand_gsi (&gsi, t,
7380 DECL_P (vback)
7381 && TREE_ADDRESSABLE (vback),
7382 NULL_TREE, true, GSI_SAME_STMT);
7383 assign_stmt = gimple_build_assign (vback, t);
7384 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7386 t = build2 (fd->loop.cond_code, boolean_type_node,
7387 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7388 ? t : vback, e);
7389 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7392 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7393 gsi_remove (&gsi, true);
7395 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7396 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7397 cont_bb, body_bb);
7400 /* Remove the GIMPLE_OMP_FOR statement. */
7401 gsi = gsi_for_stmt (fd->for_stmt);
7402 gsi_remove (&gsi, true);
7404 /* Remove the GIMPLE_OMP_RETURN statement. */
7405 gsi = gsi_last_nondebug_bb (exit_bb);
7406 gsi_remove (&gsi, true);
7408 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7409 if (!broken_loop)
7410 remove_edge (BRANCH_EDGE (entry_bb));
7411 else
7413 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7414 region->outer->cont = NULL;
7417 /* Connect all the blocks. */
7418 if (!broken_loop)
7420 ep = find_edge (cont_bb, body_bb);
7421 if (gimple_omp_for_combined_p (fd->for_stmt))
7423 remove_edge (ep);
7424 ep = NULL;
7426 else if (fd->collapse > 1)
7428 remove_edge (ep);
7429 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7431 else
7432 ep->flags = EDGE_TRUE_VALUE;
7433 find_edge (cont_bb, fin_bb)->flags
7434 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7437 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7438 recompute_dominator (CDI_DOMINATORS, body_bb));
7439 if (!broken_loop)
7440 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7441 recompute_dominator (CDI_DOMINATORS, fin_bb));
7443 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7445 class loop *loop = alloc_loop ();
7446 loop->header = body_bb;
7447 if (collapse_bb == NULL)
7448 loop->latch = cont_bb;
7449 add_loop (loop, body_bb->loop_father);
7453 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7454 partitioned loop. The lowering here is abstracted, in that the
7455 loop parameters are passed through internal functions, which are
7456 further lowered by oacc_device_lower, once we get to the target
7457 compiler. The loop is of the form:
7459 for (V = B; V LTGT E; V += S) {BODY}
7461 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7462 (constant 0 for no chunking) and we will have a GWV partitioning
7463 mask, specifying dimensions over which the loop is to be
7464 partitioned (see note below). We generate code that looks like
7465 (this ignores tiling):
7467 <entry_bb> [incoming FALL->body, BRANCH->exit]
7468 typedef signedintify (typeof (V)) T; // underlying signed integral type
7469 T range = E - B;
7470 T chunk_no = 0;
7471 T DIR = LTGT == '<' ? +1 : -1;
7472 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7473 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7475 <head_bb> [created by splitting end of entry_bb]
7476 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7477 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7478 if (!(offset LTGT bound)) goto bottom_bb;
7480 <body_bb> [incoming]
7481 V = B + offset;
7482 {BODY}
7484 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7485 offset += step;
7486 if (offset LTGT bound) goto body_bb; [*]
7488 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7489 chunk_no++;
7490 if (chunk_no < chunk_max) goto head_bb;
7492 <exit_bb> [incoming]
7493 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7495 [*] Needed if V live at end of loop. */
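/* Informal example (user source, not from this file): a loop like

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       a[i] += 1;

   reaches this point as a GF_OMP_FOR_KIND_OACC_LOOP region.  The
   IFN_GOACC_LOOP internal calls emitted below (CHUNKS, STEP, OFFSET,
   BOUND) just carry DIR, RANGE, S, CHUNK_SIZE and the GWV mask; they
   are only turned into target-specific code later by
   oacc_device_lower.  */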
7497 static void
7498 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7500 bool is_oacc_kernels_parallelized
7501 = (lookup_attribute ("oacc kernels parallelized",
7502 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7504 bool is_oacc_kernels
7505 = (lookup_attribute ("oacc kernels",
7506 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7507 if (is_oacc_kernels_parallelized)
7508 gcc_checking_assert (is_oacc_kernels);
7510 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7511 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7512 for SSA specifics, and some are for 'parloops' OpenACC
7513 'kernels'-parallelized specifics. */
7515 tree v = fd->loop.v;
7516 enum tree_code cond_code = fd->loop.cond_code;
7517 enum tree_code plus_code = PLUS_EXPR;
7519 tree chunk_size = integer_minus_one_node;
7520 tree gwv = integer_zero_node;
7521 tree iter_type = TREE_TYPE (v);
7522 tree diff_type = iter_type;
7523 tree plus_type = iter_type;
7524 struct oacc_collapse *counts = NULL;
7526 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7527 == GF_OMP_FOR_KIND_OACC_LOOP);
7528 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7529 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7531 if (POINTER_TYPE_P (iter_type))
7533 plus_code = POINTER_PLUS_EXPR;
7534 plus_type = sizetype;
7536 for (int ix = fd->collapse; ix--;)
7538 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7539 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7540 diff_type = diff_type2;
7542 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7543 diff_type = signed_type_for (diff_type);
7544 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7545 diff_type = integer_type_node;
7547 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7548 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7549 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7550 basic_block bottom_bb = NULL;
7552 /* entry_bb has two successors; the branch edge is to the exit
7553 block, fallthrough edge to body. */
7554 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7555 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7557 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7558 body_bb, or to a block whose only successor is the body_bb. Its
7559 fallthrough successor is the final block (same as the branch
7560 successor of the entry_bb). */
7561 if (cont_bb)
7563 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7564 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7566 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7567 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7569 else
7570 gcc_assert (!gimple_in_ssa_p (cfun));
7572 /* The exit block only has entry_bb and cont_bb as predecessors. */
7573 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7575 tree chunk_no;
7576 tree chunk_max = NULL_TREE;
7577 tree bound, offset;
7578 tree step = create_tmp_var (diff_type, ".step");
7579 bool up = cond_code == LT_EXPR;
7580 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7581 bool chunking = !gimple_in_ssa_p (cfun);
7582 bool negating;
7584 /* Tiling vars. */
7585 tree tile_size = NULL_TREE;
7586 tree element_s = NULL_TREE;
7587 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7588 basic_block elem_body_bb = NULL;
7589 basic_block elem_cont_bb = NULL;
7591 /* SSA instances. */
7592 tree offset_incr = NULL_TREE;
7593 tree offset_init = NULL_TREE;
7595 gimple_stmt_iterator gsi;
7596 gassign *ass;
7597 gcall *call;
7598 gimple *stmt;
7599 tree expr;
7600 location_t loc;
7601 edge split, be, fte;
7603 /* Split the end of entry_bb to create head_bb. */
7604 split = split_block (entry_bb, last_stmt (entry_bb));
7605 basic_block head_bb = split->dest;
7606 entry_bb = split->src;
7608 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7609 gsi = gsi_last_nondebug_bb (entry_bb);
7610 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7611 loc = gimple_location (for_stmt);
7613 if (gimple_in_ssa_p (cfun))
7615 offset_init = gimple_omp_for_index (for_stmt, 0);
7616 gcc_assert (integer_zerop (fd->loop.n1));
7617 /* The SSA parallelizer does gang parallelism. */
7618 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7621 if (fd->collapse > 1 || fd->tiling)
7623 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7624 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7625 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7626 TREE_TYPE (fd->loop.n2), loc);
7628 if (SSA_VAR_P (fd->loop.n2))
7630 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7631 true, GSI_SAME_STMT);
7632 ass = gimple_build_assign (fd->loop.n2, total);
7633 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7637 tree b = fd->loop.n1;
7638 tree e = fd->loop.n2;
7639 tree s = fd->loop.step;
7641 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7642 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7644 /* Convert the step, avoiding possible unsigned->signed overflow. */
7645 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7646 if (negating)
7647 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7648 s = fold_convert (diff_type, s);
7649 if (negating)
7650 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7651 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7653 if (!chunking)
7654 chunk_size = integer_zero_node;
7655 expr = fold_convert (diff_type, chunk_size);
7656 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7657 NULL_TREE, true, GSI_SAME_STMT);
7659 if (fd->tiling)
7661 /* Determine the tile size and element step,
7662 modify the outer loop step size. */
7663 tile_size = create_tmp_var (diff_type, ".tile_size");
7664 expr = build_int_cst (diff_type, 1);
7665 for (int ix = 0; ix < fd->collapse; ix++)
7666 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7667 expr = force_gimple_operand_gsi (&gsi, expr, true,
7668 NULL_TREE, true, GSI_SAME_STMT);
7669 ass = gimple_build_assign (tile_size, expr);
7670 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7672 element_s = create_tmp_var (diff_type, ".element_s");
7673 ass = gimple_build_assign (element_s, s);
7674 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7676 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7677 s = force_gimple_operand_gsi (&gsi, expr, true,
7678 NULL_TREE, true, GSI_SAME_STMT);
7681 /* Determine the range, avoiding possible unsigned->signed overflow. */
7682 negating = !up && TYPE_UNSIGNED (iter_type);
7683 expr = fold_build2 (MINUS_EXPR, plus_type,
7684 fold_convert (plus_type, negating ? b : e),
7685 fold_convert (plus_type, negating ? e : b));
7686 expr = fold_convert (diff_type, expr);
7687 if (negating)
7688 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7689 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7690 NULL_TREE, true, GSI_SAME_STMT);
7692 chunk_no = build_int_cst (diff_type, 0);
7693 if (chunking)
7695 gcc_assert (!gimple_in_ssa_p (cfun));
7697 expr = chunk_no;
7698 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7699 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7701 ass = gimple_build_assign (chunk_no, expr);
7702 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7704 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7705 build_int_cst (integer_type_node,
7706 IFN_GOACC_LOOP_CHUNKS),
7707 dir, range, s, chunk_size, gwv);
7708 gimple_call_set_lhs (call, chunk_max);
7709 gimple_set_location (call, loc);
7710 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7712 else
7713 chunk_size = chunk_no;
7715 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7716 build_int_cst (integer_type_node,
7717 IFN_GOACC_LOOP_STEP),
7718 dir, range, s, chunk_size, gwv);
7719 gimple_call_set_lhs (call, step);
7720 gimple_set_location (call, loc);
7721 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7723 /* Remove the GIMPLE_OMP_FOR. */
7724 gsi_remove (&gsi, true);
7726 /* Fixup edges from head_bb. */
7727 be = BRANCH_EDGE (head_bb);
7728 fte = FALLTHRU_EDGE (head_bb);
7729 be->flags |= EDGE_FALSE_VALUE;
7730 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7732 basic_block body_bb = fte->dest;
7734 if (gimple_in_ssa_p (cfun))
7736 gsi = gsi_last_nondebug_bb (cont_bb);
7737 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7739 offset = gimple_omp_continue_control_use (cont_stmt);
7740 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7742 else
7744 offset = create_tmp_var (diff_type, ".offset");
7745 offset_init = offset_incr = offset;
7747 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7749 /* Loop offset & bound go into head_bb. */
7750 gsi = gsi_start_bb (head_bb);
7752 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7753 build_int_cst (integer_type_node,
7754 IFN_GOACC_LOOP_OFFSET),
7755 dir, range, s,
7756 chunk_size, gwv, chunk_no);
7757 gimple_call_set_lhs (call, offset_init);
7758 gimple_set_location (call, loc);
7759 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7761 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7762 build_int_cst (integer_type_node,
7763 IFN_GOACC_LOOP_BOUND),
7764 dir, range, s,
7765 chunk_size, gwv, offset_init);
7766 gimple_call_set_lhs (call, bound);
7767 gimple_set_location (call, loc);
7768 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7770 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7771 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7772 GSI_CONTINUE_LINKING);
7774 /* V assignment goes into body_bb. */
7775 if (!gimple_in_ssa_p (cfun))
7777 gsi = gsi_start_bb (body_bb);
7779 expr = build2 (plus_code, iter_type, b,
7780 fold_convert (plus_type, offset));
7781 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7782 true, GSI_SAME_STMT);
7783 ass = gimple_build_assign (v, expr);
7784 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7786 if (fd->collapse > 1 || fd->tiling)
7787 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7789 if (fd->tiling)
7791 /* Determine the range of the element loop -- usually simply
7792 the tile_size, but could be smaller if the final
7793 iteration of the outer loop is a partial tile. */
7794 tree e_range = create_tmp_var (diff_type, ".e_range");
7796 expr = build2 (MIN_EXPR, diff_type,
7797 build2 (MINUS_EXPR, diff_type, bound, offset),
7798 build2 (MULT_EXPR, diff_type, tile_size,
7799 element_s));
7800 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7801 true, GSI_SAME_STMT);
7802 ass = gimple_build_assign (e_range, expr);
7803 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7805 /* Determine bound, offset & step of inner loop. */
7806 e_bound = create_tmp_var (diff_type, ".e_bound");
7807 e_offset = create_tmp_var (diff_type, ".e_offset");
7808 e_step = create_tmp_var (diff_type, ".e_step");
7810 /* Mark these as element loops. */
7811 tree t, e_gwv = integer_minus_one_node;
7812 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7814 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7815 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7816 element_s, chunk, e_gwv, chunk);
7817 gimple_call_set_lhs (call, e_offset);
7818 gimple_set_location (call, loc);
7819 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7821 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7822 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7823 element_s, chunk, e_gwv, e_offset);
7824 gimple_call_set_lhs (call, e_bound);
7825 gimple_set_location (call, loc);
7826 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7828 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7829 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7830 element_s, chunk, e_gwv);
7831 gimple_call_set_lhs (call, e_step);
7832 gimple_set_location (call, loc);
7833 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7835 /* Add test and split block. */
7836 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7837 stmt = gimple_build_cond_empty (expr);
7838 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7839 split = split_block (body_bb, stmt);
7840 elem_body_bb = split->dest;
7841 if (cont_bb == body_bb)
7842 cont_bb = elem_body_bb;
7843 body_bb = split->src;
7845 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7847 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7848 if (cont_bb == NULL)
7850 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7851 e->probability = profile_probability::even ();
7852 split->probability = profile_probability::even ();
7855 /* Initialize the user's loop vars. */
7856 gsi = gsi_start_bb (elem_body_bb);
7857 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7858 diff_type);
7862 /* Loop increment goes into cont_bb. If this is not a loop, we
7863 will have spawned threads as if it was, and each one will
7864 execute one iteration. The specification is not explicit about
7865 whether such constructs are ill-formed or not, and they can
7866 occur, especially when noreturn routines are involved. */
7867 if (cont_bb)
7869 gsi = gsi_last_nondebug_bb (cont_bb);
7870 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7871 loc = gimple_location (cont_stmt);
7873 if (fd->tiling)
7875 /* Insert element loop increment and test. */
7876 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7877 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7878 true, GSI_SAME_STMT);
7879 ass = gimple_build_assign (e_offset, expr);
7880 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7881 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7883 stmt = gimple_build_cond_empty (expr);
7884 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7885 split = split_block (cont_bb, stmt);
7886 elem_cont_bb = split->src;
7887 cont_bb = split->dest;
7889 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7890 split->probability = profile_probability::unlikely ().guessed ();
7891 edge latch_edge
7892 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7893 latch_edge->probability = profile_probability::likely ().guessed ();
7895 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7896 skip_edge->probability = profile_probability::unlikely ().guessed ();
7897 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7898 loop_entry_edge->probability
7899 = profile_probability::likely ().guessed ();
7901 gsi = gsi_for_stmt (cont_stmt);
7904 /* Increment offset. */
7905 if (gimple_in_ssa_p (cfun))
7906 expr = build2 (plus_code, iter_type, offset,
7907 fold_convert (plus_type, step));
7908 else
7909 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7910 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7911 true, GSI_SAME_STMT);
7912 ass = gimple_build_assign (offset_incr, expr);
7913 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7914 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7915 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7917 /* Remove the GIMPLE_OMP_CONTINUE. */
7918 gsi_remove (&gsi, true);
7920 /* Fixup edges from cont_bb. */
7921 be = BRANCH_EDGE (cont_bb);
7922 fte = FALLTHRU_EDGE (cont_bb);
7923 be->flags |= EDGE_TRUE_VALUE;
7924 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7926 if (chunking)
7928 /* Split the beginning of exit_bb to make bottom_bb. We
7929 need to insert a nop at the start, because splitting is
7930 after a stmt, not before. */
7931 gsi = gsi_start_bb (exit_bb);
7932 stmt = gimple_build_nop ();
7933 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7934 split = split_block (exit_bb, stmt);
7935 bottom_bb = split->src;
7936 exit_bb = split->dest;
7937 gsi = gsi_last_bb (bottom_bb);
7939 /* Chunk increment and test goes into bottom_bb. */
7940 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7941 build_int_cst (diff_type, 1));
7942 ass = gimple_build_assign (chunk_no, expr);
7943 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7945 /* Chunk test at end of bottom_bb. */
7946 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7947 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7948 GSI_CONTINUE_LINKING);
7950 /* Fixup edges from bottom_bb. */
7951 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7952 split->probability = profile_probability::unlikely ().guessed ();
7953 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7954 latch_edge->probability = profile_probability::likely ().guessed ();
7958 gsi = gsi_last_nondebug_bb (exit_bb);
7959 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7960 loc = gimple_location (gsi_stmt (gsi));
7962 if (!gimple_in_ssa_p (cfun))
7964 /* Insert the final value of V, in case it is live. This is the
7965 value for the only thread that survives past the join. */
7966 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7967 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7968 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7969 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7970 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7971 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7972 true, GSI_SAME_STMT);
7973 ass = gimple_build_assign (v, expr);
7974 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7977 /* Remove the OMP_RETURN. */
7978 gsi_remove (&gsi, true);
7980 if (cont_bb)
7982 /* We now have one, two or three nested loops. Update the loop
7983 structures. */
7984 class loop *parent = entry_bb->loop_father;
7985 class loop *body = body_bb->loop_father;
7987 if (chunking)
7989 class loop *chunk_loop = alloc_loop ();
7990 chunk_loop->header = head_bb;
7991 chunk_loop->latch = bottom_bb;
7992 add_loop (chunk_loop, parent);
7993 parent = chunk_loop;
7995 else if (parent != body)
7997 gcc_assert (body->header == body_bb);
7998 gcc_assert (body->latch == cont_bb
7999 || single_pred (body->latch) == cont_bb);
8000 parent = NULL;
8003 if (parent)
8005 class loop *body_loop = alloc_loop ();
8006 body_loop->header = body_bb;
8007 body_loop->latch = cont_bb;
8008 add_loop (body_loop, parent);
8010 if (fd->tiling)
8012 /* Insert tiling's element loop. */
8013 class loop *inner_loop = alloc_loop ();
8014 inner_loop->header = elem_body_bb;
8015 inner_loop->latch = elem_cont_bb;
8016 add_loop (inner_loop, body_loop);
8022 /* Expand the OMP loop defined by REGION. */
8024 static void
8025 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8027 struct omp_for_data fd;
8028 struct omp_for_data_loop *loops;
8030 loops = XALLOCAVEC (struct omp_for_data_loop,
8031 gimple_omp_for_collapse (last_stmt (region->entry)));
8032 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8033 &fd, loops);
8034 region->sched_kind = fd.sched_kind;
8035 region->sched_modifiers = fd.sched_modifiers;
8036 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8037 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8039 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8040 if ((loops[i].m1 || loops[i].m2)
8041 && (loops[i].m1 == NULL_TREE
8042 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8043 && (loops[i].m2 == NULL_TREE
8044 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8045 && TREE_CODE (loops[i].step) == INTEGER_CST
8046 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8048 tree t;
8049 tree itype = TREE_TYPE (loops[i].v);
8050 if (loops[i].m1 && loops[i].m2)
8051 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8052 else if (loops[i].m1)
8053 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8054 else
8055 t = loops[i].m2;
8056 t = fold_build2 (MULT_EXPR, itype, t,
8057 fold_convert (itype,
8058 loops[i - loops[i].outer].step));
8059 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8060 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8061 fold_build1 (NEGATE_EXPR, itype, t),
8062 fold_build1 (NEGATE_EXPR, itype,
8063 fold_convert (itype,
8064 loops[i].step)));
8065 else
8066 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8067 fold_convert (itype, loops[i].step));
8068 if (integer_nonzerop (t))
8069 error_at (gimple_location (fd.for_stmt),
8070 "invalid OpenMP non-rectangular loop step; "
8071 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8072 "step %qE",
8073 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8074 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8075 loops[i - loops[i].outer].step, i + 1,
8076 loops[i].step);
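/* Hypothetical example of a loop this validation rejects:

     #pragma omp for collapse(2)
     for (i = 0; i < n; i++)
       for (j = 2 * i; j < 4 * i; j += 3)
         ...

   here (m2 - m1) * outer-step == (4 - 2) * 1 == 2, which is not a
   multiple of the inner step 3, so the diagnostic above is emitted.  */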
8080 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8081 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8082 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8083 if (region->cont)
8085 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8086 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8087 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8089 else
8090 /* If there isn't a continue then this is a degenerate case where
8091 the introduction of abnormal edges during lowering will prevent
8092 original loops from being detected. Fix that up. */
8093 loops_state_set (LOOPS_NEED_FIXUP);
8095 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8096 expand_omp_simd (region, &fd);
8097 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8099 gcc_assert (!inner_stmt && !fd.non_rect);
8100 expand_oacc_for (region, &fd);
8102 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8104 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8105 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8106 else
8107 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8109 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8110 && !fd.have_ordered)
8112 if (fd.chunk_size == NULL)
8113 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8114 else
8115 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8117 else
8119 int fn_index, start_ix, next_ix;
8120 unsigned HOST_WIDE_INT sched = 0;
8121 tree sched_arg = NULL_TREE;
8123 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8124 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8125 if (fd.chunk_size == NULL
8126 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8127 fd.chunk_size = integer_zero_node;
8128 switch (fd.sched_kind)
8130 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8131 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8132 && fd.lastprivate_conditional == 0)
8134 gcc_assert (!fd.have_ordered);
8135 fn_index = 6;
8136 sched = 4;
8138 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8139 && !fd.have_ordered
8140 && fd.lastprivate_conditional == 0)
8141 fn_index = 7;
8142 else
8144 fn_index = 3;
8145 sched = (HOST_WIDE_INT_1U << 31);
8147 break;
8148 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8149 case OMP_CLAUSE_SCHEDULE_GUIDED:
8150 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8151 && !fd.have_ordered
8152 && fd.lastprivate_conditional == 0)
8154 fn_index = 3 + fd.sched_kind;
8155 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8156 break;
8158 fn_index = fd.sched_kind;
8159 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8160 sched += (HOST_WIDE_INT_1U << 31);
8161 break;
8162 case OMP_CLAUSE_SCHEDULE_STATIC:
8163 gcc_assert (fd.have_ordered);
8164 fn_index = 0;
8165 sched = (HOST_WIDE_INT_1U << 31) + 1;
8166 break;
8167 default:
8168 gcc_unreachable ();
8170 if (!fd.ordered)
8171 fn_index += fd.have_ordered * 8;
8172 if (fd.ordered)
8173 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8174 else
8175 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8176 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8177 if (fd.have_reductemp || fd.have_pointer_condtemp)
8179 if (fd.ordered)
8180 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8181 else if (fd.have_ordered)
8182 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8183 else
8184 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8185 sched_arg = build_int_cstu (long_integer_type_node, sched);
8186 if (!fd.chunk_size)
8187 fd.chunk_size = integer_zero_node;
8189 if (fd.iter_type == long_long_unsigned_type_node)
8191 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8192 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8193 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8194 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8196 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8197 (enum built_in_function) next_ix, sched_arg,
8198 inner_stmt);
8201 if (gimple_in_ssa_p (cfun))
8202 update_ssa (TODO_update_ssa_only_virtuals);
8205 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8207 v = GOMP_sections_start (n);
8209 switch (v)
8211 case 0:
8212 goto L2;
8213 case 1:
8214 section 1;
8215 goto L1;
8216 case 2:
8218 case n:
8220 default:
8221 abort ();
8224 v = GOMP_sections_next ();
8225 goto L0;
8227 reduction;
8229 If this is a combined parallel sections, replace the call to
8230 GOMP_sections_start with call to GOMP_sections_next. */
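/* For illustration only, the user-level source matching the pseudo code
   above is roughly

     #pragma omp sections
     {
       #pragma omp section
         section 1;
       #pragma omp section
         section 2;
     }

   each section body becomes one case of the switch, and
   GOMP_sections_next keeps handing out section numbers until it
   returns 0.  */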
8232 static void
8233 expand_omp_sections (struct omp_region *region)
8235 tree t, u, vin = NULL, vmain, vnext, l2;
8236 unsigned len;
8237 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8238 gimple_stmt_iterator si, switch_si;
8239 gomp_sections *sections_stmt;
8240 gimple *stmt;
8241 gomp_continue *cont;
8242 edge_iterator ei;
8243 edge e;
8244 struct omp_region *inner;
8245 unsigned i, casei;
8246 bool exit_reachable = region->cont != NULL;
8248 gcc_assert (region->exit != NULL);
8249 entry_bb = region->entry;
8250 l0_bb = single_succ (entry_bb);
8251 l1_bb = region->cont;
8252 l2_bb = region->exit;
8253 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8254 l2 = gimple_block_label (l2_bb);
8255 else
8257 /* This can happen if there are reductions. */
8258 len = EDGE_COUNT (l0_bb->succs);
8259 gcc_assert (len > 0);
8260 e = EDGE_SUCC (l0_bb, len - 1);
8261 si = gsi_last_nondebug_bb (e->dest);
8262 l2 = NULL_TREE;
8263 if (gsi_end_p (si)
8264 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8265 l2 = gimple_block_label (e->dest);
8266 else
8267 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8269 si = gsi_last_nondebug_bb (e->dest);
8270 if (gsi_end_p (si)
8271 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8273 l2 = gimple_block_label (e->dest);
8274 break;
8278 if (exit_reachable)
8279 default_bb = create_empty_bb (l1_bb->prev_bb);
8280 else
8281 default_bb = create_empty_bb (l0_bb);
8283 /* We will build a switch() with enough cases for all the
8284 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8285 and a default case to abort if something goes wrong. */
8286 len = EDGE_COUNT (l0_bb->succs);
8288 /* Use vec::quick_push on label_vec throughout, since we know the size
8289 in advance. */
8290 auto_vec<tree> label_vec (len);
8292 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8293 GIMPLE_OMP_SECTIONS statement. */
8294 si = gsi_last_nondebug_bb (entry_bb);
8295 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8296 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8297 vin = gimple_omp_sections_control (sections_stmt);
8298 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8299 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8300 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8301 tree cond_var = NULL_TREE;
8302 if (reductmp || condtmp)
8304 tree reductions = null_pointer_node, mem = null_pointer_node;
8305 tree memv = NULL_TREE, condtemp = NULL_TREE;
8306 gimple_stmt_iterator gsi = gsi_none ();
8307 gimple *g = NULL;
8308 if (reductmp)
8310 reductions = OMP_CLAUSE_DECL (reductmp);
8311 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8312 g = SSA_NAME_DEF_STMT (reductions);
8313 reductions = gimple_assign_rhs1 (g);
8314 OMP_CLAUSE_DECL (reductmp) = reductions;
8315 gsi = gsi_for_stmt (g);
8317 else
8318 gsi = si;
8319 if (condtmp)
8321 condtemp = OMP_CLAUSE_DECL (condtmp);
8322 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8323 OMP_CLAUSE__CONDTEMP_);
8324 cond_var = OMP_CLAUSE_DECL (c);
8325 tree type = TREE_TYPE (condtemp);
8326 memv = create_tmp_var (type);
8327 TREE_ADDRESSABLE (memv) = 1;
8328 unsigned cnt = 0;
8329 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8330 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8331 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8332 ++cnt;
8333 unsigned HOST_WIDE_INT sz
8334 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8335 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8336 false);
8337 mem = build_fold_addr_expr (memv);
8339 t = build_int_cst (unsigned_type_node, len - 1);
8340 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8341 stmt = gimple_build_call (u, 3, t, reductions, mem);
8342 gimple_call_set_lhs (stmt, vin);
8343 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8344 if (condtmp)
8346 expand_omp_build_assign (&gsi, condtemp, memv, false);
8347 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8348 vin, build_one_cst (TREE_TYPE (cond_var)));
8349 expand_omp_build_assign (&gsi, cond_var, t, false);
8351 if (reductmp)
8353 gsi_remove (&gsi, true);
8354 release_ssa_name (gimple_assign_lhs (g));
8357 else if (!is_combined_parallel (region))
8359 /* If we are not inside a combined parallel+sections region,
8360 call GOMP_sections_start. */
8361 t = build_int_cst (unsigned_type_node, len - 1);
8362 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8363 stmt = gimple_build_call (u, 1, t);
8365 else
8367 /* Otherwise, call GOMP_sections_next. */
8368 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8369 stmt = gimple_build_call (u, 0);
8371 if (!reductmp && !condtmp)
8373 gimple_call_set_lhs (stmt, vin);
8374 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8376 gsi_remove (&si, true);
8378 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8379 L0_BB. */
8380 switch_si = gsi_last_nondebug_bb (l0_bb);
8381 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8382 if (exit_reachable)
8384 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8385 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8386 vmain = gimple_omp_continue_control_use (cont);
8387 vnext = gimple_omp_continue_control_def (cont);
8389 else
8391 vmain = vin;
8392 vnext = NULL_TREE;
8395 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8396 label_vec.quick_push (t);
8397 i = 1;
8399 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8400 for (inner = region->inner, casei = 1;
8401 inner;
8402 inner = inner->next, i++, casei++)
8404 basic_block s_entry_bb, s_exit_bb;
8406 /* Skip optional reduction region. */
8407 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8409 --i;
8410 --casei;
8411 continue;
8414 s_entry_bb = inner->entry;
8415 s_exit_bb = inner->exit;
8417 t = gimple_block_label (s_entry_bb);
8418 u = build_int_cst (unsigned_type_node, casei);
8419 u = build_case_label (u, NULL, t);
8420 label_vec.quick_push (u);
8422 si = gsi_last_nondebug_bb (s_entry_bb);
8423 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8424 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8425 gsi_remove (&si, true);
8426 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8428 if (s_exit_bb == NULL)
8429 continue;
8431 si = gsi_last_nondebug_bb (s_exit_bb);
8432 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8433 gsi_remove (&si, true);
8435 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8438 /* Error handling code goes in DEFAULT_BB. */
8439 t = gimple_block_label (default_bb);
8440 u = build_case_label (NULL, NULL, t);
8441 make_edge (l0_bb, default_bb, 0);
8442 add_bb_to_loop (default_bb, current_loops->tree_root);
8444 stmt = gimple_build_switch (vmain, u, label_vec);
8445 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8446 gsi_remove (&switch_si, true);
8448 si = gsi_start_bb (default_bb);
8449 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8450 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8452 if (exit_reachable)
8454 tree bfn_decl;
8456 /* Code to get the next section goes in L1_BB. */
8457 si = gsi_last_nondebug_bb (l1_bb);
8458 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8460 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8461 stmt = gimple_build_call (bfn_decl, 0);
8462 gimple_call_set_lhs (stmt, vnext);
8463 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8464 if (cond_var)
8466 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8467 vnext, build_one_cst (TREE_TYPE (cond_var)));
8468 expand_omp_build_assign (&si, cond_var, t, false);
8470 gsi_remove (&si, true);
8472 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8475 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8476 si = gsi_last_nondebug_bb (l2_bb);
8477 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8478 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8479 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8480 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8481 else
8482 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8483 stmt = gimple_build_call (t, 0);
8484 if (gimple_omp_return_lhs (gsi_stmt (si)))
8485 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8486 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8487 gsi_remove (&si, true);
8489 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8492 /* Expand code for an OpenMP single or scope directive. We've already expanded
8493 much of the code; here we simply place the GOMP_barrier call. */
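/* For illustration only, a rough source-level sketch of the result
   (not the exact GIMPLE; the body itself was expanded earlier):

     #pragma omp single            <already-expanded body>
       body;                 ->    GOMP_barrier ();   // omitted for nowait

   A barrier variant returning a cancellation flag is used instead when
   the GIMPLE_OMP_RETURN carries an lhs.  */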
8495 static void
8496 expand_omp_single (struct omp_region *region)
8498 basic_block entry_bb, exit_bb;
8499 gimple_stmt_iterator si;
8501 entry_bb = region->entry;
8502 exit_bb = region->exit;
8504 si = gsi_last_nondebug_bb (entry_bb);
8505 enum gimple_code code = gimple_code (gsi_stmt (si));
8506 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8507 gsi_remove (&si, true);
8508 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8510 if (exit_bb == NULL)
8512 gcc_assert (code == GIMPLE_OMP_SCOPE);
8513 return;
8516 si = gsi_last_nondebug_bb (exit_bb);
8517 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8519 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8520 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8522 gsi_remove (&si, true);
8523 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8526 /* Generic expansion for OpenMP synchronization directives: single,
8527 master, masked, taskgroup, ordered, critical and teams. All we need
8528 to do here is remove the entry and exit markers for REGION. */
8530 static void
8531 expand_omp_synch (struct omp_region *region)
8533 basic_block entry_bb, exit_bb;
8534 gimple_stmt_iterator si;
8536 entry_bb = region->entry;
8537 exit_bb = region->exit;
8539 si = gsi_last_nondebug_bb (entry_bb);
8540 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8541 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8542 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8543 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8544 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8545 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8546 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8547 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8548 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8550 expand_omp_taskreg (region);
8551 return;
8553 gsi_remove (&si, true);
8554 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8556 if (exit_bb)
8558 si = gsi_last_nondebug_bb (exit_bb);
8559 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8560 gsi_remove (&si, true);
8561 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8565 /* Translate enum omp_memory_order to enum memmodel for the fail
8566 clause embedded in it. */
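/* Example of the mapping below (illustrative, not an exhaustive table):
   an atomic with acq_rel ordering and no explicit fail clause gets a
   MEMMODEL_ACQUIRE failure ordering, since release semantics are not
   meaningful for a failed compare-exchange, while an explicit
   fail (seq_cst) clause yields MEMMODEL_SEQ_CST.  */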
8568 static enum memmodel
8569 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8571 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8573 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8574 switch (mo & OMP_MEMORY_ORDER_MASK)
8576 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8577 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8578 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8579 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8580 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8581 default: break;
8583 gcc_unreachable ();
8584 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8585 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8586 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8587 default: gcc_unreachable ();
8591 /* Translate enum omp_memory_order to enum memmodel. The two enums
8592 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
8593 is 0 and omp_memory_order has the fail mode encoded in it too. */
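/* Example of the workaround mentioned below (illustrative): an acquire
   success ordering combined with a seq_cst fail clause returns
   MEMMODEL_SEQ_CST, so that the failure model is never stronger than
   the success model.  */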
8595 static enum memmodel
8596 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8598 enum memmodel ret, fail_ret;
8599 switch (mo & OMP_MEMORY_ORDER_MASK)
8601 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8602 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8603 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8604 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8605 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8606 default: gcc_unreachable ();
8608 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8609 we can just return ret here unconditionally. Otherwise, work around
8610 it here and make sure fail memmodel is not stronger. */
8611 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8612 return ret;
8613 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8614 if (fail_ret > ret)
8615 return fail_ret;
8616 return ret;
8619 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8620 operation as a normal volatile load. */
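/* Roughly, and only as an illustrative sketch for a 4-byte type
   (INDEX == 2), the transformation performed below is:

     #pragma omp atomic read
     v = *addr;          ->    v = __atomic_load_4 (addr, memmodel);

   INDEX selects among the __atomic_load_{1,2,4,8,16} builtins.  */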
8622 static bool
8623 expand_omp_atomic_load (basic_block load_bb, tree addr,
8624 tree loaded_val, int index)
8626 enum built_in_function tmpbase;
8627 gimple_stmt_iterator gsi;
8628 basic_block store_bb;
8629 location_t loc;
8630 gimple *stmt;
8631 tree decl, call, type, itype;
8633 gsi = gsi_last_nondebug_bb (load_bb);
8634 stmt = gsi_stmt (gsi);
8635 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8636 loc = gimple_location (stmt);
8638 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8639 is smaller than word size, then expand_atomic_load assumes that the load
8640 is atomic. We could avoid the builtin entirely in this case. */
8642 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8643 decl = builtin_decl_explicit (tmpbase);
8644 if (decl == NULL_TREE)
8645 return false;
8647 type = TREE_TYPE (loaded_val);
8648 itype = TREE_TYPE (TREE_TYPE (decl));
8650 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8651 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8652 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8653 if (!useless_type_conversion_p (type, itype))
8654 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8655 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8657 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8658 gsi_remove (&gsi, true);
8660 store_bb = single_succ (load_bb);
8661 gsi = gsi_last_nondebug_bb (store_bb);
8662 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8663 gsi_remove (&gsi, true);
8665 if (gimple_in_ssa_p (cfun))
8666 update_ssa (TODO_update_ssa_no_phi);
8668 return true;
8671 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8672 operation as a normal volatile store. */
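/* Roughly, and only as an illustrative sketch for a 4-byte type:

     #pragma omp atomic write
     *addr = v;          ->    __atomic_store_4 (addr, v, memmodel);

   If the previous value is also needed (a capture), __atomic_exchange_N
   is used instead and its result assigned to the capturing variable.  */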
8674 static bool
8675 expand_omp_atomic_store (basic_block load_bb, tree addr,
8676 tree loaded_val, tree stored_val, int index)
8678 enum built_in_function tmpbase;
8679 gimple_stmt_iterator gsi;
8680 basic_block store_bb = single_succ (load_bb);
8681 location_t loc;
8682 gimple *stmt;
8683 tree decl, call, type, itype;
8684 machine_mode imode;
8685 bool exchange;
8687 gsi = gsi_last_nondebug_bb (load_bb);
8688 stmt = gsi_stmt (gsi);
8689 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8691 /* If the load value is needed, then this isn't a store but an exchange. */
8692 exchange = gimple_omp_atomic_need_value_p (stmt);
8694 gsi = gsi_last_nondebug_bb (store_bb);
8695 stmt = gsi_stmt (gsi);
8696 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8697 loc = gimple_location (stmt);
8699 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8700 is smaller than word size, then expand_atomic_store assumes that the store
8701 is atomic. We could avoid the builtin entirely in this case. */
8703 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8704 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8705 decl = builtin_decl_explicit (tmpbase);
8706 if (decl == NULL_TREE)
8707 return false;
8709 type = TREE_TYPE (stored_val);
8711 /* Dig out the type of the function's second argument. */
8712 itype = TREE_TYPE (decl);
8713 itype = TYPE_ARG_TYPES (itype);
8714 itype = TREE_CHAIN (itype);
8715 itype = TREE_VALUE (itype);
8716 imode = TYPE_MODE (itype);
8718 if (exchange && !can_atomic_exchange_p (imode, true))
8719 return false;
8721 if (!useless_type_conversion_p (itype, type))
8722 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8723 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8724 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8725 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8726 if (exchange)
8728 if (!useless_type_conversion_p (type, itype))
8729 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8730 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8733 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8734 gsi_remove (&gsi, true);
8736 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8737 gsi = gsi_last_nondebug_bb (load_bb);
8738 gsi_remove (&gsi, true);
8740 if (gimple_in_ssa_p (cfun))
8741 update_ssa (TODO_update_ssa_no_phi);
8743 return true;
8746 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8747 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8748 size of the data type, and thus usable to find the index of the builtin
8749 decl. Returns false if the expression is not of the proper form. */
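/* Roughly, and only as an illustrative sketch for a 4-byte type:

     #pragma omp atomic update
     *addr = *addr + expr;   ->   __atomic_fetch_add_4 (addr, expr, memmodel);

   When the updated (resp. original) value is captured, the
   __atomic_add_fetch_N (resp. __atomic_fetch_add_N) result is assigned
   to the capture variable.  */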
8751 static bool
8752 expand_omp_atomic_fetch_op (basic_block load_bb,
8753 tree addr, tree loaded_val,
8754 tree stored_val, int index)
8756 enum built_in_function oldbase, newbase, tmpbase;
8757 tree decl, itype, call;
8758 tree lhs, rhs;
8759 basic_block store_bb = single_succ (load_bb);
8760 gimple_stmt_iterator gsi;
8761 gimple *stmt;
8762 location_t loc;
8763 enum tree_code code;
8764 bool need_old, need_new;
8765 machine_mode imode;
8767 /* We expect to find the following sequences:
8769 load_bb:
8770 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8772 store_bb:
8773 val = tmp OP something; (or: something OP tmp)
8774 GIMPLE_OMP_ATOMIC_STORE (val)
8776 ???FIXME: Allow a more flexible sequence.
8777 Perhaps use data flow to pick the statements.
8781 gsi = gsi_after_labels (store_bb);
8782 stmt = gsi_stmt (gsi);
8783 if (is_gimple_debug (stmt))
8785 gsi_next_nondebug (&gsi);
8786 if (gsi_end_p (gsi))
8787 return false;
8788 stmt = gsi_stmt (gsi);
8790 loc = gimple_location (stmt);
8791 if (!is_gimple_assign (stmt))
8792 return false;
8793 gsi_next_nondebug (&gsi);
8794 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8795 return false;
8796 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8797 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8798 enum omp_memory_order omo
8799 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8800 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8801 gcc_checking_assert (!need_old || !need_new);
8803 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8804 return false;
8806 /* Check for one of the supported fetch-op operations. */
8807 code = gimple_assign_rhs_code (stmt);
8808 switch (code)
8810 case PLUS_EXPR:
8811 case POINTER_PLUS_EXPR:
8812 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8813 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8814 break;
8815 case MINUS_EXPR:
8816 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8817 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8818 break;
8819 case BIT_AND_EXPR:
8820 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8821 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8822 break;
8823 case BIT_IOR_EXPR:
8824 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8825 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8826 break;
8827 case BIT_XOR_EXPR:
8828 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8829 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8830 break;
8831 default:
8832 return false;
8835 /* Make sure the expression is of the proper form. */
8836 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8837 rhs = gimple_assign_rhs2 (stmt);
8838 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8839 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8840 rhs = gimple_assign_rhs1 (stmt);
8841 else
8842 return false;
8844 tmpbase = ((enum built_in_function)
8845 ((need_new ? newbase : oldbase) + index + 1));
8846 decl = builtin_decl_explicit (tmpbase);
8847 if (decl == NULL_TREE)
8848 return false;
8849 itype = TREE_TYPE (TREE_TYPE (decl));
8850 imode = TYPE_MODE (itype);
8852 /* We could test all of the various optabs involved, but the fact of the
8853 matter is that (with the exception of i486 vs i586 and xadd) all targets
8854 that support any atomic operation optab also implement compare-and-swap.
8855 Let optabs.cc take care of expanding any compare-and-swap loop. */
8856 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8857 return false;
8859 gsi = gsi_last_nondebug_bb (load_bb);
8860 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8862 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8863 It only requires that the operation happen atomically, so we use
8864 the memory model derived from the directive's memory-order clause. */
8865 call = build_call_expr_loc (loc, decl, 3, addr,
8866 fold_convert_loc (loc, itype, rhs),
8867 build_int_cst (NULL, mo));
8869 if (need_old || need_new)
8871 lhs = need_old ? loaded_val : stored_val;
8872 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8873 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8875 else
8876 call = fold_convert_loc (loc, void_type_node, call);
8877 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8878 gsi_remove (&gsi, true);
8880 gsi = gsi_last_nondebug_bb (store_bb);
8881 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8882 gsi_remove (&gsi, true);
8883 gsi = gsi_last_nondebug_bb (store_bb);
8884 stmt = gsi_stmt (gsi);
8885 gsi_remove (&gsi, true);
8887 if (gimple_in_ssa_p (cfun))
8889 release_defs (stmt);
8890 update_ssa (TODO_update_ssa_no_phi);
8893 return true;
8896 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8897 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8898 Returns false if the expression is not of the proper form. */
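/* Source-level example of what this recognizes (a sketch; the matched
   GIMPLE forms are listed inside the function):

     #pragma omp atomic compare
     x = x == e ? d : x;

   which is expanded into a call to the internal function
   .ATOMIC_COMPARE_EXCHANGE (addr, e, d, flags, memmodel, fail_memmodel).  */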
8900 static bool
8901 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8902 tree loaded_val, tree stored_val, int index)
8904 /* We expect to find the following sequences:
8906 load_bb:
8907 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8909 store_bb:
8910 val = tmp == e ? d : tmp;
8911 GIMPLE_OMP_ATOMIC_STORE (val)
8913 or in store_bb instead:
8914 tmp2 = tmp == e;
8915 val = tmp2 ? d : tmp;
8916 GIMPLE_OMP_ATOMIC_STORE (val)
8919 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8920 val = e == tmp3 ? d : tmp;
8921 GIMPLE_OMP_ATOMIC_STORE (val)
8923 etc. */
8926 basic_block store_bb = single_succ (load_bb);
8927 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8928 gimple *store_stmt = gsi_stmt (gsi);
8929 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8930 return false;
8931 gsi_prev_nondebug (&gsi);
8932 if (gsi_end_p (gsi))
8933 return false;
8934 gimple *condexpr_stmt = gsi_stmt (gsi);
8935 if (!is_gimple_assign (condexpr_stmt)
8936 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8937 return false;
8938 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8939 return false;
8940 gimple *cond_stmt = NULL;
8941 gimple *vce_stmt = NULL;
8942 gsi_prev_nondebug (&gsi);
8943 if (!gsi_end_p (gsi))
8945 cond_stmt = gsi_stmt (gsi);
8946 if (!is_gimple_assign (cond_stmt))
8947 return false;
8948 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8950 gsi_prev_nondebug (&gsi);
8951 if (!gsi_end_p (gsi))
8953 vce_stmt = gsi_stmt (gsi);
8954 if (!is_gimple_assign (vce_stmt)
8955 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8956 return false;
8959 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8960 std::swap (vce_stmt, cond_stmt);
8961 else
8962 return false;
8963 if (vce_stmt)
8965 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8966 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8967 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8968 return false;
8969 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8970 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8971 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8972 TYPE_SIZE (TREE_TYPE (loaded_val))))
8973 return false;
8974 gsi_prev_nondebug (&gsi);
8975 if (!gsi_end_p (gsi))
8976 return false;
8979 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8980 tree cond_op1, cond_op2;
8981 if (cond_stmt)
8983 /* We should now always get a separate cond_stmt. */
8984 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8985 return false;
8986 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8987 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8989 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8990 return false;
8991 else
8993 cond_op1 = TREE_OPERAND (cond, 0);
8994 cond_op2 = TREE_OPERAND (cond, 1);
8996 tree d;
8997 if (TREE_CODE (cond) == NE_EXPR)
8999 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9000 return false;
9001 d = gimple_assign_rhs3 (condexpr_stmt);
9003 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9004 return false;
9005 else
9006 d = gimple_assign_rhs2 (condexpr_stmt);
9007 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9008 if (operand_equal_p (e, cond_op1))
9009 e = cond_op2;
9010 else if (operand_equal_p (e, cond_op2))
9011 e = cond_op1;
9012 else
9013 return false;
9015 location_t loc = gimple_location (store_stmt);
9016 gimple *load_stmt = last_stmt (load_bb);
9017 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9018 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9019 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9020 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9021 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9022 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9023 gcc_checking_assert (!need_old || !need_new);
9025 enum built_in_function fncode
9026 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9027 + index + 1);
9028 tree cmpxchg = builtin_decl_explicit (fncode);
9029 if (cmpxchg == NULL_TREE)
9030 return false;
9031 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9033 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9034 || !can_atomic_load_p (TYPE_MODE (itype)))
9035 return false;
9037 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9038 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9039 return false;
9041 gsi = gsi_for_stmt (store_stmt);
9042 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9044 tree ne = create_tmp_reg (itype);
9045 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9046 gimple_set_location (g, loc);
9047 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9048 e = ne;
9050 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9052 tree nd = create_tmp_reg (itype);
9053 enum tree_code code;
9054 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9056 code = VIEW_CONVERT_EXPR;
9057 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9059 else
9060 code = NOP_EXPR;
9061 gimple *g = gimple_build_assign (nd, code, d);
9062 gimple_set_location (g, loc);
9063 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9064 d = nd;
9067 tree ctype = build_complex_type (itype);
9068 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9069 gimple *g
9070 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9071 build_int_cst (integer_type_node, flag),
9072 mo, fmo);
9073 tree cres = create_tmp_reg (ctype);
9074 gimple_call_set_lhs (g, cres);
9075 gimple_set_location (g, loc);
9076 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9078 if (cond_stmt || need_old || need_new)
9080 tree im = create_tmp_reg (itype);
9081 g = gimple_build_assign (im, IMAGPART_EXPR,
9082 build1 (IMAGPART_EXPR, itype, cres));
9083 gimple_set_location (g, loc);
9084 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9086 tree re = NULL_TREE;
9087 if (need_old || need_new)
9089 re = create_tmp_reg (itype);
9090 g = gimple_build_assign (re, REALPART_EXPR,
9091 build1 (REALPART_EXPR, itype, cres));
9092 gimple_set_location (g, loc);
9093 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9096 if (cond_stmt)
9098 g = gimple_build_assign (cond, NOP_EXPR, im);
9099 gimple_set_location (g, loc);
9100 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9103 if (need_new)
9105 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9106 cond_stmt
9107 ? cond : build2 (NE_EXPR, boolean_type_node,
9108 im, build_zero_cst (itype)),
9109 d, re);
9110 gimple_set_location (g, loc);
9111 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9112 re = gimple_assign_lhs (g);
9115 if (need_old || need_new)
9117 tree v = need_old ? loaded_val : stored_val;
9118 enum tree_code code;
9119 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9121 code = VIEW_CONVERT_EXPR;
9122 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9124 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9125 code = NOP_EXPR;
9126 else
9127 code = TREE_CODE (re);
9128 g = gimple_build_assign (v, code, re);
9129 gimple_set_location (g, loc);
9130 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9134 gsi_remove (&gsi, true);
9135 gsi = gsi_for_stmt (load_stmt);
9136 gsi_remove (&gsi, true);
9137 gsi = gsi_for_stmt (condexpr_stmt);
9138 gsi_remove (&gsi, true);
9139 if (cond_stmt)
9141 gsi = gsi_for_stmt (cond_stmt);
9142 gsi_remove (&gsi, true);
9144 if (vce_stmt)
9146 gsi = gsi_for_stmt (vce_stmt);
9147 gsi_remove (&gsi, true);
9150 return true;
9153 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9155 oldval = *addr;
9156 repeat:
9157 newval = rhs; // with oldval replacing *addr in rhs
9158 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9159 if (oldval != newval)
9160 goto repeat;
9162 INDEX is log2 of the size of the data type, and thus usable to find the
9163 index of the builtin decl. */
9165 static bool
9166 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9167 tree addr, tree loaded_val, tree stored_val,
9168 int index)
9170 tree loadedi, storedi, initial, new_storedi, old_vali;
9171 tree type, itype, cmpxchg, iaddr, atype;
9172 gimple_stmt_iterator si;
9173 basic_block loop_header = single_succ (load_bb);
9174 gimple *phi, *stmt;
9175 edge e;
9176 enum built_in_function fncode;
9178 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9179 + index + 1);
9180 cmpxchg = builtin_decl_explicit (fncode);
9181 if (cmpxchg == NULL_TREE)
9182 return false;
9183 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9184 atype = type;
9185 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9187 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9188 || !can_atomic_load_p (TYPE_MODE (itype)))
9189 return false;
9191 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9192 si = gsi_last_nondebug_bb (load_bb);
9193 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9194 location_t loc = gimple_location (gsi_stmt (si));
9195 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9196 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9197 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9199 /* For floating-point values, we'll need to view-convert them to integers
9200 so that we can perform the atomic compare and swap. Simplify the
9201 following code by always setting up the "i"ntegral variables. */
9202 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9204 tree iaddr_val;
9206 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9207 true));
9208 atype = itype;
9209 iaddr_val
9210 = force_gimple_operand_gsi (&si,
9211 fold_convert (TREE_TYPE (iaddr), addr),
9212 false, NULL_TREE, true, GSI_SAME_STMT);
9213 stmt = gimple_build_assign (iaddr, iaddr_val);
9214 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9215 loadedi = create_tmp_var (itype);
9216 if (gimple_in_ssa_p (cfun))
9217 loadedi = make_ssa_name (loadedi);
9219 else
9221 iaddr = addr;
9222 loadedi = loaded_val;
9225 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9226 tree loaddecl = builtin_decl_explicit (fncode);
9227 if (loaddecl)
9228 initial
9229 = fold_convert (atype,
9230 build_call_expr (loaddecl, 2, iaddr,
9231 build_int_cst (NULL_TREE,
9232 MEMMODEL_RELAXED)));
9233 else
9235 tree off
9236 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9237 true), 0);
9238 initial = build2 (MEM_REF, atype, iaddr, off);
9241 initial
9242 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9243 GSI_SAME_STMT);
9245 /* Move the value to the LOADEDI temporary. */
9246 if (gimple_in_ssa_p (cfun))
9248 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9249 phi = create_phi_node (loadedi, loop_header);
9250 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9251 initial);
9253 else
9254 gsi_insert_before (&si,
9255 gimple_build_assign (loadedi, initial),
9256 GSI_SAME_STMT);
9257 if (loadedi != loaded_val)
9259 gimple_stmt_iterator gsi2;
9260 tree x;
9262 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9263 gsi2 = gsi_start_bb (loop_header);
9264 if (gimple_in_ssa_p (cfun))
9266 gassign *stmt;
9267 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9268 true, GSI_SAME_STMT);
9269 stmt = gimple_build_assign (loaded_val, x);
9270 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9272 else
9274 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9275 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9276 true, GSI_SAME_STMT);
9279 gsi_remove (&si, true);
9281 si = gsi_last_nondebug_bb (store_bb);
9282 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9284 if (iaddr == addr)
9285 storedi = stored_val;
9286 else
9287 storedi
9288 = force_gimple_operand_gsi (&si,
9289 build1 (VIEW_CONVERT_EXPR, itype,
9290 stored_val), true, NULL_TREE, true,
9291 GSI_SAME_STMT);
9293 /* Build the compare&swap statement. */
9294 tree ctype = build_complex_type (itype);
9295 int flag = int_size_in_bytes (itype);
9296 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9297 ctype, 6, iaddr, loadedi,
9298 storedi,
9299 build_int_cst (integer_type_node,
9300 flag),
9301 mo, fmo);
9302 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9303 new_storedi = force_gimple_operand_gsi (&si,
9304 fold_convert (TREE_TYPE (loadedi),
9305 new_storedi),
9306 true, NULL_TREE,
9307 true, GSI_SAME_STMT);
9309 if (gimple_in_ssa_p (cfun))
9310 old_vali = loadedi;
9311 else
9313 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9314 stmt = gimple_build_assign (old_vali, loadedi);
9315 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9317 stmt = gimple_build_assign (loadedi, new_storedi);
9318 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9321 /* Note that we always perform the comparison as an integer, even for
9322 floating point. This allows the atomic operation to properly
9323 succeed even with NaNs and -0.0. */
9324 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9325 stmt = gimple_build_cond_empty (ne);
9326 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9328 /* Update cfg. */
9329 e = single_succ_edge (store_bb);
9330 e->flags &= ~EDGE_FALLTHRU;
9331 e->flags |= EDGE_FALSE_VALUE;
9332 /* Expect no looping. */
9333 e->probability = profile_probability::guessed_always ();
9335 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9336 e->probability = profile_probability::guessed_never ();
9338 /* Copy the new value to loadedi (we already did that before the condition
9339 if we are not in SSA). */
9340 if (gimple_in_ssa_p (cfun))
9342 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9343 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9346 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9347 gsi_remove (&si, true);
9349 class loop *loop = alloc_loop ();
9350 loop->header = loop_header;
9351 loop->latch = store_bb;
9352 add_loop (loop, loop_header->loop_father);
9354 if (gimple_in_ssa_p (cfun))
9355 update_ssa (TODO_update_ssa_no_phi);
9357 return true;
9360 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9362 GOMP_atomic_start ();
9363 *addr = rhs;
9364 GOMP_atomic_end ();
9366 The result is not globally atomic, but works so long as all parallel
9367 references are within #pragma omp atomic directives. According to
9368 responses received from omp@openmp.org, this appears to be within spec,
9369 which makes sense, since that's how several other compilers handle
9370 this situation as well.
9371 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9372 expanding. STORED_VAL is the operand of the matching
9373 GIMPLE_OMP_ATOMIC_STORE.
9375 We replace
9376 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9377 loaded_val = *addr;
9379 and replace
9380 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9381 *addr = stored_val;
9384 static bool
9385 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9386 tree addr, tree loaded_val, tree stored_val)
9388 gimple_stmt_iterator si;
9389 gassign *stmt;
9390 tree t;
9392 si = gsi_last_nondebug_bb (load_bb);
9393 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9395 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9396 t = build_call_expr (t, 0);
9397 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9399 tree mem = build_simple_mem_ref (addr);
9400 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9401 TREE_OPERAND (mem, 1)
9402 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9403 true),
9404 TREE_OPERAND (mem, 1));
9405 stmt = gimple_build_assign (loaded_val, mem);
9406 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9407 gsi_remove (&si, true);
9409 si = gsi_last_nondebug_bb (store_bb);
9410 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9412 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9413 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9415 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9416 t = build_call_expr (t, 0);
9417 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9418 gsi_remove (&si, true);
9420 if (gimple_in_ssa_p (cfun))
9421 update_ssa (TODO_update_ssa_no_phi);
9422 return true;
9425 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9426 using expand_omp_atomic_fetch_op. If that fails, we try to
9427 call expand_omp_atomic_pipeline, and if that fails too, the
9428 ultimate fallback is wrapping the operation in a mutex
9429 (expand_omp_atomic_mutex). REGION is the atomic region built
9430 by build_omp_regions_1(). */
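/* Worked example of the size/alignment check below (illustrative): for a
   naturally aligned 4-byte int, TYPE_SIZE_UNIT is 4, so
   index = exact_log2 (4) = 2 and the *_4 builtins are candidates; the
   alignment check exact_log2 (align) >= index also passes, so the
   specialized expansions are tried before the mutex fallback.  */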
9432 static void
9433 expand_omp_atomic (struct omp_region *region)
9435 basic_block load_bb = region->entry, store_bb = region->exit;
9436 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9437 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9438 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9439 tree addr = gimple_omp_atomic_load_rhs (load);
9440 tree stored_val = gimple_omp_atomic_store_val (store);
9441 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9442 HOST_WIDE_INT index;
9444 /* Make sure the type is one of the supported sizes. */
9445 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9446 index = exact_log2 (index);
9447 if (index >= 0 && index <= 4)
9449 unsigned int align = TYPE_ALIGN_UNIT (type);
9451 /* __sync builtins require strict data alignment. */
9452 if (exact_log2 (align) >= index)
9454 /* Atomic load. */
9455 scalar_mode smode;
9456 if (loaded_val == stored_val
9457 && (is_int_mode (TYPE_MODE (type), &smode)
9458 || is_float_mode (TYPE_MODE (type), &smode))
9459 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9460 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9461 return;
9463 /* Atomic store. */
9464 if ((is_int_mode (TYPE_MODE (type), &smode)
9465 || is_float_mode (TYPE_MODE (type), &smode))
9466 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9467 && store_bb == single_succ (load_bb)
9468 && first_stmt (store_bb) == store
9469 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9470 stored_val, index))
9471 return;
9473 /* When possible, use specialized atomic update functions. */
9474 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9475 && store_bb == single_succ (load_bb)
9476 && expand_omp_atomic_fetch_op (load_bb, addr,
9477 loaded_val, stored_val, index))
9478 return;
9480 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9481 if (store_bb == single_succ (load_bb)
9482 && !gimple_in_ssa_p (cfun)
9483 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9484 index))
9485 return;
9487 /* If we don't have specialized __sync builtins, try to implement
9488 as a compare and swap loop. */
9489 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9490 loaded_val, stored_val, index))
9491 return;
9495 /* The ultimate fallback is wrapping the operation in a mutex. */
9496 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9499 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9500 at REGION_EXIT. */
9502 static void
9503 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9504 basic_block region_exit)
9506 class loop *outer = region_entry->loop_father;
9507 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9509 /* Don't parallelize the kernels region if it contains more than one outer
9510 loop. */
9511 unsigned int nr_outer_loops = 0;
9512 class loop *single_outer = NULL;
9513 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9515 gcc_assert (loop_outer (loop) == outer);
9517 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9518 continue;
9520 if (region_exit != NULL
9521 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9522 continue;
9524 nr_outer_loops++;
9525 single_outer = loop;
9527 if (nr_outer_loops != 1)
9528 return;
9530 for (class loop *loop = single_outer->inner;
9531 loop != NULL;
9532 loop = loop->inner)
9533 if (loop->next)
9534 return;
9536 /* Mark the loops in the region. */
9537 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9538 loop->in_oacc_kernels_region = true;
9541 /* Build target argument identifier from the DEVICE identifier, value
9542 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
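/* Sketch of the encoding built below (names here are descriptive only):

     id_word = DEVICE
               | (SUBSEQUENT_PARAM ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0)
               | ID

   i.e. the device selector, the "value follows in the next element" flag
   and the value identifier are packed into a single integer.  */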
9544 static tree
9545 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9547 tree t = build_int_cst (integer_type_node, device);
9548 if (subseqent_param)
9549 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9550 build_int_cst (integer_type_node,
9551 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9552 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9553 build_int_cst (integer_type_node, id));
9554 return t;
9557 /* Like above but return it in a type that can be directly stored as an element
9558 of the argument array. */
9560 static tree
9561 get_target_argument_identifier (int device, bool subseqent_param, int id)
9563 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9564 return fold_convert (ptr_type_node, t);
9567 /* Return a target argument consisting of DEVICE identifier, value identifier
9568 ID, and the actual VALUE. */
9570 static tree
9571 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9572 tree value)
9574 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9575 fold_convert (integer_type_node, value),
9576 build_int_cst (unsigned_type_node,
9577 GOMP_TARGET_ARG_VALUE_SHIFT));
9578 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9579 get_target_argument_identifier_1 (device, false, id));
9580 t = fold_convert (ptr_type_node, t);
9581 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9584 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9585 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9586 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9587 arguments. */
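/* Example (illustrative): a constant num_teams (4) fits in 16 bits, so a
   single element is pushed with the value 4 shifted by
   GOMP_TARGET_ARG_VALUE_SHIFT and ORed with the identifier; a
   non-constant thread_limit (n) pushes two elements instead: the
   identifier with the SUBSEQUENT_PARAM flag set, then N itself converted
   to a pointer-sized value.  */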
9589 static void
9590 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9591 int id, tree value, vec <tree> *args)
9593 if (tree_fits_shwi_p (value)
9594 && tree_to_shwi (value) > -(1 << 15)
9595 && tree_to_shwi (value) < (1 << 15))
9596 args->quick_push (get_target_argument_value (gsi, device, id, value));
9597 else
9599 args->quick_push (get_target_argument_identifier (device, true, id));
9600 value = fold_convert (ptr_type_node, value);
9601 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9602 GSI_SAME_STMT);
9603 args->quick_push (value);
9607 /* Create an array of arguments that is then passed to GOMP_target. */
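/* Sketch of the array built below (illustrative) for
   'target teams num_teams (4) thread_limit (n)' with non-constant N:

     { <NUM_TEAMS id+value 4>, <THREAD_LIMIT id>, n, NULL }

   i.e. one or two elements per argument, terminated by a null pointer.  */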
9609 static tree
9610 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9612 auto_vec <tree, 6> args;
9613 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9614 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9615 if (c)
9616 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9617 else
9618 t = integer_minus_one_node;
9619 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9620 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9622 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9623 if (c)
9624 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9625 else
9626 t = integer_minus_one_node;
9627 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9628 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9629 &args);
9631 /* Produce more, perhaps device specific, arguments here. */
9633 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9634 args.length () + 1),
9635 ".omp_target_args");
9636 for (unsigned i = 0; i < args.length (); i++)
9638 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9639 build_int_cst (integer_type_node, i),
9640 NULL_TREE, NULL_TREE);
9641 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9642 GSI_SAME_STMT);
9644 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9645 build_int_cst (integer_type_node, args.length ()),
9646 NULL_TREE, NULL_TREE);
9647 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9648 GSI_SAME_STMT);
9649 TREE_ADDRESSABLE (argarray) = 1;
9650 return build_fold_addr_expr (argarray);
9653 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
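/* For a plain 'omp target' region the net effect is roughly (sketch only;
   the argument names here are descriptive, not the actual temporaries):
   the region body is outlined into a child function and recorded in the
   offload table, and a launch call such as

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);

   is emitted in its place; the data-directive and OpenACC variants are
   selected via start_ix below.  */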
9655 static void
9656 expand_omp_target (struct omp_region *region)
9658 basic_block entry_bb, exit_bb, new_bb;
9659 struct function *child_cfun;
9660 tree child_fn, block, t;
9661 gimple_stmt_iterator gsi;
9662 gomp_target *entry_stmt;
9663 gimple *stmt;
9664 edge e;
9665 bool offloaded;
9666 int target_kind;
9668 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9669 target_kind = gimple_omp_target_kind (entry_stmt);
9670 new_bb = region->entry;
9672 offloaded = is_gimple_omp_offloaded (entry_stmt);
9673 switch (target_kind)
9675 case GF_OMP_TARGET_KIND_REGION:
9676 case GF_OMP_TARGET_KIND_UPDATE:
9677 case GF_OMP_TARGET_KIND_ENTER_DATA:
9678 case GF_OMP_TARGET_KIND_EXIT_DATA:
9679 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9680 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9681 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9682 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9683 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9684 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9685 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9686 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9687 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9688 case GF_OMP_TARGET_KIND_DATA:
9689 case GF_OMP_TARGET_KIND_OACC_DATA:
9690 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9691 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9692 break;
9693 default:
9694 gcc_unreachable ();
9697 child_fn = NULL_TREE;
9698 child_cfun = NULL;
9699 if (offloaded)
9701 child_fn = gimple_omp_target_child_fn (entry_stmt);
9702 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9705 /* Supported by expand_omp_taskreg, but not here. */
9706 if (child_cfun != NULL)
9707 gcc_checking_assert (!child_cfun->cfg);
9708 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9710 entry_bb = region->entry;
9711 exit_bb = region->exit;
9713 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9714 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9716 /* From here on, all OpenACC compute constructs are mapped to
9717 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9718 To distinguish between them, we attach attributes. */
9719 switch (target_kind)
9721 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9722 DECL_ATTRIBUTES (child_fn)
9723 = tree_cons (get_identifier ("oacc parallel"),
9724 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9725 break;
9726 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9727 DECL_ATTRIBUTES (child_fn)
9728 = tree_cons (get_identifier ("oacc kernels"),
9729 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9730 break;
9731 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9732 DECL_ATTRIBUTES (child_fn)
9733 = tree_cons (get_identifier ("oacc serial"),
9734 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9735 break;
9736 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9737 DECL_ATTRIBUTES (child_fn)
9738 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9739 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9740 break;
9741 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9742 DECL_ATTRIBUTES (child_fn)
9743 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9744 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9745 break;
9746 default:
9747 /* Make sure we don't miss any. */
9748 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9749 && is_gimple_omp_offloaded (entry_stmt)));
9750 break;
9753 if (offloaded)
9755 unsigned srcidx, dstidx, num;
9757 /* If the offloading region needs data sent from the parent
9758 function, then the very first statement (except possible
9759 tree profile counter updates) of the offloading body
9760 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9761 &.OMP_DATA_O is passed as an argument to the child function,
9762 we need to replace it with the argument as seen by the child
9763 function.
9765 In most cases, this will end up being the identity assignment
9766 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9767 a function call that has been inlined, the original PARM_DECL
9768 .OMP_DATA_I may have been converted into a different local
9769 variable. In which case, we need to keep the assignment. */
9770 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9771 if (data_arg)
9773 basic_block entry_succ_bb = single_succ (entry_bb);
9774 gimple_stmt_iterator gsi;
9775 tree arg;
9776 gimple *tgtcopy_stmt = NULL;
9777 tree sender = TREE_VEC_ELT (data_arg, 0);
9779 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9781 gcc_assert (!gsi_end_p (gsi));
9782 stmt = gsi_stmt (gsi);
9783 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9784 continue;
9786 if (gimple_num_ops (stmt) == 2)
9788 tree arg = gimple_assign_rhs1 (stmt);
9790 /* We're ignoring the subcode because we're
9791 effectively doing a STRIP_NOPS. */
9793 if (TREE_CODE (arg) == ADDR_EXPR
9794 && TREE_OPERAND (arg, 0) == sender)
9796 tgtcopy_stmt = stmt;
9797 break;
9802 gcc_assert (tgtcopy_stmt != NULL);
9803 arg = DECL_ARGUMENTS (child_fn);
9805 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9806 gsi_remove (&gsi, true);
9809 /* Declare local variables needed in CHILD_CFUN. */
9810 block = DECL_INITIAL (child_fn);
9811 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9812 /* The gimplifier could record temporaries in the offloading block
9813 rather than in the containing function's local_decls chain,
9814 which would mean cgraph missed finalizing them. Do it now. */
9815 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9816 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9817 varpool_node::finalize_decl (t);
9818 DECL_SAVED_TREE (child_fn) = NULL;
9819 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9820 gimple_set_body (child_fn, NULL);
9821 TREE_USED (block) = 1;
9823 /* Reset DECL_CONTEXT on function arguments. */
9824 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9825 DECL_CONTEXT (t) = child_fn;
9827 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9828 so that it can be moved to the child function. */
9829 gsi = gsi_last_nondebug_bb (entry_bb);
9830 stmt = gsi_stmt (gsi);
9831 gcc_assert (stmt
9832 && gimple_code (stmt) == gimple_code (entry_stmt));
9833 e = split_block (entry_bb, stmt);
9834 gsi_remove (&gsi, true);
9835 entry_bb = e->dest;
9836 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9838 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9839 if (exit_bb)
9841 gsi = gsi_last_nondebug_bb (exit_bb);
9842 gcc_assert (!gsi_end_p (gsi)
9843 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9844 stmt = gimple_build_return (NULL);
9845 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9846 gsi_remove (&gsi, true);
9849 /* Move the offloading region into CHILD_CFUN. */
9851 block = gimple_block (entry_stmt);
9853 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9854 if (exit_bb)
9855 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9856 /* When the OMP expansion process cannot guarantee an up-to-date
9857 loop tree, arrange for the child function to fix up loops. */
9858 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9859 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9861 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9862 num = vec_safe_length (child_cfun->local_decls);
9863 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9865 t = (*child_cfun->local_decls)[srcidx];
9866 if (DECL_CONTEXT (t) == cfun->decl)
9867 continue;
9868 if (srcidx != dstidx)
9869 (*child_cfun->local_decls)[dstidx] = t;
9870 dstidx++;
9872 if (dstidx != num)
9873 vec_safe_truncate (child_cfun->local_decls, dstidx);
9875 /* Inform the callgraph about the new function. */
9876 child_cfun->curr_properties = cfun->curr_properties;
9877 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9878 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9879 cgraph_node *node = cgraph_node::get_create (child_fn);
9880 node->parallelized_function = 1;
9881 cgraph_node::add_new_function (child_fn, true);
9883 /* Add the new function to the offload table. */
9884 if (ENABLE_OFFLOADING)
9886 if (in_lto_p)
9887 DECL_PRESERVE_P (child_fn) = 1;
9888 vec_safe_push (offload_funcs, child_fn);
9891 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9892 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9894 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9895 fixed in a following pass. */
9896 push_cfun (child_cfun);
9897 if (need_asm)
9898 assign_assembler_name_if_needed (child_fn);
9899 cgraph_edge::rebuild_edges ();
9901 /* Some EH regions might become dead, see PR34608. If
9902 pass_cleanup_cfg isn't the first pass to happen with the
9903 new child, these dead EH edges might cause problems.
9904 Clean them up now. */
9905 if (flag_exceptions)
9907 basic_block bb;
9908 bool changed = false;
9910 FOR_EACH_BB_FN (bb, cfun)
9911 changed |= gimple_purge_dead_eh_edges (bb);
9912 if (changed)
9913 cleanup_tree_cfg ();
9915 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9916 verify_loop_structure ();
9917 pop_cfun ();
9919 if (dump_file && !gimple_in_ssa_p (cfun))
9921 omp_any_child_fn_dumped = true;
9922 dump_function_header (dump_file, child_fn, dump_flags);
9923 dump_function_to_file (child_fn, dump_file, dump_flags);
9926 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9929 /* Emit a library call to launch the offloading region, or do data
9930 transfers. */
9931 tree t1, t2, t3, t4, depend, c, clauses;
9932 enum built_in_function start_ix;
9933 unsigned int flags_i = 0;
9935 switch (gimple_omp_target_kind (entry_stmt))
9937 case GF_OMP_TARGET_KIND_REGION:
9938 start_ix = BUILT_IN_GOMP_TARGET;
9939 break;
9940 case GF_OMP_TARGET_KIND_DATA:
9941 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9942 break;
9943 case GF_OMP_TARGET_KIND_UPDATE:
9944 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9945 break;
9946 case GF_OMP_TARGET_KIND_ENTER_DATA:
9947 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9948 break;
9949 case GF_OMP_TARGET_KIND_EXIT_DATA:
9950 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9951 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9952 break;
9953 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9954 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9955 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9956 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9957 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9958 start_ix = BUILT_IN_GOACC_PARALLEL;
9959 break;
9960 case GF_OMP_TARGET_KIND_OACC_DATA:
9961 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9962 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9963 start_ix = BUILT_IN_GOACC_DATA_START;
9964 break;
9965 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9966 start_ix = BUILT_IN_GOACC_UPDATE;
9967 break;
9968 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9969 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9970 break;
9971 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9972 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9973 break;
9974 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9975 start_ix = BUILT_IN_GOACC_DECLARE;
9976 break;
9977 default:
9978 gcc_unreachable ();
9981 clauses = gimple_omp_target_clauses (entry_stmt);
9983 tree device = NULL_TREE;
9984 location_t device_loc = UNKNOWN_LOCATION;
9985 tree goacc_flags = NULL_TREE;
9986 bool need_device_adjustment = false;
9987 gimple_stmt_iterator adj_gsi;
9988 if (is_gimple_omp_oacc (entry_stmt))
9990 /* By default, no GOACC_FLAGs are set. */
9991 goacc_flags = integer_zero_node;
9993 else
9995 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9996 if (c)
9998 device = OMP_CLAUSE_DEVICE_ID (c);
9999 /* Ensure 'device' is of the correct type. */
10000 device = fold_convert_loc (device_loc, integer_type_node, device);
10001 if (TREE_CODE (device) == INTEGER_CST)
10003 if (wi::to_wide (device) == GOMP_DEVICE_ICV)
10004 device = build_int_cst (integer_type_node,
10005 GOMP_DEVICE_HOST_FALLBACK);
10006 else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10007 device = build_int_cst (integer_type_node,
10008 GOMP_DEVICE_HOST_FALLBACK - 1);
10010 else
10011 need_device_adjustment = true;
10012 device_loc = OMP_CLAUSE_LOCATION (c);
10013 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10014 sorry_at (device_loc, "%<ancestor%> not yet supported");
10016 else
10018 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10019 library choose). */
10020 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10021 device_loc = gimple_location (entry_stmt);
10024 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10025 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10026 nowait doesn't appear. */
10027 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10028 c = NULL;
10029 if (c)
10030 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10033 /* By default, there is no conditional. */
10034 tree cond = NULL_TREE;
10035 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10036 if (c)
10037 cond = OMP_CLAUSE_IF_EXPR (c);
10038 /* If we found the clause 'if (cond)', build:
10039 OpenACC: goacc_flags = (cond ? goacc_flags
10040 : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10041 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10042 if (cond)
10044 tree *tp;
10045 if (is_gimple_omp_oacc (entry_stmt))
10046 tp = &goacc_flags;
10047 else
10048 tp = &device;
10050 cond = gimple_boolify (cond);
10052 basic_block cond_bb, then_bb, else_bb;
10053 edge e;
10054 tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10055 if (offloaded)
10056 e = split_block_after_labels (new_bb);
10057 else
10059 gsi = gsi_last_nondebug_bb (new_bb);
10060 gsi_prev (&gsi);
10061 e = split_block (new_bb, gsi_stmt (gsi));
10063 cond_bb = e->src;
10064 new_bb = e->dest;
10065 remove_edge (e);
10067 then_bb = create_empty_bb (cond_bb);
10068 else_bb = create_empty_bb (then_bb);
10069 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10070 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10072 stmt = gimple_build_cond_empty (cond);
10073 gsi = gsi_last_bb (cond_bb);
10074 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10076 gsi = gsi_start_bb (then_bb);
10077 stmt = gimple_build_assign (tmp_var, *tp);
10078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10079 adj_gsi = gsi;
10081 gsi = gsi_start_bb (else_bb);
10082 if (is_gimple_omp_oacc (entry_stmt))
10083 stmt = gimple_build_assign (tmp_var,
10084 BIT_IOR_EXPR,
10085 *tp,
10086 build_int_cst (integer_type_node,
10087 GOACC_FLAG_HOST_FALLBACK));
10088 else
10089 stmt = gimple_build_assign (tmp_var,
10090 build_int_cst (integer_type_node,
10091 GOMP_DEVICE_HOST_FALLBACK));
10092 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10094 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10095 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10096 add_bb_to_loop (then_bb, cond_bb->loop_father);
10097 add_bb_to_loop (else_bb, cond_bb->loop_father);
10098 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10099 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10101 *tp = tmp_var;
10103 gsi = gsi_last_nondebug_bb (new_bb);
10105 else
10107 gsi = gsi_last_nondebug_bb (new_bb);
10109 if (device != NULL_TREE)
10110 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10111 true, GSI_SAME_STMT);
10112 if (need_device_adjustment)
10114 tree tmp_var = create_tmp_var (TREE_TYPE (device));
10115 stmt = gimple_build_assign (tmp_var, device);
10116 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10117 adj_gsi = gsi_for_stmt (stmt);
10118 device = tmp_var;
10122 if (need_device_adjustment)
10124 tree uns = fold_convert (unsigned_type_node, device);
10125 uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10126 false, GSI_CONTINUE_LINKING);
10127 edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10128 basic_block cond_bb = e->src;
10129 basic_block else_bb = e->dest;
10130 if (gsi_bb (adj_gsi) == new_bb)
10132 new_bb = else_bb;
10133 gsi = gsi_last_nondebug_bb (new_bb);
10136 basic_block then_bb = create_empty_bb (cond_bb);
10137 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10139 cond = build2 (GT_EXPR, boolean_type_node, uns,
10140 build_int_cst (unsigned_type_node,
10141 GOMP_DEVICE_HOST_FALLBACK - 1));
10142 stmt = gimple_build_cond_empty (cond);
10143 adj_gsi = gsi_last_bb (cond_bb);
10144 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10146 adj_gsi = gsi_start_bb (then_bb);
10147 tree add = build2 (PLUS_EXPR, integer_type_node, device,
10148 build_int_cst (integer_type_node, -1));
10149 stmt = gimple_build_assign (device, add);
10150 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10152 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10153 e->flags = EDGE_FALSE_VALUE;
10154 add_bb_to_loop (then_bb, cond_bb->loop_father);
10155 make_edge (then_bb, else_bb, EDGE_FALLTHRU);
10158 t = gimple_omp_target_data_arg (entry_stmt);
10159 if (t == NULL)
10161 t1 = size_zero_node;
10162 t2 = build_zero_cst (ptr_type_node);
10163 t3 = t2;
10164 t4 = t2;
10166 else
10168 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10169 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10170 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10171 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10172 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10175 gimple *g;
10176 bool tagging = false;
10177 /* The maximum number used by any start_ix, without varargs. */
10178 auto_vec<tree, 11> args;
10179 if (is_gimple_omp_oacc (entry_stmt))
10181 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10182 TREE_TYPE (goacc_flags), goacc_flags);
10183 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10184 NULL_TREE, true,
10185 GSI_SAME_STMT);
10186 args.quick_push (goacc_flags_m);
10188 else
10189 args.quick_push (device);
10190 if (offloaded)
10191 args.quick_push (build_fold_addr_expr (child_fn));
10192 args.quick_push (t1);
10193 args.quick_push (t2);
10194 args.quick_push (t3);
10195 args.quick_push (t4);
10196 switch (start_ix)
10198 case BUILT_IN_GOACC_DATA_START:
10199 case BUILT_IN_GOACC_DECLARE:
10200 case BUILT_IN_GOMP_TARGET_DATA:
10201 break;
10202 case BUILT_IN_GOMP_TARGET:
10203 case BUILT_IN_GOMP_TARGET_UPDATE:
10204 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10205 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10206 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10207 if (c)
10208 depend = OMP_CLAUSE_DECL (c);
10209 else
10210 depend = build_int_cst (ptr_type_node, 0);
10211 args.quick_push (depend);
10212 if (start_ix == BUILT_IN_GOMP_TARGET)
10213 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10214 break;
10215 case BUILT_IN_GOACC_PARALLEL:
10216 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10218 tree dims = NULL_TREE;
10219 unsigned int ix;
10221 /* For serial constructs we set all dimensions to 1. */
10222 for (ix = GOMP_DIM_MAX; ix--;)
10223 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10224 oacc_replace_fn_attrib (child_fn, dims);
10226 else
10227 oacc_set_fn_attrib (child_fn, clauses, &args);
10228 tagging = true;
10229 /* FALLTHRU */
10230 case BUILT_IN_GOACC_ENTER_DATA:
10231 case BUILT_IN_GOACC_EXIT_DATA:
10232 case BUILT_IN_GOACC_UPDATE:
10234 tree t_async = NULL_TREE;
10236 /* If present, use the value specified by the respective
10237 clause, making sure that it is of the correct type. */
10238 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10239 if (c)
10240 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10241 integer_type_node,
10242 OMP_CLAUSE_ASYNC_EXPR (c));
10243 else if (!tagging)
10244 /* Default values for t_async. */
10245 t_async = fold_convert_loc (gimple_location (entry_stmt),
10246 integer_type_node,
10247 build_int_cst (integer_type_node,
10248 GOMP_ASYNC_SYNC));
10249 if (tagging && t_async)
10251 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10253 if (TREE_CODE (t_async) == INTEGER_CST)
10255 /* See if we can pack the async arg into the tag's
10256 operand. */
10257 i_async = TREE_INT_CST_LOW (t_async);
10258 if (i_async < GOMP_LAUNCH_OP_MAX)
10259 t_async = NULL_TREE;
10260 else
10261 i_async = GOMP_LAUNCH_OP_MAX;
10263 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10264 i_async));
10266 if (t_async)
10267 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10268 NULL_TREE, true,
10269 GSI_SAME_STMT));
10271 /* Save the argument index, and ... */
10272 unsigned t_wait_idx = args.length ();
10273 unsigned num_waits = 0;
10274 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10275 if (!tagging || c)
10276 /* ... push a placeholder. */
10277 args.safe_push (integer_zero_node);
10279 for (; c; c = OMP_CLAUSE_CHAIN (c))
10280 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10282 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10283 integer_type_node,
10284 OMP_CLAUSE_WAIT_EXPR (c));
10285 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10286 GSI_SAME_STMT);
10287 args.safe_push (arg);
10288 num_waits++;
10291 if (!tagging || num_waits)
10293 tree len;
10295 /* Now that we know the number, update the placeholder. */
10296 if (tagging)
10297 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10298 else
10299 len = build_int_cst (integer_type_node, num_waits);
10300 len = fold_convert_loc (gimple_location (entry_stmt),
10301 unsigned_type_node, len);
10302 args[t_wait_idx] = len;
10305 break;
10306 default:
10307 gcc_unreachable ();
10309 if (tagging)
10310 /* Push terminal marker - zero. */
10311 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10313 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10314 gimple_set_location (g, gimple_location (entry_stmt));
10315 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10316 if (!offloaded)
10318 g = gsi_stmt (gsi);
10319 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10320 gsi_remove (&gsi, true);
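For orientation, here is a hedged sketch of the runtime call whose argument vector is assembled above in the BUILT_IN_GOMP_TARGET case; the prototype and the exact argument meanings are assumptions modelled on libgomp conventions, not taken from this file.

/* Illustrative only: the nine arguments pushed above for
   BUILT_IN_GOMP_TARGET correspond roughly to a call of this shape
   (assumed prototype; libgomp is authoritative):

     void GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                           void **hostaddrs, size_t *sizes,
                           unsigned short *kinds, unsigned int flags,
                           void **depend, void **args);

   device                        <- the resolved/adjusted device id
   fn                            <- build_fold_addr_expr (child_fn), if offloaded
   mapnum/hostaddrs/sizes/kinds  <- t1 .. t4 from the data-arg TREE_VEC
   flags                         <- flags_i
   depend                        <- the OMP_CLAUSE_DEPEND decl, or a null pointer
   args                          <- get_target_arguments (&gsi, entry_stmt)  */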
10324 /* Expand the parallel region tree rooted at REGION. Expansion
10325 proceeds in depth-first order. Innermost regions are expanded
10326 first. This way, parallel regions that require a new function to
10327 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10328 internal dependencies in their body. */
10330 static void
10331 expand_omp (struct omp_region *region)
10333 omp_any_child_fn_dumped = false;
10334 while (region)
10336 location_t saved_location;
10337 gimple *inner_stmt = NULL;
10339 /* First, determine whether this is a combined parallel+workshare
10340 region. */
10341 if (region->type == GIMPLE_OMP_PARALLEL)
10342 determine_parallel_type (region);
10344 if (region->type == GIMPLE_OMP_FOR
10345 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10346 inner_stmt = last_stmt (region->inner->entry);
10348 if (region->inner)
10349 expand_omp (region->inner);
10351 saved_location = input_location;
10352 if (gimple_has_location (last_stmt (region->entry)))
10353 input_location = gimple_location (last_stmt (region->entry));
10355 switch (region->type)
10357 case GIMPLE_OMP_PARALLEL:
10358 case GIMPLE_OMP_TASK:
10359 expand_omp_taskreg (region);
10360 break;
10362 case GIMPLE_OMP_FOR:
10363 expand_omp_for (region, inner_stmt);
10364 break;
10366 case GIMPLE_OMP_SECTIONS:
10367 expand_omp_sections (region);
10368 break;
10370 case GIMPLE_OMP_SECTION:
10371 /* Individual omp sections are handled together with their
10372 parent GIMPLE_OMP_SECTIONS region. */
10373 break;
10375 case GIMPLE_OMP_SINGLE:
10376 case GIMPLE_OMP_SCOPE:
10377 expand_omp_single (region);
10378 break;
10380 case GIMPLE_OMP_ORDERED:
10382 gomp_ordered *ord_stmt
10383 = as_a <gomp_ordered *> (last_stmt (region->entry));
10384 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10385 OMP_CLAUSE_DEPEND))
10387 /* We'll expand these when expanding the corresponding
10388 worksharing region with an ordered(n) clause. */
10389 gcc_assert (region->outer
10390 && region->outer->type == GIMPLE_OMP_FOR);
10391 region->ord_stmt = ord_stmt;
10392 break;
10395 /* FALLTHRU */
10396 case GIMPLE_OMP_MASTER:
10397 case GIMPLE_OMP_MASKED:
10398 case GIMPLE_OMP_TASKGROUP:
10399 case GIMPLE_OMP_CRITICAL:
10400 case GIMPLE_OMP_TEAMS:
10401 expand_omp_synch (region);
10402 break;
10404 case GIMPLE_OMP_ATOMIC_LOAD:
10405 expand_omp_atomic (region);
10406 break;
10408 case GIMPLE_OMP_TARGET:
10409 expand_omp_target (region);
10410 break;
10412 default:
10413 gcc_unreachable ();
10416 input_location = saved_location;
10417 region = region->next;
10419 if (omp_any_child_fn_dumped)
10421 if (dump_file)
10422 dump_function_header (dump_file, current_function_decl, dump_flags);
10423 omp_any_child_fn_dumped = false;
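A small worked example of the inner-regions-first ordering described above; the source snippet is illustrative, not taken from this file.

/* For a nested construct such as

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       body (i);

   the region tree has a GIMPLE_OMP_PARALLEL region with a GIMPLE_OMP_FOR
   child.  expand_omp recurses into region->inner first, so the FOR region
   is rewritten into its loop/dispatch form before expand_omp_taskreg
   outlines the parallel body into a child function; by the time the
   outlining happens, the body contains no unexpanded OMP directives.  */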
10427 /* Helper for build_omp_regions. Scan the dominator tree starting at
10428 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10429 true, the function ends once a single tree is built (otherwise, a
10430 whole forest of OMP constructs may be built). */
10432 static void
10433 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10434 bool single_tree)
10436 gimple_stmt_iterator gsi;
10437 gimple *stmt;
10438 basic_block son;
10440 gsi = gsi_last_nondebug_bb (bb);
10441 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10443 struct omp_region *region;
10444 enum gimple_code code;
10446 stmt = gsi_stmt (gsi);
10447 code = gimple_code (stmt);
10448 if (code == GIMPLE_OMP_RETURN)
10450 /* STMT is the return point out of region PARENT. Mark it
10451 as the exit point and make PARENT the immediately
10452 enclosing region. */
10453 gcc_assert (parent);
10454 region = parent;
10455 region->exit = bb;
10456 parent = parent->outer;
10458 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10460 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10461 GIMPLE_OMP_RETURN, but matches with
10462 GIMPLE_OMP_ATOMIC_LOAD. */
10463 gcc_assert (parent);
10464 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10465 region = parent;
10466 region->exit = bb;
10467 parent = parent->outer;
10469 else if (code == GIMPLE_OMP_CONTINUE)
10471 gcc_assert (parent);
10472 parent->cont = bb;
10474 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10476 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10477 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10479 else
10481 region = new_omp_region (bb, code, parent);
10482 /* Otherwise... */
10483 if (code == GIMPLE_OMP_TARGET)
10485 switch (gimple_omp_target_kind (stmt))
10487 case GF_OMP_TARGET_KIND_REGION:
10488 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10489 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10490 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10491 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10492 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10493 break;
10494 case GF_OMP_TARGET_KIND_UPDATE:
10495 case GF_OMP_TARGET_KIND_ENTER_DATA:
10496 case GF_OMP_TARGET_KIND_EXIT_DATA:
10497 case GF_OMP_TARGET_KIND_DATA:
10498 case GF_OMP_TARGET_KIND_OACC_DATA:
10499 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10500 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10501 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10502 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10503 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10504 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10505 /* ..., other than for those stand-alone directives... */
10506 region = NULL;
10507 break;
10508 default:
10509 gcc_unreachable ();
10512 else if (code == GIMPLE_OMP_ORDERED
10513 && omp_find_clause (gimple_omp_ordered_clauses
10514 (as_a <gomp_ordered *> (stmt)),
10515 OMP_CLAUSE_DEPEND))
10516 /* #pragma omp ordered depend is also just a stand-alone
10517 directive. */
10518 region = NULL;
10519 else if (code == GIMPLE_OMP_TASK
10520 && gimple_omp_task_taskwait_p (stmt))
10521 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10522 region = NULL;
10523 /* ..., this directive becomes the parent for a new region. */
10524 if (region)
10525 parent = region;
10529 if (single_tree && !parent)
10530 return;
10532 for (son = first_dom_son (CDI_DOMINATORS, bb);
10533 son;
10534 son = next_dom_son (CDI_DOMINATORS, son))
10535 build_omp_regions_1 (son, parent, single_tree);
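As a hedged illustration of the tree this walk produces; the example source is assumed, not taken from this file.

/* Example: for

     #pragma omp parallel
     {
       #pragma omp single
       work ();
     }

   the walk opens an omp_region when it reaches the block ending in
   GIMPLE_OMP_PARALLEL, makes that region PARENT for the dominated blocks,
   opens a child region for GIMPLE_OMP_SINGLE inside it, and closes each
   region again at the block ending in the matching GIMPLE_OMP_RETURN,
   yielding a two-level tree: parallel -> single.  */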
10538 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10539 root_omp_region. */
10541 static void
10542 build_omp_regions_root (basic_block root)
10544 gcc_assert (root_omp_region == NULL);
10545 build_omp_regions_1 (root, NULL, true);
10546 gcc_assert (root_omp_region != NULL);
10549 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
10551 void
10552 omp_expand_local (basic_block head)
10554 build_omp_regions_root (head);
10555 if (dump_file && (dump_flags & TDF_DETAILS))
10557 fprintf (dump_file, "\nOMP region tree\n\n");
10558 dump_omp_region (dump_file, root_omp_region, 0);
10559 fprintf (dump_file, "\n");
10562 remove_exit_barriers (root_omp_region);
10563 expand_omp (root_omp_region);
10565 omp_free_regions ();
10568 /* Scan the CFG and build a tree of OMP regions, storing the root of
10569 the OMP region tree in root_omp_region. */
10571 static void
10572 build_omp_regions (void)
10574 gcc_assert (root_omp_region == NULL);
10575 calculate_dominance_info (CDI_DOMINATORS);
10576 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10579 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10581 static unsigned int
10582 execute_expand_omp (void)
10584 build_omp_regions ();
10586 if (!root_omp_region)
10587 return 0;
10589 if (dump_file)
10591 fprintf (dump_file, "\nOMP region tree\n\n");
10592 dump_omp_region (dump_file, root_omp_region, 0);
10593 fprintf (dump_file, "\n");
10596 remove_exit_barriers (root_omp_region);
10598 expand_omp (root_omp_region);
10600 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10601 verify_loop_structure ();
10602 cleanup_tree_cfg ();
10604 omp_free_regions ();
10606 return 0;
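For a concrete sense of what this entry point produces, a hedged before/after sketch; the child-function name and the GOMP_parallel prototype are assumptions based on libgomp conventions.

/* Roughly, a function containing

     #pragma omp parallel shared(x)
     x++;

   leaves this pass with the parallel body outlined into a child function
   (e.g. main._omp_fn.0, name illustrative) and the directive replaced by
   a runtime call along the lines of

     GOMP_parallel (main._omp_fn.0, &.omp_data_o, 0, 0);

   with the assumed prototype
     void GOMP_parallel (void (*fn) (void *), void *data,
                         unsigned num_threads, unsigned int flags);  */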
10609 /* OMP expansion -- the default pass, run before creation of SSA form. */
10611 namespace {
10613 const pass_data pass_data_expand_omp =
10615 GIMPLE_PASS, /* type */
10616 "ompexp", /* name */
10617 OPTGROUP_OMP, /* optinfo_flags */
10618 TV_NONE, /* tv_id */
10619 PROP_gimple_any, /* properties_required */
10620 PROP_gimple_eomp, /* properties_provided */
10621 0, /* properties_destroyed */
10622 0, /* todo_flags_start */
10623 0, /* todo_flags_finish */
10626 class pass_expand_omp : public gimple_opt_pass
10628 public:
10629 pass_expand_omp (gcc::context *ctxt)
10630 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10633 /* opt_pass methods: */
10634 virtual unsigned int execute (function *)
10636 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10637 || flag_openmp_simd != 0)
10638 && !seen_error ());
10640 /* This pass always runs, to provide PROP_gimple_eomp.
10641 But often, there is nothing to do. */
10642 if (!gate)
10643 return 0;
10645 return execute_expand_omp ();
10648 }; // class pass_expand_omp
10650 } // anon namespace
10652 gimple_opt_pass *
10653 make_pass_expand_omp (gcc::context *ctxt)
10655 return new pass_expand_omp (ctxt);
10658 namespace {
10660 const pass_data pass_data_expand_omp_ssa =
10662 GIMPLE_PASS, /* type */
10663 "ompexpssa", /* name */
10664 OPTGROUP_OMP, /* optinfo_flags */
10665 TV_NONE, /* tv_id */
10666 PROP_cfg | PROP_ssa, /* properties_required */
10667 PROP_gimple_eomp, /* properties_provided */
10668 0, /* properties_destroyed */
10669 0, /* todo_flags_start */
10670 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10673 class pass_expand_omp_ssa : public gimple_opt_pass
10675 public:
10676 pass_expand_omp_ssa (gcc::context *ctxt)
10677 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10680 /* opt_pass methods: */
10681 virtual bool gate (function *fun)
10683 return !(fun->curr_properties & PROP_gimple_eomp);
10685 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10686 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10688 }; // class pass_expand_omp_ssa
10690 } // anon namespace
10692 gimple_opt_pass *
10693 make_pass_expand_omp_ssa (gcc::context *ctxt)
10695 return new pass_expand_omp_ssa (ctxt);
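As a usage note, hedged: these factory functions are what the pass manager instantiates. The excerpt below only illustrates how the two variants are typically wired up and is not verified against the current passes.def.

/* Illustrative excerpt (assumed):

     NEXT_PASS (pass_expand_omp);      // pre-SSA; provides PROP_gimple_eomp
     ...
     NEXT_PASS (pass_expand_omp_ssa);  // later, gated on functions that do
                                       // not yet have PROP_gimple_eomp  */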
10698 /* Called from tree-cfg.cc::make_edges to create CFG edges for all relevant
10699 GIMPLE_* codes. */
10701 bool
10702 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10703 int *region_idx)
10705 gimple *last = last_stmt (bb);
10706 enum gimple_code code = gimple_code (last);
10707 struct omp_region *cur_region = *region;
10708 bool fallthru = false;
10710 switch (code)
10712 case GIMPLE_OMP_PARALLEL:
10713 case GIMPLE_OMP_FOR:
10714 case GIMPLE_OMP_SINGLE:
10715 case GIMPLE_OMP_TEAMS:
10716 case GIMPLE_OMP_MASTER:
10717 case GIMPLE_OMP_MASKED:
10718 case GIMPLE_OMP_SCOPE:
10719 case GIMPLE_OMP_TASKGROUP:
10720 case GIMPLE_OMP_CRITICAL:
10721 case GIMPLE_OMP_SECTION:
10722 cur_region = new_omp_region (bb, code, cur_region);
10723 fallthru = true;
10724 break;
10726 case GIMPLE_OMP_TASK:
10727 cur_region = new_omp_region (bb, code, cur_region);
10728 fallthru = true;
10729 if (gimple_omp_task_taskwait_p (last))
10730 cur_region = cur_region->outer;
10731 break;
10733 case GIMPLE_OMP_ORDERED:
10734 cur_region = new_omp_region (bb, code, cur_region);
10735 fallthru = true;
10736 if (omp_find_clause (gimple_omp_ordered_clauses
10737 (as_a <gomp_ordered *> (last)),
10738 OMP_CLAUSE_DEPEND))
10739 cur_region = cur_region->outer;
10740 break;
10742 case GIMPLE_OMP_TARGET:
10743 cur_region = new_omp_region (bb, code, cur_region);
10744 fallthru = true;
10745 switch (gimple_omp_target_kind (last))
10747 case GF_OMP_TARGET_KIND_REGION:
10748 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10749 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10750 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10751 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10752 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10753 break;
10754 case GF_OMP_TARGET_KIND_UPDATE:
10755 case GF_OMP_TARGET_KIND_ENTER_DATA:
10756 case GF_OMP_TARGET_KIND_EXIT_DATA:
10757 case GF_OMP_TARGET_KIND_DATA:
10758 case GF_OMP_TARGET_KIND_OACC_DATA:
10759 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10760 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10761 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10762 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10763 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10764 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10765 cur_region = cur_region->outer;
10766 break;
10767 default:
10768 gcc_unreachable ();
10770 break;
10772 case GIMPLE_OMP_SECTIONS:
10773 cur_region = new_omp_region (bb, code, cur_region);
10774 fallthru = true;
10775 break;
10777 case GIMPLE_OMP_SECTIONS_SWITCH:
10778 fallthru = false;
10779 break;
10781 case GIMPLE_OMP_ATOMIC_LOAD:
10782 case GIMPLE_OMP_ATOMIC_STORE:
10783 fallthru = true;
10784 break;
10786 case GIMPLE_OMP_RETURN:
10787 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10788 somewhere other than the next block. This will be
10789 created later. */
10790 cur_region->exit = bb;
10791 if (cur_region->type == GIMPLE_OMP_TASK)
10792 /* Add an edge corresponding to not scheduling the task
10793 immediately. */
10794 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10795 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10796 cur_region = cur_region->outer;
10797 break;
10799 case GIMPLE_OMP_CONTINUE:
10800 cur_region->cont = bb;
10801 switch (cur_region->type)
10803 case GIMPLE_OMP_FOR:
10804 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10805 successor edges as abnormal to prevent them from
10806 being split. */
10807 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10808 /* Make the loopback edge. */
10809 make_edge (bb, single_succ (cur_region->entry),
10810 EDGE_ABNORMAL);
10812 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10813 corresponds to the case that the body of the loop
10814 is not executed at all. */
10815 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10816 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10817 fallthru = false;
10818 break;
10820 case GIMPLE_OMP_SECTIONS:
10821 /* Wire up the edges into and out of the nested sections. */
10823 basic_block switch_bb = single_succ (cur_region->entry);
10825 struct omp_region *i;
10826 for (i = cur_region->inner; i ; i = i->next)
10828 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10829 make_edge (switch_bb, i->entry, 0);
10830 make_edge (i->exit, bb, EDGE_FALLTHRU);
10833 /* Make the loopback edge to the block with
10834 GIMPLE_OMP_SECTIONS_SWITCH. */
10835 make_edge (bb, switch_bb, 0);
10837 /* Make the edge from the switch to exit. */
10838 make_edge (switch_bb, bb->next_bb, 0);
10839 fallthru = false;
10841 break;
10843 case GIMPLE_OMP_TASK:
10844 fallthru = true;
10845 break;
10847 default:
10848 gcc_unreachable ();
10850 break;
10852 default:
10853 gcc_unreachable ();
10856 if (*region != cur_region)
10858 *region = cur_region;
10859 if (cur_region)
10860 *region_idx = cur_region->entry->index;
10861 else
10862 *region_idx = 0;
10865 return fallthru;
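To close, a hedged sketch of the CFG shape the GIMPLE_OMP_FOR / GIMPLE_OMP_CONTINUE case above creates; block names are illustrative.

/* Illustrative only:

     entry_bb:  GIMPLE_OMP_FOR
       |  single successor edge, marked EDGE_ABNORMAL
       v
     body blocks ...
       |
       v
     cont_bb:   GIMPLE_OMP_CONTINUE
       |  abnormal loopback edge back to single_succ (entry_bb)
       |  fallthru|abnormal edge to cont_bb->next_bb (the exit side)
       +  plus an abnormal edge entry_bb -> cont_bb->next_bb for the
          case where the loop body is never executed

   All of these are marked abnormal so later CFG manipulation does not
   split them.  */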