gcc/omp-expand.cc
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
5 Copyright (C) 2005-2022 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
68 struct omp_region
70 /* The enclosing region. */
71 struct omp_region *outer;
73 /* First child region. */
74 struct omp_region *inner;
76 /* Next peer region. */
77 struct omp_region *next;
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
105 /* Copy of fd.lastprivate_conditional != 0. */
106 bool has_lastprivate_conditional;
108 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 a depend clause. */
110 gomp_ordered *ord_stmt;
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
121 /* Return true if REGION is a combined parallel+workshare region. */
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
126 return region->is_combined_parallel;
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130    is immediately dominated by PAR_ENTRY_BB, return true if there
131 are no data dependencies that would prevent expanding the parallel
132 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
134 When expanding a combined parallel+workshare region, the call to
135 the child function may need additional arguments in the case of
136 GIMPLE_OMP_FOR regions. In some cases, these arguments are
137 computed out of variables passed in from the parent to the child
138 via 'struct .omp_data_s'. For instance:
140 #pragma omp parallel for schedule (guided, i * 4)
141 for (j ...)
143 Is lowered into:
145 # BLOCK 2 (PAR_ENTRY_BB)
146 .omp_data_o.i = i;
147 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
149 # BLOCK 3 (WS_ENTRY_BB)
150 .omp_data_i = &.omp_data_o;
151 D.1667 = .omp_data_i->i;
152 D.1598 = D.1667 * 4;
153 #pragma omp for schedule (guided, D.1598)
155 When we outline the parallel region, the call to the child function
156 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157 that value is computed *after* the call site. So, in principle we
158 cannot do the transformation.
160 To see whether the code in WS_ENTRY_BB blocks the combined
161 parallel+workshare call, we collect all the variables used in the
162    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
164 call.
166 FIXME. If we had the SSA form built at this point, we could merely
167 hoist the code in block 3 into block 2 and be done with it. But at
168 this point we don't have dataflow information and though we could
169 hack something up here, it is really not worth the aggravation. */
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
174 struct omp_for_data fd;
175 gimple *ws_stmt = last_stmt (ws_entry_bb);
177 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178 return true;
180 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182 return false;
184 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
186 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187 return false;
188 if (fd.iter_type != long_integer_type_node)
189 return false;
191 /* FIXME. We give up too easily here. If any of these arguments
192 are not constants, they will likely involve variables that have
193 been mapped into fields of .omp_data_s for sharing with the child
194 function. With appropriate data flow, it would be possible to
195 see through this. */
196 if (!is_gimple_min_invariant (fd.loop.n1)
197 || !is_gimple_min_invariant (fd.loop.n2)
198 || !is_gimple_min_invariant (fd.loop.step)
199 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200 return false;
202 return true;
205 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206 presence (SIMD_SCHEDULE). */
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
211 if (!simd_schedule || integer_zerop (chunk_size))
212 return chunk_size;
214 poly_uint64 vf = omp_max_vf ();
215 if (known_eq (vf, 1U))
216 return chunk_size;
218 tree type = TREE_TYPE (chunk_size);
219 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 build_int_cst (type, vf - 1));
221 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 build_int_cst (type, -vf));
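/* For illustration only (a sketch; the numbers are assumed, not taken
   from any target): with a simd schedule, omp_max_vf () == 8 and a
   chunk size of 10, the folding above computes

     (10 + 7) & -8 == 16

   i.e. the chunk size is rounded up to the next multiple of the
   vectorization factor so that chunks do not split simd vectors.  */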
225 /* Collect additional arguments needed to emit a combined
226 parallel+workshare call. WS_STMT is the workshare directive being
227 expanded. */
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
232 tree t;
233 location_t loc = gimple_location (ws_stmt);
234 vec<tree, va_gc> *ws_args;
236 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
238 struct omp_for_data fd;
239 tree n1, n2;
241 omp_extract_for_data (for_stmt, &fd, NULL);
242 n1 = fd.loop.n1;
243 n2 = fd.loop.n2;
245 if (gimple_omp_for_combined_into_p (for_stmt))
247 tree innerc
248 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 OMP_CLAUSE__LOOPTEMP_);
250 gcc_assert (innerc);
251 n1 = OMP_CLAUSE_DECL (innerc);
252 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 OMP_CLAUSE__LOOPTEMP_);
254 gcc_assert (innerc);
255 n2 = OMP_CLAUSE_DECL (innerc);
258 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
260 t = fold_convert_loc (loc, long_integer_type_node, n1);
261 ws_args->quick_push (t);
263 t = fold_convert_loc (loc, long_integer_type_node, n2);
264 ws_args->quick_push (t);
266 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267 ws_args->quick_push (t);
269 if (fd.chunk_size)
271 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 ws_args->quick_push (t);
276 return ws_args;
278 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
280 /* Number of sections is equal to the number of edges from the
281 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 the exit of the sections region. */
283 basic_block bb = single_succ (gimple_bb (ws_stmt));
284 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285 vec_alloc (ws_args, 1);
286 ws_args->quick_push (t);
287 return ws_args;
290 gcc_unreachable ();
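/* For illustration only (values assumed): for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the vector built above would hold the four values (long) 0, (long) n,
   (long) 1 and (long) 4, which later become the loop-bound and chunk
   arguments of the combined GOMP_parallel_loop_* call.  */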
293 /* Discover whether REGION is a combined parallel+workshare region. */
295 static void
296 determine_parallel_type (struct omp_region *region)
298 basic_block par_entry_bb, par_exit_bb;
299 basic_block ws_entry_bb, ws_exit_bb;
301 if (region == NULL || region->inner == NULL
302 || region->exit == NULL || region->inner->exit == NULL
303 || region->inner->cont == NULL)
304 return;
306 /* We only support parallel+for and parallel+sections. */
307 if (region->type != GIMPLE_OMP_PARALLEL
308 || (region->inner->type != GIMPLE_OMP_FOR
309 && region->inner->type != GIMPLE_OMP_SECTIONS))
310 return;
312 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313 WS_EXIT_BB -> PAR_EXIT_BB. */
314 par_entry_bb = region->entry;
315 par_exit_bb = region->exit;
316 ws_entry_bb = region->inner->entry;
317 ws_exit_bb = region->inner->exit;
319   /* Give up for task reductions on the parallel; while it is implementable,
320 adding another big set of APIs or slowing down the normal paths is
321 not acceptable. */
322 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324 return;
326 if (single_succ (par_entry_bb) == ws_entry_bb
327 && single_succ (ws_exit_bb) == par_exit_bb
328 && workshare_safe_to_combine_p (ws_entry_bb)
329 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 || (last_and_only_stmt (ws_entry_bb)
331 && last_and_only_stmt (par_exit_bb))))
333 gimple *par_stmt = last_stmt (par_entry_bb);
334 gimple *ws_stmt = last_stmt (ws_entry_bb);
336 if (region->inner->type == GIMPLE_OMP_FOR)
338 /* If this is a combined parallel loop, we need to determine
339 whether or not to use the combined library calls. There
340 are two cases where we do not apply the transformation:
341 static loops and any kind of ordered loop. In the first
342 case, we already open code the loop so there is no need
343 to do anything else. In the latter case, the combined
344 parallel loop call would still need extra synchronization
345 to implement ordered semantics, so there would not be any
346 gain in using the combined call. */
347 tree clauses = gimple_omp_for_clauses (ws_stmt);
348 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 if (c == NULL
350 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 == OMP_CLAUSE_SCHEDULE_STATIC)
352 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 return;
358 else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__REDUCTEMP_)
361 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 OMP_CLAUSE__CONDTEMP_)))
363 return;
365 region->is_combined_parallel = true;
366 region->inner->is_combined_parallel = true;
367 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
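/* For illustration only: given

     #pragma omp parallel
     #pragma omp for schedule (dynamic)
     for (i = 0; i < n; i++) ...

   both regions are marked as combined here and a single combined
   GOMP_parallel_loop_* call is emitted later, whereas with
   schedule (static) the loop is open coded and the plain GOMP_parallel
   call is kept.  */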
371 /* Debugging dumps for parallel regions. */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
376 /* Dump the parallel region tree rooted at REGION. */
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 gimple_code_name[region->type]);
384 if (region->inner)
385 dump_omp_region (file, region->inner, indent + 4);
387 if (region->cont)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 region->cont->index);
393 if (region->exit)
394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 region->exit->index);
396 else
397 fprintf (file, "%*s[no exit marker]\n", indent, "");
399 if (region->next)
400 dump_omp_region (file, region->next, indent);
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
406 dump_omp_region (stderr, region, 0);
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
412 dump_omp_region (stderr, root_omp_region, 0);
415 /* Create a new parallel region starting at BB inside region PARENT. */
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 struct omp_region *parent)
421 struct omp_region *region = XCNEW (struct omp_region);
423 region->outer = parent;
424 region->entry = bb;
425 region->type = type;
427 if (parent)
429 /* This is a nested region. Add it to the list of inner
430 regions in PARENT. */
431 region->next = parent->inner;
432 parent->inner = region;
434 else
436 /* This is a toplevel region. Add it to the list of toplevel
437 regions in ROOT_OMP_REGION. */
438 region->next = root_omp_region;
439 root_omp_region = region;
442 return region;
445 /* Release the memory associated with the region tree rooted at REGION. */
447 static void
448 free_omp_region_1 (struct omp_region *region)
450 struct omp_region *i, *n;
452 for (i = region->inner; i ; i = n)
454 n = i->next;
455 free_omp_region_1 (i);
458 free (region);
461 /* Release the memory for the entire omp region tree. */
463 void
464 omp_free_regions (void)
466 struct omp_region *r, *n;
467 for (r = root_omp_region; r ; r = n)
469 n = r->next;
470 free_omp_region_1 (r);
472 root_omp_region = NULL;
475 /* A convenience function to build an empty GIMPLE_COND with just the
476 condition. */
478 static gcond *
479 gimple_build_cond_empty (tree cond)
481 enum tree_code pred_code;
482 tree lhs, rhs;
484 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489 Add CHILD_FNDECL to decl chain of the supercontext of the block
490 ENTRY_BLOCK - this is the block which originally contained the
491 code from which CHILD_FNDECL was created.
493 Together, these actions ensure that the debug info for the outlined
494 function will be emitted with the correct lexical scope. */
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 tree child_fndecl)
500 tree parent_fndecl = NULL_TREE;
501 gimple *entry_stmt;
502   /* OMP expansion expands inner regions before outer ones, so if we
503      e.g. have an explicit task region nested in a parallel region, then
504      when expanding the task region, current_function_decl will be the
505      original source function, but we actually want to use the child
506      function of the parallel as the context. */
507 for (region = region->outer;
508 region && parent_fndecl == NULL_TREE; region = region->outer)
509 switch (region->type)
511 case GIMPLE_OMP_PARALLEL:
512 case GIMPLE_OMP_TASK:
513 case GIMPLE_OMP_TEAMS:
514 entry_stmt = last_stmt (region->entry);
515 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 break;
517 case GIMPLE_OMP_TARGET:
518 entry_stmt = last_stmt (region->entry);
519 parent_fndecl
520 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 break;
522 default:
523 break;
526 if (parent_fndecl == NULL_TREE)
527 parent_fndecl = current_function_decl;
528 DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
532 tree b = BLOCK_SUPERCONTEXT (entry_block);
533 if (TREE_CODE (b) == BLOCK)
535 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 BLOCK_VARS (b) = child_fndecl;
541 /* Build the function calls to GOMP_parallel etc to actually
542 generate the parallel operation. REGION is the parallel region
543    being expanded. BB is the block where the code is to be inserted.
544    WS_ARGS will be set if this is a call to a combined
545    parallel+workshare construct; it contains the list of additional
546    arguments needed by the workshare construct. */
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 gomp_parallel *entry_stmt,
551 vec<tree, va_gc> *ws_args)
553 tree t, t1, t2, val, cond, c, clauses, flags;
554 gimple_stmt_iterator gsi;
555 gimple *stmt;
556 enum built_in_function start_ix;
557 int start_ix2;
558 location_t clause_loc;
559 vec<tree, va_gc> *args;
561 clauses = gimple_omp_parallel_clauses (entry_stmt);
563 /* Determine what flavor of GOMP_parallel we will be
564 emitting. */
565 start_ix = BUILT_IN_GOMP_PARALLEL;
566 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567 if (rtmp)
568 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569 else if (is_combined_parallel (region))
571 switch (region->inner->type)
573 case GIMPLE_OMP_FOR:
574 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 switch (region->inner->sched_kind)
577 case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 /* For lastprivate(conditional:), our implementation
579 requires monotonic behavior. */
580 if (region->inner->has_lastprivate_conditional != 0)
581 start_ix2 = 3;
582 else if ((region->inner->sched_modifiers
583 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 start_ix2 = 6;
585 else if ((region->inner->sched_modifiers
586 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 start_ix2 = 7;
588 else
589 start_ix2 = 3;
590 break;
591 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 case OMP_CLAUSE_SCHEDULE_GUIDED:
593 if ((region->inner->sched_modifiers
594 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 && !region->inner->has_lastprivate_conditional)
597 start_ix2 = 3 + region->inner->sched_kind;
598 break;
600 /* FALLTHRU */
601 default:
602 start_ix2 = region->inner->sched_kind;
603 break;
605 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 start_ix = (enum built_in_function) start_ix2;
607 break;
608 case GIMPLE_OMP_SECTIONS:
609 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 break;
611 default:
612 gcc_unreachable ();
616 /* By default, the value of NUM_THREADS is zero (selected at run time)
617 and there is no conditional. */
618 cond = NULL_TREE;
619 val = build_int_cst (unsigned_type_node, 0);
620 flags = build_int_cst (unsigned_type_node, 0);
622 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623 if (c)
624 cond = OMP_CLAUSE_IF_EXPR (c);
626 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627 if (c)
629 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630 clause_loc = OMP_CLAUSE_LOCATION (c);
632 else
633 clause_loc = gimple_location (entry_stmt);
635 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636 if (c)
637 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
639 /* Ensure 'val' is of the correct type. */
640 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
642 /* If we found the clause 'if (cond)', build either
643 (cond != 0) or (cond ? val : 1u). */
644 if (cond)
646 cond = gimple_boolify (cond);
648 if (integer_zerop (val))
649 val = fold_build2_loc (clause_loc,
650 EQ_EXPR, unsigned_type_node, cond,
651 build_int_cst (TREE_TYPE (cond), 0));
652 else
654 basic_block cond_bb, then_bb, else_bb;
655 edge e, e_then, e_else;
656 tree tmp_then, tmp_else, tmp_join, tmp_var;
658 tmp_var = create_tmp_var (TREE_TYPE (val));
659 if (gimple_in_ssa_p (cfun))
661 tmp_then = make_ssa_name (tmp_var);
662 tmp_else = make_ssa_name (tmp_var);
663 tmp_join = make_ssa_name (tmp_var);
665 else
667 tmp_then = tmp_var;
668 tmp_else = tmp_var;
669 tmp_join = tmp_var;
672 e = split_block_after_labels (bb);
673 cond_bb = e->src;
674 bb = e->dest;
675 remove_edge (e);
677 then_bb = create_empty_bb (cond_bb);
678 else_bb = create_empty_bb (then_bb);
679 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
682 stmt = gimple_build_cond_empty (cond);
683 gsi = gsi_start_bb (cond_bb);
684 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
686 gsi = gsi_start_bb (then_bb);
687 expand_omp_build_assign (&gsi, tmp_then, val, true);
689 gsi = gsi_start_bb (else_bb);
690 expand_omp_build_assign (&gsi, tmp_else,
691 build_int_cst (unsigned_type_node, 1),
692 true);
694 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 add_bb_to_loop (then_bb, cond_bb->loop_father);
697 add_bb_to_loop (else_bb, cond_bb->loop_father);
698 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
701 if (gimple_in_ssa_p (cfun))
703 gphi *phi = create_phi_node (tmp_join, bb);
704 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
708 val = tmp_join;
711 gsi = gsi_start_bb (bb);
712 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 false, GSI_CONTINUE_LINKING);
716 gsi = gsi_last_nondebug_bb (bb);
717 t = gimple_omp_parallel_data_arg (entry_stmt);
718 if (t == NULL)
719 t1 = null_pointer_node;
720 else
721 t1 = build_fold_addr_expr (t);
722 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723 t2 = build_fold_addr_expr (child_fndecl);
725 vec_alloc (args, 4 + vec_safe_length (ws_args));
726 args->quick_push (t2);
727 args->quick_push (t1);
728 args->quick_push (val);
729 if (ws_args)
730 args->splice (*ws_args);
731 args->quick_push (flags);
733 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 builtin_decl_explicit (start_ix), args);
736 if (rtmp)
738 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 fold_convert (type,
741 fold_convert (pointer_sized_int_node, t)));
743 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 false, GSI_CONTINUE_LINKING);
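/* For illustration only (identifiers like foo._omp_fn.0 are made up):
   a plain

     #pragma omp parallel num_threads (4)

   ends up after this function as roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   With an if clause the third argument instead becomes either
   (cond != 0) or the PHI result of the cond ? val : 1u diamond built
   above.  */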
747 /* Build the function call to GOMP_task to actually
748    generate the task operation. BB is the block where the code is to be inserted. */
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 gomp_task *entry_stmt)
754 tree t1, t2, t3;
755 gimple_stmt_iterator gsi;
756 location_t loc = gimple_location (entry_stmt);
758 tree clauses = gimple_omp_task_clauses (entry_stmt);
760 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766 tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
768 unsigned int iflags
769 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
773 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775 tree num_tasks = NULL_TREE;
776 bool ull = false;
777 if (taskloop_p)
779 gimple *g = last_stmt (region->outer->entry);
780 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782 struct omp_for_data fd;
783 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 OMP_CLAUSE__LOOPTEMP_);
787 startvar = OMP_CLAUSE_DECL (startvar);
788 endvar = OMP_CLAUSE_DECL (endvar);
789 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790 if (fd.loop.cond_code == LT_EXPR)
791 iflags |= GOMP_TASK_FLAG_UP;
792 tree tclauses = gimple_omp_for_clauses (g);
793 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794 if (num_tasks)
796 if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 iflags |= GOMP_TASK_FLAG_STRICT;
798 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
800 else
802 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 if (num_tasks)
805 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 iflags |= GOMP_TASK_FLAG_STRICT;
808 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
810 else
811 num_tasks = integer_zero_node;
813 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814 if (ifc == NULL_TREE)
815 iflags |= GOMP_TASK_FLAG_IF;
816 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 iflags |= GOMP_TASK_FLAG_NOGROUP;
818 ull = fd.iter_type == long_long_unsigned_type_node;
819 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 iflags |= GOMP_TASK_FLAG_REDUCTION;
822 else
824 if (priority)
825 iflags |= GOMP_TASK_FLAG_PRIORITY;
826 if (detach)
827 iflags |= GOMP_TASK_FLAG_DETACH;
830 tree flags = build_int_cst (unsigned_type_node, iflags);
832 tree cond = boolean_true_node;
833 if (ifc)
835 if (taskloop_p)
837 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 build_int_cst (unsigned_type_node,
840 GOMP_TASK_FLAG_IF),
841 build_int_cst (unsigned_type_node, 0));
842 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 flags, t);
845 else
846 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
849 if (finalc)
851 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 build_int_cst (unsigned_type_node,
854 GOMP_TASK_FLAG_FINAL),
855 build_int_cst (unsigned_type_node, 0));
856 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
858 if (depend)
859 depend = OMP_CLAUSE_DECL (depend);
860 else
861 depend = build_int_cst (ptr_type_node, 0);
862 if (priority)
863 priority = fold_convert (integer_type_node,
864 OMP_CLAUSE_PRIORITY_EXPR (priority));
865 else
866 priority = integer_zero_node;
868 gsi = gsi_last_nondebug_bb (bb);
870 detach = (detach
871 ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 : null_pointer_node);
874 tree t = gimple_omp_task_data_arg (entry_stmt);
875 if (t == NULL)
876 t2 = null_pointer_node;
877 else
878 t2 = build_fold_addr_expr_loc (loc, t);
879 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880 t = gimple_omp_task_copy_fn (entry_stmt);
881 if (t == NULL)
882 t3 = null_pointer_node;
883 else
884 t3 = build_fold_addr_expr_loc (loc, t);
886 if (taskloop_p)
887 t = build_call_expr (ull
888 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 11, t1, t2, t3,
891 gimple_omp_task_arg_size (entry_stmt),
892 gimple_omp_task_arg_align (entry_stmt), flags,
893 num_tasks, priority, startvar, endvar, step);
894 else
895 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 10, t1, t2, t3,
897 gimple_omp_task_arg_size (entry_stmt),
898 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 depend, priority, detach);
901 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 false, GSI_CONTINUE_LINKING);
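/* For illustration only: for a task like

     #pragma omp task if (0)

   COND above folds to false, so the emitted GOMP_task call tells the
   runtime to execute the task immediately in the encountering thread
   (an undeferred task) rather than queueing it.  */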
905 /* Build the function call to GOMP_taskwait_depend to actually
906    generate the taskwait operation. BB is the block where the code is
907    to be inserted. */
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
912 tree clauses = gimple_omp_task_clauses (entry_stmt);
913 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914 if (depend == NULL_TREE)
915 return;
917 depend = OMP_CLAUSE_DECL (depend);
919 bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
920 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
921 enum built_in_function f = (nowait
922 ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
923 : BUILT_IN_GOMP_TASKWAIT_DEPEND);
924 tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);
926 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
927 false, GSI_CONTINUE_LINKING);
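/* For illustration only: a directive like

     #pragma omp taskwait depend (in: x)

   lowers to GOMP_taskwait_depend (D) where D is the depend array decl
   recorded in the clause, and an additional nowait clause merely
   selects the BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT entry point
   instead.  */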
930 /* Build the function call to GOMP_teams_reg to actually
931    generate the host teams operation. BB is the block where the code
932    is to be inserted. */
934 static void
935 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
937 tree clauses = gimple_omp_teams_clauses (entry_stmt);
938 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
939 if (num_teams == NULL_TREE)
940 num_teams = build_int_cst (unsigned_type_node, 0);
941 else
943 num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
944 num_teams = fold_convert (unsigned_type_node, num_teams);
946 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
947 if (thread_limit == NULL_TREE)
948 thread_limit = build_int_cst (unsigned_type_node, 0);
949 else
951 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
952 thread_limit = fold_convert (unsigned_type_node, thread_limit);
955 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
956 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
957 if (t == NULL)
958 t1 = null_pointer_node;
959 else
960 t1 = build_fold_addr_expr (t);
961 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
962 tree t2 = build_fold_addr_expr (child_fndecl);
964 vec<tree, va_gc> *args;
965 vec_alloc (args, 5);
966 args->quick_push (t2);
967 args->quick_push (t1);
968 args->quick_push (num_teams);
969 args->quick_push (thread_limit);
970 /* For future extensibility. */
971 args->quick_push (build_zero_cst (unsigned_type_node));
973 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
974 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
975 args);
977 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
978 false, GSI_CONTINUE_LINKING);
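/* For illustration only (values assumed): for

     #pragma omp teams num_teams (8) thread_limit (16)

   this emits roughly

     GOMP_teams_reg (foo._omp_fn.0, &.omp_data_o, 8, 16, 0);

   the trailing 0 being the currently unused flags argument pushed
   above for future extensibility.  */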
981 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
983 static tree
984 vec2chain (vec<tree, va_gc> *v)
986 tree chain = NULL_TREE, t;
987 unsigned ix;
989 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
991 DECL_CHAIN (t) = chain;
992 chain = t;
995 return chain;
998 /* Remove barriers in REGION->EXIT's block. Note that this is only
999 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1000    is an implicit barrier, any barrier that a workshare inside the
1001    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL
1002    region can now be removed. */
1004 static void
1005 remove_exit_barrier (struct omp_region *region)
1007 gimple_stmt_iterator gsi;
1008 basic_block exit_bb;
1009 edge_iterator ei;
1010 edge e;
1011 gimple *stmt;
1012 int any_addressable_vars = -1;
1014 exit_bb = region->exit;
1016 /* If the parallel region doesn't return, we don't have REGION->EXIT
1017 block at all. */
1018 if (! exit_bb)
1019 return;
1021 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1022 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1023 statements that can appear in between are extremely limited -- no
1024 memory operations at all. Here, we allow nothing at all, so the
1025 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1026 gsi = gsi_last_nondebug_bb (exit_bb);
1027 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1028 gsi_prev_nondebug (&gsi);
1029 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1030 return;
1032 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1034 gsi = gsi_last_nondebug_bb (e->src);
1035 if (gsi_end_p (gsi))
1036 continue;
1037 stmt = gsi_stmt (gsi);
1038 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1039 && !gimple_omp_return_nowait_p (stmt))
1041 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1042 in many cases. If there could be tasks queued, the barrier
1043 might be needed to let the tasks run before some local
1044 variable of the parallel that the task uses as shared
1045 runs out of scope. The task can be spawned either
1046    from within the current function (this would be easy to check)
1047    or from some function it calls which gets passed the address
1048    of such a variable. */
1049 if (any_addressable_vars < 0)
1051 gomp_parallel *parallel_stmt
1052 = as_a <gomp_parallel *> (last_stmt (region->entry));
1053 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1054 tree local_decls, block, decl;
1055 unsigned ix;
1057 any_addressable_vars = 0;
1058 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1059 if (TREE_ADDRESSABLE (decl))
1061 any_addressable_vars = 1;
1062 break;
1064 for (block = gimple_block (stmt);
1065 !any_addressable_vars
1066 && block
1067 && TREE_CODE (block) == BLOCK;
1068 block = BLOCK_SUPERCONTEXT (block))
1070 for (local_decls = BLOCK_VARS (block);
1071 local_decls;
1072 local_decls = DECL_CHAIN (local_decls))
1073 if (TREE_ADDRESSABLE (local_decls))
1075 any_addressable_vars = 1;
1076 break;
1078 if (block == gimple_block (parallel_stmt))
1079 break;
1082 if (!any_addressable_vars)
1083 gimple_omp_return_set_nowait (stmt);
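/* For illustration only: in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   the worksharing loop's implicit barrier is immediately followed by
   the parallel region's own implicit barrier, so unless addressable
   locals might still be referenced by queued tasks, the workshare's
   GIMPLE_OMP_RETURN is marked nowait here and one barrier is saved.  */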
1088 static void
1089 remove_exit_barriers (struct omp_region *region)
1091 if (region->type == GIMPLE_OMP_PARALLEL)
1092 remove_exit_barrier (region);
1094 if (region->inner)
1096 region = region->inner;
1097 remove_exit_barriers (region);
1098 while (region->next)
1100 region = region->next;
1101 remove_exit_barriers (region);
1106 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1107 calls. These can't be declared as const functions, but
1108 within one parallel body they are constant, so they can be
1109 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1110    which are declared const. Similarly for a task body, except
1111    that in an untied task omp_get_thread_num () can change at any task
1112 scheduling point. */
1114 static void
1115 optimize_omp_library_calls (gimple *entry_stmt)
1117 basic_block bb;
1118 gimple_stmt_iterator gsi;
1119 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1120 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1121 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1122 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1123 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1124 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1125 OMP_CLAUSE_UNTIED) != NULL);
1127 FOR_EACH_BB_FN (bb, cfun)
1128 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1130 gimple *call = gsi_stmt (gsi);
1131 tree decl;
1133 if (is_gimple_call (call)
1134 && (decl = gimple_call_fndecl (call))
1135 && DECL_EXTERNAL (decl)
1136 && TREE_PUBLIC (decl)
1137 && DECL_INITIAL (decl) == NULL)
1139 tree built_in;
1141 if (DECL_NAME (decl) == thr_num_id)
1143 /* In #pragma omp task untied omp_get_thread_num () can change
1144 during the execution of the task region. */
1145 if (untied_task)
1146 continue;
1147 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1149 else if (DECL_NAME (decl) == num_thr_id)
1150 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1151 else
1152 continue;
1154 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1155 || gimple_call_num_args (call) != 0)
1156 continue;
1158 if (flag_exceptions && !TREE_NOTHROW (decl))
1159 continue;
1161 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1162 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1163 TREE_TYPE (TREE_TYPE (built_in))))
1164 continue;
1166 gimple_call_set_fndecl (call, built_in);
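/* For illustration only: inside an outlined parallel body a call to
   the external omp_get_thread_num () is redirected to the const
   builtin __builtin_omp_get_thread_num (), letting later passes CSE
   repeated queries of the thread number; in an untied task the call
   is left alone because the executing thread can change at any task
   scheduling point.  */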
1171 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1172 regimplified. */
1174 static tree
1175 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1177 tree t = *tp;
1179 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1180 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1181 return t;
1183 if (TREE_CODE (t) == ADDR_EXPR)
1184 recompute_tree_invariant_for_addr_expr (t);
1186 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1187 return NULL_TREE;
1190 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1192 static void
1193 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1194 bool after)
1196 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1197 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1198 !after, after ? GSI_CONTINUE_LINKING
1199 : GSI_SAME_STMT);
1200 gimple *stmt = gimple_build_assign (to, from);
1201 if (after)
1202 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1203 else
1204 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1205 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1206 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1208 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1209 gimple_regimplify_operands (stmt, &gsi);
1213 /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1215 static gcond *
1216 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1217 tree lhs, tree rhs, bool after = false)
1219 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1220 if (after)
1221 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1222 else
1223 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1224 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 NULL, NULL)
1226 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1227 NULL, NULL))
1229 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1230 gimple_regimplify_operands (cond_stmt, &gsi);
1232 return cond_stmt;
1235 /* Expand the OpenMP parallel or task directive starting at REGION. */
1237 static void
1238 expand_omp_taskreg (struct omp_region *region)
1240 basic_block entry_bb, exit_bb, new_bb;
1241 struct function *child_cfun;
1242 tree child_fn, block, t;
1243 gimple_stmt_iterator gsi;
1244 gimple *entry_stmt, *stmt;
1245 edge e;
1246 vec<tree, va_gc> *ws_args;
1248 entry_stmt = last_stmt (region->entry);
1249 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1250 && gimple_omp_task_taskwait_p (entry_stmt))
1252 new_bb = region->entry;
1253 gsi = gsi_last_nondebug_bb (region->entry);
1254 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1255 gsi_remove (&gsi, true);
1256 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1257 return;
1260 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1261 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1263 entry_bb = region->entry;
1264 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1265 exit_bb = region->cont;
1266 else
1267 exit_bb = region->exit;
1269 if (is_combined_parallel (region))
1270 ws_args = region->ws_args;
1271 else
1272 ws_args = NULL;
1274 if (child_cfun->cfg)
1276 /* Due to inlining, it may happen that we have already outlined
1277 the region, in which case all we need to do is make the
1278 sub-graph unreachable and emit the parallel call. */
1279 edge entry_succ_e, exit_succ_e;
1281 entry_succ_e = single_succ_edge (entry_bb);
1283 gsi = gsi_last_nondebug_bb (entry_bb);
1284 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1285 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1286 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1287 gsi_remove (&gsi, true);
1289 new_bb = entry_bb;
1290 if (exit_bb)
1292 exit_succ_e = single_succ_edge (exit_bb);
1293 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1295 remove_edge_and_dominated_blocks (entry_succ_e);
1297 else
1299 unsigned srcidx, dstidx, num;
1301 /* If the parallel region needs data sent from the parent
1302 function, then the very first statement (except possible
1303 tree profile counter updates) of the parallel body
1304 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1305 &.OMP_DATA_O is passed as an argument to the child function,
1306 we need to replace it with the argument as seen by the child
1307 function.
1309 In most cases, this will end up being the identity assignment
1310 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1311 a function call that has been inlined, the original PARM_DECL
1312 .OMP_DATA_I may have been converted into a different local
1313    variable, in which case we need to keep the assignment. */
1314 if (gimple_omp_taskreg_data_arg (entry_stmt))
1316 basic_block entry_succ_bb
1317 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1318 : FALLTHRU_EDGE (entry_bb)->dest;
1319 tree arg;
1320 gimple *parcopy_stmt = NULL;
1322 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1324 gimple *stmt;
1326 gcc_assert (!gsi_end_p (gsi));
1327 stmt = gsi_stmt (gsi);
1328 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1329 continue;
1331 if (gimple_num_ops (stmt) == 2)
1333 tree arg = gimple_assign_rhs1 (stmt);
1335    /* We're ignoring the subcode because we're
1336 effectively doing a STRIP_NOPS. */
1338 if (TREE_CODE (arg) == ADDR_EXPR
1339 && (TREE_OPERAND (arg, 0)
1340 == gimple_omp_taskreg_data_arg (entry_stmt)))
1342 parcopy_stmt = stmt;
1343 break;
1348 gcc_assert (parcopy_stmt != NULL);
1349 arg = DECL_ARGUMENTS (child_fn);
1351 if (!gimple_in_ssa_p (cfun))
1353 if (gimple_assign_lhs (parcopy_stmt) == arg)
1354 gsi_remove (&gsi, true);
1355 else
1357 /* ?? Is setting the subcode really necessary ?? */
1358 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1362 else
1364 tree lhs = gimple_assign_lhs (parcopy_stmt);
1365 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1366 /* We'd like to set the rhs to the default def in the child_fn,
1367 but it's too early to create ssa names in the child_fn.
1368 Instead, we set the rhs to the parm. In
1369 move_sese_region_to_fn, we introduce a default def for the
1370    parm, map the parm to its default def, and once we encounter
1371 this stmt, replace the parm with the default def. */
1372 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1373 update_stmt (parcopy_stmt);
1377 /* Declare local variables needed in CHILD_CFUN. */
1378 block = DECL_INITIAL (child_fn);
1379 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1380 /* The gimplifier could record temporaries in parallel/task block
1381    rather than in the containing function's local_decls chain,
1382 which would mean cgraph missed finalizing them. Do it now. */
1383 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1384 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1385 varpool_node::finalize_decl (t);
1386 DECL_SAVED_TREE (child_fn) = NULL;
1387 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1388 gimple_set_body (child_fn, NULL);
1389 TREE_USED (block) = 1;
1391 /* Reset DECL_CONTEXT on function arguments. */
1392 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1393 DECL_CONTEXT (t) = child_fn;
1395 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1396 so that it can be moved to the child function. */
1397 gsi = gsi_last_nondebug_bb (entry_bb);
1398 stmt = gsi_stmt (gsi);
1399 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1400 || gimple_code (stmt) == GIMPLE_OMP_TASK
1401 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1402 e = split_block (entry_bb, stmt);
1403 gsi_remove (&gsi, true);
1404 entry_bb = e->dest;
1405 edge e2 = NULL;
1406 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1407 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1408 else
1410 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1411 gcc_assert (e2->dest == region->exit);
1412 remove_edge (BRANCH_EDGE (entry_bb));
1413 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1414 gsi = gsi_last_nondebug_bb (region->exit);
1415 gcc_assert (!gsi_end_p (gsi)
1416 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1417 gsi_remove (&gsi, true);
1420 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1421 if (exit_bb)
1423 gsi = gsi_last_nondebug_bb (exit_bb);
1424 gcc_assert (!gsi_end_p (gsi)
1425 && (gimple_code (gsi_stmt (gsi))
1426 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1427 stmt = gimple_build_return (NULL);
1428 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1429 gsi_remove (&gsi, true);
1432 /* Move the parallel region into CHILD_CFUN. */
1434 if (gimple_in_ssa_p (cfun))
1436 init_tree_ssa (child_cfun);
1437 init_ssa_operands (child_cfun);
1438 child_cfun->gimple_df->in_ssa_p = true;
1439 block = NULL_TREE;
1441 else
1442 block = gimple_block (entry_stmt);
1444 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1445 if (exit_bb)
1446 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1447 if (e2)
1449 basic_block dest_bb = e2->dest;
1450 if (!exit_bb)
1451 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1452 remove_edge (e2);
1453 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1455 /* When the OMP expansion process cannot guarantee an up-to-date
1456    loop tree, arrange for the child function to fix up loops. */
1457 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1458 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1460 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1461 num = vec_safe_length (child_cfun->local_decls);
1462 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1464 t = (*child_cfun->local_decls)[srcidx];
1465 if (DECL_CONTEXT (t) == cfun->decl)
1466 continue;
1467 if (srcidx != dstidx)
1468 (*child_cfun->local_decls)[dstidx] = t;
1469 dstidx++;
1471 if (dstidx != num)
1472 vec_safe_truncate (child_cfun->local_decls, dstidx);
1474 /* Inform the callgraph about the new function. */
1475 child_cfun->curr_properties = cfun->curr_properties;
1476 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1477 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1478 cgraph_node *node = cgraph_node::get_create (child_fn);
1479 node->parallelized_function = 1;
1480 cgraph_node::add_new_function (child_fn, true);
1482 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1483 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1485 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1486 fixed in a following pass. */
1487 push_cfun (child_cfun);
1488 if (need_asm)
1489 assign_assembler_name_if_needed (child_fn);
1491 if (optimize)
1492 optimize_omp_library_calls (entry_stmt);
1493 update_max_bb_count ();
1494 cgraph_edge::rebuild_edges ();
1496 /* Some EH regions might become dead, see PR34608. If
1497 pass_cleanup_cfg isn't the first pass to happen with the
1498 new child, these dead EH edges might cause problems.
1499 Clean them up now. */
1500 if (flag_exceptions)
1502 basic_block bb;
1503 bool changed = false;
1505 FOR_EACH_BB_FN (bb, cfun)
1506 changed |= gimple_purge_dead_eh_edges (bb);
1507 if (changed)
1508 cleanup_tree_cfg ();
1510 if (gimple_in_ssa_p (cfun))
1511 update_ssa (TODO_update_ssa);
1512 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1513 verify_loop_structure ();
1514 pop_cfun ();
1516 if (dump_file && !gimple_in_ssa_p (cfun))
1518 omp_any_child_fn_dumped = true;
1519 dump_function_header (dump_file, child_fn, dump_flags);
1520 dump_function_to_file (child_fn, dump_file, dump_flags);
1524 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1526 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1527 expand_parallel_call (region, new_bb,
1528 as_a <gomp_parallel *> (entry_stmt), ws_args);
1529 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1530 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1531 else
1532 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1533 if (gimple_in_ssa_p (cfun))
1534 update_ssa (TODO_update_ssa_only_virtuals);
1537 /* Information about members of an OpenACC collapsed loop nest. */
1539 struct oacc_collapse
1541 tree base; /* Base value. */
1542 tree iters; /* Number of steps. */
1543 tree step; /* Step size. */
1544 tree tile; /* Tile increment (if tiled). */
1545 tree outer; /* Tile iterator var. */
1548 /* Helper for expand_oacc_for. Determine collapsed loop information.
1549 Fill in COUNTS array. Emit any initialization code before GSI.
1550 Return the calculated outer loop bound of BOUND_TYPE. */
1552 static tree
1553 expand_oacc_collapse_init (const struct omp_for_data *fd,
1554 gimple_stmt_iterator *gsi,
1555 oacc_collapse *counts, tree diff_type,
1556 tree bound_type, location_t loc)
1558 tree tiling = fd->tiling;
1559 tree total = build_int_cst (bound_type, 1);
1560 int ix;
1562 gcc_assert (integer_onep (fd->loop.step));
1563 gcc_assert (integer_zerop (fd->loop.n1));
1565 /* When tiling, the first operand of the tile clause applies to the
1566 innermost loop, and we work outwards from there. Seems
1567 backwards, but whatever. */
1568 for (ix = fd->collapse; ix--;)
1570 const omp_for_data_loop *loop = &fd->loops[ix];
1572 tree iter_type = TREE_TYPE (loop->v);
1573 tree plus_type = iter_type;
1575 gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1577 if (POINTER_TYPE_P (iter_type))
1578 plus_type = sizetype;
1580 if (tiling)
1582 tree num = build_int_cst (integer_type_node, fd->collapse);
1583 tree loop_no = build_int_cst (integer_type_node, ix);
1584 tree tile = TREE_VALUE (tiling);
1585 gcall *call
1586 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1587 /* gwv-outer=*/integer_zero_node,
1588 /* gwv-inner=*/integer_zero_node);
1590 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1591 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1592 gimple_call_set_lhs (call, counts[ix].tile);
1593 gimple_set_location (call, loc);
1594 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1596 tiling = TREE_CHAIN (tiling);
1598 else
1600 counts[ix].tile = NULL;
1601 counts[ix].outer = loop->v;
1604 tree b = loop->n1;
1605 tree e = loop->n2;
1606 tree s = loop->step;
1607 bool up = loop->cond_code == LT_EXPR;
1608 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1609 bool negating;
1610 tree expr;
1612 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1615 true, GSI_SAME_STMT);
1617 /* Convert the step, avoiding possible unsigned->signed overflow. */
1618 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1619 if (negating)
1620 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1621 s = fold_convert (diff_type, s);
1622 if (negating)
1623 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1624 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1625 true, GSI_SAME_STMT);
1627 /* Determine the range, avoiding possible unsigned->signed overflow. */
1628 negating = !up && TYPE_UNSIGNED (iter_type);
1629 expr = fold_build2 (MINUS_EXPR, plus_type,
1630 fold_convert (plus_type, negating ? b : e),
1631 fold_convert (plus_type, negating ? e : b));
1632 expr = fold_convert (diff_type, expr);
1633 if (negating)
1634 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1635 tree range = force_gimple_operand_gsi
1636 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1638 /* Determine number of iterations. */
1639 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1640 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1641 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1643 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1644 true, GSI_SAME_STMT);
1646 counts[ix].base = b;
1647 counts[ix].iters = iters;
1648 counts[ix].step = s;
1650 total = fold_build2 (MULT_EXPR, bound_type, total,
1651 fold_convert (bound_type, iters));
1654 return total;
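/* For illustration only, a worked instance of the computation above
   (values assumed): for a member loop

     for (i = 0; i < 10; i += 3)

   cond_code is LT_EXPR, so range == 10, dir == 1 and s == 3, giving
   iters == (10 - 1 + 3) / 3 == 4, matching the iterations
   i == 0, 3, 6, 9.  */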
1657 /* Emit initializers for collapsed loop members. INNER is true if
1658 this is for the element loop of a TILE. IVAR is the outer
1659 loop iteration variable, from which collapsed loop iteration values
1660 are calculated. COUNTS array has been initialized by
1661    expand_oacc_collapse_init. */
1663 static void
1664 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1665 gimple_stmt_iterator *gsi,
1666 const oacc_collapse *counts, tree ivar,
1667 tree diff_type)
1669 tree ivar_type = TREE_TYPE (ivar);
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
1679 tree plus_type = iter_type;
1680 enum tree_code plus_code = PLUS_EXPR;
1681 tree expr;
1683 if (POINTER_TYPE_P (iter_type))
1685 plus_code = POINTER_PLUS_EXPR;
1686 plus_type = sizetype;
1689 expr = ivar;
1690 if (ix)
1692 tree mod = fold_convert (ivar_type, collapse->iters);
1693 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1694 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1695 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1696 true, GSI_SAME_STMT);
1699 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1700 fold_convert (diff_type, collapse->step));
1701 expr = fold_build2 (plus_code, iter_type,
1702 inner ? collapse->outer : collapse->base,
1703 fold_convert (plus_type, expr));
1704 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1705 true, GSI_SAME_STMT);
1706 gassign *ass = gimple_build_assign (v, expr);
1707 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
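/* For illustration only (collapse == 2 assumed): if the inner loop
   runs counts[1].iters == N times, the div/mod chain above decomposes
   an outer iteration value IVAR into IVAR % N for the inner loop and
   IVAR / N for the outer one, each then scaled by that loop's step
   and added to its base.  */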
1711 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. For loop nests with non-rectangular
1737 loops, do this only for the rectangular loops. Then pick
1738 the loops which reference outer vars in their bound expressions
1739 and the loops which they refer to and for this sub-nest compute
1741    number of iterations. For triangular loops use Faulhaber's formula;
1742    otherwise, as a fallback, compute by iterating the loops.
1742 If e.g. the sub-nest is
1743 for (I = N11; I COND1 N12; I += STEP1)
1744 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1745 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1747 COUNT = 0;
1748 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1749 for (tmpj = M21 * tmpi + N21;
1750 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1752 int tmpk1 = M31 * tmpj + N31;
1753 int tmpk2 = M32 * tmpj + N32;
1754 if (tmpk1 COND3 tmpk2)
1756 if (COND3 is <)
1757 adj = STEP3 - 1;
1758 else
1759 adj = STEP3 + 1;
1760 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1763 and finally multiply the counts of the rectangular loops not
1764 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1765 store number of iterations of the loops from fd->first_nonrect
1766 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1767 by the counts of rectangular loops not referenced in any non-rectangular
1768    loops sandwiched in between those. */
1770 /* NOTE: It *could* be better to moosh all of the BBs together,
1771 creating one larger BB with all the computation and the unexpected
1772 jump at the end. I.e.
1774 bool zero3, zero2, zero1, zero;
1776 zero3 = N32 c3 N31;
1777 count3 = (N32 - N31) /[cl] STEP3;
1778 zero2 = N22 c2 N21;
1779 count2 = (N22 - N21) /[cl] STEP2;
1780 zero1 = N12 c1 N11;
1781 count1 = (N12 - N11) /[cl] STEP1;
1782 zero = zero3 || zero2 || zero1;
1783 count = count1 * count2 * count3;
1784 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1786 After all, we expect the zero=false, and thus we expect to have to
1787 evaluate all of the comparison expressions, so short-circuiting
1788 oughtn't be a win. Since the condition isn't protecting a
1789 denominator, we're not concerned about divide-by-zero, so we can
1790 fully evaluate count even if a numerator turned out to be wrong.
1792 It seems like putting this all together would create much better
1793 scheduling opportunities, and less pressure on the chip's branch
1794 predictor. */
1796 static void
1797 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 basic_block &entry_bb, tree *counts,
1799 basic_block &zero_iter1_bb, int &first_zero_iter1,
1800 basic_block &zero_iter2_bb, int &first_zero_iter2,
1801 basic_block &l2_dom_bb)
1803 tree t, type = TREE_TYPE (fd->loop.v);
1804 edge e, ne;
1805 int i;
1807 /* Collapsed loops need work for expansion into SSA form. */
1808 gcc_assert (!gimple_in_ssa_p (cfun));
1810 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1811 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1813 gcc_assert (fd->ordered == 0);
1814 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1815 isn't supposed to be handled, as the inner loop doesn't
1816 use it. */
1817 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1818 OMP_CLAUSE__LOOPTEMP_);
1819 gcc_assert (innerc);
1820 for (i = 0; i < fd->collapse; i++)
1822 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1823 OMP_CLAUSE__LOOPTEMP_);
1824 gcc_assert (innerc);
1825 if (i)
1826 counts[i] = OMP_CLAUSE_DECL (innerc);
1827 else
1828 counts[0] = NULL_TREE;
1830 if (fd->non_rect
1831 && fd->last_nonrect == fd->first_nonrect + 1
1832 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1834 tree c[4];
1835 for (i = 0; i < 4; i++)
1837 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1838 OMP_CLAUSE__LOOPTEMP_);
1839 gcc_assert (innerc);
1840 c[i] = OMP_CLAUSE_DECL (innerc);
1842 counts[0] = c[0];
1843 fd->first_inner_iterations = c[1];
1844 fd->factor = c[2];
1845 fd->adjn1 = c[3];
1847 return;
1850 for (i = fd->collapse; i < fd->ordered; i++)
1852 tree itype = TREE_TYPE (fd->loops[i].v);
1853 counts[i] = NULL_TREE;
1854 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1855 fold_convert (itype, fd->loops[i].n1),
1856 fold_convert (itype, fd->loops[i].n2));
1857 if (t && integer_zerop (t))
1859 for (i = fd->collapse; i < fd->ordered; i++)
1860 counts[i] = build_int_cst (type, 0);
1861 break;
1864 bool rect_count_seen = false;
1865 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1867 tree itype = TREE_TYPE (fd->loops[i].v);
1869 if (i >= fd->collapse && counts[i])
1870 continue;
1871 if (fd->non_rect)
1873 /* Skip loops that use outer iterators in their expressions
1874 during this phase. */
1875 if (fd->loops[i].m1 || fd->loops[i].m2)
1877 counts[i] = build_zero_cst (type);
1878 continue;
1881 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1882 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1883 fold_convert (itype, fd->loops[i].n1),
1884 fold_convert (itype, fd->loops[i].n2)))
1885 == NULL_TREE || !integer_onep (t)))
1887 gcond *cond_stmt;
1888 tree n1, n2;
1889 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1890 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1891 true, GSI_SAME_STMT);
1892 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1893 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1894 true, GSI_SAME_STMT);
1895 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1896 n1, n2);
1897 e = split_block (entry_bb, cond_stmt);
1898 basic_block &zero_iter_bb
1899 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1900 int &first_zero_iter
1901 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1902 if (zero_iter_bb == NULL)
1904 gassign *assign_stmt;
1905 first_zero_iter = i;
1906 zero_iter_bb = create_empty_bb (entry_bb);
1907 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1908 *gsi = gsi_after_labels (zero_iter_bb);
1909 if (i < fd->collapse)
1910 assign_stmt = gimple_build_assign (fd->loop.n2,
1911 build_zero_cst (type));
1912 else
1914 counts[i] = create_tmp_reg (type, ".count");
1915 assign_stmt
1916 = gimple_build_assign (counts[i], build_zero_cst (type));
1918 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1919 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1920 entry_bb);
1922 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1923 ne->probability = profile_probability::very_unlikely ();
1924 e->flags = EDGE_TRUE_VALUE;
1925 e->probability = ne->probability.invert ();
1926 if (l2_dom_bb == NULL)
1927 l2_dom_bb = entry_bb;
1928 entry_bb = e->dest;
1929 *gsi = gsi_last_nondebug_bb (entry_bb);
1932 if (POINTER_TYPE_P (itype))
1933 itype = signed_type_for (itype);
1934 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1935 ? -1 : 1));
1936 t = fold_build2 (PLUS_EXPR, itype,
1937 fold_convert (itype, fd->loops[i].step), t);
1938 t = fold_build2 (PLUS_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].n2));
1940 t = fold_build2 (MINUS_EXPR, itype, t,
1941 fold_convert (itype, fd->loops[i].n1));
1942 /* ?? We could probably use CEIL_DIV_EXPR instead of
1943 TRUNC_DIV_EXPR and adjust by hand.  Then again, we might not be
1944 able to generate the same code in the end, because generically we
1945 don't know that the values involved must be negative for
1946 GT??  */
1947 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1948 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1949 fold_build1 (NEGATE_EXPR, itype, t),
1950 fold_build1 (NEGATE_EXPR, itype,
1951 fold_convert (itype,
1952 fd->loops[i].step)));
1953 else
1954 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1955 fold_convert (itype, fd->loops[i].step));
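/* (Hypothetical sanity check of the unsigned GT case: for
     for (unsigned u = 10; u > 2; u -= 3)
   t is -3 + 1 + 2 - 10 == (unsigned) -10; negating both t and the step
   yields 10 / 3 == 3, matching the iterations u = 10, 7, 4.)  */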
1956 t = fold_convert (type, t);
1957 if (TREE_CODE (t) == INTEGER_CST)
1958 counts[i] = t;
1959 else
1961 if (i < fd->collapse || i != first_zero_iter2)
1962 counts[i] = create_tmp_reg (type, ".count");
1963 expand_omp_build_assign (gsi, counts[i], t);
1965 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1967 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1968 continue;
1969 if (!rect_count_seen)
1971 t = counts[i];
1972 rect_count_seen = true;
1974 else
1975 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1976 expand_omp_build_assign (gsi, fd->loop.n2, t);
1979 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1981 gcc_assert (fd->last_nonrect != -1);
1983 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1984 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1985 build_zero_cst (type));
1986 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1987 if (fd->loops[i].m1
1988 || fd->loops[i].m2
1989 || fd->loops[i].non_rect_referenced)
1990 break;
1991 if (i == fd->last_nonrect
1992 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1993 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1994 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1996 int o = fd->first_nonrect;
1997 tree itype = TREE_TYPE (fd->loops[o].v);
1998 tree n1o = create_tmp_reg (itype, ".n1o");
1999 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
2000 expand_omp_build_assign (gsi, n1o, t);
2001 tree n2o = create_tmp_reg (itype, ".n2o");
2002 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2003 expand_omp_build_assign (gsi, n2o, t);
2004 if (fd->loops[i].m1 && fd->loops[i].m2)
2005 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2006 unshare_expr (fd->loops[i].m1));
2007 else if (fd->loops[i].m1)
2008 t = fold_unary (NEGATE_EXPR, itype,
2009 unshare_expr (fd->loops[i].m1));
2010 else
2011 t = unshare_expr (fd->loops[i].m2);
2012 tree m2minusm1
2013 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2014 true, GSI_SAME_STMT);
2016 gimple_stmt_iterator gsi2 = *gsi;
2017 gsi_prev (&gsi2);
2018 e = split_block (entry_bb, gsi_stmt (gsi2));
2019 e = split_block (e->dest, (gimple *) NULL);
2020 basic_block bb1 = e->src;
2021 entry_bb = e->dest;
2022 *gsi = gsi_after_labels (entry_bb);
2024 gsi2 = gsi_after_labels (bb1);
2025 tree ostep = fold_convert (itype, fd->loops[o].step);
2026 t = build_int_cst (itype, (fd->loops[o].cond_code
2027 == LT_EXPR ? -1 : 1));
2028 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2029 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2030 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2031 if (TYPE_UNSIGNED (itype)
2032 && fd->loops[o].cond_code == GT_EXPR)
2033 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2034 fold_build1 (NEGATE_EXPR, itype, t),
2035 fold_build1 (NEGATE_EXPR, itype, ostep));
2036 else
2037 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2038 tree outer_niters
2039 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2040 true, GSI_SAME_STMT);
2041 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2042 build_one_cst (itype));
2043 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2044 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2045 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 tree n1, n2, n1e, n2e;
2048 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2049 if (fd->loops[i].m1)
2051 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2052 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2053 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2055 else
2056 n1 = t;
2057 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2058 true, GSI_SAME_STMT);
2059 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2060 if (fd->loops[i].m2)
2062 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2063 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2064 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2066 else
2067 n2 = t;
2068 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2069 true, GSI_SAME_STMT);
2070 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2071 if (fd->loops[i].m1)
2073 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2074 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2075 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2077 else
2078 n1e = t;
2079 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2080 true, GSI_SAME_STMT);
2081 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2082 if (fd->loops[i].m2)
2084 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2085 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2086 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2088 else
2089 n2e = t;
2090 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2091 true, GSI_SAME_STMT);
2092 gcond *cond_stmt
2093 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2094 n1, n2);
2095 e = split_block (bb1, cond_stmt);
2096 e->flags = EDGE_TRUE_VALUE;
2097 e->probability = profile_probability::likely ().guessed ();
2098 basic_block bb2 = e->dest;
2099 gsi2 = gsi_after_labels (bb2);
2101 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2102 n1e, n2e);
2103 e = split_block (bb2, cond_stmt);
2104 e->flags = EDGE_TRUE_VALUE;
2105 e->probability = profile_probability::likely ().guessed ();
2106 gsi2 = gsi_after_labels (e->dest);
2108 tree step = fold_convert (itype, fd->loops[i].step);
2109 t = build_int_cst (itype, (fd->loops[i].cond_code
2110 == LT_EXPR ? -1 : 1));
2111 t = fold_build2 (PLUS_EXPR, itype, step, t);
2112 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2113 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2114 if (TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2117 fold_build1 (NEGATE_EXPR, itype, t),
2118 fold_build1 (NEGATE_EXPR, itype, step));
2119 else
2120 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2121 tree first_inner_iterations
2122 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2123 true, GSI_SAME_STMT);
2124 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2125 if (TYPE_UNSIGNED (itype)
2126 && fd->loops[i].cond_code == GT_EXPR)
2127 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2128 fold_build1 (NEGATE_EXPR, itype, t),
2129 fold_build1 (NEGATE_EXPR, itype, step));
2130 else
2131 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2132 tree factor
2133 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2134 true, GSI_SAME_STMT);
2135 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2136 build_one_cst (itype));
2137 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2138 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2139 t = fold_build2 (MULT_EXPR, itype, factor, t);
2140 t = fold_build2 (PLUS_EXPR, itype,
2141 fold_build2 (MULT_EXPR, itype, outer_niters,
2142 first_inner_iterations), t);
2143 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2144 fold_convert (type, t));
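/* In other words (a sketch with hypothetical scalar names), for the
   sub-nest
     for (i = n1o; i COND n2o; i += ostep)
       for (j = m1 * i + n21; j COND2 m2 * i + n22; j += step)
   the block just built computes
     fii    = inner iterations of the first outer iteration;
     factor = (m2 - m1) * ostep / step;   // growth per outer iteration
     count  = outer_niters * fii
	      + factor * (outer_niters * (outer_niters - 1) / 2);
   the last line being Faulhaber's formula for summing the arithmetic
   series fii, fii + factor, fii + 2 * factor, ...  */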
2146 basic_block bb3 = create_empty_bb (bb1);
2147 add_bb_to_loop (bb3, bb1->loop_father);
2149 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2150 e->probability = profile_probability::unlikely ().guessed ();
2152 gsi2 = gsi_after_labels (bb3);
2153 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2154 n1e, n2e);
2155 e = split_block (bb3, cond_stmt);
2156 e->flags = EDGE_TRUE_VALUE;
2157 e->probability = profile_probability::likely ().guessed ();
2158 basic_block bb4 = e->dest;
2160 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2161 ne->probability = e->probability.invert ();
2163 basic_block bb5 = create_empty_bb (bb2);
2164 add_bb_to_loop (bb5, bb2->loop_father);
2166 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2167 ne->probability = profile_probability::unlikely ().guessed ();
2169 for (int j = 0; j < 2; j++)
2171 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2172 t = fold_build2 (MINUS_EXPR, itype,
2173 unshare_expr (fd->loops[i].n1),
2174 unshare_expr (fd->loops[i].n2));
2175 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2176 tree tem
2177 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2178 true, GSI_SAME_STMT);
2179 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2180 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2181 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2182 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2183 true, GSI_SAME_STMT);
2184 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2185 if (fd->loops[i].m1)
2187 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2188 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2189 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2191 else
2192 n1 = t;
2193 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2194 true, GSI_SAME_STMT);
2195 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2196 if (fd->loops[i].m2)
2198 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2199 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2200 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2202 else
2203 n2 = t;
2204 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2205 true, GSI_SAME_STMT);
2206 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2208 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2209 n1, n2);
2210 e = split_block (gsi_bb (gsi2), cond_stmt);
2211 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2212 e->probability = profile_probability::unlikely ().guessed ();
2213 ne = make_edge (e->src, bb1,
2214 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2215 ne->probability = e->probability.invert ();
2216 gsi2 = gsi_after_labels (e->dest);
2218 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2219 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2221 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2224 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2225 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2226 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2228 if (fd->first_nonrect + 1 == fd->last_nonrect)
2230 fd->first_inner_iterations = first_inner_iterations;
2231 fd->factor = factor;
2232 fd->adjn1 = n1o;
2235 else
2237 /* Fallback implementation. Evaluate the loops with m1/m2
2238 non-NULL as well as their outer loops at runtime using temporaries
2239 instead of the original iteration variables, and in the
2240 body just bump the counter. */
2241 gimple_stmt_iterator gsi2 = *gsi;
2242 gsi_prev (&gsi2);
2243 e = split_block (entry_bb, gsi_stmt (gsi2));
2244 e = split_block (e->dest, (gimple *) NULL);
2245 basic_block cur_bb = e->src;
2246 basic_block next_bb = e->dest;
2247 entry_bb = e->dest;
2248 *gsi = gsi_after_labels (entry_bb);
2250 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2251 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2253 for (i = 0; i <= fd->last_nonrect; i++)
2255 if (fd->loops[i].m1 == NULL_TREE
2256 && fd->loops[i].m2 == NULL_TREE
2257 && !fd->loops[i].non_rect_referenced)
2258 continue;
2260 tree itype = TREE_TYPE (fd->loops[i].v);
2262 gsi2 = gsi_after_labels (cur_bb);
2263 tree n1, n2;
2264 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2265 if (fd->loops[i].m1 == NULL_TREE)
2266 n1 = t;
2267 else if (POINTER_TYPE_P (itype))
2269 gcc_assert (integer_onep (fd->loops[i].m1));
2270 t = fold_convert (sizetype,
2271 unshare_expr (fd->loops[i].n1));
2272 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2274 else
2276 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2277 n1 = fold_build2 (MULT_EXPR, itype,
2278 vs[i - fd->loops[i].outer], n1);
2279 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2281 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2282 true, GSI_SAME_STMT);
2283 if (i < fd->last_nonrect)
2285 vs[i] = create_tmp_reg (itype, ".it");
2286 expand_omp_build_assign (&gsi2, vs[i], n1);
2288 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2289 if (fd->loops[i].m2 == NULL_TREE)
2290 n2 = t;
2291 else if (POINTER_TYPE_P (itype))
2293 gcc_assert (integer_onep (fd->loops[i].m2));
2294 t = fold_convert (sizetype,
2295 unshare_expr (fd->loops[i].n2));
2296 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2298 else
2300 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2301 n2 = fold_build2 (MULT_EXPR, itype,
2302 vs[i - fd->loops[i].outer], n2);
2303 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2305 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2306 true, GSI_SAME_STMT);
2307 if (POINTER_TYPE_P (itype))
2308 itype = signed_type_for (itype);
2309 if (i == fd->last_nonrect)
2311 gcond *cond_stmt
2312 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2313 n1, n2);
2314 e = split_block (cur_bb, cond_stmt);
2315 e->flags = EDGE_TRUE_VALUE;
2316 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2317 e->probability = profile_probability::likely ().guessed ();
2318 ne->probability = e->probability.invert ();
2319 gsi2 = gsi_after_labels (e->dest);
2321 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2322 ? -1 : 1));
2323 t = fold_build2 (PLUS_EXPR, itype,
2324 fold_convert (itype, fd->loops[i].step), t);
2325 t = fold_build2 (PLUS_EXPR, itype, t,
2326 fold_convert (itype, n2));
2327 t = fold_build2 (MINUS_EXPR, itype, t,
2328 fold_convert (itype, n1));
2329 tree step = fold_convert (itype, fd->loops[i].step);
2330 if (TYPE_UNSIGNED (itype)
2331 && fd->loops[i].cond_code == GT_EXPR)
2332 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2333 fold_build1 (NEGATE_EXPR, itype, t),
2334 fold_build1 (NEGATE_EXPR, itype, step));
2335 else
2336 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2337 t = fold_convert (type, t);
2338 t = fold_build2 (PLUS_EXPR, type,
2339 counts[fd->last_nonrect], t);
2340 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2341 true, GSI_SAME_STMT);
2342 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2343 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2344 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2345 break;
2347 e = split_block (cur_bb, last_stmt (cur_bb));
2349 basic_block new_cur_bb = create_empty_bb (cur_bb);
2350 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2352 gsi2 = gsi_after_labels (e->dest);
2353 tree step = fold_convert (itype,
2354 unshare_expr (fd->loops[i].step));
2355 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2356 t = fold_build_pointer_plus (vs[i],
2357 fold_convert (sizetype, step));
2358 else
2359 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2360 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2361 true, GSI_SAME_STMT);
2362 expand_omp_build_assign (&gsi2, vs[i], t);
2364 ne = split_block (e->dest, last_stmt (e->dest));
2365 gsi2 = gsi_after_labels (ne->dest);
2367 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2368 edge e3, e4;
2369 if (next_bb == entry_bb)
2371 e3 = find_edge (ne->dest, next_bb);
2372 e3->flags = EDGE_FALSE_VALUE;
2374 else
2375 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2376 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2377 e4->probability = profile_probability::likely ().guessed ();
2378 e3->probability = e4->probability.invert ();
2379 basic_block esrc = e->src;
2380 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2381 cur_bb = new_cur_bb;
2382 basic_block latch_bb = next_bb;
2383 next_bb = e->dest;
2384 remove_edge (e);
2385 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2386 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2387 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2390 t = NULL_TREE;
2391 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2392 if (!fd->loops[i].non_rect_referenced
2393 && fd->loops[i].m1 == NULL_TREE
2394 && fd->loops[i].m2 == NULL_TREE)
2396 if (t == NULL_TREE)
2397 t = counts[i];
2398 else
2399 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2401 if (t)
2403 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2404 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2406 if (!rect_count_seen)
2407 t = counts[fd->last_nonrect];
2408 else
2409 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2410 counts[fd->last_nonrect]);
2411 expand_omp_build_assign (gsi, fd->loop.n2, t);
2413 else if (fd->non_rect)
2415 tree t = fd->loop.n2;
2416 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2417 int non_rect_referenced = 0, non_rect = 0;
2418 for (i = 0; i < fd->collapse; i++)
2420 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2421 && !integer_zerop (counts[i]))
2422 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2423 if (fd->loops[i].non_rect_referenced)
2424 non_rect_referenced++;
2425 if (fd->loops[i].m1 || fd->loops[i].m2)
2426 non_rect++;
2428 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2429 counts[fd->last_nonrect] = t;
2433 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2434 T = V;
2435 V3 = N31 + (T % count3) * STEP3;
2436 T = T / count3;
2437 V2 = N21 + (T % count2) * STEP2;
2438 T = T / count2;
2439 V1 = N11 + T * STEP1;
2440 if this loop doesn't have an inner loop construct combined with it.
2441 If it does have an inner loop construct combined with it and the
2442 iteration count isn't known constant, store values from counts array
2443 into its _looptemp_ temporaries instead.
2444 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2445 inclusive), use the count of all those loops together, and either
2446 find the roots of the quadratic etc. equation, or, as a fallback, do:
2447 COUNT = 0;
2448 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2449 for (tmpj = M21 * tmpi + N21;
2450 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2452 int tmpk1 = M31 * tmpj + N31;
2453 int tmpk2 = M32 * tmpj + N32;
2454 if (tmpk1 COND3 tmpk2)
2456 if (COND3 is <)
2457 adj = STEP3 - 1;
2458 else
2459 adj = STEP3 + 1;
2460 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2461 if (COUNT + temp > T)
2463 V1 = tmpi;
2464 V2 = tmpj;
2465 V3 = tmpk1 + (T - COUNT) * STEP3;
2466 goto done;
2468 else
2469 COUNT += temp;
2472 done:;
2473 but for optional innermost or outermost rectangular loops that aren't
2474 referenced by other loop expressions, keep doing the division/modulo.  */
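/* A concrete (hypothetical) instance of the division/modulo decoding:
   with collapse == 2, count2 == 3, all lower bounds 0 and all steps 1,
   the value V == 7 decodes as
     T = 7;
     V2 = 0 + (7 % 3) * 1;	// == 1
     T = 7 / 3;			// == 2
     V1 = 0 + 2 * 1;		// == 2
   i.e. linear iteration 7 is (V1, V2) == (2, 1) of the original nest.  */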
2476 static void
2477 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2478 tree *counts, tree *nonrect_bounds,
2479 gimple *inner_stmt, tree startvar)
2481 int i;
2482 if (gimple_omp_for_combined_p (fd->for_stmt))
2484 /* If fd->loop.n2 is constant, then no propagation of the counts
2485 is needed, they are constant. */
2486 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2487 return;
2489 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2490 ? gimple_omp_taskreg_clauses (inner_stmt)
2491 : gimple_omp_for_clauses (inner_stmt);
2492 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2493 isn't supposed to be handled, as the inner loop doesn't
2494 use it. */
2495 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2496 gcc_assert (innerc);
2497 int count = 0;
2498 if (fd->non_rect
2499 && fd->last_nonrect == fd->first_nonrect + 1
2500 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2501 count = 4;
2502 for (i = 0; i < fd->collapse + count; i++)
2504 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2505 OMP_CLAUSE__LOOPTEMP_);
2506 gcc_assert (innerc);
2507 if (i)
2509 tree tem = OMP_CLAUSE_DECL (innerc);
2510 tree t;
2511 if (i < fd->collapse)
2512 t = counts[i];
2513 else
2514 switch (i - fd->collapse)
2516 case 0: t = counts[0]; break;
2517 case 1: t = fd->first_inner_iterations; break;
2518 case 2: t = fd->factor; break;
2519 case 3: t = fd->adjn1; break;
2520 default: gcc_unreachable ();
2522 t = fold_convert (TREE_TYPE (tem), t);
2523 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2524 false, GSI_CONTINUE_LINKING);
2525 gassign *stmt = gimple_build_assign (tem, t);
2526 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2529 return;
2532 tree type = TREE_TYPE (fd->loop.v);
2533 tree tem = create_tmp_reg (type, ".tem");
2534 gassign *stmt = gimple_build_assign (tem, startvar);
2535 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2537 for (i = fd->collapse - 1; i >= 0; i--)
2539 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2540 itype = vtype;
2541 if (POINTER_TYPE_P (vtype))
2542 itype = signed_type_for (vtype);
2543 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2544 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2545 else
2546 t = tem;
2547 if (i == fd->last_nonrect)
2549 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2550 false, GSI_CONTINUE_LINKING);
2551 tree stopval = t;
2552 tree idx = create_tmp_reg (type, ".count");
2553 expand_omp_build_assign (gsi, idx,
2554 build_zero_cst (type), true);
2555 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2556 if (fd->first_nonrect + 1 == fd->last_nonrect
2557 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2558 || fd->first_inner_iterations)
2559 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2560 != CODE_FOR_nothing)
2561 && !integer_zerop (fd->loop.n2))
2563 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2564 tree itype = TREE_TYPE (fd->loops[i].v);
2565 tree first_inner_iterations = fd->first_inner_iterations;
2566 tree factor = fd->factor;
2567 gcond *cond_stmt
2568 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2569 build_zero_cst (TREE_TYPE (factor)));
2570 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2571 basic_block bb0 = e->src;
2572 e->flags = EDGE_TRUE_VALUE;
2573 e->probability = profile_probability::likely ();
2574 bb_triang_dom = bb0;
2575 *gsi = gsi_after_labels (e->dest);
2576 tree slltype = long_long_integer_type_node;
2577 tree ulltype = long_long_unsigned_type_node;
2578 tree stopvalull = fold_convert (ulltype, stopval);
2579 stopvalull
2580 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2581 false, GSI_CONTINUE_LINKING);
2582 first_inner_iterations
2583 = fold_convert (slltype, first_inner_iterations);
2584 first_inner_iterations
2585 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2586 NULL_TREE, false,
2587 GSI_CONTINUE_LINKING);
2588 factor = fold_convert (slltype, factor);
2589 factor
2590 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2591 false, GSI_CONTINUE_LINKING);
2592 tree first_inner_iterationsd
2593 = fold_build1 (FLOAT_EXPR, double_type_node,
2594 first_inner_iterations);
2595 first_inner_iterationsd
2596 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2597 NULL_TREE, false,
2598 GSI_CONTINUE_LINKING);
2599 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2600 factor);
2601 factord = force_gimple_operand_gsi (gsi, factord, true,
2602 NULL_TREE, false,
2603 GSI_CONTINUE_LINKING);
2604 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2605 stopvalull);
2606 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2607 NULL_TREE, false,
2608 GSI_CONTINUE_LINKING);
2609 /* Temporarily disable flag_rounding_math; the values will be
2610 decimal numbers divided by 2, and worst-case imprecisions
2611 due to too large values ought to be caught later by the
2612 checks for the fallback.  */
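/* What follows solves, in double precision, the quadratic implied by
   Faulhaber's formula.  As a sketch with hypothetical scalars:
     t3   = fii - factor / 2.0;
     disc = 2.0 * factor * T + t3 * t3;	        // discriminant
     if (disc < 0.0) goto fallback;
     c = (unsigned long long) ((sqrt (disc) - t3) / factor);
     d = factor * (c * (c - 1) / 2) + c * fii;  // iters before outer c
   and then verifies d <= T < d + (fii + c * factor), taking the
   fallback path if rounding made the estimate off.  */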
2613 int save_flag_rounding_math = flag_rounding_math;
2614 flag_rounding_math = 0;
2615 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2616 build_real (double_type_node, dconst2));
2617 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2618 first_inner_iterationsd, t);
2619 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2620 GSI_CONTINUE_LINKING);
2621 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2622 build_real (double_type_node, dconst2));
2623 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2624 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2625 fold_build2 (MULT_EXPR, double_type_node,
2626 t3, t3));
2627 flag_rounding_math = save_flag_rounding_math;
2628 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2629 GSI_CONTINUE_LINKING);
2630 if (flag_exceptions
2631 && cfun->can_throw_non_call_exceptions
2632 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2634 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2635 build_zero_cst (double_type_node));
2636 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2637 false, GSI_CONTINUE_LINKING);
2638 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2639 boolean_false_node,
2640 NULL_TREE, NULL_TREE);
2642 else
2643 cond_stmt
2644 = gimple_build_cond (LT_EXPR, t,
2645 build_zero_cst (double_type_node),
2646 NULL_TREE, NULL_TREE);
2647 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2648 e = split_block (gsi_bb (*gsi), cond_stmt);
2649 basic_block bb1 = e->src;
2650 e->flags = EDGE_FALSE_VALUE;
2651 e->probability = profile_probability::very_likely ();
2652 *gsi = gsi_after_labels (e->dest);
2653 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2654 tree sqrtr = create_tmp_var (double_type_node);
2655 gimple_call_set_lhs (call, sqrtr);
2656 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2657 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2658 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2659 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2660 tree c = create_tmp_var (ulltype);
2661 tree d = create_tmp_var (ulltype);
2662 expand_omp_build_assign (gsi, c, t, true);
2663 t = fold_build2 (MINUS_EXPR, ulltype, c,
2664 build_one_cst (ulltype));
2665 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2666 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2667 t = fold_build2 (MULT_EXPR, ulltype,
2668 fold_convert (ulltype, fd->factor), t);
2669 tree t2
2670 = fold_build2 (MULT_EXPR, ulltype, c,
2671 fold_convert (ulltype,
2672 fd->first_inner_iterations));
2673 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2674 expand_omp_build_assign (gsi, d, t, true);
2675 t = fold_build2 (MULT_EXPR, ulltype,
2676 fold_convert (ulltype, fd->factor), c);
2677 t = fold_build2 (PLUS_EXPR, ulltype,
2678 t, fold_convert (ulltype,
2679 fd->first_inner_iterations));
2680 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2681 GSI_CONTINUE_LINKING);
2682 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2683 NULL_TREE, NULL_TREE);
2684 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2685 e = split_block (gsi_bb (*gsi), cond_stmt);
2686 basic_block bb2 = e->src;
2687 e->flags = EDGE_TRUE_VALUE;
2688 e->probability = profile_probability::very_likely ();
2689 *gsi = gsi_after_labels (e->dest);
2690 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2691 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2692 GSI_CONTINUE_LINKING);
2693 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2694 NULL_TREE, NULL_TREE);
2695 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2696 e = split_block (gsi_bb (*gsi), cond_stmt);
2697 basic_block bb3 = e->src;
2698 e->flags = EDGE_FALSE_VALUE;
2699 e->probability = profile_probability::very_likely ();
2700 *gsi = gsi_after_labels (e->dest);
2701 t = fold_convert (itype, c);
2702 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2703 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2704 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2705 GSI_CONTINUE_LINKING);
2706 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2707 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2708 t2 = fold_convert (itype, t2);
2709 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2710 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2711 if (fd->loops[i].m1)
2713 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2714 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2716 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2717 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2718 bb_triang = e->src;
2719 *gsi = gsi_after_labels (e->dest);
2720 remove_edge (e);
2721 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2722 e->probability = profile_probability::very_unlikely ();
2723 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2724 e->probability = profile_probability::very_unlikely ();
2725 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2726 e->probability = profile_probability::very_unlikely ();
2728 basic_block bb4 = create_empty_bb (bb0);
2729 add_bb_to_loop (bb4, bb0->loop_father);
2730 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2731 e->probability = profile_probability::unlikely ();
2732 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2733 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2734 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2735 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2736 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2737 counts[i], counts[i - 1]);
2738 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2739 GSI_CONTINUE_LINKING);
2740 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2741 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2742 t = fold_convert (itype, t);
2743 t2 = fold_convert (itype, t2);
2744 t = fold_build2 (MULT_EXPR, itype, t,
2745 fold_convert (itype, fd->loops[i].step));
2746 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2747 t2 = fold_build2 (MULT_EXPR, itype, t2,
2748 fold_convert (itype, fd->loops[i - 1].step));
2749 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2750 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2751 false, GSI_CONTINUE_LINKING);
2752 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2753 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2754 if (fd->loops[i].m1)
2756 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2757 fd->loops[i - 1].v);
2758 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2760 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2761 false, GSI_CONTINUE_LINKING);
2762 stmt = gimple_build_assign (fd->loops[i].v, t);
2763 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2765 /* Fallback implementation.  Evaluate the loops in between
2766 (inclusive) fd->first_nonrect and fd->last_nonrect at
2767 runtime using temporaries instead of the original iteration
2768 variables; in the body just bump the counter and compare
2769 with the desired value.  */
2770 gimple_stmt_iterator gsi2 = *gsi;
2771 basic_block entry_bb = gsi_bb (gsi2);
2772 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2773 e = split_block (e->dest, (gimple *) NULL);
2774 basic_block dom_bb = NULL;
2775 basic_block cur_bb = e->src;
2776 basic_block next_bb = e->dest;
2777 entry_bb = e->dest;
2778 *gsi = gsi_after_labels (entry_bb);
2780 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2781 tree n1 = NULL_TREE, n2 = NULL_TREE;
2782 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2784 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2786 tree itype = TREE_TYPE (fd->loops[j].v);
2787 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2788 && fd->loops[j].m2 == NULL_TREE
2789 && !fd->loops[j].non_rect_referenced);
2790 gsi2 = gsi_after_labels (cur_bb);
2791 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2792 if (fd->loops[j].m1 == NULL_TREE)
2793 n1 = rect_p ? build_zero_cst (type) : t;
2794 else if (POINTER_TYPE_P (itype))
2796 gcc_assert (integer_onep (fd->loops[j].m1));
2797 t = fold_convert (sizetype,
2798 unshare_expr (fd->loops[j].n1));
2799 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2801 else
2803 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2804 n1 = fold_build2 (MULT_EXPR, itype,
2805 vs[j - fd->loops[j].outer], n1);
2806 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2808 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2809 true, GSI_SAME_STMT);
2810 if (j < fd->last_nonrect)
2812 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2813 expand_omp_build_assign (&gsi2, vs[j], n1);
2815 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2816 if (fd->loops[j].m2 == NULL_TREE)
2817 n2 = rect_p ? counts[j] : t;
2818 else if (POINTER_TYPE_P (itype))
2820 gcc_assert (integer_onep (fd->loops[j].m2));
2821 t = fold_convert (sizetype,
2822 unshare_expr (fd->loops[j].n2));
2823 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2825 else
2827 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2828 n2 = fold_build2 (MULT_EXPR, itype,
2829 vs[j - fd->loops[j].outer], n2);
2830 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2832 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2833 true, GSI_SAME_STMT);
2834 if (POINTER_TYPE_P (itype))
2835 itype = signed_type_for (itype);
2836 if (j == fd->last_nonrect)
2838 gcond *cond_stmt
2839 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2840 n1, n2);
2841 e = split_block (cur_bb, cond_stmt);
2842 e->flags = EDGE_TRUE_VALUE;
2843 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2844 e->probability = profile_probability::likely ().guessed ();
2845 ne->probability = e->probability.invert ();
2846 gsi2 = gsi_after_labels (e->dest);
2848 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2849 ? -1 : 1));
2850 t = fold_build2 (PLUS_EXPR, itype,
2851 fold_convert (itype, fd->loops[j].step), t);
2852 t = fold_build2 (PLUS_EXPR, itype, t,
2853 fold_convert (itype, n2));
2854 t = fold_build2 (MINUS_EXPR, itype, t,
2855 fold_convert (itype, n1));
2856 tree step = fold_convert (itype, fd->loops[j].step);
2857 if (TYPE_UNSIGNED (itype)
2858 && fd->loops[j].cond_code == GT_EXPR)
2859 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2860 fold_build1 (NEGATE_EXPR, itype, t),
2861 fold_build1 (NEGATE_EXPR, itype, step));
2862 else
2863 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2864 t = fold_convert (type, t);
2865 t = fold_build2 (PLUS_EXPR, type, idx, t);
2866 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2867 true, GSI_SAME_STMT);
2868 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2869 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2870 cond_stmt
2871 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2872 NULL_TREE);
2873 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2874 e = split_block (gsi_bb (gsi2), cond_stmt);
2875 e->flags = EDGE_TRUE_VALUE;
2876 e->probability = profile_probability::likely ().guessed ();
2877 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2878 ne->probability = e->probability.invert ();
2879 gsi2 = gsi_after_labels (e->dest);
2880 expand_omp_build_assign (&gsi2, idx, t);
2881 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2882 break;
2884 e = split_block (cur_bb, last_stmt (cur_bb));
2886 basic_block new_cur_bb = create_empty_bb (cur_bb);
2887 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2889 gsi2 = gsi_after_labels (e->dest);
2890 if (rect_p)
2891 t = fold_build2 (PLUS_EXPR, type, vs[j],
2892 build_one_cst (type));
2893 else
2895 tree step
2896 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2897 if (POINTER_TYPE_P (vtype))
2898 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2899 step));
2900 else
2901 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2903 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2904 true, GSI_SAME_STMT);
2905 expand_omp_build_assign (&gsi2, vs[j], t);
2907 edge ne = split_block (e->dest, last_stmt (e->dest));
2908 gsi2 = gsi_after_labels (ne->dest);
2910 gcond *cond_stmt;
2911 if (next_bb == entry_bb)
2912 /* No need to actually check the outermost condition. */
2913 cond_stmt
2914 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2915 boolean_true_node,
2916 NULL_TREE, NULL_TREE);
2917 else
2918 cond_stmt
2919 = gimple_build_cond (rect_p ? LT_EXPR
2920 : fd->loops[j].cond_code,
2921 vs[j], n2, NULL_TREE, NULL_TREE);
2922 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2923 edge e3, e4;
2924 if (next_bb == entry_bb)
2926 e3 = find_edge (ne->dest, next_bb);
2927 e3->flags = EDGE_FALSE_VALUE;
2928 dom_bb = ne->dest;
2930 else
2931 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2932 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2933 e4->probability = profile_probability::likely ().guessed ();
2934 e3->probability = e4->probability.invert ();
2935 basic_block esrc = e->src;
2936 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2937 cur_bb = new_cur_bb;
2938 basic_block latch_bb = next_bb;
2939 next_bb = e->dest;
2940 remove_edge (e);
2941 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2942 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2943 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2945 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2947 tree vtype = TREE_TYPE (fd->loops[j].v);
2948 tree itype = vtype;
2949 if (POINTER_TYPE_P (itype))
2950 itype = signed_type_for (itype);
2951 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2952 && fd->loops[j].m2 == NULL_TREE
2953 && !fd->loops[j].non_rect_referenced);
2954 if (j == fd->last_nonrect)
2956 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2957 t = fold_convert (itype, t);
2958 tree t2
2959 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2960 t = fold_build2 (MULT_EXPR, itype, t, t2);
2961 if (POINTER_TYPE_P (vtype))
2962 t = fold_build_pointer_plus (n1,
2963 fold_convert (sizetype, t));
2964 else
2965 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2967 else if (rect_p)
2969 t = fold_convert (itype, vs[j]);
2970 t = fold_build2 (MULT_EXPR, itype, t,
2971 fold_convert (itype, fd->loops[j].step));
2972 if (POINTER_TYPE_P (vtype))
2973 t = fold_build_pointer_plus (fd->loops[j].n1,
2974 fold_convert (sizetype, t));
2975 else
2976 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2978 else
2979 t = vs[j];
2980 t = force_gimple_operand_gsi (gsi, t, false,
2981 NULL_TREE, true,
2982 GSI_SAME_STMT);
2983 stmt = gimple_build_assign (fd->loops[j].v, t);
2984 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2986 if (gsi_end_p (*gsi))
2987 *gsi = gsi_last_bb (gsi_bb (*gsi));
2988 else
2989 gsi_prev (gsi);
2990 if (bb_triang)
2992 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2993 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2994 *gsi = gsi_after_labels (e->dest);
2995 if (!gsi_end_p (*gsi))
2996 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2997 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
3000 else
3002 t = fold_convert (itype, t);
3003 t = fold_build2 (MULT_EXPR, itype, t,
3004 fold_convert (itype, fd->loops[i].step));
3005 if (POINTER_TYPE_P (vtype))
3006 t = fold_build_pointer_plus (fd->loops[i].n1, t);
3007 else
3008 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3009 t = force_gimple_operand_gsi (gsi, t,
3010 DECL_P (fd->loops[i].v)
3011 && TREE_ADDRESSABLE (fd->loops[i].v),
3012 NULL_TREE, false,
3013 GSI_CONTINUE_LINKING);
3014 stmt = gimple_build_assign (fd->loops[i].v, t);
3015 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3017 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3019 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3020 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3021 false, GSI_CONTINUE_LINKING);
3022 stmt = gimple_build_assign (tem, t);
3023 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3025 if (i == fd->last_nonrect)
3026 i = fd->first_nonrect;
3028 if (fd->non_rect)
3029 for (i = 0; i <= fd->last_nonrect; i++)
3030 if (fd->loops[i].m2)
3032 tree itype = TREE_TYPE (fd->loops[i].v);
3034 tree t;
3035 if (POINTER_TYPE_P (itype))
3037 gcc_assert (integer_onep (fd->loops[i].m2));
3038 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3039 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3042 else
3044 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3045 t = fold_build2 (MULT_EXPR, itype,
3046 fd->loops[i - fd->loops[i].outer].v, t);
3047 t = fold_build2 (PLUS_EXPR, itype, t,
3048 fold_convert (itype,
3049 unshare_expr (fd->loops[i].n2)));
3051 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3052 t = force_gimple_operand_gsi (gsi, t, false,
3053 NULL_TREE, false,
3054 GSI_CONTINUE_LINKING);
3055 stmt = gimple_build_assign (nonrect_bounds[i], t);
3056 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3060 /* Helper function for expand_omp_for_*. Generate code like:
3061 L10:
3062 V3 += STEP3;
3063 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3064 L11:
3065 V3 = N31;
3066 V2 += STEP2;
3067 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3068 L12:
3069 V2 = N21;
3070 V1 += STEP1;
3071 goto BODY_BB;
3072 For non-rectangular loops, use temporaries stored in nonrect_bounds
3073 for the upper bounds if an M?2 multiplier is present.  Given e.g.
3074 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3075 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3076 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3077 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3079 L10:
3080 V4 += STEP4;
3081 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3082 L11:
3083 V4 = N41 + M41 * V2; // This can be left out if the loop
3084 // refers to the immediate parent loop
3085 V3 += STEP3;
3086 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3087 L12:
3088 V3 = N31;
3089 V2 += STEP2;
3090 if (V2 cond2 N22) goto L120; else goto L13;
3091 L120:
3092 V4 = N41 + M41 * V2;
3093 NONRECT_BOUND4 = N42 + M42 * V2;
3094 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3095 L13:
3096 V2 = N21;
3097 V1 += STEP1;
3098 goto L120; */
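/* A concrete (hypothetical) rectangular instance with collapse == 2:
     for (V1 = 0; V1 < 4; V1++)
       for (V2 = 0; V2 < 3; V2++)
   yields the update block
     L10:
     V2 += 1;
     if (V2 < 3) goto BODY_BB; else goto L11;
     L11:
     V2 = 0;
     V1 += 1;
     goto BODY_BB;
   Only the inner bump is conditional; the outermost increment falls
   through because the enclosing region tests the collapsed count.  */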
3100 static basic_block
3101 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3102 basic_block cont_bb, basic_block body_bb)
3104 basic_block last_bb, bb, collapse_bb = NULL;
3105 int i;
3106 gimple_stmt_iterator gsi;
3107 edge e;
3108 tree t;
3109 gimple *stmt;
3111 last_bb = cont_bb;
3112 for (i = fd->collapse - 1; i >= 0; i--)
3114 tree vtype = TREE_TYPE (fd->loops[i].v);
3116 bb = create_empty_bb (last_bb);
3117 add_bb_to_loop (bb, last_bb->loop_father);
3118 gsi = gsi_start_bb (bb);
3120 if (i < fd->collapse - 1)
3122 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3123 e->probability = profile_probability::guessed_always () / 8;
3125 struct omp_for_data_loop *l = &fd->loops[i + 1];
3126 if (l->m1 == NULL_TREE || l->outer != 1)
3128 t = l->n1;
3129 if (l->m1)
3131 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3132 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3133 fold_convert (sizetype, t));
3134 else
3136 tree t2
3137 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3138 fd->loops[i + 1 - l->outer].v, l->m1);
3139 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3142 t = force_gimple_operand_gsi (&gsi, t,
3143 DECL_P (l->v)
3144 && TREE_ADDRESSABLE (l->v),
3145 NULL_TREE, false,
3146 GSI_CONTINUE_LINKING);
3147 stmt = gimple_build_assign (l->v, t);
3148 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3151 else
3152 collapse_bb = bb;
3154 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3156 if (POINTER_TYPE_P (vtype))
3157 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3158 else
3159 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3160 t = force_gimple_operand_gsi (&gsi, t,
3161 DECL_P (fd->loops[i].v)
3162 && TREE_ADDRESSABLE (fd->loops[i].v),
3163 NULL_TREE, false, GSI_CONTINUE_LINKING);
3164 stmt = gimple_build_assign (fd->loops[i].v, t);
3165 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3167 if (fd->loops[i].non_rect_referenced)
3169 basic_block update_bb = NULL, prev_bb = NULL;
3170 for (int j = i + 1; j <= fd->last_nonrect; j++)
3171 if (j - fd->loops[j].outer == i)
3173 tree n1, n2;
3174 struct omp_for_data_loop *l = &fd->loops[j];
3175 basic_block this_bb = create_empty_bb (last_bb);
3176 add_bb_to_loop (this_bb, last_bb->loop_father);
3177 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3178 if (prev_bb)
3180 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3181 e->probability
3182 = profile_probability::guessed_always ().apply_scale (7,
3184 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3186 if (l->m1)
3188 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3189 t = fold_build_pointer_plus (fd->loops[i].v,
3190 fold_convert (sizetype,
3191 l->n1));
3192 else
3194 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3195 fd->loops[i].v);
3196 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3197 t, l->n1);
3199 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3200 false,
3201 GSI_CONTINUE_LINKING);
3202 stmt = gimple_build_assign (l->v, n1);
3203 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3204 n1 = l->v;
3206 else
3207 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3208 NULL_TREE, false,
3209 GSI_CONTINUE_LINKING);
3210 if (l->m2)
3212 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3213 t = fold_build_pointer_plus (fd->loops[i].v,
3214 fold_convert (sizetype,
3215 l->n2));
3216 else
3218 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3219 fd->loops[i].v);
3220 t = fold_build2 (PLUS_EXPR,
3221 TREE_TYPE (nonrect_bounds[j]),
3222 t, unshare_expr (l->n2));
3224 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3225 false,
3226 GSI_CONTINUE_LINKING);
3227 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3228 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3229 n2 = nonrect_bounds[j];
3231 else
3232 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3233 true, NULL_TREE, false,
3234 GSI_CONTINUE_LINKING);
3235 gcond *cond_stmt
3236 = gimple_build_cond (l->cond_code, n1, n2,
3237 NULL_TREE, NULL_TREE);
3238 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3239 if (update_bb == NULL)
3240 update_bb = this_bb;
3241 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3242 e->probability = profile_probability::guessed_always () / 8;
3243 if (prev_bb == NULL)
3244 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3245 prev_bb = this_bb;
3247 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3248 e->probability
3249 = profile_probability::guessed_always ().apply_scale (7, 8);
3250 body_bb = update_bb;
3253 if (i > 0)
3255 if (fd->loops[i].m2)
3256 t = nonrect_bounds[i];
3257 else
3258 t = unshare_expr (fd->loops[i].n2);
3259 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260 false, GSI_CONTINUE_LINKING);
3261 tree v = fd->loops[i].v;
3262 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3263 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3264 false, GSI_CONTINUE_LINKING);
3265 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3266 stmt = gimple_build_cond_empty (t);
3267 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3268 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3269 expand_omp_regimplify_p, NULL, NULL)
3270 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3271 expand_omp_regimplify_p, NULL, NULL))
3272 gimple_regimplify_operands (stmt, &gsi);
3273 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3274 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3276 else
3277 make_edge (bb, body_bb, EDGE_FALLTHRU);
3278 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3279 last_bb = bb;
3282 return collapse_bb;
3285 /* Expand #pragma omp ordered depend(source). */
3287 static void
3288 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3289 tree *counts, location_t loc)
3291 enum built_in_function source_ix
3292 = fd->iter_type == long_integer_type_node
3293 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3294 gimple *g
3295 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3296 build_fold_addr_expr (counts[fd->ordered]));
3297 gimple_set_location (g, loc);
3298 gsi_insert_before (gsi, g, GSI_SAME_STMT);
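/* For reference (a sketch, not emitted verbatim): for long iteration
   types this builds a call along the lines of
     GOMP_doacross_post (&iter_vec);
   where iter_vec stands for the variable held in counts[fd->ordered],
   i.e. the vector of current iteration numbers; the ULL variant is
   used for unsigned long long iteration types.  */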
3301 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3303 static void
3304 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3305 tree *counts, tree c, location_t loc)
3307 auto_vec<tree, 10> args;
3308 enum built_in_function sink_ix
3309 = fd->iter_type == long_integer_type_node
3310 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3311 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3312 int i;
3313 gimple_stmt_iterator gsi2 = *gsi;
3314 bool warned_step = false;
3316 for (i = 0; i < fd->ordered; i++)
3318 tree step = NULL_TREE;
3319 off = TREE_PURPOSE (deps);
3320 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3322 step = TREE_OPERAND (off, 1);
3323 off = TREE_OPERAND (off, 0);
3325 if (!integer_zerop (off))
3327 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3328 || fd->loops[i].cond_code == GT_EXPR);
3329 bool forward = fd->loops[i].cond_code == LT_EXPR;
3330 if (step)
3332 /* Non-simple Fortran DO loops.  If step is variable,
3333 we don't know even the direction at compile time, so we
3334 can't warn.  */
3335 if (TREE_CODE (step) != INTEGER_CST)
3336 break;
3337 forward = tree_int_cst_sgn (step) != -1;
3339 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3340 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3341 "waiting for lexically later iteration");
3342 break;
3344 deps = TREE_CHAIN (deps);
3346 /* If all offsets corresponding to the collapsed loops are zero,
3347 this depend clause can be ignored. FIXME: but there is still a
3348 flush needed. We need to emit one __sync_synchronize () for it
3349 though (perhaps conditionally)? Solve this together with the
3350 conservative dependence folding optimization.
3351 if (i >= fd->collapse)
3352 return; */
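/* E.g. (hypothetically) for ordered(2) with depend(sink: i - 1, j + 1)
   the code below ends up emitting roughly
     if (cond)	// the sink iteration exists in the iteration space
       GOMP_doacross_wait (iter1 - 1, iter2 + 1);
   where iterN stands for loop N's zero-based iteration number and cond
   is the conjunction of the per-loop validity tests built below.  */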
3354 deps = OMP_CLAUSE_DECL (c);
3355 gsi_prev (&gsi2);
3356 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3357 edge e2 = split_block_after_labels (e1->dest);
3359 gsi2 = gsi_after_labels (e1->dest);
3360 *gsi = gsi_last_bb (e1->src);
3361 for (i = 0; i < fd->ordered; i++)
3363 tree itype = TREE_TYPE (fd->loops[i].v);
3364 tree step = NULL_TREE;
3365 tree orig_off = NULL_TREE;
3366 if (POINTER_TYPE_P (itype))
3367 itype = sizetype;
3368 if (i)
3369 deps = TREE_CHAIN (deps);
3370 off = TREE_PURPOSE (deps);
3371 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3373 step = TREE_OPERAND (off, 1);
3374 off = TREE_OPERAND (off, 0);
3375 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3376 && integer_onep (fd->loops[i].step)
3377 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3379 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3380 if (step)
3382 off = fold_convert_loc (loc, itype, off);
3383 orig_off = off;
3384 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3387 if (integer_zerop (off))
3388 t = boolean_true_node;
3389 else
3391 tree a;
3392 tree co = fold_convert_loc (loc, itype, off);
3393 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3395 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3396 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3397 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3398 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3399 co);
3401 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3402 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3403 fd->loops[i].v, co);
3404 else
3405 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3406 fd->loops[i].v, co);
3407 if (step)
3409 tree t1, t2;
3410 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3411 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3412 fd->loops[i].n1);
3413 else
3414 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3415 fd->loops[i].n2);
3416 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3417 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3418 fd->loops[i].n2);
3419 else
3420 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3421 fd->loops[i].n1);
3422 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3423 step, build_int_cst (TREE_TYPE (step), 0));
3424 if (TREE_CODE (step) != INTEGER_CST)
3426 t1 = unshare_expr (t1);
3427 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3428 false, GSI_CONTINUE_LINKING);
3429 t2 = unshare_expr (t2);
3430 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3431 false, GSI_CONTINUE_LINKING);
3433 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3434 t, t2, t1);
3436 else if (fd->loops[i].cond_code == LT_EXPR)
3438 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3439 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3440 fd->loops[i].n1);
3441 else
3442 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3443 fd->loops[i].n2);
3445 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3446 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3447 fd->loops[i].n2);
3448 else
3449 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3450 fd->loops[i].n1);
3452 if (cond)
3453 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3454 else
3455 cond = t;
3457 off = fold_convert_loc (loc, itype, off);
3459 if (step
3460 || (fd->loops[i].cond_code == LT_EXPR
3461 ? !integer_onep (fd->loops[i].step)
3462 : !integer_minus_onep (fd->loops[i].step)))
3464 if (step == NULL_TREE
3465 && TYPE_UNSIGNED (itype)
3466 && fd->loops[i].cond_code == GT_EXPR)
3467 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3468 fold_build1_loc (loc, NEGATE_EXPR, itype,
3469 s));
3470 else
3471 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3472 orig_off ? orig_off : off, s);
3473 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3474 build_int_cst (itype, 0));
3475 if (integer_zerop (t) && !warned_step)
3477 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3478 "refers to iteration never in the iteration "
3479 "space");
3480 warned_step = true;
3482 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3483 cond, t);
3486 if (i <= fd->collapse - 1 && fd->collapse > 1)
3487 t = fd->loop.v;
3488 else if (counts[i])
3489 t = counts[i];
3490 else
3492 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3493 fd->loops[i].v, fd->loops[i].n1);
3494 t = fold_convert_loc (loc, fd->iter_type, t);
3496 if (step)
3497 /* We have divided off by step already earlier. */;
3498 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3499 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3500 fold_build1_loc (loc, NEGATE_EXPR, itype,
3501 s));
3502 else
3503 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3504 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3505 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3506 off = fold_convert_loc (loc, fd->iter_type, off);
3507 if (i <= fd->collapse - 1 && fd->collapse > 1)
3509 if (i)
3510 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3511 off);
3512 if (i < fd->collapse - 1)
3514 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3515 counts[i]);
3516 continue;
3519 off = unshare_expr (off);
3520 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3521 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3522 true, GSI_SAME_STMT);
3523 args.safe_push (t);
3525 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3526 gimple_set_location (g, loc);
3527 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3529 cond = unshare_expr (cond);
3530 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3531 GSI_CONTINUE_LINKING);
3532 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3533 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3534 e3->probability = profile_probability::guessed_always () / 8;
3535 e1->probability = e3->probability.invert ();
3536 e1->flags = EDGE_TRUE_VALUE;
3537 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3539 *gsi = gsi_after_labels (e2->dest);
3542 /* Expand all #pragma omp ordered depend(source) and
3543 #pragma omp ordered depend(sink:...) constructs in the current
3544 #pragma omp for ordered(n) region. */
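/* As an illustration (an assumed example, not taken from this file):
   for a doacross loop nest such as

   #pragma omp for ordered(2)
   for (i = 1; i < n; i++)
   for (j = 1; j < m; j++)
   {
   #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
   a[i][j] = f (a[i-1][j], a[i][j-1]);
   #pragma omp ordered depend(source)
   }

   each depend(sink:) clause becomes a (possibly conditional)
   GOMP_doacross_wait or GOMP_doacross_ull_wait call, and the
   depend(source) clause posts the current iteration vector
   (GOMP_doacross_post in libgomp), using the counters kept in
   COUNTS. */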
3546 static void
3547 expand_omp_ordered_source_sink (struct omp_region *region,
3548 struct omp_for_data *fd, tree *counts,
3549 basic_block cont_bb)
3551 struct omp_region *inner;
3552 int i;
3553 for (i = fd->collapse - 1; i < fd->ordered; i++)
3554 if (i == fd->collapse - 1 && fd->collapse > 1)
3555 counts[i] = NULL_TREE;
3556 else if (i >= fd->collapse && !cont_bb)
3557 counts[i] = build_zero_cst (fd->iter_type);
3558 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3559 && integer_onep (fd->loops[i].step))
3560 counts[i] = NULL_TREE;
3561 else
3562 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3563 tree atype
3564 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3565 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3566 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3568 for (inner = region->inner; inner; inner = inner->next)
3569 if (inner->type == GIMPLE_OMP_ORDERED)
3571 gomp_ordered *ord_stmt = inner->ord_stmt;
3572 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3573 location_t loc = gimple_location (ord_stmt);
3574 tree c;
3575 for (c = gimple_omp_ordered_clauses (ord_stmt);
3576 c; c = OMP_CLAUSE_CHAIN (c))
3577 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3578 break;
3579 if (c)
3580 expand_omp_ordered_source (&gsi, fd, counts, loc);
3581 for (c = gimple_omp_ordered_clauses (ord_stmt);
3582 c; c = OMP_CLAUSE_CHAIN (c))
3583 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3584 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3585 gsi_remove (&gsi, true);
3589 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3590 collapsed. */
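/* Illustrative sketch (assumed): given

   #pragma omp for ordered(2)
   for (i = 0; i < n; i++)
   for (j = 0; j < m; j++)
   body (i, j);

   with fd->collapse == 1, only the i loop is workshared; the j loop
   is rebuilt around the body here as an ordinary sequential loop,
   updating counts[] so that the doacross post/wait code sees the
   current iteration vector. */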
3592 static basic_block
3593 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3594 basic_block cont_bb, basic_block body_bb,
3595 bool ordered_lastprivate)
3597 if (fd->ordered == fd->collapse)
3598 return cont_bb;
3600 if (!cont_bb)
3602 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3603 for (int i = fd->collapse; i < fd->ordered; i++)
3605 tree type = TREE_TYPE (fd->loops[i].v);
3606 tree n1 = fold_convert (type, fd->loops[i].n1);
3607 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3608 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3609 size_int (i - fd->collapse + 1),
3610 NULL_TREE, NULL_TREE);
3611 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3613 return NULL;
3616 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3618 tree t, type = TREE_TYPE (fd->loops[i].v);
3619 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3620 expand_omp_build_assign (&gsi, fd->loops[i].v,
3621 fold_convert (type, fd->loops[i].n1));
3622 if (counts[i])
3623 expand_omp_build_assign (&gsi, counts[i],
3624 build_zero_cst (fd->iter_type));
3625 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3626 size_int (i - fd->collapse + 1),
3627 NULL_TREE, NULL_TREE);
3628 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3629 if (!gsi_end_p (gsi))
3630 gsi_prev (&gsi);
3631 else
3632 gsi = gsi_last_bb (body_bb);
3633 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3634 basic_block new_body = e1->dest;
3635 if (body_bb == cont_bb)
3636 cont_bb = new_body;
3637 edge e2 = NULL;
3638 basic_block new_header;
3639 if (EDGE_COUNT (cont_bb->preds) > 0)
3641 gsi = gsi_last_bb (cont_bb);
3642 if (POINTER_TYPE_P (type))
3643 t = fold_build_pointer_plus (fd->loops[i].v,
3644 fold_convert (sizetype,
3645 fd->loops[i].step));
3646 else
3647 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3648 fold_convert (type, fd->loops[i].step));
3649 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3650 if (counts[i])
3652 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3653 build_int_cst (fd->iter_type, 1));
3654 expand_omp_build_assign (&gsi, counts[i], t);
3655 t = counts[i];
3657 else
3659 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3660 fd->loops[i].v, fd->loops[i].n1);
3661 t = fold_convert (fd->iter_type, t);
3662 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3663 true, GSI_SAME_STMT);
3665 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3666 size_int (i - fd->collapse + 1),
3667 NULL_TREE, NULL_TREE);
3668 expand_omp_build_assign (&gsi, aref, t);
3669 gsi_prev (&gsi);
3670 e2 = split_block (cont_bb, gsi_stmt (gsi));
3671 new_header = e2->dest;
3673 else
3674 new_header = cont_bb;
3675 gsi = gsi_after_labels (new_header);
3676 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3677 true, GSI_SAME_STMT);
3678 tree n2
3679 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3680 true, NULL_TREE, true, GSI_SAME_STMT);
3681 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3682 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3683 edge e3 = split_block (new_header, gsi_stmt (gsi));
3684 cont_bb = e3->dest;
3685 remove_edge (e1);
3686 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3687 e3->flags = EDGE_FALSE_VALUE;
3688 e3->probability = profile_probability::guessed_always () / 8;
3689 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3690 e1->probability = e3->probability.invert ();
3692 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3693 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3695 if (e2)
3697 class loop *loop = alloc_loop ();
3698 loop->header = new_header;
3699 loop->latch = e2->src;
3700 add_loop (loop, body_bb->loop_father);
3704 /* If there are any lastprivate clauses and it is possible some loops
3705 might have zero iterations, ensure all the decls are initialized,
3706 otherwise we could crash evaluating C++ class iterators with lastprivate
3707 clauses. */
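/* E.g. (illustrative): if the loop at depth fd->collapse might run
   zero times, every deeper iterator is assigned its n1 below, so that
   a lastprivate copy-out never reads an uninitialized C++ iterator. */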
3708 bool need_inits = false;
3709 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3710 if (need_inits)
3712 tree type = TREE_TYPE (fd->loops[i].v);
3713 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3714 expand_omp_build_assign (&gsi, fd->loops[i].v,
3715 fold_convert (type, fd->loops[i].n1));
3717 else
3719 tree type = TREE_TYPE (fd->loops[i].v);
3720 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3721 boolean_type_node,
3722 fold_convert (type, fd->loops[i].n1),
3723 fold_convert (type, fd->loops[i].n2));
3724 if (!integer_onep (this_cond))
3725 need_inits = true;
3728 return cont_bb;
3731 /* A subroutine of expand_omp_for. Generate code for a parallel
3732 loop with any schedule. Given parameters:
3734 for (V = N1; V cond N2; V += STEP) BODY;
3736 where COND is "<" or ">", we generate pseudocode
3738 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3739 if (more) goto L0; else goto L3;
3741 V = istart0;
3742 iend = iend0;
3744 BODY;
3745 V += STEP;
3746 if (V cond iend) goto L1; else goto L2;
3748 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3751 If this is a combined omp parallel loop, instead of the call to
3752 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3753 If this is gimple_omp_for_combined_p loop, then instead of assigning
3754 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3755 inner GIMPLE_OMP_FOR and V += STEP; and
3756 if (V cond iend) goto L1; else goto L2; are removed.
3758 For collapsed loops, given parameters:
3759 collapse(3)
3760 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3761 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3762 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3763 BODY;
3765 we generate pseudocode
3767 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3768 if (cond3 is <)
3769 adj = STEP3 - 1;
3770 else
3771 adj = STEP3 + 1;
3772 count3 = (adj + N32 - N31) / STEP3;
3773 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3774 if (cond2 is <)
3775 adj = STEP2 - 1;
3776 else
3777 adj = STEP2 + 1;
3778 count2 = (adj + N22 - N21) / STEP2;
3779 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3780 if (cond1 is <)
3781 adj = STEP1 - 1;
3782 else
3783 adj = STEP1 + 1;
3784 count1 = (adj + N12 - N11) / STEP1;
3785 count = count1 * count2 * count3;
3786 goto Z1;
3788 count = 0;
3790 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3791 if (more) goto L0; else goto L3;
3793 V = istart0;
3794 T = V;
3795 V3 = N31 + (T % count3) * STEP3;
3796 T = T / count3;
3797 V2 = N21 + (T % count2) * STEP2;
3798 T = T / count2;
3799 V1 = N11 + T * STEP1;
3800 iend = iend0;
3802 BODY;
3803 V += 1;
3804 if (V < iend) goto L10; else goto L2;
3805 L10:
3806 V3 += STEP3;
3807 if (V3 cond3 N32) goto L1; else goto L11;
3808 L11:
3809 V3 = N31;
3810 V2 += STEP2;
3811 if (V2 cond2 N22) goto L1; else goto L12;
3812 L12:
3813 V2 = N21;
3814 V1 += STEP1;
3815 goto L1;
3817 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
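   As a concrete illustration (assumed, not part of the original
   comment): for schedule(dynamic,4) the runtime entry points are
   GOMP_loop_dynamic_start and GOMP_loop_dynamic_next, so

   #pragma omp for schedule(dynamic, 4)
   for (i = 0; i < n; i++) body (i);

   expands roughly into

   if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
   do
   for (i = istart0; i < iend0; i++) body (i);
   while (GOMP_loop_dynamic_next (&istart0, &iend0));
   GOMP_loop_end ();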
3822 static void
3823 expand_omp_for_generic (struct omp_region *region,
3824 struct omp_for_data *fd,
3825 enum built_in_function start_fn,
3826 enum built_in_function next_fn,
3827 tree sched_arg,
3828 gimple *inner_stmt)
3830 tree type, istart0, iend0, iend;
3831 tree t, vmain, vback, bias = NULL_TREE;
3832 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3833 basic_block l2_bb = NULL, l3_bb = NULL;
3834 gimple_stmt_iterator gsi;
3835 gassign *assign_stmt;
3836 bool in_combined_parallel = is_combined_parallel (region);
3837 bool broken_loop = region->cont == NULL;
3838 edge e, ne;
3839 tree *counts = NULL;
3840 int i;
3841 bool ordered_lastprivate = false;
3843 gcc_assert (!broken_loop || !in_combined_parallel);
3844 gcc_assert (fd->iter_type == long_integer_type_node
3845 || !in_combined_parallel);
3847 entry_bb = region->entry;
3848 cont_bb = region->cont;
3849 collapse_bb = NULL;
3850 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3851 gcc_assert (broken_loop
3852 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3853 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3854 l1_bb = single_succ (l0_bb);
3855 if (!broken_loop)
3857 l2_bb = create_empty_bb (cont_bb);
3858 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3859 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3860 == l1_bb));
3861 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3863 else
3864 l2_bb = NULL;
3865 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3866 exit_bb = region->exit;
3868 gsi = gsi_last_nondebug_bb (entry_bb);
3870 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3871 if (fd->ordered
3872 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 OMP_CLAUSE_LASTPRIVATE))
3874 ordered_lastprivate = true;
3875 tree reductions = NULL_TREE;
3876 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3877 tree memv = NULL_TREE;
3878 if (fd->lastprivate_conditional)
3880 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3881 OMP_CLAUSE__CONDTEMP_);
3882 if (fd->have_pointer_condtemp)
3883 condtemp = OMP_CLAUSE_DECL (c);
3884 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3885 cond_var = OMP_CLAUSE_DECL (c);
3887 if (sched_arg)
3889 if (fd->have_reductemp)
3891 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3892 OMP_CLAUSE__REDUCTEMP_);
3893 reductions = OMP_CLAUSE_DECL (c);
3894 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3895 gimple *g = SSA_NAME_DEF_STMT (reductions);
3896 reductions = gimple_assign_rhs1 (g);
3897 OMP_CLAUSE_DECL (c) = reductions;
3898 entry_bb = gimple_bb (g);
3899 edge e = split_block (entry_bb, g);
3900 if (region->entry == entry_bb)
3901 region->entry = e->dest;
3902 gsi = gsi_last_bb (entry_bb);
3904 else
3905 reductions = null_pointer_node;
3906 if (fd->have_pointer_condtemp)
3908 tree type = TREE_TYPE (condtemp);
3909 memv = create_tmp_var (type);
3910 TREE_ADDRESSABLE (memv) = 1;
3911 unsigned HOST_WIDE_INT sz
3912 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3913 sz *= fd->lastprivate_conditional;
3914 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3915 false);
3916 mem = build_fold_addr_expr (memv);
3918 else
3919 mem = null_pointer_node;
3921 if (fd->collapse > 1 || fd->ordered)
3923 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3924 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3926 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3927 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3928 zero_iter1_bb, first_zero_iter1,
3929 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3931 if (zero_iter1_bb)
3933 /* Some counts[i] vars might be uninitialized if
3934 some loop has zero iterations. But the body shouldn't
3935 be executed in that case, so just avoid uninit warnings. */
3936 for (i = first_zero_iter1;
3937 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3938 if (SSA_VAR_P (counts[i]))
3939 suppress_warning (counts[i], OPT_Wuninitialized);
3940 gsi_prev (&gsi);
3941 e = split_block (entry_bb, gsi_stmt (gsi));
3942 entry_bb = e->dest;
3943 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3944 gsi = gsi_last_nondebug_bb (entry_bb);
3945 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3946 get_immediate_dominator (CDI_DOMINATORS,
3947 zero_iter1_bb));
3949 if (zero_iter2_bb)
3951 /* Some counts[i] vars might be uninitialized if
3952 some loop has zero iterations. But the body shouldn't
3953 be executed in that case, so just avoid uninit warnings. */
3954 for (i = first_zero_iter2; i < fd->ordered; i++)
3955 if (SSA_VAR_P (counts[i]))
3956 suppress_warning (counts[i], OPT_Wuninitialized);
3957 if (zero_iter1_bb)
3958 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3959 else
3961 gsi_prev (&gsi);
3962 e = split_block (entry_bb, gsi_stmt (gsi));
3963 entry_bb = e->dest;
3964 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3965 gsi = gsi_last_nondebug_bb (entry_bb);
3966 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3967 get_immediate_dominator
3968 (CDI_DOMINATORS, zero_iter2_bb));
3971 if (fd->collapse == 1)
3973 counts[0] = fd->loop.n2;
3974 fd->loop = fd->loops[0];
3978 type = TREE_TYPE (fd->loop.v);
3979 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3980 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3981 TREE_ADDRESSABLE (istart0) = 1;
3982 TREE_ADDRESSABLE (iend0) = 1;
3984 /* See if we need to bias by LLONG_MIN. */
3985 if (fd->iter_type == long_long_unsigned_type_node
3986 && TREE_CODE (type) == INTEGER_TYPE
3987 && !TYPE_UNSIGNED (type)
3988 && fd->ordered == 0)
3990 tree n1, n2;
3992 if (fd->loop.cond_code == LT_EXPR)
3994 n1 = fd->loop.n1;
3995 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3997 else
3999 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4000 n2 = fd->loop.n1;
4002 if (TREE_CODE (n1) != INTEGER_CST
4003 || TREE_CODE (n2) != INTEGER_CST
4004 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4005 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
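/* Adding BIAS (LLONG_MIN viewed as unsigned) maps the signed range
   monotonically onto the unsigned iterator space; e.g. (illustrative)
   a loop from -10 to 10 gets biased bounds 0x7ffffffffffffff6 and
   0x800000000000000a, and the bias is subtracted again when computing
   V from istart0 below. */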
4008 gimple_stmt_iterator gsif = gsi;
4009 gsi_prev (&gsif);
4011 tree arr = NULL_TREE;
4012 if (in_combined_parallel)
4014 gcc_assert (fd->ordered == 0);
4015 /* In a combined parallel loop, emit a call to
4016 GOMP_loop_foo_next. */
4017 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4018 build_fold_addr_expr (istart0),
4019 build_fold_addr_expr (iend0));
4021 else
4023 tree t0, t1, t2, t3, t4;
4024 /* If this is not a combined parallel loop, emit a call to
4025 GOMP_loop_foo_start in ENTRY_BB. */
4026 t4 = build_fold_addr_expr (iend0);
4027 t3 = build_fold_addr_expr (istart0);
4028 if (fd->ordered)
4030 t0 = build_int_cst (unsigned_type_node,
4031 fd->ordered - fd->collapse + 1);
4032 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4033 fd->ordered
4034 - fd->collapse + 1),
4035 ".omp_counts");
4036 DECL_NAMELESS (arr) = 1;
4037 TREE_ADDRESSABLE (arr) = 1;
4038 TREE_STATIC (arr) = 1;
4039 vec<constructor_elt, va_gc> *v;
4040 vec_alloc (v, fd->ordered - fd->collapse + 1);
4041 int idx;
4043 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4045 tree c;
4046 if (idx == 0 && fd->collapse > 1)
4047 c = fd->loop.n2;
4048 else
4049 c = counts[idx + fd->collapse - 1];
4050 tree purpose = size_int (idx);
4051 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4052 if (TREE_CODE (c) != INTEGER_CST)
4053 TREE_STATIC (arr) = 0;
4056 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4057 if (!TREE_STATIC (arr))
4058 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4059 void_type_node, arr),
4060 true, NULL_TREE, true, GSI_SAME_STMT);
4061 t1 = build_fold_addr_expr (arr);
4062 t2 = NULL_TREE;
4064 else
4066 t2 = fold_convert (fd->iter_type, fd->loop.step);
4067 t1 = fd->loop.n2;
4068 t0 = fd->loop.n1;
4069 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 tree innerc
4072 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4073 OMP_CLAUSE__LOOPTEMP_);
4074 gcc_assert (innerc);
4075 t0 = OMP_CLAUSE_DECL (innerc);
4076 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4077 OMP_CLAUSE__LOOPTEMP_);
4078 gcc_assert (innerc);
4079 t1 = OMP_CLAUSE_DECL (innerc);
4081 if (POINTER_TYPE_P (TREE_TYPE (t0))
4082 && TYPE_PRECISION (TREE_TYPE (t0))
4083 != TYPE_PRECISION (fd->iter_type))
4085 /* Avoid casting pointers to integer of a different size. */
4086 tree itype = signed_type_for (type);
4087 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4088 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4090 else
4092 t1 = fold_convert (fd->iter_type, t1);
4093 t0 = fold_convert (fd->iter_type, t0);
4095 if (bias)
4097 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4098 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4101 if (fd->iter_type == long_integer_type_node || fd->ordered)
4103 if (fd->chunk_size)
4105 t = fold_convert (fd->iter_type, fd->chunk_size);
4106 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4107 if (sched_arg)
4109 if (fd->ordered)
4110 t = build_call_expr (builtin_decl_explicit (start_fn),
4111 8, t0, t1, sched_arg, t, t3, t4,
4112 reductions, mem);
4113 else
4114 t = build_call_expr (builtin_decl_explicit (start_fn),
4115 9, t0, t1, t2, sched_arg, t, t3, t4,
4116 reductions, mem);
4118 else if (fd->ordered)
4119 t = build_call_expr (builtin_decl_explicit (start_fn),
4120 5, t0, t1, t, t3, t4);
4121 else
4122 t = build_call_expr (builtin_decl_explicit (start_fn),
4123 6, t0, t1, t2, t, t3, t4);
4125 else if (fd->ordered)
4126 t = build_call_expr (builtin_decl_explicit (start_fn),
4127 4, t0, t1, t3, t4);
4128 else
4129 t = build_call_expr (builtin_decl_explicit (start_fn),
4130 5, t0, t1, t2, t3, t4);
4132 else
4134 tree t5;
4135 tree c_bool_type;
4136 tree bfn_decl;
4138 /* The GOMP_loop_ull_*start functions have an additional boolean
4139 argument, true for < loops and false for > loops.
4140 In Fortran, the C bool type can be different from
4141 boolean_type_node. */
4142 bfn_decl = builtin_decl_explicit (start_fn);
4143 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4144 t5 = build_int_cst (c_bool_type,
4145 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4146 if (fd->chunk_size)
4148 tree bfn_decl = builtin_decl_explicit (start_fn);
4149 t = fold_convert (fd->iter_type, fd->chunk_size);
4150 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4151 if (sched_arg)
4152 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4153 t, t3, t4, reductions, mem);
4154 else
4155 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4157 else
4158 t = build_call_expr (builtin_decl_explicit (start_fn),
4159 6, t5, t0, t1, t2, t3, t4);
4162 if (TREE_TYPE (t) != boolean_type_node)
4163 t = fold_build2 (NE_EXPR, boolean_type_node,
4164 t, build_int_cst (TREE_TYPE (t), 0));
4165 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4166 true, GSI_SAME_STMT);
4167 if (arr && !TREE_STATIC (arr))
4169 tree clobber = build_clobber (TREE_TYPE (arr));
4170 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4171 GSI_SAME_STMT);
4173 if (fd->have_pointer_condtemp)
4174 expand_omp_build_assign (&gsi, condtemp, memv, false);
4175 if (fd->have_reductemp)
4177 gimple *g = gsi_stmt (gsi);
4178 gsi_remove (&gsi, true);
4179 release_ssa_name (gimple_assign_lhs (g));
4181 entry_bb = region->entry;
4182 gsi = gsi_last_nondebug_bb (entry_bb);
4184 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4186 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4188 /* Remove the GIMPLE_OMP_FOR statement. */
4189 gsi_remove (&gsi, true);
4191 if (gsi_end_p (gsif))
4192 gsif = gsi_after_labels (gsi_bb (gsif));
4193 gsi_next (&gsif);
4195 /* Iteration setup for sequential loop goes in L0_BB. */
4196 tree startvar = fd->loop.v;
4197 tree endvar = NULL_TREE;
4199 if (gimple_omp_for_combined_p (fd->for_stmt))
4201 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4202 && gimple_omp_for_kind (inner_stmt)
4203 == GF_OMP_FOR_KIND_SIMD);
4204 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4205 OMP_CLAUSE__LOOPTEMP_);
4206 gcc_assert (innerc);
4207 startvar = OMP_CLAUSE_DECL (innerc);
4208 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4209 OMP_CLAUSE__LOOPTEMP_);
4210 gcc_assert (innerc);
4211 endvar = OMP_CLAUSE_DECL (innerc);
4214 gsi = gsi_start_bb (l0_bb);
4215 t = istart0;
4216 if (fd->ordered && fd->collapse == 1)
4217 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4218 fold_convert (fd->iter_type, fd->loop.step));
4219 else if (bias)
4220 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4221 if (fd->ordered && fd->collapse == 1)
4223 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4224 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4225 fd->loop.n1, fold_convert (sizetype, t));
4226 else
4228 t = fold_convert (TREE_TYPE (startvar), t);
4229 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4230 fd->loop.n1, t);
4233 else
4235 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4236 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4237 t = fold_convert (TREE_TYPE (startvar), t);
4239 t = force_gimple_operand_gsi (&gsi, t,
4240 DECL_P (startvar)
4241 && TREE_ADDRESSABLE (startvar),
4242 NULL_TREE, false, GSI_CONTINUE_LINKING);
4243 assign_stmt = gimple_build_assign (startvar, t);
4244 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4245 if (cond_var)
4247 tree itype = TREE_TYPE (cond_var);
4248 /* For lastprivate(conditional:) itervar, we need some iteration
4249 counter that starts at unsigned non-zero and increases.
4250 Prefer as few IVs as possible, so if we can use startvar
4251 itself, use that, or startvar + constant (those would be
4252 incremented with step), and as a last resort use istart0 + 1,
4253 incremented by 1. */
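/* E.g. (illustrative): for a constant n1 > 0 with a < loop, startvar
   itself is used; for n1 == 0 the code below adds 1 - n1 == 1, so the
   counter starts at 1 rather than 0. */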
4254 if ((fd->ordered && fd->collapse == 1)
4255 || bias
4256 || POINTER_TYPE_P (type)
4257 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4258 || fd->loop.cond_code != LT_EXPR)
4259 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4260 build_int_cst (itype, 1));
4261 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4262 t = fold_convert (itype, t);
4263 else
4265 tree c = fold_convert (itype, fd->loop.n1);
4266 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4267 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4269 t = force_gimple_operand_gsi (&gsi, t, false,
4270 NULL_TREE, false, GSI_CONTINUE_LINKING);
4271 assign_stmt = gimple_build_assign (cond_var, t);
4272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4275 t = iend0;
4276 if (fd->ordered && fd->collapse == 1)
4277 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4278 fold_convert (fd->iter_type, fd->loop.step));
4279 else if (bias)
4280 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4281 if (fd->ordered && fd->collapse == 1)
4283 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4284 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4285 fd->loop.n1, fold_convert (sizetype, t));
4286 else
4288 t = fold_convert (TREE_TYPE (startvar), t);
4289 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4290 fd->loop.n1, t);
4293 else
4295 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4296 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4297 t = fold_convert (TREE_TYPE (startvar), t);
4299 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4300 false, GSI_CONTINUE_LINKING);
4301 if (endvar)
4303 assign_stmt = gimple_build_assign (endvar, iend);
4304 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4305 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4306 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4307 else
4308 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4309 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4311 /* Handle linear clause adjustments. */
4312 tree itercnt = NULL_TREE;
4313 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4314 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4315 c; c = OMP_CLAUSE_CHAIN (c))
4316 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4317 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4319 tree d = OMP_CLAUSE_DECL (c);
4320 tree t = d, a, dest;
4321 if (omp_privatize_by_reference (t))
4322 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4323 tree type = TREE_TYPE (t);
4324 if (POINTER_TYPE_P (type))
4325 type = sizetype;
4326 dest = unshare_expr (t);
4327 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4328 expand_omp_build_assign (&gsif, v, t);
4329 if (itercnt == NULL_TREE)
4331 itercnt = startvar;
4332 tree n1 = fd->loop.n1;
4333 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4335 itercnt
4336 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4337 itercnt);
4338 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4340 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4341 itercnt, n1);
4342 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4343 itercnt, fd->loop.step);
4344 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4345 NULL_TREE, false,
4346 GSI_CONTINUE_LINKING);
4348 a = fold_build2 (MULT_EXPR, type,
4349 fold_convert (type, itercnt),
4350 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4351 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4352 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4353 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4354 false, GSI_CONTINUE_LINKING);
4355 expand_omp_build_assign (&gsi, dest, t, true);
4357 if (fd->collapse > 1)
4358 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4360 if (fd->ordered)
4362 /* Until now, the counts array contained the number of iterations
4363 (or a variable holding it) for the ith loop. From now on, we need
4364 those counts only for collapsed loops, and only for the 2nd
4365 till the last collapsed one. Move those one element earlier,
4366 we'll use counts[fd->collapse - 1] for the first source/sink
4367 iteration counter and so on and counts[fd->ordered]
4368 as the array holding the current counter values for
4369 depend(source). */
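/* E.g. (illustrative) with collapse(2) ordered(3): after the memmove
   below, counts[0] holds the iteration count of the 2nd collapsed
   loop, counts[1] and counts[2] become the source/sink iteration
   counters, and counts[3] remains the array for depend(source). */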
4370 if (fd->collapse > 1)
4371 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4372 if (broken_loop)
4374 int i;
4375 for (i = fd->collapse; i < fd->ordered; i++)
4377 tree type = TREE_TYPE (fd->loops[i].v);
4378 tree this_cond
4379 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4380 fold_convert (type, fd->loops[i].n1),
4381 fold_convert (type, fd->loops[i].n2));
4382 if (!integer_onep (this_cond))
4383 break;
4385 if (i < fd->ordered)
4387 cont_bb
4388 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4389 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4390 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4391 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4392 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4393 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4394 make_edge (cont_bb, l1_bb, 0);
4395 l2_bb = create_empty_bb (cont_bb);
4396 broken_loop = false;
4399 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4400 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4401 ordered_lastprivate);
4402 if (counts[fd->collapse - 1])
4404 gcc_assert (fd->collapse == 1);
4405 gsi = gsi_last_bb (l0_bb);
4406 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4407 istart0, true);
4408 if (cont_bb)
4410 gsi = gsi_last_bb (cont_bb);
4411 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4412 counts[fd->collapse - 1],
4413 build_int_cst (fd->iter_type, 1));
4414 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4415 tree aref = build4 (ARRAY_REF, fd->iter_type,
4416 counts[fd->ordered], size_zero_node,
4417 NULL_TREE, NULL_TREE);
4418 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4420 t = counts[fd->collapse - 1];
4422 else if (fd->collapse > 1)
4423 t = fd->loop.v;
4424 else
4426 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4427 fd->loops[0].v, fd->loops[0].n1);
4428 t = fold_convert (fd->iter_type, t);
4430 gsi = gsi_last_bb (l0_bb);
4431 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4432 size_zero_node, NULL_TREE, NULL_TREE);
4433 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4434 false, GSI_CONTINUE_LINKING);
4435 expand_omp_build_assign (&gsi, aref, t, true);
4438 if (!broken_loop)
4440 /* Code to control the increment and predicate for the sequential
4441 loop goes in the CONT_BB. */
4442 gsi = gsi_last_nondebug_bb (cont_bb);
4443 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4444 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4445 vmain = gimple_omp_continue_control_use (cont_stmt);
4446 vback = gimple_omp_continue_control_def (cont_stmt);
4448 if (cond_var)
4450 tree itype = TREE_TYPE (cond_var);
4451 tree t2;
4452 if ((fd->ordered && fd->collapse == 1)
4453 || bias
4454 || POINTER_TYPE_P (type)
4455 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4456 || fd->loop.cond_code != LT_EXPR)
4457 t2 = build_int_cst (itype, 1);
4458 else
4459 t2 = fold_convert (itype, fd->loop.step);
4460 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4461 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4462 NULL_TREE, true, GSI_SAME_STMT);
4463 assign_stmt = gimple_build_assign (cond_var, t2);
4464 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4467 if (!gimple_omp_for_combined_p (fd->for_stmt))
4469 if (POINTER_TYPE_P (type))
4470 t = fold_build_pointer_plus (vmain, fd->loop.step);
4471 else
4472 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4473 t = force_gimple_operand_gsi (&gsi, t,
4474 DECL_P (vback)
4475 && TREE_ADDRESSABLE (vback),
4476 NULL_TREE, true, GSI_SAME_STMT);
4477 assign_stmt = gimple_build_assign (vback, t);
4478 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4480 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4482 tree tem;
4483 if (fd->collapse > 1)
4484 tem = fd->loop.v;
4485 else
4487 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4488 fd->loops[0].v, fd->loops[0].n1);
4489 tem = fold_convert (fd->iter_type, tem);
4491 tree aref = build4 (ARRAY_REF, fd->iter_type,
4492 counts[fd->ordered], size_zero_node,
4493 NULL_TREE, NULL_TREE);
4494 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4495 true, GSI_SAME_STMT);
4496 expand_omp_build_assign (&gsi, aref, tem);
4499 t = build2 (fd->loop.cond_code, boolean_type_node,
4500 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4501 iend);
4502 gcond *cond_stmt = gimple_build_cond_empty (t);
4503 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4506 /* Remove GIMPLE_OMP_CONTINUE. */
4507 gsi_remove (&gsi, true);
4509 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4510 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4512 /* Emit code to get the next parallel iteration in L2_BB. */
4513 gsi = gsi_start_bb (l2_bb);
4515 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4516 build_fold_addr_expr (istart0),
4517 build_fold_addr_expr (iend0));
4518 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4519 false, GSI_CONTINUE_LINKING);
4520 if (TREE_TYPE (t) != boolean_type_node)
4521 t = fold_build2 (NE_EXPR, boolean_type_node,
4522 t, build_int_cst (TREE_TYPE (t), 0));
4523 gcond *cond_stmt = gimple_build_cond_empty (t);
4524 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4527 /* Add the loop cleanup function. */
4528 gsi = gsi_last_nondebug_bb (exit_bb);
4529 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4530 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4531 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4532 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4533 else
4534 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4535 gcall *call_stmt = gimple_build_call (t, 0);
4536 if (fd->ordered)
4538 tree arr = counts[fd->ordered];
4539 tree clobber = build_clobber (TREE_TYPE (arr));
4540 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4541 GSI_SAME_STMT);
4543 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4545 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4546 if (fd->have_reductemp)
4548 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4549 gimple_call_lhs (call_stmt));
4550 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4553 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4554 gsi_remove (&gsi, true);
4556 /* Connect the new blocks. */
4557 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4558 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4560 if (!broken_loop)
4562 gimple_seq phis;
4564 e = find_edge (cont_bb, l3_bb);
4565 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4567 phis = phi_nodes (l3_bb);
4568 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4570 gimple *phi = gsi_stmt (gsi);
4571 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4572 PHI_ARG_DEF_FROM_EDGE (phi, e));
4574 remove_edge (e);
4576 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4577 e = find_edge (cont_bb, l1_bb);
4578 if (e == NULL)
4580 e = BRANCH_EDGE (cont_bb);
4581 gcc_assert (single_succ (e->dest) == l1_bb);
4583 if (gimple_omp_for_combined_p (fd->for_stmt))
4585 remove_edge (e);
4586 e = NULL;
4588 else if (fd->collapse > 1)
4590 remove_edge (e);
4591 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4593 else
4594 e->flags = EDGE_TRUE_VALUE;
4595 if (e)
4597 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4598 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4600 else
4602 e = find_edge (cont_bb, l2_bb);
4603 e->flags = EDGE_FALLTHRU;
4605 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4607 if (gimple_in_ssa_p (cfun))
4609 /* Add phis to the outer loop that connect to the phis in the inner,
4610 original loop, and move the loop entry value of the inner phi to
4611 the loop entry value of the outer phi. */
4612 gphi_iterator psi;
4613 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4615 location_t locus;
4616 gphi *nphi;
4617 gphi *exit_phi = psi.phi ();
4619 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4620 continue;
4622 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4623 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4625 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4626 edge latch_to_l1 = find_edge (latch, l1_bb);
4627 gphi *inner_phi
4628 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4630 tree t = gimple_phi_result (exit_phi);
4631 tree new_res = copy_ssa_name (t, NULL);
4632 nphi = create_phi_node (new_res, l0_bb);
4634 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4635 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4636 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4637 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4638 add_phi_arg (nphi, t, entry_to_l0, locus);
4640 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4641 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4643 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4647 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4648 recompute_dominator (CDI_DOMINATORS, l2_bb));
4649 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4650 recompute_dominator (CDI_DOMINATORS, l3_bb));
4651 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4652 recompute_dominator (CDI_DOMINATORS, l0_bb));
4653 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4654 recompute_dominator (CDI_DOMINATORS, l1_bb));
4656 /* We enter expand_omp_for_generic with a loop. This original loop may
4657 have its own loop struct, or it may be part of an outer loop struct
4658 (which may be the fake loop). */
4659 class loop *outer_loop = entry_bb->loop_father;
4660 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4662 add_bb_to_loop (l2_bb, outer_loop);
4664 /* We've added a new loop around the original loop. Allocate the
4665 corresponding loop struct. */
4666 class loop *new_loop = alloc_loop ();
4667 new_loop->header = l0_bb;
4668 new_loop->latch = l2_bb;
4669 add_loop (new_loop, outer_loop);
4671 /* Allocate a loop structure for the original loop unless we already
4672 had one. */
4673 if (!orig_loop_has_loop_struct
4674 && !gimple_omp_for_combined_p (fd->for_stmt))
4676 class loop *orig_loop = alloc_loop ();
4677 orig_loop->header = l1_bb;
4678 /* The loop may have multiple latches. */
4679 add_loop (orig_loop, new_loop);
4684 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4685 compute the needed allocation size. If !ALLOC, handle team
4686 allocations; if ALLOC, thread allocations. SZ is the initial needed size for
4687 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4688 CNT number of elements of each array, for !ALLOC this is
4689 omp_get_num_threads (), for ALLOC number of iterations handled by the
4690 current thread. If PTR is non-NULL, it is the start of the allocation
4691 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4692 clauses pointers to the corresponding arrays. */
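/* Typical use (a sketch based on the call sites below): a sizing pass
   followed by an assignment pass over the allocated block:

   sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, align, cnt,
   NULL, alloc);
   ... allocate SZ bytes, yielding ptr ...
   expand_omp_scantemp_alloc (clauses, ptr, 0, align, cnt, &gsi,
   alloc); */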
4694 static tree
4695 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4696 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4697 gimple_stmt_iterator *gsi, bool alloc)
4699 tree eltsz = NULL_TREE;
4700 unsigned HOST_WIDE_INT preval = 0;
4701 if (ptr && sz)
4702 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4703 ptr, size_int (sz));
4704 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4705 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4706 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4707 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4709 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4710 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4711 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4713 unsigned HOST_WIDE_INT szl
4714 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4715 szl = least_bit_hwi (szl);
4716 if (szl)
4717 al = MIN (al, szl);
4719 if (ptr == NULL_TREE)
4721 if (eltsz == NULL_TREE)
4722 eltsz = TYPE_SIZE_UNIT (pointee_type);
4723 else
4724 eltsz = size_binop (PLUS_EXPR, eltsz,
4725 TYPE_SIZE_UNIT (pointee_type));
4727 if (preval == 0 && al <= alloc_align)
4729 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4730 sz += diff;
4731 if (diff && ptr)
4732 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4733 ptr, size_int (diff));
4735 else if (al > preval)
4737 if (ptr)
4739 ptr = fold_convert (pointer_sized_int_node, ptr);
4740 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4741 build_int_cst (pointer_sized_int_node,
4742 al - 1));
4743 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4744 build_int_cst (pointer_sized_int_node,
4745 -(HOST_WIDE_INT) al));
4746 ptr = fold_convert (ptr_type_node, ptr);
4748 else
4749 sz += al - 1;
4751 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4752 preval = al;
4753 else
4754 preval = 1;
4755 if (ptr)
4757 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4758 ptr = OMP_CLAUSE_DECL (c);
4759 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4760 size_binop (MULT_EXPR, cnt,
4761 TYPE_SIZE_UNIT (pointee_type)));
4765 if (ptr == NULL_TREE)
4767 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4768 if (sz)
4769 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4770 return eltsz;
4772 else
4773 return ptr;
4776 /* Return the last _looptemp_ clause if one has been created for
4777 lastprivate on distribute parallel for{, simd} or taskloop.
4778 FD is the loop data and INNERC should be the second _looptemp_
4779 clause (the one holding the end of the range).
4780 This is followed by collapse - 1 _looptemp_ clauses for the
4781 counts[1] and up, and for triangular loops followed by 4
4782 further _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4783 one for factor and one for adjn1). After this there is optionally one
4784 _looptemp_ clause that this function returns. */
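/* E.g. (illustrative) for a rectangular collapse(3) taskloop: INNERC
   is the 2nd clause, the loop below skips collapse - 1 == 2 clauses
   (counts[1] and counts[2]), and the clause after those, if present,
   is the lastprivate one returned here. */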
4786 static tree
4787 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4789 gcc_assert (innerc);
4790 int count = fd->collapse - 1;
4791 if (fd->non_rect
4792 && fd->last_nonrect == fd->first_nonrect + 1
4793 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4794 count += 4;
4795 for (int i = 0; i < count; i++)
4797 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4798 OMP_CLAUSE__LOOPTEMP_);
4799 gcc_assert (innerc);
4801 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4802 OMP_CLAUSE__LOOPTEMP_);
4805 /* A subroutine of expand_omp_for. Generate code for a parallel
4806 loop with static schedule and no specified chunk size. Given
4807 parameters:
4809 for (V = N1; V cond N2; V += STEP) BODY;
4811 where COND is "<" or ">", we generate pseudocode
4813 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4814 if (cond is <)
4815 adj = STEP - 1;
4816 else
4817 adj = STEP + 1;
4818 if ((__typeof (V)) -1 > 0 && cond is >)
4819 n = -(adj + N2 - N1) / -STEP;
4820 else
4821 n = (adj + N2 - N1) / STEP;
4822 q = n / nthreads;
4823 tt = n % nthreads;
4824 if (threadid < tt) goto L3; else goto L4;
4826 tt = 0;
4827 q = q + 1;
4829 s0 = q * threadid + tt;
4830 e0 = s0 + q;
4831 V = s0 * STEP + N1;
4832 if (s0 >= e0) goto L2; else goto L0;
4834 e = e0 * STEP + N1;
4836 BODY;
4837 V += STEP;
4838 if (V cond e) goto L1;
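   For instance (a worked example, not part of the original comment):
   with n = 10 and nthreads = 4, q = 2 and tt = 2, so threads 0 and 1
   each take q + 1 = 3 iterations ([0,3) and [3,6)) while threads 2
   and 3 take 2 each ([6,8) and [8,10)).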
4842 static void
4843 expand_omp_for_static_nochunk (struct omp_region *region,
4844 struct omp_for_data *fd,
4845 gimple *inner_stmt)
4847 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4848 tree type, itype, vmain, vback;
4849 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4850 basic_block body_bb, cont_bb, collapse_bb = NULL;
4851 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4852 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4853 gimple_stmt_iterator gsi, gsip;
4854 edge ep;
4855 bool broken_loop = region->cont == NULL;
4856 tree *counts = NULL;
4857 tree n1, n2, step;
4858 tree reductions = NULL_TREE;
4859 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4861 itype = type = TREE_TYPE (fd->loop.v);
4862 if (POINTER_TYPE_P (type))
4863 itype = signed_type_for (type);
4865 entry_bb = region->entry;
4866 cont_bb = region->cont;
4867 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4868 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4869 gcc_assert (broken_loop
4870 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4871 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4872 body_bb = single_succ (seq_start_bb);
4873 if (!broken_loop)
4875 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4876 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4877 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4879 exit_bb = region->exit;
4881 /* Iteration space partitioning goes in ENTRY_BB. */
4882 gsi = gsi_last_nondebug_bb (entry_bb);
4883 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4884 gsip = gsi;
4885 gsi_prev (&gsip);
4887 if (fd->collapse > 1)
4889 int first_zero_iter = -1, dummy = -1;
4890 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4892 counts = XALLOCAVEC (tree, fd->collapse);
4893 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4894 fin_bb, first_zero_iter,
4895 dummy_bb, dummy, l2_dom_bb);
4896 t = NULL_TREE;
4898 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4899 t = integer_one_node;
4900 else
4901 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4902 fold_convert (type, fd->loop.n1),
4903 fold_convert (type, fd->loop.n2));
4904 if (fd->collapse == 1
4905 && TYPE_UNSIGNED (type)
4906 && (t == NULL_TREE || !integer_onep (t)))
4908 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4909 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4910 true, GSI_SAME_STMT);
4911 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4912 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4913 true, GSI_SAME_STMT);
4914 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4915 n1, n2);
4916 ep = split_block (entry_bb, cond_stmt);
4917 ep->flags = EDGE_TRUE_VALUE;
4918 entry_bb = ep->dest;
4919 ep->probability = profile_probability::very_likely ();
4920 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4921 ep->probability = profile_probability::very_unlikely ();
4922 if (gimple_in_ssa_p (cfun))
4924 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4925 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4926 !gsi_end_p (gpi); gsi_next (&gpi))
4928 gphi *phi = gpi.phi ();
4929 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4930 ep, UNKNOWN_LOCATION);
4933 gsi = gsi_last_bb (entry_bb);
4936 if (fd->lastprivate_conditional)
4938 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4939 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4940 if (fd->have_pointer_condtemp)
4941 condtemp = OMP_CLAUSE_DECL (c);
4942 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4943 cond_var = OMP_CLAUSE_DECL (c);
4945 if (fd->have_reductemp
4946 /* For scan, we don't want to reinitialize condtemp before the
4947 second loop. */
4948 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4949 || fd->have_nonctrl_scantemp)
4951 tree t1 = build_int_cst (long_integer_type_node, 0);
4952 tree t2 = build_int_cst (long_integer_type_node, 1);
4953 tree t3 = build_int_cstu (long_integer_type_node,
4954 (HOST_WIDE_INT_1U << 31) + 1);
4955 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4956 gimple_stmt_iterator gsi2 = gsi_none ();
4957 gimple *g = NULL;
4958 tree mem = null_pointer_node, memv = NULL_TREE;
4959 unsigned HOST_WIDE_INT condtemp_sz = 0;
4960 unsigned HOST_WIDE_INT alloc_align = 0;
4961 if (fd->have_reductemp)
4963 gcc_assert (!fd->have_nonctrl_scantemp);
4964 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4965 reductions = OMP_CLAUSE_DECL (c);
4966 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4967 g = SSA_NAME_DEF_STMT (reductions);
4968 reductions = gimple_assign_rhs1 (g);
4969 OMP_CLAUSE_DECL (c) = reductions;
4970 gsi2 = gsi_for_stmt (g);
4972 else
4974 if (gsi_end_p (gsip))
4975 gsi2 = gsi_after_labels (region->entry);
4976 else
4977 gsi2 = gsip;
4978 reductions = null_pointer_node;
4980 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4982 tree type;
4983 if (fd->have_pointer_condtemp)
4984 type = TREE_TYPE (condtemp);
4985 else
4986 type = ptr_type_node;
4987 memv = create_tmp_var (type);
4988 TREE_ADDRESSABLE (memv) = 1;
4989 unsigned HOST_WIDE_INT sz = 0;
4990 tree size = NULL_TREE;
4991 if (fd->have_pointer_condtemp)
4993 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4994 sz *= fd->lastprivate_conditional;
4995 condtemp_sz = sz;
4997 if (fd->have_nonctrl_scantemp)
4999 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5000 gimple *g = gimple_build_call (nthreads, 0);
5001 nthreads = create_tmp_var (integer_type_node);
5002 gimple_call_set_lhs (g, nthreads);
5003 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5004 nthreads = fold_convert (sizetype, nthreads);
5005 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5006 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5007 alloc_align, nthreads, NULL,
5008 false);
5009 size = fold_convert (type, size);
5011 else
5012 size = build_int_cst (type, sz);
5013 expand_omp_build_assign (&gsi2, memv, size, false);
5014 mem = build_fold_addr_expr (memv);
5016 tree t
5017 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5018 9, t1, t2, t2, t3, t1, null_pointer_node,
5019 null_pointer_node, reductions, mem);
5020 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5021 true, GSI_SAME_STMT);
5022 if (fd->have_pointer_condtemp)
5023 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5024 if (fd->have_nonctrl_scantemp)
5026 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5027 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5028 alloc_align, nthreads, &gsi2, false);
5030 if (fd->have_reductemp)
5032 gsi_remove (&gsi2, true);
5033 release_ssa_name (gimple_assign_lhs (g));
5036 switch (gimple_omp_for_kind (fd->for_stmt))
5038 case GF_OMP_FOR_KIND_FOR:
5039 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5040 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5041 break;
5042 case GF_OMP_FOR_KIND_DISTRIBUTE:
5043 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5044 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5045 break;
5046 default:
5047 gcc_unreachable ();
5049 nthreads = build_call_expr (nthreads, 0);
5050 nthreads = fold_convert (itype, nthreads);
5051 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5052 true, GSI_SAME_STMT);
5053 threadid = build_call_expr (threadid, 0);
5054 threadid = fold_convert (itype, threadid);
5055 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5056 true, GSI_SAME_STMT);
5058 n1 = fd->loop.n1;
5059 n2 = fd->loop.n2;
5060 step = fd->loop.step;
5061 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5063 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5064 OMP_CLAUSE__LOOPTEMP_);
5065 gcc_assert (innerc);
5066 n1 = OMP_CLAUSE_DECL (innerc);
5067 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5068 OMP_CLAUSE__LOOPTEMP_);
5069 gcc_assert (innerc);
5070 n2 = OMP_CLAUSE_DECL (innerc);
5072 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5073 true, NULL_TREE, true, GSI_SAME_STMT);
5074 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5075 true, NULL_TREE, true, GSI_SAME_STMT);
5076 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5077 true, NULL_TREE, true, GSI_SAME_STMT);
5079 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5080 t = fold_build2 (PLUS_EXPR, itype, step, t);
5081 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5082 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5083 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5084 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5085 fold_build1 (NEGATE_EXPR, itype, t),
5086 fold_build1 (NEGATE_EXPR, itype, step));
5087 else
5088 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5089 t = fold_convert (itype, t);
5090 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5092 q = create_tmp_reg (itype, "q");
5093 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5094 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5095 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5097 tt = create_tmp_reg (itype, "tt");
5098 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5099 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5100 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5102 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5103 gcond *cond_stmt = gimple_build_cond_empty (t);
5104 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5106 second_bb = split_block (entry_bb, cond_stmt)->dest;
5107 gsi = gsi_last_nondebug_bb (second_bb);
5108 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5110 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5111 GSI_SAME_STMT);
5112 gassign *assign_stmt
5113 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5114 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5116 third_bb = split_block (second_bb, assign_stmt)->dest;
5117 gsi = gsi_last_nondebug_bb (third_bb);
5118 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5120 if (fd->have_nonctrl_scantemp)
5122 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5123 tree controlp = NULL_TREE, controlb = NULL_TREE;
5124 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5125 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5126 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5128 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5129 controlb = OMP_CLAUSE_DECL (c);
5130 else
5131 controlp = OMP_CLAUSE_DECL (c);
5132 if (controlb && controlp)
5133 break;
5135 gcc_assert (controlp && controlb);
5136 tree cnt = create_tmp_var (sizetype);
5137 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5138 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5139 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5140 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5141 alloc_align, cnt, NULL, true);
5142 tree size = create_tmp_var (sizetype);
5143 expand_omp_build_assign (&gsi, size, sz, false);
5144 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5145 size, size_int (16384));
5146 expand_omp_build_assign (&gsi, controlb, cmp);
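/* CONTROLB remembers the allocation strategy: if the combined size of
   the scan temporaries exceeds 16384 bytes they are malloced (and freed
   at the end of the region), otherwise they live on the stack between
   the stack save below and the stack restore at region exit.  */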
5147 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5148 NULL_TREE, NULL_TREE);
5149 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5150 fourth_bb = split_block (third_bb, g)->dest;
5151 gsi = gsi_last_nondebug_bb (fourth_bb);
5152 /* FIXME: Once we have allocators, this should use the allocator. */
5153 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5154 gimple_call_set_lhs (g, controlp);
5155 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5156 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5157 &gsi, true);
5158 gsi_prev (&gsi);
5159 g = gsi_stmt (gsi);
5160 fifth_bb = split_block (fourth_bb, g)->dest;
5161 gsi = gsi_last_nondebug_bb (fifth_bb);
5163 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5164 gimple_call_set_lhs (g, controlp);
5165 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5166 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5167 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5168 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5169 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5171 tree tmp = create_tmp_var (sizetype);
5172 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5173 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5174 TYPE_SIZE_UNIT (pointee_type));
5175 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5176 g = gimple_build_call (alloca_decl, 2, tmp,
5177 size_int (TYPE_ALIGN (pointee_type)));
5178 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5179 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5182 sixth_bb = split_block (fifth_bb, g)->dest;
5183 gsi = gsi_last_nondebug_bb (sixth_bb);
5186 t = build2 (MULT_EXPR, itype, q, threadid);
5187 t = build2 (PLUS_EXPR, itype, t, tt);
5188 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5190 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5191 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5193 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5194 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
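/* Threads whose assigned subrange is empty (S0 >= E0) take the true
   edge straight to FIN_BB and never enter the sequential loop.  */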
5196 /* Remove the GIMPLE_OMP_FOR statement. */
5197 gsi_remove (&gsi, true);
5199 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5200 gsi = gsi_start_bb (seq_start_bb);
5202 tree startvar = fd->loop.v;
5203 tree endvar = NULL_TREE;
5205 if (gimple_omp_for_combined_p (fd->for_stmt))
5207 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5208 ? gimple_omp_parallel_clauses (inner_stmt)
5209 : gimple_omp_for_clauses (inner_stmt);
5210 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5211 gcc_assert (innerc);
5212 startvar = OMP_CLAUSE_DECL (innerc);
5213 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5214 OMP_CLAUSE__LOOPTEMP_);
5215 gcc_assert (innerc);
5216 endvar = OMP_CLAUSE_DECL (innerc);
5217 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5218 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5220 innerc = find_lastprivate_looptemp (fd, innerc);
5221 if (innerc)
5223 /* If needed (distribute parallel for with lastprivate),
5224 propagate down the total number of iterations. */
5225 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5226 fd->loop.n2);
5227 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5228 GSI_CONTINUE_LINKING);
5229 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5230 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5234 t = fold_convert (itype, s0);
5235 t = fold_build2 (MULT_EXPR, itype, t, step);
5236 if (POINTER_TYPE_P (type))
5238 t = fold_build_pointer_plus (n1, t);
5239 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5240 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5241 t = fold_convert (signed_type_for (type), t);
5243 else
5244 t = fold_build2 (PLUS_EXPR, type, t, n1);
5245 t = fold_convert (TREE_TYPE (startvar), t);
5246 t = force_gimple_operand_gsi (&gsi, t,
5247 DECL_P (startvar)
5248 && TREE_ADDRESSABLE (startvar),
5249 NULL_TREE, false, GSI_CONTINUE_LINKING);
5250 assign_stmt = gimple_build_assign (startvar, t);
5251 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5252 if (cond_var)
5254 tree itype = TREE_TYPE (cond_var);
5255 /* For lastprivate(conditional:) itervar, we need some iteration
5256 counter that starts at unsigned non-zero and increases.
5257 Prefer as few IVs as possible, so if we can use startvar
5258 itself, use that, or startvar + constant (those would be
5259 incremented with step), and as a last resort use s0 + 1,
5260 incremented by 1. */
5261 if (POINTER_TYPE_P (type)
5262 || TREE_CODE (n1) != INTEGER_CST
5263 || fd->loop.cond_code != LT_EXPR)
5264 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5265 build_int_cst (itype, 1));
5266 else if (tree_int_cst_sgn (n1) == 1)
5267 t = fold_convert (itype, t);
5268 else
5270 tree c = fold_convert (itype, n1);
5271 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5272 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5274 t = force_gimple_operand_gsi (&gsi, t, false,
5275 NULL_TREE, false, GSI_CONTINUE_LINKING);
5276 assign_stmt = gimple_build_assign (cond_var, t);
5277 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5280 t = fold_convert (itype, e0);
5281 t = fold_build2 (MULT_EXPR, itype, t, step);
5282 if (POINTER_TYPE_P (type))
5284 t = fold_build_pointer_plus (n1, t);
5285 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5286 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5287 t = fold_convert (signed_type_for (type), t);
5289 else
5290 t = fold_build2 (PLUS_EXPR, type, t, n1);
5291 t = fold_convert (TREE_TYPE (startvar), t);
5292 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5293 false, GSI_CONTINUE_LINKING);
5294 if (endvar)
5296 assign_stmt = gimple_build_assign (endvar, e);
5297 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5298 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5299 assign_stmt = gimple_build_assign (fd->loop.v, e);
5300 else
5301 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5302 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5304 /* Handle linear clause adjustments. */
5305 tree itercnt = NULL_TREE;
5306 tree *nonrect_bounds = NULL;
5307 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5308 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5309 c; c = OMP_CLAUSE_CHAIN (c))
5310 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5311 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5313 tree d = OMP_CLAUSE_DECL (c);
5314 tree t = d, a, dest;
5315 if (omp_privatize_by_reference (t))
5316 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5317 if (itercnt == NULL_TREE)
5319 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5321 itercnt = fold_build2 (MINUS_EXPR, itype,
5322 fold_convert (itype, n1),
5323 fold_convert (itype, fd->loop.n1));
5324 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5325 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5326 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5327 NULL_TREE, false,
5328 GSI_CONTINUE_LINKING);
5330 else
5331 itercnt = s0;
5333 tree type = TREE_TYPE (t);
5334 if (POINTER_TYPE_P (type))
5335 type = sizetype;
5336 a = fold_build2 (MULT_EXPR, type,
5337 fold_convert (type, itercnt),
5338 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5339 dest = unshare_expr (t);
5340 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5341 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5342 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5343 false, GSI_CONTINUE_LINKING);
5344 expand_omp_build_assign (&gsi, dest, t, true);
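/* E.g. for a hypothetical linear(x:2) clause, each thread adjusts its
   copy of x to the incoming value plus ITERCNT * 2, ITERCNT being the
   logical iteration number at which the thread's subrange starts.  */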
5346 if (fd->collapse > 1)
5348 if (fd->non_rect)
5350 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5351 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5353 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5354 startvar);
5357 if (!broken_loop)
5359 /* The code controlling the sequential loop replaces the
5360 GIMPLE_OMP_CONTINUE. */
5361 gsi = gsi_last_nondebug_bb (cont_bb);
5362 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5363 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5364 vmain = gimple_omp_continue_control_use (cont_stmt);
5365 vback = gimple_omp_continue_control_def (cont_stmt);
5367 if (cond_var)
5369 tree itype = TREE_TYPE (cond_var);
5370 tree t2;
5371 if (POINTER_TYPE_P (type)
5372 || TREE_CODE (n1) != INTEGER_CST
5373 || fd->loop.cond_code != LT_EXPR)
5374 t2 = build_int_cst (itype, 1);
5375 else
5376 t2 = fold_convert (itype, step);
5377 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5378 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5379 NULL_TREE, true, GSI_SAME_STMT);
5380 assign_stmt = gimple_build_assign (cond_var, t2);
5381 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5384 if (!gimple_omp_for_combined_p (fd->for_stmt))
5386 if (POINTER_TYPE_P (type))
5387 t = fold_build_pointer_plus (vmain, step);
5388 else
5389 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5390 t = force_gimple_operand_gsi (&gsi, t,
5391 DECL_P (vback)
5392 && TREE_ADDRESSABLE (vback),
5393 NULL_TREE, true, GSI_SAME_STMT);
5394 assign_stmt = gimple_build_assign (vback, t);
5395 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5397 t = build2 (fd->loop.cond_code, boolean_type_node,
5398 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5399 ? t : vback, e);
5400 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5403 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5404 gsi_remove (&gsi, true);
5406 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5407 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5408 cont_bb, body_bb);
5411 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5412 gsi = gsi_last_nondebug_bb (exit_bb);
5413 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5415 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5416 if (fd->have_reductemp
5417 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5418 && !fd->have_nonctrl_scantemp))
5420 tree fn;
5421 if (t)
5422 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5423 else
5424 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5425 gcall *g = gimple_build_call (fn, 0);
5426 if (t)
5428 gimple_call_set_lhs (g, t);
5429 if (fd->have_reductemp)
5430 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5431 NOP_EXPR, t),
5432 GSI_SAME_STMT);
5434 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5436 else
5437 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5439 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5440 && !fd->have_nonctrl_scantemp)
5442 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5443 gcall *g = gimple_build_call (fn, 0);
5444 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5446 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5448 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5449 tree controlp = NULL_TREE, controlb = NULL_TREE;
5450 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5451 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5452 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5454 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5455 controlb = OMP_CLAUSE_DECL (c);
5456 else
5457 controlp = OMP_CLAUSE_DECL (c);
5458 if (controlb && controlp)
5459 break;
5461 gcc_assert (controlp && controlb);
5462 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5463 NULL_TREE, NULL_TREE);
5464 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5465 exit1_bb = split_block (exit_bb, g)->dest;
5466 gsi = gsi_after_labels (exit1_bb);
5467 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5468 controlp);
5469 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5470 exit2_bb = split_block (exit1_bb, g)->dest;
5471 gsi = gsi_after_labels (exit2_bb);
5472 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5473 controlp);
5474 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5475 exit3_bb = split_block (exit2_bb, g)->dest;
5476 gsi = gsi_after_labels (exit3_bb);
5478 gsi_remove (&gsi, true);
5480 /* Connect all the blocks. */
5481 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5482 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5483 ep = find_edge (entry_bb, second_bb);
5484 ep->flags = EDGE_TRUE_VALUE;
5485 ep->probability = profile_probability::guessed_always () / 4;
5486 if (fourth_bb)
5488 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5489 ep->probability = profile_probability::guessed_always () / 2;
5490 ep = find_edge (third_bb, fourth_bb);
5491 ep->flags = EDGE_TRUE_VALUE;
5492 ep->probability = profile_probability::guessed_always () / 2;
5493 ep = find_edge (fourth_bb, fifth_bb);
5494 redirect_edge_and_branch (ep, sixth_bb);
5496 else
5497 sixth_bb = third_bb;
5498 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5499 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5500 if (exit1_bb)
5502 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5503 ep->probability = profile_probability::guessed_always () / 2;
5504 ep = find_edge (exit_bb, exit1_bb);
5505 ep->flags = EDGE_TRUE_VALUE;
5506 ep->probability = profile_probability::guessed_always () / 2;
5507 ep = find_edge (exit1_bb, exit2_bb);
5508 redirect_edge_and_branch (ep, exit3_bb);
5511 if (!broken_loop)
5513 ep = find_edge (cont_bb, body_bb);
5514 if (ep == NULL)
5516 ep = BRANCH_EDGE (cont_bb);
5517 gcc_assert (single_succ (ep->dest) == body_bb);
5519 if (gimple_omp_for_combined_p (fd->for_stmt))
5521 remove_edge (ep);
5522 ep = NULL;
5524 else if (fd->collapse > 1)
5526 remove_edge (ep);
5527 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5529 else
5530 ep->flags = EDGE_TRUE_VALUE;
5531 find_edge (cont_bb, fin_bb)->flags
5532 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5535 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5536 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5537 if (fourth_bb)
5539 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5540 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5542 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5544 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5545 recompute_dominator (CDI_DOMINATORS, body_bb));
5546 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5547 recompute_dominator (CDI_DOMINATORS, fin_bb));
5548 if (exit1_bb)
5550 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5551 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5554 class loop *loop = body_bb->loop_father;
5555 if (loop != entry_bb->loop_father)
5557 gcc_assert (broken_loop || loop->header == body_bb);
5558 gcc_assert (broken_loop
5559 || loop->latch == region->cont
5560 || single_pred (loop->latch) == region->cont);
5561 return;
5564 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5566 loop = alloc_loop ();
5567 loop->header = body_bb;
5568 if (collapse_bb == NULL)
5569 loop->latch = cont_bb;
5570 add_loop (loop, body_bb->loop_father);
5574 /* Return phi in E->DEST with ARG on edge E. */
5576 static gphi *
5577 find_phi_with_arg_on_edge (tree arg, edge e)
5579 basic_block bb = e->dest;
5581 for (gphi_iterator gpi = gsi_start_phis (bb);
5582 !gsi_end_p (gpi);
5583 gsi_next (&gpi))
5585 gphi *phi = gpi.phi ();
5586 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5587 return phi;
5590 return NULL;
5593 /* A subroutine of expand_omp_for. Generate code for a parallel
5594 loop with static schedule and a specified chunk size. Given
5595 parameters:
5597 for (V = N1; V cond N2; V += STEP) BODY;
5599 where COND is "<" or ">", we generate pseudocode
5601 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5602 if (cond is <)
5603 adj = STEP - 1;
5604 else
5605 adj = STEP + 1;
5606 if ((__typeof (V)) -1 > 0 && cond is >)
5607 n = -(adj + N2 - N1) / -STEP;
5608 else
5609 n = (adj + N2 - N1) / STEP;
5610 trip = 0;
5611 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5612 here so that V is defined
5613 if the loop is not entered
5614 L0:
5615 s0 = (trip * nthreads + threadid) * CHUNK;
5616 e0 = min (s0 + CHUNK, n);
5617 if (s0 < n) goto L1; else goto L4;
5618 L1:
5619 V = s0 * STEP + N1;
5620 e = e0 * STEP + N1;
5621 L2:
5622 BODY;
5623 V += STEP;
5624 if (V cond e) goto L2; else goto L3;
5625 L3:
5626 trip += 1;
5627 goto L0;
5628 L4:
5629 */
5631 static void
5632 expand_omp_for_static_chunk (struct omp_region *region,
5633 struct omp_for_data *fd, gimple *inner_stmt)
5635 tree n, s0, e0, e, t;
5636 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5637 tree type, itype, vmain, vback, vextra;
5638 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5639 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5640 gimple_stmt_iterator gsi, gsip;
5641 edge se;
5642 bool broken_loop = region->cont == NULL;
5643 tree *counts = NULL;
5644 tree n1, n2, step;
5645 tree reductions = NULL_TREE;
5646 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5648 itype = type = TREE_TYPE (fd->loop.v);
5649 if (POINTER_TYPE_P (type))
5650 itype = signed_type_for (type);
5652 entry_bb = region->entry;
5653 se = split_block (entry_bb, last_stmt (entry_bb));
5654 entry_bb = se->src;
5655 iter_part_bb = se->dest;
5656 cont_bb = region->cont;
5657 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5658 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5659 gcc_assert (broken_loop
5660 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5661 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5662 body_bb = single_succ (seq_start_bb);
5663 if (!broken_loop)
5665 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5666 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5667 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5668 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5670 exit_bb = region->exit;
5672 /* Trip and adjustment setup goes in ENTRY_BB. */
5673 gsi = gsi_last_nondebug_bb (entry_bb);
5674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5675 gsip = gsi;
5676 gsi_prev (&gsip);
5678 if (fd->collapse > 1)
5680 int first_zero_iter = -1, dummy = -1;
5681 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5683 counts = XALLOCAVEC (tree, fd->collapse);
5684 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5685 fin_bb, first_zero_iter,
5686 dummy_bb, dummy, l2_dom_bb);
5687 t = NULL_TREE;
5689 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5690 t = integer_one_node;
5691 else
5692 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5693 fold_convert (type, fd->loop.n1),
5694 fold_convert (type, fd->loop.n2));
5695 if (fd->collapse == 1
5696 && TYPE_UNSIGNED (type)
5697 && (t == NULL_TREE || !integer_onep (t)))
5699 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5700 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5701 true, GSI_SAME_STMT);
5702 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5703 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5704 true, GSI_SAME_STMT);
5705 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5706 n1, n2);
5707 se = split_block (entry_bb, cond_stmt);
5708 se->flags = EDGE_TRUE_VALUE;
5709 entry_bb = se->dest;
5710 se->probability = profile_probability::very_likely ();
5711 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5712 se->probability = profile_probability::very_unlikely ();
5713 if (gimple_in_ssa_p (cfun))
5715 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5716 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5717 !gsi_end_p (gpi); gsi_next (&gpi))
5719 gphi *phi = gpi.phi ();
5720 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5721 se, UNKNOWN_LOCATION);
5724 gsi = gsi_last_bb (entry_bb);
5727 if (fd->lastprivate_conditional)
5729 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5730 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5731 if (fd->have_pointer_condtemp)
5732 condtemp = OMP_CLAUSE_DECL (c);
5733 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5734 cond_var = OMP_CLAUSE_DECL (c);
5736 if (fd->have_reductemp || fd->have_pointer_condtemp)
5738 tree t1 = build_int_cst (long_integer_type_node, 0);
5739 tree t2 = build_int_cst (long_integer_type_node, 1);
5740 tree t3 = build_int_cstu (long_integer_type_node,
5741 (HOST_WIDE_INT_1U << 31) + 1);
5742 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5743 gimple_stmt_iterator gsi2 = gsi_none ();
5744 gimple *g = NULL;
5745 tree mem = null_pointer_node, memv = NULL_TREE;
5746 if (fd->have_reductemp)
5748 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5749 reductions = OMP_CLAUSE_DECL (c);
5750 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5751 g = SSA_NAME_DEF_STMT (reductions);
5752 reductions = gimple_assign_rhs1 (g);
5753 OMP_CLAUSE_DECL (c) = reductions;
5754 gsi2 = gsi_for_stmt (g);
5756 else
5758 if (gsi_end_p (gsip))
5759 gsi2 = gsi_after_labels (region->entry);
5760 else
5761 gsi2 = gsip;
5762 reductions = null_pointer_node;
5764 if (fd->have_pointer_condtemp)
5766 tree type = TREE_TYPE (condtemp);
5767 memv = create_tmp_var (type);
5768 TREE_ADDRESSABLE (memv) = 1;
5769 unsigned HOST_WIDE_INT sz
5770 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5771 sz *= fd->lastprivate_conditional;
5772 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5773 false);
5774 mem = build_fold_addr_expr (memv);
5776 tree t
5777 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5778 9, t1, t2, t2, t3, t1, null_pointer_node,
5779 null_pointer_node, reductions, mem);
5780 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5781 true, GSI_SAME_STMT);
5782 if (fd->have_pointer_condtemp)
5783 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5784 if (fd->have_reductemp)
5786 gsi_remove (&gsi2, true);
5787 release_ssa_name (gimple_assign_lhs (g));
5790 switch (gimple_omp_for_kind (fd->for_stmt))
5792 case GF_OMP_FOR_KIND_FOR:
5793 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5794 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5795 break;
5796 case GF_OMP_FOR_KIND_DISTRIBUTE:
5797 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5798 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5799 break;
5800 default:
5801 gcc_unreachable ();
5803 nthreads = build_call_expr (nthreads, 0);
5804 nthreads = fold_convert (itype, nthreads);
5805 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5806 true, GSI_SAME_STMT);
5807 threadid = build_call_expr (threadid, 0);
5808 threadid = fold_convert (itype, threadid);
5809 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5810 true, GSI_SAME_STMT);
5812 n1 = fd->loop.n1;
5813 n2 = fd->loop.n2;
5814 step = fd->loop.step;
5815 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5817 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5818 OMP_CLAUSE__LOOPTEMP_);
5819 gcc_assert (innerc);
5820 n1 = OMP_CLAUSE_DECL (innerc);
5821 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5822 OMP_CLAUSE__LOOPTEMP_);
5823 gcc_assert (innerc);
5824 n2 = OMP_CLAUSE_DECL (innerc);
5826 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5827 true, NULL_TREE, true, GSI_SAME_STMT);
5828 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5829 true, NULL_TREE, true, GSI_SAME_STMT);
5830 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5831 true, NULL_TREE, true, GSI_SAME_STMT);
5832 tree chunk_size = fold_convert (itype, fd->chunk_size);
5833 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5834 chunk_size
5835 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5836 GSI_SAME_STMT);
5838 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5839 t = fold_build2 (PLUS_EXPR, itype, step, t);
5840 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5841 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5842 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5843 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5844 fold_build1 (NEGATE_EXPR, itype, t),
5845 fold_build1 (NEGATE_EXPR, itype, step));
5846 else
5847 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5848 t = fold_convert (itype, t);
5849 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5850 true, GSI_SAME_STMT);
5852 trip_var = create_tmp_reg (itype, ".trip");
5853 if (gimple_in_ssa_p (cfun))
5855 trip_init = make_ssa_name (trip_var);
5856 trip_main = make_ssa_name (trip_var);
5857 trip_back = make_ssa_name (trip_var);
5859 else
5861 trip_init = trip_var;
5862 trip_main = trip_var;
5863 trip_back = trip_var;
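/* In SSA form the trip counter needs three names: TRIP_INIT for the
   initial zero, TRIP_MAIN as the result of the phi node created in
   ITER_PART_BB below, and TRIP_BACK for the incremented value flowing
   along the back edge from TRIP_UPDATE_BB.  */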
5866 gassign *assign_stmt
5867 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5868 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5870 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5871 t = fold_build2 (MULT_EXPR, itype, t, step);
5872 if (POINTER_TYPE_P (type))
5873 t = fold_build_pointer_plus (n1, t);
5874 else
5875 t = fold_build2 (PLUS_EXPR, type, t, n1);
5876 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5877 true, GSI_SAME_STMT);
5879 /* Remove the GIMPLE_OMP_FOR. */
5880 gsi_remove (&gsi, true);
5882 gimple_stmt_iterator gsif = gsi;
5884 /* Iteration space partitioning goes in ITER_PART_BB. */
5885 gsi = gsi_last_bb (iter_part_bb);
5887 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5888 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5889 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5890 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5891 false, GSI_CONTINUE_LINKING);
5893 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5894 t = fold_build2 (MIN_EXPR, itype, t, n);
5895 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5896 false, GSI_CONTINUE_LINKING);
5898 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5899 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
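/* Once the next chunk would start at or beyond N, the false edge from
   ITER_PART_BB leads to FIN_BB and the thread is done.  */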
5901 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5902 gsi = gsi_start_bb (seq_start_bb);
5904 tree startvar = fd->loop.v;
5905 tree endvar = NULL_TREE;
5907 if (gimple_omp_for_combined_p (fd->for_stmt))
5909 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5910 ? gimple_omp_parallel_clauses (inner_stmt)
5911 : gimple_omp_for_clauses (inner_stmt);
5912 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5913 gcc_assert (innerc);
5914 startvar = OMP_CLAUSE_DECL (innerc);
5915 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5916 OMP_CLAUSE__LOOPTEMP_);
5917 gcc_assert (innerc);
5918 endvar = OMP_CLAUSE_DECL (innerc);
5919 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5920 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5922 innerc = find_lastprivate_looptemp (fd, innerc);
5923 if (innerc)
5925 /* If needed (distribute parallel for with lastprivate),
5926 propagate down the total number of iterations. */
5927 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5928 fd->loop.n2);
5929 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5930 GSI_CONTINUE_LINKING);
5931 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5932 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5937 t = fold_convert (itype, s0);
5938 t = fold_build2 (MULT_EXPR, itype, t, step);
5939 if (POINTER_TYPE_P (type))
5941 t = fold_build_pointer_plus (n1, t);
5942 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5943 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5944 t = fold_convert (signed_type_for (type), t);
5946 else
5947 t = fold_build2 (PLUS_EXPR, type, t, n1);
5948 t = fold_convert (TREE_TYPE (startvar), t);
5949 t = force_gimple_operand_gsi (&gsi, t,
5950 DECL_P (startvar)
5951 && TREE_ADDRESSABLE (startvar),
5952 NULL_TREE, false, GSI_CONTINUE_LINKING);
5953 assign_stmt = gimple_build_assign (startvar, t);
5954 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5955 if (cond_var)
5957 tree itype = TREE_TYPE (cond_var);
5958 /* For lastprivate(conditional:) itervar, we need some iteration
5959 counter that starts at unsigned non-zero and increases.
5960 Prefer as few IVs as possible, so if we can use startvar
5961 itself, use that, or startvar + constant (those would be
5962 incremented with step), and as a last resort use s0 + 1,
5963 incremented by 1. */
5964 if (POINTER_TYPE_P (type)
5965 || TREE_CODE (n1) != INTEGER_CST
5966 || fd->loop.cond_code != LT_EXPR)
5967 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5968 build_int_cst (itype, 1));
5969 else if (tree_int_cst_sgn (n1) == 1)
5970 t = fold_convert (itype, t);
5971 else
5973 tree c = fold_convert (itype, n1);
5974 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5975 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5977 t = force_gimple_operand_gsi (&gsi, t, false,
5978 NULL_TREE, false, GSI_CONTINUE_LINKING);
5979 assign_stmt = gimple_build_assign (cond_var, t);
5980 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5983 t = fold_convert (itype, e0);
5984 t = fold_build2 (MULT_EXPR, itype, t, step);
5985 if (POINTER_TYPE_P (type))
5987 t = fold_build_pointer_plus (n1, t);
5988 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5989 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5990 t = fold_convert (signed_type_for (type), t);
5992 else
5993 t = fold_build2 (PLUS_EXPR, type, t, n1);
5994 t = fold_convert (TREE_TYPE (startvar), t);
5995 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5996 false, GSI_CONTINUE_LINKING);
5997 if (endvar)
5999 assign_stmt = gimple_build_assign (endvar, e);
6000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6001 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6002 assign_stmt = gimple_build_assign (fd->loop.v, e);
6003 else
6004 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6007 /* Handle linear clause adjustments. */
6008 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6009 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6010 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6011 c; c = OMP_CLAUSE_CHAIN (c))
6012 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6013 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6015 tree d = OMP_CLAUSE_DECL (c);
6016 tree t = d, a, dest;
6017 if (omp_privatize_by_reference (t))
6018 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6019 tree type = TREE_TYPE (t);
6020 if (POINTER_TYPE_P (type))
6021 type = sizetype;
6022 dest = unshare_expr (t);
6023 tree v = create_tmp_var (TREE_TYPE (t), NULL);
6024 expand_omp_build_assign (&gsif, v, t);
6025 if (itercnt == NULL_TREE)
6027 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6029 itercntbias
6030 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6031 fold_convert (itype, fd->loop.n1));
6032 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6033 itercntbias, step);
6034 itercntbias
6035 = force_gimple_operand_gsi (&gsif, itercntbias, true,
6036 NULL_TREE, true,
6037 GSI_SAME_STMT);
6038 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6039 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6040 NULL_TREE, false,
6041 GSI_CONTINUE_LINKING);
6043 else
6044 itercnt = s0;
6046 a = fold_build2 (MULT_EXPR, type,
6047 fold_convert (type, itercnt),
6048 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6049 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6050 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6051 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6052 false, GSI_CONTINUE_LINKING);
6053 expand_omp_build_assign (&gsi, dest, t, true);
6055 if (fd->collapse > 1)
6056 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6058 if (!broken_loop)
6060 /* The code controlling the sequential loop goes in CONT_BB,
6061 replacing the GIMPLE_OMP_CONTINUE. */
6062 gsi = gsi_last_nondebug_bb (cont_bb);
6063 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6064 vmain = gimple_omp_continue_control_use (cont_stmt);
6065 vback = gimple_omp_continue_control_def (cont_stmt);
6067 if (cond_var)
6069 tree itype = TREE_TYPE (cond_var);
6070 tree t2;
6071 if (POINTER_TYPE_P (type)
6072 || TREE_CODE (n1) != INTEGER_CST
6073 || fd->loop.cond_code != LT_EXPR)
6074 t2 = build_int_cst (itype, 1);
6075 else
6076 t2 = fold_convert (itype, step);
6077 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6078 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6079 NULL_TREE, true, GSI_SAME_STMT);
6080 assign_stmt = gimple_build_assign (cond_var, t2);
6081 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6084 if (!gimple_omp_for_combined_p (fd->for_stmt))
6086 if (POINTER_TYPE_P (type))
6087 t = fold_build_pointer_plus (vmain, step);
6088 else
6089 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6090 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6091 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6092 true, GSI_SAME_STMT);
6093 assign_stmt = gimple_build_assign (vback, t);
6094 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6096 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6097 t = build2 (EQ_EXPR, boolean_type_node,
6098 build_int_cst (itype, 0),
6099 build_int_cst (itype, 1));
6100 else
6101 t = build2 (fd->loop.cond_code, boolean_type_node,
6102 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6103 ? t : vback, e);
6104 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
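/* With a chunk size of 1 each chunk covers a single iteration, so the
   back-edge test is emitted as the constant-false comparison 0 == 1
   and the inner sequential loop never repeats.  */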
6107 /* Remove GIMPLE_OMP_CONTINUE. */
6108 gsi_remove (&gsi, true);
6110 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6111 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6113 /* Trip update code goes into TRIP_UPDATE_BB. */
6114 gsi = gsi_start_bb (trip_update_bb);
6116 t = build_int_cst (itype, 1);
6117 t = build2 (PLUS_EXPR, itype, trip_main, t);
6118 assign_stmt = gimple_build_assign (trip_back, t);
6119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6122 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6123 gsi = gsi_last_nondebug_bb (exit_bb);
6124 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6126 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6127 if (fd->have_reductemp || fd->have_pointer_condtemp)
6129 tree fn;
6130 if (t)
6131 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6132 else
6133 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6134 gcall *g = gimple_build_call (fn, 0);
6135 if (t)
6137 gimple_call_set_lhs (g, t);
6138 if (fd->have_reductemp)
6139 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6140 NOP_EXPR, t),
6141 GSI_SAME_STMT);
6143 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6145 else
6146 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6148 else if (fd->have_pointer_condtemp)
6150 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6151 gcall *g = gimple_build_call (fn, 0);
6152 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6154 gsi_remove (&gsi, true);
6156 /* Connect the new blocks. */
6157 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6158 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6160 if (!broken_loop)
6162 se = find_edge (cont_bb, body_bb);
6163 if (se == NULL)
6165 se = BRANCH_EDGE (cont_bb);
6166 gcc_assert (single_succ (se->dest) == body_bb);
6168 if (gimple_omp_for_combined_p (fd->for_stmt))
6170 remove_edge (se);
6171 se = NULL;
6173 else if (fd->collapse > 1)
6175 remove_edge (se);
6176 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6178 else
6179 se->flags = EDGE_TRUE_VALUE;
6180 find_edge (cont_bb, trip_update_bb)->flags
6181 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6183 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6184 iter_part_bb);
6187 if (gimple_in_ssa_p (cfun))
6189 gphi_iterator psi;
6190 gphi *phi;
6191 edge re, ene;
6192 edge_var_map *vm;
6193 size_t i;
6195 gcc_assert (fd->collapse == 1 && !broken_loop);
6197 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6198 remove arguments of the phi nodes in fin_bb. We need to create
6199 appropriate phi nodes in iter_part_bb instead. */
6200 se = find_edge (iter_part_bb, fin_bb);
6201 re = single_succ_edge (trip_update_bb);
6202 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6203 ene = single_succ_edge (entry_bb);
6205 psi = gsi_start_phis (fin_bb);
6206 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6207 gsi_next (&psi), ++i)
6209 gphi *nphi;
6210 location_t locus;
6212 phi = psi.phi ();
6213 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6214 redirect_edge_var_map_def (vm), 0))
6215 continue;
6217 t = gimple_phi_result (phi);
6218 gcc_assert (t == redirect_edge_var_map_result (vm));
6220 if (!single_pred_p (fin_bb))
6221 t = copy_ssa_name (t, phi);
6223 nphi = create_phi_node (t, iter_part_bb);
6225 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6226 locus = gimple_phi_arg_location_from_edge (phi, se);
6228 /* A special case -- fd->loop.v is not yet computed in
6229 iter_part_bb, we need to use vextra instead. */
6230 if (t == fd->loop.v)
6231 t = vextra;
6232 add_phi_arg (nphi, t, ene, locus);
6233 locus = redirect_edge_var_map_location (vm);
6234 tree back_arg = redirect_edge_var_map_def (vm);
6235 add_phi_arg (nphi, back_arg, re, locus);
6236 edge ce = find_edge (cont_bb, body_bb);
6237 if (ce == NULL)
6239 ce = BRANCH_EDGE (cont_bb);
6240 gcc_assert (single_succ (ce->dest) == body_bb);
6241 ce = single_succ_edge (ce->dest);
6243 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6244 gcc_assert (inner_loop_phi != NULL);
6245 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6246 find_edge (seq_start_bb, body_bb), locus);
6248 if (!single_pred_p (fin_bb))
6249 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6251 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6252 redirect_edge_var_map_clear (re);
6253 if (single_pred_p (fin_bb))
6254 while (1)
6256 psi = gsi_start_phis (fin_bb);
6257 if (gsi_end_p (psi))
6258 break;
6259 remove_phi_node (&psi, false);
6262 /* Make phi node for trip. */
6263 phi = create_phi_node (trip_main, iter_part_bb);
6264 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6265 UNKNOWN_LOCATION);
6266 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6267 UNKNOWN_LOCATION);
6270 if (!broken_loop)
6271 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6272 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6273 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6274 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6275 recompute_dominator (CDI_DOMINATORS, fin_bb));
6276 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6277 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6278 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6279 recompute_dominator (CDI_DOMINATORS, body_bb));
6281 if (!broken_loop)
6283 class loop *loop = body_bb->loop_father;
6284 class loop *trip_loop = alloc_loop ();
6285 trip_loop->header = iter_part_bb;
6286 trip_loop->latch = trip_update_bb;
6287 add_loop (trip_loop, iter_part_bb->loop_father);
6289 if (loop != entry_bb->loop_father)
6291 gcc_assert (loop->header == body_bb);
6292 gcc_assert (loop->latch == region->cont
6293 || single_pred (loop->latch) == region->cont);
6294 trip_loop->inner = loop;
6295 return;
6298 if (!gimple_omp_for_combined_p (fd->for_stmt))
6300 loop = alloc_loop ();
6301 loop->header = body_bb;
6302 if (collapse_bb == NULL)
6303 loop->latch = cont_bb;
6304 add_loop (loop, trip_loop);
6309 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6310 loop. Given parameters:
6312 for (V = N1; V cond N2; V += STEP) BODY;
6314 where COND is "<" or ">", we generate pseudocode
6316 V = N1;
6317 goto L1;
6318 L0:
6319 BODY;
6320 V += STEP;
6321 L1:
6322 if (V cond N2) goto L0; else goto L2;
6323 L2:
6325 For collapsed loops, emit the outer loops as scalar
6326 and only try to vectorize the innermost loop. */
6328 static void
6329 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6331 tree type, t;
6332 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6333 gimple_stmt_iterator gsi;
6334 gimple *stmt;
6335 gcond *cond_stmt;
6336 bool broken_loop = region->cont == NULL;
6337 edge e, ne;
6338 tree *counts = NULL;
6339 int i;
6340 int safelen_int = INT_MAX;
6341 bool dont_vectorize = false;
6342 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6343 OMP_CLAUSE_SAFELEN);
6344 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6345 OMP_CLAUSE__SIMDUID_);
6346 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6347 OMP_CLAUSE_IF);
6348 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6349 OMP_CLAUSE_SIMDLEN);
6350 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6351 OMP_CLAUSE__CONDTEMP_);
6352 tree n1, n2;
6353 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6355 if (safelen)
6357 poly_uint64 val;
6358 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6359 if (!poly_int_tree_p (safelen, &val))
6360 safelen_int = 0;
6361 else
6362 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6363 if (safelen_int == 1)
6364 safelen_int = 0;
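/* SAFELEN_INT is 0 when the safelen expression is not a (poly_int)
   constant; otherwise it is the clause's known lower bound clamped to
   INT_MAX.  safelen(1) is treated like no safelen at all, since a
   single-lane vector loop buys nothing.  */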
6366 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6367 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6369 safelen_int = 0;
6370 dont_vectorize = true;
6372 type = TREE_TYPE (fd->loop.v);
6373 entry_bb = region->entry;
6374 cont_bb = region->cont;
6375 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6376 gcc_assert (broken_loop
6377 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6378 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6379 if (!broken_loop)
6381 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6382 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6383 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6384 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6386 else
6388 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6389 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6390 l2_bb = single_succ (l1_bb);
6392 exit_bb = region->exit;
6393 l2_dom_bb = NULL;
6395 gsi = gsi_last_nondebug_bb (entry_bb);
6397 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6398 /* Not needed in SSA form right now. */
6399 gcc_assert (!gimple_in_ssa_p (cfun));
6400 if (fd->collapse > 1
6401 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6402 || broken_loop))
6404 int first_zero_iter = -1, dummy = -1;
6405 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6407 counts = XALLOCAVEC (tree, fd->collapse);
6408 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6409 zero_iter_bb, first_zero_iter,
6410 dummy_bb, dummy, l2_dom_bb);
6412 if (l2_dom_bb == NULL)
6413 l2_dom_bb = l1_bb;
6415 n1 = fd->loop.n1;
6416 n2 = fd->loop.n2;
6417 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6419 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6420 OMP_CLAUSE__LOOPTEMP_);
6421 gcc_assert (innerc);
6422 n1 = OMP_CLAUSE_DECL (innerc);
6423 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6424 OMP_CLAUSE__LOOPTEMP_);
6425 gcc_assert (innerc);
6426 n2 = OMP_CLAUSE_DECL (innerc);
6428 tree step = fd->loop.step;
6429 tree orig_step = step; /* May be different from step if is_simt. */
6431 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6432 OMP_CLAUSE__SIMT_);
6433 if (is_simt)
6435 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6436 is_simt = safelen_int > 1;
6438 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6439 if (is_simt)
6441 simt_lane = create_tmp_var (unsigned_type_node);
6442 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6443 gimple_call_set_lhs (g, simt_lane);
6444 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6445 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6446 fold_convert (TREE_TYPE (step), simt_lane));
6447 n1 = fold_convert (type, n1);
6448 if (POINTER_TYPE_P (type))
6449 n1 = fold_build_pointer_plus (n1, offset);
6450 else
6451 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6453 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6454 if (fd->collapse > 1)
6455 simt_maxlane = build_one_cst (unsigned_type_node);
6456 else if (safelen_int < omp_max_simt_vf ())
6457 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6458 tree vf
6459 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6460 unsigned_type_node, 0);
6461 if (simt_maxlane)
6462 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6463 vf = fold_convert (TREE_TYPE (step), vf);
6464 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
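/* Each SIMT lane thus starts at N1 + SIMT_LANE * STEP and advances by
   STEP * VF per iteration, so the lanes interleave over the iteration
   space; SIMT_MAXLANE, when set, caps the number of participating
   lanes.  */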
6467 tree n2var = NULL_TREE;
6468 tree n2v = NULL_TREE;
6469 tree *nonrect_bounds = NULL;
6470 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6471 if (fd->collapse > 1)
6473 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6475 if (fd->non_rect)
6477 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6478 memset (nonrect_bounds, 0,
6479 sizeof (tree) * (fd->last_nonrect + 1));
6481 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6482 gcc_assert (entry_bb == gsi_bb (gsi));
6483 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6484 gsi_prev (&gsi);
6485 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6486 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6487 NULL, n1);
6488 gsi = gsi_for_stmt (fd->for_stmt);
6490 if (broken_loop)
6492 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6494 /* Compute in n2var the limit for the first innermost loop,
6495 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6496 where cnt is how many iterations the loop would have
6497 all further iterations were assigned to the current task. */
6498 n2var = create_tmp_var (type);
6499 i = fd->collapse - 1;
6500 tree itype = TREE_TYPE (fd->loops[i].v);
6501 if (POINTER_TYPE_P (itype))
6502 itype = signed_type_for (itype);
6503 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6504 ? -1 : 1));
6505 t = fold_build2 (PLUS_EXPR, itype,
6506 fold_convert (itype, fd->loops[i].step), t);
6507 t = fold_build2 (PLUS_EXPR, itype, t,
6508 fold_convert (itype, fd->loops[i].n2));
6509 if (fd->loops[i].m2)
6511 tree t2 = fold_convert (itype,
6512 fd->loops[i - fd->loops[i].outer].v);
6513 tree t3 = fold_convert (itype, fd->loops[i].m2);
6514 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6515 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6517 t = fold_build2 (MINUS_EXPR, itype, t,
6518 fold_convert (itype, fd->loops[i].v));
6519 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6520 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6521 fold_build1 (NEGATE_EXPR, itype, t),
6522 fold_build1 (NEGATE_EXPR, itype,
6523 fold_convert (itype,
6524 fd->loops[i].step)));
6525 else
6526 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6527 fold_convert (itype, fd->loops[i].step));
6528 t = fold_convert (type, t);
6529 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6530 min_arg1 = create_tmp_var (type);
6531 expand_omp_build_assign (&gsi, min_arg1, t2);
6532 min_arg2 = create_tmp_var (type);
6533 expand_omp_build_assign (&gsi, min_arg2, t);
6535 else
6537 if (TREE_CODE (n2) == INTEGER_CST)
6539 /* Indicate for lastprivate handling that at least one iteration
6540 has been performed, without wasting runtime. */
6541 if (integer_nonzerop (n2))
6542 expand_omp_build_assign (&gsi, fd->loop.v,
6543 fold_convert (type, n2));
6544 else
6545 /* Indicate that no iteration has been performed. */
6546 expand_omp_build_assign (&gsi, fd->loop.v,
6547 build_one_cst (type));
6549 else
6551 expand_omp_build_assign (&gsi, fd->loop.v,
6552 build_zero_cst (type));
6553 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6555 for (i = 0; i < fd->collapse; i++)
6557 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6558 if (fd->loops[i].m1)
6560 tree t2
6561 = fold_convert (TREE_TYPE (t),
6562 fd->loops[i - fd->loops[i].outer].v);
6563 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6564 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6565 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6567 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6568 /* For normal non-combined collapsed loops just initialize
6569 the outermost iterator in the entry_bb. */
6570 if (!broken_loop)
6571 break;
6575 else
6576 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6577 tree altv = NULL_TREE, altn2 = NULL_TREE;
6578 if (fd->collapse == 1
6579 && !broken_loop
6580 && TREE_CODE (orig_step) != INTEGER_CST)
6582 /* The vectorizer currently punts on loops with a non-constant step
6583 for the main IV (it cannot compute the number of iterations and
6584 gives up because of that). Since for OpenMP loops the number of
6585 iterations can always be computed upfront, use an alternate IV
6586 as the loop iterator:
6587 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6588 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6589 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6590 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6591 tree itype = TREE_TYPE (fd->loop.v);
6592 if (POINTER_TYPE_P (itype))
6593 itype = signed_type_for (itype);
6594 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6595 t = fold_build2 (PLUS_EXPR, itype,
6596 fold_convert (itype, step), t);
6597 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6598 t = fold_build2 (MINUS_EXPR, itype, t,
6599 fold_convert (itype, fd->loop.v));
6600 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6601 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6602 fold_build1 (NEGATE_EXPR, itype, t),
6603 fold_build1 (NEGATE_EXPR, itype,
6604 fold_convert (itype, step)));
6605 else
6606 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6607 fold_convert (itype, step));
6608 t = fold_convert (TREE_TYPE (altv), t);
6609 altn2 = create_tmp_var (TREE_TYPE (altv));
6610 expand_omp_build_assign (&gsi, altn2, t);
6611 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6612 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6613 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6614 true, GSI_SAME_STMT);
6615 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6616 build_zero_cst (TREE_TYPE (altv)));
6617 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
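/* If the loop condition fails already for the initial value of the IV,
   force the alternate iteration count ALTN2 to zero so the vectorized
   loop body is skipped entirely.  */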
6619 else if (fd->collapse > 1
6620 && !broken_loop
6621 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6622 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6624 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6625 altn2 = create_tmp_var (TREE_TYPE (altv));
6627 if (cond_var)
6629 if (POINTER_TYPE_P (type)
6630 || TREE_CODE (n1) != INTEGER_CST
6631 || fd->loop.cond_code != LT_EXPR
6632 || tree_int_cst_sgn (n1) != 1)
6633 expand_omp_build_assign (&gsi, cond_var,
6634 build_one_cst (TREE_TYPE (cond_var)));
6635 else
6636 expand_omp_build_assign (&gsi, cond_var,
6637 fold_convert (TREE_TYPE (cond_var), n1));
6640 /* Remove the GIMPLE_OMP_FOR statement. */
6641 gsi_remove (&gsi, true);
6643 if (!broken_loop)
6645 /* Code to control the increment goes in the CONT_BB. */
6646 gsi = gsi_last_nondebug_bb (cont_bb);
6647 stmt = gsi_stmt (gsi);
6648 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6650 if (fd->collapse == 1
6651 || gimple_omp_for_combined_into_p (fd->for_stmt))
6653 if (POINTER_TYPE_P (type))
6654 t = fold_build_pointer_plus (fd->loop.v, step);
6655 else
6656 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6657 expand_omp_build_assign (&gsi, fd->loop.v, t);
6659 else if (TREE_CODE (n2) != INTEGER_CST)
6660 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6661 if (altv)
6663 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6664 build_one_cst (TREE_TYPE (altv)));
6665 expand_omp_build_assign (&gsi, altv, t);
6668 if (fd->collapse > 1)
6670 i = fd->collapse - 1;
6671 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6673 t = fold_convert (sizetype, fd->loops[i].step);
6674 t = fold_build_pointer_plus (fd->loops[i].v, t);
6676 else
6678 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6679 fd->loops[i].step);
6680 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6681 fd->loops[i].v, t);
6683 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6685 if (cond_var)
6687 if (POINTER_TYPE_P (type)
6688 || TREE_CODE (n1) != INTEGER_CST
6689 || fd->loop.cond_code != LT_EXPR
6690 || tree_int_cst_sgn (n1) != 1)
6691 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6692 build_one_cst (TREE_TYPE (cond_var)));
6693 else
6694 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6695 fold_convert (TREE_TYPE (cond_var), step));
6696 expand_omp_build_assign (&gsi, cond_var, t);
6699 /* Remove GIMPLE_OMP_CONTINUE. */
6700 gsi_remove (&gsi, true);
6703 /* Emit the condition in L1_BB. */
6704 gsi = gsi_start_bb (l1_bb);
6706 if (altv)
6707 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6708 else if (fd->collapse > 1
6709 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6710 && !broken_loop)
6712 i = fd->collapse - 1;
6713 tree itype = TREE_TYPE (fd->loops[i].v);
6714 if (fd->loops[i].m2)
6715 t = n2v = create_tmp_var (itype);
6716 else
6717 t = fold_convert (itype, fd->loops[i].n2);
6718 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6719 false, GSI_CONTINUE_LINKING);
6720 tree v = fd->loops[i].v;
6721 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6722 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6723 false, GSI_CONTINUE_LINKING);
6724 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6726 else
6728 if (fd->collapse > 1 && !broken_loop)
6729 t = n2var;
6730 else
6731 t = fold_convert (type, n2);
6732 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6733 false, GSI_CONTINUE_LINKING);
6734 tree v = fd->loop.v;
6735 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6736 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6737 false, GSI_CONTINUE_LINKING);
6738 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6740 cond_stmt = gimple_build_cond_empty (t);
6741 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6742 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6743 NULL, NULL)
6744 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6745 NULL, NULL))
6747 gsi = gsi_for_stmt (cond_stmt);
6748 gimple_regimplify_operands (cond_stmt, &gsi);
6751 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6752 if (is_simt)
6754 gsi = gsi_start_bb (l2_bb);
6755 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6756 if (POINTER_TYPE_P (type))
6757 t = fold_build_pointer_plus (fd->loop.v, step);
6758 else
6759 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6760 expand_omp_build_assign (&gsi, fd->loop.v, t);
6763 /* Remove GIMPLE_OMP_RETURN. */
6764 gsi = gsi_last_nondebug_bb (exit_bb);
6765 gsi_remove (&gsi, true);
6767 /* Connect the new blocks. */
6768 remove_edge (FALLTHRU_EDGE (entry_bb));
6770 if (!broken_loop)
6772 remove_edge (BRANCH_EDGE (entry_bb));
6773 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6775 e = BRANCH_EDGE (l1_bb);
6776 ne = FALLTHRU_EDGE (l1_bb);
6777 e->flags = EDGE_TRUE_VALUE;
6779 else
6781 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6783 ne = single_succ_edge (l1_bb);
6784 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6787 ne->flags = EDGE_FALSE_VALUE;
6788 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6789 ne->probability = e->probability.invert ();
6791 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6792 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6794 if (simt_maxlane)
6796 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6797 NULL_TREE, NULL_TREE);
6798 gsi = gsi_last_bb (entry_bb);
6799 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6800 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6801 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6802 FALLTHRU_EDGE (entry_bb)->probability
6803 = profile_probability::guessed_always ().apply_scale (7, 8);
6804 BRANCH_EDGE (entry_bb)->probability
6805 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6806 l2_dom_bb = entry_bb;
6808 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6810 if (!broken_loop && fd->collapse > 1)
6812 basic_block last_bb = l1_bb;
6813 basic_block init_bb = NULL;
6814 for (i = fd->collapse - 2; i >= 0; i--)
6816 tree nextn2v = NULL_TREE;
6817 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6818 e = EDGE_SUCC (last_bb, 0);
6819 else
6820 e = EDGE_SUCC (last_bb, 1);
6821 basic_block bb = split_edge (e);
6822 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6824 t = fold_convert (sizetype, fd->loops[i].step);
6825 t = fold_build_pointer_plus (fd->loops[i].v, t);
6827 else
6829 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6830 fd->loops[i].step);
6831 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6832 fd->loops[i].v, t);
6834 gsi = gsi_after_labels (bb);
6835 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6837 bb = split_block (bb, last_stmt (bb))->dest;
6838 gsi = gsi_start_bb (bb);
6839 tree itype = TREE_TYPE (fd->loops[i].v);
6840 if (fd->loops[i].m2)
6841 t = nextn2v = create_tmp_var (itype);
6842 else
6843 t = fold_convert (itype, fd->loops[i].n2);
6844 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6845 false, GSI_CONTINUE_LINKING);
6846 tree v = fd->loops[i].v;
6847 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6848 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6849 false, GSI_CONTINUE_LINKING);
6850 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6851 cond_stmt = gimple_build_cond_empty (t);
6852 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6853 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6854 expand_omp_regimplify_p, NULL, NULL)
6855 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6856 expand_omp_regimplify_p, NULL, NULL))
6858 gsi = gsi_for_stmt (cond_stmt);
6859 gimple_regimplify_operands (cond_stmt, &gsi);
6861 ne = single_succ_edge (bb);
6862 ne->flags = EDGE_FALSE_VALUE;
6864 init_bb = create_empty_bb (bb);
6865 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6866 add_bb_to_loop (init_bb, bb->loop_father);
6867 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6868 e->probability
6869 = profile_probability::guessed_always ().apply_scale (7, 8);
6870 ne->probability = e->probability.invert ();
6872 gsi = gsi_after_labels (init_bb);
6873 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6874 fd->loops[i + 1].n1);
6875 if (fd->loops[i + 1].m1)
6877 tree t2 = fold_convert (TREE_TYPE (t),
6878 fd->loops[i + 1
6879 - fd->loops[i + 1].outer].v);
6880 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6881 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6882 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6884 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6885 if (fd->loops[i + 1].m2)
6887 if (i + 2 == fd->collapse && (n2var || altv))
6889 gcc_assert (n2v == NULL_TREE);
6890 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6892 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6893 fd->loops[i + 1].n2);
6894 tree t2 = fold_convert (TREE_TYPE (t),
6895 fd->loops[i + 1
6896 - fd->loops[i + 1].outer].v);
6897 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6898 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6899 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6900 expand_omp_build_assign (&gsi, n2v, t);
6902 if (i + 2 == fd->collapse && n2var)
6904 /* For composite simd, n2 is the first iteration the current
6905 task shouldn't already handle, so we effectively want to use
6906 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6907 as the vectorized loop. But the vectorizer will not
6908 vectorize that, so instead compute N2VAR as
6909 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6910 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6911 as the loop to vectorize. */
6912 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6913 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6915 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6916 == LT_EXPR ? -1 : 1));
6917 t = fold_build2 (PLUS_EXPR, itype,
6918 fold_convert (itype,
6919 fd->loops[i + 1].step), t);
6920 if (fd->loops[i + 1].m2)
6921 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6922 else
6923 t = fold_build2 (PLUS_EXPR, itype, t,
6924 fold_convert (itype,
6925 fd->loops[i + 1].n2));
6926 t = fold_build2 (MINUS_EXPR, itype, t,
6927 fold_convert (itype, fd->loops[i + 1].v));
6928 tree step = fold_convert (itype, fd->loops[i + 1].step);
6929 if (TYPE_UNSIGNED (itype)
6930 && fd->loops[i + 1].cond_code == GT_EXPR)
6931 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6932 fold_build1 (NEGATE_EXPR, itype, t),
6933 fold_build1 (NEGATE_EXPR, itype, step));
6934 else
6935 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6936 t = fold_convert (type, t);
6938 else
6939 t = counts[i + 1];
6940 expand_omp_build_assign (&gsi, min_arg1, t2);
6941 expand_omp_build_assign (&gsi, min_arg2, t);
6942 e = split_block (init_bb, last_stmt (init_bb));
6943 gsi = gsi_after_labels (e->dest);
6944 init_bb = e->dest;
6945 remove_edge (FALLTHRU_EDGE (entry_bb));
6946 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6947 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6948 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6949 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6950 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6951 expand_omp_build_assign (&gsi, n2var, t);
6953 if (i + 2 == fd->collapse && altv)
6955 /* The vectorizer currently punts on loops with non-constant
6956 steps for the main IV (can't compute number of iterations
6957 and gives up because of that). Since for OpenMP loops it is
6958 always possible to compute the number of iterations upfront,
6959 use an alternate IV as the loop iterator. */
6960 expand_omp_build_assign (&gsi, altv,
6961 build_zero_cst (TREE_TYPE (altv)));
6962 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6963 if (POINTER_TYPE_P (itype))
6964 itype = signed_type_for (itype);
6965 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6966 ? -1 : 1));
6967 t = fold_build2 (PLUS_EXPR, itype,
6968 fold_convert (itype, fd->loops[i + 1].step), t);
6969 t = fold_build2 (PLUS_EXPR, itype, t,
6970 fold_convert (itype,
6971 fd->loops[i + 1].m2
6972 ? n2v : fd->loops[i + 1].n2));
6973 t = fold_build2 (MINUS_EXPR, itype, t,
6974 fold_convert (itype, fd->loops[i + 1].v));
6975 tree step = fold_convert (itype, fd->loops[i + 1].step);
6976 if (TYPE_UNSIGNED (itype)
6977 && fd->loops[i + 1].cond_code == GT_EXPR)
6978 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6979 fold_build1 (NEGATE_EXPR, itype, t),
6980 fold_build1 (NEGATE_EXPR, itype, step));
6981 else
6982 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6983 t = fold_convert (TREE_TYPE (altv), t);
6984 expand_omp_build_assign (&gsi, altn2, t);
6985 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6986 fd->loops[i + 1].m2
6987 ? n2v : fd->loops[i + 1].n2);
6988 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6989 fd->loops[i + 1].v, t2);
6990 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6991 true, GSI_SAME_STMT);
6992 gassign *g
6993 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6994 build_zero_cst (TREE_TYPE (altv)));
6995 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6997 n2v = nextn2v;
6999 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7000 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7002 e = find_edge (entry_bb, last_bb);
7003 redirect_edge_succ (e, bb);
7004 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7005 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7008 last_bb = bb;
7011 if (!broken_loop)
7013 class loop *loop = alloc_loop ();
7014 loop->header = l1_bb;
7015 loop->latch = cont_bb;
7016 add_loop (loop, l1_bb->loop_father);
7017 loop->safelen = safelen_int;
7018 if (simduid)
7020 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7021 cfun->has_simduid_loops = true;
7023 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7024 the loop. */
7025 if ((flag_tree_loop_vectorize
7026 || !OPTION_SET_P (flag_tree_loop_vectorize))
7027 && flag_tree_loop_optimize
7028 && loop->safelen > 1)
7030 loop->force_vectorize = true;
7031 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7033 unsigned HOST_WIDE_INT v
7034 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7035 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7036 loop->simdlen = v;
7038 cfun->has_force_vectorize_loops = true;
7040 else if (dont_vectorize)
7041 loop->dont_vectorize = true;
7043 else if (simduid)
7044 cfun->has_simduid_loops = true;
7047 /* Taskloop construct is represented after gimplification with
7048 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7049 in between them. This routine expands the outer GIMPLE_OMP_FOR,
7050 which should just compute all the needed loop temporaries
7051 for GIMPLE_OMP_TASK. */
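/* Illustrative sketch (not taken from a testcase): a source-level

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   becomes an outer GIMPLE_OMP_FOR that merely evaluates the bounds and
   stores them into _looptemp_ clause temporaries, a GIMPLE_OMP_TASK
   whose expansion ends up calling GOMP_taskloop{,_ull}, and an inner
   GIMPLE_OMP_FOR (expanded further below) that iterates over the
   single range each created task receives.  */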
7053 static void
7054 expand_omp_taskloop_for_outer (struct omp_region *region,
7055 struct omp_for_data *fd,
7056 gimple *inner_stmt)
7058 tree type, bias = NULL_TREE;
7059 basic_block entry_bb, cont_bb, exit_bb;
7060 gimple_stmt_iterator gsi;
7061 gassign *assign_stmt;
7062 tree *counts = NULL;
7063 int i;
7065 gcc_assert (inner_stmt);
7066 gcc_assert (region->cont);
7067 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7068 && gimple_omp_task_taskloop_p (inner_stmt));
7069 type = TREE_TYPE (fd->loop.v);
7071 /* See if we need to bias by LLONG_MIN. */
7072 if (fd->iter_type == long_long_unsigned_type_node
7073 && TREE_CODE (type) == INTEGER_TYPE
7074 && !TYPE_UNSIGNED (type))
7076 tree n1, n2;
7078 if (fd->loop.cond_code == LT_EXPR)
7080 n1 = fd->loop.n1;
7081 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7083 else
7085 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7086 n2 = fd->loop.n1;
7088 if (TREE_CODE (n1) != INTEGER_CST
7089 || TREE_CODE (n2) != INTEGER_CST
7090 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7091 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
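      /* Illustrative note: adding the bias (modulo 2**64) flips the sign
	 bit, mapping the signed range order-preservingly onto unsigned
	 long long; e.g. with a long long IV the bias is LLONG_MIN, and a
	 loop from -4 to 4 becomes one from 0x7ffffffffffffffc to
	 0x8000000000000004 for the runtime.  */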
7094 entry_bb = region->entry;
7095 cont_bb = region->cont;
7096 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7097 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7098 exit_bb = region->exit;
7100 gsi = gsi_last_nondebug_bb (entry_bb);
7101 gimple *for_stmt = gsi_stmt (gsi);
7102 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7103 if (fd->collapse > 1)
7105 int first_zero_iter = -1, dummy = -1;
7106 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7108 counts = XALLOCAVEC (tree, fd->collapse);
7109 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7110 zero_iter_bb, first_zero_iter,
7111 dummy_bb, dummy, l2_dom_bb);
7113 if (zero_iter_bb)
7115 /* Some counts[i] vars might be uninitialized if
7116 some loop has zero iterations. But the body shouldn't
7117 be executed in that case, so just avoid uninit warnings. */
7118 for (i = first_zero_iter; i < fd->collapse; i++)
7119 if (SSA_VAR_P (counts[i]))
7120 suppress_warning (counts[i], OPT_Wuninitialized);
7121 gsi_prev (&gsi);
7122 edge e = split_block (entry_bb, gsi_stmt (gsi));
7123 entry_bb = e->dest;
7124 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7125 gsi = gsi_last_bb (entry_bb);
7126 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7127 get_immediate_dominator (CDI_DOMINATORS,
7128 zero_iter_bb));
7132 tree t0, t1;
7133 t1 = fd->loop.n2;
7134 t0 = fd->loop.n1;
7135 if (POINTER_TYPE_P (TREE_TYPE (t0))
7136 && TYPE_PRECISION (TREE_TYPE (t0))
7137 != TYPE_PRECISION (fd->iter_type))
7139 /* Avoid casting pointers to an integer of a different size. */
7140 tree itype = signed_type_for (type);
7141 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7142 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7144 else
7146 t1 = fold_convert (fd->iter_type, t1);
7147 t0 = fold_convert (fd->iter_type, t0);
7149 if (bias)
7151 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7152 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7155 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7156 OMP_CLAUSE__LOOPTEMP_);
7157 gcc_assert (innerc);
7158 tree startvar = OMP_CLAUSE_DECL (innerc);
7159 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7160 gcc_assert (innerc);
7161 tree endvar = OMP_CLAUSE_DECL (innerc);
7162 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7164 innerc = find_lastprivate_looptemp (fd, innerc);
7165 if (innerc)
7167 /* If needed (inner taskloop has lastprivate clause), propagate
7168 down the total number of iterations. */
7169 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7170 NULL_TREE, false,
7171 GSI_CONTINUE_LINKING);
7172 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7173 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7177 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7178 GSI_CONTINUE_LINKING);
7179 assign_stmt = gimple_build_assign (startvar, t0);
7180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7182 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7183 GSI_CONTINUE_LINKING);
7184 assign_stmt = gimple_build_assign (endvar, t1);
7185 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7186 if (fd->collapse > 1)
7187 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7189 /* Remove the GIMPLE_OMP_FOR statement. */
7190 gsi = gsi_for_stmt (for_stmt);
7191 gsi_remove (&gsi, true);
7193 gsi = gsi_last_nondebug_bb (cont_bb);
7194 gsi_remove (&gsi, true);
7196 gsi = gsi_last_nondebug_bb (exit_bb);
7197 gsi_remove (&gsi, true);
7199 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7200 remove_edge (BRANCH_EDGE (entry_bb));
7201 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7202 remove_edge (BRANCH_EDGE (cont_bb));
7203 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7204 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7205 recompute_dominator (CDI_DOMINATORS, region->entry));
7208 /* Taskloop construct is represented after gimplification with
7209 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7210 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7211 The GOMP_taskloop{,_ull} function arranges for each task to be given
7212 just a single range of iterations. */
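/* Hedged sketch of the result: with START and END taken from the two
   _looptemp_ clauses the runtime filled in, each task effectively runs

     for (V = START; V cond END; V += STEP)
       BODY;

   with no further libgomp calls in the loop itself.  */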
7214 static void
7215 expand_omp_taskloop_for_inner (struct omp_region *region,
7216 struct omp_for_data *fd,
7217 gimple *inner_stmt)
7219 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7220 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7221 basic_block fin_bb;
7222 gimple_stmt_iterator gsi;
7223 edge ep;
7224 bool broken_loop = region->cont == NULL;
7225 tree *counts = NULL;
7226 tree n1, n2, step;
7228 itype = type = TREE_TYPE (fd->loop.v);
7229 if (POINTER_TYPE_P (type))
7230 itype = signed_type_for (type);
7232 /* See if we need to bias by LLONG_MIN. */
7233 if (fd->iter_type == long_long_unsigned_type_node
7234 && TREE_CODE (type) == INTEGER_TYPE
7235 && !TYPE_UNSIGNED (type))
7237 tree n1, n2;
7239 if (fd->loop.cond_code == LT_EXPR)
7241 n1 = fd->loop.n1;
7242 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7244 else
7246 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7247 n2 = fd->loop.n1;
7249 if (TREE_CODE (n1) != INTEGER_CST
7250 || TREE_CODE (n2) != INTEGER_CST
7251 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7252 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7255 entry_bb = region->entry;
7256 cont_bb = region->cont;
7257 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7258 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7259 gcc_assert (broken_loop
7260 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7261 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7262 if (!broken_loop)
7264 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7267 exit_bb = region->exit;
7269 /* Iteration space partitioning goes in ENTRY_BB. */
7270 gsi = gsi_last_nondebug_bb (entry_bb);
7271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7273 if (fd->collapse > 1)
7275 int first_zero_iter = -1, dummy = -1;
7276 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7278 counts = XALLOCAVEC (tree, fd->collapse);
7279 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7280 fin_bb, first_zero_iter,
7281 dummy_bb, dummy, l2_dom_bb);
7282 t = NULL_TREE;
7284 else
7285 t = integer_one_node;
7287 step = fd->loop.step;
7288 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7289 OMP_CLAUSE__LOOPTEMP_);
7290 gcc_assert (innerc);
7291 n1 = OMP_CLAUSE_DECL (innerc);
7292 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7293 gcc_assert (innerc);
7294 n2 = OMP_CLAUSE_DECL (innerc);
7295 if (bias)
7297 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7298 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7300 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7301 true, NULL_TREE, true, GSI_SAME_STMT);
7302 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7303 true, NULL_TREE, true, GSI_SAME_STMT);
7304 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7305 true, NULL_TREE, true, GSI_SAME_STMT);
7307 tree startvar = fd->loop.v;
7308 tree endvar = NULL_TREE;
7310 if (gimple_omp_for_combined_p (fd->for_stmt))
7312 tree clauses = gimple_omp_for_clauses (inner_stmt);
7313 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7314 gcc_assert (innerc);
7315 startvar = OMP_CLAUSE_DECL (innerc);
7316 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7317 OMP_CLAUSE__LOOPTEMP_);
7318 gcc_assert (innerc);
7319 endvar = OMP_CLAUSE_DECL (innerc);
7321 t = fold_convert (TREE_TYPE (startvar), n1);
7322 t = force_gimple_operand_gsi (&gsi, t,
7323 DECL_P (startvar)
7324 && TREE_ADDRESSABLE (startvar),
7325 NULL_TREE, false, GSI_CONTINUE_LINKING);
7326 gimple *assign_stmt = gimple_build_assign (startvar, t);
7327 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7329 t = fold_convert (TREE_TYPE (startvar), n2);
7330 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7331 false, GSI_CONTINUE_LINKING);
7332 if (endvar)
7334 assign_stmt = gimple_build_assign (endvar, e);
7335 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7336 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7337 assign_stmt = gimple_build_assign (fd->loop.v, e);
7338 else
7339 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7340 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7343 tree *nonrect_bounds = NULL;
7344 if (fd->collapse > 1)
7346 if (fd->non_rect)
7348 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7349 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7351 gcc_assert (gsi_bb (gsi) == entry_bb);
7352 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7353 startvar);
7354 entry_bb = gsi_bb (gsi);
7357 if (!broken_loop)
7359 /* The code controlling the sequential loop replaces the
7360 GIMPLE_OMP_CONTINUE. */
7361 gsi = gsi_last_nondebug_bb (cont_bb);
7362 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7363 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7364 vmain = gimple_omp_continue_control_use (cont_stmt);
7365 vback = gimple_omp_continue_control_def (cont_stmt);
7367 if (!gimple_omp_for_combined_p (fd->for_stmt))
7369 if (POINTER_TYPE_P (type))
7370 t = fold_build_pointer_plus (vmain, step);
7371 else
7372 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7373 t = force_gimple_operand_gsi (&gsi, t,
7374 DECL_P (vback)
7375 && TREE_ADDRESSABLE (vback),
7376 NULL_TREE, true, GSI_SAME_STMT);
7377 assign_stmt = gimple_build_assign (vback, t);
7378 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7380 t = build2 (fd->loop.cond_code, boolean_type_node,
7381 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7382 ? t : vback, e);
7383 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7386 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7387 gsi_remove (&gsi, true);
7389 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7390 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7391 cont_bb, body_bb);
7394 /* Remove the GIMPLE_OMP_FOR statement. */
7395 gsi = gsi_for_stmt (fd->for_stmt);
7396 gsi_remove (&gsi, true);
7398 /* Remove the GIMPLE_OMP_RETURN statement. */
7399 gsi = gsi_last_nondebug_bb (exit_bb);
7400 gsi_remove (&gsi, true);
7402 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7403 if (!broken_loop)
7404 remove_edge (BRANCH_EDGE (entry_bb));
7405 else
7407 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7408 region->outer->cont = NULL;
7411 /* Connect all the blocks. */
7412 if (!broken_loop)
7414 ep = find_edge (cont_bb, body_bb);
7415 if (gimple_omp_for_combined_p (fd->for_stmt))
7417 remove_edge (ep);
7418 ep = NULL;
7420 else if (fd->collapse > 1)
7422 remove_edge (ep);
7423 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7425 else
7426 ep->flags = EDGE_TRUE_VALUE;
7427 find_edge (cont_bb, fin_bb)->flags
7428 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7431 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7432 recompute_dominator (CDI_DOMINATORS, body_bb));
7433 if (!broken_loop)
7434 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7435 recompute_dominator (CDI_DOMINATORS, fin_bb));
7437 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7439 class loop *loop = alloc_loop ();
7440 loop->header = body_bb;
7441 if (collapse_bb == NULL)
7442 loop->latch = cont_bb;
7443 add_loop (loop, body_bb->loop_father);
7447 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7448 partitioned loop. The lowering here is abstracted, in that the
7449 loop parameters are passed through internal functions, which are
7450 further lowered by oacc_device_lower, once we get to the target
7451 compiler. The loop is of the form:
7453 for (V = B; V LTGT E; V += S) {BODY}
7455 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7456 (constant 0 for no chunking) and we will have a GWV partitioning
7457 mask, specifying dimensions over which the loop is to be
7458 partitioned (see note below). We generate code that looks like
7459 (this ignores tiling):
7461 <entry_bb> [incoming FALL->body, BRANCH->exit]
7462 typedef signedintify (typeof (V)) T; // underlying signed integral type
7463 T range = E - B;
7464 T chunk_no = 0;
7465 T DIR = LTGT == '<' ? +1 : -1;
7466 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7467 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7469 <head_bb> [created by splitting end of entry_bb]
7470 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7471 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7472 if (!(offset LTGT bound)) goto bottom_bb;
7474 <body_bb> [incoming]
7475 V = B + offset;
7476 {BODY}
7478 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7479 offset += step;
7480 if (offset LTGT bound) goto body_bb; [*]
7482 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7483 chunk_no++;
7484 if (chunk_no < chunk_max) goto head_bb;
7486 <exit_bb> [incoming]
7487 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7489 [*] Needed if V live at end of loop. */
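/* For example (illustrative only), a construct such as

     #pragma acc parallel loop gang vector
     for (i = 0; i < n; i++)
       a[i] += 1;

   reaches this point with a GWV mask naming the gang and vector
   dimensions; the IFN_GOACC_LOOP calls emitted below compute the
   chunking, step, offset and bound, and are only given their final,
   target-specific meaning later by oacc_device_lower.  */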
7491 static void
7492 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7494 bool is_oacc_kernels_parallelized
7495 = (lookup_attribute ("oacc kernels parallelized",
7496 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7498 bool is_oacc_kernels
7499 = (lookup_attribute ("oacc kernels",
7500 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7501 if (is_oacc_kernels_parallelized)
7502 gcc_checking_assert (is_oacc_kernels);
7504 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7505 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7506 for SSA specifics, and some are for 'parloops' OpenACC
7507 'kernels'-parallelized specifics. */
7509 tree v = fd->loop.v;
7510 enum tree_code cond_code = fd->loop.cond_code;
7511 enum tree_code plus_code = PLUS_EXPR;
7513 tree chunk_size = integer_minus_one_node;
7514 tree gwv = integer_zero_node;
7515 tree iter_type = TREE_TYPE (v);
7516 tree diff_type = iter_type;
7517 tree plus_type = iter_type;
7518 struct oacc_collapse *counts = NULL;
7520 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7521 == GF_OMP_FOR_KIND_OACC_LOOP);
7522 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7523 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7525 if (POINTER_TYPE_P (iter_type))
7527 plus_code = POINTER_PLUS_EXPR;
7528 plus_type = sizetype;
7530 for (int ix = fd->collapse; ix--;)
7532 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7533 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7534 diff_type = diff_type2;
7536 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7537 diff_type = signed_type_for (diff_type);
7538 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7539 diff_type = integer_type_node;
7541 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7542 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7543 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7544 basic_block bottom_bb = NULL;
7546 /* entry_bb has two successors; the branch edge is to the exit
7547 block, fallthrough edge to body. */
7548 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7549 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7551 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7552 body_bb, or to a block whose only successor is the body_bb. Its
7553 fallthrough successor is the final block (same as the branch
7554 successor of the entry_bb). */
7555 if (cont_bb)
7557 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7558 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7560 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7561 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7563 else
7564 gcc_assert (!gimple_in_ssa_p (cfun));
7566 /* The exit block only has entry_bb and cont_bb as predecessors. */
7567 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7569 tree chunk_no;
7570 tree chunk_max = NULL_TREE;
7571 tree bound, offset;
7572 tree step = create_tmp_var (diff_type, ".step");
7573 bool up = cond_code == LT_EXPR;
7574 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7575 bool chunking = !gimple_in_ssa_p (cfun);
7576 bool negating;
7578 /* Tiling vars. */
7579 tree tile_size = NULL_TREE;
7580 tree element_s = NULL_TREE;
7581 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7582 basic_block elem_body_bb = NULL;
7583 basic_block elem_cont_bb = NULL;
7585 /* SSA instances. */
7586 tree offset_incr = NULL_TREE;
7587 tree offset_init = NULL_TREE;
7589 gimple_stmt_iterator gsi;
7590 gassign *ass;
7591 gcall *call;
7592 gimple *stmt;
7593 tree expr;
7594 location_t loc;
7595 edge split, be, fte;
7597 /* Split the end of entry_bb to create head_bb. */
7598 split = split_block (entry_bb, last_stmt (entry_bb));
7599 basic_block head_bb = split->dest;
7600 entry_bb = split->src;
7602 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7603 gsi = gsi_last_nondebug_bb (entry_bb);
7604 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7605 loc = gimple_location (for_stmt);
7607 if (gimple_in_ssa_p (cfun))
7609 offset_init = gimple_omp_for_index (for_stmt, 0);
7610 gcc_assert (integer_zerop (fd->loop.n1));
7611 /* The SSA parallelizer does gang parallelism. */
7612 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7615 if (fd->collapse > 1 || fd->tiling)
7617 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7618 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7619 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7620 TREE_TYPE (fd->loop.n2), loc);
7622 if (SSA_VAR_P (fd->loop.n2))
7624 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7625 true, GSI_SAME_STMT);
7626 ass = gimple_build_assign (fd->loop.n2, total);
7627 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7631 tree b = fd->loop.n1;
7632 tree e = fd->loop.n2;
7633 tree s = fd->loop.step;
7635 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7636 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7638 /* Convert the step, avoiding possible unsigned->signed overflow. */
7639 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7640 if (negating)
7641 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7642 s = fold_convert (diff_type, s);
7643 if (negating)
7644 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7645 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7647 if (!chunking)
7648 chunk_size = integer_zero_node;
7649 expr = fold_convert (diff_type, chunk_size);
7650 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7651 NULL_TREE, true, GSI_SAME_STMT);
7653 if (fd->tiling)
7655 /* Determine the tile size and element step,
7656 and modify the outer loop step size. */
7657 tile_size = create_tmp_var (diff_type, ".tile_size");
7658 expr = build_int_cst (diff_type, 1);
7659 for (int ix = 0; ix < fd->collapse; ix++)
7660 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7661 expr = force_gimple_operand_gsi (&gsi, expr, true,
7662 NULL_TREE, true, GSI_SAME_STMT);
7663 ass = gimple_build_assign (tile_size, expr);
7664 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7666 element_s = create_tmp_var (diff_type, ".element_s");
7667 ass = gimple_build_assign (element_s, s);
7668 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7670 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7671 s = force_gimple_operand_gsi (&gsi, expr, true,
7672 NULL_TREE, true, GSI_SAME_STMT);
7675 /* Determine the range, avoiding possible unsigned->signed overflow. */
7676 negating = !up && TYPE_UNSIGNED (iter_type);
7677 expr = fold_build2 (MINUS_EXPR, plus_type,
7678 fold_convert (plus_type, negating ? b : e),
7679 fold_convert (plus_type, negating ? e : b));
7680 expr = fold_convert (diff_type, expr);
7681 if (negating)
7682 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7683 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7684 NULL_TREE, true, GSI_SAME_STMT);
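  /* Illustrative example: for a downward loop 'for (i = 10; i > 0; i--)'
     with unsigned I, computing E - B directly would wrap, so the code
     above instead computes B - E == 10 in the signed DIFF_TYPE and then
     negates it, giving range == -10 to match dir == -1.  */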
7686 chunk_no = build_int_cst (diff_type, 0);
7687 if (chunking)
7689 gcc_assert (!gimple_in_ssa_p (cfun));
7691 expr = chunk_no;
7692 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7693 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7695 ass = gimple_build_assign (chunk_no, expr);
7696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7698 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7699 build_int_cst (integer_type_node,
7700 IFN_GOACC_LOOP_CHUNKS),
7701 dir, range, s, chunk_size, gwv);
7702 gimple_call_set_lhs (call, chunk_max);
7703 gimple_set_location (call, loc);
7704 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7706 else
7707 chunk_size = chunk_no;
7709 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7710 build_int_cst (integer_type_node,
7711 IFN_GOACC_LOOP_STEP),
7712 dir, range, s, chunk_size, gwv);
7713 gimple_call_set_lhs (call, step);
7714 gimple_set_location (call, loc);
7715 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7717 /* Remove the GIMPLE_OMP_FOR. */
7718 gsi_remove (&gsi, true);
7720 /* Fixup edges from head_bb. */
7721 be = BRANCH_EDGE (head_bb);
7722 fte = FALLTHRU_EDGE (head_bb);
7723 be->flags |= EDGE_FALSE_VALUE;
7724 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7726 basic_block body_bb = fte->dest;
7728 if (gimple_in_ssa_p (cfun))
7730 gsi = gsi_last_nondebug_bb (cont_bb);
7731 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7733 offset = gimple_omp_continue_control_use (cont_stmt);
7734 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7736 else
7738 offset = create_tmp_var (diff_type, ".offset");
7739 offset_init = offset_incr = offset;
7741 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7743 /* Loop offset & bound go into head_bb. */
7744 gsi = gsi_start_bb (head_bb);
7746 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7747 build_int_cst (integer_type_node,
7748 IFN_GOACC_LOOP_OFFSET),
7749 dir, range, s,
7750 chunk_size, gwv, chunk_no);
7751 gimple_call_set_lhs (call, offset_init);
7752 gimple_set_location (call, loc);
7753 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7755 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7756 build_int_cst (integer_type_node,
7757 IFN_GOACC_LOOP_BOUND),
7758 dir, range, s,
7759 chunk_size, gwv, offset_init);
7760 gimple_call_set_lhs (call, bound);
7761 gimple_set_location (call, loc);
7762 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7764 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7765 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7766 GSI_CONTINUE_LINKING);
7768 /* V assignment goes into body_bb. */
7769 if (!gimple_in_ssa_p (cfun))
7771 gsi = gsi_start_bb (body_bb);
7773 expr = build2 (plus_code, iter_type, b,
7774 fold_convert (plus_type, offset));
7775 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7776 true, GSI_SAME_STMT);
7777 ass = gimple_build_assign (v, expr);
7778 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7780 if (fd->collapse > 1 || fd->tiling)
7781 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7783 if (fd->tiling)
7785 /* Determine the range of the element loop -- usually simply
7786 the tile_size, but could be smaller if the final
7787 iteration of the outer loop is a partial tile. */
7788 tree e_range = create_tmp_var (diff_type, ".e_range");
7790 expr = build2 (MIN_EXPR, diff_type,
7791 build2 (MINUS_EXPR, diff_type, bound, offset),
7792 build2 (MULT_EXPR, diff_type, tile_size,
7793 element_s));
7794 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7795 true, GSI_SAME_STMT);
7796 ass = gimple_build_assign (e_range, expr);
7797 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7799 /* Determine bound, offset & step of inner loop. */
7800 e_bound = create_tmp_var (diff_type, ".e_bound");
7801 e_offset = create_tmp_var (diff_type, ".e_offset");
7802 e_step = create_tmp_var (diff_type, ".e_step");
7804 /* Mark these as element loops. */
7805 tree t, e_gwv = integer_minus_one_node;
7806 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7808 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7809 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7810 element_s, chunk, e_gwv, chunk);
7811 gimple_call_set_lhs (call, e_offset);
7812 gimple_set_location (call, loc);
7813 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7815 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7816 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7817 element_s, chunk, e_gwv, e_offset);
7818 gimple_call_set_lhs (call, e_bound);
7819 gimple_set_location (call, loc);
7820 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7822 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7823 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7824 element_s, chunk, e_gwv);
7825 gimple_call_set_lhs (call, e_step);
7826 gimple_set_location (call, loc);
7827 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7829 /* Add test and split block. */
7830 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7831 stmt = gimple_build_cond_empty (expr);
7832 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7833 split = split_block (body_bb, stmt);
7834 elem_body_bb = split->dest;
7835 if (cont_bb == body_bb)
7836 cont_bb = elem_body_bb;
7837 body_bb = split->src;
7839 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7841 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7842 if (cont_bb == NULL)
7844 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7845 e->probability = profile_probability::even ();
7846 split->probability = profile_probability::even ();
7849 /* Initialize the user's loop vars. */
7850 gsi = gsi_start_bb (elem_body_bb);
7851 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7852 diff_type);
7856 /* Loop increment goes into cont_bb. If this is not a loop, we
7857 will have spawned threads as if it was, and each one will
7858 execute one iteration. The specification is not explicit about
7859 whether such constructs are ill-formed, and they can
7860 occur, especially when noreturn routines are involved. */
7861 if (cont_bb)
7863 gsi = gsi_last_nondebug_bb (cont_bb);
7864 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7865 loc = gimple_location (cont_stmt);
7867 if (fd->tiling)
7869 /* Insert element loop increment and test. */
7870 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7871 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7872 true, GSI_SAME_STMT);
7873 ass = gimple_build_assign (e_offset, expr);
7874 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7875 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7877 stmt = gimple_build_cond_empty (expr);
7878 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7879 split = split_block (cont_bb, stmt);
7880 elem_cont_bb = split->src;
7881 cont_bb = split->dest;
7883 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7884 split->probability = profile_probability::unlikely ().guessed ();
7885 edge latch_edge
7886 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7887 latch_edge->probability = profile_probability::likely ().guessed ();
7889 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7890 skip_edge->probability = profile_probability::unlikely ().guessed ();
7891 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7892 loop_entry_edge->probability
7893 = profile_probability::likely ().guessed ();
7895 gsi = gsi_for_stmt (cont_stmt);
7898 /* Increment offset. */
7899 if (gimple_in_ssa_p (cfun))
7900 expr = build2 (plus_code, iter_type, offset,
7901 fold_convert (plus_type, step));
7902 else
7903 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7904 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7905 true, GSI_SAME_STMT);
7906 ass = gimple_build_assign (offset_incr, expr);
7907 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7908 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7909 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7911 /* Remove the GIMPLE_OMP_CONTINUE. */
7912 gsi_remove (&gsi, true);
7914 /* Fixup edges from cont_bb. */
7915 be = BRANCH_EDGE (cont_bb);
7916 fte = FALLTHRU_EDGE (cont_bb);
7917 be->flags |= EDGE_TRUE_VALUE;
7918 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7920 if (chunking)
7922 /* Split the beginning of exit_bb to make bottom_bb. We
7923 need to insert a nop at the start, because splitting is
7924 after a stmt, not before. */
7925 gsi = gsi_start_bb (exit_bb);
7926 stmt = gimple_build_nop ();
7927 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7928 split = split_block (exit_bb, stmt);
7929 bottom_bb = split->src;
7930 exit_bb = split->dest;
7931 gsi = gsi_last_bb (bottom_bb);
7933 /* Chunk increment and test goes into bottom_bb. */
7934 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7935 build_int_cst (diff_type, 1));
7936 ass = gimple_build_assign (chunk_no, expr);
7937 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7939 /* Chunk test at end of bottom_bb. */
7940 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7941 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7942 GSI_CONTINUE_LINKING);
7944 /* Fixup edges from bottom_bb. */
7945 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7946 split->probability = profile_probability::unlikely ().guessed ();
7947 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7948 latch_edge->probability = profile_probability::likely ().guessed ();
7952 gsi = gsi_last_nondebug_bb (exit_bb);
7953 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7954 loc = gimple_location (gsi_stmt (gsi));
7956 if (!gimple_in_ssa_p (cfun))
7958 /* Insert the final value of V, in case it is live. This is the
7959 value for the only thread that survives past the join. */
7960 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7961 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7962 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7963 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7964 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7965 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7966 true, GSI_SAME_STMT);
7967 ass = gimple_build_assign (v, expr);
7968 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7971 /* Remove the OMP_RETURN. */
7972 gsi_remove (&gsi, true);
7974 if (cont_bb)
7976 /* We now have one, two or three nested loops. Update the loop
7977 structures. */
7978 class loop *parent = entry_bb->loop_father;
7979 class loop *body = body_bb->loop_father;
7981 if (chunking)
7983 class loop *chunk_loop = alloc_loop ();
7984 chunk_loop->header = head_bb;
7985 chunk_loop->latch = bottom_bb;
7986 add_loop (chunk_loop, parent);
7987 parent = chunk_loop;
7989 else if (parent != body)
7991 gcc_assert (body->header == body_bb);
7992 gcc_assert (body->latch == cont_bb
7993 || single_pred (body->latch) == cont_bb);
7994 parent = NULL;
7997 if (parent)
7999 class loop *body_loop = alloc_loop ();
8000 body_loop->header = body_bb;
8001 body_loop->latch = cont_bb;
8002 add_loop (body_loop, parent);
8004 if (fd->tiling)
8006 /* Insert tiling's element loop. */
8007 class loop *inner_loop = alloc_loop ();
8008 inner_loop->header = elem_body_bb;
8009 inner_loop->latch = elem_cont_bb;
8010 add_loop (inner_loop, body_loop);
8016 /* Expand the OMP loop defined by REGION. */
8018 static void
8019 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8021 struct omp_for_data fd;
8022 struct omp_for_data_loop *loops;
8024 loops = XALLOCAVEC (struct omp_for_data_loop,
8025 gimple_omp_for_collapse (last_stmt (region->entry)));
8026 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8027 &fd, loops);
8028 region->sched_kind = fd.sched_kind;
8029 region->sched_modifiers = fd.sched_modifiers;
8030 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8031 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8033 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8034 if ((loops[i].m1 || loops[i].m2)
8035 && (loops[i].m1 == NULL_TREE
8036 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8037 && (loops[i].m2 == NULL_TREE
8038 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8039 && TREE_CODE (loops[i].step) == INTEGER_CST
8040 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8042 tree t;
8043 tree itype = TREE_TYPE (loops[i].v);
8044 if (loops[i].m1 && loops[i].m2)
8045 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8046 else if (loops[i].m1)
8047 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8048 else
8049 t = loops[i].m2;
8050 t = fold_build2 (MULT_EXPR, itype, t,
8051 fold_convert (itype,
8052 loops[i - loops[i].outer].step));
8053 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8054 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8055 fold_build1 (NEGATE_EXPR, itype, t),
8056 fold_build1 (NEGATE_EXPR, itype,
8057 fold_convert (itype,
8058 loops[i].step)));
8059 else
8060 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8061 fold_convert (itype, loops[i].step));
8062 if (integer_nonzerop (t))
8063 error_at (gimple_location (fd.for_stmt),
8064 "invalid OpenMP non-rectangular loop step; "
8065 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8066 "step %qE",
8067 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8068 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8069 loops[i - loops[i].outer].step, i + 1,
8070 loops[i].step);
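	  /* Hypothetical example of a nest rejected by the check above:

	       #pragma omp for collapse(2)
	       for (i = 0; i < 16; i++)
		 for (j = 0; j < 3 * i; j += 2)
		   ;

	     here m2 == 3 and the outer step is 1, so (3 - 0) * 1 == 3
	     is not a multiple of the inner loop's step 2.  */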
8074 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8075 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8076 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8077 if (region->cont)
8079 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8080 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8081 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8083 else
8084 /* If there isn't a continue then this is a degenerate case where
8085 the introduction of abnormal edges during lowering will prevent
8086 original loops from being detected. Fix that up. */
8087 loops_state_set (LOOPS_NEED_FIXUP);
8089 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8090 expand_omp_simd (region, &fd);
8091 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8093 gcc_assert (!inner_stmt && !fd.non_rect);
8094 expand_oacc_for (region, &fd);
8096 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8098 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8099 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8100 else
8101 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8103 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8104 && !fd.have_ordered)
8106 if (fd.chunk_size == NULL)
8107 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8108 else
8109 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8111 else
8113 int fn_index, start_ix, next_ix;
8114 unsigned HOST_WIDE_INT sched = 0;
8115 tree sched_arg = NULL_TREE;
8117 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8118 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8119 if (fd.chunk_size == NULL
8120 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8121 fd.chunk_size = integer_zero_node;
8122 switch (fd.sched_kind)
8124 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8125 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8126 && fd.lastprivate_conditional == 0)
8128 gcc_assert (!fd.have_ordered);
8129 fn_index = 6;
8130 sched = 4;
8132 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8133 && !fd.have_ordered
8134 && fd.lastprivate_conditional == 0)
8135 fn_index = 7;
8136 else
8138 fn_index = 3;
8139 sched = (HOST_WIDE_INT_1U << 31);
8141 break;
8142 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8143 case OMP_CLAUSE_SCHEDULE_GUIDED:
8144 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8145 && !fd.have_ordered
8146 && fd.lastprivate_conditional == 0)
8148 fn_index = 3 + fd.sched_kind;
8149 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8150 break;
8152 fn_index = fd.sched_kind;
8153 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8154 sched += (HOST_WIDE_INT_1U << 31);
8155 break;
8156 case OMP_CLAUSE_SCHEDULE_STATIC:
8157 gcc_assert (fd.have_ordered);
8158 fn_index = 0;
8159 sched = (HOST_WIDE_INT_1U << 31) + 1;
8160 break;
8161 default:
8162 gcc_unreachable ();
8164 if (!fd.ordered)
8165 fn_index += fd.have_ordered * 8;
8166 if (fd.ordered)
8167 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8168 else
8169 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8170 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8171 if (fd.have_reductemp || fd.have_pointer_condtemp)
8173 if (fd.ordered)
8174 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8175 else if (fd.have_ordered)
8176 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8177 else
8178 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8179 sched_arg = build_int_cstu (long_integer_type_node, sched);
8180 if (!fd.chunk_size)
8181 fd.chunk_size = integer_zero_node;
8183 if (fd.iter_type == long_long_unsigned_type_node)
8185 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8186 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8187 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8188 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8190 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8191 (enum built_in_function) next_ix, sched_arg,
8192 inner_stmt);
8195 if (gimple_in_ssa_p (cfun))
8196 update_ssa (TODO_update_ssa_only_virtuals);
8199 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8201 v = GOMP_sections_start (n);
8203 switch (v)
8205 case 0:
8206 goto L2;
8207 case 1:
8208 section 1;
8209 goto L1;
8210 case 2:
8212 case n:
8214 default:
8215 abort ();
8218 v = GOMP_sections_next ();
8219 goto L0;
8221 reduction;
8223 If this is a combined parallel sections, replace the call to
8224 GOMP_sections_start with a call to GOMP_sections_next. */
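/* Illustrative mapping (simplified): for

     #pragma omp sections
     {
       #pragma omp section
       A ();
       #pragma omp section
       B ();
     }

   case 1 runs A, case 2 runs B, and case 0 is taken once
   GOMP_sections_next signals that no work remains.  */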
8226 static void
8227 expand_omp_sections (struct omp_region *region)
8229 tree t, u, vin = NULL, vmain, vnext, l2;
8230 unsigned len;
8231 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8232 gimple_stmt_iterator si, switch_si;
8233 gomp_sections *sections_stmt;
8234 gimple *stmt;
8235 gomp_continue *cont;
8236 edge_iterator ei;
8237 edge e;
8238 struct omp_region *inner;
8239 unsigned i, casei;
8240 bool exit_reachable = region->cont != NULL;
8242 gcc_assert (region->exit != NULL);
8243 entry_bb = region->entry;
8244 l0_bb = single_succ (entry_bb);
8245 l1_bb = region->cont;
8246 l2_bb = region->exit;
8247 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8248 l2 = gimple_block_label (l2_bb);
8249 else
8251 /* This can happen if there are reductions. */
8252 len = EDGE_COUNT (l0_bb->succs);
8253 gcc_assert (len > 0);
8254 e = EDGE_SUCC (l0_bb, len - 1);
8255 si = gsi_last_nondebug_bb (e->dest);
8256 l2 = NULL_TREE;
8257 if (gsi_end_p (si)
8258 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8259 l2 = gimple_block_label (e->dest);
8260 else
8261 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8263 si = gsi_last_nondebug_bb (e->dest);
8264 if (gsi_end_p (si)
8265 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8267 l2 = gimple_block_label (e->dest);
8268 break;
8272 if (exit_reachable)
8273 default_bb = create_empty_bb (l1_bb->prev_bb);
8274 else
8275 default_bb = create_empty_bb (l0_bb);
8277 /* We will build a switch() with enough cases for all the
8278 GIMPLE_OMP_SECTION regions, a '0' case taken when no more work
8279 remains, and a default case to abort if something goes wrong. */
8280 len = EDGE_COUNT (l0_bb->succs);
8282 /* Use vec::quick_push on label_vec throughout, since we know the size
8283 in advance. */
8284 auto_vec<tree> label_vec (len);
8286 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8287 GIMPLE_OMP_SECTIONS statement. */
8288 si = gsi_last_nondebug_bb (entry_bb);
8289 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8290 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8291 vin = gimple_omp_sections_control (sections_stmt);
8292 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8293 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8294 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8295 tree cond_var = NULL_TREE;
8296 if (reductmp || condtmp)
8298 tree reductions = null_pointer_node, mem = null_pointer_node;
8299 tree memv = NULL_TREE, condtemp = NULL_TREE;
8300 gimple_stmt_iterator gsi = gsi_none ();
8301 gimple *g = NULL;
8302 if (reductmp)
8304 reductions = OMP_CLAUSE_DECL (reductmp);
8305 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8306 g = SSA_NAME_DEF_STMT (reductions);
8307 reductions = gimple_assign_rhs1 (g);
8308 OMP_CLAUSE_DECL (reductmp) = reductions;
8309 gsi = gsi_for_stmt (g);
8311 else
8312 gsi = si;
8313 if (condtmp)
8315 condtemp = OMP_CLAUSE_DECL (condtmp);
8316 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8317 OMP_CLAUSE__CONDTEMP_);
8318 cond_var = OMP_CLAUSE_DECL (c);
8319 tree type = TREE_TYPE (condtemp);
8320 memv = create_tmp_var (type);
8321 TREE_ADDRESSABLE (memv) = 1;
8322 unsigned cnt = 0;
8323 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8324 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8325 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8326 ++cnt;
8327 unsigned HOST_WIDE_INT sz
8328 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8329 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8330 false);
8331 mem = build_fold_addr_expr (memv);
8333 t = build_int_cst (unsigned_type_node, len - 1);
8334 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8335 stmt = gimple_build_call (u, 3, t, reductions, mem);
8336 gimple_call_set_lhs (stmt, vin);
8337 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8338 if (condtmp)
8340 expand_omp_build_assign (&gsi, condtemp, memv, false);
8341 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8342 vin, build_one_cst (TREE_TYPE (cond_var)));
8343 expand_omp_build_assign (&gsi, cond_var, t, false);
8345 if (reductmp)
8347 gsi_remove (&gsi, true);
8348 release_ssa_name (gimple_assign_lhs (g));
8351 else if (!is_combined_parallel (region))
8353 /* If we are not inside a combined parallel+sections region,
8354 call GOMP_sections_start. */
8355 t = build_int_cst (unsigned_type_node, len - 1);
8356 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8357 stmt = gimple_build_call (u, 1, t);
8359 else
8361 /* Otherwise, call GOMP_sections_next. */
8362 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8363 stmt = gimple_build_call (u, 0);
8365 if (!reductmp && !condtmp)
8367 gimple_call_set_lhs (stmt, vin);
8368 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8370 gsi_remove (&si, true);
8372 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8373 L0_BB. */
8374 switch_si = gsi_last_nondebug_bb (l0_bb);
8375 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8376 if (exit_reachable)
8378 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8379 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8380 vmain = gimple_omp_continue_control_use (cont);
8381 vnext = gimple_omp_continue_control_def (cont);
8383 else
8385 vmain = vin;
8386 vnext = NULL_TREE;
8389 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8390 label_vec.quick_push (t);
8391 i = 1;
8393 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8394 for (inner = region->inner, casei = 1;
8395 inner;
8396 inner = inner->next, i++, casei++)
8398 basic_block s_entry_bb, s_exit_bb;
8400 /* Skip optional reduction region. */
8401 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8403 --i;
8404 --casei;
8405 continue;
8408 s_entry_bb = inner->entry;
8409 s_exit_bb = inner->exit;
8411 t = gimple_block_label (s_entry_bb);
8412 u = build_int_cst (unsigned_type_node, casei);
8413 u = build_case_label (u, NULL, t);
8414 label_vec.quick_push (u);
8416 si = gsi_last_nondebug_bb (s_entry_bb);
8417 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8418 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8419 gsi_remove (&si, true);
8420 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8422 if (s_exit_bb == NULL)
8423 continue;
8425 si = gsi_last_nondebug_bb (s_exit_bb);
8426 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8427 gsi_remove (&si, true);
8429 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8432 /* Error handling code goes in DEFAULT_BB. */
8433 t = gimple_block_label (default_bb);
8434 u = build_case_label (NULL, NULL, t);
8435 make_edge (l0_bb, default_bb, 0);
8436 add_bb_to_loop (default_bb, current_loops->tree_root);
8438 stmt = gimple_build_switch (vmain, u, label_vec);
8439 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8440 gsi_remove (&switch_si, true);
8442 si = gsi_start_bb (default_bb);
8443 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8444 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8446 if (exit_reachable)
8448 tree bfn_decl;
8450 /* Code to get the next section goes in L1_BB. */
8451 si = gsi_last_nondebug_bb (l1_bb);
8452 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8454 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8455 stmt = gimple_build_call (bfn_decl, 0);
8456 gimple_call_set_lhs (stmt, vnext);
8457 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8458 if (cond_var)
8460 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8461 vnext, build_one_cst (TREE_TYPE (cond_var)));
8462 expand_omp_build_assign (&si, cond_var, t, false);
8464 gsi_remove (&si, true);
8466 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8469 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8470 si = gsi_last_nondebug_bb (l2_bb);
8471 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8472 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8473 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8474 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8475 else
8476 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8477 stmt = gimple_build_call (t, 0);
8478 if (gimple_omp_return_lhs (gsi_stmt (si)))
8479 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8480 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8481 gsi_remove (&si, true);
8483 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
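/* Illustrative sketch of the dispatch built above, for a non-combined
   "#pragma omp sections" region with three sections (the emitted GIMPLE
   differs in temporaries and basic-block structure):

     for (i = GOMP_sections_start (3); i != 0; i = GOMP_sections_next ())
       switch (i)
         {
         case 1: ... first section ...  break;
         case 2: ... second section ...  break;
         case 3: ... third section ...  break;
         default: __builtin_trap ();
         }
     GOMP_sections_end ();

   where case 0 is really the exit edge out of the dispatch loop and the
   final call may be the _nowait or _cancel variant chosen above.  */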
8486 /* Expand code for an OpenMP single or scope directive. We've already expanded
8487 much of the code; here we simply place the GOMP_barrier call. */
8489 static void
8490 expand_omp_single (struct omp_region *region)
8492 basic_block entry_bb, exit_bb;
8493 gimple_stmt_iterator si;
8495 entry_bb = region->entry;
8496 exit_bb = region->exit;
8498 si = gsi_last_nondebug_bb (entry_bb);
8499 enum gimple_code code = gimple_code (gsi_stmt (si));
8500 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8501 gsi_remove (&si, true);
8502 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8504 if (exit_bb == NULL)
8506 gcc_assert (code == GIMPLE_OMP_SCOPE);
8507 return;
8510 si = gsi_last_nondebug_bb (exit_bb);
8511 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8513 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8514 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8516 gsi_remove (&si, true);
8517 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
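/* I.e. the barrier placed here completes a sketch like

     if (GOMP_single_start ())
       ... single body ...
     GOMP_barrier ();

   where the body guard was already emitted during omp lowering; the
   barrier is omitted for "nowait" and the cancellable variant is used
   when its return value is needed.  */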
8520 /* Generic expansion for OpenMP synchronization directives: master,
8521 masked, taskgroup, ordered and critical. All we need to do here
8522 is remove the entry and exit markers for REGION. */
8524 static void
8525 expand_omp_synch (struct omp_region *region)
8527 basic_block entry_bb, exit_bb;
8528 gimple_stmt_iterator si;
8530 entry_bb = region->entry;
8531 exit_bb = region->exit;
8533 si = gsi_last_nondebug_bb (entry_bb);
8534 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8535 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8536 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8537 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8538 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8539 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8540 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8541 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8542 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8544 expand_omp_taskreg (region);
8545 return;
8547 gsi_remove (&si, true);
8548 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8550 if (exit_bb)
8552 si = gsi_last_nondebug_bb (exit_bb);
8553 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8554 gsi_remove (&si, true);
8555 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8559 /* Translate enum omp_memory_order to enum memmodel for the fail
8560 clause embedded in it. */
8562 static enum memmodel
8563 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8565 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8567 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8568 switch (mo & OMP_MEMORY_ORDER_MASK)
8570 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8571 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8572 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8573 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8574 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8575 default: break;
8577 gcc_unreachable ();
8578 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8579 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8580 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8581 default: gcc_unreachable ();
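/* E.g. "#pragma omp atomic compare release" without a fail clause maps
   to MEMMODEL_RELAXED here, mirroring the C11 rule that the failure
   ordering of a compare-exchange may be neither release nor acq_rel.  */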
8585 /* Translate enum omp_memory_order to enum memmodel. The two enums
8586 use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8587 is 0 and omp_memory_order has the fail mode encoded in it too. */
8589 static enum memmodel
8590 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8592 enum memmodel ret, fail_ret;
8593 switch (mo & OMP_MEMORY_ORDER_MASK)
8595 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8596 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8597 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8598 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8599 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8600 default: gcc_unreachable ();
8602 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8603 we can just return ret here unconditionally. Otherwise, work around
8604 it here and make sure fail memmodel is not stronger. */
8605 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8606 return ret;
8607 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8608 if (fail_ret > ret)
8609 return fail_ret;
8610 return ret;
8613 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8614 operation as a normal volatile load. */
8616 static bool
8617 expand_omp_atomic_load (basic_block load_bb, tree addr,
8618 tree loaded_val, int index)
8620 enum built_in_function tmpbase;
8621 gimple_stmt_iterator gsi;
8622 basic_block store_bb;
8623 location_t loc;
8624 gimple *stmt;
8625 tree decl, call, type, itype;
8627 gsi = gsi_last_nondebug_bb (load_bb);
8628 stmt = gsi_stmt (gsi);
8629 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8630 loc = gimple_location (stmt);
8632 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8633 is smaller than word size, then expand_atomic_load assumes that the load
8634 is atomic. We could avoid the builtin entirely in this case. */
8636 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8637 decl = builtin_decl_explicit (tmpbase);
8638 if (decl == NULL_TREE)
8639 return false;
8641 type = TREE_TYPE (loaded_val);
8642 itype = TREE_TYPE (TREE_TYPE (decl));
8644 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8645 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8646 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8647 if (!useless_type_conversion_p (type, itype))
8648 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8649 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8651 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8652 gsi_remove (&gsi, true);
8654 store_bb = single_succ (load_bb);
8655 gsi = gsi_last_nondebug_bb (store_bb);
8656 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8657 gsi_remove (&gsi, true);
8659 if (gimple_in_ssa_p (cfun))
8660 update_ssa (TODO_update_ssa_no_phi);
8662 return true;
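/* Sketch of the result, assuming a 4-byte type with seq_cst ordering
   (temporaries omitted):

     #pragma omp atomic read seq_cst
     v = *addr;

   becomes roughly

     v = __atomic_load_4 (addr, MEMMODEL_SEQ_CST);

   with a VIEW_CONVERT_EXPR around the call when V's type differs from
   the builtin's return type.  */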
8665 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8666 operation as a normal volatile store. */
8668 static bool
8669 expand_omp_atomic_store (basic_block load_bb, tree addr,
8670 tree loaded_val, tree stored_val, int index)
8672 enum built_in_function tmpbase;
8673 gimple_stmt_iterator gsi;
8674 basic_block store_bb = single_succ (load_bb);
8675 location_t loc;
8676 gimple *stmt;
8677 tree decl, call, type, itype;
8678 machine_mode imode;
8679 bool exchange;
8681 gsi = gsi_last_nondebug_bb (load_bb);
8682 stmt = gsi_stmt (gsi);
8683 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8685 /* If the load value is needed, then this isn't a store but an exchange. */
8686 exchange = gimple_omp_atomic_need_value_p (stmt);
8688 gsi = gsi_last_nondebug_bb (store_bb);
8689 stmt = gsi_stmt (gsi);
8690 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8691 loc = gimple_location (stmt);
8693 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8694 is smaller than word size, then expand_atomic_store assumes that the store
8695 is atomic. We could avoid the builtin entirely in this case. */
8697 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8698 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8699 decl = builtin_decl_explicit (tmpbase);
8700 if (decl == NULL_TREE)
8701 return false;
8703 type = TREE_TYPE (stored_val);
8705 /* Dig out the type of the function's second argument. */
8706 itype = TREE_TYPE (decl);
8707 itype = TYPE_ARG_TYPES (itype);
8708 itype = TREE_CHAIN (itype);
8709 itype = TREE_VALUE (itype);
8710 imode = TYPE_MODE (itype);
8712 if (exchange && !can_atomic_exchange_p (imode, true))
8713 return false;
8715 if (!useless_type_conversion_p (itype, type))
8716 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8717 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8718 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8719 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8720 if (exchange)
8722 if (!useless_type_conversion_p (type, itype))
8723 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8724 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8727 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8728 gsi_remove (&gsi, true);
8730 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8731 gsi = gsi_last_nondebug_bb (load_bb);
8732 gsi_remove (&gsi, true);
8734 if (gimple_in_ssa_p (cfun))
8735 update_ssa (TODO_update_ssa_no_phi);
8737 return true;
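/* Sketch, under the same caveats, for a 4-byte type with the default
   relaxed ordering:

     #pragma omp atomic write
     *addr = x;

   becomes roughly

     __atomic_store_4 (addr, x, MEMMODEL_RELAXED);

   or, when the old value is needed and the target can exchange,

     v = __atomic_exchange_4 (addr, x, MEMMODEL_RELAXED);  */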
8740 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8741 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8742 size of the data type, and thus usable to find the index of the builtin
8743 decl. Returns false if the expression is not of the proper form. */
8745 static bool
8746 expand_omp_atomic_fetch_op (basic_block load_bb,
8747 tree addr, tree loaded_val,
8748 tree stored_val, int index)
8750 enum built_in_function oldbase, newbase, tmpbase;
8751 tree decl, itype, call;
8752 tree lhs, rhs;
8753 basic_block store_bb = single_succ (load_bb);
8754 gimple_stmt_iterator gsi;
8755 gimple *stmt;
8756 location_t loc;
8757 enum tree_code code;
8758 bool need_old, need_new;
8759 machine_mode imode;
8761 /* We expect to find the following sequences:
8763 load_bb:
8764 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8766 store_bb:
8767 val = tmp OP something; (or: something OP tmp)
8768 GIMPLE_OMP_ATOMIC_STORE (val)
8770 ???FIXME: Allow a more flexible sequence.
8771 Perhaps use data flow to pick the statements.
8775 gsi = gsi_after_labels (store_bb);
8776 stmt = gsi_stmt (gsi);
8777 if (is_gimple_debug (stmt))
8779 gsi_next_nondebug (&gsi);
8780 if (gsi_end_p (gsi))
8781 return false;
8782 stmt = gsi_stmt (gsi);
8784 loc = gimple_location (stmt);
8785 if (!is_gimple_assign (stmt))
8786 return false;
8787 gsi_next_nondebug (&gsi);
8788 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8789 return false;
8790 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8791 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8792 enum omp_memory_order omo
8793 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8794 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8795 gcc_checking_assert (!need_old || !need_new);
8797 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8798 return false;
8800 /* Check for one of the supported fetch-op operations. */
8801 code = gimple_assign_rhs_code (stmt);
8802 switch (code)
8804 case PLUS_EXPR:
8805 case POINTER_PLUS_EXPR:
8806 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8807 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8808 break;
8809 case MINUS_EXPR:
8810 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8811 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8812 break;
8813 case BIT_AND_EXPR:
8814 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8815 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8816 break;
8817 case BIT_IOR_EXPR:
8818 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8819 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8820 break;
8821 case BIT_XOR_EXPR:
8822 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8823 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8824 break;
8825 default:
8826 return false;
8829 /* Make sure the expression is of the proper form. */
8830 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8831 rhs = gimple_assign_rhs2 (stmt);
8832 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8833 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8834 rhs = gimple_assign_rhs1 (stmt);
8835 else
8836 return false;
8838 tmpbase = ((enum built_in_function)
8839 ((need_new ? newbase : oldbase) + index + 1));
8840 decl = builtin_decl_explicit (tmpbase);
8841 if (decl == NULL_TREE)
8842 return false;
8843 itype = TREE_TYPE (TREE_TYPE (decl));
8844 imode = TYPE_MODE (itype);
8846 /* We could test all of the various optabs involved, but the fact of the
8847 matter is that (with the exception of i486 vs i586 and xadd) all targets
8848 that support any atomic operation optab also implement compare-and-swap.
8849 Let optabs.cc take care of expanding any compare-and-swap loop. */
8850 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8851 return false;
8853 gsi = gsi_last_nondebug_bb (load_bb);
8854 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8856 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8857 It only requires that the operation happen atomically. Thus we can
8858 use the RELAXED memory model. */
8859 call = build_call_expr_loc (loc, decl, 3, addr,
8860 fold_convert_loc (loc, itype, rhs),
8861 build_int_cst (NULL, mo));
8863 if (need_old || need_new)
8865 lhs = need_old ? loaded_val : stored_val;
8866 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8867 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8869 else
8870 call = fold_convert_loc (loc, void_type_node, call);
8871 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8872 gsi_remove (&gsi, true);
8874 gsi = gsi_last_nondebug_bb (store_bb);
8875 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8876 gsi_remove (&gsi, true);
8877 gsi = gsi_last_nondebug_bb (store_bb);
8878 stmt = gsi_stmt (gsi);
8879 gsi_remove (&gsi, true);
8881 if (gimple_in_ssa_p (cfun))
8883 release_defs (stmt);
8884 update_ssa (TODO_update_ssa_no_phi);
8887 return true;
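/* Sketch, for a 4-byte int with the default relaxed ordering:

     #pragma omp atomic
     x += n;

   matches the PLUS_EXPR case above and becomes roughly

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   with the add-fetch variant used instead when the updated value is
   needed.  */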
8890 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8891 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8892 Returns false if the expression is not of the proper form. */
8894 static bool
8895 expand_omp_atomic_cas (basic_block load_bb, tree addr,
8896 tree loaded_val, tree stored_val, int index)
8898 /* We expect to find the following sequences:
8900 load_bb:
8901 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8903 store_bb:
8904 val = tmp == e ? d : tmp;
8905 GIMPLE_OMP_ATOMIC_STORE (val)
8907 or in store_bb instead:
8908 tmp2 = tmp == e;
8909 val = tmp2 ? d : tmp;
8910 GIMPLE_OMP_ATOMIC_STORE (val)
8912 or:
8913 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8914 val = e == tmp3 ? d : tmp;
8915 GIMPLE_OMP_ATOMIC_STORE (val)
8917 etc. */
8920 basic_block store_bb = single_succ (load_bb);
8921 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8922 gimple *store_stmt = gsi_stmt (gsi);
8923 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8924 return false;
8925 gsi_prev_nondebug (&gsi);
8926 if (gsi_end_p (gsi))
8927 return false;
8928 gimple *condexpr_stmt = gsi_stmt (gsi);
8929 if (!is_gimple_assign (condexpr_stmt)
8930 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8931 return false;
8932 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8933 return false;
8934 gimple *cond_stmt = NULL;
8935 gimple *vce_stmt = NULL;
8936 gsi_prev_nondebug (&gsi);
8937 if (!gsi_end_p (gsi))
8939 cond_stmt = gsi_stmt (gsi);
8940 if (!is_gimple_assign (cond_stmt))
8941 return false;
8942 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8944 gsi_prev_nondebug (&gsi);
8945 if (!gsi_end_p (gsi))
8947 vce_stmt = gsi_stmt (gsi);
8948 if (!is_gimple_assign (vce_stmt)
8949 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8950 return false;
8953 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8954 std::swap (vce_stmt, cond_stmt);
8955 else
8956 return false;
8957 if (vce_stmt)
8959 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8960 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8961 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8962 return false;
8963 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8964 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8965 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8966 TYPE_SIZE (TREE_TYPE (loaded_val))))
8967 return false;
8968 gsi_prev_nondebug (&gsi);
8969 if (!gsi_end_p (gsi))
8970 return false;
8973 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8974 tree cond_op1, cond_op2;
8975 if (cond_stmt)
8977 /* We should now always get a separate cond_stmt. */
8978 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8979 return false;
8980 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8981 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8983 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8984 return false;
8985 else
8987 cond_op1 = TREE_OPERAND (cond, 0);
8988 cond_op2 = TREE_OPERAND (cond, 1);
8990 tree d;
8991 if (TREE_CODE (cond) == NE_EXPR)
8993 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
8994 return false;
8995 d = gimple_assign_rhs3 (condexpr_stmt);
8997 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
8998 return false;
8999 else
9000 d = gimple_assign_rhs2 (condexpr_stmt);
9001 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9002 if (operand_equal_p (e, cond_op1))
9003 e = cond_op2;
9004 else if (operand_equal_p (e, cond_op2))
9005 e = cond_op1;
9006 else
9007 return false;
9009 location_t loc = gimple_location (store_stmt);
9010 gimple *load_stmt = last_stmt (load_bb);
9011 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9012 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9013 bool weak = gimple_omp_atomic_weak_p (load_stmt);
9014 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9015 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9016 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9017 gcc_checking_assert (!need_old || !need_new);
9019 enum built_in_function fncode
9020 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9021 + index + 1);
9022 tree cmpxchg = builtin_decl_explicit (fncode);
9023 if (cmpxchg == NULL_TREE)
9024 return false;
9025 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9027 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9028 || !can_atomic_load_p (TYPE_MODE (itype)))
9029 return false;
9031 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9032 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9033 return false;
9035 gsi = gsi_for_stmt (store_stmt);
9036 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9038 tree ne = create_tmp_reg (itype);
9039 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9040 gimple_set_location (g, loc);
9041 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9042 e = ne;
9044 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9046 tree nd = create_tmp_reg (itype);
9047 enum tree_code code;
9048 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9050 code = VIEW_CONVERT_EXPR;
9051 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9053 else
9054 code = NOP_EXPR;
9055 gimple *g = gimple_build_assign (nd, code, d);
9056 gimple_set_location (g, loc);
9057 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9058 d = nd;
9061 tree ctype = build_complex_type (itype);
9062 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9063 gimple *g
9064 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9065 build_int_cst (integer_type_node, flag),
9066 mo, fmo);
9067 tree cres = create_tmp_reg (ctype);
9068 gimple_call_set_lhs (g, cres);
9069 gimple_set_location (g, loc);
9070 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9072 if (cond_stmt || need_old || need_new)
9074 tree im = create_tmp_reg (itype);
9075 g = gimple_build_assign (im, IMAGPART_EXPR,
9076 build1 (IMAGPART_EXPR, itype, cres));
9077 gimple_set_location (g, loc);
9078 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9080 tree re = NULL_TREE;
9081 if (need_old || need_new)
9083 re = create_tmp_reg (itype);
9084 g = gimple_build_assign (re, REALPART_EXPR,
9085 build1 (REALPART_EXPR, itype, cres));
9086 gimple_set_location (g, loc);
9087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9090 if (cond_stmt)
9092 g = gimple_build_assign (cond, NOP_EXPR, im);
9093 gimple_set_location (g, loc);
9094 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9097 if (need_new)
9099 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9100 cond_stmt
9101 ? cond : build2 (NE_EXPR, boolean_type_node,
9102 im, build_zero_cst (itype)),
9103 d, re);
9104 gimple_set_location (g, loc);
9105 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9106 re = gimple_assign_lhs (g);
9109 if (need_old || need_new)
9111 tree v = need_old ? loaded_val : stored_val;
9112 enum tree_code code;
9113 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9115 code = VIEW_CONVERT_EXPR;
9116 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9118 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9119 code = NOP_EXPR;
9120 else
9121 code = TREE_CODE (re);
9122 g = gimple_build_assign (v, code, re);
9123 gimple_set_location (g, loc);
9124 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9128 gsi_remove (&gsi, true);
9129 gsi = gsi_for_stmt (load_stmt);
9130 gsi_remove (&gsi, true);
9131 gsi = gsi_for_stmt (condexpr_stmt);
9132 gsi_remove (&gsi, true);
9133 if (cond_stmt)
9135 gsi = gsi_for_stmt (cond_stmt);
9136 gsi_remove (&gsi, true);
9138 if (vce_stmt)
9140 gsi = gsi_for_stmt (vce_stmt);
9141 gsi_remove (&gsi, true);
9144 return true;
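/* Sketch of the expansion, for a 4-byte type and with the temporaries
   and conversions built above omitted:

     #pragma omp atomic compare
     x = x == e ? d : x;

   becomes roughly

     cres = .ATOMIC_COMPARE_EXCHANGE (&x, e, d, 4, mo, fmo);

   where REALPART_EXPR <cres> is the old value and IMAGPART_EXPR <cres>
   the success flag, each extracted only when actually needed.  */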
9147 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9149 oldval = *addr;
9150 repeat:
9151 newval = rhs; // with oldval replacing *addr in rhs
9152 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9153 if (oldval != newval)
9154 goto repeat;
9156 INDEX is log2 of the size of the data type, and thus usable to find the
9157 index of the builtin decl. */
9159 static bool
9160 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9161 tree addr, tree loaded_val, tree stored_val,
9162 int index)
9164 tree loadedi, storedi, initial, new_storedi, old_vali;
9165 tree type, itype, cmpxchg, iaddr, atype;
9166 gimple_stmt_iterator si;
9167 basic_block loop_header = single_succ (load_bb);
9168 gimple *phi, *stmt;
9169 edge e;
9170 enum built_in_function fncode;
9172 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9173 + index + 1);
9174 cmpxchg = builtin_decl_explicit (fncode);
9175 if (cmpxchg == NULL_TREE)
9176 return false;
9177 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9178 atype = type;
9179 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9181 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9182 || !can_atomic_load_p (TYPE_MODE (itype)))
9183 return false;
9185 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9186 si = gsi_last_nondebug_bb (load_bb);
9187 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9188 location_t loc = gimple_location (gsi_stmt (si));
9189 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9190 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9191 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9193 /* For floating-point values, we'll need to view-convert them to integers
9194 so that we can perform the atomic compare and swap. Simplify the
9195 following code by always setting up the "i"ntegral variables. */
9196 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9198 tree iaddr_val;
9200 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9201 true));
9202 atype = itype;
9203 iaddr_val
9204 = force_gimple_operand_gsi (&si,
9205 fold_convert (TREE_TYPE (iaddr), addr),
9206 false, NULL_TREE, true, GSI_SAME_STMT);
9207 stmt = gimple_build_assign (iaddr, iaddr_val);
9208 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9209 loadedi = create_tmp_var (itype);
9210 if (gimple_in_ssa_p (cfun))
9211 loadedi = make_ssa_name (loadedi);
9213 else
9215 iaddr = addr;
9216 loadedi = loaded_val;
9219 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9220 tree loaddecl = builtin_decl_explicit (fncode);
9221 if (loaddecl)
9222 initial
9223 = fold_convert (atype,
9224 build_call_expr (loaddecl, 2, iaddr,
9225 build_int_cst (NULL_TREE,
9226 MEMMODEL_RELAXED)));
9227 else
9229 tree off
9230 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9231 true), 0);
9232 initial = build2 (MEM_REF, atype, iaddr, off);
9235 initial
9236 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9237 GSI_SAME_STMT);
9239 /* Move the value to the LOADEDI temporary. */
9240 if (gimple_in_ssa_p (cfun))
9242 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9243 phi = create_phi_node (loadedi, loop_header);
9244 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9245 initial);
9247 else
9248 gsi_insert_before (&si,
9249 gimple_build_assign (loadedi, initial),
9250 GSI_SAME_STMT);
9251 if (loadedi != loaded_val)
9253 gimple_stmt_iterator gsi2;
9254 tree x;
9256 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9257 gsi2 = gsi_start_bb (loop_header);
9258 if (gimple_in_ssa_p (cfun))
9260 gassign *stmt;
9261 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9262 true, GSI_SAME_STMT);
9263 stmt = gimple_build_assign (loaded_val, x);
9264 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9266 else
9268 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9269 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9270 true, GSI_SAME_STMT);
9273 gsi_remove (&si, true);
9275 si = gsi_last_nondebug_bb (store_bb);
9276 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9278 if (iaddr == addr)
9279 storedi = stored_val;
9280 else
9281 storedi
9282 = force_gimple_operand_gsi (&si,
9283 build1 (VIEW_CONVERT_EXPR, itype,
9284 stored_val), true, NULL_TREE, true,
9285 GSI_SAME_STMT);
9287 /* Build the compare&swap statement. */
9288 tree ctype = build_complex_type (itype);
9289 int flag = int_size_in_bytes (itype);
9290 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9291 ctype, 6, iaddr, loadedi,
9292 storedi,
9293 build_int_cst (integer_type_node,
9294 flag),
9295 mo, fmo);
9296 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9297 new_storedi = force_gimple_operand_gsi (&si,
9298 fold_convert (TREE_TYPE (loadedi),
9299 new_storedi),
9300 true, NULL_TREE,
9301 true, GSI_SAME_STMT);
9303 if (gimple_in_ssa_p (cfun))
9304 old_vali = loadedi;
9305 else
9307 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9308 stmt = gimple_build_assign (old_vali, loadedi);
9309 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9311 stmt = gimple_build_assign (loadedi, new_storedi);
9312 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9315 /* Note that we always perform the comparison as an integer, even for
9316 floating point. This allows the atomic operation to properly
9317 succeed even with NaNs and -0.0. */
9318 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9319 stmt = gimple_build_cond_empty (ne);
9320 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9322 /* Update cfg. */
9323 e = single_succ_edge (store_bb);
9324 e->flags &= ~EDGE_FALLTHRU;
9325 e->flags |= EDGE_FALSE_VALUE;
9326 /* Expect no looping. */
9327 e->probability = profile_probability::guessed_always ();
9329 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9330 e->probability = profile_probability::guessed_never ();
9332 /* Copy the new value to loadedi (we already did that before the condition
9333 if we are not in SSA). */
9334 if (gimple_in_ssa_p (cfun))
9336 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9337 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9340 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9341 gsi_remove (&si, true);
9343 class loop *loop = alloc_loop ();
9344 loop->header = loop_header;
9345 loop->latch = store_bb;
9346 add_loop (loop, loop_header->loop_father);
9348 if (gimple_in_ssa_p (cfun))
9349 update_ssa (TODO_update_ssa_no_phi);
9351 return true;
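/* For instance, for a 4-byte float the loop built above is roughly
   (sketch; in SSA form LOADEDI is a PHI on the loop header rather than
   an explicit copy):

     iaddr = (int *) addr;
     loadedi = __atomic_load_4 (iaddr, MEMMODEL_RELAXED);
   repeat:
     storedi = VIEW_CONVERT_EXPR<int> (stored_val);
     old = REALPART_EXPR <.ATOMIC_COMPARE_EXCHANGE (iaddr, loadedi,
                                                    storedi, 4, mo, fmo)>;
     if (old != loadedi)
       { loadedi = old; goto repeat; }  */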
9354 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9356 GOMP_atomic_start ();
9357 *addr = rhs;
9358 GOMP_atomic_end ();
9360 The result is not globally atomic, but works so long as all parallel
9361 references are within #pragma omp atomic directives. According to
9362 responses received from omp@openmp.org, this appears to be within
9363 spec, and it matches how several other compilers handle this
9364 situation as well.
9365 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9366 expanding. STORED_VAL is the operand of the matching
9367 GIMPLE_OMP_ATOMIC_STORE.
9369 We replace
9370 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9371 loaded_val = *addr;
9373 and replace
9374 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9375 *addr = stored_val;
9378 static bool
9379 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9380 tree addr, tree loaded_val, tree stored_val)
9382 gimple_stmt_iterator si;
9383 gassign *stmt;
9384 tree t;
9386 si = gsi_last_nondebug_bb (load_bb);
9387 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9389 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9390 t = build_call_expr (t, 0);
9391 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9393 tree mem = build_simple_mem_ref (addr);
9394 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9395 TREE_OPERAND (mem, 1)
9396 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9397 true),
9398 TREE_OPERAND (mem, 1));
9399 stmt = gimple_build_assign (loaded_val, mem);
9400 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9401 gsi_remove (&si, true);
9403 si = gsi_last_nondebug_bb (store_bb);
9404 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9406 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9407 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9409 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9410 t = build_call_expr (t, 0);
9411 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9412 gsi_remove (&si, true);
9414 if (gimple_in_ssa_p (cfun))
9415 update_ssa (TODO_update_ssa_no_phi);
9416 return true;
9419 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
9420 using expand_omp_atomic_fetch_op. If that fails, we try to
9421 call expand_omp_atomic_pipeline, and if it fails too, the
9422 ultimate fallback is wrapping the operation in a mutex
9423 (expand_omp_atomic_mutex). REGION is the atomic region built
9424 by build_omp_regions_1(). */
9426 static void
9427 expand_omp_atomic (struct omp_region *region)
9429 basic_block load_bb = region->entry, store_bb = region->exit;
9430 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9431 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9432 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9433 tree addr = gimple_omp_atomic_load_rhs (load);
9434 tree stored_val = gimple_omp_atomic_store_val (store);
9435 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9436 HOST_WIDE_INT index;
9438 /* Make sure the type is one of the supported sizes. */
9439 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9440 index = exact_log2 (index);
9441 if (index >= 0 && index <= 4)
9443 unsigned int align = TYPE_ALIGN_UNIT (type);
9445 /* __sync builtins require strict data alignment. */
9446 if (exact_log2 (align) >= index)
9448 /* Atomic load. */
9449 scalar_mode smode;
9450 if (loaded_val == stored_val
9451 && (is_int_mode (TYPE_MODE (type), &smode)
9452 || is_float_mode (TYPE_MODE (type), &smode))
9453 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9454 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9455 return;
9457 /* Atomic store. */
9458 if ((is_int_mode (TYPE_MODE (type), &smode)
9459 || is_float_mode (TYPE_MODE (type), &smode))
9460 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9461 && store_bb == single_succ (load_bb)
9462 && first_stmt (store_bb) == store
9463 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9464 stored_val, index))
9465 return;
9467 /* When possible, use specialized atomic update functions. */
9468 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9469 && store_bb == single_succ (load_bb)
9470 && expand_omp_atomic_fetch_op (load_bb, addr,
9471 loaded_val, stored_val, index))
9472 return;
9474 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9475 if (store_bb == single_succ (load_bb)
9476 && !gimple_in_ssa_p (cfun)
9477 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9478 index))
9479 return;
9481 /* If we don't have specialized __sync builtins, try to implement
9482 as a compare and swap loop. */
9483 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9484 loaded_val, stored_val, index))
9485 return;
9489 /* The ultimate fallback is wrapping the operation in a mutex. */
9490 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
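/* E.g. an aligned 8-byte long gets index 3 and works down the chain
   above: atomic load, atomic store, fetch-op, lockless CAS, CAS loop;
   a 3-byte struct fails the exact_log2 test and goes straight to the
   mutex fallback.  */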
9493 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9494 at REGION_EXIT. */
9496 static void
9497 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9498 basic_block region_exit)
9500 class loop *outer = region_entry->loop_father;
9501 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9503 /* Don't parallelize the kernels region if it contains more than one outer
9504 loop. */
9505 unsigned int nr_outer_loops = 0;
9506 class loop *single_outer = NULL;
9507 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9509 gcc_assert (loop_outer (loop) == outer);
9511 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9512 continue;
9514 if (region_exit != NULL
9515 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9516 continue;
9518 nr_outer_loops++;
9519 single_outer = loop;
9521 if (nr_outer_loops != 1)
9522 return;
9524 for (class loop *loop = single_outer->inner;
9525 loop != NULL;
9526 loop = loop->inner)
9527 if (loop->next)
9528 return;
9530 /* Mark the loops in the region. */
9531 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9532 loop->in_oacc_kernels_region = true;
9535 /* Build a target argument identifier from the DEVICE identifier, value
9536 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9538 static tree
9539 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9541 tree t = build_int_cst (integer_type_node, device);
9542 if (subsequent_param)
9543 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9544 build_int_cst (integer_type_node,
9545 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9546 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9547 build_int_cst (integer_type_node, id));
9548 return t;
9551 /* Like above but return it in a type that can be directly stored as an element
9552 of the argument array. */
9554 static tree
9555 get_target_argument_identifier (int device, bool subsequent_param, int id)
9557 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9558 return fold_convert (ptr_type_node, t);
9561 /* Return a target argument consisting of DEVICE identifier, value identifier
9562 ID, and the actual VALUE. */
9564 static tree
9565 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9566 tree value)
9568 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9569 fold_convert (integer_type_node, value),
9570 build_int_cst (unsigned_type_node,
9571 GOMP_TARGET_ARG_VALUE_SHIFT));
9572 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9573 get_target_argument_identifier_1 (device, false, id));
9574 t = fold_convert (ptr_type_node, t);
9575 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9578 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9579 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9580 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9581 arguments. */
9583 static void
9584 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9585 int id, tree value, vec <tree> *args)
9587 if (tree_fits_shwi_p (value)
9588 && tree_to_shwi (value) > -(1 << 15)
9589 && tree_to_shwi (value) < (1 << 15))
9590 args->quick_push (get_target_argument_value (gsi, device, id, value));
9591 else
9593 args->quick_push (get_target_argument_identifier (device, true, id));
9594 value = fold_convert (ptr_type_node, value);
9595 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9596 GSI_SAME_STMT);
9597 args->quick_push (value);
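/* Sketch of the encoding: a small constant such as num_teams(4) packs
   into a single pointer-sized word, roughly

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_NUM_TEAMS | GOMP_TARGET_ARG_DEVICE_ALL)

   while a runtime value takes two consecutive slots: the identifier
   word with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, then the value itself
   cast to a pointer.  */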
9601 /* Create an array of arguments that is then passed to GOMP_target. */
9603 static tree
9604 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9606 auto_vec <tree, 6> args;
9607 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9608 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9609 if (c)
9610 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9611 else
9612 t = integer_minus_one_node;
9613 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9614 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9616 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9617 if (c)
9618 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9619 else
9620 t = integer_minus_one_node;
9621 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9622 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9623 &args);
9625 /* Produce more, perhaps device-specific, arguments here. */
9627 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9628 args.length () + 1),
9629 ".omp_target_args");
9630 for (unsigned i = 0; i < args.length (); i++)
9632 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9633 build_int_cst (integer_type_node, i),
9634 NULL_TREE, NULL_TREE);
9635 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9636 GSI_SAME_STMT);
9638 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9639 build_int_cst (integer_type_node, args.length ()),
9640 NULL_TREE, NULL_TREE);
9641 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9642 GSI_SAME_STMT);
9643 TREE_ADDRESSABLE (argarray) = 1;
9644 return build_fold_addr_expr (argarray);
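/* So e.g. for "#pragma omp target teams num_teams(8) thread_limit(n)"
   with a runtime N, the array built above looks roughly like

     .omp_target_args = { <8 packed with GOMP_TARGET_ARG_NUM_TEAMS>,
                          <GOMP_TARGET_ARG_THREAD_LIMIT identifier>,
                          (void *) n, NULL };

   an absent clause contributes -1 instead.  */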
9647 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9649 static void
9650 expand_omp_target (struct omp_region *region)
9652 basic_block entry_bb, exit_bb, new_bb;
9653 struct function *child_cfun;
9654 tree child_fn, block, t;
9655 gimple_stmt_iterator gsi;
9656 gomp_target *entry_stmt;
9657 gimple *stmt;
9658 edge e;
9659 bool offloaded;
9660 int target_kind;
9662 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9663 target_kind = gimple_omp_target_kind (entry_stmt);
9664 new_bb = region->entry;
9666 offloaded = is_gimple_omp_offloaded (entry_stmt);
9667 switch (target_kind)
9669 case GF_OMP_TARGET_KIND_REGION:
9670 case GF_OMP_TARGET_KIND_UPDATE:
9671 case GF_OMP_TARGET_KIND_ENTER_DATA:
9672 case GF_OMP_TARGET_KIND_EXIT_DATA:
9673 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9674 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9675 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9676 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9677 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9678 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9679 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9680 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9681 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9682 case GF_OMP_TARGET_KIND_DATA:
9683 case GF_OMP_TARGET_KIND_OACC_DATA:
9684 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9685 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9686 break;
9687 default:
9688 gcc_unreachable ();
9691 child_fn = NULL_TREE;
9692 child_cfun = NULL;
9693 if (offloaded)
9695 child_fn = gimple_omp_target_child_fn (entry_stmt);
9696 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9699 /* Supported by expand_omp_taskreg, but not here. */
9700 if (child_cfun != NULL)
9701 gcc_checking_assert (!child_cfun->cfg);
9702 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9704 entry_bb = region->entry;
9705 exit_bb = region->exit;
9707 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9708 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9710 /* From here on, all OpenACC compute constructs are mapped to
9711 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9712 To distinguish between them, we attach attributes. */
9713 switch (target_kind)
9715 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9716 DECL_ATTRIBUTES (child_fn)
9717 = tree_cons (get_identifier ("oacc parallel"),
9718 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9719 break;
9720 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9721 DECL_ATTRIBUTES (child_fn)
9722 = tree_cons (get_identifier ("oacc kernels"),
9723 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9724 break;
9725 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9726 DECL_ATTRIBUTES (child_fn)
9727 = tree_cons (get_identifier ("oacc serial"),
9728 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9729 break;
9730 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9731 DECL_ATTRIBUTES (child_fn)
9732 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9733 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9734 break;
9735 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9736 DECL_ATTRIBUTES (child_fn)
9737 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9738 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9739 break;
9740 default:
9741 /* Make sure we don't miss any. */
9742 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9743 && is_gimple_omp_offloaded (entry_stmt)));
9744 break;
9747 if (offloaded)
9749 unsigned srcidx, dstidx, num;
9751 /* If the offloading region needs data sent from the parent
9752 function, then the very first statement (except possible
9753 tree profile counter updates) of the offloading body
9754 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9755 &.OMP_DATA_O is passed as an argument to the child function,
9756 we need to replace it with the argument as seen by the child
9757 function.
9759 In most cases, this will end up being the identity assignment
9760 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9761 a function call that has been inlined, the original PARM_DECL
9762 .OMP_DATA_I may have been converted into a different local
9763 variable. In which case, we need to keep the assignment. */
9764 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9765 if (data_arg)
9767 basic_block entry_succ_bb = single_succ (entry_bb);
9768 gimple_stmt_iterator gsi;
9769 tree arg;
9770 gimple *tgtcopy_stmt = NULL;
9771 tree sender = TREE_VEC_ELT (data_arg, 0);
9773 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9775 gcc_assert (!gsi_end_p (gsi));
9776 stmt = gsi_stmt (gsi);
9777 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9778 continue;
9780 if (gimple_num_ops (stmt) == 2)
9782 tree arg = gimple_assign_rhs1 (stmt);
9784 /* We're ignoring the subcode because we're
9785 effectively doing a STRIP_NOPS. */
9787 if (TREE_CODE (arg) == ADDR_EXPR
9788 && TREE_OPERAND (arg, 0) == sender)
9790 tgtcopy_stmt = stmt;
9791 break;
9796 gcc_assert (tgtcopy_stmt != NULL);
9797 arg = DECL_ARGUMENTS (child_fn);
9799 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9800 gsi_remove (&gsi, true);
9803 /* Declare local variables needed in CHILD_CFUN. */
9804 block = DECL_INITIAL (child_fn);
9805 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9806 /* The gimplifier could record temporaries in the offloading block
9807 rather than in the containing function's local_decls chain,
9808 which would mean cgraph missed finalizing them. Do it now. */
9809 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9810 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9811 varpool_node::finalize_decl (t);
9812 DECL_SAVED_TREE (child_fn) = NULL;
9813 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9814 gimple_set_body (child_fn, NULL);
9815 TREE_USED (block) = 1;
9817 /* Reset DECL_CONTEXT on function arguments. */
9818 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9819 DECL_CONTEXT (t) = child_fn;
9821 /* Split ENTRY_BB at GIMPLE_*,
9822 so that it can be moved to the child function. */
9823 gsi = gsi_last_nondebug_bb (entry_bb);
9824 stmt = gsi_stmt (gsi);
9825 gcc_assert (stmt
9826 && gimple_code (stmt) == gimple_code (entry_stmt));
9827 e = split_block (entry_bb, stmt);
9828 gsi_remove (&gsi, true);
9829 entry_bb = e->dest;
9830 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9832 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9833 if (exit_bb)
9835 gsi = gsi_last_nondebug_bb (exit_bb);
9836 gcc_assert (!gsi_end_p (gsi)
9837 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9838 stmt = gimple_build_return (NULL);
9839 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9840 gsi_remove (&gsi, true);
9843 /* Move the offloading region into CHILD_CFUN. */
9845 block = gimple_block (entry_stmt);
9847 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9848 if (exit_bb)
9849 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9850 /* When the OMP expansion process cannot guarantee an up-to-date
9851 loop tree, arrange for the child function to fix up the loops. */
9852 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9853 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9855 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9856 num = vec_safe_length (child_cfun->local_decls);
9857 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9859 t = (*child_cfun->local_decls)[srcidx];
9860 if (DECL_CONTEXT (t) == cfun->decl)
9861 continue;
9862 if (srcidx != dstidx)
9863 (*child_cfun->local_decls)[dstidx] = t;
9864 dstidx++;
9866 if (dstidx != num)
9867 vec_safe_truncate (child_cfun->local_decls, dstidx);
9869 /* Inform the callgraph about the new function. */
9870 child_cfun->curr_properties = cfun->curr_properties;
9871 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9872 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9873 cgraph_node *node = cgraph_node::get_create (child_fn);
9874 node->parallelized_function = 1;
9875 cgraph_node::add_new_function (child_fn, true);
9877 /* Add the new function to the offload table. */
9878 if (ENABLE_OFFLOADING)
9880 if (in_lto_p)
9881 DECL_PRESERVE_P (child_fn) = 1;
9882 vec_safe_push (offload_funcs, child_fn);
9885 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9886 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9888 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9889 fixed in a following pass. */
9890 push_cfun (child_cfun);
9891 if (need_asm)
9892 assign_assembler_name_if_needed (child_fn);
9893 cgraph_edge::rebuild_edges ();
9895 /* Some EH regions might become dead, see PR34608. If
9896 pass_cleanup_cfg isn't the first pass to happen with the
9897 new child, these dead EH edges might cause problems.
9898 Clean them up now. */
9899 if (flag_exceptions)
9901 basic_block bb;
9902 bool changed = false;
9904 FOR_EACH_BB_FN (bb, cfun)
9905 changed |= gimple_purge_dead_eh_edges (bb);
9906 if (changed)
9907 cleanup_tree_cfg ();
9909 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9910 verify_loop_structure ();
9911 pop_cfun ();
9913 if (dump_file && !gimple_in_ssa_p (cfun))
9915 omp_any_child_fn_dumped = true;
9916 dump_function_header (dump_file, child_fn, dump_flags);
9917 dump_function_to_file (child_fn, dump_file, dump_flags);
9920 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9923 /* Emit a library call to launch the offloading region, or do data
9924 transfers. */
9925 tree t1, t2, t3, t4, depend, c, clauses;
9926 enum built_in_function start_ix;
9927 unsigned int flags_i = 0;
9929 switch (gimple_omp_target_kind (entry_stmt))
9931 case GF_OMP_TARGET_KIND_REGION:
9932 start_ix = BUILT_IN_GOMP_TARGET;
9933 break;
9934 case GF_OMP_TARGET_KIND_DATA:
9935 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9936 break;
9937 case GF_OMP_TARGET_KIND_UPDATE:
9938 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9939 break;
9940 case GF_OMP_TARGET_KIND_ENTER_DATA:
9941 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9942 break;
9943 case GF_OMP_TARGET_KIND_EXIT_DATA:
9944 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9945 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9946 break;
9947 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9948 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9949 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9950 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9951 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9952 start_ix = BUILT_IN_GOACC_PARALLEL;
9953 break;
9954 case GF_OMP_TARGET_KIND_OACC_DATA:
9955 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9956 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9957 start_ix = BUILT_IN_GOACC_DATA_START;
9958 break;
9959 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9960 start_ix = BUILT_IN_GOACC_UPDATE;
9961 break;
9962 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9963 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9964 break;
9965 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9966 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9967 break;
9968 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9969 start_ix = BUILT_IN_GOACC_DECLARE;
9970 break;
9971 default:
9972 gcc_unreachable ();
9975 clauses = gimple_omp_target_clauses (entry_stmt);
9977 tree device = NULL_TREE;
9978 location_t device_loc = UNKNOWN_LOCATION;
9979 tree goacc_flags = NULL_TREE;
9980 bool need_device_adjustment = false;
9981 gimple_stmt_iterator adj_gsi;
9982 if (is_gimple_omp_oacc (entry_stmt))
9984 /* By default, no GOACC_FLAGs are set. */
9985 goacc_flags = integer_zero_node;
9987 else
9989 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9990 if (c)
9992 device = OMP_CLAUSE_DEVICE_ID (c);
9993 /* Ensure 'device' is of the correct type. */
9994 device = fold_convert_loc (device_loc, integer_type_node, device);
9995 if (TREE_CODE (device) == INTEGER_CST)
9997 if (wi::to_wide (device) == GOMP_DEVICE_ICV)
9998 device = build_int_cst (integer_type_node,
9999 GOMP_DEVICE_HOST_FALLBACK);
10000 else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10001 device = build_int_cst (integer_type_node,
10002 GOMP_DEVICE_HOST_FALLBACK - 1);
10004 else
10005 need_device_adjustment = true;
10006 device_loc = OMP_CLAUSE_LOCATION (c);
10007 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10008 sorry_at (device_loc, "%<ancestor%> not yet supported");
10010 else
10012 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10013 library choose). */
10014 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10015 device_loc = gimple_location (entry_stmt);
10018 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10019 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
10020 nowait doesn't appear. */
10021 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10022 c = NULL;
10023 if (c)
10024 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10027 /* By default, there is no conditional. */
10028 tree cond = NULL_TREE;
10029 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10030 if (c)
10031 cond = OMP_CLAUSE_IF_EXPR (c);
10032 /* If we found the clause 'if (cond)', build:
10033 OpenACC: goacc_flags = (cond ? goacc_flags
10034 : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10035 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10036 if (cond)
10038 tree *tp;
10039 if (is_gimple_omp_oacc (entry_stmt))
10040 tp = &goacc_flags;
10041 else
10042 tp = &device;
10044 cond = gimple_boolify (cond);
10046 basic_block cond_bb, then_bb, else_bb;
10047 edge e;
10048 tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10049 if (offloaded)
10050 e = split_block_after_labels (new_bb);
10051 else
10053 gsi = gsi_last_nondebug_bb (new_bb);
10054 gsi_prev (&gsi);
10055 e = split_block (new_bb, gsi_stmt (gsi));
10057 cond_bb = e->src;
10058 new_bb = e->dest;
10059 remove_edge (e);
10061 then_bb = create_empty_bb (cond_bb);
10062 else_bb = create_empty_bb (then_bb);
10063 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10064 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10066 stmt = gimple_build_cond_empty (cond);
10067 gsi = gsi_last_bb (cond_bb);
10068 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10070 gsi = gsi_start_bb (then_bb);
10071 stmt = gimple_build_assign (tmp_var, *tp);
10072 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10073 adj_gsi = gsi;
10075 gsi = gsi_start_bb (else_bb);
10076 if (is_gimple_omp_oacc (entry_stmt))
10077 stmt = gimple_build_assign (tmp_var,
10078 BIT_IOR_EXPR,
10079 *tp,
10080 build_int_cst (integer_type_node,
10081 GOACC_FLAG_HOST_FALLBACK));
10082 else
10083 stmt = gimple_build_assign (tmp_var,
10084 build_int_cst (integer_type_node,
10085 GOMP_DEVICE_HOST_FALLBACK));
10086 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10088 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10089 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10090 add_bb_to_loop (then_bb, cond_bb->loop_father);
10091 add_bb_to_loop (else_bb, cond_bb->loop_father);
10092 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10093 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10095 *tp = tmp_var;
10097 gsi = gsi_last_nondebug_bb (new_bb);
10099 else
10101 gsi = gsi_last_nondebug_bb (new_bb);
10103 if (device != NULL_TREE)
10104 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10105 true, GSI_SAME_STMT);
10106 if (need_device_adjustment)
10108 tree tmp_var = create_tmp_var (TREE_TYPE (device));
10109 stmt = gimple_build_assign (tmp_var, device);
10110 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10111 adj_gsi = gsi_for_stmt (stmt);
10112 device = tmp_var;
10116 if (need_device_adjustment)
10118 tree uns = fold_convert (unsigned_type_node, device);
10119 uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10120 false, GSI_CONTINUE_LINKING);
10121 edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10122 basic_block cond_bb = e->src;
10123 basic_block else_bb = e->dest;
10124 if (gsi_bb (adj_gsi) == new_bb)
10126 new_bb = else_bb;
10127 gsi = gsi_last_nondebug_bb (new_bb);
10130 basic_block then_bb = create_empty_bb (cond_bb);
10131 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10133 cond = build2 (GT_EXPR, boolean_type_node, uns,
10134 build_int_cst (unsigned_type_node,
10135 GOMP_DEVICE_HOST_FALLBACK - 1));
10136 stmt = gimple_build_cond_empty (cond);
10137 adj_gsi = gsi_last_bb (cond_bb);
10138 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10140 adj_gsi = gsi_start_bb (then_bb);
10141 tree add = build2 (PLUS_EXPR, integer_type_node, device,
10142 build_int_cst (integer_type_node, -1));
10143 stmt = gimple_build_assign (device, add);
10144 gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10146 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10147 e->flags = EDGE_FALSE_VALUE;
10148 add_bb_to_loop (then_bb, cond_bb->loop_father);
10149 make_edge (then_bb, else_bb, EDGE_FALLTHRU);
10152 t = gimple_omp_target_data_arg (entry_stmt);
10153 if (t == NULL)
10155 t1 = size_zero_node;
10156 t2 = build_zero_cst (ptr_type_node);
10157 t3 = t2;
10158 t4 = t2;
10160 else
10162 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10163 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10164 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10165 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10166 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10169 gimple *g;
10170 bool tagging = false;
10171 /* The maximum number of arguments used by any start_ix, without varargs. */
10172 auto_vec<tree, 11> args;
10173 if (is_gimple_omp_oacc (entry_stmt))
10175 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10176 TREE_TYPE (goacc_flags), goacc_flags);
10177 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10178 NULL_TREE, true,
10179 GSI_SAME_STMT);
10180 args.quick_push (goacc_flags_m);
10182 else
10183 args.quick_push (device);
10184 if (offloaded)
10185 args.quick_push (build_fold_addr_expr (child_fn));
10186 args.quick_push (t1);
10187 args.quick_push (t2);
10188 args.quick_push (t3);
10189 args.quick_push (t4);
10190 switch (start_ix)
10192 case BUILT_IN_GOACC_DATA_START:
10193 case BUILT_IN_GOACC_DECLARE:
10194 case BUILT_IN_GOMP_TARGET_DATA:
10195 break;
10196 case BUILT_IN_GOMP_TARGET:
10197 case BUILT_IN_GOMP_TARGET_UPDATE:
10198 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10199 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10200 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10201 if (c)
10202 depend = OMP_CLAUSE_DECL (c);
10203 else
10204 depend = build_int_cst (ptr_type_node, 0);
10205 args.quick_push (depend);
10206 if (start_ix == BUILT_IN_GOMP_TARGET)
10207 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10208 break;
10209 case BUILT_IN_GOACC_PARALLEL:
10210 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10212 tree dims = NULL_TREE;
10213 unsigned int ix;
10215 /* For serial constructs we set all dimensions to 1. */
10216 for (ix = GOMP_DIM_MAX; ix--;)
10217 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10218 oacc_replace_fn_attrib (child_fn, dims);
10220 else
10221 oacc_set_fn_attrib (child_fn, clauses, &args);
10222 tagging = true;
10223 /* FALLTHRU */
10224 case BUILT_IN_GOACC_ENTER_DATA:
10225 case BUILT_IN_GOACC_EXIT_DATA:
10226 case BUILT_IN_GOACC_UPDATE:
10228 tree t_async = NULL_TREE;
10230 /* If present, use the value specified by the respective
10231 clause, making sure that it is of the correct type. */
10232 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10233 if (c)
10234 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10235 integer_type_node,
10236 OMP_CLAUSE_ASYNC_EXPR (c));
10237 else if (!tagging)
10238 /* Default values for t_async. */
10239 t_async = fold_convert_loc (gimple_location (entry_stmt),
10240 integer_type_node,
10241 build_int_cst (integer_type_node,
10242 GOMP_ASYNC_SYNC));
10243 if (tagging && t_async)
10245 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10247 if (TREE_CODE (t_async) == INTEGER_CST)
10249 /* See if we can pack the async arg into the tag's
10250 operand. */
10251 i_async = TREE_INT_CST_LOW (t_async);
10252 if (i_async < GOMP_LAUNCH_OP_MAX)
10253 t_async = NULL_TREE;
10254 else
10255 i_async = GOMP_LAUNCH_OP_MAX;
10257 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10258 i_async));
10260 if (t_async)
10261 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10262 NULL_TREE, true,
10263 GSI_SAME_STMT));
10265 /* Save the argument index, and ... */
10266 unsigned t_wait_idx = args.length ();
10267 unsigned num_waits = 0;
10268 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10269 if (!tagging || c)
10270 /* ... push a placeholder. */
10271 args.safe_push (integer_zero_node);
10273 for (; c; c = OMP_CLAUSE_CHAIN (c))
10274 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10276 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10277 integer_type_node,
10278 OMP_CLAUSE_WAIT_EXPR (c));
10279 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10280 GSI_SAME_STMT);
10281 args.safe_push (arg);
10282 num_waits++;
10285 if (!tagging || num_waits)
10287 tree len;
10289 /* Now that we know the number, update the placeholder. */
10290 if (tagging)
10291 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10292 else
10293 len = build_int_cst (integer_type_node, num_waits);
10294 len = fold_convert_loc (gimple_location (entry_stmt),
10295 unsigned_type_node, len);
10296 args[t_wait_idx] = len;
10299 break;
10300 default:
10301 gcc_unreachable ();
10303 if (tagging)
10304 /* Push terminal marker - zero. */
10305 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10307 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10308 gimple_set_location (g, gimple_location (entry_stmt));
10309 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10310 if (!offloaded)
10312 g = gsi_stmt (gsi);
10313 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10314 gsi_remove (&gsi, true);
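A note on the tagging path above: for the GOACC_* entry points, a small compile-time async value is folded directly into the operand field of the GOMP_LAUNCH_ASYNC tag word, and only dynamic or out-of-range values are passed as a separate trailing argument. The following standalone sketch mirrors that packing decision; the shift and limit constants here are illustrative stand-ins (cf. GOMP_LAUNCH_CODE_SHIFT and GOMP_LAUNCH_OP_MAX in gomp-constants.h), not the exact ABI values.

#include <cstdio>
#include <cstdint>
#include <vector>

/* Illustrative stand-ins; the real values live in gomp-constants.h.  */
constexpr unsigned LAUNCH_CODE_SHIFT = 28;   /* cf. GOMP_LAUNCH_CODE_SHIFT */
constexpr uint32_t LAUNCH_OP_MAX = 0xffff;   /* cf. GOMP_LAUNCH_OP_MAX */
constexpr unsigned LAUNCH_ASYNC = 2;         /* assumed launch code */

/* Analogue of oacc_launch_pack: fuse a launch code and a small
   operand into one tag word.  */
static uint32_t
launch_pack (unsigned code, uint32_t op)
{
  return (code << LAUNCH_CODE_SHIFT) | op;
}

int
main ()
{
  std::vector<uint32_t> args;
  long async_val = 5;   /* pretend this came from an async clause */

  if (async_val >= 0 && (uint32_t) async_val < LAUNCH_OP_MAX)
    /* Compile-time constant that fits: fold it into the tag operand
       and pass no separate argument, as in the INTEGER_CST case.  */
    args.push_back (launch_pack (LAUNCH_ASYNC, (uint32_t) async_val));
  else
    {
      /* Dynamic or out of range: tag with OP_MAX as a marker and push
         the value itself as a trailing argument.  */
      args.push_back (launch_pack (LAUNCH_ASYNC, LAUNCH_OP_MAX));
      args.push_back ((uint32_t) async_val);
    }

  for (uint32_t a : args)
    std::printf ("launch arg: 0x%08x\n", (unsigned) a);
}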
10318 /* Expand the parallel region tree rooted at REGION. Expansion
10319 proceeds in depth-first order. Innermost regions are expanded
10320 first. This way, parallel regions that require a new function to
10321 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10322 internal dependencies in their body. */
10324 static void
10325 expand_omp (struct omp_region *region)
10327 omp_any_child_fn_dumped = false;
10328 while (region)
10330 location_t saved_location;
10331 gimple *inner_stmt = NULL;
10333 /* First, determine whether this is a combined parallel+workshare
10334 region. */
10335 if (region->type == GIMPLE_OMP_PARALLEL)
10336 determine_parallel_type (region);
10338 if (region->type == GIMPLE_OMP_FOR
10339 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10340 inner_stmt = last_stmt (region->inner->entry);
10342 if (region->inner)
10343 expand_omp (region->inner);
10345 saved_location = input_location;
10346 if (gimple_has_location (last_stmt (region->entry)))
10347 input_location = gimple_location (last_stmt (region->entry));
10349 switch (region->type)
10351 case GIMPLE_OMP_PARALLEL:
10352 case GIMPLE_OMP_TASK:
10353 expand_omp_taskreg (region);
10354 break;
10356 case GIMPLE_OMP_FOR:
10357 expand_omp_for (region, inner_stmt);
10358 break;
10360 case GIMPLE_OMP_SECTIONS:
10361 expand_omp_sections (region);
10362 break;
10364 case GIMPLE_OMP_SECTION:
10365 /* Individual omp sections are handled together with their
10366 parent GIMPLE_OMP_SECTIONS region. */
10367 break;
10369 case GIMPLE_OMP_SINGLE:
10370 case GIMPLE_OMP_SCOPE:
10371 expand_omp_single (region);
10372 break;
10374 case GIMPLE_OMP_ORDERED:
10376 gomp_ordered *ord_stmt
10377 = as_a <gomp_ordered *> (last_stmt (region->entry));
10378 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10379 OMP_CLAUSE_DEPEND))
10381 /* We'll expand these when expanding the corresponding
10382 worksharing region with an ordered(n) clause. */
10383 gcc_assert (region->outer
10384 && region->outer->type == GIMPLE_OMP_FOR);
10385 region->ord_stmt = ord_stmt;
10386 break;
10389 /* FALLTHRU */
10390 case GIMPLE_OMP_MASTER:
10391 case GIMPLE_OMP_MASKED:
10392 case GIMPLE_OMP_TASKGROUP:
10393 case GIMPLE_OMP_CRITICAL:
10394 case GIMPLE_OMP_TEAMS:
10395 expand_omp_synch (region);
10396 break;
10398 case GIMPLE_OMP_ATOMIC_LOAD:
10399 expand_omp_atomic (region);
10400 break;
10402 case GIMPLE_OMP_TARGET:
10403 expand_omp_target (region);
10404 break;
10406 default:
10407 gcc_unreachable ();
10410 input_location = saved_location;
10411 region = region->next;
10413 if (omp_any_child_fn_dumped)
10415 if (dump_file)
10416 dump_function_header (dump_file, current_function_decl, dump_flags);
10417 omp_any_child_fn_dumped = false;
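The traversal order is easier to see on a toy tree: inner regions are expanded before the region that contains them, and siblings are walked via the next links. A minimal self-contained sketch of that depth-first loop, with a print standing in for the real expand_omp_* workers (all names here are invented):

#include <cstdio>

/* Toy mirror of struct omp_region's tree links.  */
struct region
{
  const char *name;
  region *inner;   /* first child */
  region *next;    /* next sibling */
};

/* Same shape as expand_omp above: recurse into children before
   handling the region itself, then step to the next sibling.  */
static void
expand (region *r)
{
  while (r)
    {
      if (r->inner)
        expand (r->inner);
      std::printf ("expanding %s\n", r->name);  /* stand-in for expand_omp_* */
      r = r->next;
    }
}

int
main ()
{
  region innermost = { "for (inner)", nullptr, nullptr };
  region single_r = { "single", nullptr, nullptr };
  innermost.next = &single_r;
  region parallel = { "parallel", &innermost, nullptr };
  expand (&parallel);   /* prints: for (inner), single, parallel */
}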
10421 /* Helper for build_omp_regions. Scan the dominator tree starting at
10422 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10423 true, the function ends once a single tree is built (otherwise, the
10424 whole forest of OMP constructs may be built). */
10426 static void
10427 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10428 bool single_tree)
10430 gimple_stmt_iterator gsi;
10431 gimple *stmt;
10432 basic_block son;
10434 gsi = gsi_last_nondebug_bb (bb);
10435 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10437 struct omp_region *region;
10438 enum gimple_code code;
10440 stmt = gsi_stmt (gsi);
10441 code = gimple_code (stmt);
10442 if (code == GIMPLE_OMP_RETURN)
10444 /* STMT is the return point out of region PARENT. Mark it
10445 as the exit point and make PARENT the immediately
10446 enclosing region. */
10447 gcc_assert (parent);
10448 region = parent;
10449 region->exit = bb;
10450 parent = parent->outer;
10452 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10454 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10455 GIMPLE_OMP_RETURN, but matches with
10456 GIMPLE_OMP_ATOMIC_LOAD. */
10457 gcc_assert (parent);
10458 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10459 region = parent;
10460 region->exit = bb;
10461 parent = parent->outer;
10463 else if (code == GIMPLE_OMP_CONTINUE)
10465 gcc_assert (parent);
10466 parent->cont = bb;
10468 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10470 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10471 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10473 else
10475 region = new_omp_region (bb, code, parent);
10476 /* Otherwise... */
10477 if (code == GIMPLE_OMP_TARGET)
10479 switch (gimple_omp_target_kind (stmt))
10481 case GF_OMP_TARGET_KIND_REGION:
10482 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10483 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10484 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10485 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10486 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10487 break;
10488 case GF_OMP_TARGET_KIND_UPDATE:
10489 case GF_OMP_TARGET_KIND_ENTER_DATA:
10490 case GF_OMP_TARGET_KIND_EXIT_DATA:
10491 case GF_OMP_TARGET_KIND_DATA:
10492 case GF_OMP_TARGET_KIND_OACC_DATA:
10493 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10494 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10495 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10496 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10497 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10498 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10499 /* ..., other than for those stand-alone directives... */
10500 region = NULL;
10501 break;
10502 default:
10503 gcc_unreachable ();
10506 else if (code == GIMPLE_OMP_ORDERED
10507 && omp_find_clause (gimple_omp_ordered_clauses
10508 (as_a <gomp_ordered *> (stmt)),
10509 OMP_CLAUSE_DEPEND))
10510 /* #pragma omp ordered depend is also just a stand-alone
10511 directive. */
10512 region = NULL;
10513 else if (code == GIMPLE_OMP_TASK
10514 && gimple_omp_task_taskwait_p (stmt))
10515 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10516 region = NULL;
10517 /* ..., this directive becomes the parent for a new region. */
10518 if (region)
10519 parent = region;
10523 if (single_tree && !parent)
10524 return;
10526 for (son = first_dom_son (CDI_DOMINATORS, bb);
10527 son;
10528 son = next_dom_son (CDI_DOMINATORS, son))
10529 build_omp_regions_1 (son, parent, single_tree);
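Each directive opens a region and the matching GIMPLE_OMP_RETURN closes it, so PARENT behaves like the top of a stack threaded through the recursion. A hypothetical flat re-creation of that pairing over a token stream (the names and the "ret" marker are invented for illustration; only the linking mirrors new_omp_region):

#include <cstdio>
#include <deque>
#include <string>
#include <vector>

struct node
{
  std::string name;
  node *outer, *inner, *next;
};

int
main ()
{
  /* A flattened directive stream; "ret" plays GIMPLE_OMP_RETURN.  */
  std::vector<std::string> stmts
    = { "parallel", "for", "ret", "single", "ret", "ret" };

  std::deque<node> pool;   /* stable addresses across push_back */
  node *parent = nullptr, *root = nullptr;

  for (const std::string &s : stmts)
    if (s == "ret")
      /* Return point: close the current region and pop to its
         enclosing region, as in the GIMPLE_OMP_RETURN arm above.  */
      parent = parent->outer;
    else
      {
        /* Mirror of new_omp_region: link under PARENT, then the new
           region becomes the parent for what follows.  */
        pool.push_back (node { s, parent, nullptr, nullptr });
        node *r = &pool.back ();
        if (parent)
          {
            r->next = parent->inner;
            parent->inner = r;
          }
        else
          root = r;
        parent = r;
      }

  std::printf ("root region: %s\n", root->name.c_str ());
}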
10532 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10533 root_omp_region. */
10535 static void
10536 build_omp_regions_root (basic_block root)
10538 gcc_assert (root_omp_region == NULL);
10539 build_omp_regions_1 (root, NULL, true);
10540 gcc_assert (root_omp_region != NULL);
10543 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
10545 void
10546 omp_expand_local (basic_block head)
10548 build_omp_regions_root (head);
10549 if (dump_file && (dump_flags & TDF_DETAILS))
10551 fprintf (dump_file, "\nOMP region tree\n\n");
10552 dump_omp_region (dump_file, root_omp_region, 0);
10553 fprintf (dump_file, "\n");
10556 remove_exit_barriers (root_omp_region);
10557 expand_omp (root_omp_region);
10559 omp_free_regions ();
10562 /* Scan the CFG and build a tree of OMP regions, storing it in
10563 root_omp_region. */
10565 static void
10566 build_omp_regions (void)
10568 gcc_assert (root_omp_region == NULL);
10569 calculate_dominance_info (CDI_DOMINATORS);
10570 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10573 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10575 static unsigned int
10576 execute_expand_omp (void)
10578 build_omp_regions ();
10580 if (!root_omp_region)
10581 return 0;
10583 if (dump_file)
10585 fprintf (dump_file, "\nOMP region tree\n\n");
10586 dump_omp_region (dump_file, root_omp_region, 0);
10587 fprintf (dump_file, "\n");
10590 remove_exit_barriers (root_omp_region);
10592 expand_omp (root_omp_region);
10594 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10595 verify_loop_structure ();
10596 cleanup_tree_cfg ();
10598 omp_free_regions ();
10600 return 0;
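Seen end to end, execute_expand_omp is a short pipeline with an early exit when the function contains no OMP regions. A stub skeleton of that control flow, with made-up helper names standing in for the real ones:

#include <cstdio>

/* Hypothetical stubs; only the control flow mirrors execute_expand_omp.  */
static bool build_regions () { return true; }   /* any OMP regions found? */
static void dump_region_tree () { std::printf ("OMP region tree\n"); }
static void remove_barriers () {}
static void expand_regions () { std::printf ("expand\n"); }
static void cleanup_cfg_stub () {}

static unsigned
run (bool dumping)
{
  if (!build_regions ())
    return 0;               /* common fast path: nothing to do */
  if (dumping)
    dump_region_tree ();
  remove_barriers ();
  expand_regions ();
  cleanup_cfg_stub ();      /* expansion leaves dead blocks behind */
  return 0;
}

int
main ()
{
  return run (true);
}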
10603 /* OMP expansion -- the default pass, run before creation of SSA form. */
10605 namespace {
10607 const pass_data pass_data_expand_omp =
10609 GIMPLE_PASS, /* type */
10610 "ompexp", /* name */
10611 OPTGROUP_OMP, /* optinfo_flags */
10612 TV_NONE, /* tv_id */
10613 PROP_gimple_any, /* properties_required */
10614 PROP_gimple_eomp, /* properties_provided */
10615 0, /* properties_destroyed */
10616 0, /* todo_flags_start */
10617 0, /* todo_flags_finish */
10620 class pass_expand_omp : public gimple_opt_pass
10622 public:
10623 pass_expand_omp (gcc::context *ctxt)
10624 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10627 /* opt_pass methods: */
10628 unsigned int execute (function *) final override
10630 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10631 || flag_openmp_simd != 0)
10632 && !seen_error ());
10634 /* This pass always runs, to provide PROP_gimple_eomp.
10635 But often, there is nothing to do. */
10636 if (!gate)
10637 return 0;
10639 return execute_expand_omp ();
10642 }; // class pass_expand_omp
10644 } // anon namespace
10646 gimple_opt_pass *
10647 make_pass_expand_omp (gcc::context *ctxt)
10649 return new pass_expand_omp (ctxt);
10652 namespace {
10654 const pass_data pass_data_expand_omp_ssa =
10656 GIMPLE_PASS, /* type */
10657 "ompexpssa", /* name */
10658 OPTGROUP_OMP, /* optinfo_flags */
10659 TV_NONE, /* tv_id */
10660 PROP_cfg | PROP_ssa, /* properties_required */
10661 PROP_gimple_eomp, /* properties_provided */
10662 0, /* properties_destroyed */
10663 0, /* todo_flags_start */
10664 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10667 class pass_expand_omp_ssa : public gimple_opt_pass
10669 public:
10670 pass_expand_omp_ssa (gcc::context *ctxt)
10671 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10674 /* opt_pass methods: */
10675 bool gate (function *fun) final override
10677 return !(fun->curr_properties & PROP_gimple_eomp);
10679 unsigned int execute (function *) final override
10681 return execute_expand_omp ();
10683 opt_pass * clone () final override
10685 return new pass_expand_omp_ssa (m_ctxt);
10688 }; // class pass_expand_omp_ssa
10690 } // anon namespace
10692 gimple_opt_pass *
10693 make_pass_expand_omp_ssa (gcc::context *ctxt)
10695 return new pass_expand_omp_ssa (ctxt);
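Both pass classes follow the gimple_opt_pass protocol: gate () decides whether execute () runs, and clone () (defined only by pass_expand_omp_ssa, which is scheduled at more than one point in the pipeline) lets the pass manager make per-instance copies. A toy mirror of that shape, with invented class names:

#include <cstdio>
#include <memory>

/* Toy mirror of the opt_pass protocol used above.  */
struct toy_pass
{
  virtual ~toy_pass () {}
  virtual bool gate () { return true; }
  virtual unsigned execute () = 0;
  virtual toy_pass *clone () = 0;
};

struct toy_expand_pass : toy_pass
{
  bool enabled;
  explicit toy_expand_pass (bool e) : enabled (e) {}
  bool gate () override { return enabled; }
  unsigned execute () override
  {
    std::printf ("expanding OMP regions...\n");
    return 0;
  }
  toy_pass *clone () override { return new toy_expand_pass (*this); }
};

int
main ()
{
  std::unique_ptr<toy_pass> first (new toy_expand_pass (true));
  if (first->gate ())
    first->execute ();

  /* A second instance later in the pipeline reuses the clone hook.  */
  std::unique_ptr<toy_pass> second (first->clone ());
  if (second->gate ())
    second->execute ();
}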
10698 /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10699 GIMPLE_* codes. */
10701 bool
10702 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10703 int *region_idx)
10705 gimple *last = last_stmt (bb);
10706 enum gimple_code code = gimple_code (last);
10707 struct omp_region *cur_region = *region;
10708 bool fallthru = false;
10710 switch (code)
10712 case GIMPLE_OMP_PARALLEL:
10713 case GIMPLE_OMP_FOR:
10714 case GIMPLE_OMP_SINGLE:
10715 case GIMPLE_OMP_TEAMS:
10716 case GIMPLE_OMP_MASTER:
10717 case GIMPLE_OMP_MASKED:
10718 case GIMPLE_OMP_SCOPE:
10719 case GIMPLE_OMP_TASKGROUP:
10720 case GIMPLE_OMP_CRITICAL:
10721 case GIMPLE_OMP_SECTION:
10722 cur_region = new_omp_region (bb, code, cur_region);
10723 fallthru = true;
10724 break;
10726 case GIMPLE_OMP_TASK:
10727 cur_region = new_omp_region (bb, code, cur_region);
10728 fallthru = true;
10729 if (gimple_omp_task_taskwait_p (last))
10730 cur_region = cur_region->outer;
10731 break;
10733 case GIMPLE_OMP_ORDERED:
10734 cur_region = new_omp_region (bb, code, cur_region);
10735 fallthru = true;
10736 if (omp_find_clause (gimple_omp_ordered_clauses
10737 (as_a <gomp_ordered *> (last)),
10738 OMP_CLAUSE_DEPEND))
10739 cur_region = cur_region->outer;
10740 break;
10742 case GIMPLE_OMP_TARGET:
10743 cur_region = new_omp_region (bb, code, cur_region);
10744 fallthru = true;
10745 switch (gimple_omp_target_kind (last))
10747 case GF_OMP_TARGET_KIND_REGION:
10748 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10749 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10750 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10751 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10752 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10753 break;
10754 case GF_OMP_TARGET_KIND_UPDATE:
10755 case GF_OMP_TARGET_KIND_ENTER_DATA:
10756 case GF_OMP_TARGET_KIND_EXIT_DATA:
10757 case GF_OMP_TARGET_KIND_DATA:
10758 case GF_OMP_TARGET_KIND_OACC_DATA:
10759 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10760 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10761 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10762 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10763 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10764 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10765 cur_region = cur_region->outer;
10766 break;
10767 default:
10768 gcc_unreachable ();
10770 break;
10772 case GIMPLE_OMP_SECTIONS:
10773 cur_region = new_omp_region (bb, code, cur_region);
10774 fallthru = true;
10775 break;
10777 case GIMPLE_OMP_SECTIONS_SWITCH:
10778 fallthru = false;
10779 break;
10781 case GIMPLE_OMP_ATOMIC_LOAD:
10782 case GIMPLE_OMP_ATOMIC_STORE:
10783 fallthru = true;
10784 break;
10786 case GIMPLE_OMP_RETURN:
10787 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10788 somewhere other than the next block. This will be
10789 created later. */
10790 cur_region->exit = bb;
10791 if (cur_region->type == GIMPLE_OMP_TASK)
10792 /* Add an edge corresponding to not scheduling the task
10793 immediately. */
10794 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10795 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10796 cur_region = cur_region->outer;
10797 break;
10799 case GIMPLE_OMP_CONTINUE:
10800 cur_region->cont = bb;
10801 switch (cur_region->type)
10803 case GIMPLE_OMP_FOR:
10804 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10805 successor edges as abnormal to prevent splitting
10806 them. */
10807 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10808 /* Make the loopback edge. */
10809 make_edge (bb, single_succ (cur_region->entry),
10810 EDGE_ABNORMAL);
10812 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10813 corresponds to the case that the body of the loop
10814 is not executed at all. */
10815 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10816 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10817 fallthru = false;
10818 break;
10820 case GIMPLE_OMP_SECTIONS:
10821 /* Wire up the edges into and out of the nested sections. */
10823 basic_block switch_bb = single_succ (cur_region->entry);
10825 struct omp_region *i;
10826 for (i = cur_region->inner; i ; i = i->next)
10828 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10829 make_edge (switch_bb, i->entry, 0);
10830 make_edge (i->exit, bb, EDGE_FALLTHRU);
10833 /* Make the loopback edge to the block with
10834 GIMPLE_OMP_SECTIONS_SWITCH. */
10835 make_edge (bb, switch_bb, 0);
10837 /* Make the edge from the switch to exit. */
10838 make_edge (switch_bb, bb->next_bb, 0);
10839 fallthru = false;
10841 break;
10843 case GIMPLE_OMP_TASK:
10844 fallthru = true;
10845 break;
10847 default:
10848 gcc_unreachable ();
10850 break;
10852 default:
10853 gcc_unreachable ();
10856 if (*region != cur_region)
10858 *region = cur_region;
10859 if (cur_region)
10860 *region_idx = cur_region->entry->index;
10861 else
10862 *region_idx = 0;
10865 return fallthru;
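The GIMPLE_OMP_CONTINUE handling for a GIMPLE_OMP_FOR region wires four edges: entry to body (abnormal, to keep it from being split), a loopback from the continue block to the entry's successor, entry to exit for the zero-trip case, and continue to exit as the fallthrough out of the loop. A toy adjacency sketch of just that wiring, with indices standing in for basic blocks:

#include <cstdio>
#include <utility>
#include <vector>

int
main ()
{
  /* Toy CFG indices: 0 = GIMPLE_OMP_FOR entry, 1 = loop body,
     2 = GIMPLE_OMP_CONTINUE block, 3 = exit (GIMPLE_OMP_RETURN).  */
  enum { ENTRY, BODY, CONT, EXIT };
  static const char *const names[] = { "entry", "body", "cont", "exit" };
  std::vector<std::pair<int, int> > edges;

  /* The four edges made in the GIMPLE_OMP_FOR arm above.  */
  edges.push_back (std::make_pair (ENTRY, BODY));  /* abnormal: don't split */
  edges.push_back (std::make_pair (CONT, BODY));   /* loopback edge */
  edges.push_back (std::make_pair (ENTRY, EXIT));  /* body may never run */
  edges.push_back (std::make_pair (CONT, EXIT));   /* fallthru out */

  for (size_t i = 0; i < edges.size (); i++)
    std::printf ("%s -> %s\n", names[edges[i].first], names[edges[i].second]);
}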